/**
 * Entry point: loads an ARPA language model and a document, computes the
 * document's log-2 perplexity, and prints it to stderr.
 *
 * @param args positional arguments: the document file and the LM file
 * @throws Exception if either file cannot be opened or parsed
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File doc = opts.roFile("document", "The document on which to estimate the perplexity");
    final File lmFile = opts.roFile("lm", "The language model file");
    if (!opts.verify(Perplexity.class)) {
        return;
    }
    final ARPALM lm = new ARPALM(lmFile);
    // FIX: try-with-resources closes the Scanner (and its file handle) even
    // if calculatePerplexity throws; the original never closed it.
    try (final Scanner scanner = new Scanner(doc)) {
        final double perplexity = calculatePerplexity(scanner, lm);
        System.err.println("Log2 Perplexity=" + (perplexity));
    }
}

// log10(2); presumably used elsewhere in this class to convert base-10
// logs to base-2 — TODO confirm (not referenced in this method).
private static final double LOG_10_2 = 0.3010299956639812;
/**
 * Entry point: reads a count file, computes the count-of-counts (up to H)
 * and the mean, and prints both to the chosen output.
 *
 * @param args positional arguments: the count file and H, plus optional output
 * @throws Exception if the count file cannot be read
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File countedFile = opts.roFile("countFile", "The file with counts");
    final int H = opts.nonNegIntValue("H", "The maximum count of count to store");
    final PrintStream out = opts.outFileOrStdout();
    if (!opts.verify(COCAndMean.class)) {
        return;
    }
    // FIX: close the Scanner when done; the original leaked the file handle.
    // (out is deliberately left open — it may be System.out.)
    try (final Scanner in = new Scanner(countedFile)) {
        final Data data = calculate(in, H);
        out.println(Arrays.toString(data.CoC));
        out.println(data.mean);
    }
}
// NOTE(review): this snippet appears truncated — the method body is cut off
// after the option declarations (no verify() call, no body, no closing brace
// visible here). Positional options declared: trainFile (training corpus),
// queryFile (document to adapt to), freqFile, wordMap, N (largest n-gram;
// "largetst" typo is in the help string), and test.txt (evaluation document).
// Left byte-identical: rewriting an incomplete definition is unsafe.
public static void main(String[] args) throws Exception { final CLIOpts opts = new CLIOpts(args); final File inFile = opts.roFile("trainFile", "The training corpus"); final File queryFile = opts.roFile("queryFile", "The file to adapt to"); final File freqFile = opts.roFile("freqFile", "The frequency file"); final File wordMapFile = opts.roFile("wordMap", "The word map"); final int N = opts.nonNegIntValue("N", "The largetst n-gram to consider"); final File testDoc = opts.roFile("test.txt", "The test document to evaluate on");
public static void main(String[] args) throws Exception { final CLIOpts opts = new CLIOpts(args); final SourceType sourceType = opts.enumOptional("t", SourceType.class, SourceType.FIRST, "The type of source: SIMPLE, FIRST or SECOND"); final File corpus = opts.roFile("corpus[.gz|.bz2]", "The corpus"); final int N = opts.nonNegIntValue("N", "The largest n-gram to count for"); final File out = opts.woFile("out", "The files to write to"); if (!opts.verify(DoCount.class)) { return; } final PrintWriter[] outs = new PrintWriter[N * 2]; for (int i = 0; i < N; i++) { outs[i] = new PrintWriter(out.getName() + "." + i); if (i != 0) { outs[i + N] = new PrintWriter(out.getName() + ".h" + i); } } // doCount(corpus, N, outs, new BetaSimFunction() { // @Override // public double score(Vector<Integer> document) { // return 1.0; // } // }, sourceType, 0); } private static final DecimalFormat df = new DecimalFormat("0.000000000");
/**
 * Entry point: runs uniq(...) over the given file, writing the result to a
 * file or to stdout.
 *
 * @param args positional argument: the input file, plus optional output
 * @throws Exception if the input cannot be read
 */
public static void main(String[] args) throws Exception {
    final CLIOpts options = new CLIOpts(args);
    final File input = options.roFile("file", "The file to count uniqueness among");
    final PrintStream sink = options.outFileOrStdout();
    if (!options.verify(Uniq.class)) {
        return;
    }
    uniq(input, sink);
}
/**
 * Entry point: parses all BetaLM compilation options and delegates to
 * compile(...). Option declarations are kept in their original order, since
 * positional arguments are consumed in declaration order.
 *
 * @param args command-line arguments (flags, options, and positional files)
 * @throws Exception if any input file cannot be read
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    // Build the help text listing every available BetaLM method,
    // each followed by a single space.
    final StringBuilder methodHelp = new StringBuilder("The BetaLM method: ");
    for (BetaLMImpl.Method m : BetaLMImpl.Method.values()) {
        methodHelp.append(m.name() + " ");
    }
    final BetaLMImpl.Method betaMethod = opts.enumOptional("b", BetaLMImpl.Method.class, null, methodHelp.toString());
    final SourceType sourceType = opts.enumOptional("t", SourceType.class, SourceType.FIRST, "The type of source: SIMPLE, FIRST or SECOND");
    final Smoothing smoothing = opts.enumOptional("smooth", Smoothing.class, Smoothing.NONE, "The type of smoothing: NONE, ADD_ALPHA, GOOD_TURING, KNESER_NEY");
    final File queryFile = opts.roFile("f", "The query file (ontology)", null);
    final double smoothness = opts.doubleValue("s", 1.0, "The selective smoothing parameter");
    final double alpha = opts.doubleValue("a", 0.0, "The minimal smoothing parameter");
    final int salience = opts.intValue("salience", "The salience (filtering on query document)", -1);
    final int stopWordCount = opts.intValue("stop", "The number of stop words to ignore", 150);
    final boolean writeDocs = opts.flag("writeDocs", "Write documents in corpus with ranking");
    final File inFile = opts.roFile("corpus[.gz|.bz2]", "The corpus file");
    final int N = opts.nonNegIntValue("N", "The largest n-gram to calculate");
    final File wordMapFile = opts.roFile("wordMap", "The word map file");
    final File freqFile = opts.roFile("freqs", "The frequency file for the corpus");
    final PrintStream out = opts.outFileOrStdout();
    if (!opts.verify(CompileModel.class)) {
        return;
    }
    compile(wordMapFile, freqFile, stopWordCount, betaMethod, inFile, sourceType,
            queryFile, smoothness, salience, alpha, N, writeDocs, out);
}
/**
 * Entry point: computes the most salient n-grams of a corpus relative to a
 * reference ontology and writes those above the threshold to a binary file
 * (length, token ids, score — all via DataOutputStream).
 *
 * @param args positional arguments: reference, corpus, wordMap, N, threshold, out
 * @throws Exception if any input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final SourceType sourceType = opts.enumOptional("t", SourceType.class, SourceType.FIRST, "The corpus type");
    final File refFile = opts.roFile("reference", "The reference ontology");
    final File corpusFile = opts.roFile("corpus", "The corpus file");
    final File wordMapFile = opts.roFile("wordMap", "The word map");
    final int N = opts.intValue("N", "The maximal n-gram to consider");
    final double thresh = opts.doubleValue("threshold", "The threshold of salience to filter at");
    final File outFile = opts.woFile("out", "The file to write the salient n-gram list to");
    if (!opts.verify(MostSalient.class)) {
        return;
    }
    final int W = WordMap.calcW(wordMapFile);
    // NOTE(review): wordMap is computed but never used in this method; kept
    // because inverseFromFile performs I/O whose failure behavior callers
    // may rely on — confirm before removing.
    final String[] wordMap = WordMap.inverseFromFile(wordMapFile, W, true);
    final Object2DoubleMap<NGram> salientNGrams = mostSalientNGrams(refFile, corpusFile, N, sourceType);
    // FIX: try-with-resources guarantees the stream is closed (and thereby
    // flushed) even if a write throws; the original leaked it on exceptions.
    try (final DataOutputStream out = new DataOutputStream(CLIOpts.openOutputAsMaybeZipped(outFile))) {
        for (Object2DoubleMap.Entry<NGram> e : salientNGrams.object2DoubleEntrySet()) {
            if (e.getDoubleValue() > thresh) {
                final NGram ng = e.getKey();
                out.writeInt(ng.ngram.length);
                for (int i = 0; i < ng.ngram.length; i++) {
                    out.writeInt(ng.ngram[i]);
                }
                out.writeDouble(e.getDoubleValue());
            }
        }
    }
}
}
/**
 * Entry point: prints a histogram of the given history list, optionally in
 * the "future history" format.
 *
 * @param args positional argument: the history file; flag: -future
 * @throws Exception if the history file cannot be read
 */
public static void main(String[] args) throws Exception {
    final CLIOpts options = new CLIOpts(args);
    final File historyFile = options.roFile("history", "The history list");
    final boolean futureFormat = options.flag("future", "Future history format");
    final PrintStream sink = options.outFileOrStdout();
    if (!options.verify(Hist.class)) {
        return;
    }
    hist(historyFile, futureFormat, sink);
}
/**
 * Entry point: trains a cross-lingual PLSA model on the corpus and writes
 * the resulting model file. alpha defaults to 2/K when not given (the -1
 * sentinel is the documented default, so the exact == compare is safe).
 *
 * @param args corpus, hyper-parameters, dimensions, iteration count, output
 * @throws Exception if the corpus cannot be read or the model not written
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File corpus = opts.roFile("corpus[.gz|bz2]", "The corpus file");
    double alpha = opts.doubleValue("alpha", -1, "The alpha parameter");
    final double beta = opts.doubleValue("beta", 0.01, "The beta parameter");
    final int W = opts.intValue("W", "The number of distinct tokens");
    final int J = opts.intValue("J", "The number of documents (per language)");
    final int K = opts.intValue("K", "The number of topics");
    final int N = opts.intValue("N", "The number of iterations to perform");
    final File outFile = opts.woFile("output", "The file to write the SVD to");
    if (!opts.verify(CPLSATrain.class)) {
        return;
    }
    // -1 means "unset": fall back to the 2/K heuristic.
    if (alpha == -1.0) {
        alpha = 2.0 / K;
    }
    if (alpha < 0 || beta < 0) {
        throw new IllegalArgumentException("Alpha and beta cannot be negative");
    }
    System.err.println("Preparing corpus");
    final CPLSATrain trainer = new CPLSATrain(corpus, J, W, K, alpha, beta);
    trainer.solve(N, 1e-12, true);
    System.err.println("Writing model");
    trainer.writeModel(CLIOpts.openOutputAsMaybeZipped(outFile));
}
/**
 * Entry point: trains an LDA topic model on the corpus and writes the model
 * file. alpha defaults to 2/K when not given (the -1 sentinel is the
 * documented default, so the exact == compare is safe).
 *
 * @param args hyper-parameters, corpus, dimensions, iteration count, output
 * @throws Exception if the corpus cannot be read or the model not written
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    double alpha = opts.doubleValue("alpha", -1, "The alpha parameter");
    final double beta = opts.doubleValue("beta", 0.01, "The beta parameter");
    final File corpus = opts.roFile("corpus[.gz|.bz2]", "The corpus file");
    final int W = opts.intValue("W", "The number of distinct tokens");
    final int J = opts.intValue("J", "The number of documents (per language)");
    final int K = opts.intValue("K", "The number of topics");
    final int N = opts.intValue("N", "The number of iterations");
    final File outFile = opts.woFile("model[.gz|.bz2]", "The file to write the model to");
    if (!opts.verify(LDATrain.class)) {
        return;
    }
    // -1 means "unset": fall back to the 2/K heuristic.
    if (alpha == -1.0) {
        alpha = 2.0 / K;
    }
    if (alpha < 0 || beta < 0) {
        throw new IllegalArgumentException("Alpha and beta cannot be negative");
    }
    final LDATrain trainer = new LDATrain(corpus, K, J, W, alpha, beta);
    trainer.train(N);
    trainer.writeModel(outFile);
}
}