/**
 * CLI entry point: trains a CPLSA model on a corpus and writes the model out.
 * Usage errors are reported by {@code opts.verify}, after which we simply return.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    // Option order matters: CLIOpts appears to consume positional arguments in call order.
    final File corpus = opts.roFile("corpus[.gz|bz2]", "The corpus file");
    final double alpha = opts.doubleValue("alpha", -1, "The alpha parameter");
    final double beta = opts.doubleValue("beta", 0.01, "The beta parameter");
    final int W = opts.intValue("W", "The number of distinct tokens");
    final int J = opts.intValue("J", "The number of documents (per language)");
    final int K = opts.intValue("K", "The number of topics");
    final int N = opts.intValue("N", "The number of iterations to perform");
    final File outFile = opts.woFile("output", "The file to write the SVD to");
    if (!opts.verify(CPLSATrain.class)) {
        return;
    }
    // -1 is the "not supplied" sentinel: default alpha to 2/K.
    final double resolvedAlpha = (alpha == -1.0) ? 2.0 / K : alpha;
    if (resolvedAlpha < 0 || beta < 0) {
        throw new IllegalArgumentException("Alpha and beta cannot be negative");
    }
    System.err.println("Preparing corpus");
    final CPLSATrain train = new CPLSATrain(corpus, J, W, K, resolvedAlpha, beta);
    train.solve(N, 1e-12, true);
    System.err.println("Writing model");
    train.writeModel(CLIOpts.openOutputAsMaybeZipped(outFile));
}
/**
 * Returns an iterator over the ints stored in {@code file}, transparently
 * un-zipping if needed (via {@code CLIOpts.openInputAsMaybeZipped}).
 *
 * @throws java.io.UncheckedIOException if the file cannot be opened
 */
@Override
public IntIterator iterator() {
    try {
        return new DataInputStreamAsIntIterator(CLIOpts.openInputAsMaybeZipped(file));
    } catch (IOException x) {
        // UncheckedIOException is the idiomatic wrapper for IOException in an
        // Iterable; it preserves the cause and, being a RuntimeException
        // subclass, remains compatible with existing catch clauses.
        throw new java.io.UncheckedIOException(x);
    }
}
/**
 * CLI entry point: parses model-compilation options and delegates to
 * {@code compile(...)}. Option-declaration order is significant and preserved.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    // Assemble the help text listing every available BetaLM method.
    final StringBuilder betaHelp = new StringBuilder("The BetaLM method: ");
    for (BetaLMImpl.Method m : BetaLMImpl.Method.values()) {
        betaHelp.append(m.name()).append(" ");
    }
    final BetaLMImpl.Method betaMethod = opts.enumOptional("b", BetaLMImpl.Method.class, null, betaHelp.toString());
    final SourceType sourceType = opts.enumOptional("t", SourceType.class, SourceType.FIRST, "The type of source: SIMPLE, FIRST or SECOND");
    final Smoothing smoothing = opts.enumOptional("smooth", Smoothing.class, Smoothing.NONE, "The type of smoothing: NONE, ADD_ALPHA, GOOD_TURING, KNESER_NEY");
    final File queryFile = opts.roFile("f", "The query file (ontology)", null);
    final double smoothness = opts.doubleValue("s", 1.0, "The selective smoothing parameter");
    final double alpha = opts.doubleValue("a", 0.0, "The minimal smoothing parameter");
    final int salience = opts.intValue("salience", "The salience (filtering on query document)", -1);
    final int stopWordCount = opts.intValue("stop", "The number of stop words to ignore", 150);
    final boolean writeDocs = opts.flag("writeDocs", "Write documents in corpus with ranking");
    final File inFile = opts.roFile("corpus[.gz|.bz2]", "The corpus file");
    final int N = opts.nonNegIntValue("N", "The largest n-gram to calculate");
    final File wordMapFile = opts.roFile("wordMap", "The word map file");
    final File freqFile = opts.roFile("freqs", "The frequency file for the corpus");
    final PrintStream out = opts.outFileOrStdout();
    if (!opts.verify(CompileModel.class)) {
        return;
    }
    compile(wordMapFile, freqFile, stopWordCount, betaMethod, inFile, sourceType,
            queryFile, smoothness, salience, alpha, N, writeDocs, out);
}
/**
 * CLI entry point: reads a history list and prints the histogram via
 * {@code hist(...)}.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File histFile = opts.roFile("history", "The history list");
    final boolean future = opts.flag("future", "Future history format");
    final PrintStream out = opts.outFileOrStdout();
    // Bail out (usage already printed) when the arguments do not validate.
    if (!opts.verify(Hist.class)) {
        return;
    }
    hist(histFile, future, out);
}
/**
 * CLI entry point for corpus n-gram counting.
 *
 * NOTE(review): the actual counting call is commented out below, so as written
 * this only creates the (empty) output writers. The writers are never closed
 * here — presumably doCount was expected to flush/close them; confirm before
 * re-enabling.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final SourceType sourceType = opts.enumOptional("t", SourceType.class, SourceType.FIRST, "The type of source: SIMPLE, FIRST or SECOND");
    final File corpus = opts.roFile("corpus[.gz|.bz2]", "The corpus");
    final int N = opts.nonNegIntValue("N", "The largest n-gram to count for");
    final File out = opts.woFile("out", "The files to write to");
    if (!opts.verify(DoCount.class)) {
        return;
    }
    // Slots [0..N) hold per-order writers; slots [N..2N) hold the ".h" variants.
    // NOTE(review): outs[N] is never assigned (the i == 0 case is skipped), so
    // it stays null — verify that downstream code never indexes slot N.
    final PrintWriter[] outs = new PrintWriter[N * 2];
    for (int i = 0; i < N; i++) {
        outs[i] = new PrintWriter(out.getName() + "." + i);
        if (i != 0) {
            outs[i + N] = new PrintWriter(out.getName() + ".h" + i);
        }
    }
    // Disabled counting pass — kept for reference; re-enable deliberately.
    // doCount(corpus, N, outs, new BetaSimFunction() {
    //     @Override
    //     public double score(Vector<Integer> document) {
    //         return 1.0;
    //     }
    // }, sourceType, 0);
}

// Shared fixed-precision formatter for count output (9 decimal places).
private static final DecimalFormat df = new DecimalFormat("0.000000000");
/**
 * CLI entry point: computes the log-2 perplexity of a document under an ARPA
 * language model and reports it on stderr.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File doc = opts.roFile("document", "The document on which to estimate the perplexity");
    final File lmFile = opts.roFile("lm", "The language model file");
    if (!opts.verify(Perplexity.class)) {
        return;
    }
    final ARPALM lm = new ARPALM(lmFile);
    // try-with-resources: the original leaked the Scanner's file handle.
    try (Scanner scanner = new Scanner(doc)) {
        final double perplexity = calculatePerplexity(scanner, lm);
        System.err.println("Log2 Perplexity=" + perplexity);
    }
}

// log10(2): conversion factor between base-10 and base-2 logarithms.
private static final double LOG_10_2 = 0.3010299956639812;
/**
 * CLI entry point: counts uniqueness in a file via {@code uniq(...)}.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File file = opts.roFile("file", "The file to count uniqueness among");
    final PrintStream out = opts.outFileOrStdout();
    // Usage errors are already reported by verify; nothing more to do here.
    if (!opts.verify(Uniq.class)) {
        return;
    }
    uniq(file, out);
}
/**
 * CLI entry point: trains an LDA topic model and writes it to disk.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final double alphaOpt = opts.doubleValue("alpha", -1, "The alpha parameter");
    final double beta = opts.doubleValue("beta", 0.01, "The beta parameter");
    final File corpus = opts.roFile("corpus[.gz|.bz2]", "The corpus file");
    final int W = opts.intValue("W", "The number of distinct tokens");
    final int J = opts.intValue("J", "The number of documents (per language)");
    final int K = opts.intValue("K", "The number of topics");
    final int N = opts.intValue("N", "The number of iterations");
    final File outFile = opts.woFile("model[.gz|.bz2]", "The file to write the model to");
    if (!opts.verify(LDATrain.class)) {
        return;
    }
    // -1 is the "unset" sentinel; the default alpha scales with topic count.
    final double alpha = (alphaOpt == -1.0) ? 2.0 / K : alphaOpt;
    if (alpha < 0 || beta < 0) {
        throw new IllegalArgumentException("Alpha and beta cannot be negative");
    }
    final LDATrain ldaTrain = new LDATrain(corpus, K, J, W, alpha, beta);
    ldaTrain.train(N);
    ldaTrain.writeModel(outFile);
}
}
/**
 * CLI entry point: reads a count file, computes the count-of-counts vector and
 * the mean, and prints both.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File countedFile = opts.roFile("countFile", "The file with counts");
    final int H = opts.nonNegIntValue("H", "The maximum count of count to store");
    final PrintStream out = opts.outFileOrStdout();
    if (!opts.verify(COCAndMean.class)) {
        return;
    }
    // try-with-resources: the original never closed the Scanner.
    try (Scanner in = new Scanner(countedFile)) {
        final Data data = calculate(in, H);
        out.println(Arrays.toString(data.CoC));
        out.println(data.mean);
    }
}
/**
 * Copies the ints of {@code corpus} into a temp file, interleaving a zero
 * after each value (an empty assignment slot), and wraps the result in an
 * {@link AssignmentBuffer}.
 *
 * @param corpus the (possibly zipped) corpus of ints
 * @return a buffer backed by the interleaved temp file
 * @throws IOException if the corpus cannot be read or the temp file written
 */
public static AssignmentBuffer interleavedFrom(File corpus) throws IOException {
    final File tmpFile = File.createTempFile("assign", ".buf");
    tmpFile.deleteOnExit();
    // try-with-resources: the original leaked both streams on an I/O error.
    try (DataInputStream data = new DataInputStream(CLIOpts.openInputAsMaybeZipped(corpus));
         DataOutputStream out = new DataOutputStream(new FileOutputStream(tmpFile))) {
        // Detect end-of-stream via EOFException alone, the pattern every other
        // corpus reader in this code base uses. The original also gated on
        // available() > 0, which is unreliable for compressed streams and
        // could truncate the copy early.
        while (true) {
            try {
                final int i = data.readInt();
                out.writeInt(i);
                out.writeInt(0); // empty assignment slot for this token
            } catch (EOFException x) {
                break;
            }
        }
        out.flush();
    }
    // The channel is deliberately left open: ownership passes to the buffer.
    return new AssignmentBuffer(new RandomAccessFile(tmpFile, "rw").getChannel(), 4194304, tmpFile.length());
}
/**
 * CLI entry point: computes salient n-grams against a reference ontology and
 * writes those above the threshold to a binary list
 * (record format: length, token ints, salience double).
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final SourceType sourceType = opts.enumOptional("t", SourceType.class, SourceType.FIRST, "The corpus type");
    final File refFile = opts.roFile("reference", "The reference ontology");
    final File corpusFile = opts.roFile("corpus", "The corpus file");
    final File wordMapFile = opts.roFile("wordMap", "The word map");
    final int N = opts.intValue("N", "The maximal n-gram to consider");
    final double thresh = opts.doubleValue("threshold", "The threshold of salience to filter at");
    final File outFile = opts.woFile("out", "The file to write the salient n-gram list to");
    if (!opts.verify(MostSalient.class)) {
        return;
    }
    final int W = WordMap.calcW(wordMapFile);
    final String[] wordMap = WordMap.inverseFromFile(wordMapFile, W, true);
    final Object2DoubleMap<NGram> salientNGrams = mostSalientNGrams(refFile, corpusFile, N, sourceType);
    final DataOutputStream out = new DataOutputStream(CLIOpts.openOutputAsMaybeZipped(outFile));
    for (Object2DoubleMap.Entry<NGram> entry : salientNGrams.object2DoubleEntrySet()) {
        final double score = entry.getDoubleValue();
        if (score <= thresh) {
            continue; // below threshold: skip (guard clause)
        }
        final int[] tokens = entry.getKey().ngram;
        out.writeInt(tokens.length);
        for (final int token : tokens) {
            out.writeInt(token);
        }
        out.writeDouble(score);
    }
    out.flush();
    out.close();
}
}
/**
 * CLI entry point: filters a corpus by a salient n-gram list.
 *
 * NOTE(review): this block appears truncated/garbled in this view — the brace
 * opened by the verify guard is never closed, making the statements after
 * {@code return;} unreachable as written. Confirm against the original file
 * before editing.
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File corpusFile = opts.roFile("corpus", "The corpus");
    final File salientFile = opts.roFile("salientList", "The salient list");
    final File wordMapFile = opts.roFile("wordMap", "The wordmap");
    final PrintStream out = opts.outFileOrStdout();
    if (!opts.verify(FilterBySalientList.class)) {
        return;
    // NOTE(review): truncation point — closing brace missing from here on.
    final DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(salientFile));
    final Object2DoubleMap<NGram> ngrams = new Object2DoubleRBTreeMap<NGram>();
    int N = 0;
/**
 * Loads a salience metric from a binary file of records
 * (length n, n token ints, salience double), reading until end-of-file.
 *
 * @param salienceFile the (possibly zipped) salience list
 * @return the metric over all n-grams read; N is the largest n-gram order seen
 * @throws IOException if the file cannot be opened or read
 */
public static SalienceMetric fromFile(final File salienceFile) throws IOException {
    final Object2DoubleMap<NGram> ngrams = new Object2DoubleRBTreeMap<NGram>();
    int N = 0;
    // try-with-resources: the original only closed the stream on the success
    // path, leaking it when readInt/readDouble threw a non-EOF IOException.
    try (DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(salienceFile))) {
        while (true) {
            try {
                final int n = in.readInt();
                N = Math.max(N, n);
                final int[] ng = new int[n];
                for (int i = 0; i < n; i++) {
                    ng[i] = in.readInt();
                }
                ngrams.put(new NGram(ng), in.readDouble());
            } catch (EOFException x) {
                // EOF is the normal termination condition for this format.
                break;
            }
        }
    }
    return new SalienceMetric(ngrams, N);
}
/**
 * CLI entry point for an alpha/sigma grid search.
 *
 * NOTE(review): this block is truncated in this view — it ends inside the
 * verify guard; the remainder of the method is not visible. Also note the typo
 * "largetst" in the N option help text (left untouched here because it is a
 * runtime string).
 */
public static void main(String[] args) throws Exception {
    final CLIOpts opts = new CLIOpts(args);
    final File inFile = opts.roFile("trainFile", "The training corpus");
    final File queryFile = opts.roFile("queryFile", "The file to adapt to");
    final File freqFile = opts.roFile("freqFile", "The frequency file");
    final File wordMapFile = opts.roFile("wordMap", "The word map");
    final int N = opts.nonNegIntValue("N", "The largetst n-gram to consider");
    final File testDoc = opts.roFile("test.txt", "The test document to evaluate on");
    if (!opts.verify(AlphaSigmaGrid.class)) {
        return;
/**
 * Computes the running mean of {@code function.scoreNGrams} over the documents
 * of a corpus. Documents are delimited by a 0 token; which documents are
 * scored depends on {@code sourceType} (FIRST = odd-indexed, SECOND =
 * even-indexed, SIMPLE = all — as the parity tests below show).
 *
 * @param function   the similarity function to average
 * @param corpus     the (possibly zipped) int-encoded corpus
 * @param sourceType which documents of each pair to score
 * @param W          vocabulary size, forwarded to the scorer
 * @return the mean score over the selected documents (0.0 for an empty corpus)
 * @throws IOException if the corpus cannot be read
 */
public static double calcBetaMean(BetaSimFunction function, File corpus, SourceType sourceType, int W) throws IOException {
    double mean = 0.0;
    int docsRead = 0;
    final IntArrayList doc = new IntArrayList();
    // try-with-resources: the original never closed the stream.
    try (DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(corpus))) {
        while (true) {
            try {
                final int w = in.readInt();
                if (w == 0) { // 0 terminates the current document
                    if ((docsRead % 2 == 1 && sourceType == SourceType.FIRST)
                            || (docsRead % 2 == 0 && sourceType == SourceType.SECOND)
                            || sourceType == SourceType.SIMPLE) {
                        final double score = function.scoreNGrams(doc, W);
                        docsRead++;
                        // Incremental mean update: mean_n = score/n + ((n-1)/n)*mean_{n-1}.
                        mean = score / docsRead + ((double) (docsRead - 1) / (double) docsRead) * mean;
                        doc.clear();
                    } else {
                        docsRead++;
                        doc.clear();
                    }
                } else {
                    doc.add(w);
                }
            } catch (EOFException x) {
                break; // normal end of corpus
            }
        }
    }
    return mean;
}
/**
 * Counts n-grams up to order {@code N} in the corpus, weighting by
 * {@code beta} — presumably; the body is cut off in this view, so the exact
 * behavior cannot be confirmed here.
 *
 * NOTE(review): truncated block — the {@code while (true)} body and the
 * method's closing brace are not visible.
 */
public static void doCount(final File corpus, final int N, final PrintWriter[] outs, BetaSimFunction beta, SourceType sourceType, int W) throws IOException {
    int read = 0, docNo = 0;
    final DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(corpus));
    // Two buffers, apparently one per language side of a document pair — TODO confirm.
    final IntArrayList[] doc = {new IntArrayList(), new IntArrayList()};
    while (true) {
/**
 * Filters corpus documents by their salience against {@code ngrams}.
 *
 * NOTE(review): truncated block — only the local declarations are visible;
 * the reading loop and closing brace are cut off in this view.
 */
private static void filterBySalients(File corpusFile, Object2DoubleMap<NGram> ngrams, PrintStream out, String[] wordMap, int N, SourceType sourceType) throws IOException {
    // Paired buffers and salience accumulators, presumably one per language
    // side of a document pair — TODO confirm against the full method body.
    final IntList doc[] = {new IntList(), new IntList()};
    final double[] salience = {0.0, 0.0};
    final DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(corpusFile));
// NOTE(review): this span begins and ends mid-method (an if/else over how the
// query is loaded); the enclosing definition is not visible, so the code is
// left byte-identical and only annotated.
// Branch 1 (apparently when salience filtering is requested — confirm the
// missing condition above): load the query, compute the salient token set,
// and restrict the query vector to it.
final SparseIntArray binQuery2 = SparseIntArray.fromBinary(CLIOpts.openInputAsMaybeZipped(queryFile), Integer.MAX_VALUE);
final IntSet salient = MostSalient.mostSalient(queryFile, inFile, W, salience, sourceType);
DoCount.queryFile = salient; // NOTE(review): mutates DoCount's static state — side effect to confirm
binQuery = MostSalient.filter(salient, binQuery2);
} else {
    // Branch 2: load the query vector unfiltered.
    binQuery = SparseIntArray.fromBinary(CLIOpts.openInputAsMaybeZipped(queryFile), Integer.MAX_VALUE);
    DoCount.queryFile = readQueryDoc(queryFile);
public static Object2DoubleMap<NGram> countInReference(final File reference, final int N) throws IOException { final Object2DoubleMap<NGram> counts = new Object2DoubleRBTreeMap<NGram>(); final DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(reference)); final NGramCarousel carousel = new NGramCarousel(N); int docNo = 0; while (true) { try { final int w = in.readInt(); if (w != 0) { //if (inDoc(docNo, sourceType)) { carousel.offer(w); for (int n = 1; n <= carousel.maxNGram(); n++) { final NGram ngram = carousel.ngram(n); if (counts.containsKey(ngram)) { counts.put(ngram, counts.getDouble(ngram) + 1.0); } else { counts.put(ngram, 1.0); } } //} } else { carousel.reset(); docNo++; } } catch (EOFException x) { break; } } return counts; }
/**
 * Counts corpus occurrences of the n-grams in {@code referenceNGrams} —
 * presumably restricted to documents selected by {@code sourceType}; the body
 * is cut off in this view, so this cannot be confirmed here.
 *
 * NOTE(review): truncated block — only the local declarations are visible;
 * the reading loop and closing brace are missing from this view.
 */
public static Object2DoubleMap<NGram> countInCorpus(final File reference, final int N, final ObjectSet<NGram> referenceNGrams, SourceType sourceType) throws IOException {
    final Object2DoubleMap<NGram> counts = new Object2DoubleRBTreeMap<NGram>();
    final DataInputStream in = new DataInputStream(CLIOpts.openInputAsMaybeZipped(reference));
    final NGramCarousel carousel = new NGramCarousel(N);
    int docNo = 0;