// Emit a progress message once every 10,000 lines. The check runs before the
// counter is advanced, so the first message reports line 0.
if (numLines % 10000 == 0) {
    Logger.logs("On line " + numLines);
}
++numLines;
// Trim the line, then split it on the literal "|||" delimiter
// (each pipe is escaped because split() takes a regular expression).
final String[] parts = line.trim().split("\\|\\|\\|");
// Walk every n-gram entry stored for the current order.
int linenum = 0;
for (Entry<KneserNeyCounts> entry : ngrams.getNgramsForOrder(ngramOrder)) {
    // Progress message every 10,000 entries. Log the running entry counter:
    // the message previously concatenated `line`, which is not this loop's counter.
    // linenum is post-incremented in the condition, so the logged value is the
    // already-incremented count.
    if (linenum++ % 10000 == 0) Logger.logs("Writing line " + linenum);
    // Skip entries whose token count is below the configured minimum for this order.
    // The filter is only applied when ngramOrder >= lmOrder - 2.
    if (ngramOrder >= lmOrder - 2 && entry.value.tokenCounts < opts.kneserNeyMinCounts[ngramOrder]) continue;
    // Build the printable form of the n-gram key by joining the list that
    // WordIndexer.StaticMethods.toList produces for it.
    final String ngramString = StrUtils.join(WordIndexer.StaticMethods.toList(wordIndexer, entry.key));
// Reads from `reader` for as long as reader.ready() returns true, logging a progress
// message every 100,000 lines (the counter is advanced after the check).
// NOTE(review): if `reader` is a java.io.Reader, ready() only reports whether the next
// read is guaranteed not to block; it is not an end-of-stream test, and readLine() may
// still return null. Looping on readLine() != null is the usual EOF-safe form - confirm.
// The trailing statements log how many n-grams of currentNGramLength were read, close
// the current Logger track, and tell the callback that this order is finished.
try { while (reader.ready()) { if (currLine % 100000 == 0) Logger.logs("Read " + currLine + " lines"); currLine++; final String line = reader.readLine(); Logger.logs(currentNGramCount + " " + currentNGramLength + "-gram read."); Logger.endTrack(); callback.handleNgramOrderFinished(currentNGramLength);
// Walks every n-gram entry stored for the current order, logging a progress message
// every 10,000 entries. linenum is post-incremented inside the condition, so the value
// printed is the already-incremented counter (the first message prints 1).
// Entries whose tokenCounts fall below opts.kneserNeyMinCounts[ngramOrder] are skipped,
// but only when ngramOrder >= lmOrder - 2.
int linenum = 0; for (final Entry<KneserNeyCounts> entry : ngrams.getNgramsForOrder(ngramOrder)) { if (linenum++ % 10000 == 0) Logger.logs("Writing line " + linenum); final long relevantCount = entry.value.tokenCounts; if (ngramOrder >= lmOrder - 2 && relevantCount < opts.kneserNeyMinCounts[ngramOrder]) continue;
// Emit a progress message once every 10,000 lines. The check runs before the
// counter is advanced, so the first message reports line 0.
if (numLines % 10000 == 0) {
    Logger.logs("On line " + numLines);
}
++numLines;
// Trim the line, then split it on the literal "|||" delimiter
// (each pipe is escaped because split() takes a regular expression).
String[] parts = line.trim().split("\\|\\|\\|");
// Iterates over the lines of ngramFile, logging the running line index every 1,000
// lines (the index is advanced after the check, so the first message prints 0), and
// trims each line before it is processed further.
// Iterators.able(...) presumably adapts the line iterator to an Iterable - confirm.
int k = 0; for (String line : Iterators.able(IOUtils.lineIterator(ngramFile.getPath()))) { if (k % 1000 == 0) Logger.logs("Line " + k); k++; line = line.trim();
// Allocates an int array with currentNGramLength slots, then reads lines until
// readLine() returns null, logging a progress message every 100,000 lines.
// On an empty line the code logs how many n-grams of currentNGramLength were read,
// closes the current Logger track, and tells the callback that this order is finished.
// NOTE(review): an empty line presumably marks the end of one order's section in the
// input format - confirm against the file format being parsed.
int[] ngramScratch = new int[currentNGramLength]; while ((line = reader.readLine()) != null) { if (currLine % 100000 == 0) Logger.logs("Read " + currLine + " lines"); currLine++; if (line.length() == 0) { Logger.logs(currentNGramCount + " " + currentNGramLength + "-gram read."); Logger.endTrack(); callback.handleNgramOrderFinished(currentNGramLength);
// Iterates over the lines of ngramFile, logging the running line index every 10,000
// lines (the index is advanced after the check, so the first message prints 0), and
// trims each line before it is processed further.
// Iterators.able(...) presumably adapts the line iterator to an Iterable - confirm.
int k = 0; for (String line : Iterators.able(IOUtils.lineIterator(ngramFile.getPath()))) { if (k % 10000 == 0) Logger.logs("Line " + k); k++; line = line.trim();
/**
 * Feeds every n-gram of length 1 through {@code lmOrder} found in the given lines to the
 * callback.
 * <p>
 * Each line is handled as one sentence: it is split on runs of whitespace, every token is
 * mapped to an id through {@code wordIndexer}, and the id sequence is wrapped with the
 * indexer's start and end symbols. For each order, every span of that many consecutive
 * ids is reported through {@code callback.call}. A blank line has no tokens and is
 * reported as just the start and end symbols.
 * <p>
 * A progress message is logged every 10,000 lines, and {@code callback.cleanup()} is
 * invoked once after the last line.
 *
 * @param allLinesIterator the lines to count, one sentence per element
 * @param callback receives each n-gram span together with the original line, and is
 *        told via {@code cleanup()} when all lines have been consumed
 */
private void countNgrams(final Iterable<String> allLinesIterator, final LmReaderCallback<Object> callback) {
    long numLines = 0;
    for (final String line : allLinesIterator) {
        if (numLines % 10000 == 0) Logger.logs("On line " + numLines);
        numLines++;

        // Split on whitespace runs rather than single spaces: splitting on " " yields
        // empty-string tokens for leading or consecutive spaces (and for blank lines),
        // and those empty tokens would be added to the vocabulary and counted.
        final String trimmed = line.trim();
        final String[] words = trimmed.length() == 0 ? new String[0] : trimmed.split("\\s+");

        // Sentence layout: [start symbol, word ids..., end symbol].
        final int[] sent = new int[words.length + 2];
        sent[0] = wordIndexer.getOrAddIndex(wordIndexer.getStartSymbol());
        sent[sent.length - 1] = wordIndexer.getOrAddIndex(wordIndexer.getEndSymbol());
        for (int i = 0; i < words.length; ++i) {
            sent[i + 1] = wordIndexer.getOrAddIndexFromString(words[i]);
        }

        // ngramOrder is zero-based: order k covers spans of k + 1 ids ending at index i.
        // Starting i at ngramOrder skips the positions that have too little left context.
        for (int ngramOrder = 0; ngramOrder < lmOrder; ++ngramOrder) {
            for (int i = ngramOrder; i < sent.length; ++i) {
                callback.call(sent, i - ngramOrder, i + 1, null, line);
            }
        }
    }
    callback.cleanup();
}
// Emit a progress message once every 10,000 lines. The check runs before the
// counter is advanced, so the first message reports line 0.
if (numLines % 10000 == 0) {
    Logger.logs("On line " + numLines);
}
++numLines;
// Tokenise on single space characters. Consecutive or leading spaces therefore
// produce empty-string tokens in the resulting array.
final String[] words = line.split(" ");
// Value stored at the current position of `uncompressed`.
final long firstKey = uncompressed.get(uncompressedPos);
// Progress message every 1,000 blocks. currBlock is post-incremented inside the
// condition, so the number printed is the already-incremented counter.
if (currBlock++ % 1000 == 0) {
    Logger.logs("On block " + currBlock + " starting at pos " + uncompressedPos);
}
// Value stored at the current position of `uncompressed`.
final long firstKey = uncompressed.get(uncompressedPos);
// Progress message every 1,000 blocks. currBlock is post-incremented inside the
// condition, so the number printed is the already-incremented counter.
if (currBlock++ % 1000 == 0) {
    Logger.logs("On block " + currBlock + " starting at pos " + uncompressedPos);
}