/**
 * Logs the distribution of the frequency values stored in this collection.
 *
 * <p>One line is logged for each frequency from 1 to 10, followed by one line
 * for all frequencies above 10. Each line shows the number of keys in that
 * bucket and, in parentheses, the running cumulative share of all keys
 * (bucket counts divided by {@code size()}).
 *
 * <p>Keys whose frequency is below 1 are not counted in any of those buckets.
 * Previously a frequency of 0 was added to the "freq&gt;10" bucket and a
 * negative frequency caused an {@code ArrayIndexOutOfBoundsException}. If any
 * such keys exist they are reported on an extra line, and the final cumulative
 * share stays below 1 by their proportion.
 *
 * <p>When the collection is empty the cumulative share is reported as 0
 * instead of NaN.
 *
 * @throws IOException declared for compatibility with existing callers; no
 *         statement visible in this method raises it
 */
public void stat() throws IOException {
    final int maxTracked = 10;
    // perFrequency[f] holds the number of keys whose frequency is exactly f
    // (1..maxTracked). Slot 0 is intentionally unused.
    int[] perFrequency = new int[maxTracked + 1];
    int aboveMax = 0;
    int belowOne = 0;

    Iterator<?> it = keySet().iterator();
    while (it.hasNext()) {
        String key = (String) it.next();
        int freq = get(key);
        if (freq > maxTracked) {
            aboveMax++;
        } else if (freq >= 1) {
            perFrequency[freq]++;
        } else {
            // Zero or negative: neither a valid array slot nor "above 10".
            belowOne++;
        }
    }

    // Read the size once; it is the denominator for every share below.
    double total = size();
    double c = 0;
    for (int i = 1; i <= maxTracked; i++) {
        if (total > 0) {
            c += perFrequency[i] / total;
        }
        logger.info("F(" + i + ")=" + perFrequency[i] + " (" + c + ")");
    }
    if (total > 0) {
        c += aboveMax / total;
    }
    logger.info("F(freq>" + maxTracked + ")=" + aboveMax + " (" + c + ")");

    // Only emitted for unexpected data, so normal output is unchanged.
    if (belowOne > 0) {
        logger.info("F(freq<1)=" + belowOne);
    }
} // end stat
logger.info( lineCount + "\t" + ((double) (end.getTime() - start1.getTime()) / 1000) + " total s (" + end + "), voc size:" + corpusVocabulary.size() + ", term index size:" + termIndex.size() + ", totalKW: " + totalKW); + " total s, voc size:" + corpusVocabulary.size()); start = new Date(); } else if ((lineCount % 500) == 0) {
if (documentVocabulary.size() == 0) { return;
if (documentVocabulary.size() == 0) { return;