/**
 * Extends the feature map with the sufficiently frequent words of {@code docs}.
 *
 * <p>Unigram statistics are computed over {@code docs}. Each word whose count is at least
 * {@code appearanceThres} and that is not already mapped receives the next free feature id
 * (the current value of {@code dim}); the pair is stored in both {@code wordToFid} and
 * {@code fidToWord}, and {@code dim} is incremented. Words that are already mapped keep
 * their existing id, so calling this method again with overlapping vocabulary does not
 * create stale ids or inflate the dimension.
 *
 * @param docs the document collection whose words are considered for inclusion
 * @param appearanceThres minimum count (inclusive) a word needs in order to become a feature
 * @param countRepsWithinDoc forwarded to the {@code UnigramStatistics} constructor;
 *        presumably selects whether repeated occurrences inside one document are counted
 *        (confirm against {@code UnigramStatistics})
 */
public void addDocs(DocumentCollection docs, int appearanceThres, boolean countRepsWithinDoc) {
    UnigramStatistics stat = new UnigramStatistics(docs, countRepsWithinDoc);
    for (String w : stat.wordCounts.keySet()) {
        // Skip words that already own a feature id: re-adding one would remap the word,
        // leave its old id dangling in fidToWord, and grow dim for no new feature.
        if (wordToFid.containsKey(w)) {
            continue;
        }
        if (stat.wordCounts.get(w) >= appearanceThres) {
            wordToFid.put(w, dim);
            fidToWord.put(dim, w);
            dim++;
        }
    }
    logger.info("Done building a feature map, the dimension is: " + dim);
}
/**
 * Extends the feature map with the sufficiently frequent words of {@code docs}.
 *
 * <p>Unigram statistics are computed over {@code docs}. Each word whose count is at least
 * {@code appearanceThres} and that is not already mapped receives the next free feature id
 * (the current value of {@code dim}); the pair is stored in both {@code wordToFid} and
 * {@code fidToWord}, and {@code dim} is incremented. Words that are already mapped keep
 * their existing id, so calling this method again with overlapping vocabulary does not
 * create stale ids or inflate the dimension.
 *
 * @param docs the document collection whose words are considered for inclusion
 * @param appearanceThres minimum count (inclusive) a word needs in order to become a feature
 * @param countRepsWithinDoc forwarded to the {@code UnigramStatistics} constructor;
 *        presumably selects whether repeated occurrences inside one document are counted
 *        (confirm against {@code UnigramStatistics})
 */
public void addDocs(DocumentCollection docs, int appearanceThres, boolean countRepsWithinDoc) {
    UnigramStatistics stat = new UnigramStatistics(docs, countRepsWithinDoc);
    for (String w : stat.wordCounts.keySet()) {
        // Skip words that already own a feature id: re-adding one would remap the word,
        // leave its old id dangling in fidToWord, and grow dim for no new feature.
        if (wordToFid.containsKey(w)) {
            continue;
        }
        if (stat.wordCounts.get(w) >= appearanceThres) {
            wordToFid.put(w, dim);
            fidToWord.put(dim, w);
            dim++;
        }
    }
    logger.info("Done building a feature map, the dimension is: " + dim);
}
/**
 * Extends the feature map with the sufficiently frequent words of {@code docs}.
 *
 * <p>Unigram statistics are computed over {@code docs}. Each word whose count is at least
 * {@code appearanceThres} and that is not already mapped receives the next free feature id
 * (the current value of {@code dim}); the pair is stored in both {@code wordToFid} and
 * {@code fidToWord}, and {@code dim} is incremented. Words that are already mapped keep
 * their existing id, so calling this method again with overlapping vocabulary does not
 * create stale ids or inflate the dimension.
 *
 * @param docs the document collection whose words are considered for inclusion
 * @param appearanceThres minimum count (inclusive) a word needs in order to become a feature
 * @param countRepsWithinDoc forwarded to the {@code UnigramStatistics} constructor;
 *        presumably selects whether repeated occurrences inside one document are counted
 *        (confirm against {@code UnigramStatistics})
 */
public void addDocs(DocumentCollection docs, int appearanceThres, boolean countRepsWithinDoc) {
    UnigramStatistics stat = new UnigramStatistics(docs, countRepsWithinDoc);
    for (String w : stat.wordCounts.keySet()) {
        // Skip words that already own a feature id: re-adding one would remap the word,
        // leave its old id dangling in fidToWord, and grow dim for no new feature.
        if (wordToFid.containsKey(w)) {
            continue;
        }
        if (stat.wordCounts.get(w) >= appearanceThres) {
            wordToFid.put(w, dim);
            fidToWord.put(dim, w);
            dim++;
        }
    }
    logger.info("Done building a feature map, the dimension is: " + dim);
}