/**
 * Adds one term to the lexicon currently being built by forwarding it
 * to the temporary in-memory lexicon ({@code TempLex}).
 *
 * @param term the term, as a String
 * @param tf the frequency to record for the term
 */
public void addTerm(String term, int tf)
{
    TempLex.insert(term, tf);
}
/** Inserts all the terms from a document posting * into the lexicon map * @param _doc The postinglist for that document. Assumed to be a FieldDocumentPostingList. */ public void insert(DocumentPostingList _doc) { super.insert(_doc); FieldDocumentPostingList doc = (FieldDocumentPostingList)_doc; int fi = 0; for(TObjectIntHashMap<String> docField : doc.field_occurrences) { final TObjectIntHashMap<String> thisField = field_tfs[fi]; //final int fii = fi; docField.forEachEntry(new TObjectIntProcedure<String>() { public boolean execute(String term, int freq) { //System.out.println("term " + term + " tf_" + fii + "="+ freq); thisField.adjustOrPutValue(term, freq, freq); return true; } }); fi++; } }
/** adds the terms of a document to the temporary lexicon in memory. * @param terms DocumentPostingList the terms of the document to add to the temporary lexicon */ public void addDocumentTerms(DocumentPostingList terms) { TempLex.insert(terms); DocCount++; if((DocCount % DocumentsPerLexicon) == 0) { if (logger.isDebugEnabled()) logger.debug("flushing lexicon"); writeTemporaryLexicon(); TempLexCount++; TempLex.clear(); //try{ TempLex = (LexiconMap)LexiconMapClass.newInstance(); } catch (Exception e) {logger.error(e);} } }