/**
 * Adds every term of one document's posting list to the postings held in memory.
 *
 * @param _docPostings posting information for one document; it is cast to
 *        BlockDocumentPostingList, so any other implementation fails with a
 *        ClassCastException
 * @param docid Document id of the indexed document
 * @throws IOException if an I/O error occurs.
 */
public void addTerms(DocumentPostingList _docPostings, int docid) throws IOException {
    final BlockDocumentPostingList blockPostings = (BlockDocumentPostingList) _docPostings;
    for (String currentTerm : blockPostings.termSet()) {
        // one add() per distinct term: its frequency plus the blocks it occurs in
        add(
            currentTerm,
            docid,
            blockPostings.getFrequency(currentTerm),
            blockPostings.getBlocks(currentTerm));
    }
}
public void processTerm(String t) { // null means the term has been filtered out (eg stopwords) if (t != null) { //add term to thingy tree ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); numOfTokensInDocument++; if (++numOfTokensInBlock >= BLOCK_SIZE && blockId < MAX_BLOCKS) { numOfTokensInBlock = 0; blockId++; } } }
@Override public void write(final DataOutput out) throws IOException { WritableUtils.writeVInt(out, getNumberOfPointers()); try this.forEachTerm(new TObjectIntProcedure<String>()
/**
 * Replaces the contents of this posting list with the terms read from the
 * given input.
 *
 * <p>Layout consumed, in order: a vint term count, then for each term a
 * string, a vint frequency and a vint number of block ids, followed by that
 * many vint-encoded block values.
 *
 * @param in the input to read from
 * @throws IOException if reading from the input fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    clear();
    int remainingTerms = WritableUtils.readVInt(in);
    while (remainingTerms-- > 0) {
        final String term = Text.readString(in);
        final int frequency = WritableUtils.readVInt(in);
        final int blockFrequency = WritableUtils.readVInt(in);
        insert(frequency, term);
        if (blockFrequency != 0) {
            final int[] blockIds = new int[blockFrequency];
            // the first value is stored offset by one
            int previous = WritableUtils.readVInt(in) - 1;
            blockIds[0] = previous;
            for (int j = 1; j < blockFrequency; j++) {
                // NOTE(review): each later value has the previous decoded id subtracted
                // from it. If the writer emits gaps (current - previous) this should
                // presumably be an addition -- confirm against write() before changing.
                previous = WritableUtils.readVInt(in) - previous;
                blockIds[j] = previous;
            }
            term_blocks.put(term, new TIntHashSet(blockIds));
        }
    }
}
/**
 * Starts a fresh posting list for the next document and resets the block
 * position. A field-aware list is used when FieldScore.FIELDS_COUNT is
 * positive, otherwise a plain block posting list.
 */
protected void createDocumentPostings() {
    termsInDocument = (FieldScore.FIELDS_COUNT > 0)
        ? new BlockFieldDocumentPostingList(FieldScore.FIELDS_COUNT)
        : new BlockDocumentPostingList();

    // a new document always starts at block 0 with no tokens counted
    blockId = 0;
    numOfTokensInBlock = 0;
}
/**
 * Starts a fresh posting list for the next document and resets the block
 * position. A field-aware list is used when FieldScore.FIELDS_COUNT is
 * positive, otherwise a plain block posting list.
 */
protected void createDocumentPostings() {
    termsInDocument = (FieldScore.FIELDS_COUNT > 0)
        ? new BlockFieldDocumentPostingList(FieldScore.FIELDS_COUNT)
        : new BlockDocumentPostingList();

    // a new document always starts at block 0 with no tokens counted
    blockId = 0;
    numOfTokensInBlock = 0;
}
public void processTerm(String t) { // null means the term has been filtered out (eg stopwords) if (t != null) { //add term to thingy tree ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); numOfTokensInDocument++; if (++numOfTokensInBlock >= BLOCK_SIZE && blockId < MAX_BLOCKS) { numOfTokensInBlock = 0; blockId++; } } }
public void processTerm(String t) { if (t== null) return; // current term is a delimiter if (blockDelimiterTerms.contains(t)) { // delimiters should also be indexed if (indexDelimiters) { ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); if (countDelimiters) numOfTokensInDocument++; } numOfTokensInBlock = 0; blockId++; } else { // index non-delimiter term ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); numOfTokensInDocument++; } }
public void processTerm(String t) { if (t== null) return; // current term is a delimiter if (blockDelimiterTerms.contains(t)) { // delimiters should also be indexed if (indexDelimiters) { ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); if (countDelimiters) numOfTokensInDocument++; } numOfTokensInBlock = 0; blockId++; } else { // index non-delimiter term ((BlockDocumentPostingList)termsInDocument).insert(t, blockId); numOfTokensInDocument++; } }
/**
 * Insert a term into this document, occurring at the given block id.
 *
 * <p>The term is first passed to the single-argument insert, then the block
 * id is added to the collection of block ids kept for that term, creating
 * the collection on first sight of the term. blockCount is incremented on
 * every call.
 *
 * @param t the term to insert
 * @param blockId the id of the block in which the term occurs
 */
public void insert(String t, int blockId) {
    insert(t);

    TIntHashSet idsForTerm = term_blocks.get(t);
    if (idsForTerm == null) {
        // TODO (carried over from the original): no initial capacity is supplied here
        idsForTerm = new TIntHashSet();
        term_blocks.put(t, idsForTerm);
    }

    idsForTerm.add(blockId);
    blockCount++;
}