/**
 * Returns the document frequency accumulated over the first {@code numTop}
 * entries of {@code top}.
 */
@Override
public int docFreq() throws IOException {
  int total = 0;
  int idx = 0;
  while (idx < numTop) {
    total += top[idx].terms.docFreq();
    idx++;
  }
  return total;
}
/** Returns the document frequency reported by the wrapped {@code actualEnum}. */
@Override
public int docFreq() throws IOException {
  final int delegateFreq = actualEnum.docFreq();
  return delegateFreq;
}
/** Returns the document frequency reported by the underlying {@code tenum}. */
@Override
public int docFreq() throws IOException {
  final int delegateFreq = tenum.docFreq();
  return delegateFreq;
}
/** Forwards to the wrapped enum {@code in} and returns its document frequency. */
@Override
public int docFreq() throws IOException {
  final int delegateFreq = in.docFreq();
  return delegateFreq;
}
/** Returns an expected cost in simple operations * of processing the occurrences of a term * in a document that contains the term. * This is for use by {@link TwoPhaseIterator#matchCost} implementations. * <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available. * @param termsEnum The term is the term at which this TermsEnum is positioned. */ static float termPositionsCost(TermsEnum termsEnum) throws IOException { int docFreq = termsEnum.docFreq(); assert docFreq > 0; long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq); return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; }
/** Returns an expected cost in simple operations * of processing the occurrences of a term * in a document that contains the term. * <br>This may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available. * @param termsEnum The term is the term at which this TermsEnum is positioned. * <p> * This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost(). * <br> * TODO: keep only a single copy of this method and the constants used in it * when SpanTermQuery moves to the o.a.l.search package. */ static float termPositionsCost(TermsEnum termsEnum) throws IOException { int docFreq = termsEnum.docFreq(); assert docFreq > 0; long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available float expOccurrencesInMatchingDoc = (totalTermFreq < docFreq) ? 1 : (totalTermFreq / (float) docFreq); return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS; }
/**
 * Walks the terms of every field that takes part in uniqueness verification and, for each
 * term found in more than one document, runs a duplicate search for that term.
 *
 * @param accessor passed through to {@code searchForDuplicates}
 * @param propKeyIds passed through to {@code searchForDuplicates}
 */
@Override
public void verify( NodePropertyAccessor accessor, int[] propKeyIds ) throws IndexEntryConflictException, IOException
{
    for ( String field : allFields() )
    {
        if ( !LuceneDocumentStructure.useFieldForUniquenessVerification( field ) )
        {
            continue;
        }

        TermsEnum fieldTerms = LuceneDocumentStructure.originalTerms( termsForField( field ), field );
        for ( BytesRef current = fieldTerms.next(); current != null; current = fieldTerms.next() )
        {
            // A term present in a single document cannot be a duplicate.
            if ( fieldTerms.docFreq() <= 1 )
            {
                continue;
            }
            TermQuery duplicateCandidates = new TermQuery( new Term( field, current ) );
            searchForDuplicates( duplicateCandidates, accessor, propKeyIds, fieldTerms.docFreq() );
        }
    }
}
/**
 * Builds a mocked {@link Terms} whose iterator yields exactly one term and then {@code null}.
 *
 * @param value text of the single term the mocked enum returns
 * @param frequency value the mocked enum reports from {@code docFreq()}
 * @return the mocked {@link Terms}
 */
private static Terms getTerms( String value, int frequency ) throws IOException
{
    TermsEnum termsEnum = mock( TermsEnum.class );
    Terms terms = mock( Terms.class );
    when( terms.iterator() ).thenReturn( termsEnum );
    // Build the term from the String directly: BytesRef( CharSequence ) encodes as UTF-8,
    // whereas value.getBytes() would use the platform default charset and could produce
    // bytes that do not decode back to the same text via utf8ToString().
    when( termsEnum.next() ).thenReturn( new BytesRef( value ) ).thenReturn( null );
    when( termsEnum.docFreq() ).thenReturn( frequency );
    return terms;
}
TermAndState(String field, TermsEnum termsEnum) throws IOException { this.field = field; this.termsEnum = termsEnum; this.term = BytesRef.deepCopyOf(termsEnum.term()); this.state = termsEnum.termState(); this.docFreq = termsEnum.docFreq(); this.totalTermFreq = termsEnum.totalTermFreq(); } }
/**
 * Pulls terms from {@code termsEnum} into {@code terms} until either the enum is exhausted
 * or the collection limit is reached. The limit is the smaller of
 * {@code BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD} and {@link BooleanQuery#getMaxClauseCount()}.
 *
 * @return {@code true} iff every term could be collected; when {@code false} is returned the
 *     enum is left positioned on the next term
 */
private boolean collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
  final int limit = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
  int collected = 0;
  while (collected < limit) {
    final BytesRef current = termsEnum.next();
    if (current == null) {
      // Enum exhausted before hitting the limit: everything was collected.
      return true;
    }
    final TermState currentState = termsEnum.termState();
    final BytesRef termCopy = BytesRef.deepCopyOf(current);
    final int docFreq = termsEnum.docFreq();
    final long totalTermFreq = termsEnum.totalTermFreq();
    terms.add(new TermAndState(termCopy, currentState, docFreq, totalTermFreq));
    collected++;
  }
  // Limit reached: all terms were collected only if nothing is left in the enum.
  return termsEnum.next() == null;
}
public void listTokens(int freq) throws IOException { IndexReader ireader = null; TermsEnum iter = null; Terms terms; try { ireader = DirectoryReader.open(indexDirectory); int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.DEFS); iter = terms.iterator(); // init uid iterator } while (iter != null && iter.term() != null) { //if (iter.term().field().startsWith("f")) { if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) { LOGGER.warning(iter.term().utf8ToString()); } BytesRef next = iter.next(); if (next==null) {iter=null;} } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e); } } } }
/**
 * Returns the document frequency of {@code term}, or {@code 0} when the term's field has no
 * terms or the exact term is not found in it.
 */
@Override
public final int docFreq(Term term) throws IOException {
  final Terms fieldTerms = terms(term.field());
  if (fieldTerms != null) {
    final TermsEnum fieldEnum = fieldTerms.iterator();
    if (fieldEnum.seekExact(term.bytes())) {
      return fieldEnum.docFreq();
    }
  }
  return 0;
}
score = normalizeDocumentFrequency(termsEnum.docFreq(), numDocs); } else if (needPositionsAndFrequencies) { score = getPhraseScore(complexQueryData, leafReaderContext.docBase, postingsEnum);
while ( (termsRef = termsEnum.next()) != null ) sampler.include( termsRef.utf8ToString(), termsEnum.docFreq() ); checkCancellation();
/** * Creates a {@link TermContext} from a top-level {@link IndexReaderContext} and the * given {@link Term}. This method will lookup the given term in all context's leaf readers * and register each of the readers containing the term in the returned {@link TermContext} * using the leaf reader's ordinal. * <p> * Note: the given context must be a top-level context. */ public static TermContext build(IndexReaderContext context, Term term) throws IOException { assert context != null && context.isTopLevel; final String field = term.field(); final BytesRef bytes = term.bytes(); final TermContext perReaderTermState = new TermContext(context); //if (DEBUG) System.out.println("prts.build term=" + term); for (final LeafReaderContext ctx : context.leaves()) { //if (DEBUG) System.out.println(" r=" + leaves[i].reader); final Terms terms = ctx.reader().terms(field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(); if (termsEnum.seekExact(bytes)) { final TermState termState = termsEnum.termState(); //if (DEBUG) System.out.println(" found"); perReaderTermState.register(termState, ctx.ord, termsEnum.docFreq(), termsEnum.totalTermFreq()); } } } return perReaderTermState; }
/**
 * Looks up {@code term} in {@code termsEnum} and returns its frequency: the total term
 * frequency when {@code useTotalTermFrequency} is set, the document frequency otherwise.
 *
 * @return the selected frequency, or {@code 0} when the exact term is not found
 */
public long internalFrequency(BytesRef term) throws IOException {
  if (!termsEnum.seekExact(term)) {
    return 0;
  }
  if (useTotalTermFrequency) {
    return termsEnum.totalTermFreq();
  }
  return termsEnum.docFreq();
}
/**
 * Writes the statistics of the term {@code topLevelIterator} is positioned on: first its
 * document frequency, then its total term frequency. Both values are asserted to be no
 * smaller than {@code -1} and are written with the "potentially negative" writers.
 */
private void writeTermStatistics(TermsEnum topLevelIterator) throws IOException {
  final int documentFrequency = topLevelIterator.docFreq();
  assert (documentFrequency >= -1);
  writePotentiallyNegativeVInt(documentFrequency);

  final long totalTermFrequency = topLevelIterator.totalTermFreq();
  assert (totalTermFrequency >= -1);
  writePotentiallyNegativeVLong(totalTermFrequency);
}