org.apache.lucene.index.TermsEnum.docFreq java code examples

/**
 * Returns the combined document frequency of the current term: the sum of
 * {@code docFreq()} over the first {@code numTop} entries of {@code top}.
 */
@Override
public int docFreq() throws IOException {
 int total = 0;
 int idx = 0;
 while (idx < numTop) {
  total += top[idx].terms.docFreq();
  idx++;
 }
 return total;
}

/** Reports the document frequency of the current term as given by {@code actualEnum}. */
@Override
public int docFreq() throws IOException {
 final int delegateDocFreq = actualEnum.docFreq();
 return delegateDocFreq;
}

/** Reports the document frequency of the current term as given by the wrapped {@code tenum}. */
@Override
public int docFreq() throws IOException {
 final int wrappedDocFreq = tenum.docFreq();
 return wrappedDocFreq;
}

/** Reports the document frequency of the current term as given by the wrapped {@code in}. */
@Override
public int docFreq() throws IOException {
 final int innerDocFreq = in.docFreq();
 return innerDocFreq;
}

/**
 * Estimates, in simple operations, what it costs to process the occurrences
 * of a term within one document that contains the term.
 * Intended for {@link TwoPhaseIterator#matchCost} implementations.
 * <br>The estimate may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
 *
 * @param termsEnum enum positioned at the term whose cost is wanted
 * @return a fixed per-document seek cost plus the expected occurrences per matching
 *         document multiplied by the per-position cost
 */
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
 final int docCount = termsEnum.docFreq();
 assert docCount > 0;
 // totalTermFreq() reports -1 when the statistic is not available
 final long occurrences = termsEnum.totalTermFreq();
 final float perMatchingDoc;
 if (occurrences < docCount) {
  // unavailable (or implausibly small) statistic: assume one occurrence per document
  perMatchingDoc = 1;
 } else {
  perMatchingDoc = occurrences / (float) docCount;
 }
 return TERM_POSNS_SEEK_OPS_PER_DOC + perMatchingDoc * TERM_OPS_PER_POS;
}

/**
 * Estimates, in simple operations, what it costs to process the occurrences
 * of a term within one document that contains the term.
 * <br>The estimate may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
 * <p>
 * This is a copy of org.apache.lucene.search.PhraseQuery.termPositionsCost().
 * <br>
 * TODO: keep only a single copy of this method and the constants used in it
 * when SpanTermQuery moves to the o.a.l.search package.
 *
 * @param termsEnum enum positioned at the term whose cost is wanted
 * @return a fixed per-document seek cost plus the expected occurrences per matching
 *         document multiplied by the per-position cost
 */
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
 final int docCount = termsEnum.docFreq();
 assert docCount > 0;
 // totalTermFreq() reports -1 when the statistic is not available
 final long occurrences = termsEnum.totalTermFreq();
 float perMatchingDoc = 1;
 if (!(occurrences < docCount)) {
  perMatchingDoc = occurrences / (float) docCount;
 }
 return TERM_POSNS_SEEK_OPS_PER_DOC + perMatchingDoc * TERM_OPS_PER_POS;
}

/**
 * Walks the terms of every field that {@link LuceneDocumentStructure} marks as usable for
 * uniqueness verification and, for each term found in more than one document, hands a
 * {@link TermQuery} for that term to {@code searchForDuplicates}.
 *
 * @param accessor passed through unchanged to {@code searchForDuplicates}
 * @param propKeyIds passed through unchanged to {@code searchForDuplicates}
 */
@Override
public void verify( NodePropertyAccessor accessor, int[] propKeyIds ) throws IndexEntryConflictException, IOException
{
  for ( String field : allFields() )
  {
    if ( !LuceneDocumentStructure.useFieldForUniquenessVerification( field ) )
    {
      continue;
    }
    TermsEnum fieldTerms = LuceneDocumentStructure.originalTerms( termsForField( field ), field );
    for ( BytesRef current = fieldTerms.next(); current != null; current = fieldTerms.next() )
    {
      if ( fieldTerms.docFreq() <= 1 )
      {
        // a term held by at most one document cannot be a duplicate
        continue;
      }
      TermQuery duplicateQuery = new TermQuery( new Term( field, current ) );
      searchForDuplicates( duplicateQuery, accessor, propKeyIds, fieldTerms.docFreq() );
    }
  }
}

/**
 * Builds a mocked {@link Terms} whose iterator yields exactly one term and then {@code null}.
 *
 * @param value text of the single term the mocked enum returns from {@code next()}
 * @param frequency value the mocked enum reports from {@code docFreq()}
 * @return the mocked {@link Terms}
 */
private static Terms getTerms( String value, int frequency ) throws IOException
{
  TermsEnum termsEnum = mock( TermsEnum.class );
  Terms terms = mock( Terms.class );
  when( terms.iterator() ).thenReturn( termsEnum );
  // BytesRef( CharSequence ) always encodes as UTF-8; String.getBytes() would use the
  // platform default charset and could produce different bytes for non-ASCII values.
  when( termsEnum.next() ).thenReturn( new BytesRef( value ) ).thenReturn( null );
  when( termsEnum.docFreq() ).thenReturn( frequency );
  return terms;
}

// NOTE(review): excerpt with no braces. As written, only the `if` statement is the loop body and
// only collector.init(...) is guarded by it; the indentation suggests braces were dropped — confirm
// against the original source.
while ( (termsRef = terms.next()) != null )
  if ( terms.docFreq() > 1 )
    collector.init( terms.docFreq() );
    searcher.search( new TermQuery( new Term( field, termsRef ) ), collector );

 TermAndState(String field, TermsEnum termsEnum) throws IOException {
  this.field = field;
  this.termsEnum = termsEnum;
  this.term = BytesRef.deepCopyOf(termsEnum.term());
  this.state = termsEnum.termState();
  this.docFreq = termsEnum.docFreq();
  this.totalTermFreq = termsEnum.totalTermFreq();
 }
}

/**
 * Pulls terms from the given enum into {@code terms}, stopping once the smaller of
 * {@code BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD} and {@link BooleanQuery#getMaxClauseCount()}
 * has been reached.
 *
 * @return {@code true} iff every term of the enum was collected. When {@code false} is
 *         returned, the enum is left positioned on the next (uncollected) term.
 */
private boolean collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
 final int limit = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
 int collected = 0;
 while (collected < limit) {
  final BytesRef current = termsEnum.next();
  if (current == null) {
   // enum ran out before the limit was hit: everything has been collected
   return true;
  }
  final TermState currentState = termsEnum.termState();
  final BytesRef termCopy = BytesRef.deepCopyOf(current);
  terms.add(new TermAndState(termCopy, currentState, termsEnum.docFreq(), termsEnum.totalTermFreq()));
  collected++;
 }
 // limit reached: success only if the enum has no further term
 final boolean exhausted = termsEnum.next() == null;
 return exhausted;
}

/**
 * Logs, at WARNING level, every term of the {@code QueryBuilder.DEFS} field that is contained
 * in more than 16 documents and whose text is longer than {@code freq} characters.
 *
 * @param freq length threshold: despite its name it is compared against the term's string
 *             length, and only terms strictly longer than this are logged
 * @throws IOException if the index cannot be opened or read
 */
public void listTokens(int freq) throws IOException {
  IndexReader ireader = null;
  try {
    ireader = DirectoryReader.open(indexDirectory);
    TermsEnum iter = null;
    if (ireader.numDocs() > 0) {
      Fields uFields = MultiFields.getFields(ireader);
      // The field (or all postings) may be absent, in which case there is nothing to list.
      Terms terms = (uFields == null) ? null : uFields.terms(QueryBuilder.DEFS);
      if (terms != null) {
        iter = terms.iterator();
      }
    }
    if (iter != null) {
      // A freshly created TermsEnum is unpositioned: next() must be called before
      // term() or docFreq() may be used.
      BytesRef term;
      while ((term = iter.next()) != null) {
        if (iter.docFreq() > 16) {
          String text = term.utf8ToString();
          if (text.length() > freq) {
            LOGGER.warning(text);
          }
        }
      }
    }
  } finally {
    if (ireader != null) {
      try {
        ireader.close();
      } catch (IOException e) {
        // best effort: a failure to close must not mask the outcome of the listing itself
        LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
      }
    }
  }
}

/**
 * Returns the number of documents containing the given term, or 0 when the term's field
 * has no terms or the term itself is not found.
 *
 * @param term the term to look up, in the field named by {@code term.field()}
 */
@Override
public final int docFreq(Term term) throws IOException {
 final Terms fieldTerms = terms(term.field());
 if (fieldTerms != null) {
  final TermsEnum cursor = fieldTerms.iterator();
  final boolean found = cursor.seekExact(term.bytes());
  if (found) {
   return cursor.docFreq();
  }
 }
 return 0;
}

 // NOTE(review): excerpt starts mid-branch; the condition selecting this path is not visible here.
 // Registers the state with the term's docFreq and totalTermFreq under the reader's ordinal.
 t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
} else {
 // NOTE(review): presumably the first-seen path for this term — the assert expects that no
 // docFreq has been registered on st yet; confirm against the full source.
 visitedTerms.put(st.bytes.get(), st);
 assert st.termState.docFreq() == 0;
 st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
 stQueue.offer(st);

  // NOTE(review): excerpt starts mid-branch; the condition guarding this docFreq-based score
  // is not visible here.
  score = normalizeDocumentFrequency(termsEnum.docFreq(), numDocs);
} else if (needPositionsAndFrequencies) {
  // Score is taken from getPhraseScore when positions and frequencies are needed.
  score = getPhraseScore(complexQueryData, leafReaderContext.docBase, postingsEnum);

// NOTE(review): excerpt with no braces. As written, only sampler.include(...) is the loop body and
// checkCancellation() runs once after the loop; the indentation suggests braces were dropped —
// confirm against the original source.
while ( (termsRef = termsEnum.next()) != null )
  sampler.include( termsRef.utf8ToString(), termsEnum.docFreq() );
  checkCancellation();

// NOTE(review): these lines look like two separate excerpts joined together — the `if` block is
// never closed and `docFreq` is declared twice; confirm against the original source.
final int docFreq = termsEnum.docFreq();
// A non-positive docFreq is rejected as out of bounds.
if (docFreq <= 0) {
 throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
 int expectedDocFreq = termsEnum.docFreq();
 // Postings are requested with PostingsEnum.NONE.
 PostingsEnum d = termsEnum.postings(null, PostingsEnum.NONE);
 int docFreq = 0;

/**
 * Builds a {@link TermContext} for the given {@link Term} from a top-level
 * {@link IndexReaderContext}. The term is looked up in each leaf reader of the context;
 * every leaf that contains it is registered in the result under the leaf's ordinal,
 * together with the term's state, docFreq and totalTermFreq in that leaf.
 * <p>
 * Note: the given context must be a top-level context.
 */
public static TermContext build(IndexReaderContext context, Term term)
  throws IOException {
 assert context != null && context.isTopLevel;
 final String fieldName = term.field();
 final BytesRef termBytes = term.bytes();
 final TermContext result = new TermContext(context);
 for (final LeafReaderContext leaf : context.leaves()) {
  final Terms leafTerms = leaf.reader().terms(fieldName);
  if (leafTerms == null) {
   // this leaf has no terms for the field
   continue;
  }
  final TermsEnum cursor = leafTerms.iterator();
  if (!cursor.seekExact(termBytes)) {
   // the term does not occur in this leaf
   continue;
  }
  final TermState leafState = cursor.termState();
  result.register(leafState, leaf.ord, cursor.docFreq(), cursor.totalTermFreq());
 }
 return result;
}

/**
 * Looks up the given term and returns its frequency: the total term frequency when
 * {@code useTotalTermFrequency} is set, otherwise the document frequency.
 *
 * @param term the term to seek to
 * @return the selected frequency, or 0 when the term is not found
 */
public long internalFrequency(BytesRef term) throws IOException {
  if (!termsEnum.seekExact(term)) {
    return 0;
  }
  if (useTotalTermFrequency) {
    return termsEnum.totalTermFreq();
  }
  return termsEnum.docFreq();
}

/**
 * Writes the current term's statistics: first its document frequency, then its total term
 * frequency. Both are written with the "potentially negative" writers, and both are
 * asserted to be no smaller than -1.
 *
 * @param topLevelIterator enum to read docFreq and totalTermFreq from
 */
private void writeTermStatistics(TermsEnum topLevelIterator) throws IOException {
  final int documentFrequency = topLevelIterator.docFreq();
  assert documentFrequency >= -1;
  writePotentiallyNegativeVInt(documentFrequency);

  final long totalTermFrequency = topLevelIterator.totalTermFreq();
  assert totalTermFrequency >= -1;
  writePotentiallyNegativeVLong(totalTermFrequency);
}

Javadoc

Returns the number of documents containing the current term. Do not call this when the enum is unpositioned (for example, after a seek has returned SeekStatus#END).

Popular methods of TermsEnum

next
totalTermFreq
Returns the total number of occurrences of this term across all documents (the sum of the freq() for
term
Returns current term. Do not call this when the enum is unpositioned.
seekExact
postings
seekCeil
Seeks to the specified term, if it exists, or to the next (ceiling) term. Returns SeekStatus to indi
ord
Returns ordinal position for current term. This is an optional method (the codec may throw Unsupport
attributes
Returns the related attributes.
termState
Expert: Returns the TermsEnums internal state to position the TermsEnum without re-seeking the term
docs
Get DocsEnum for the current term, with control over whether freqs are required. Do not call this wh
docsAndPositions
Get DocsAndPositionsEnum for the current term, with control over whether offsets and payloads are re
getComparator

Popular in Java

Making http post requests using okhttp
compareTo (BigDecimal)
runOnUiThread (Activity)
notifyDataSetChanged (ArrayAdapter)
ConnectException (java.net)
A ConnectException is thrown if a connection cannot be established to a remote host on a specific po
URI (java.net)
A Uniform Resource Identifier that identifies an abstract or physical resource, as specified by RFC
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
List (java.util)
An ordered collection (also known as a sequence). The user of this interface has precise control ove
FileUtils (org.apache.commons.io)
General file manipulation utilities. Facilities are provided in the following areas: * writing to a
Reflections (org.reflections)
Reflections one-stop-shop object. Reflections scans your classpath, indexes the metadata, allows you t
Top Vim plugins

How to use the docFreq method in org.apache.lucene.index.TermsEnum

Best Java code snippets using org.apache.lucene.index.TermsEnum.docFreq (Showing top 20 results out of 414)

How to use
docFreq
method
in
org.apache.lucene.index.TermsEnum