Tabnine Logo
TermsEnum.docFreq
Code Index | Add Tabnine to your IDE (free)

How to use
docFreq
method
in
org.apache.lucene.index.TermsEnum

Best Java code snippets using org.apache.lucene.index.TermsEnum.docFreq (Showing top 20 results out of 414)

origin: org.apache.lucene/lucene-core

@Override
public int docFreq() throws IOException {
 // Total the document frequencies reported by the first numTop entries of top.
 int total = 0;
 int idx = 0;
 while (idx < numTop) {
  total += top[idx].terms.docFreq();
  idx++;
 }
 return total;
}
origin: org.apache.lucene/lucene-core

@Override
public int docFreq() throws IOException {
 // Pure delegation: report whatever actualEnum reports.
 final int delegateDocFreq = actualEnum.docFreq();
 return delegateDocFreq;
}
origin: org.apache.lucene/lucene-core

@Override
public int docFreq() throws IOException {
 // Forwarded unchanged to tenum.
 final int forwarded = tenum.docFreq();
 return forwarded;
}
origin: org.apache.lucene/lucene-core

@Override
public int docFreq() throws IOException {
 // The value comes straight from the enum held in the field "in".
 final int innerDocFreq = in.docFreq();
 return innerDocFreq;
}
origin: org.apache.lucene/lucene-core

/** Estimates, in simple operations, the cost of processing the
 *  occurrences of a term within one document that contains the term.
 *  Intended for use by {@link TwoPhaseIterator#matchCost} implementations.
 *  <br>The estimate may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
 *  @param termsEnum The enum, positioned at the term whose cost is wanted.
 *  @return {@code TERM_POSNS_SEEK_OPS_PER_DOC} plus {@code TERM_OPS_PER_POS}
 *          for each expected occurrence in a matching document.
 */
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
 final int docFreq = termsEnum.docFreq();
 assert docFreq > 0;
 final long totalTermFreq = termsEnum.totalTermFreq(); // -1 when not available
 final float expOccurrencesInMatchingDoc;
 if (totalTermFreq < docFreq) {
  // Also covers the "not available" (-1) case: assume a single occurrence per matching document.
  expOccurrencesInMatchingDoc = 1;
 } else {
  expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
 }
 return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
}
origin: org.apache.lucene/lucene-core

/** Estimates, in simple operations, the cost of processing the
 *  occurrences of a term within one document that contains the term.
 *  <br>The estimate may be inaccurate when {@link TermsEnum#totalTermFreq()} is not available.
 *  @param termsEnum The enum, positioned at the term whose cost is wanted.
 *  <p>
 *  This duplicates org.apache.lucene.search.PhraseQuery.termPositionsCost().
 *  <br>
 *  TODO: keep only a single copy of this method and the constants used in it
 *  when SpanTermQuery moves to the o.a.l.search package.
 */
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
 final int matchingDocs = termsEnum.docFreq();
 assert matchingDocs > 0;
 final long occurrences = termsEnum.totalTermFreq(); // -1 when not available
 float expOccurrencesInMatchingDoc = 1;
 if (occurrences >= matchingDocs) {
  // Statistics are usable: average number of occurrences per matching document.
  expOccurrencesInMatchingDoc = occurrences / (float) matchingDocs;
 }
 return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
}
origin: neo4j/neo4j

@Override
public void verify( NodePropertyAccessor accessor, int[] propKeyIds ) throws IndexEntryConflictException, IOException
{
  // Walk every term of every field used for uniqueness verification; any term that
  // appears in more than one document is passed on to searchForDuplicates.
  for ( String fieldName : allFields() )
  {
    if ( !LuceneDocumentStructure.useFieldForUniquenessVerification( fieldName ) )
    {
      continue;
    }
    TermsEnum fieldTerms = LuceneDocumentStructure.originalTerms( termsForField( fieldName ), fieldName );
    for ( BytesRef current = fieldTerms.next(); current != null; current = fieldTerms.next() )
    {
      if ( fieldTerms.docFreq() <= 1 )
      {
        continue;
      }
      TermQuery duplicateCandidates = new TermQuery( new Term( fieldName, current ) );
      searchForDuplicates( duplicateCandidates, accessor, propKeyIds, fieldTerms.docFreq() );
    }
  }
}
origin: neo4j/neo4j

/**
 * Builds a mocked {@link Terms} whose iterator yields exactly one term and then ends.
 *
 * @param value text of the single term; encoded as UTF-8
 * @param frequency value the mocked enum reports from {@code docFreq()}
 * @return the mocked terms
 */
private static Terms getTerms( String value, int frequency ) throws IOException
{
  TermsEnum termsEnum = mock( TermsEnum.class );
  Terms terms = mock( Terms.class );
  when( terms.iterator() ).thenReturn( termsEnum );
  // Explicit charset: the no-arg getBytes() uses the platform default, which would make
  // the term bytes machine-dependent for non-ASCII values.
  BytesRef term = new BytesRef( value.getBytes( java.nio.charset.StandardCharsets.UTF_8 ) );
  when( termsEnum.next() ).thenReturn( term ).thenReturn( null );
  when( termsEnum.docFreq() ).thenReturn( frequency );
  return terms;
}
origin: neo4j/neo4j

while ( (termsRef = terms.next()) != null )
  if ( terms.docFreq() > 1 )
    collector.init( terms.docFreq() );
    searcher.search( new TermQuery( new Term( field, termsRef ) ), collector );
origin: org.apache.lucene/lucene-core

 TermAndState(String field, TermsEnum termsEnum) throws IOException {
  this.field = field;
  this.termsEnum = termsEnum;
  this.term = BytesRef.deepCopyOf(termsEnum.term());
  this.state = termsEnum.termState();
  this.docFreq = termsEnum.docFreq();
  this.totalTermFreq = termsEnum.totalTermFreq();
 }
}
origin: org.apache.lucene/lucene-core

/** Pulls terms from the given terms enum into {@code terms}, up to a threshold.
 *  Returns {@code true} iff all terms could be collected. If {@code false} is
 *  returned, the enum is left positioned on the next term. */
private boolean collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
 final int limit = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
 int collected = 0;
 while (collected < limit) {
  final BytesRef current = termsEnum.next();
  if (current == null) {
   // The enum ran out before the limit was reached.
   return true;
  }
  final TermState currentState = termsEnum.termState();
  final BytesRef termCopy = BytesRef.deepCopyOf(current);
  terms.add(new TermAndState(termCopy, currentState, termsEnum.docFreq(), termsEnum.totalTermFreq()));
  collected++;
 }
 // Limit reached: everything was collected only if no further term remains.
 final BytesRef leftover = termsEnum.next();
 return leftover == null;
}
origin: oracle/opengrok

/**
 * Logs, at WARNING level, every term of the {@code QueryBuilder.DEFS} field that is
 * contained in more than 16 documents and whose text is longer than {@code freq}
 * characters.
 *
 * @param freq despite the name, an exclusive lower bound on the length of the term text
 * @throws IOException if opening or reading the index fails
 */
public void listTokens(int freq) throws IOException {
  IndexReader ireader = null;
  try {
    ireader = DirectoryReader.open(indexDirectory);
    if (ireader.numDocs() > 0) {
      Fields uFields = MultiFields.getFields(ireader);
      // Guard both lookups: a missing field yields null terms rather than an empty enum.
      Terms terms = (uFields == null) ? null : uFields.terms(QueryBuilder.DEFS);
      if (terms != null) {
        TermsEnum iter = terms.iterator();
        // A freshly created TermsEnum is unpositioned, and term()/docFreq() must not be
        // called in that state. Advance with next() first and stop when it returns null.
        BytesRef term;
        while ((term = iter.next()) != null) {
          if (iter.docFreq() > 16) {
            String text = term.utf8ToString();
            if (text.length() > freq) {
              LOGGER.warning(text);
            }
          }
        }
      }
    }
  } finally {
    if (ireader != null) {
      try {
        ireader.close();
      } catch (IOException e) {
        // Closing is best effort; a failure here is only logged.
        LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
      }
    }
  }
}
origin: org.apache.lucene/lucene-core

@Override
public final int docFreq(Term term) throws IOException {
 final Terms fieldTerms = terms(term.field());
 if (fieldTerms != null) {
  final TermsEnum cursor = fieldTerms.iterator();
  final boolean found = cursor.seekExact(term.bytes());
  if (found) {
   return cursor.docFreq();
  }
 }
 // Either there are no terms for the field, or the term was not found.
 return 0;
}
origin: org.apache.lucene/lucene-core

 t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
} else {
 visitedTerms.put(st.bytes.get(), st);
 assert st.termState.docFreq() == 0;
 st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
 stQueue.offer(st);
origin: oracle/opengrok

  score = normalizeDocumentFrequency(termsEnum.docFreq(), numDocs);
} else if (needPositionsAndFrequencies) {
  score = getPhraseScore(complexQueryData, leafReaderContext.docBase, postingsEnum);
origin: neo4j/neo4j

while ( (termsRef = termsEnum.next()) != null )
  sampler.include( termsRef.utf8ToString(), termsEnum.docFreq() );
  checkCancellation();
origin: org.apache.lucene/lucene-core

final int docFreq = termsEnum.docFreq();
if (docFreq <= 0) {
 throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");
 int expectedDocFreq = termsEnum.docFreq();
 PostingsEnum d = termsEnum.postings(null, PostingsEnum.NONE);
 int docFreq = 0;
origin: org.apache.lucene/lucene-core

/**
 * Builds a {@link TermContext} for the given {@link Term} from a top-level
 * {@link IndexReaderContext}. The term is looked up in each of the context's
 * leaf readers, and every leaf that contains it is registered in the returned
 * {@link TermContext} under that leaf's ordinal.
 * <p>
 * Note: the given context must be a top-level context.
 */
public static TermContext build(IndexReaderContext context, Term term)
  throws IOException {
 assert context != null && context.isTopLevel;
 final String field = term.field();
 final BytesRef bytes = term.bytes();
 final TermContext result = new TermContext(context);
 for (final LeafReaderContext leaf : context.leaves()) {
  final Terms leafTerms = leaf.reader().terms(field);
  if (leafTerms == null) {
   // This leaf has no terms for the field.
   continue;
  }
  final TermsEnum cursor = leafTerms.iterator();
  if (!cursor.seekExact(bytes)) {
   // The term is absent from this leaf.
   continue;
  }
  final TermState leafState = cursor.termState();
  result.register(leafState, leaf.ord, cursor.docFreq(), cursor.totalTermFreq());
 }
 return result;
}
origin: org.elasticsearch/elasticsearch

/**
 * Looks up the given term and returns its frequency: the total term frequency when
 * {@code useTotalTermFrequency} is set, otherwise the document frequency. Returns 0
 * when the term is not found.
 */
public long internalFrequency(BytesRef term) throws IOException {
  if (!termsEnum.seekExact(term)) {
    return 0;
  }
  if (useTotalTermFrequency) {
    return termsEnum.totalTermFreq();
  }
  return termsEnum.docFreq();
}
origin: org.elasticsearch/elasticsearch

/**
 * Writes the statistics of the iterator's current term: first the document
 * frequency, then the total term frequency. Both are asserted to be at least -1.
 */
private void writeTermStatistics(TermsEnum topLevelIterator) throws IOException {
  final int documentFrequency = topLevelIterator.docFreq();
  assert documentFrequency >= -1;
  writePotentiallyNegativeVInt(documentFrequency);

  final long totalTermFrequency = topLevelIterator.totalTermFreq();
  assert totalTermFrequency >= -1;
  writePotentiallyNegativeVLong(totalTermFrequency);
}
org.apache.lucene.index.TermsEnum.docFreq

Javadoc

Returns the number of documents containing the current term. Do not call this when the enum is unpositioned (see SeekStatus#END).

Popular methods of TermsEnum

  • next
  • totalTermFreq
    Returns the total number of occurrences of this term across all documents (the sum of the freq() for
  • term
    Returns current term. Do not call this when the enum is unpositioned.
  • seekExact
  • postings
  • seekCeil
    Seeks to the specified term, if it exists, or to the next (ceiling) term. Returns SeekStatus to indi
  • ord
    Returns ordinal position for current term. This is an optional method (the codec may throw Unsupport
  • attributes
    Returns the related attributes.
  • termState
    Expert: Returns the TermsEnum's internal state to position the TermsEnum without re-seeking the term
  • docs
    Get DocsEnum for the current term, with control over whether freqs are required. Do not call this wh
  • docsAndPositions
    Get DocsAndPositionsEnum for the current term, with control over whether offsets and payloads are re
  • getComparator
  • docsAndPositions,
  • getComparator,
  • docFreq

Popular in Java

  • Making http post requests using okhttp
  • scheduleAtFixedRate (ScheduledExecutorService)
  • getSystemService (Context)
  • onRequestPermissionsResult (Fragment)
  • Menu (java.awt)
  • Window (java.awt)
    A Window object is a top-level window with no borders and no menubar. The default layout for a windo
  • URI (java.net)
    A Uniform Resource Identifier that identifies an abstract or physical resource, as specified by RFC
  • LinkedHashMap (java.util)
    LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
  • LinkedList (java.util)
    Doubly-linked list implementation of the List and Deque interfaces. Implements all optional list oper
  • SortedSet (java.util)
    SortedSet is a Set which iterates over its elements in a sorted order. The order is determined eithe
  • Top Vim plugins
Tabnine Logo
  • Products

    Search for Java code, Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA, WebStorm, Visual Studio, Android Studio, Eclipse, Visual Studio Code, PyCharm, Sublime Text, PhpStorm, Vim, GoLand, RubyMine, Emacs, Jupyter Notebook, Jupyter Lab, Rider, DataGrip, AppCode
  • Company

    About Us, Contact Us, Careers
  • Resources

    FAQ, Blog, Tabnine Academy, Terms of use, Privacy policy, Java Code Index, Javascript Code Index
Get Tabnine for your IDE now