/**
 * Returns a {@link PostingsEnum} for the specified field and term, with control
 * over whether freqs are required via {@code flags}. Some codecs may be able to
 * optimize their implementation when freqs are not required. Returns {@code null}
 * if the field or term does not exist. See {@link TermsEnum#postings(PostingsEnum,int)}.
 */
public static PostingsEnum getTermDocsEnum(IndexReader r, String field, BytesRef term, int flags) throws IOException {
  assert field != null;
  assert term != null;
  final Terms terms = getTerms(r, field);
  if (terms == null) {
    // Field is not present in this reader.
    return null;
  }
  final TermsEnum termsEnum = terms.iterator();
  return termsEnum.seekExact(term) ? termsEnum.postings(null, flags) : null;
}
/**
 * Returns a {@link PostingsEnum} for the specified field and term, with control
 * over whether offsets and payloads are required via {@code flags}. Some codecs
 * may be able to optimize their implementation when offsets and/or payloads are
 * not required. Returns {@code null} if the field or term does not exist.
 * See {@link TermsEnum#postings(PostingsEnum,int)}.
 */
public static PostingsEnum getTermPositionsEnum(IndexReader r, String field, BytesRef term, int flags) throws IOException {
  assert field != null;
  assert term != null;
  final Terms terms = getTerms(r, field);
  if (terms == null) {
    // Field is not present in this reader.
    return null;
  }
  final TermsEnum termsEnum = terms.iterator();
  return termsEnum.seekExact(term) ? termsEnum.postings(null, flags) : null;
}
/**
 * Returns {@link CollectionStatistics} for a field.
 *
 * This can be overridden for example, to return a field's statistics
 * across a distributed collection.
 * @lucene.experimental
 */
public CollectionStatistics collectionStatistics(String field) throws IOException {
  assert field != null;
  // Default to "field absent" stats; overwrite when terms exist.
  int docCount = 0;
  long sumTotalTermFreq = 0;
  long sumDocFreq = 0;
  final Terms terms = MultiFields.getTerms(reader, field);
  if (terms != null) {
    docCount = terms.getDocCount();
    sumTotalTermFreq = terms.getSumTotalTermFreq();
    sumDocFreq = terms.getSumDocFreq();
  }
  return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
}
/**
 * Convenience constructor: delegates to the full constructor with no pre/post
 * filters (null, null) and terms resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
/**
 * Convenience constructor: delegates to the full constructor with terms
 * resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException { this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator); }
Terms terms = MultiFields.getTerms(indexReader, field); CharsRefBuilder spare = new CharsRefBuilder(); if (terms != null) {
/**
 * Convenience constructor: delegates to the full constructor with no pre/post
 * filters (null, null) and terms resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
/**
 * Convenience constructor: delegates to the full constructor with no pre/post
 * filters (null, null) and terms resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
/**
 * Convenience constructor: delegates to the full constructor with no pre/post
 * filters (null, null) and terms resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
/**
 * Convenience constructor: delegates to the full constructor with terms
 * resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException { this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator); }
/**
 * Convenience constructor: delegates to the full constructor with terms
 * resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException { this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator); }
/**
 * Convenience constructor: delegates to the full constructor with no pre/post
 * filters (null, null) and terms resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
/**
 * Convenience constructor: delegates to the full constructor with no pre/post
 * filters (null, null) and terms resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException { this(spellchecker, field, suggestMode, reader, nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field)); }
/**
 * Convenience constructor: delegates to the full constructor with terms
 * resolved via {@code MultiFields.getTerms(reader, field)}.
 */
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException { this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator); }
PhraseSuggestionContext.DirectCandidateGenerator generator = generators.get(i); DirectSpellChecker directSpellChecker = generator.createDirectSpellChecker(); Terms terms = MultiFields.getTerms(indexReader, generator.field()); if (terms != null) { gens.add(new DirectCandidateGenerator(directSpellChecker, generator.field(), generator.suggestMode(), final Terms suggestTerms = MultiFields.getTerms(indexReader, suggestField); if (gens.size() > 0 && suggestTerms != null) { final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(),
/**
 * Returns the sum of the total term frequency over all terms of {@code fieldName}
 * across the whole index.
 *
 * @param reader    the index reader to pull statistics from
 * @param fieldName the field whose aggregate term frequency is wanted
 * @return the summed total term frequency, or 0 when the field is absent,
 *         not indexed with frequencies, or an I/O error occurs
 */
private long getSumTermFrequency(IndexReader reader, String fieldName) {
  try {
    Terms collectionTermVector = MultiFields.getTerms(reader, fieldName);
    // getTerms returns null when the field does not exist; the original code
    // dereferenced it unconditionally and would NPE (not caught by IOException).
    if (collectionTermVector == null) {
      LOG.warn("Unable to get total term frequency, it might not be indexed");
      return 0;
    }
    return collectionTermVector.getSumTotalTermFreq();
  } catch (IOException e) {
    // Log the cause instead of silently dropping the stack trace.
    LOG.warn("Unable to get total term frequency, it might not be indexed", e);
  }
  return 0;
}
/**
 * Returns an iterator over every term of {@code field}, or the empty
 * iterator when the field has no terms in this reader.
 */
@Override
public final InputIterator getEntryIterator() throws IOException {
  final Terms terms = MultiFields.getTerms(reader, field);
  return terms == null
      ? InputIterator.EMPTY
      : new InputIterator.InputIteratorWrapper(terms.iterator());
}
}
HighFrequencyIterator() throws IOException { Terms terms = MultiFields.getTerms(reader, field); if (terms != null) { termsEnum = terms.iterator(); } else { termsEnum = null; } minNumDocs = (int)(thresh * (float)reader.numDocs()); }
/** * Returns the average number of unique terms times the number of docs belonging to the input class * * @param term the class term * @return the average number of unique terms * @throws java.io.IOException If there is a low-level I/O error */ private double getTextTermFreqForClass(Term term, String fieldName) throws IOException { double avgNumberOfUniqueTerms; Terms terms = MultiFields.getTerms(indexReader, fieldName); long numPostings = terms.getSumDocFreq(); // number of term/doc pairs avgNumberOfUniqueTerms = numPostings / (double) terms.getDocCount(); // avg # of unique terms per doc int docsWithC = indexReader.docFreq(term); return avgNumberOfUniqueTerms * docsWithC; // avg # of unique terms in text fields per doc * # docs with c }
/**
 * Builds a RankLib {@code DataPoint} for {@code doc} by extracting all features
 * against the index terms of {@code this.termsField}.
 *
 * On an I/O failure the terms stay {@code null} and extraction proceeds
 * best-effort, matching the original behavior.
 *
 * @param doc     the document to featurize
 * @param context reranker context providing the searcher/reader
 * @return a DataPoint built from the extracted feature vector
 */
private DataPoint convertToDataPoint(Document doc, RerankerContext<T> context) {
  Terms terms = null;
  try {
    terms = MultiFields.getTerms(context.getIndexSearcher().getIndexReader(), this.termsField);
  } catch (IOException e) {
    // Include the exception so the failure cause is not silently dropped.
    LOG.error("Unable to retrieve term vectors", e);
  }
  float[] features = this.extractors.extractAll(doc, terms, context);
  String rankLibEntryString = BaseFeatureExtractor.constructOutputString("0", 0, "0", features);
  return new DataPoint(rankLibEntryString);
}