/**
 * Adds up the sum of total term frequencies reported by every entry of {@code subs}.
 *
 * @return the combined sum, or {@code -1} as soon as any entry reports {@code -1}
 * @throws IOException if an entry fails while reporting its statistic
 */
@Override
public long getSumTotalTermFreq() throws IOException {
  long total = 0;
  for (Terms sub : subs) {
    final long subTotal = sub.getSumTotalTermFreq();
    if (subTotal != -1) {
      total += subTotal;
    } else {
      // A single -1 from any entry makes the overall result -1.
      return -1;
    }
  }
  return total;
}
/**
 * Reports the sum of total term frequencies by forwarding the call to {@code in}.
 *
 * @return whatever {@code in.getSumTotalTermFreq()} returns, unchanged
 * @throws IOException if the call on {@code in} fails
 */
@Override
public long getSumTotalTermFreq() throws IOException {
  final long forwarded = in.getSumTotalTermFreq();
  return forwarded;
}
/**
 * Looks up the {@code Terms} for {@code field} and returns its sum of total term frequencies.
 *
 * @param field name of the field to look up via {@code terms(field)}
 * @return {@code 0} when {@code terms(field)} yields {@code null}; otherwise the value
 *         reported by the field's {@code Terms}
 * @throws IOException if the lookup or the statistic call fails
 */
@Override
public final long getSumTotalTermFreq(String field) throws IOException {
  final Terms fieldTerms = terms(field);
  return fieldTerms == null ? 0 : fieldTerms.getSumTotalTermFreq();
}
/**
 * Expert: produces a debugging summary of this instance.
 *
 * <p>The summary is a comma-separated list of {@code key=value} pairs in this order:
 * {@code impl} (simple class name), {@code size}, {@code docCount},
 * {@code sumTotalTermFreq} and {@code sumDocFreq}.
 *
 * @return the summary as a {@link String}
 * @throws IOException if any of the queried statistics fails
 */
public Object getStats() throws IOException {
  final StringBuilder stats = new StringBuilder();
  stats.append("impl=").append(getClass().getSimpleName());
  stats.append(",size=").append(size());
  stats.append(",docCount=").append(getDocCount());
  stats.append(",sumTotalTermFreq=").append(getSumTotalTermFreq());
  stats.append(",sumDocFreq=").append(getSumDocFreq());
  return stats.toString();
}
}
/**
 * Builds the {@link CollectionStatistics} of a single field.
 *
 * <p>Subclasses may override this, for example to report a field's statistics
 * across a distributed collection.
 *
 * @param field name of the field; must not be {@code null} (checked by assertion only)
 * @return statistics carrying {@code reader.maxDoc()} plus the field's doc count, sum of
 *         total term frequencies and sum of doc frequencies; all three are {@code 0} when
 *         {@code MultiFields.getTerms} finds no terms for the field
 * @throws IOException if reading the terms or their statistics fails
 * @lucene.experimental
 */
public CollectionStatistics collectionStatistics(String field) throws IOException {
  assert field != null;

  final Terms fieldTerms = MultiFields.getTerms(reader, field);
  if (fieldTerms == null) {
    // No terms for this field: report zeroed statistics.
    return new CollectionStatistics(field, reader.maxDoc(), 0, 0L, 0L);
  }

  final int docCount = fieldTerms.getDocCount();
  final long sumTotalTermFreq = fieldTerms.getSumTotalTermFreq();
  final long sumDocFreq = fieldTerms.getSumDocFreq();
  return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
}
if (terms.getSumTotalTermFreq() != -1) { throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)"); final long v = fields.terms(field).getSumTotalTermFreq(); if (v != -1 && sumTotalTermFreq != v) { throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq);
/**
 * Creates a scorer bound to the terms of one field.
 *
 * @param reader             index reader; supplies {@code maxDoc()} as the vocabulary size
 *                           when the sum of total term frequencies is {@code -1}
 * @param terms              terms of {@code field}; must not be {@code null}
 * @param field              name of the field being scored
 * @param realWordLikelyHood stored as-is in {@code realWordLikelyhood}
 * @param separator          stored as-is in {@code separator}
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException if reading the term statistics fails
 */
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("Field: [" + field + "] does not exist");
    }
    this.field = field;
    this.terms = terms;

    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    final boolean hasTotalTermFreq = sumTotalTermFreq != -1;
    // Fall back to the number of documents when total term frequencies are unavailable.
    this.vocabluarySize = hasTotalTermFreq ? sumTotalTermFreq : reader.maxDoc();
    this.useTotalTermFreq = hasTotalTermFreq;

    final long termCount = terms.size();
    // A term count of -1 (returned when terms is a MultiTerms instance) must not be kept
    // as-is: scoreUnigram(...) could then divide by 0 if vocabluarySize is 1.
    this.numTerms = vocabluarySize + termCount > 1 ? termCount : 0;

    // Non-recycling arrays for now.
    this.termsEnum = new FreqTermsEnum(reader, field, !useTotalTermFreq, useTotalTermFreq, null,
            BigArrays.NON_RECYCLING_INSTANCE);
    this.reader = reader;
    this.realWordLikelyhood = realWordLikelyHood;
    this.separator = separator;
}
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException { if (terms == null) { throw new IllegalArgumentException("generator field [" + field + "] doesn't exist"); } this.spellchecker = spellchecker; this.field = field; this.numCandidates = numCandidates; this.suggestMode = suggestMode; this.reader = reader; final long dictSize = terms.getSumTotalTermFreq(); this.useTotalTermFrequency = dictSize != -1; this.dictSize = dictSize == -1 ? reader.maxDoc() : dictSize; this.preFilter = preFilter; this.postFilter = postFilter; this.nonErrorLikelihood = nonErrorLikelihood; float thresholdFrequency = spellchecker.getThresholdFrequency(); this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency); termsEnum = terms.iterator(); }
private void buildFieldStatistics(XContentBuilder builder, Terms curTerms) throws IOException { long sumDocFreq = curTerms.getSumDocFreq(); int docCount = curTerms.getDocCount(); long sumTotalTermFrequencies = curTerms.getSumTotalTermFreq(); if (docCount >= 0) { assert ((sumDocFreq >= 0)) : "docCount >= 0 but sumDocFreq ain't!"; assert ((sumTotalTermFrequencies >= 0)) : "docCount >= 0 but sumTotalTermFrequencies ain't!"; builder.startObject(FieldStrings.FIELD_STATISTICS); builder.field(FieldStrings.SUM_DOC_FREQ, sumDocFreq); builder.field(FieldStrings.DOC_COUNT, docCount); builder.field(FieldStrings.SUM_TTF, sumTotalTermFrequencies); builder.endObject(); } else if (docCount == -1) { // this should only be -1 if the field // statistics were not requested at all. In // this case all 3 values should be -1 assert ((sumDocFreq == -1)) : "docCount was -1 but sumDocFreq ain't!"; assert ((sumTotalTermFrequencies == -1)) : "docCount was -1 but sumTotalTermFrequencies ain't!"; } else { throw new IllegalStateException( "Something is wrong with the field statistics of the term vector request: Values are " + "\n" + FieldStrings.SUM_DOC_FREQ + " " + sumDocFreq + "\n" + FieldStrings.DOC_COUNT + " " + docCount + "\n" + FieldStrings.SUM_TTF + " " + sumTotalTermFrequencies); } }
/**
 * Writes the three field-level statistics of {@code topLevelTerms}, in this order:
 * sum of total term frequencies, sum of doc frequencies, doc count.
 *
 * <p>Each value is asserted to be at least {@code -1} and is written through the
 * "potentially negative" variable-length writers.
 *
 * @param topLevelTerms terms whose statistics are written
 * @throws IOException if reading a statistic or writing it fails
 */
private void writeFieldStatistics(Terms topLevelTerms) throws IOException {
    final long sumTotalTermFreq = topLevelTerms.getSumTotalTermFreq();
    assert sumTotalTermFreq >= -1;
    writePotentiallyNegativeVLong(sumTotalTermFreq);

    final long sumDocFreq = topLevelTerms.getSumDocFreq();
    assert sumDocFreq >= -1;
    writePotentiallyNegativeVLong(sumDocFreq);

    final int docCount = topLevelTerms.getDocCount();
    assert docCount >= -1;
    writePotentiallyNegativeVInt(docCount);
}
/**
 * Computes the sum of total term frequencies across all entries of {@code subs}.
 *
 * @return the accumulated sum; {@code -1} if any entry reports {@code -1}
 * @throws IOException if an entry fails while reporting its statistic
 */
@Override
public long getSumTotalTermFreq() throws IOException {
  long accumulated = 0;
  for (Terms sub : subs) {
    final long contribution = sub.getSumTotalTermFreq();
    if (contribution == -1) {
      // One entry without the statistic makes the aggregate -1 as well.
      return -1;
    }
    accumulated = accumulated + contribution;
  }
  return accumulated;
}
/**
 * Reports the sum of total term frequencies by forwarding the call to {@code delegateTerms}.
 *
 * @return whatever {@code delegateTerms.getSumTotalTermFreq()} returns, unchanged
 * @throws IOException if the call on {@code delegateTerms} fails
 */
@Override
public long getSumTotalTermFreq() throws IOException {
  final long forwarded = delegateTerms.getSumTotalTermFreq();
  return forwarded;
}
/**
 * Returns the sum of total term frequencies reported by {@code terms}.
 *
 * <p>An {@link IOException} raised by the lookup is not propagated: its stack trace is
 * printed and {@code 0} is returned instead.
 *
 * @return the reported sum, or {@code 0} if the lookup threw an {@link IOException}
 */
public long totalTermFreq() {
  long result = 0;
  try {
    result = terms.getSumTotalTermFreq();
  } catch (IOException e) {
    // NOTE(review): the failure is swallowed and becomes indistinguishable from a real 0;
    // confirm callers rely on this fallback before changing it.
    e.printStackTrace();
  }
  return result;
}
/**
 * Returns the sum of total term frequencies reported by {@code terms}.
 *
 * <p>An {@link IOException} raised by the lookup is not propagated: its stack trace is
 * printed and {@code 0} is returned instead.
 *
 * @return the reported sum, or {@code 0} if the lookup threw an {@link IOException}
 */
public long totalTermFreq() {
  long result = 0;
  try {
    result = terms.getSumTotalTermFreq();
  } catch (IOException e) {
    // NOTE(review): the failure is swallowed and becomes indistinguishable from a real 0;
    // confirm callers rely on this fallback before changing it.
    e.printStackTrace();
  }
  return result;
}
/**
 * Looks up the {@code Terms} for {@code field} and returns its sum of total term frequencies.
 *
 * @param field name of the field to look up via {@code terms(field)}
 * @return {@code 0} when {@code terms(field)} yields {@code null}; otherwise the value
 *         reported by the field's {@code Terms}
 * @throws IOException if the lookup or the statistic call fails
 */
@Override
public final long getSumTotalTermFreq(String field) throws IOException {
  final Terms fieldTerms = terms(field);
  return fieldTerms == null ? 0 : fieldTerms.getSumTotalTermFreq();
}
/**
 * Looks up the {@code Terms} for {@code field} and returns its sum of total term frequencies.
 *
 * @param field name of the field to look up via {@code terms(field)}
 * @return {@code 0} when {@code terms(field)} yields {@code null}; otherwise the value
 *         reported by the field's {@code Terms}
 * @throws IOException if the lookup or the statistic call fails
 */
@Override
public final long getSumTotalTermFreq(String field) throws IOException {
  final Terms fieldTerms = terms(field);
  return fieldTerms == null ? 0 : fieldTerms.getSumTotalTermFreq();
}
/**
 * Writes the three field-level statistics of {@code topLevelTerms}, in this order:
 * sum of total term frequencies, sum of doc frequencies, doc count.
 *
 * <p>Each value is asserted to be at least {@code -1} and is written through the
 * "potentially negative" variable-length writers.
 *
 * @param topLevelTerms terms whose statistics are written
 * @throws IOException if reading a statistic or writing it fails
 */
private void writeFieldStatistics(Terms topLevelTerms) throws IOException {
    final long sumTotalTermFreq = topLevelTerms.getSumTotalTermFreq();
    assert sumTotalTermFreq >= -1;
    writePotentiallyNegativeVLong(sumTotalTermFreq);

    final long sumDocFreq = topLevelTerms.getSumDocFreq();
    assert sumDocFreq >= -1;
    writePotentiallyNegativeVLong(sumDocFreq);

    final int docCount = topLevelTerms.getDocCount();
    assert docCount >= -1;
    writePotentiallyNegativeVInt(docCount);
}
/**
 * Builds {@code FieldStats.Long} statistics from the given terms.
 *
 * <p>The minimum and maximum come from {@code NumericUtils.getMinInt} and
 * {@code NumericUtils.getMaxInt}; the doc count, sum of doc frequencies and sum of total
 * term frequencies are read from {@code terms}.
 *
 * @param terms  terms to read the values and statistics from
 * @param maxDoc passed through to the result unchanged
 * @return the assembled statistics
 * @throws IOException if reading from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    final long smallest = NumericUtils.getMinInt(terms);
    final long largest = NumericUtils.getMaxInt(terms);

    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();

    return new FieldStats.Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, smallest, largest);
}
}
/**
 * Writes the three field-level statistics of {@code topLevelTerms}, in this order:
 * sum of total term frequencies, sum of doc frequencies, doc count.
 *
 * <p>Each value is asserted to be at least {@code -1} and is written through the
 * "potentially negative" variable-length writers.
 *
 * @param topLevelTerms terms whose statistics are written
 * @throws IOException if reading a statistic or writing it fails
 */
private void writeFieldStatistics(Terms topLevelTerms) throws IOException {
    final long sumTotalTermFreq = topLevelTerms.getSumTotalTermFreq();
    assert sumTotalTermFreq >= -1;
    writePotentiallyNegativeVLong(sumTotalTermFreq);

    final long sumDocFreq = topLevelTerms.getSumDocFreq();
    assert sumDocFreq >= -1;
    writePotentiallyNegativeVLong(sumDocFreq);

    final int docCount = topLevelTerms.getDocCount();
    assert docCount >= -1;
    writePotentiallyNegativeVInt(docCount);
}
/**
 * Builds {@code FieldStats.Float} statistics from the given terms.
 *
 * <p>The minimum and maximum are obtained by passing the results of
 * {@code NumericUtils.getMinInt} and {@code NumericUtils.getMaxInt} through
 * {@code NumericUtils.sortableIntToFloat}; the doc count, sum of doc frequencies and sum
 * of total term frequencies are read from {@code terms}.
 *
 * @param terms  terms to read the values and statistics from
 * @param maxDoc passed through to the result unchanged
 * @return the assembled statistics
 * @throws IOException if reading from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    final float smallest = NumericUtils.sortableIntToFloat(NumericUtils.getMinInt(terms));
    final float largest = NumericUtils.sortableIntToFloat(NumericUtils.getMaxInt(terms));

    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();

    return new FieldStats.Float(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, smallest, largest);
}
}