/**
 * Enumerates the terms buffered by one field's writer, in sorted order.
 * Snapshots the term count, sorted term ids, and postings array at
 * construction time.
 */
public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
  this.terms = terms;
  this.postingsArray = (FreqProxPostingsArray) terms.postingsArray;
  this.numTerms = terms.bytesHash.size();
  this.sortedTermIDs = terms.sortedTermIDs;
  // Terms must already have been sorted before this enum is created.
  assert this.sortedTermIDs != null;
}
/**
 * Called once per field per document if term vectors are enabled, to
 * write the vectors to RAMOutputStream, which is then quickly flushed
 * to the real term vectors files in the Directory.
 */
@Override
void finish() {
  // Only queue fields that actually buffered at least one term.
  if (doVectors && bytesHash.size() != 0) {
    termsWriter.addFieldToFlush(this);
  }
}
/**
 * Builds a doc comparator over the buffered STRING-sorted values.
 * Freezes the pending state exactly once: sorted term ids, the
 * per-document ords, and the term-id → sorted-position map.
 */
@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
  assert sortField.getType().equals(SortField.Type.STRING);
  // Must not have been frozen already.
  assert finalSortedValues == null && finalOrdMap == null && finalOrds == null;

  final int valueCount = hash.size();
  finalSortedValues = hash.sort();
  finalOrds = pending.build();
  finalOrdMap = new int[valueCount];
  for (int ord = 0; ord < valueCount; ord++) {
    // Map each term id to its rank in sorted order.
    finalOrdMap[finalSortedValues[ord]] = ord;
  }

  final SortedDocValues docValues =
      new BufferedSortedDocValues(hash, valueCount, finalOrds, finalSortedValues, finalOrdMap,
          docsWithField.iterator());
  return Sorter.getDocComparator(maxDoc, sortField, () -> docValues, () -> null);
}
/**
 * Builds a doc comparator over the buffered SORTED_SET values.
 * Freezes the pending ords, ord counts, sorted term ids, and the
 * term-id → sorted-position map exactly once.
 */
@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
  assert sortField instanceof SortedSetSortField;
  // Must not have been frozen already.
  assert finalOrds == null && finalOrdCounts == null
      && finalSortedValues == null && finalOrdMap == null;

  final int valueCount = hash.size();
  finalOrds = pending.build();
  finalOrdCounts = pendingCounts.build();
  finalSortedValues = hash.sort();
  finalOrdMap = new int[valueCount];
  for (int ord = 0; ord < valueCount; ord++) {
    // Map each term id to its rank in sorted order.
    finalOrdMap[finalSortedValues[ord]] = ord;
  }

  final SortedSetSortField sf = (SortedSetSortField) sortField;
  final SortedSetDocValues dvs =
      new BufferedSortedSetDocValues(finalSortedValues, finalOrdMap, hash, finalOrds,
          finalOrdCounts, maxCount, docsWithField.iterator());
  // The selector reduces the multi-valued field to one ord per doc.
  return Sorter.getDocComparator(maxDoc, sf,
      () -> SortedSetSelector.wrap(dvs, sf.getSelector()), () -> null);
}
// Fragment (body continues beyond this view): flush only fields that
// buffered at least one term; such fields must have real index options.
if (perField.bytesHash.size() > 0) { perField.sortPostings(); assert perField.fieldInfo.getIndexOptions() != IndexOptions.NONE;
/**
 * Rewrites the {@link MultiTermQuery} by collecting every matching term
 * and adding one clause per term, in sorted term order, to a top-level
 * builder.
 *
 * @param reader the reader to collect terms against
 * @param query  the multi-term query being rewritten
 * @return the built top-level query
 * @throws IOException if term collection or a docFreq lookup fails
 */
@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
  final B builder = getTopLevelBuilder();
  final ParallelArraysTermCollector col = new ParallelArraysTermCollector();
  collectTerms(reader, query, col);

  final int size = col.terms.size();
  if (size > 0) {
    // Sort once so clauses are added in term order.
    final int[] sort = col.terms.sort(); // idiomatic int[] instead of C-style int sort[]
    final float[] boost = col.array.boost;
    final TermContext[] termStates = col.array.termState;
    final String field = query.getField(); // loop-invariant, hoisted
    for (int i = 0; i < size; i++) {
      final int pos = sort[i];
      final Term term = new Term(field, col.terms.get(pos, new BytesRef()));
      // The cached TermContext must agree with the live reader.
      assert reader.docFreq(term) == termStates[pos].docFreq();
      addClause(builder, term, termStates[pos].docFreq(), boost[pos], termStates[pos]);
    }
  }
  return build(builder);
}
// Fragment (method body continues beyond this view): flush entry point for
// sorted doc values; declares the frozen ords/sorted-values filled in below.
@Override public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer) throws IOException { final int valueCount = hash.size(); final PackedLongValues ords; final int[] sortedValues;
// Fragment (method body continues beyond this view): flush entry point for
// sorted-set doc values; declares the frozen ords/ord-counts filled in below.
@Override public void flush(SegmentWriteState state, Sorter.DocMap sortMap, DocValuesConsumer dvConsumer) throws IOException { final int valueCount = hash.size(); final PackedLongValues ords; final PackedLongValues ordCounts;
// Fragment (body continues beyond this view): only proceed when at least
// one term was buffered for this field.
if (bytesHash.size() != 0) {
// Number of distinct terms buffered for this field.
final int numPostings = bytesHash.size();
/** Term iterator over one field's buffered postings, in sorted term order. */
public FreqProxTermsEnum(FreqProxTermsWriterPerField terms) {
  this.terms = terms;
  numTerms = terms.bytesHash.size();
  sortedTermIDs = terms.sortedTermIDs;
  // Sorting must already have happened before this enum is created.
  assert sortedTermIDs != null;
  postingsArray = (FreqProxPostingsArray) terms.postingsArray;
}
@Override public long getSumDocFreq() { // each term has df=1 return info.terms.size(); }
/**
 * Builds a {@link StemmerOverrideMap} for use with the {@link StemmerOverrideFilter}.
 *
 * @return a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if building the underlying FST fails
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs fstOutputs = ByteSequenceOutputs.getSingleton();
  final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, fstOutputs);
  final int[] sortedIds = hash.sort();
  final IntsRefBuilder scratchInts = new IntsRefBuilder();
  final int numEntries = hash.size();
  final BytesRef scratchBytes = new BytesRef();
  // Feed keys to the FST builder in sorted order, pairing each key with
  // its recorded output value.
  for (int i = 0; i < numEntries; i++) {
    final int id = sortedIds[i];
    final BytesRef key = hash.get(id, scratchBytes);
    scratchInts.copyUTF8Bytes(key);
    fstBuilder.add(scratchInts.get(), new BytesRef(outputValues.get(id)));
  }
  return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}
/**
 * Called once per field per document if term vectors are enabled, to
 * write the vectors to RAMOutputStream, which is then quickly flushed
 * to the real term vectors files in the Directory.
 */
@Override
void finish() {
  final boolean hasVectors = doVectors && bytesHash.size() != 0;
  if (!hasVectors) {
    return; // nothing buffered for this field/document
  }
  termsWriter.addFieldToFlush(this);
}
@Override
public void seekExact(long ord) {
  // Ord must reference one of the buffered terms.
  assert ord < info.terms.size();
  final int target = (int) ord;
  termUpto = target;
  info.terms.get(info.sortedTerms[target], br);
}
@Override
public void seekExact(long ord) {
  assert ord < info.terms.size();
  termUpto = (int) ord;
  // Resolve the sorted position back to a term id and load its bytes into br.
  final int termID = info.sortedTerms[termUpto];
  info.terms.get(termID, br);
}
@Override
public boolean seekExact(BytesRef text) {
  final int lastIndex = info.terms.size() - 1;
  termUpto = binarySearch(text, br, 0, lastIndex, info.terms, info.sortedTerms,
      BytesRef.getUTF8SortedAsUnicodeComparator());
  // A negative result encodes "not found", so only non-negative
  // positions count as an exact hit.
  return termUpto >= 0;
}
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
  docValues = DocValues.getSorted(readerContext.reader(), field);
  ordsToGroupIds.clear();
  // Re-map each group value to its ordinal in the new segment; values
  // absent from this segment (negative ord) are simply skipped.
  final BytesRef scratch = new BytesRef();
  final int numGroups = values.size();
  for (int groupId = 0; groupId < numGroups; groupId++) {
    values.get(groupId, scratch);
    final int ord = docValues.lookupTerm(scratch);
    if (ord >= 0) {
      ordsToGroupIds.put(ord, groupId);
    }
  }
}
private synchronized void materialize() { if(_strings != null) return; // we need the tuple (datatype, ref.length) to handle the data final AsStrings proc = new AsStrings(_bytesRefs.size(), _dataType); process(_bytesRefs, proc); _strings = proc.getList(); _bytesRefs = null; }
/**
 * Wraps the given enum, sorting the term set once so it can be walked
 * in UTF-8 order alongside the wrapped enum.
 */
SeekingTermSetTermsEnum(TermsEnum tenum, BytesRefTermsSet termsSet) {
  super(tenum);
  this.terms = termsSet.getBytesRefHash();
  this.ords = this.terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  lastElement = terms.size() - 1;
  lastTerm = terms.get(ords[lastElement], new BytesRef());
  // NOTE(review): assumes upto points at the first sorted position here —
  // confirm against the field's initializer.
  seekTerm = terms.get(ords[upto], spare);
}