@Override
public final Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
  final B builder = getTopLevelBuilder();
  final ParallelArraysTermCollector col = new ParallelArraysTermCollector();
  collectTerms(reader, query, col);

  final int size = col.terms.size();
  if (size > 0) {
    // sort() returns the collected term ids in unicode order; boost and
    // termState are parallel arrays indexed by term id.
    final int[] sort = col.terms.sort();
    final float[] boost = col.array.boost;
    final TermContext[] termStates = col.array.termState;
    for (int i = 0; i < size; i++) {
      final int pos = sort[i];
      final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef()));
      assert reader.docFreq(term) == termStates[pos].docFreq();
      addClause(builder, term, termStates[pos].docFreq(), boost[pos], termStates[pos]);
    }
  }
  return build(builder);
}
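A minimal, self-contained sketch of the BytesRefHash pattern the rewrite relies on (the class and values here are illustrative, not part of the snippet above): terms are deduplicated as they are added, sort() returns their ids in unicode order, and get(id, scratch) reads each term back. Note that in older Lucene versions sort() takes an explicit comparator, as in the sortPostings snippet further down.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class SortedTermsSketch {
  public static void main(String[] args) {
    BytesRefHash terms = new BytesRefHash();
    terms.add(new BytesRef("banana"));
    terms.add(new BytesRef("apple"));
    terms.add(new BytesRef("cherry"));

    int[] sort = terms.sort();        // term ids, ordered by term bytes
    BytesRef scratch = new BytesRef();
    for (int i = 0; i < terms.size(); i++) {
      terms.get(sort[i], scratch);    // fills scratch with the i-th smallest term
      System.out.println(scratch.utf8ToString()); // apple, banana, cherry
    }
  }
}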
private void addOneValue(BytesRef value) {
  int termID = hash.add(value);
  if (termID < 0) {
    termID = -termID - 1;
  } else {
    // reserve additional space for each unique value:
    // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints.
    //    TODO: can this same OOM happen in THPF?
    // 2. when flushing, we need 1 int per value (slot in the ordMap).
    iwBytesUsed.addAndGet(2 * Integer.BYTES);
  }

  pending.add(termID);
  updateBytesUsed();
}
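The sign convention decoded above is the core of BytesRefHash's dedup contract: a non-negative return from add() is a freshly assigned term id, while a negative return encodes an already-present value as -(id)-1. A tiny standalone sketch (not from the writer class):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class AddContractSketch {
  public static void main(String[] args) {
    BytesRefHash hash = new BytesRefHash();
    int first = hash.add(new BytesRef("foo"));   // >= 0: a new term id
    int again = hash.add(new BytesRef("foo"));   // < 0: value already present
    int termID = again < 0 ? -again - 1 : again; // recover the existing id
    System.out.println(first + " " + again + " " + termID); // 0 -1 0
  }
}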
/**
 * streamCount: how many streams this field stores per term.
 * E.g. doc(+freq) is 1 stream, prox+offset is a second.
 */
public TermsHashPerField(int streamCount, FieldInvertState fieldState, TermsHash termsHash,
                         TermsHashPerField nextPerField, FieldInfo fieldInfo) {
  intPool = termsHash.intPool;
  bytePool = termsHash.bytePool;
  termBytePool = termsHash.termBytePool;
  docState = termsHash.docState;
  this.termsHash = termsHash;
  bytesUsed = termsHash.bytesUsed;
  this.fieldState = fieldState;
  this.streamCount = streamCount;
  numPostingInt = 2 * streamCount;
  this.fieldInfo = fieldInfo;
  this.nextPerField = nextPerField;
  PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed);
  bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts);
}
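The constructor wires the hash to pools and a bytesUsed counter shared with the owning TermsHash. A minimal sketch of that accounting pattern in isolation, using only public Lucene utility classes (PostingsBytesStartArray is internal, so DirectBytesStartArray stands in for it here):

import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;

public class TrackedHashSketch {
  public static void main(String[] args) {
    Counter bytesUsed = Counter.newCounter();
    ByteBlockPool pool =
        new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
    BytesRefHash hash = new BytesRefHash(pool, BytesRefHash.DEFAULT_CAPACITY,
        new BytesRefHash.DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY));
    hash.add(new BytesRef("term"));
    // every block the pool allocates is charged to the counter
    System.out.println(bytesUsed.get());
  }
}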
@Override
Sorter.DocComparator getDocComparator(int maxDoc, SortField sortField) throws IOException {
  assert sortField.getType().equals(SortField.Type.STRING);
  assert finalSortedValues == null && finalOrdMap == null && finalOrds == null;
  int valueCount = hash.size();
  finalSortedValues = hash.sort();
  finalOrds = pending.build();
  finalOrdMap = new int[valueCount];
  for (int ord = 0; ord < valueCount; ord++) {
    finalOrdMap[finalSortedValues[ord]] = ord;
  }
  final SortedDocValues docValues =
      new BufferedSortedDocValues(hash, valueCount, finalOrds, finalSortedValues, finalOrdMap,
          docsWithField.iterator());
  return Sorter.getDocComparator(maxDoc, sortField, () -> docValues, () -> null);
}
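A tiny worked example of the ord-map inversion in the loop above (values are illustrative): sort() maps rank to term id, and the inversion produces the reverse mapping from term id to sorted ord.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class OrdMapSketch {
  public static void main(String[] args) {
    BytesRefHash hash = new BytesRefHash();
    hash.add(new BytesRef("b"));        // term id 0
    hash.add(new BytesRef("a"));        // term id 1
    int[] sortedValues = hash.sort();   // {1, 0}: ids in term order
    int[] ordMap = new int[hash.size()];
    for (int ord = 0; ord < ordMap.length; ord++) {
      ordMap[sortedValues[ord]] = ord;  // {1, 0}: term id -> sorted ord
    }
    System.out.println(ordMap[0] + " " + ordMap[1]); // 1 0
  }
}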
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
  this.docValues = DocValues.getSorted(readerContext.reader(), field);
  this.ordsToGroupIds.clear();
  BytesRef scratch = new BytesRef();
  for (int i = 0; i < values.size(); i++) {
    values.get(i, scratch);
    int ord = this.docValues.lookupTerm(scratch);
    if (ord >= 0) {
      ordsToGroupIds.put(ord, i);
    }
  }
}
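This depends on the SortedDocValues.lookupTerm contract: a non-negative return is the ord of an existing term, and a negative return encodes the insertion point as -(insertionPoint)-1, which is why the snippet only maps ords for terms that actually occur in the segment. A small hedged helper illustrating the decode, assuming a recent Lucene where lookupTerm declares IOException (the helper name is mine, not Lucene's):

import java.io.IOException;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;

class LookupTermSketch {
  // dv is any SortedDocValues, e.g. from DocValues.getSorted(...)
  static int ordOrInsertionPoint(SortedDocValues dv, BytesRef key) throws IOException {
    int ord = dv.lookupTerm(key);
    return ord >= 0
        ? ord         // key exists: its ord in this segment's dictionary
        : -ord - 1;   // key absent: the ord it would sort at
  }
}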
@Override
protected void addAll(TermsSet terms) {
  if (!(terms instanceof BytesRefTermsSet)) {
    throw new UnsupportedOperationException("Invalid type: BytesRefTermsSet expected.");
  }
  BytesRefHash input = ((BytesRefTermsSet) terms).set;
  BytesRef reusable = new BytesRef();
  for (int i = 0; i < input.size(); i++) {
    input.get(i, reusable);
    set.add(reusable); // add() copies the bytes, so the scratch ref can be reused
  }
}
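A standalone sketch of the same union pattern: BytesRefHash ids are dense, running 0..size()-1 in insertion order, so copying one set into another is a plain loop, and duplicates are silently ignored by add().

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class UnionSketch {
  public static void main(String[] args) {
    BytesRefHash a = new BytesRefHash();
    a.add(new BytesRef("x"));
    BytesRefHash b = new BytesRefHash();
    b.add(new BytesRef("x"));
    b.add(new BytesRef("y"));

    BytesRef reusable = new BytesRef();
    for (int i = 0; i < b.size(); i++) {
      b.get(i, reusable);
      a.add(reusable);              // copies the bytes; duplicate "x" is a no-op
    }
    System.out.println(a.size());   // 2
  }
}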
private void readFromBytes(BytesRef bytes) {
  // Read the pruned flag
  this.setIsPruned(bytes.bytes[bytes.offset++] == 1);

  // Read the size of the set
  int size = Bytes.readInt(bytes);

  // Read the terms
  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);
  BytesRef reusable = new BytesRef();
  for (int i = 0; i < size; i++) {
    Bytes.readBytesRef(bytes, reusable);
    set.add(reusable);
  }
}
private SortedDocValues getSortedDocValues(String field, DocValuesType docValuesType) {
  Info info = getInfoForExpectedDocValuesType(field, docValuesType);
  if (info != null) {
    BytesRef value = info.binaryProducer.dvBytesValuesSet.get(0, new BytesRef());
    return sortedDocValues(value);
  } else {
    return null;
  }
}
@Override
public BytesRef lookupOrd(int ord) {
  assert ord >= 0 && ord < sortedValues.length;
  assert sortedValues[ord] >= 0 && sortedValues[ord] < sortedValues.length;
  // sortedValues[ord] maps the sorted ord back to the term id in the hash;
  // the shared scratch is overwritten on every call, so callers must copy
  // the bytes if they need to keep them.
  hash.get(sortedValues[ord], scratch);
  return scratch;
}
private final int binarySearch(BytesRef b, BytesRef bytesRef, int low, int high,
                               BytesRefHash hash, int[] ords) {
  int mid = 0;
  while (low <= high) {
    mid = (low + high) >>> 1;
    hash.get(ords[mid], bytesRef);
    final int cmp = bytesRef.compareTo(b);
    if (cmp < 0) {
      low = mid + 1;
    } else if (cmp > 0) {
      high = mid - 1;
    } else {
      return mid;
    }
  }
  assert bytesRef.compareTo(b) != 0;
  return -(low + 1);
}
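The return convention matches Arrays.binarySearch: a hit returns the rank, a miss returns -(insertionPoint)-1. A hedged usage fragment against the method above (assumes it is in scope; values are illustrative):

BytesRefHash h = new BytesRefHash();
h.add(new BytesRef("a"));
h.add(new BytesRef("c"));
int[] ords = h.sort();
// "b" is absent and would sort at rank 1:
int result = binarySearch(new BytesRef("b"), new BytesRef(), 0, h.size() - 1, h, ords);
// result == -2, so the insertion point is -result - 1 == 1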
/**
 * Collapses the hash table and sorts in-place; also sets
 * this.sortedTermIDs to the results.
 */
public int[] sortPostings() {
  sortedTermIDs = bytesHash.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
  return sortedTermIDs;
}
@Override
public SeekStatus seekCeil(BytesRef text) {
  termUpto = binarySearch(text, br, 0, info.terms.size() - 1, info.terms,
      info.sortedTerms, BytesRef.getUTF8SortedAsUnicodeComparator());
  if (termUpto < 0) { // not found; choose successor
    termUpto = -termUpto - 1;
    if (termUpto >= info.terms.size()) {
      return SeekStatus.END;
    } else {
      info.terms.get(info.sortedTerms[termUpto], br);
      return SeekStatus.NOT_FOUND;
    }
  } else {
    return SeekStatus.FOUND;
  }
}