@Override
public BytesRef subtract(BytesRef output, BytesRef inc) {
  assert output != null;
  assert inc != null;
  if (inc == NO_OUTPUT) {
    // no prefix removed
    return output;
  } else {
    assert StringHelper.startsWith(output, inc);
    if (inc.length == output.length) {
      // entire output removed
      return NO_OUTPUT;
    } else {
      assert inc.length < output.length
          : "inc.length=" + inc.length + " vs output.length=" + output.length;
      assert inc.length > 0;
      return new BytesRef(output.bytes, output.offset + inc.length, output.length - inc.length);
    }
  }
}
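For orientation, a minimal standalone sketch of what StringHelper.startsWith checks and how the prefix slice above is computed; the class name and values are made up for illustration:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

public class StartsWithDemo {
  public static void main(String[] args) {
    BytesRef output = new BytesRef("foobar");
    BytesRef inc = new BytesRef("foo");
    // true: "foobar" begins with the bytes of "foo"
    System.out.println(StringHelper.startsWith(output, inc));
    // slicing off the shared prefix, as subtract() does, leaves "bar"
    BytesRef rest = new BytesRef(output.bytes, output.offset + inc.length, output.length - inc.length);
    System.out.println(rest.utf8ToString()); // bar
  }
}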
/**
 * Calculates the hash code as required by TermsHash during indexing.
 *
 * <p>This is currently implemented as MurmurHash3 (32 bit), using the seed from
 * {@link StringHelper#GOOD_FAST_HASH_SEED}, but is subject to change from release to release.
 */
@Override
public int hashCode() {
  return StringHelper.murmurhash3_x86_32(this, StringHelper.GOOD_FAST_HASH_SEED);
}
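A minimal sketch of calling the hash directly; the class name and input value are illustrative. GOOD_FAST_HASH_SEED is chosen at JVM startup, so the printed value is stable only within a single run:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

public class HashDemo {
  public static void main(String[] args) {
    BytesRef bytes = new BytesRef("hello");
    int h = StringHelper.murmurhash3_x86_32(bytes, StringHelper.GOOD_FAST_HASH_SEED);
    // seed differs per JVM run -- never persist this value
    System.out.println(h);
  }
}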
/** Adds a term. This fully consumes the incoming {@link BytesRef}. */
public void add(String field, BytesRef bytes) {
  assert lastTerm.equals(new Term("")) || new Term(field, bytes).compareTo(lastTerm) > 0;
  try {
    final int prefix;
    if (size > 0 && field.equals(lastTerm.field)) {
      // same field as the last term
      prefix = StringHelper.bytesDifference(lastTerm.bytes, bytes);
      output.writeVInt(prefix << 1);
    } else {
      // field change
      prefix = 0;
      output.writeVInt(1);
      output.writeString(field);
    }
    int suffix = bytes.length - prefix;
    output.writeVInt(suffix);
    output.writeBytes(bytes.bytes, bytes.offset + prefix, suffix);
    lastTermBytes.copyBytes(bytes);
    lastTerm.bytes = lastTermBytes.get();
    lastTerm.field = field;
    size += 1;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
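For reference, a standalone sketch (the class name and terms are made up) of what StringHelper.bytesDifference returns: the length of the byte prefix two sorted terms share, which is what drives the prefix coding above:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

public class PrefixDemo {
  public static void main(String[] args) {
    BytesRef prior = new BytesRef("apple");
    BytesRef current = new BytesRef("apply");
    int prefix = StringHelper.bytesDifference(prior, current);
    System.out.println(prefix); // 4 -- "appl" is shared, only "y" must be stored
  }
}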
private void addReverseTermIndex(FieldInfo field, final Iterable<BytesRef> values, int maxLength) throws IOException {
  long count = 0;
  BytesRefBuilder priorTerm = new BytesRefBuilder();
  priorTerm.grow(maxLength);
  BytesRef indexTerm = new BytesRef();
  long startFP = data.getFilePointer();
  PagedBytes pagedBytes = new PagedBytes(15);
  MonotonicBlockPackedWriter addresses = new MonotonicBlockPackedWriter(data, MONOTONIC_BLOCK_SIZE);
  for (BytesRef b : values) {
    int termPosition = (int) (count & REVERSE_INTERVAL_MASK);
    if (termPosition == 0) {
      int len = StringHelper.sortKeyLength(priorTerm.get(), b);
      indexTerm.bytes = b.bytes;
      indexTerm.offset = b.offset;
      indexTerm.length = len;
      addresses.add(pagedBytes.copyUsingLengthPrefix(indexTerm));
    } else if (termPosition == REVERSE_INTERVAL_MASK) {
      priorTerm.copyBytes(b);
    }
    count++;
  }
  addresses.finish();
  long numBytes = pagedBytes.getPointer();
  pagedBytes.freeze(true);
  PagedBytesDataInput in = pagedBytes.getDataInput();
  meta.writeLong(startFP);
  data.writeVLong(numBytes);
  data.copyBytes(in, numBytes);
}
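A standalone sketch (made-up values) of StringHelper.sortKeyLength: it returns the length of the shortest prefix of the second term that still compares greater than the first, which is why only that prefix needs to be stored in the index above:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

public class SortKeyDemo {
  public static void main(String[] args) {
    BytesRef prior = new BytesRef("app");
    BytesRef current = new BytesRef("apple");
    int len = StringHelper.sortKeyLength(prior, current);
    // 4 -- "appl" is the shortest prefix of "apple" that still sorts after "app"
    System.out.println(len);
  }
}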
public static void checkFooter(ChecksumIndexInput input) throws IOException {
  BytesRefBuilder scratch = new BytesRefBuilder();
  String expectedChecksum = String.format(Locale.ROOT, "%020d", input.getChecksum());
  readLine(input, scratch);
  if (StringHelper.startsWith(scratch.get(), CHECKSUM) == false) {
    throw new CorruptIndexException("SimpleText failure: expected checksum line but got "
        + scratch.get().utf8ToString(), input);
  }
  String actualChecksum =
      new BytesRef(scratch.bytes(), CHECKSUM.length, scratch.length() - CHECKSUM.length).utf8ToString();
  if (!expectedChecksum.equals(actualChecksum)) {
    throw new CorruptIndexException("SimpleText checksum failure: " + actualChecksum
        + " != " + expectedChecksum, input);
  }
  if (input.length() != input.getFilePointer()) {
    throw new CorruptIndexException("Unexpected stuff at the end of file, please be careful with your text editor!", input);
  }
}
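The expected checksum above is compared as a zero-padded decimal string; a minimal sketch of that formatting (the class name and value are illustrative, not a real index checksum):

import java.util.Locale;

public class ChecksumFormatDemo {
  public static void main(String[] args) {
    long checksum = 123456789L; // illustrative value
    // the checksum is zero-padded to 20 digits so the footer line has a
    // fixed width regardless of the value
    System.out.println(String.format(Locale.ROOT, "%020d", checksum));
    // prints: 00000000000123456789
  }
}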
  throw new IllegalArgumentException("Invalid id: " + StringHelper.idToString(id));
...
BytesRef suffixBytes = new BytesRef(suffix);
if (suffixBytes.length != suffix.length() || suffixBytes.length >= 256) {
  throw new IllegalArgumentException(
      "suffix must be simple ASCII, less than 256 characters in length [got " + suffix + "]");
}
  term = StringHelper.intsRefToBytesRef(singleton);
} else {
  term = new BytesRef(UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length));
}
min = new BytesRef();
minInclusive = true;
...
  cmp = min.compareTo(max);
} else {
  cmp = -1;
...
StringHelper.startsWith(max, min) && suffixIsZeros(max, min.length)) {
final BytesRef prefix = new BytesRef(prefixLength + (hasFloorLeadLabel ? 1 : 0));
System.arraycopy(lastTerm.get().bytes, 0, prefix.bytes, 0, prefixLength);
prefix.length = prefixLength;
...
  assert StringHelper.startsWith(term.termBytes, prefix)
      : "term.term=" + term.termBytes + " prefix=" + prefix;
  BlockTermState state = term.state;
  final int suffix = term.termBytes.length - prefixLength;
...
  PendingTerm term = (PendingTerm) ent;
  assert StringHelper.startsWith(term.termBytes, prefix)
      : "term.term=" + term.termBytes + " prefix=" + prefix;
  BlockTermState state = term.state;
  final int suffix = term.termBytes.length - prefixLength;
} else {
  PendingBlock block = (PendingBlock) ent;
  assert StringHelper.startsWith(block.prefix, prefix);
  final int suffix = block.prefix.length - prefixLength;
...
assert StringHelper.startsWith(block.prefix, prefix);
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
  ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
  BytesRefBuilder scratch = new BytesRefBuilder();
  TreeMap<String,Long> fields = new TreeMap<>();
  while (true) {
    SimpleTextUtil.readLine(input, scratch);
    if (scratch.get().equals(END)) {
      SimpleTextUtil.checkFooter(input);
      return fields;
    } else if (StringHelper.startsWith(scratch.get(), FIELD)) {
      String fieldName =
          new String(scratch.bytes(), FIELD.length, scratch.length() - FIELD.length, StandardCharsets.UTF_8);
      fields.put(fieldName, input.getFilePointer());
    }
  }
}
DirectMonotonicWriter writer =
    DirectMonotonicWriter.getInstance(meta, addressBuffer, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT);
BytesRefBuilder previous = new BytesRefBuilder();
long ord = 0;
long start = data.getFilePointer();
...
  data.writeBytes(term.bytes, term.offset, term.length);
} else {
  final int prefixLength = StringHelper.bytesDifference(previous.get(), term);
  final int suffixLength = term.length - prefixLength;
  assert suffixLength > 0; // terms are unique
...
previous.copyBytes(term);
++ord;
BytesRefBuilder previous = new BytesRefBuilder();
long offset = 0;
long ord = 0;
...
sortKeyLength = StringHelper.sortKeyLength(previous.get(), term);
previous.copyBytes(term);
BytesRefBuilder lastTerm = new BytesRefBuilder();
lastTerm.grow(Math.max(0, maxLength));
long count = 0;
for (BytesRef v : values) {
...
    termAddresses.add(data.getFilePointer() - startFP);
    // clearing lastTerm forces the first term of an interval to be written verbatim
    lastTerm.clear();
...
  int sharedPrefix = StringHelper.bytesDifference(lastTerm.get(), v);
  data.writeVInt(sharedPrefix);
  data.writeVInt(v.length - sharedPrefix);
private boolean startsWith(BytesRef prefix) {
  return StringHelper.startsWith(scratch.get(), prefix);
}
private void readIndex(int maxDoc) throws IOException {
  ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
  offsets = new long[maxDoc];
  int upto = 0;
  while (!scratch.get().equals(END)) {
    SimpleTextUtil.readLine(input, scratch);
    if (StringHelper.startsWith(scratch.get(), DOC)) {
      offsets[upto] = input.getFilePointer();
      upto++;
    }
  }
  SimpleTextUtil.checkFooter(input);
  assert upto == offsets.length;
}
/**
 * Returns the acceptance status of the term, i.e. whether it matches the automaton.
 * Also stashes away the term to assist with smart enumeration.
 */
@Override
protected AcceptStatus accept(final BytesRef term) {
  if (commonSuffixRef == null || StringHelper.endsWith(term, commonSuffixRef)) {
    if (runAutomaton.run(term.bytes, term.offset, term.length)) {
      return linear ? AcceptStatus.YES : AcceptStatus.YES_AND_SEEK;
    } else {
      return (linear && term.compareTo(linearUpperBound) < 0)
          ? AcceptStatus.NO
          : AcceptStatus.NO_AND_SEEK;
    }
  } else {
    return (linear && term.compareTo(linearUpperBound) < 0)
        ? AcceptStatus.NO
        : AcceptStatus.NO_AND_SEEK;
  }
}
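A standalone sketch (illustrative class name and values) of the StringHelper.endsWith fast path used above: when all strings accepted by the automaton share a common suffix, any term lacking that suffix can be rejected with a cheap byte comparison, without running the automaton:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

public class EndsWithDemo {
  public static void main(String[] args) {
    BytesRef suffix = new BytesRef("ing");
    System.out.println(StringHelper.endsWith(new BytesRef("running"), suffix)); // true
    System.out.println(StringHelper.endsWith(new BytesRef("runner"), suffix));  // false
  }
}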
if (isSubBlock && StringHelper.startsWith(target, term)) {
...
final int cmp = term.compareTo(target);
if (cmp < 0) {
  if (currentFrame.nextEnt == currentFrame.entCount) {
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
  // SlowCompositeReaderWrapper could be used, but it would merge all terms from each segment
  // into one Terms instance, which is very expensive; it is cheaper to iterate over each leaf individually.
  List<LeafReaderContext> leaves = reader.leaves();
  for (LeafReaderContext leaf : leaves) {
    Terms _terms = leaf.reader().terms(field);
    if (_terms == null) {
      continue;
    }
    TermsEnum termsEnum = _terms.iterator();
    TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
    if (TermsEnum.SeekStatus.END == seekStatus) {
      continue;
    }
    for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
      if (!StringHelper.startsWith(term, prefix.bytes())) {
        break;
      }
      // deep copy: the TermsEnum reuses its BytesRef across next() calls
      terms.add(new Term(field, BytesRef.deepCopyOf(term)));
      if (terms.size() >= maxExpansions) {
        return;
      }
    }
  }
}
@Override
protected AcceptStatus accept(BytesRef term) {
  return StringHelper.startsWith(term, prefix) ? AcceptStatus.YES : AcceptStatus.END;
}
SegmentInfo si = new SegmentInfo(directoryOrig, Version.LATEST, null, mergeSegmentName, -1, false, codec,
    Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), config.getIndexSort());
Map<String,String> details = new HashMap<>();
details.put("mergeMaxNumSegments", "" + merge.maxNumSegments);
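A standalone sketch (illustrative class name) of the id helpers used here and in the header-validation fragment earlier: StringHelper.randomId() produces a 16-byte segment id and idToString renders it for error messages; the output differs every run:

import org.apache.lucene.util.StringHelper;

public class IdDemo {
  public static void main(String[] args) {
    byte[] id = StringHelper.randomId(); // 16 random bytes (StringHelper.ID_LENGTH)
    System.out.println(StringHelper.idToString(id)); // base-36 rendering, differs per run
  }
}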