private void addOneValue(BytesRef value) { int termID = hash.add(value); if (termID < 0) { termID = -termID-1; } else { // reserve additional space for each unique value: // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints. // TODO: can this same OOM happen in THPF? // 2. when flushing, we need 1 int per value (slot in the ordMap). iwBytesUsed.addAndGet(2 * Integer.BYTES); } pending.add(termID); updateBytesUsed(); }
private void addOneValue(BytesRef value) { int termID = hash.add(value); if (termID < 0) { termID = -termID-1; } else { // reserve additional space for each unique value: // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints. // TODO: can this same OOM happen in THPF? // 2. when flushing, we need 1 int per value (slot in the ordMap). iwBytesUsed.addAndGet(2 * Integer.BYTES); } if (currentUpto == currentValues.length) { currentValues = ArrayUtil.grow(currentValues, currentValues.length+1); iwBytesUsed.addAndGet((currentValues.length - currentUpto) * Integer.BYTES); } currentValues[currentUpto] = termID; currentUpto++; }
int termID = bytesHash.add(termAtt.getBytesRef());
int ord = words.add(utf8Scratch.get()); if (ord < 0) {
/**
 * Adds an input string and its stemmer override output to this builder.
 *
 * @param input the input char sequence
 * @param output the stemmer override output char sequence
 * @return <code>false</code> iff the input has already been added to this builder otherwise <code>true</code>.
 */
public boolean add(CharSequence input, CharSequence output) {
  final int length = input.length();
  if (ignoreCase) {
    // convert on the fly to lowercase
    charsSpare.grow(length);
    final char[] buffer = charsSpare.chars();
    // Code-point-aware lowercasing: i advances by the number of chars written by
    // toChars, so surrogate pairs are treated as single code points.
    // NOTE(review): assumes the lowercase form occupies the same number of chars
    // as the original at each position — TODO confirm for supplementary mappings.
    for (int i = 0; i < length; ) {
      i += Character.toChars(
          Character.toLowerCase(
              Character.codePointAt(input, i)), buffer, i);
    }
    spare.copyChars(buffer, 0, length);
  } else {
    spare.copyChars(input, 0, length);
  }
  // hash.add returns a non-negative ord only for keys not seen before; append the
  // output only then, so outputValues stays index-aligned with the hash ords.
  if (hash.add(spare.get()) >= 0) {
    outputValues.add(output);
    return true;
  }
  return false;
}
this.needsInputCleaning = ignoreCase; this.needsOutputCleaning = false; // set if we have an OCONV flagLookup.add(new BytesRef()); // no flags -> ord 0
int appendFlagsOrd = flagLookup.add(scratch.get()); if (appendFlagsOrd < 0) {
} else { encodeFlags(flagsScratch, wordForm); int ord = flagLookup.add(flagsScratch.get()); if (ord < 0) {
/** Feeds the supplied term straight into the backing target hash. */
@Override
public void consume(final BytesRef term) throws Exception {
  _target.add(term);
}
/** Inserts the given term into the underlying set; the returned ord is not needed here. */
public void add(BytesRef term) {
  set.add(term);
}
@Override public void collect(int doc) throws IOException { final BytesRef term = docValues.get(doc); collectorTerms.add(term); } }
@Override public void setGroups(Collection<SearchGroup<BytesRef>> searchGroups) { this.values.clear(); this.values.reinit(); for (SearchGroup<BytesRef> sg : searchGroups) { if (sg.groupValue == null) includeEmpty = true; else this.values.add(sg.groupValue); } this.secondPass = true; } }
/** Merges every term held by another BytesRefTermsSet into this set. */
@Override
protected void addAll(TermsSet terms) {
  if (!(terms instanceof BytesRefTermsSet)) {
    throw new UnsupportedOperationException("Invalid type: BytesRefTermsSet expected.");
  }
  // Walk the other hash by ordinal, reusing a single scratch ref for each lookup.
  final BytesRefHash other = ((BytesRefTermsSet) terms).set;
  final BytesRef scratch = new BytesRef();
  final int count = other.size();
  for (int ord = 0; ord < count; ord++) {
    other.get(ord, scratch);
    set.add(scratch);
  }
}
@Override public void collect(int doc) throws IOException { long ord; docValues.setDocument(doc); while ((ord = docValues.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { final BytesRef term = docValues.lookupOrd(ord); collectorTerms.add(term); } } }
private void addOneValue(BytesRef value) { int termID = hash.add(value); if (termID < 0) { termID = -termID-1; } else { // reserve additional space for each unique value: // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints. // TODO: can this same OOM happen in THPF? // 2. when flushing, we need 1 int per value (slot in the ordMap). iwBytesUsed.addAndGet(2 * Integer.BYTES); } pending.add(termID); updateBytesUsed(); }
private void addOneValue(BytesRef value) { int termID = hash.add(value); if (termID < 0) { termID = -termID-1; } else { // reserve additional space for each unique value: // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints. // TODO: can this same OOM happen in THPF? // 2. when flushing, we need 1 int per value (slot in the ordMap). iwBytesUsed.addAndGet(2 * RamUsageEstimator.NUM_BYTES_INT); } pending.add(termID); updateBytesUsed(); }
private void addOneValue(BytesRef value) { int termID = hash.add(value); if (termID < 0) { termID = -termID-1; } else { // reserve additional space for each unique value: // 1. when indexing, when hash is 50% full, rehash() suddenly needs 2*size ints. // TODO: can this same OOM happen in THPF? // 2. when flushing, we need 1 int per value (slot in the ordMap). iwBytesUsed.addAndGet(2 * RamUsageEstimator.NUM_BYTES_INT); } pending.add(termID); updateBytesUsed(); }
/**
 * Samples this document's values into the entry set. The cap check precedes the
 * random draw deliberately — reordering would change how the RNG stream is consumed.
 */
@Override
public void collect(final int docId) throws IOException {
  if (_entries.size() > _maxPerShard) {
    return;
  }
  // Bernoulli sampling, bypassed when exhaustive collection was requested.
  if (!_exhaustive && _random.nextFloat() > _sampleRate) {
    return;
  }
  final Iter iter = _values.getIter(docId);
  while (iter.hasNext() && _entries.size() < _maxPerShard) {
    _entries.add(iter.next(), iter.hash());
  }
}
/** Deserializes the pruned flag and the term set from the given stream. */
@Override
public void readFrom(StreamInput in) throws IOException {
  setIsPruned(in.readBoolean());
  final int size = in.readInt();
  // Rebuild the backing pool and hash, tracking allocations in a fresh counter.
  bytesUsed = Counter.newCounter();
  pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed));
  set = new BytesRefHash(pool);
  for (int i = 0; i < size; i++) {
    set.add(in.readBytesRef());
  }
}
private void readFromBytes(BytesRef bytes) { // Read pruned flag this.setIsPruned(bytes.bytes[bytes.offset++] == 1 ? true : false); // Read size fo the set int size = Bytes.readInt(bytes); // Read terms bytesUsed = Counter.newCounter(); pool = new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(bytesUsed)); set = new BytesRefHash(pool); BytesRef reusable = new BytesRef(); for (int i = 0; i < size; i++) { Bytes.readBytesRef(bytes, reusable); set.add(reusable); } }