/**
 * Returns the byte located {@code index} positions after this reference's {@code offset}
 * in the backing {@code byteArray}.
 *
 * @param index position relative to the start of this reference
 * @return the byte read from the backing array
 */
@Override
public byte get(int index) {
    final byte value = byteArray.get(offset + index);
    return value;
}
@Override public BytesRef next() throws IOException { if (nextFragmentSize != 0) { final boolean materialized = byteArray.get(offset + position, nextFragmentSize, slice); assert materialized == false : "iteration should be page aligned but array got materialized"; position += nextFragmentSize; final int remaining = length - position; nextFragmentSize = Math.min(remaining, PAGE_SIZE); return slice; } else { assert nextFragmentSize == 0 : "fragmentSize expected [0] but was: [" + nextFragmentSize + "]"; return null; // we are done with this iteration } } };
/**
 * Hashes every element of {@code array} with the same multiply-by-31 scheme as
 * {@link Arrays#hashCode(byte[])}.
 *
 * @param array the array to hash, may be {@code null}
 * @return {@code 0} when {@code array} is {@code null}, otherwise the accumulated hash
 * @see Arrays#hashCode(byte[])
 */
public int hashCode(ByteArray array) {
    int result = 0;
    if (array != null) {
        result = 1;
        long cursor = 0;
        while (cursor < array.size()) {
            result = 31 * result + array.get(cursor);
            cursor++;
        }
    }
    return result;
}
@Override public BytesRef toBytesRef() { BytesRef bref = new BytesRef(); // if length <= pagesize this will dereference the page, or materialize the byte[] byteArray.get(offset, length, bref); return bref; }
/**
 * Stores {@code runLen} in the register addressed by {@code (bucket << p) + index}, unless the
 * register already holds a larger value. The register is always rewritten with the maximum of
 * the two.
 *
 * @param bucket bucket ordinal
 * @param index  register index inside the bucket
 * @param runLen run length to record
 */
private void collectHll(long bucket, long index, int runLen) {
    final long slot = (bucket << p) + index;
    final int stored = runLens.get(slot);
    final int winner = Math.max(runLen, stored);
    runLens.set(slot, (byte) winner);
}
/**
 * Return the key at {@code 0 <= index <= capacity()}. The result is undefined if the slot is unused.
 * <p>
 * The key spans the bytes between {@code startOffsets.get(id)} and {@code startOffsets.get(id + 1)}.
 * Beware that the content of the {@link BytesRef} may become invalid as soon as {@link #close()} is called.
 *
 * @param id   slot whose key should be read
 * @param dest reference that receives the key
 * @return {@code dest}, for call chaining
 */
public BytesRef get(long id, BytesRef dest) {
    final long from = startOffsets.get(id);
    final long to = startOffsets.get(id + 1);
    bytes.get(from, (int) (to - from), dest);
    return dest;
}
/**
 * Reads the 4 bytes stored at {@code index(bucket, index)} into the reusable {@code readSpare}
 * and decodes them with {@code ByteUtils.readIntLE}.
 *
 * @param bucket bucket ordinal
 * @param index  slot index inside the bucket
 * @return the decoded int
 */
private int get(long bucket, int index) {
    final long position = index(bucket, index);
    runLens.get(position, 4, readSpare);
    return ByteUtils.readIntLE(readSpare.bytes, readSpare.offset);
}
/**
 * Summarizes the content of {@code bucket} as a plain collection.
 * <ul>
 *   <li>In {@code LINEAR_COUNTING} mode: a {@link Set} of the encoded hashes held for the bucket.</li>
 *   <li>Otherwise: a {@link Map} from run length to the number of registers of this bucket
 *       holding that run length.</li>
 * </ul>
 *
 * @param bucket bucket ordinal
 * @return a {@code Set<Integer>} or a {@code Map<Byte, Integer>} depending on the bucket's algorithm
 */
private Object getComparableData(long bucket) {
    if (algorithm.get(bucket) == LINEAR_COUNTING) {
        Set<Integer> values = new HashSet<>();
        try (IntArray hashSetValues = hashSet.values(bucket)) {
            for (long i = 0; i < hashSetValues.size(); i++) {
                values.add(hashSetValues.get(i));
            }
        }
        return values;
    } else {
        Map<Byte, Integer> values = new HashMap<>();
        // Registers of a bucket start at (bucket << p), so a bucket spans 1 << p slots.
        // Scanning runLens.size() entries from that start (as before) read other buckets'
        // registers and ran past the end of the array for any bucket > 0.
        // NOTE(review): assumes the per-bucket register count equals 1 << p — confirm.
        final long start = bucket << p;
        final long end = start + (1L << p);
        for (long i = start; i < end; i++) {
            byte runLength = runLens.get(i);
            Integer numOccurrences = values.get(runLength);
            if (numOccurrences == null) {
                values.put(runLength, 1);
            } else {
                values.put(runLength, numOccurrences + 1);
            }
        }
        return values;
    }
}
/**
 * Computes the cardinality estimate for {@code bucket} from its {@code m} registers.
 * <p>
 * The registers are scanned once to accumulate the sum of {@code 2^-runLen} and the number of
 * empty registers. The raw estimate {@code alphaMM / inverseSum} is reduced by
 * {@code estimateBias} when it does not exceed {@code 5 * m}. If at least one register is empty
 * and the {@code linearCounting} result is within {@code threshold()}, that result wins;
 * otherwise the rounded (possibly corrected) estimate is returned.
 *
 * @param bucket bucket ordinal
 * @return the estimated cardinality
 */
private long cardinalityHll(long bucket) {
    final long first = bucket << p;
    final long last = first + m;

    double inverseSum = 0;
    int zeros = 0;
    for (long register = first; register < last; ++register) {
        final int runLen = runLens.get(register);
        if (runLen == 0) {
            ++zeros;
        }
        inverseSum += 1. / (1L << runLen);
    }

    final double rawEstimate = alphaMM / inverseSum;
    final double corrected;
    if (rawEstimate <= 5 * m) {
        corrected = rawEstimate - estimateBias(rawEstimate);
    } else {
        corrected = rawEstimate;
    }

    final long candidate;
    if (zeros == 0) {
        candidate = Math.round(corrected);
    } else {
        candidate = linearCounting(m, zeros);
    }

    if (candidate > threshold()) {
        return Math.round(corrected);
    }
    return candidate;
}
/**
 * Merges the content of {@code otherBucket} in {@code other} into {@code thisBucket} of this instance.
 * <p>
 * When the other bucket is in {@code LINEAR_COUNTING} mode, each of its encoded values is
 * re-collected into this bucket. Otherwise this bucket is upgraded to {@code HYPERLOGLOG} if
 * needed and every register takes the maximum of the two run lengths.
 *
 * @param thisBucket  destination bucket in this instance
 * @param other       instance to read from; must use the same {@code p}
 * @param otherBucket source bucket in {@code other}
 * @throws IllegalArgumentException if {@code p} differs between the two instances
 */
public void merge(long thisBucket, HyperLogLogPlusPlus other, long otherBucket) {
    if (p != other.p) {
        throw new IllegalArgumentException(
            "Cannot merge HyperLogLogPlusPlus instances with different values of p: [" + p + "] vs [" + other.p + "]");
    }
    ensureCapacity(thisBucket + 1);
    if (other.algorithm.get(otherBucket) == LINEAR_COUNTING) {
        // try-with-resources releases the values array on every exit path
        try (IntArray values = other.hashSet.values(otherBucket)) {
            for (long i = 0; i < values.size(); ++i) {
                final int encoded = values.get(i);
                // The algorithm is re-read on every iteration on purpose; do not hoist.
                // NOTE(review): presumably collectLcEncoded can switch this bucket to HLL mid-loop — confirm.
                if (algorithm.get(thisBucket) == LINEAR_COUNTING) {
                    collectLcEncoded(thisBucket, encoded);
                } else {
                    collectHllEncoded(thisBucket, encoded);
                }
            }
        }
    } else {
        if (algorithm.get(thisBucket) != HYPERLOGLOG) {
            upgradeToHll(thisBucket);
        }
        final long thisStart = thisBucket << p;
        final long otherStart = otherBucket << p;
        for (int i = 0; i < m; ++i) {
            runLens.set(thisStart + i, (byte) Math.max(runLens.get(thisStart + i), other.runLens.get(otherStart + i)));
        }
    }
}
/**
 * Serializes {@code bucket} to {@code out}.
 * <p>
 * Layout: {@code p} as a vInt, then a boolean naming the algorithm. For {@code LINEAR_COUNTING}
 * the number of hashes follows as a vLong and then each hash as an int; for {@code HYPERLOGLOG}
 * the bucket's {@code m} registers follow, one byte each.
 *
 * @param bucket bucket ordinal to serialize
 * @param out    destination stream
 * @throws IOException if writing to {@code out} fails
 */
public void writeTo(long bucket, StreamOutput out) throws IOException {
    out.writeVInt(p);

    if (algorithm.get(bucket) != LINEAR_COUNTING) {
        out.writeBoolean(HYPERLOGLOG);
        final long first = bucket << p;
        final long last = first + m;
        long register = first;
        while (register < last) {
            out.writeByte(runLens.get(register));
            ++register;
        }
        return;
    }

    out.writeBoolean(LINEAR_COUNTING);
    try (IntArray hashes = hashSet.values(bucket)) {
        out.writeVLong(hashes.size());
        long cursor = 0;
        while (cursor < hashes.size()) {
            out.writeInt(hashes.get(cursor));
            ++cursor;
        }
    }
}
/**
 * Reads the byte at the current position and moves the position forward by one.
 *
 * @return the byte at {@code offset + pos} in the backing array
 * @throws EOFException if {@code pos} has already reached {@code length}
 */
@Override
public byte readByte() throws IOException {
    if (pos < length) {
        return bytearray.get(offset + pos++);
    }
    throw new EOFException();
}
@Override public BytesRef toBytesRef() { BytesRef bref = new BytesRef(); // if length <= pagesize this will dereference the page, or materialize the byte[] byteArray.get(offset, length, bref); return bref; }
/**
 * Element-wise hash of {@code array}, following the recipe of {@link Arrays#hashCode(byte[])}:
 * start at 1 and fold each element in as {@code 31 * hash + element}.
 *
 * @param array the array to hash, may be {@code null}
 * @return {@code 0} for a {@code null} array, otherwise the folded hash
 * @see Arrays#hashCode(byte[])
 */
public int hashCode(ByteArray array) {
    if (array == null) {
        return 0;
    }
    int accumulator = 1;
    for (long pos = 0; pos < array.size(); ++pos) {
        accumulator = array.get(pos) + 31 * accumulator;
    }
    return accumulator;
}
@Override public BytesRef toBytesRef() { BytesRef bref = new BytesRef(); // if length <= pagesize this will dereference the page, or materialize the byte[] byteArray.get(offset, length, bref); return bref; }
/**
 * Returns the offset reported by the {@link BytesRef} that the backing array fills in for this
 * reference's {@code offset}/{@code length} range.
 *
 * @return the {@code offset} field of the populated {@link BytesRef}
 * @throws IllegalStateException if {@link #hasArray()} is {@code false}
 */
@Override
public int arrayOffset() {
    if (!hasArray()) {
        throw new IllegalStateException("array not available");
    }
    final BytesRef scratch = new BytesRef();
    bytearray.get(offset, length, scratch);
    return scratch.offset;
}
/**
 * Return the key at {@code 0 <= index <= capacity()}. The result is undefined if the slot is unused.
 * <p>
 * The key's length is the distance between the start offsets recorded for {@code id} and
 * {@code id + 1}. Beware that the content of the {@link BytesRef} may become invalid as soon as
 * {@link #close()} is called.
 *
 * @param id   slot to read
 * @param dest reference to fill with the key
 * @return the same {@code dest} instance
 */
public BytesRef get(long id, BytesRef dest) {
    final long keyStart = startOffsets.get(id);
    final long keyEnd = startOffsets.get(id + 1);
    final int keyLength = (int) (keyEnd - keyStart);
    bytes.get(keyStart, keyLength, dest);
    return dest;
}
/**
 * Return the key at {@code 0 <= index <= capacity()}. The result is undefined if the slot is unused.
 * <p>
 * {@code dest} is pointed at the bytes lying between the start offset of {@code id} and the
 * start offset of {@code id + 1}. Beware that the content of the {@link BytesRef} may become
 * invalid as soon as {@link #close()} is called.
 *
 * @param id   slot whose key is requested
 * @param dest reference populated with the key
 * @return {@code dest}
 */
public BytesRef get(long id, BytesRef dest) {
    final long begin = startOffsets.get(id);
    final long span = startOffsets.get(id + 1) - begin;
    bytes.get(begin, (int) span, dest);
    return dest;
}
/**
 * Wraps the {@code length} bytes starting at {@code offset} in a {@link BytesArray}.
 *
 * @return a new {@link BytesArray} built from the {@link BytesRef} populated by the backing array
 */
@Override
public BytesArray toBytesArray() {
    final BytesRef backing = new BytesRef();
    bytearray.get(offset, length, backing);
    return new BytesArray(backing);
}