/**
 * Perform the parent's transient initialization, then rebuild the lookup
 * set from the IN-list values.
 *
 * @throws HiveException declared by the overridden method
 */
@Override
public void transientInit() throws HiveException {
  super.transientInit();

  // Size the set from the value array, then load every value into it.
  final long[] values = inListValues;
  inSet = new CuckooSetLong(values.length);
  inSet.load(values);
}
public void insert(long x) { if (x == blank) { findNewBlank(); } long temp; if (lookup(x)) { return; } // Try to insert up to n times. Rehash if that fails. for(int i = 0; i != n; i++) { if (t1[h1(x)] == blank) { t1[h1(x)] = x; return; } // swap x and t1[h1(x)] temp = t1[h1(x)]; t1[h1(x)] = x; x = temp; if (t2[h2(x)] == blank) { t2[h2(x)] = x; return; } // swap x and t2[h2(x)] temp = t2[h2(x)]; t2[h2(x)] = x; x = temp; } rehash(); insert(x); }
throw new RuntimeException("Too many rehashes"); updateHashSalt(); for (Long v : prev1) { if (v != blank) { long x = tryInsert(v); if (x != blank) { rehash(); return; long x = tryInsert(v); if (x != blank) { rehash(); return;
/**
 * Test whether x is a member of the set.
 *
 * @param x the value to search for
 * @return true if and only if x is present in the set
 */
public boolean lookup(long x) {
  // The blank marker must be rejected up front: otherwise probing for it
  // could hit a slot holding the marker and report a false positive.
  if (x == blank) {
    return false;
  }
  if (t1[h1(x)] == x) {
    return true;
  }
  return t2[h2(x)] == x;
}
inSet = new CuckooSetLong(inListValues.length); inSet.load(inListValues); outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; outputColVector.isRepeating = true; } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; outNulls[0] = false; } else { outNulls[i] = nullPos[i]; if (!nullPos[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; for(int i = 0; i != n; i++) { if (!nullPos[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
28L, 29L, 30L, 111111111111111L, -444444444444444L, Long.MIN_VALUE}; Long[] negatives = {0L, 4L, 4000L, -2L, 19L, 222222222222222L, -333333333333333L}; CuckooSetLong s = new CuckooSetLong(values.length); for(Long v : values) { s.insert(v); assertTrue(s.lookup(v)); assertFalse(s.lookup(v)); s = new CuckooSetLong(values2.length); for(Long v : values2) { s.insert(v); assertTrue(s.lookup(v)); assertFalse(s.lookup(v));
/**
 * Insert all values in the input array into the set.
 *
 * @param a the values to add; each element is handed to insert(long) in array order
 */
public void load(long[] a) {
  // Use a primitive loop variable: declaring it as Long would box every
  // element only for it to be unboxed again when passed to insert(long).
  for (long x : a) {
    insert(x);
  }
}
/**
 * Test whether x is a member of the set.
 *
 * @param x the double value to search for
 * @return true if and only if the backing long set contains the raw bit pattern of x
 */
public boolean lookup(double x) {
  // Doubles are looked up by their raw 64-bit representation.
  final long bits = Double.doubleToRawLongBits(x);
  return setLong.lookup(bits);
}
@Test public void testSetLongRandom() { long[] values; Random gen = new Random(98763537); for(int i = 0; i < 200;) { // Make a random array of longs int size = gen.nextInt() % MAX_SIZE; if (size <= 0) { // ensure size is >= 1, otherwise try again continue; } i++; values = new long[size]; loadRandom(values, gen); // load them into a SetLong CuckooSetLong s = new CuckooSetLong(size); loadSet(s, values); // look them up to make sure they are all there for (int j = 0; j != size; j++) { assertTrue(s.lookup(values[j])); } } }
/**
 * Create a set of doubles backed by a CuckooSetLong.
 *
 * @param expectedSize passed unchanged to the CuckooSetLong constructor
 */
public CuckooSetDouble(int expectedSize) {
  this.setLong = new CuckooSetLong(expectedSize);
}
/**
 * Allocate a new set to hold expectedSize values. Re-allocation to expand
 * the set is not implemented, so the expected size must be at least the
 * number of values that will be inserted.
 *
 * @param expectedSize at least the number of values that will be inserted
 */
public CuckooSetLong(int expectedSize) {
  // Entries are spread over two hash tables, so each table gets half of
  // the padded capacity.
  int tableSize = (int) (expectedSize * PADDING_FACTOR / 2.0);

  // Prefer a prime table size to depend less on the quality of the hash
  // function: take the first entry of primes that is large enough. If no
  // entry is large enough, the computed size is used as is.
  for (int prime : primes) {
    if (tableSize <= prime) {
      tableSize = prime;
      break;
    }
  }
  n = tableSize;

  // Both tables start out with every slot holding the blank marker.
  t1 = new long[n];
  Arrays.fill(t1, blank);
  t2 = new long[n];
  Arrays.fill(t2, blank);

  updateHashSalt();
}
inSet = new CuckooSetLong(inListValues.length); inSet.load(inListValues); outputVector[0] = inSet.lookup(vector[0]) ? 1 : 0; } else { outputIsNull[0] = true; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; outputIsNull[i] = inputIsNull[i]; if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0; for(int i = 0; i != n; i++) { if (!inputIsNull[i]) { outputVector[i] = inSet.lookup(vector[i]) ? 1 : 0;
/**
 * Test whether x is a member of the set.
 *
 * @param x the value to search for
 * @return true if and only if x is present in the set
 */
public boolean lookup(long x) {
  // The blank marker must be rejected up front: otherwise probing for it
  // could hit a slot holding the marker and report a false positive.
  if (x == blank) {
    return false;
  }
  if (t1[h1(x)] == x) {
    return true;
  }
  return t2[h2(x)] == x;
}
/**
 * Insert all values in the input array into the set.
 *
 * @param a the values to add; each element is handed to insert(long) in array order
 */
public void load(long[] a) {
  // Use a primitive loop variable: declaring it as Long would box every
  // element only for it to be unboxed again when passed to insert(long).
  for (long x : a) {
    insert(x);
  }
}
/**
 * Test whether x is a member of the set.
 *
 * @param x the double value to search for
 * @return true if and only if the backing long set contains the raw bit pattern of x
 */
public boolean lookup(double x) {
  // Doubles are looked up by their raw 64-bit representation.
  final long bits = Double.doubleToRawLongBits(x);
  return setLong.lookup(bits);
}
/**
 * Create a set of doubles backed by a CuckooSetLong.
 *
 * @param expectedSize passed unchanged to the CuckooSetLong constructor
 */
public CuckooSetDouble(int expectedSize) {
  this.setLong = new CuckooSetLong(expectedSize);
}
/**
 * Allocate a new set to hold expectedSize values. Re-allocation to expand
 * the set is not implemented, so the expected size must be at least the
 * number of values that will be inserted.
 *
 * @param expectedSize at least the number of values that will be inserted
 */
public CuckooSetLong(int expectedSize) {
  // Entries are spread over two hash tables, so each table gets half of
  // the padded capacity.
  int tableSize = (int) (expectedSize * PADDING_FACTOR / 2.0);

  // Prefer a prime table size to depend less on the quality of the hash
  // function: take the first entry of primes that is large enough. If no
  // entry is large enough, the computed size is used as is.
  for (int prime : primes) {
    if (tableSize <= prime) {
      tableSize = prime;
      break;
    }
  }
  n = tableSize;

  // Both tables start out with every slot holding the blank marker.
  t1 = new long[n];
  Arrays.fill(t1, blank);
  t2 = new long[n];
  Arrays.fill(t2, blank);

  updateHashSalt();
}
public void insert(long x) { if (x == blank) { findNewBlank(); } long temp; if (lookup(x)) { return; } // Try to insert up to n times. Rehash if that fails. for(int i = 0; i != n; i++) { if (t1[h1(x)] == blank) { t1[h1(x)] = x; return; } // swap x and t1[h1(x)] temp = t1[h1(x)]; t1[h1(x)] = x; x = temp; if (t2[h2(x)] == blank) { t2[h2(x)] = x; return; } // swap x and t2[h2(x)] temp = t2[h2(x)]; t2[h2(x)] = x; x = temp; } rehash(); insert(x); }
inSet = new CuckooSetLong(inListValues.length); inSet.load(inListValues); if (!(inSet.lookup(vector[0]))) { for(int j=0; j != n; j++) { int i = sel[j]; if (inSet.lookup(vector[i])) { sel[newSize++] = i; int newSize = 0; for(int i = 0; i != n; i++) { if (inSet.lookup(vector[i])) { sel[newSize++] = i; if (!inSet.lookup(vector[0])) { int i = sel[j]; if (!nullPos[i]) { if (inSet.lookup(vector[i])) { sel[newSize++] = i; for(int i = 0; i != n; i++) { if (!nullPos[i]) { if (inSet.lookup(vector[i])) { sel[newSize++] = i;
throw new RuntimeException("Too many rehashes"); updateHashSalt(); for (Long v : prev1) { if (v != blank) { long x = tryInsert(v); if (x != blank) { rehash(); return; long x = tryInsert(v); if (x != blank) { rehash(); return;