/**
 * Fingerprints every byte of {@code bytes} by delegating to the two-argument overload with the
 * array's full length.
 */
private static long fingerprint(byte[] bytes) {
  int length = bytes.length;
  return fingerprint(bytes, length);
}
public void testReallySimpleFingerprints() { assertEquals(8581389452482819506L, fingerprint("test".getBytes(UTF_8))); // 32 characters long assertEquals(-4196240717365766262L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8))); // 256 characters long assertEquals(3500507768004279527L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8))); }
public void testUtf8() { char[] charsA = new char[128]; char[] charsB = new char[128]; for (int i = 0; i < charsA.length; i++) { if (i < 100) { charsA[i] = 'a'; charsB[i] = 'a'; } else { // Both two-byte characters, but must be different charsA[i] = (char) (0x0180 + i); charsB[i] = (char) (0x0280 + i); } } String stringA = new String(charsA); String stringB = new String(charsB); assertThat(stringA).isNotEqualTo(stringB); assertThat(HASH_FN.hashUnencodedChars(stringA)) .isNotEqualTo(HASH_FN.hashUnencodedChars(stringB)); assertThat(fingerprint(stringA.getBytes(UTF_8))) .isNotEqualTo(fingerprint(stringB.getBytes(UTF_8))); // ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters // greater than 127 (ie with their top bit set). // Don't attempt to do this in real code. assertEquals( fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1))); }
long h = 0; for (int i = 0; i < iterations; ++i) { h ^= fingerprint(buf, i); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, i * i % bufLen); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, i * i * i % bufLen); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, bufLen); h = remix(h); buf[bufLen++] = getChar(h);
/**
 * Fingerprints every byte of {@code bytes} by delegating to the two-argument overload with the
 * array's full length.
 */
private static long fingerprint(byte[] bytes) {
  int length = bytes.length;
  return fingerprint(bytes, length);
}
public void testReallySimpleFingerprints() { assertEquals(8581389452482819506L, fingerprint("test".getBytes(UTF_8))); // 32 characters long assertEquals(-4196240717365766262L, fingerprint(Strings.repeat("test", 8).getBytes(UTF_8))); // 256 characters long assertEquals(3500507768004279527L, fingerprint(Strings.repeat("test", 64).getBytes(UTF_8))); }
public void testUtf8() { char[] charsA = new char[128]; char[] charsB = new char[128]; for (int i = 0; i < charsA.length; i++) { if (i < 100) { charsA[i] = 'a'; charsB[i] = 'a'; } else { // Both two-byte characters, but must be different charsA[i] = (char) (0x0180 + i); charsB[i] = (char) (0x0280 + i); } } String stringA = new String(charsA); String stringB = new String(charsB); assertThat(stringA).isNotEqualTo(stringB); assertThat(HASH_FN.hashUnencodedChars(stringA)) .isNotEqualTo(HASH_FN.hashUnencodedChars(stringB)); assertThat(fingerprint(stringA.getBytes(UTF_8))) .isNotEqualTo(fingerprint(stringB.getBytes(UTF_8))); // ISO 8859-1 only has 0-255 (ubyte) representation so throws away UTF-8 characters // greater than 127 (ie with their top bit set). // Don't attempt to do this in real code. assertEquals( fingerprint(stringA.getBytes(ISO_8859_1)), fingerprint(stringB.getBytes(ISO_8859_1))); }
long h = 0; for (int i = 0; i < iterations; ++i) { h ^= fingerprint(buf, i); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, i * i % bufLen); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, i * i * i % bufLen); h = remix(h); buf[bufLen++] = getChar(h); h ^= fingerprint(buf, bufLen); h = remix(h); buf[bufLen++] = getChar(h);