/**
 * Walks {@code array} from first to last position and, for each element that
 * {@code typedSet} does not already contain, records it in the set and appends
 * it to {@code blockBuilder} via {@code type}.
 *
 * @param array source block to scan
 * @param type type used to copy an element into {@code blockBuilder}
 * @param typedSet set of elements seen so far; updated by this call
 * @param blockBuilder destination for the newly seen elements
 */
private static void appendTypedArray(Block array, Type type, TypedSet typedSet, BlockBuilder blockBuilder)
{
    for (int position = 0; position < array.getPositionCount(); position++) {
        if (typedSet.contains(array, position)) {
            // Already seen: nothing to record or copy.
            continue;
        }
        typedSet.add(array, position);
        type.appendTo(array, position, blockBuilder);
    }
}
/**
 * Feeds every position of {@code longBlock} into {@code typedSet} while mirroring the
 * same values into a {@link HashSet}, asserting before and after each insertion that
 * membership and size agree between the two.
 *
 * @param typedSet set under test
 * @param longBlock block whose positions are read with {@code BIGINT.getLong}
 */
private static void testBigintFor(TypedSet typedSet, Block longBlock)
{
    Set<Long> reference = new HashSet<>();
    int position = 0;
    while (position < longBlock.getPositionCount()) {
        long value = BIGINT.getLong(longBlock, position);

        // State before the insertion must match the reference set.
        boolean containedBefore = typedSet.contains(longBlock, position);
        assertEquals(containedBefore, reference.contains(value));
        assertEquals(typedSet.size(), reference.size());

        reference.add(value);
        typedSet.add(longBlock, position);

        // State after the insertion must match as well.
        boolean containedAfter = typedSet.contains(longBlock, position);
        assertEquals(containedAfter, reference.contains(value));
        assertEquals(typedSet.size(), reference.size());

        position++;
    }
}
} // closes an enclosing scope opened outside this chunk
/**
 * Keeps adding single-element blocks holding increasing long values to a
 * {@code TypedSet} and expects a {@code PrestoException} whose error code is
 * {@code EXCEEDED_FUNCTION_MEMORY_LIMIT}. Reaching the end of the loop without
 * an exception fails the test.
 */
@Test
public void testMemoryExceeded()
{
    try {
        TypedSet typedSet = new TypedSet(BIGINT, 10, FUNCTION_NAME);
        int value = 0;
        while (value <= TypedSet.FOUR_MEGABYTES + 1) {
            Block singleValueBlock = createLongsBlock(nCopies(1, (long) value));
            typedSet.add(singleValueBlock, 0);
            value++;
        }
        fail("expected exception");
    }
    catch (PrestoException e) {
        assertEquals(e.getErrorCode(), EXCEEDED_FUNCTION_MEMORY_LIMIT.toErrorCode());
    }
}
/**
 * Sums the products of the double values stored under keys that appear in both blocks.
 * Keys are read at even positions and the matching value at the following odd position.
 *
 * @param leftMap block scanned key by key
 * @param rightMap block whose keys are indexed in a {@code TypedSet} first
 * @return the accumulated sum; {@code 0.0} when no key of {@code leftMap} is found
 */
private static double mapDotProduct(Block leftMap, Block rightMap)
{
    TypedSet rightMapKeys = new TypedSet(VARCHAR, rightMap.getPositionCount(), "cosine_similarity");
    for (int keyPosition = 0; keyPosition < rightMap.getPositionCount(); keyPosition += 2) {
        rightMapKeys.add(rightMap, keyPosition);
    }

    double result = 0.0;
    for (int keyPosition = 0; keyPosition < leftMap.getPositionCount(); keyPosition += 2) {
        int position = rightMapKeys.positionOf(leftMap, keyPosition);
        if (position == -1) {
            // -1 is treated as "key not present on the right side".
            continue;
        }
        // NOTE(review): relies on positionOf yielding the index at which the key was
        // added, so the right-hand value sits at 2 * position + 1 — confirm against TypedSet.
        double leftValue = DOUBLE.getDouble(leftMap, keyPosition + 1);
        double rightValue = DOUBLE.getDouble(rightMap, 2 * position + 1);
        result += leftValue * rightValue;
    }
    return result;
}
@Test public void testGetElementPosition() { int elementCount = 100; // Set initialTypedSetEntryCount to a small number to trigger rehash() int initialTypedSetEntryCount = 10; TypedSet typedSet = new TypedSet(BIGINT, initialTypedSetEntryCount, FUNCTION_NAME); BlockBuilder blockBuilder = BIGINT.createFixedSizeBlockBuilder(elementCount); for (int i = 0; i < elementCount; i++) { BIGINT.writeLong(blockBuilder, i); typedSet.add(blockBuilder, i); } assertEquals(typedSet.size(), elementCount); for (int j = 0; j < blockBuilder.getPositionCount(); j++) { assertEquals(typedSet.positionOf(blockBuilder, j), j); } }
/**
 * Builds a block with the elements of {@code leftArray} that are not in
 * {@code rightArray}, each emitted once and in the order first met in {@code leftArray}.
 * An empty {@code leftArray} is returned as is.
 *
 * @param type element type used to create the result builder and copy elements
 * @param leftArray elements to keep unless excluded
 * @param rightArray elements to exclude
 * @return {@code leftArray} itself when it is empty, otherwise a newly built block
 */
@TypeParameter("E")
@SqlType("array(E)")
public static Block except(
        @TypeParameter("E") Type type,
        @SqlType("array(E)") Block leftArray,
        @SqlType("array(E)") Block rightArray)
{
    int leftPositionCount = leftArray.getPositionCount();
    int rightPositionCount = rightArray.getPositionCount();
    if (leftPositionCount == 0) {
        return leftArray;
    }

    TypedSet seen = new TypedSet(type, leftPositionCount + rightPositionCount, "array_except");
    BlockBuilder output = type.createBlockBuilder(null, leftPositionCount);

    // Seed the set with everything that must be excluded.
    int rightPosition = 0;
    while (rightPosition < rightPositionCount) {
        seen.add(rightArray, rightPosition);
        rightPosition++;
    }

    // Emit left elements not seen so far; adding them to the set also drops later duplicates.
    for (int leftPosition = 0; leftPosition < leftPositionCount; leftPosition++) {
        if (seen.contains(leftArray, leftPosition)) {
            continue;
        }
        seen.add(leftArray, leftPosition);
        type.appendTo(leftArray, leftPosition, output);
    }

    return output.build();
}
} // closes an enclosing scope opened outside this chunk
@Test public void testGetElementPositionWithProvidedEmptyBlockBuilder() { int elementCount = 100; // Set initialTypedSetEntryCount to a small number to trigger rehash() int initialTypedSetEntryCount = 10; BlockBuilder emptyBlockBuilder = BIGINT.createFixedSizeBlockBuilder(elementCount); TypedSet typedSet = new TypedSet(BIGINT, emptyBlockBuilder, initialTypedSetEntryCount, FUNCTION_NAME); BlockBuilder externalBlockBuilder = BIGINT.createFixedSizeBlockBuilder(elementCount); for (int i = 0; i < elementCount; i++) { if (i % 10 == 0) { externalBlockBuilder.appendNull(); } else { BIGINT.writeLong(externalBlockBuilder, i); } typedSet.add(externalBlockBuilder, i); } assertEquals(typedSet.size(), emptyBlockBuilder.getPositionCount()); assertEquals(typedSet.size(), elementCount - elementCount / 10 + 1); for (int j = 0; j < typedSet.size(); j++) { assertEquals(typedSet.positionOf(emptyBlockBuilder, j), j); } }
BIGINT.writeLong(blockBuilder, i); typedSet.add(blockBuilder, i);
BIGINT.writeLong(externalBlockBuilder, i); typedSet.add(externalBlockBuilder, i);
/**
 * Adds the keys "hello", "bye", "abc" to {@code set}, then probes it with a second
 * block holding "bye", "abc", "hello", "bad" and a null. Asserts the positions
 * reported for the probes, that "bad" is not contained, and that the null is
 * contained once it has been added.
 *
 * @param set the set under test; mutated by this method
 */
private void testGetElementPositionRandomFor(TypedSet set)
{
    BlockBuilder keys = VARCHAR.createBlockBuilder(null, 5);
    for (String key : new String[] {"hello", "bye", "abc"}) {
        VARCHAR.writeSlice(keys, utf8Slice(key));
    }
    int keyPosition = 0;
    while (keyPosition < keys.getPositionCount()) {
        set.add(keys, keyPosition);
        keyPosition++;
    }

    BlockBuilder values = VARCHAR.createBlockBuilder(null, 5);
    for (String value : new String[] {"bye", "abc", "hello", "bad"}) {
        VARCHAR.writeSlice(values, utf8Slice(value));
    }
    values.appendNull();

    // The null at position 4 has not been added yet.
    assertEquals(set.positionOf(values, 4), -1);
    // "hello", "abc" and "bye" were added as keys 0, 2 and 1 respectively.
    assertEquals(set.positionOf(values, 2), 0);
    assertEquals(set.positionOf(values, 1), 2);
    assertEquals(set.positionOf(values, 0), 1);
    // "bad" was never added.
    assertFalse(set.contains(values, 3));

    set.add(values, 4);
    assertTrue(set.contains(values, 4));
}
typedSet.add(map, i); keyType.appendTo(map, i, blockBuilder); valueType.appendTo(map, i + 1, blockBuilder); for (int i = 0; i < map.getPositionCount(); i += 2) { if (!typedSet.contains(map, i)) { typedSet.add(map, i); keyType.appendTo(map, i, blockBuilder); valueType.appendTo(map, i + 1, blockBuilder);
for (int i = 0; i < fromMap.getPositionCount(); i += 2) { if (!typedSet.contains(keyBlock, i / 2)) { typedSet.add(keyBlock, i / 2); toKeyType.appendTo(keyBlock, i / 2, blockBuilder); if (fromMap.isNull(i + 1)) {
for (int i = 0; i < array.getPositionCount(); i++) { if (!typedSet.contains(array, i)) { typedSet.add(array, i); distinctCount++; type.appendTo(array, i, distinctElementBlockBuilder);
throw new PrestoException(INVALID_FUNCTION_ARGUMENT, format("Duplicate keys (%s) are not allowed", keyType.getObjectValue(session, rowBlock, 0))); uniqueKeys.add(rowBlock, 0);
/**
 * Returns the elements of a varchar array with repeats removed, keeping the first
 * occurrence of each element in its original order. An empty input is returned as is.
 *
 * @param array input block
 * @return {@code array} itself when it has no positions, otherwise a newly built block
 */
@ScalarFunction
@SqlType("array(varchar)")
public static Block oldArrayDistinct(@SqlType("array(varchar)") Block array)
{
    if (array.getPositionCount() == 0) {
        return array;
    }

    TypedSet seen = new TypedSet(VARCHAR, array.getPositionCount(), "old_array_distinct");
    BlockBuilder output = VARCHAR.createBlockBuilder(null, array.getPositionCount());

    int position = 0;
    while (position < array.getPositionCount()) {
        boolean alreadySeen = seen.contains(array, position);
        if (!alreadySeen) {
            seen.add(array, position);
            VARCHAR.appendTo(array, position, output);
        }
        position++;
    }

    return output.build();
}
} // closes an enclosing scope opened outside this chunk
keySet.add(key, keyValueIndex); keyType.appendTo(key, keyValueIndex, distinctKeyBlockBuilder); BlockBuilder valueArrayBuilder = valueType.createBlockBuilder(null, 10, expectedValueSize(valueType, EXPECTED_ENTRY_SIZE));
keySet.add(rowBlock, 0); entryIndicesList[keySet.size() - 1].add(i);
/**
 * Creates a {@code TypedSet} for {@code BIGINT} with the given expected size and feeds
 * it every position of {@code longBlock}, mirroring the values into a {@link HashSet}
 * and asserting before and after each insertion that membership and size agree.
 *
 * @param longBlock block whose positions are read with {@code BIGINT.getLong}
 * @param expectedSetSize value passed to the {@code TypedSet} constructor
 */
private static void testBigint(Block longBlock, int expectedSetSize)
{
    TypedSet typedSet = new TypedSet(BIGINT, expectedSetSize);
    Set<Long> reference = new HashSet<>();

    int position = 0;
    while (position < longBlock.getPositionCount()) {
        long value = BIGINT.getLong(longBlock, position);

        // State before the insertion must match the reference set.
        boolean containedBefore = typedSet.contains(longBlock, position);
        assertEquals(containedBefore, reference.contains(value));
        assertEquals(typedSet.size(), reference.size());

        reference.add(value);
        typedSet.add(longBlock, position);

        // State after the insertion must match as well.
        boolean containedAfter = typedSet.contains(longBlock, position);
        assertEquals(containedAfter, reference.contains(value));
        assertEquals(typedSet.size(), reference.size());

        position++;
    }
}
} // closes an enclosing scope opened outside this chunk
/**
 * Writes the longs {@code 0..99} into a block builder, adding each freshly written
 * position to a {@code TypedSet}, then checks that {@code positionOf} returns the
 * position each value was written at.
 *
 * @throws Exception declared by the signature; nothing in the body is visibly checked
 */
@Test
public void testGetElementPosition()
        throws Exception
{
    int elementCount = 100;
    TypedSet typedSet = new TypedSet(BIGINT, elementCount);
    BlockBuilder blockBuilder = BIGINT.createFixedSizeBlockBuilder(elementCount);

    int written = 0;
    while (written < elementCount) {
        BIGINT.writeLong(blockBuilder, written);
        typedSet.add(blockBuilder, written);
        written++;
    }

    for (int position = 0; position < blockBuilder.getPositionCount(); position++) {
        int found = typedSet.positionOf(blockBuilder, position);
        assertEquals(found, position);
    }
}