public GroupByHashPageIndexer(List<? extends Type> hashTypes, JoinCompiler joinCompiler) { this(GroupByHash.createGroupByHash( hashTypes, IntStream.range(0, hashTypes.size()).toArray(), Optional.empty(), 20, false, joinCompiler, NOOP)); }
public MarkDistinctHash(Session session, List<Type> types, int[] channels, Optional<Integer> hashChannel, int expectedDistinctValues, JoinCompiler joinCompiler, UpdateMemory updateMemory) { this.groupByHash = createGroupByHash(types, channels, hashChannel, expectedDistinctValues, isDictionaryAggregationEnabled(session), joinCompiler, updateMemory); }
static GroupByHash createGroupByHash( List<? extends Type> hashTypes, int[] hashChannels, Optional<Integer> inputHashChannel, int expectedSize, boolean processDictionary, JoinCompiler joinCompiler, UpdateMemory updateMemory) { if (hashTypes.size() == 1 && hashTypes.get(0).equals(BIGINT) && hashChannels.length == 1) { return new BigintGroupByHash(hashChannels[0], inputHashChannel.isPresent(), expectedSize, updateMemory); } return new MultiChannelGroupByHash(hashTypes, hashChannels, inputHashChannel, expectedSize, processDictionary, joinCompiler, updateMemory); }
@Test public void testTypes() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); // Additional bigint channel for hash assertEquals(groupByHash.getTypes(), ImmutableList.of(VARCHAR, BIGINT)); }
groupByHash = createGroupByHash( partitionTypes, Ints.toArray(partitionChannels),
this.groupByHash = Optional.of(createGroupByHash(partitionTypes, channels, hashChannel, expectedPositions, isDictionaryAggregationEnabled(operatorContext.getSession()), joinCompiler, this::updateMemoryReservation));
public ChannelSetBuilder(Type type, Optional<Integer> hashChannel, int expectedPositions, OperatorContext operatorContext, JoinCompiler joinCompiler) { List<Type> types = ImmutableList.of(type); this.hash = createGroupByHash( types, HASH_CHANNELS, hashChannel, expectedPositions, isDictionaryAggregationEnabled(operatorContext.getSession()), joinCompiler, this::updateMemoryReservation); this.nullBlockPage = new Page(type.createBlockBuilder(null, 1, UNKNOWN.getFixedSize()).appendNull().build()); this.operatorContext = requireNonNull(operatorContext, "operatorContext is null"); this.localMemoryContext = operatorContext.localUserMemoryContext(); }
}; this.groupByHash = createGroupByHash( groupByTypes, Ints.toArray(groupByChannels),
@Test public void testForceRehash() { // Create a page with positionCount >> expected size of groupByHash Block valuesBlock = BlockAssertions.createStringSequenceBlock(0, 100); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), valuesBlock); // Create group by hash with extremely small size GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 4, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process(); // Ensure that all groups are present in group by hash for (int i = 0; i < valuesBlock.getPositionCount(); i++) { assertTrue(groupByHash.contains(i, new Page(valuesBlock, hashBlock), CONTAINS_CHANNELS)); } }
private static GroupByHash createGroupByHash(List<Type> partitionTypes, List<Integer> partitionChannels, UpdateMemory updateMemory) { return GroupByHash.createGroupByHash( partitionTypes, Ints.toArray(partitionChannels), Optional.empty(), 1, false, new JoinCompiler(createTestMetadataManager(), new FeaturesConfig()), updateMemory); }
@Test public void testGetGroupIds() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); for (int tries = 0; tries < 2; tries++) { for (int value = 0; value < MAX_GROUP_ID; value++) { Block block = BlockAssertions.createLongsBlock(value); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); for (int addValuesTries = 0; addValuesTries < 10; addValuesTries++) { Work<GroupByIdBlock> work = groupByHash.getGroupIds(page); work.process(); GroupByIdBlock groupIds = work.getResult(); assertEquals(groupIds.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); assertEquals(groupIds.getPositionCount(), 1); long groupId = groupIds.getGroupId(0); assertEquals(groupId, value); } } } }
@Test public void testContains() { Block valuesBlock = BlockAssertions.createDoubleSequenceBlock(0, 10); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), valuesBlock); GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(DOUBLE), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process(); Block testBlock = BlockAssertions.createDoublesBlock((double) 3); Block testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), testBlock); assertTrue(groupByHash.contains(0, new Page(testBlock, testHashBlock), CONTAINS_CHANNELS)); testBlock = BlockAssertions.createDoublesBlock(11.0); testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), testBlock); assertFalse(groupByHash.contains(0, new Page(testBlock, testHashBlock), CONTAINS_CHANNELS)); }
@Test public void testContainsMultipleColumns() { Block valuesBlock = BlockAssertions.createDoubleSequenceBlock(0, 10); Block stringValuesBlock = BlockAssertions.createStringSequenceBlock(0, 10); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE, VARCHAR), valuesBlock, stringValuesBlock); int[] hashChannels = {0, 1}; GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(DOUBLE, VARCHAR), hashChannels, Optional.of(2), 100, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, stringValuesBlock, hashBlock)).process(); Block testValuesBlock = BlockAssertions.createDoublesBlock((double) 3); Block testStringValuesBlock = BlockAssertions.createStringsBlock("3"); Block testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE, VARCHAR), testValuesBlock, testStringValuesBlock); assertTrue(groupByHash.contains(0, new Page(testValuesBlock, testStringValuesBlock, testHashBlock), hashChannels)); }
@Test public void testAddPage() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); for (int tries = 0; tries < 2; tries++) { for (int value = 0; value < MAX_GROUP_ID; value++) { Block block = BlockAssertions.createLongsBlock(value); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); for (int addValuesTries = 0; addValuesTries < 10; addValuesTries++) { groupByHash.addPage(page).process(); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // add the page again using get group ids and make sure the group count didn't change Work<GroupByIdBlock> work = groupByHash.getGroupIds(page); work.process(); GroupByIdBlock groupIds = work.getResult(); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); assertEquals(groupIds.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // verify the first position assertEquals(groupIds.getPositionCount(), 1); long groupId = groupIds.getGroupId(0); assertEquals(groupId, value); } } } }
@Test public void testNullGroup() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); Block block = createLongsBlock((Long) null); Block hashBlock = getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); groupByHash.addPage(page).process(); // Add enough values to force a rehash block = createLongSequenceBlock(1, 132748); hashBlock = getHashBlock(ImmutableList.of(BIGINT), block); page = new Page(block, hashBlock); groupByHash.addPage(page).process(); block = createLongsBlock(0); hashBlock = getHashBlock(ImmutableList.of(BIGINT), block); page = new Page(block, hashBlock); assertFalse(groupByHash.contains(0, page, CONTAINS_CHANNELS)); }
@Test public void testAppendToMultipleTuplesPerGroup() { List<Long> values = new ArrayList<>(); for (long i = 0; i < 100; i++) { values.add(i % 50); } Block valuesBlock = BlockAssertions.createLongsBlock(values); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), valuesBlock); GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process(); assertEquals(groupByHash.getGroupCount(), 50); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int i = 0; i < groupByHash.getGroupCount(); i++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(i, pageBuilder, 0); } Page outputPage = pageBuilder.build(); assertEquals(outputPage.getPositionCount(), 50); BlockAssertions.assertBlockEquals(BIGINT, outputPage.getBlock(0), BlockAssertions.createLongSequenceBlock(0, 50)); }
@Test public void testAppendTo() { Block valuesBlock = BlockAssertions.createStringSequenceBlock(0, 100); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), valuesBlock); GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); Work<GroupByIdBlock> work = groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)); work.process(); GroupByIdBlock groupIds = work.getResult(); for (int i = 0; i < groupIds.getPositionCount(); i++) { assertEquals(groupIds.getGroupId(i), i); } assertEquals(groupByHash.getGroupCount(), 100); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int i = 0; i < groupByHash.getGroupCount(); i++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(i, pageBuilder, 0); } Page page = pageBuilder.build(); // Ensure that all blocks have the same positionCount for (int i = 0; i < groupByHash.getTypes().size(); i++) { assertEquals(page.getBlock(i).getPositionCount(), 100); } assertEquals(page.getPositionCount(), 100); BlockAssertions.assertBlockEquals(VARCHAR, page.getBlock(0), valuesBlock); BlockAssertions.assertBlockEquals(BIGINT, page.getBlock(1), hashBlock); }
public GroupByHashPageIndexer(List<? extends Type> hashTypes) { this(GroupByHash.createGroupByHash( hashTypes, IntStream.range(0, hashTypes.size()).toArray(), Optional.empty(), Optional.empty(), 20, false)); }
@Test public void testTypes() throws Exception { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] { 0 }, Optional.<Integer>empty(), Optional.of(1), 100); // Additional bigint channel for hash assertEquals(groupByHash.getTypes(), ImmutableList.of(VARCHAR, BIGINT)); }
public ChannelSetBuilder(Type type, Optional<Integer> hashChannel, int expectedPositions, OperatorContext operatorContext) { List<Type> types = ImmutableList.of(type); this.hash = createGroupByHash(operatorContext.getSession(), types, HASH_CHANNELS, Optional.empty(), hashChannel, expectedPositions); this.operatorContext = operatorContext; this.nullBlockPage = new Page(type.createBlockBuilder(new BlockBuilderStatus(), 1, UNKNOWN.getFixedSize()).appendNull().build()); }