@Override public void addInput(Page page) { checkState(!finishing, "Operator is already finishing"); requireNonNull(page, "page is null"); checkState(!hasUnfinishedInput()); inputPage = page; if (groupByHash.isPresent()) { unfinishedWork = groupByHash.get().getGroupIds(inputPage); processUnfinishedWork(); } updateMemoryReservation(); }
public Work<?> processPage(Page page) { return new TransformWork<>( groupByHash.getGroupIds(page), groupIds -> { processPage(page, groupIds); return null; }); }
@Override public void addInput(Page page) { checkState(needsInput()); inputPage = page; unfinishedWork = groupByHash.getGroupIds(page); processUnfinishedWork(); updateMemoryReservation(); }
@Override public Work<?> processPage(Page page) { if (aggregators.isEmpty()) { return groupByHash.addPage(page); } else { return new TransformWork<>( groupByHash.getGroupIds(page), groupByIdBlock -> { for (Aggregator aggregator : aggregators) { aggregator.processPage(groupByIdBlock, page); } // we do not need any output from TransformWork for this case return null; }); } }
@Override public int[] indexPage(Page page) { Work<GroupByIdBlock> work = hash.getGroupIds(page); boolean done = work.process(); // TODO: this class does not yield wrt memory limit; enable it verify(done); GroupByIdBlock groupIds = work.getResult(); int[] indexes = new int[page.getPositionCount()]; for (int i = 0; i < indexes.length; i++) { indexes[i] = toIntExact(groupIds.getGroupId(i)); } return indexes; }
@Test public void testForceRehash() { // Create a page with positionCount >> expected size of groupByHash Block valuesBlock = BlockAssertions.createStringSequenceBlock(0, 100); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), valuesBlock); // Create group by hash with extremely small size GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 4, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process(); // Ensure that all groups are present in group by hash for (int i = 0; i < valuesBlock.getPositionCount(); i++) { assertTrue(groupByHash.contains(i, new Page(valuesBlock, hashBlock), CONTAINS_CHANNELS)); } }
@Test public void testContains() { Block valuesBlock = BlockAssertions.createDoubleSequenceBlock(0, 10); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), valuesBlock); GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(DOUBLE), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process(); Block testBlock = BlockAssertions.createDoublesBlock((double) 3); Block testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), testBlock); assertTrue(groupByHash.contains(0, new Page(testBlock, testHashBlock), CONTAINS_CHANNELS)); testBlock = BlockAssertions.createDoublesBlock(11.0); testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), testBlock); assertFalse(groupByHash.contains(0, new Page(testBlock, testHashBlock), CONTAINS_CHANNELS)); }
@Test public void testContainsMultipleColumns() { Block valuesBlock = BlockAssertions.createDoubleSequenceBlock(0, 10); Block stringValuesBlock = BlockAssertions.createStringSequenceBlock(0, 10); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE, VARCHAR), valuesBlock, stringValuesBlock); int[] hashChannels = {0, 1}; GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(DOUBLE, VARCHAR), hashChannels, Optional.of(2), 100, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, stringValuesBlock, hashBlock)).process(); Block testValuesBlock = BlockAssertions.createDoublesBlock((double) 3); Block testStringValuesBlock = BlockAssertions.createStringsBlock("3"); Block testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE, VARCHAR), testValuesBlock, testStringValuesBlock); assertTrue(groupByHash.contains(0, new Page(testValuesBlock, testStringValuesBlock, testHashBlock), hashChannels)); }
@Test public void testGetGroupIds() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); for (int tries = 0; tries < 2; tries++) { for (int value = 0; value < MAX_GROUP_ID; value++) { Block block = BlockAssertions.createLongsBlock(value); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); for (int addValuesTries = 0; addValuesTries < 10; addValuesTries++) { Work<GroupByIdBlock> work = groupByHash.getGroupIds(page); work.process(); GroupByIdBlock groupIds = work.getResult(); assertEquals(groupIds.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); assertEquals(groupIds.getPositionCount(), 1); long groupId = groupIds.getGroupId(0); assertEquals(groupId, value); } } } }
public Work<Block> markDistinctRows(Page page) { return new TransformWork<>( groupByHash.getGroupIds(page), ids -> { BlockBuilder blockBuilder = BOOLEAN.createBlockBuilder(null, ids.getPositionCount()); for (int i = 0; i < ids.getPositionCount(); i++) { if (ids.getGroupId(i) == nextDistinctId) { BOOLEAN.writeBoolean(blockBuilder, true); nextDistinctId++; } else { BOOLEAN.writeBoolean(blockBuilder, false); } } return blockBuilder.build(); }); }
@Test public void testAddPage() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); for (int tries = 0; tries < 2; tries++) { for (int value = 0; value < MAX_GROUP_ID; value++) { Block block = BlockAssertions.createLongsBlock(value); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); for (int addValuesTries = 0; addValuesTries < 10; addValuesTries++) { groupByHash.addPage(page).process(); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // add the page again using get group ids and make sure the group count didn't change Work<GroupByIdBlock> work = groupByHash.getGroupIds(page); work.process(); GroupByIdBlock groupIds = work.getResult(); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); assertEquals(groupIds.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // verify the first position assertEquals(groupIds.getPositionCount(), 1); long groupId = groupIds.getGroupId(0); assertEquals(groupId, value); } } } }
@Test public void testAppendToMultipleTuplesPerGroup() { List<Long> values = new ArrayList<>(); for (long i = 0; i < 100; i++) { values.add(i % 50); } Block valuesBlock = BlockAssertions.createLongsBlock(values); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), valuesBlock); GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process(); assertEquals(groupByHash.getGroupCount(), 50); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int i = 0; i < groupByHash.getGroupCount(); i++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(i, pageBuilder, 0); } Page outputPage = pageBuilder.build(); assertEquals(outputPage.getPositionCount(), 50); BlockAssertions.assertBlockEquals(BIGINT, outputPage.getBlock(0), BlockAssertions.createLongSequenceBlock(0, 50)); }
@Test public void testAppendTo() { Block valuesBlock = BlockAssertions.createStringSequenceBlock(0, 100); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), valuesBlock); GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); Work<GroupByIdBlock> work = groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)); work.process(); GroupByIdBlock groupIds = work.getResult(); for (int i = 0; i < groupIds.getPositionCount(); i++) { assertEquals(groupIds.getGroupId(i), i); } assertEquals(groupByHash.getGroupCount(), 100); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int i = 0; i < groupByHash.getGroupCount(); i++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(i, pageBuilder, 0); } Page page = pageBuilder.build(); // Ensure that all blocks have the same positionCount for (int i = 0; i < groupByHash.getTypes().size(); i++) { assertEquals(page.getBlock(i).getPositionCount(), 100); } assertEquals(page.getPositionCount(), 100); BlockAssertions.assertBlockEquals(VARCHAR, page.getBlock(0), valuesBlock); BlockAssertions.assertBlockEquals(BIGINT, page.getBlock(1), hashBlock); }
@Benchmark @OperationsPerInvocation(POSITIONS) public Object groupByHashPreCompute(BenchmarkData data) { GroupByHash groupByHash = new MultiChannelGroupByHash(data.getTypes(), data.getChannels(), data.getHashChannel(), EXPECTED_SIZE, false, getJoinCompiler(data.isGroupByUsesEqual()), NOOP); data.getPages().forEach(p -> groupByHash.getGroupIds(p).process()); ImmutableList.Builder<Page> pages = ImmutableList.builder(); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(groupId, pageBuilder, 0); if (pageBuilder.isFull()) { pages.add(pageBuilder.build()); pageBuilder.reset(); } } pages.add(pageBuilder.build()); return pageBuilder.build(); }
private void processPage(Page page) { if (aggregators.isEmpty()) { groupByHash.addPage(page); return; } GroupByIdBlock groupIds = groupByHash.getGroupIds(page); for (Aggregator aggregator : aggregators) { aggregator.processPage(groupIds, page); } }
@Override public int[] indexPage(Page page) { GroupByIdBlock groupIds = hash.getGroupIds(page); int[] indexes = new int[page.getPositionCount()]; for (int i = 0; i < indexes.length; i++) { indexes[i] = Ints.checkedCast(groupIds.getGroupId(i)); } return indexes; }
@Override public void addInput(Page page) { checkState(!finishing, "Operator is already finishing"); requireNonNull(page, "page is null"); checkState(inputPage == null); inputPage = page; if (groupByHash.isPresent()) { partitionIds = groupByHash.get().getGroupIds(inputPage); partitionRowCount.ensureCapacity(partitionIds.getGroupCount()); } operatorContext.setMemoryReservation(getEstimatedByteSize()); }
@Test public void testForceRehash() throws Exception { // Create a page with positionCount >> expected size of groupByHash Block valuesBlock = BlockAssertions.createStringSequenceBlock(0, 100); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), valuesBlock); // Create group by hash with extremely small size GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] { 0 }, Optional.<Integer>empty(), Optional.of(1), 4); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)); // Ensure that all groups are present in group by hash for (int i = 0; i < valuesBlock.getPositionCount(); i++) { assertTrue(groupByHash.contains(i, new Page(valuesBlock, hashBlock), CONTAINS_CHANNELS)); } } }
@Test public void testContains() throws Exception { Block valuesBlock = BlockAssertions.createDoubleSequenceBlock(0, 10); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), valuesBlock); GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(DOUBLE), new int[] { 0 }, Optional.<Integer>empty(), Optional.of(1), 100); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)); Block testBlock = BlockAssertions.createDoublesBlock((double) 3); Block testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), testBlock); assertTrue(groupByHash.contains(0, new Page(testBlock, testHashBlock), CONTAINS_CHANNELS)); testBlock = BlockAssertions.createDoublesBlock(11.0); testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE), testBlock); assertFalse(groupByHash.contains(0, new Page(testBlock, testHashBlock), CONTAINS_CHANNELS)); }
@Test public void testContainsMultipleColumns() throws Exception { Block valuesBlock = BlockAssertions.createDoubleSequenceBlock(0, 10); Block stringValuesBlock = BlockAssertions.createStringSequenceBlock(0, 10); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE, VARCHAR), valuesBlock, stringValuesBlock); int[] hashChannels = { 0, 1 }; GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(DOUBLE, VARCHAR), hashChannels, Optional.<Integer>empty(), Optional.of(2), 100); groupByHash.getGroupIds(new Page(valuesBlock, stringValuesBlock, hashBlock)); Block testValuesBlock = BlockAssertions.createDoublesBlock((double) 3); Block testStringValuesBlock = BlockAssertions.createStringsBlock("3"); Block testHashBlock = TypeUtils.getHashBlock(ImmutableList.of(DOUBLE, VARCHAR), testValuesBlock, testStringValuesBlock); assertTrue(groupByHash.contains(0, new Page(testValuesBlock, testStringValuesBlock, testHashBlock), hashChannels)); }