public AddDictionaryPageWork(Page page) { verify(canProcessDictionary(page), "invalid call to addDictionaryPage"); this.page = requireNonNull(page, "page is null"); this.dictionaryBlock = (DictionaryBlock) page.getBlock(channels[0]); updateDictionaryLookBack(dictionaryBlock.getDictionary()); this.dictionaryPage = createPageWithExtractedDictionary(page); }
@Override public void writeBlock(BlockEncodingSerde blockEncodingSerde, SliceOutput sliceOutput, Block block) { // The down casts here are safe because it is the block itself the provides this encoding implementation. DictionaryBlock dictionaryBlock = (DictionaryBlock) block; dictionaryBlock = dictionaryBlock.compact(); // positionCount int positionCount = dictionaryBlock.getPositionCount(); sliceOutput.appendInt(positionCount); // dictionary Block dictionary = dictionaryBlock.getDictionary(); blockEncodingSerde.writeBlock(sliceOutput, dictionary); // ids sliceOutput.writeBytes(dictionaryBlock.getIds()); // instance id sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getMostSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getLeastSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getSequenceId()); }
private Page createPageWithExtractedDictionary(Page page) { Block[] blocks = new Block[page.getChannelCount()]; Block dictionary = ((DictionaryBlock) page.getBlock(channels[0])).getDictionary(); // extract data dictionary blocks[channels[0]] = dictionary; // extract hash dictionary if (inputHashChannel.isPresent()) { blocks[inputHashChannel.get()] = ((DictionaryBlock) page.getBlock(inputHashChannel.get())).getDictionary(); } return new Page(dictionary.getPositionCount(), blocks); }
/**
 * Compacting a non-compact dictionary block must produce a block with a new dictionary source id,
 * a dictionary holding only the expected values, remapped ids, and the compact flag set.
 * Compacting an already compact block must keep the dictionary source id.
 */
@Test
public void testCompact()
{
    Slice[] values = createExpectedValues(5);
    DictionaryBlock uncompacted = createDictionaryBlockWithUnreferencedKeys(values, 10);
    assertEquals(uncompacted.isCompact(), false);

    // first compaction: new source id, trimmed dictionary, remapped ids
    DictionaryBlock compacted = uncompacted.compact();
    assertNotEquals(uncompacted.getDictionarySourceId(), compacted.getDictionarySourceId());

    Slice[] referencedValues = new Slice[] {values[0], values[1], values[3]};
    assertEquals(compacted.getDictionary().getPositionCount(), (values.length / 2) + 1);
    assertBlock(compacted.getDictionary(), TestDictionaryBlock::createBlockBuilder, referencedValues);
    assertDictionaryIds(compacted, 0, 1, 1, 2, 2, 0, 1, 1, 2, 2);
    assertEquals(compacted.isCompact(), true);

    // second compaction: source id is unchanged
    DictionaryBlock compactedAgain = compacted.compact();
    assertEquals(compactedAgain.getDictionarySourceId(), compacted.getDictionarySourceId());
}
/**
 * Sets up projection work over the loaded form of the first block of {@code page}.
 *
 * <p>If that block is run-length encoded, its single value is used as the dictionary; if it is a
 * dictionary block, its dictionary is used; otherwise no dictionary is available. The dictionary
 * projection is created up front and the fallback projection starts out as {@code null}.
 *
 * @param session may be null
 * @param yieldSignal must not be null
 * @param page must not be null
 * @param selectedPositions must not be null
 */
public DictionaryAwarePageProjectionWork(@Nullable ConnectorSession session, DriverYieldSignal yieldSignal, Page page, SelectedPositions selectedPositions)
{
    this.session = session;
    this.yieldSignal = requireNonNull(yieldSignal, "yieldSignal is null");

    requireNonNull(page, "page is null");
    Block loadedBlock = page.getBlock(0).getLoadedBlock();
    this.block = loadedBlock;
    this.selectedPositions = requireNonNull(selectedPositions, "selectedPositions is null");

    Optional<Block> dictionary;
    if (loadedBlock instanceof RunLengthEncodedBlock) {
        dictionary = Optional.of(((RunLengthEncodedBlock) loadedBlock).getValue());
    }
    else if (loadedBlock instanceof DictionaryBlock) {
        dictionary = Optional.of(((DictionaryBlock) loadedBlock).getDictionary());
    }
    else {
        dictionary = Optional.empty();
    }

    // Dictionary processing is attempted first; the generic path is only a fallback if that fails.
    dictionaryProcessingProjectionWork = createDictionaryBlockProjection(dictionary);
    fallbackProcessingProjectionWork = null;
}
/**
 * Returns a dictionary block over the same dictionary that contains the ids of the requested positions.
 *
 * <p>The result is flagged compact only when this block is compact, at least as many positions
 * as there are dictionary entries were requested, and every dictionary entry is referenced by
 * the selected ids.
 *
 * @param positions array holding the positions to select
 * @param offset index of the first position to read from {@code positions}
 * @param length number of positions to read
 */
@Override
public Block getPositions(int[] positions, int offset, int length)
{
    checkArrayRange(positions, offset, length);

    int[] selectedIds = new int[length];
    boolean compact = isCompact();
    int dictionarySize = dictionary.getPositionCount();
    // fewer selected positions than dictionary entries can never reference all of them
    compact = compact && length >= dictionarySize;

    // only track referenced entries while the result may still be compact
    boolean[] referenced = compact ? new boolean[dictionarySize] : null;

    for (int index = 0; index < length; index++) {
        int id = getId(positions[offset + index]);
        selectedIds[index] = id;
        if (compact) {
            referenced[id] = true;
        }
    }

    if (compact) {
        for (boolean used : referenced) {
            if (!used) {
                compact = false;
                break;
            }
        }
    }

    return new DictionaryBlock(selectedIds.length, getDictionary(), selectedIds, compact, getDictionarySourceId());
}
public GetDictionaryGroupIdsWork(Page page) { this.page = requireNonNull(page, "page is null"); verify(canProcessDictionary(page), "invalid call to processDictionary"); this.dictionaryBlock = (DictionaryBlock) page.getBlock(channels[0]); updateDictionaryLookBack(dictionaryBlock.getDictionary()); this.dictionaryPage = createPageWithExtractedDictionary(page); // we know the exact size required for the block this.blockBuilder = BIGINT.createFixedSizeBlockBuilder(page.getPositionCount()); }
@Test public void testRoundTrip() { int positionCount = 40; // build dictionary BlockBuilder dictionaryBuilder = VARCHAR.createBlockBuilder(null, 4); VARCHAR.writeString(dictionaryBuilder, "alice"); VARCHAR.writeString(dictionaryBuilder, "bob"); VARCHAR.writeString(dictionaryBuilder, "charlie"); VARCHAR.writeString(dictionaryBuilder, "dave"); Block dictionary = dictionaryBuilder.build(); // build ids int[] ids = new int[positionCount]; for (int i = 0; i < 40; i++) { ids[i] = i % 4; } DictionaryBlock dictionaryBlock = new DictionaryBlock(dictionary, ids); DynamicSliceOutput sliceOutput = new DynamicSliceOutput(1024); blockEncodingSerde.writeBlock(sliceOutput, dictionaryBlock); Block actualBlock = blockEncodingSerde.readBlock(sliceOutput.slice().getInput()); assertTrue(actualBlock instanceof DictionaryBlock); DictionaryBlock actualDictionaryBlock = (DictionaryBlock) actualBlock; assertBlockEquals(VARCHAR, actualDictionaryBlock.getDictionary(), dictionary); for (int position = 0; position < actualDictionaryBlock.getPositionCount(); position++) { assertEquals(actualDictionaryBlock.getId(position), ids[position]); } assertEquals(actualDictionaryBlock.getDictionarySourceId(), dictionaryBlock.getDictionarySourceId()); }
/**
 * Copying the same position three times must yield a block with three positions backed by a
 * single-entry dictionary, with every id pointing at that entry.
 */
@Test
public void testCopyPositionsSamePosition()
{
    Slice[] values = createExpectedValues(10);
    DictionaryBlock source = createDictionaryBlock(values, 100);

    int[] repeatedPosition = {52, 52, 52};
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(repeatedPosition, 0, repeatedPosition.length);

    // the copied dictionary keeps only the one referenced value
    assertEquals(copy.getDictionary().getPositionCount(), 1);
    assertEquals(copy.getPositionCount(), repeatedPosition.length);

    Slice[] onlyValue = new Slice[] {values[2]};
    assertBlock(copy.getDictionary(), TestDictionaryBlock::createBlockBuilder, onlyValue);
    assertDictionaryIds(copy, 0, 0, 0);
}
/**
 * Copying positions that reference two distinct dictionary entries in alternating order must
 * yield a two-entry dictionary and ids that alternate between those entries.
 */
@Test
public void testCopyPositionsWithCompactionsAndReorder()
{
    Slice[] values = createExpectedValues(10);
    DictionaryBlock source = createDictionaryBlock(values, 100);

    int[] selected = {50, 55, 40, 45, 60};
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(selected, 0, selected.length);

    assertEquals(copy.getDictionary().getPositionCount(), 2);
    assertEquals(copy.getPositionCount(), selected.length);

    Slice[] keptValues = new Slice[] {values[0], values[5]};
    assertBlock(copy.getDictionary(), TestDictionaryBlock::createBlockBuilder, keptValues);
    assertDictionaryIds(copy, 0, 1, 0, 1, 0);
}
/**
 * The size of a dictionary block must equal the size of its dictionary plus one int per position.
 */
@Test
public void testSizeInBytes()
{
    int positionCount = 100;
    Slice[] values = createExpectedValues(10);
    DictionaryBlock block = createDictionaryBlock(values, positionCount);

    assertEquals(block.getSizeInBytes(), block.getDictionary().getSizeInBytes() + (positionCount * SIZE_OF_INT));
}
@Override public SelectedPositions filter(ConnectorSession session, Page page) { Block block = page.getBlock(0).getLoadedBlock(); if (block instanceof RunLengthEncodedBlock) { Block value = ((RunLengthEncodedBlock) block).getValue(); Optional<boolean[]> selectedPosition = processDictionary(session, value); // single value block is always considered effective, but the processing could have thrown // in that case we fallback and process again so the correct error message sent if (selectedPosition.isPresent()) { return SelectedPositions.positionsRange(0, selectedPosition.get()[0] ? page.getPositionCount() : 0); } } if (block instanceof DictionaryBlock) { DictionaryBlock dictionaryBlock = (DictionaryBlock) block; // Attempt to process the dictionary. If dictionary is processing has not been considered effective, an empty response will be returned Optional<boolean[]> selectedDictionaryPositions = processDictionary(session, dictionaryBlock.getDictionary()); // record the usage count regardless of dictionary processing choice, so we have stats for next time lastDictionaryUsageCount += page.getPositionCount(); // if dictionary was processed, produce a dictionary block; otherwise do normal processing if (selectedDictionaryPositions.isPresent()) { return selectDictionaryPositions(dictionaryBlock, selectedDictionaryPositions.get()); } } return filter.filter(session, new Page(block)); }
/**
 * Copying five positions that all hold the first expected value must yield a single-entry
 * dictionary and a block that reads as that value five times.
 */
@Test
public void testCopyPositionsWithCompaction()
{
    Slice[] values = createExpectedValues(10);
    Slice first = values[0];
    DictionaryBlock source = createDictionaryBlock(values, 100);

    int[] selected = {0, 10, 20, 30, 40};
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(selected, 0, selected.length);

    assertEquals(copy.getDictionary().getPositionCount(), 1);
    assertEquals(copy.getPositionCount(), selected.length);

    // dictionary content, then the logical content of the copied block
    assertBlock(copy.getDictionary(), TestDictionaryBlock::createBlockBuilder, new Slice[] {first});
    Slice[] expectedContent = new Slice[] {first, first, first, first, first};
    assertBlock(copy, TestDictionaryBlock::createBlockBuilder, expectedContent);
}
@Test public void testLogicalSizeInBytes() { // The 10 Slices in the array will be of lengths 0 to 9. Slice[] expectedValues = createExpectedValues(10); // The dictionary within the dictionary block is expected to be a VariableWidthBlock of size 95 bytes. // 45 bytes for the expectedValues Slices (sum of seq(0,9)) and 50 bytes for the position and isNull array (total 10 positions). DictionaryBlock dictionaryBlock = createDictionaryBlock(expectedValues, 100); assertEquals(dictionaryBlock.getDictionary().getLogicalSizeInBytes(), 95); // The 100 positions in the dictionary block index to 10 positions in the underlying dictionary (10 each). // Logical size calculation accounts for 4 bytes of offset and 1 byte of isNull. Therefore the expected unoptimized // size is 10 times the size of the underlying dictionary (VariableWidthBlock). assertEquals(dictionaryBlock.getLogicalSizeInBytes(), 95 * 10); // With alternating nulls, we have 21 positions, with the same size calculation as above. dictionaryBlock = createDictionaryBlock(alternatingNullValues(expectedValues), 210); assertEquals(dictionaryBlock.getDictionary().getPositionCount(), 21); assertEquals(dictionaryBlock.getDictionary().getLogicalSizeInBytes(), 150); // The null positions should be included in the logical size. assertEquals(dictionaryBlock.getLogicalSizeInBytes(), 150 * 10); }
@Test public void testCompactAllKeysReferenced() { Slice[] expectedValues = createExpectedValues(5); DictionaryBlock dictionaryBlock = createDictionaryBlock(expectedValues, 10); DictionaryBlock compactBlock = dictionaryBlock.compact(); // When there is nothing to compact, we return the same block assertEquals(compactBlock.getDictionary(), dictionaryBlock.getDictionary()); assertEquals(compactBlock.getPositionCount(), dictionaryBlock.getPositionCount()); for (int position = 0; position < compactBlock.getPositionCount(); position++) { assertEquals(compactBlock.getId(position), dictionaryBlock.getId(position)); } assertEquals(compactBlock.isCompact(), true); }
/**
 * Copying positions from a block built over a single expected value must keep the position count
 * of the selection and a dictionary equal to the original expected values.
 */
@Test
public void testCopyPositionsNoCompaction()
{
    Slice[] values = createExpectedValues(1);
    DictionaryBlock source = createDictionaryBlock(values, 100);

    int[] selected = {0, 2, 4, 5};
    DictionaryBlock copy = (DictionaryBlock) source.copyPositions(selected, 0, selected.length);

    assertEquals(copy.getPositionCount(), selected.length);
    assertBlock(copy.getDictionary(), TestDictionaryBlock::createBlockBuilder, values);
}
/**
 * Runs a compiled page processor that projects field 0 over a dictionary-encoded page and checks
 * that the single output page has 100 positions and a dictionary block with a 10-entry dictionary.
 */
@Test
public void testSanityColumnarDictionary()
{
    PageProcessor processor = compiler.compilePageProcessor(Optional.empty(), ImmutableList.of(field(0, VARCHAR)), MAX_BATCH_SIZE).get();

    Page input = new Page(createDictionaryBlock(createExpectedValues(10), 100));
    Page output = getOnlyElement(
            processor.process(
                    null,
                    new DriverYieldSignal(),
                    newSimpleAggregatedMemoryContext().newLocalMemoryContext(PageProcessor.class.getSimpleName()),
                    input))
            .orElseThrow(() -> new AssertionError("page is not present"));

    assertEquals(output.getPositionCount(), 100);
    assertTrue(output.getBlock(0) instanceof DictionaryBlock);

    // the output must still be dictionary encoded over the same number of entries
    DictionaryBlock projected = (DictionaryBlock) output.getBlock(0);
    assertEquals(projected.getDictionary().getPositionCount(), 10);
}
private static ColumnarRow toColumnarRow(DictionaryBlock dictionaryBlock) { // build a mapping from the old dictionary to a new dictionary with nulls removed Block dictionary = dictionaryBlock.getDictionary(); int[] newDictionaryIndex = new int[dictionary.getPositionCount()]; int nextNewDictionaryIndex = 0; for (int position = 0; position < dictionary.getPositionCount(); position++) { if (!dictionary.isNull(position)) { newDictionaryIndex[position] = nextNewDictionaryIndex; nextNewDictionaryIndex++; } } // reindex the dictionary int[] dictionaryIds = new int[dictionaryBlock.getPositionCount()]; int nonNullPositionCount = 0; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { if (!dictionaryBlock.isNull(position)) { int oldDictionaryId = dictionaryBlock.getId(position); dictionaryIds[nonNullPositionCount] = newDictionaryIndex[oldDictionaryId]; nonNullPositionCount++; } } ColumnarRow columnarRow = toColumnarRow(dictionaryBlock.getDictionary()); Block[] fields = new Block[columnarRow.getFieldCount()]; for (int i = 0; i < columnarRow.getFieldCount(); i++) { fields[i] = new DictionaryBlock(nonNullPositionCount, columnarRow.getField(i), dictionaryIds); } return new ColumnarRow(dictionaryBlock, fields); }
private static ColumnarArray toColumnarArray(DictionaryBlock dictionaryBlock) { ColumnarArray columnarArray = toColumnarArray(dictionaryBlock.getDictionary()); // build new offsets int[] offsets = new int[dictionaryBlock.getPositionCount() + 1]; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); offsets[position + 1] = offsets[position] + columnarArray.getLength(dictionaryId); } // reindex dictionary int[] dictionaryIds = new int[offsets[dictionaryBlock.getPositionCount()]]; int nextDictionaryIndex = 0; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); int length = columnarArray.getLength(dictionaryId); // adjust to the element block start offset int startOffset = columnarArray.getOffset(dictionaryId) - columnarArray.getOffset(0); for (int entryIndex = 0; entryIndex < length; entryIndex++) { dictionaryIds[nextDictionaryIndex] = startOffset + entryIndex; nextDictionaryIndex++; } } return new ColumnarArray( dictionaryBlock, 0, offsets, new DictionaryBlock(dictionaryIds.length, columnarArray.getElementsBlock(), dictionaryIds)); }
private static ColumnarMap toColumnarMap(DictionaryBlock dictionaryBlock) { ColumnarMap columnarMap = toColumnarMap(dictionaryBlock.getDictionary()); // build new offsets int[] offsets = new int[dictionaryBlock.getPositionCount() + 1]; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); offsets[position + 1] = offsets[position] + columnarMap.getEntryCount(dictionaryId); } // reindex dictionary int[] dictionaryIds = new int[offsets[dictionaryBlock.getPositionCount()]]; int nextDictionaryIndex = 0; for (int position = 0; position < dictionaryBlock.getPositionCount(); position++) { int dictionaryId = dictionaryBlock.getId(position); int entryCount = columnarMap.getEntryCount(dictionaryId); // adjust to the element block start offset int startOffset = columnarMap.getOffset(dictionaryId) - columnarMap.getOffset(0); for (int entryIndex = 0; entryIndex < entryCount; entryIndex++) { dictionaryIds[nextDictionaryIndex] = startOffset + entryIndex; nextDictionaryIndex++; } } return new ColumnarMap( dictionaryBlock, 0, offsets, new DictionaryBlock(dictionaryIds.length, columnarMap.getKeysBlock(), dictionaryIds), new DictionaryBlock(dictionaryIds.length, columnarMap.getValuesBlock(), dictionaryIds)); }