/**
 * Translates the dictionary source id of {@code block} into its mapped id.
 * <p>
 * If {@code dictionarySourceIds} has no entry for the block's source id yet, an id obtained
 * from {@code randomDictionaryId()} is stored for it; the stored id is returned either way.
 *
 * @param block the dictionary block whose source id is looked up
 * @return the id associated with the block's dictionary source id
 */
@Override
public DictionaryId apply(DictionaryBlock block)
{
    DictionaryId originalSourceId = block.getDictionarySourceId();
    return dictionarySourceIds.computeIfAbsent(originalSourceId, unusedKey -> randomDictionaryId());
}
/**
 * Groups the dictionary-encoded entries of {@code blocks} by their dictionary source id.
 * Entries that are not {@code DictionaryBlock} instances are skipped.
 *
 * @return a map from dictionary source id to the {@code DictionaryBlockIndexes} that was given
 *         every {@code DictionaryBlock} with that source id, along with its index in {@code blocks}
 */
private Map<DictionaryId, DictionaryBlockIndexes> getRelatedDictionaryBlocks()
{
    Map<DictionaryId, DictionaryBlockIndexes> groupedBySourceId = new HashMap<>();
    for (int index = 0; index < blocks.length; index++) {
        if (!(blocks[index] instanceof DictionaryBlock)) {
            continue;
        }
        DictionaryBlock dictionaryBlock = (DictionaryBlock) blocks[index];
        DictionaryId sourceId = dictionaryBlock.getDictionarySourceId();
        // One DictionaryBlockIndexes per distinct source id, created on first sight
        DictionaryBlockIndexes group = groupedBySourceId.computeIfAbsent(sourceId, unusedKey -> new DictionaryBlockIndexes());
        group.addBlock(dictionaryBlock, index);
    }
    return groupedBySourceId;
}
private boolean canProcessDictionary(Page page) { if (!this.processDictionary || channels.length > 1 || !(page.getBlock(channels[0]) instanceof DictionaryBlock)) { return false; } if (inputHashChannel.isPresent()) { Block inputHashBlock = page.getBlock(inputHashChannel.get()); DictionaryBlock inputDataBlock = (DictionaryBlock) page.getBlock(channels[0]); if (!(inputHashBlock instanceof DictionaryBlock)) { // data channel is dictionary encoded but hash channel is not return false; } if (!((DictionaryBlock) inputHashBlock).getDictionarySourceId().equals(inputDataBlock.getDictionarySourceId())) { // dictionarySourceIds of data block and hash block do not match return false; } } return true; }
DictionaryId newDictionaryId = randomDictionaryId(); for (DictionaryBlock dictionaryBlock : blocks) { if (!firstDictionaryBlock.getDictionarySourceId().equals(dictionaryBlock.getDictionarySourceId())) { throw new IllegalArgumentException("dictionarySourceIds must be the same");
@Override public void writeBlock(BlockEncodingSerde blockEncodingSerde, SliceOutput sliceOutput, Block block) { // The down casts here are safe because it is the block itself the provides this encoding implementation. DictionaryBlock dictionaryBlock = (DictionaryBlock) block; dictionaryBlock = dictionaryBlock.compact(); // positionCount int positionCount = dictionaryBlock.getPositionCount(); sliceOutput.appendInt(positionCount); // dictionary Block dictionary = dictionaryBlock.getDictionary(); blockEncodingSerde.writeBlock(sliceOutput, dictionary); // ids sliceOutput.writeBytes(dictionaryBlock.getIds()); // instance id sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getMostSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getLeastSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getSequenceId()); }
/**
 * Checks that compacting a non-compact dictionary block yields a compact block with a different
 * dictionary source id, the expected dictionary and ids, and that compacting the result again
 * keeps the source id.
 */
@Test
public void testCompact()
{
    Slice[] values = createExpectedValues(5);
    DictionaryBlock original = createDictionaryBlockWithUnreferencedKeys(values, 10);
    assertEquals(original.isCompact(), false);

    // First compaction: new source id, reduced dictionary, remapped ids
    DictionaryBlock compacted = original.compact();
    assertNotEquals(original.getDictionarySourceId(), compacted.getDictionarySourceId());
    assertEquals(compacted.getDictionary().getPositionCount(), (values.length / 2) + 1);
    assertBlock(
            compacted.getDictionary(),
            TestDictionaryBlock::createBlockBuilder,
            new Slice[] {values[0], values[1], values[3]});
    assertDictionaryIds(compacted, 0, 1, 1, 2, 2, 0, 1, 1, 2, 2);
    assertEquals(compacted.isCompact(), true);

    // Second compaction of an already compact block: source id is unchanged
    DictionaryBlock compactedAgain = compacted.compact();
    assertEquals(compactedAgain.getDictionarySourceId(), compacted.getDictionarySourceId());
}
// Builds a DictionaryBlock with ids 0 through 5 over the slices block, checks that it exposes
// expectedValues[0..5] in order, then captures its dictionary source id and asserts that
// repeated getDictionarySourceId() calls on the same block keep returning an equal id.
Block dictionaryBlock = new DictionaryBlock(createSlicesBlock(expectedValues), new int[] {0, 1, 2, 3, 4, 5}); assertBlock(dictionaryBlock, TestDictionaryBlock::createBlockBuilder, new Slice[] {expectedValues[0], expectedValues[1], expectedValues[2], expectedValues[3], expectedValues[4], expectedValues[5]}); DictionaryId dictionaryId = ((DictionaryBlock) dictionaryBlock).getDictionarySourceId(); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId); assertEquals(((DictionaryBlock) dictionaryBlock).getDictionarySourceId(), dictionaryId);
/**
 * Creates a dictionary block containing the positions {@code positions[offset]} through
 * {@code positions[offset + length - 1]} of this block, sharing this block's dictionary and
 * dictionary source id.
 * <p>
 * The result is flagged compact only if this block is compact, {@code length} is at least the
 * dictionary's position count, and every dictionary entry is referenced by a selected position.
 *
 * @param positions array holding the positions to select
 * @param offset index of the first entry of {@code positions} to use
 * @param length number of entries of {@code positions} to use
 * @return the new dictionary block
 */
@Override
public Block getPositions(int[] positions, int offset, int length)
{
    checkArrayRange(positions, offset, length);

    int[] selectedIds = new int[length];
    boolean stillCompact = isCompact() && length >= dictionary.getPositionCount();

    // Only track referenced dictionary entries while compactness is still possible
    boolean[] referenced = stillCompact ? new boolean[dictionary.getPositionCount()] : null;

    for (int i = 0; i < length; i++) {
        int id = getId(positions[offset + i]);
        selectedIds[i] = id;
        if (referenced != null) {
            referenced[id] = true;
        }
    }

    if (referenced != null) {
        // A single unreferenced dictionary entry makes the result non-compact
        for (boolean hit : referenced) {
            if (!hit) {
                stillCompact = false;
                break;
            }
        }
    }

    return new DictionaryBlock(selectedIds.length, getDictionary(), selectedIds, stillCompact, getDictionarySourceId());
}
@Test public void testRoundTrip() { int positionCount = 40; // build dictionary BlockBuilder dictionaryBuilder = VARCHAR.createBlockBuilder(null, 4); VARCHAR.writeString(dictionaryBuilder, "alice"); VARCHAR.writeString(dictionaryBuilder, "bob"); VARCHAR.writeString(dictionaryBuilder, "charlie"); VARCHAR.writeString(dictionaryBuilder, "dave"); Block dictionary = dictionaryBuilder.build(); // build ids int[] ids = new int[positionCount]; for (int i = 0; i < 40; i++) { ids[i] = i % 4; } DictionaryBlock dictionaryBlock = new DictionaryBlock(dictionary, ids); DynamicSliceOutput sliceOutput = new DynamicSliceOutput(1024); blockEncodingSerde.writeBlock(sliceOutput, dictionaryBlock); Block actualBlock = blockEncodingSerde.readBlock(sliceOutput.slice().getInput()); assertTrue(actualBlock instanceof DictionaryBlock); DictionaryBlock actualDictionaryBlock = (DictionaryBlock) actualBlock; assertBlockEquals(VARCHAR, actualDictionaryBlock.getDictionary(), dictionary); for (int position = 0; position < actualDictionaryBlock.getPositionCount(); position++) { assertEquals(actualDictionaryBlock.getId(position), ids[position]); } assertEquals(actualDictionaryBlock.getDictionarySourceId(), dictionaryBlock.getDictionarySourceId()); }
// The dictionary blocks in channels 0 and 1 must report different dictionary source ids,
// while the blocks in channels 0 and 2 must report equal ones.
assertNotEquals(((DictionaryBlock) page.getBlock(0)).getDictionarySourceId(), ((DictionaryBlock) page.getBlock(1)).getDictionarySourceId()); assertEquals(((DictionaryBlock) page.getBlock(0)).getDictionarySourceId(), ((DictionaryBlock) page.getBlock(2)).getDictionarySourceId());
/**
 * Collects the {@code DictionaryBlock} entries of {@code blocks}, bucketed by dictionary
 * source id. Any entry of another block type is ignored.
 *
 * @return for each dictionary source id found, the {@code DictionaryBlockIndexes} holding the
 *         blocks with that id and their indexes in {@code blocks}
 */
private Map<DictionaryId, DictionaryBlockIndexes> getRelatedDictionaryBlocks()
{
    Map<DictionaryId, DictionaryBlockIndexes> bucketsBySourceId = new HashMap<>();
    for (int position = 0; position < blocks.length; position++) {
        Block candidate = blocks[position];
        if (!(candidate instanceof DictionaryBlock)) {
            continue;
        }
        DictionaryBlock dictionaryBlock = (DictionaryBlock) candidate;
        // Reuse the bucket for this source id, creating it the first time the id is seen
        DictionaryBlockIndexes bucket = bucketsBySourceId.computeIfAbsent(
                dictionaryBlock.getDictionarySourceId(),
                unusedKey -> new DictionaryBlockIndexes());
        bucket.addBlock(dictionaryBlock, position);
    }
    return bucketsBySourceId;
}
/**
 * Decides whether {@code page} can be handled by the dictionary-specific path.
 * <p>
 * That requires the {@code processDictionary} flag to be set, {@code maskChannel} to be
 * negative, exactly one channel to be configured, and that channel's block to be a
 * {@code DictionaryBlock}. When those hold and an input hash channel is present, the hash block
 * is verified to be a {@code DictionaryBlock} with the same dictionary source id as the data
 * block; a failed verification is raised through {@code verify} rather than returned.
 *
 * @param page the page to inspect
 * @return whether dictionary processing is applicable to the page
 */
private boolean canProcessDictionary(Page page)
{
    boolean eligible = this.processDictionary
            && maskChannel < 0
            && channels.length == 1
            && page.getBlock(channels[0]) instanceof DictionaryBlock;
    if (!eligible) {
        return false;
    }
    if (!inputHashChannel.isPresent()) {
        return true;
    }

    // Sanity-check the precomputed hash block against the data block
    Block hashBlock = page.getBlock(inputHashChannel.get());
    DictionaryBlock dataBlock = (DictionaryBlock) page.getBlock(channels[0]);
    verify(hashBlock instanceof DictionaryBlock, "data channel is dictionary encoded but hash channel is not");
    verify(
            ((DictionaryBlock) hashBlock).getDictionarySourceId().equals(dataBlock.getDictionarySourceId()),
            "dictionarySourceIds of data block and hash block do not match");
    return true;
}
DictionaryId newDictionaryId = randomDictionaryId(); for (DictionaryBlock dictionaryBlock : blocks) { if (!firstDictionaryBlock.getDictionarySourceId().equals(dictionaryBlock.getDictionarySourceId())) { throw new IllegalArgumentException("dictionarySourceIds must be the same");
@Override public void writeBlock(BlockEncodingSerde blockEncodingSerde, SliceOutput sliceOutput, Block block) { // The down casts here are safe because it is the block itself the provides this encoding implementation. DictionaryBlock dictionaryBlock = (DictionaryBlock) block; dictionaryBlock = dictionaryBlock.compact(); // positionCount int positionCount = dictionaryBlock.getPositionCount(); sliceOutput.appendInt(positionCount); // dictionary Block dictionary = dictionaryBlock.getDictionary(); blockEncodingSerde.writeBlock(sliceOutput, dictionary); // ids sliceOutput.writeBytes(dictionaryBlock.getIds()); // instance id sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getMostSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getLeastSignificantBits()); sliceOutput.appendLong(dictionaryBlock.getDictionarySourceId().getSequenceId()); }
/**
 * Checks that compacting a non-compact dictionary block produces a compact block with a new
 * dictionary source id, the expected dictionary and ids, and that a second compaction keeps
 * the source id.
 */
@Test
public void testCompact()
        throws Exception
{
    Slice[] values = createExpectedValues(5);
    DictionaryBlock original = createDictionaryBlockWithUnreferencedKeys(values, 10);
    assertEquals(original.isCompact(), false);

    // First compaction: new source id, reduced dictionary, remapped ids
    DictionaryBlock compacted = original.compact();
    assertNotEquals(original.getDictionarySourceId(), compacted.getDictionarySourceId());
    assertEquals(compacted.getDictionary().getPositionCount(), (values.length / 2) + 1);
    assertBlock(compacted.getDictionary(), new Slice[] {values[0], values[1], values[3]});
    assertEquals(compacted.getIds(), wrappedIntArray(0, 1, 1, 2, 2, 0, 1, 1, 2, 2));
    assertEquals(compacted.isCompact(), true);

    // Second compaction of an already compact block: source id is unchanged
    DictionaryBlock compactedAgain = compacted.compact();
    assertEquals(compactedAgain.getDictionarySourceId(), compacted.getDictionarySourceId());
}
/**
 * Applies {@code projection} to its single input channel of {@code inputPage}.
 * <p>
 * If the input block is a {@code RunLengthEncodedBlock}, the projection is evaluated once on
 * the block's value and the result is returned as a run-length encoded block of
 * {@code selectedPositions.length} positions. Otherwise the input block is cast to
 * {@code DictionaryBlock}: the result is a dictionary block built from
 * {@code projectDictionary} and {@code filterIds}, created with the compact flag set to
 * {@code false}.
 *
 * @param inputPage the page holding the input channel
 * @param selectedPositions the positions to keep; its length is the output position count
 * @param projection the projection to apply; must have exactly one input channel
 * @param dictionarySourceIds mapping from input dictionary source id to output dictionary
 *         source id; a missing entry is filled with an id from {@code randomDictionaryId()}
 * @return the projected block
 */
private Block projectColumnarDictionary(Page inputPage, int[] selectedPositions, ProjectionFunction projection, Map<DictionaryId, DictionaryId> dictionarySourceIds)
{
    int inputChannel = getOnlyElement(projection.getInputChannels());
    Block inputBlock = inputPage.getBlock(inputChannel);

    if (inputBlock instanceof RunLengthEncodedBlock) {
        RunLengthEncodedBlock rleBlock = (RunLengthEncodedBlock) inputBlock;
        // Only the projected channel is populated; the array is needed on this path alone
        Block[] blocks = new Block[inputPage.getChannelCount()];
        blocks[inputChannel] = rleBlock.getValue();
        BlockBuilder builder = projection.getType().createBlockBuilder(new BlockBuilderStatus(), 1);
        projection.project(0, blocks, builder);
        return new RunLengthEncodedBlock(builder.build(), selectedPositions.length);
    }

    Block outputDictionary = projectDictionary(projection, inputPage);
    int[] outputIds = filterIds(projection, inputPage, selectedPositions);

    DictionaryBlock dictionaryBlock = (DictionaryBlock) inputBlock;
    // Input blocks with equal source ids map to the same output source id
    DictionaryId sourceId = dictionarySourceIds.computeIfAbsent(dictionaryBlock.getDictionarySourceId(), ignored -> randomDictionaryId());
    return new DictionaryBlock(selectedPositions.length, outputDictionary, wrappedIntArray(outputIds), false, sourceId);
}
/**
 * Builds a dictionary block from the {@code length} positions listed in {@code positions}
 * starting at {@code offset}. The new block reuses this block's dictionary and dictionary
 * source id.
 * <p>
 * The compact flag of the result is set only when this block is compact, at least as many
 * positions are selected as the dictionary has entries, and each dictionary entry is hit by at
 * least one selected position.
 *
 * @param positions array holding the positions to select
 * @param offset index of the first entry of {@code positions} to use
 * @param length number of entries of {@code positions} to use
 * @return the new dictionary block
 */
@Override
public Block getPositions(int[] positions, int offset, int length)
{
    checkArrayRange(positions, offset, length);

    int[] pickedIds = new int[length];
    boolean compactCandidate = isCompact() && length >= dictionary.getPositionCount();

    // Usage of dictionary entries is recorded only while the result may still be compact
    boolean[] used = null;
    if (compactCandidate) {
        used = new boolean[dictionary.getPositionCount()];
    }

    int cursor = 0;
    while (cursor < length) {
        int id = getId(positions[offset + cursor]);
        pickedIds[cursor] = id;
        if (used != null) {
            used[id] = true;
        }
        cursor++;
    }

    if (used != null) {
        // Stop at the first dictionary entry that no selected position refers to
        for (boolean entryUsed : used) {
            if (!entryUsed) {
                compactCandidate = false;
                break;
            }
        }
    }

    return new DictionaryBlock(pickedIds.length, getDictionary(), pickedIds, compactCandidate, getDictionarySourceId());
}