/**
 * Builds a {@link GroupByHash} for the given grouping channels.
 * <p>
 * When there is exactly one hash type, that type equals {@code BIGINT}, and exactly one
 * hash channel is supplied, a {@code BigintGroupByHash} is returned. Every other
 * combination yields a {@code MultiChannelGroupByHash}.
 *
 * @param hashTypes types of the grouping channels
 * @param hashChannels indexes of the grouping channels
 * @param inputHashChannel optional channel; the single-BIGINT implementation only receives
 *         whether it is present, the multi-channel implementation receives the value itself
 * @param expectedSize forwarded unchanged to the chosen implementation
 * @param processDictionary forwarded to the multi-channel implementation only
 * @param joinCompiler forwarded to the multi-channel implementation only
 * @param updateMemory forwarded unchanged to the chosen implementation
 * @return the selected group-by hash implementation
 */
static GroupByHash createGroupByHash(
        List<? extends Type> hashTypes,
        int[] hashChannels,
        Optional<Integer> inputHashChannel,
        int expectedSize,
        boolean processDictionary,
        JoinCompiler joinCompiler,
        UpdateMemory updateMemory)
{
    boolean singleBigintChannel = hashTypes.size() == 1
            && hashTypes.get(0).equals(BIGINT)
            && hashChannels.length == 1;

    if (!singleBigintChannel) {
        return new MultiChannelGroupByHash(hashTypes, hashChannels, inputHashChannel, expectedSize, processDictionary, joinCompiler, updateMemory);
    }
    return new BigintGroupByHash(hashChannels[0], inputHashChannel.isPresent(), expectedSize, updateMemory);
}
private int putIfAbsent(int position, Page page, long rawHash) { int hashPosition = (int) getHashPosition(rawHash, mask); // look for an empty slot or a slot containing this key int groupId = -1; while (groupAddressByHash[hashPosition] != -1) { if (positionNotDistinctFromCurrentRow(groupAddressByHash[hashPosition], hashPosition, position, page, (byte) rawHash, channels)) { // found an existing slot for this key groupId = groupIdsByHash[hashPosition]; break; } // increment position and mask to handle wrap around hashPosition = (hashPosition + 1) & mask; hashCollisions++; } // did we find an existing group? if (groupId < 0) { groupId = addNewGroup(hashPosition, position, page, rawHash); } return groupId; }
(calculateMaxFill(newCapacity) - maxFill) * Long.BYTES + currentPageSizeInBytes; if (!updateMemory.update()) { expectedHashCollisions += estimateNumberOfHashCollisions(getGroupCount(), hashCapacity); long rawHash = hashPosition(address); int pos = (int) getHashPosition(rawHash, newMask); while (newKey[pos] != -1) { pos = (pos + 1) & newMask; this.maxFill = calculateMaxFill(newCapacity); this.groupAddressByHash = newKey; this.rawHashByHashPosition = rawHashes;
@Override public boolean contains(int position, Page page, int[] hashChannels) { long rawHash = hashStrategy.hashRow(position, page); int hashPosition = (int) getHashPosition(rawHash, mask); // look for a slot containing this key while (groupAddressByHash[hashPosition] != -1) { if (positionNotDistinctFromCurrentRow(groupAddressByHash[hashPosition], hashPosition, position, page, (byte) rawHash, hashChannels)) { // found an existing slot for this key return true; } // increment position and mask to handle wrap around hashPosition = (hashPosition + 1) & mask; } return false; }
/**
 * Creates the unit of work that computes group ids for {@code page}.
 * <p>
 * The page's retained size is recorded in {@code currentPageSizeInBytes} first. The work
 * implementation is then chosen in this order: run-length encoded, dictionary, plain.
 *
 * @param page input page
 * @return work object that produces the {@code GroupByIdBlock} for the page
 */
@Override
public Work<GroupByIdBlock> getGroupIds(Page page)
{
    currentPageSizeInBytes = page.getRetainedSizeInBytes();

    Work<GroupByIdBlock> work;
    if (isRunLengthEncoded(page)) {
        work = new GetRunLengthEncodedGroupIdsWork(page);
    }
    else if (canProcessDictionary(page)) {
        work = new GetDictionaryGroupIdsWork(page);
    }
    else {
        work = new GetNonDictionaryGroupIdsWork(page);
    }
    return work;
}
/**
 * Adds the rows of a dictionary-encoded page to the hash.
 * <p>
 * The block at {@code channels[0]} is treated as a {@code DictionaryBlock}. Each row is
 * resolved through its dictionary id against a page built from the extracted dictionary,
 * and the resulting group id is discarded.
 *
 * @param page page for which {@code canProcessDictionary} must hold; otherwise
 *         {@code verify} fails
 */
private void addDictionaryPage(Page page)
{
    verify(canProcessDictionary(page), "invalid call to addDictionaryPage");

    DictionaryBlock encoded = (DictionaryBlock) page.getBlock(channels[0]);
    updateDictionaryLookBack(encoded.getDictionary());
    Page extracted = createPageWithExtractedDictionary(page);

    int rowCount = page.getPositionCount();
    for (int row = 0; row < rowCount; row++) {
        // called for its side effect of registering the group
        getGroupId(hashGenerator, extracted, encoded.getId(row));
    }
}
int pos = getHashPosition(hashPosition(address), newMask); while (newKey[pos] != -1) { pos = (pos + 1) & newMask; this.maxFill = calculateMaxFill(newCapacity); this.groupAddressByHash = newKey; this.groupIdsByHash = newValue;
hashStrategy = pagesHashStrategyFactory.createPagesHashStrategy(this.channelBuilders, this.precomputedHashChannel); startNewPage(); maxFill = calculateMaxFill(hashCapacity); mask = hashCapacity - 1; groupAddressByHash = new long[hashCapacity];
private int putIfAbsent(int position, Page page, int rawHash) { int hashPosition = getHashPosition(rawHash, mask); // look for an empty slot or a slot containing this key int groupId = -1; while (groupAddressByHash[hashPosition] != -1) { long address = groupAddressByHash[hashPosition]; if (positionEqualsCurrentRow(decodeSliceIndex(address), decodePosition(address), position, page)) { // found an existing slot for this key groupId = groupIdsByHash[hashPosition]; break; } // increment position and mask to handle wrap around hashPosition = (hashPosition + 1) & mask; } // did we find an existing group? if (groupId < 0) { groupId = addNewGroup(hashPosition, position, page, rawHash); } return groupId; }
@Override public void addPage(Page page) { Block maskBlock = null; if (maskChannel >= 0) { maskBlock = page.getBlock(maskChannel); } if (canProcessDictionary(page)) { addDictionaryPage(page); return; } // get the group id for each position int positionCount = page.getPositionCount(); for (int position = 0; position < positionCount; position++) { // skip masked rows if (maskBlock != null && !BOOLEAN.getBoolean(maskBlock, position)) { continue; } // get the group for the current row putIfAbsent(position, page); } }
@Override public GroupByIdBlock getGroupIds(Page page) { int positionCount = page.getPositionCount(); // we know the exact size required for the block BlockBuilder blockBuilder = BIGINT.createFixedSizeBlockBuilder(positionCount); Block maskBlock = null; if (maskChannel >= 0) { maskBlock = page.getBlock(maskChannel); } if (canProcessDictionary(page)) { Block groupIds = processDictionary(page); return new GroupByIdBlock(nextGroupId, groupIds); } // get the group id for each position for (int position = 0; position < positionCount; position++) { // skip masked rows if (maskBlock != null && !BOOLEAN.getBoolean(maskBlock, position)) { blockBuilder.appendNull(); continue; } // get the group for the current row int groupId = putIfAbsent(position, page); // output the group id for this row BIGINT.writeLong(blockBuilder, groupId); } return new GroupByIdBlock(nextGroupId, blockBuilder.build()); }
startNewPage(); if (needRehash()) { tryRehash();
/**
 * Hashes the row at {@code position} with {@code hashGenerator} and delegates to the
 * three-argument {@code putIfAbsent}.
 *
 * @param position row index within {@code page}
 * @param page page holding the row
 * @return the group id returned by the delegate
 */
private int putIfAbsent(int position, Page page)
{
    // kept as a long local so the delegate is selected exactly as before
    final long rowHash = hashGenerator.hashPosition(position, page);
    return putIfAbsent(position, page, rowHash);
}
/**
 * Returns the hash of the stored row identified by {@code sliceAddress}.
 * <p>
 * The address is decoded into a slice index and a position. When
 * {@code precomputedHashChannel} is present the hash is read with {@code getRawHash};
 * otherwise it is computed by {@code hashStrategy.hashPosition}.
 *
 * @param sliceAddress encoded address of a stored row
 * @return hash of that row
 */
private long hashPosition(long sliceAddress)
{
    int sliceIndex = decodeSliceIndex(sliceAddress);
    int slicePosition = decodePosition(sliceAddress);

    return precomputedHashChannel.isPresent()
            ? getRawHash(sliceIndex, slicePosition)
            : hashStrategy.hashPosition(sliceIndex, slicePosition);
}
@Override public boolean contains(int position, Page page, int[] hashChannels) { int rawHash = hashStrategy.hashRow(position, page.getBlocks()); int hashPosition = getHashPosition(rawHash, mask); // look for a slot containing this key while (groupAddressByHash[hashPosition] != -1) { long address = groupAddressByHash[hashPosition]; if (hashStrategy.positionEqualsRow(decodeSliceIndex(address), decodePosition(address), position, page, hashChannels)) { // found an existing slot for this key return true; } // increment position and mask to handle wrap around hashPosition = (hashPosition + 1) & mask; } return false; }
/**
 * Computes group ids for a dictionary-encoded page.
 * <p>
 * The block at {@code channels[0]} is treated as a {@code DictionaryBlock}. Each row's
 * dictionary id is resolved to a group id through {@code getGroupId} against a page built
 * from the extracted dictionary, and the id is written to the output block.
 *
 * @param page page for which {@code canProcessDictionary} must hold; otherwise
 *         {@code verify} fails
 * @return block with one group id per row of {@code page}
 */
private Block processDictionary(Page page)
{
    verify(canProcessDictionary(page), "invalid call to processDictionary");

    DictionaryBlock encoded = (DictionaryBlock) page.getBlock(channels[0]);
    updateDictionaryLookBack(encoded.getDictionary());
    Page extracted = createPageWithExtractedDictionary(page);

    int rowCount = page.getPositionCount();
    BlockBuilder groupIds = BIGINT.createFixedSizeBlockBuilder(rowCount);
    for (int row = 0; row < rowCount; row++) {
        int dictionaryId = encoded.getId(row);
        BIGINT.writeLong(groupIds, getGroupId(hashGenerator, extracted, dictionaryId));
    }

    // the output must have exactly one entry per input row
    verify(groupIds.getPositionCount() == page.getPositionCount(), "invalid position count");
    return groupIds.build();
}
hashStrategy = pagesHashStrategyFactory.createPagesHashStrategy(this.channelBuilders, this.precomputedHashChannel); startNewPage(); maxFill = calculateMaxFill(hashSize); mask = hashSize - 1; groupAddressByHash = new long[hashSize];
/**
 * Creates the unit of work that adds the rows of {@code page} to the hash.
 * <p>
 * The page's retained size is recorded in {@code currentPageSizeInBytes} first. The work
 * implementation is then chosen in this order: run-length encoded, dictionary, plain.
 *
 * @param page input page
 * @return work object that performs the insertion
 */
@Override
public Work<?> addPage(Page page)
{
    currentPageSizeInBytes = page.getRetainedSizeInBytes();

    Work<?> work;
    if (isRunLengthEncoded(page)) {
        work = new AddRunLengthEncodedPageWork(page);
    }
    else if (canProcessDictionary(page)) {
        work = new AddDictionaryPageWork(page);
    }
    else {
        work = new AddNonDictionaryPageWork(page);
    }
    return work;
}
/**
 * Resolves a dictionary position to its group id, using {@code dictionaryLookBack} as a cache.
 * <p>
 * If the position has already been processed, the recorded id is returned. Otherwise the
 * row is hashed and inserted with {@code putIfAbsent}, and the resulting id is recorded
 * before being returned.
 *
 * @param hashGenerator used to hash the row at {@code positionInDictionary}
 * @param page page in which {@code positionInDictionary} is a row index
 * @param positionInDictionary position to resolve
 * @return group id for that position
 */
private int getGroupId(HashGenerator hashGenerator, Page page, int positionInDictionary)
{
    if (!dictionaryLookBack.isProcessed(positionInDictionary)) {
        // first sighting of this dictionary entry: insert it and remember the id
        int freshGroupId = putIfAbsent(positionInDictionary, page, hashGenerator.hashPosition(positionInDictionary, page));
        dictionaryLookBack.setProcessed(positionInDictionary, freshGroupId);
        return freshGroupId;
    }
    return dictionaryLookBack.getGroupId(positionInDictionary);
}