protected void commonSetup() throws HiveException { super.commonSetup(); // Semi join specific. VectorMapJoinHashSet baseHashSet = (VectorMapJoinHashSet) vectorMapJoinHashTable; hashSetResults = new VectorMapJoinHashSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashSetResults.length; i++) { hashSetResults[i] = baseHashSet.createHashSetResult(); } allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; }
/**
 * On close, make sure a partially filled overflow batch gets forwarded.
 *
 * @param aborted true when the operator pipeline is being torn down due to an
 *          error; in that case the pending overflow rows are dropped.
 */
@Override
public void closeOp(boolean aborted) throws HiveException {
  super.closeOp(aborted);
  if (!aborted && overflowBatch.size > 0) {
    forwardOverflow();
  }
  // Use LOG.isDebugEnabled() directly for consistency with the other closeOp
  // implementations in this file (instead of the cached isLogDebugEnabled flag).
  if (LOG.isDebugEnabled()) {
    LOG.debug("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed");
  }
}
/** * Generate optimized results when entire batch key is repeated and it matched the hash map. * * @param batch * The big table batch. * @param hashMapResult * The hash map results for the repeated key. */ protected void generateHashMapResultRepeatedAll(VectorizedRowBatch batch, VectorMapJoinHashMapResult hashMapResult) throws IOException, HiveException { int[] selected = batch.selected; if (batch.selectedInUse) { // The selected array is already filled in as we want it. } else { for (int i = 0; i < batch.size; i++) { selected[i] = i; } batch.selectedInUse = true; } int numSel = 0; if (hashMapResult.isSingleRow()) { numSel = generateHashMapResultSingleValue(batch, hashMapResult, batch.selected, 0, batch.size, numSel); } else { generateHashMapResultMultiValue(batch, hashMapResult, batch.selected, 0, batch.size); } batch.size = numSel; }
/**
 * Spill one big table row to the given partition, lazily creating the spill
 * serializer on first use.
 */
protected void spillRow(VectorizedRowBatch batch, int batchIndex, int partitionId)
    throws HiveException, IOException {

  if (bigTableVectorSerializeRow == null) {
    setupSpillSerDe(batch);
  }

  spillSerializeRow(batch, batchIndex, partitionId);
}
/**
 * Forward the overflow batch, then reset it for reuse.
 */
protected void forwardOverflow() throws HiveException {
  forward(overflowBatch, null);
  overflowBatch.reset();
  maybeCheckInterrupt();
}
// NOTE(review): this span is a method-body fragment (no enclosing method header
// visible) and appears garbled — the two statements after the unconditional
// `return;` are unreachable; lines from two different methods were likely merged
// during extraction. Left byte-identical pending confirmation against the
// original file.
generateHashMapResultLargeMultiValue( batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount); return; doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult); forwardOverflow();
// NOTE(review): method-body fragment (no enclosing header visible) and appears
// garbled — the statements after the unconditional `return;` are unreachable;
// likely two different method bodies were merged during extraction. Left
// byte-identical pending confirmation against the original file.
generateHashMapResultLargeMultiValue( batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount); return; doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult); forwardOverflow();
// NOTE(review): method-body fragment (no enclosing header visible). Reads as:
// flush the full overflow batch, deserialize a small-table row into it, then
// forward without reset — presumably the tail of an overflow-handling path;
// confirm against the enclosing method in the original file.
forwardOverflow(); doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult); forwardOverflowNoReset();
// NOTE(review): method-body fragment (no enclosing header visible). Reads as:
// flush the full overflow batch, deserialize a small-table value row into it,
// then forward without reset — presumably the tail of an overflow-handling
// path; confirm against the enclosing method in the original file.
forwardOverflow(); doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult); forwardOverflowNoReset();
// NOTE(review): method-body fragment (no enclosing header visible) and appears
// garbled — `forwardOverflow()` after the unconditional `return;` is
// unreachable; likely a stray line from another method merged during
// extraction. Left byte-identical pending confirmation.
generateHashMapResultLargeMultiValue( batch, hashMapResult, allMatchs, allMatchesIndex, duplicateCount); return; forwardOverflow();
/**
 * Forward the overflow batch, leaving its contents intact (no reset).
 */
private void forwardOverflowNoReset() throws HiveException {
  forward(overflowBatch, null);
}
/**
 * Spill every row of the batch against the single (repeated-key) hash table
 * result, lazily creating the spill serializer on first use.
 */
protected void spillBatchRepeated(VectorizedRowBatch batch,
    VectorMapJoinHashTableResult hashTableResult) throws HiveException, IOException {

  if (bigTableVectorSerializeRow == null) {
    setupSpillSerDe(batch);
  }

  final int[] selected = batch.selected;
  final boolean selectedInUse = batch.selectedInUse;
  final int size = batch.size;
  for (int i = 0; i < size; i++) {
    // Map the logical position to the physical row when a selection is active.
    final int batchIndex = selectedInUse ? selected[i] : i;
    spillSerializeRow(batch, batchIndex, hashTableResult);
  }
}
/** * Forward the big table batch to the children. * * @param batch * The big table batch. */ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException { // Save original projection. int[] originalProjections = batch.projectedColumns; int originalProjectionSize = batch.projectionSize; // Project with the output of our operator. batch.projectionSize = outputProjection.length; batch.projectedColumns = outputProjection; forward(batch, null); // Revert the projected columns back, because batch can be re-used by our parent operators. batch.projectionSize = originalProjectionSize; batch.projectedColumns = originalProjections; }
/**
 * Spill the rows recorded in the spills array, each against the hash table
 * result its parallel index entry points at. Lazily creates the spill
 * serializer on first use.
 */
protected void spillHashMapBatch(VectorizedRowBatch batch,
    VectorMapJoinHashTableResult[] hashTableResults,
    int[] spills, int[] spillHashTableResultIndices, int spillCount)
        throws HiveException, IOException {

  if (bigTableVectorSerializeRow == null) {
    setupSpillSerDe(batch);
  }

  for (int spill = 0; spill < spillCount; spill++) {
    final int batchIndex = spills[spill];
    final VectorMapJoinHashTableResult hashTableResult =
        hashTableResults[spillHashTableResultIndices[spill]];
    spillSerializeRow(batch, batchIndex, hashTableResult);
  }
}
protected void commonSetup() throws HiveException { super.commonSetup(); // Inner big-table only join specific. VectorMapJoinHashMultiSet baseHashMultiSet = (VectorMapJoinHashMultiSet) vectorMapJoinHashTable; hashMultiSetResults = new VectorMapJoinHashMultiSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMultiSetResults.length; i++) { hashMultiSetResults[i] = baseHashMultiSet.createHashMultiSetResult(); } allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; equalKeySeriesValueCounts = new long[VectorizedRowBatch.DEFAULT_SIZE]; equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; }
/** * Generate optimized results when entire batch key is repeated and it matched the hash map. * * @param batch * The big table batch. * @param hashMapResult * The hash map results for the repeated key. */ protected void generateHashMapResultRepeatedAll(VectorizedRowBatch batch, VectorMapJoinHashMapResult hashMapResult) throws IOException, HiveException { int[] selected = batch.selected; if (batch.selectedInUse) { // The selected array is already filled in as we want it. } else { for (int i = 0; i < batch.size; i++) { selected[i] = i; } batch.selectedInUse = true; } int numSel = 0; if (hashMapResult.isSingleRow()) { numSel = generateHashMapResultSingleValue(batch, hashMapResult, batch.selected, 0, batch.size, numSel); } else { generateHashMapResultMultiValue(batch, hashMapResult, batch.selected, 0, batch.size); } batch.size = numSel; }
/** * On close, make sure a partially filled overflow batch gets forwarded. */ @Override public void closeOp(boolean aborted) throws HiveException { super.closeOp(aborted); // NOTE: The closeOp call on super MapJoinOperator can trigger Hybrid Grace additional // NOTE: processing and also FULL OUTER MapJoin non-match Small Table result generation. So, // NOTE: we flush the overflowBatch after the call. // if (!aborted && overflowBatch.size > 0) { forwardOverflow(); } if (LOG.isDebugEnabled()) { LOG.debug("VectorMapJoinInnerLongOperator closeOp " + batchCounter + " batches processed"); } }