/**
 * Emits one joined row group based on the per-table key comparison results.
 *
 * Walks tables from the highest tag down: a table whose key matches the
 * smallest key (smallestPos entry <= 0 with a non-null key) contributes its
 * candidate rows; every other table gets a dummy/empty row. Once a strictly
 * negative entry is seen, all remaining (lower-tag) tables are dummied out,
 * mirroring the original early-break behavior.
 *
 * @param smallestPos per-table comparison markers from findSmallestKey()
 * @return the tags whose candidate rows were consumed and must be refetched
 * @throws HiveException if row generation fails
 */
private List<Byte> joinObject(int[] smallestPos) throws HiveException {
  List<Byte> needFetchList = new ArrayList<Byte>();
  boolean restAreDummies = false;
  for (byte pos = (byte) (smallestPos.length - 1); pos >= 0; pos--) {
    if (restAreDummies || smallestPos[pos] > 0 || keyWritables[pos] == null) {
      // Key does not match the smallest key (or tail after a negative marker).
      putDummyOrEmpty(pos);
      continue;
    }
    storage[pos] = candidateStorage[pos];
    needFetchList.add(pos);
    if (smallestPos[pos] < 0) {
      // Negative marker: every lower-tag table is dummied out.
      restAreDummies = true;
    }
  }
  checkAndGenObject();
  // Release the consumed groups so the next fetch can refill them.
  for (Byte pos : needFetchList) {
    this.candidateStorage[pos].clearRows();
    this.keyWritables[pos] = null;
  }
  return needFetchList;
}
private boolean processKey(byte alias, List<Object> key) throws HiveException { List<Object> keyWritable = keyWritables[alias]; if (keyWritable == null) { //the first group. keyWritables[alias] = key; return false; } else { int cmp = compareKeys(key, keyWritable); if (cmp != 0) { nextKeyWritables[alias] = key; return true; } return false; } }
if (firstFetchHappened) { joinFinalLeftData(); String alias = entry.getKey(); MergeQueue mergeQueue = entry.getValue(); setUpFetchContexts(alias, mergeQueue); fetchNextGroup(pos); List<Object> key = smbJoinComputeKeys(row, alias); List<Object> value = getFilteredValue(alias, row); boolean nextKeyGroup = processKey(alias, key); if (nextKeyGroup) { reportProgress(); numMapRowsRead++; List<Byte> smallestPos = null; do { smallestPos = joinOneGroup();
private List<Byte> joinOneGroup() throws HiveException { int[] smallestPos = findSmallestKey(); List<Byte> listOfNeedFetchNext = null; if(smallestPos != null) { listOfNeedFetchNext = joinObject(smallestPos); if (listOfNeedFetchNext.size() > 0) { // listOfNeedFetchNext contains all tables that we have joined data in their // candidateStorage, and we need to clear candidate storage and promote their // nextGroupStorage to candidateStorage and fetch data until we reach a // new group. for (Byte b : listOfNeedFetchNext) { fetchNextGroup(b); } } } return listOfNeedFetchNext; }
private void fetchNextGroup(Byte t) throws HiveException { if (foundNextKeyGroup[t]) { // first promote the next group to be the current group if we reached a // new group in the previous fetch if (this.nextKeyWritables[t] != null) { promoteNextGroupToCandidate(t); } else { this.keyWritables[t] = null; this.candidateStorage[t] = null; this.nextGroupStorage[t] = null; } foundNextKeyGroup[t] = false; } //for the big table, we only need to promote the next group to the current group. if(t == posBigTable) { return; } //for tables other than the big table, we need to fetch more data until reach a new group or done. while (!foundNextKeyGroup[t]) { if (fetchDone[t]) { break; } fetchOneRow(t); } if (!foundNextKeyGroup[t] && fetchDone[t]) { this.nextKeyWritables[t] = null; } }
/**
 * Initializes the map-side local work (bucket fetch contexts for the small
 * tables) before running the base-class initialization.
 *
 * @param hconf the job configuration
 * @throws HiveException if local-work setup fails
 */
@Override
public void initializeLocalWork(Configuration hconf) throws HiveException {
  initializeMapredLocalWork(getConf(), hconf, getConf().getLocalWork(), LOG);
  super.initializeLocalWork(hconf);
}
throws SemanticException { SMBJoinDesc smbJoinDesc = smbJoinOp.getConf(); List<ExprNodeDesc> keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0)); TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils RowSchema joinRS = smbJoinOp.getSchema(); smbJoinOp.getCompilationOpContext(), mapJoinDesc, joinRS, new ArrayList<Operator<? extends OperatorDesc>>()); List<Operator<? extends OperatorDesc>> childOps = smbJoinOp.getChildOperators(); for (Operator<? extends OperatorDesc> childOp : childOps) { childOp.replaceParent(smbJoinOp, mapJoinOp); smbJoinOp.setChildOperators(null); List<Operator<? extends OperatorDesc>> parentOps = smbJoinOp.getParentOperators(); for (Operator<? extends OperatorDesc> parentOp : parentOps) { parentOp.replaceChild(smbJoinOp, mapJoinOp); smbJoinOp.setParentOperators(null);
SMBMapJoinOperator smbJop = new SMBMapJoinOperator(mapJoinOp); SMBJoinDesc smbJoinDesc = new SMBJoinDesc(mapJoinOp.getConf()); smbJop.setConf(smbJoinDesc); HashMap<Byte, String> tagToAlias = new HashMap<Byte, String>(); for (int i = 0; i < srcs.length; i++) { smbJop.getParentOperators().remove(i); smbJop.getParentOperators().add(i, dummyStoreOp); smbJop.getConf().setQBJoinTreeProps(mapJoinOp.getConf());
if (firstFetchHappened) { joinFinalLeftData(); FetchOperator fetchOp = entry.getValue(); fetchOp.clearFetchContext(); setUpFetchOpContext(fetchOp, alias); fetchNextGroup(t); boolean nextKeyGroup = processKey(alias, key); if (nextKeyGroup) { reportProgress(); numMapRowsRead++; List<Byte> smallestPos = null; do { smallestPos = joinOneGroup();
String alias = entry.getKey(); MergeQueue mergeQueue = entry.getValue(); setUpFetchContexts(alias, mergeQueue); fetchNextGroup(pos); joinFinalLeftData();
private boolean validateSMBMapJoinOperator(SMBMapJoinOperator op) { SMBJoinDesc desc = op.getConf(); // Validation is the same as for map join, since the 'small' tables are not vectorized return validateMapJoinDesc(desc); }
MapredLocalWork localWork = smbJoinOp.getConf().getLocalWork(); for (Operator<? extends OperatorDesc> parentOp : smbJoinOp.getParentOperators()) { if (parentOp instanceof DummyStoreOperator) { Operator<? extends OperatorDesc> grandParentOp = parentOp.getParentOperators().get(0); smbJoinOp.replaceParent(parentOp, grandParentOp); grandParentOp.setChildOperators(parentOp.getChildOperators()); parentOp.setParentOperators(null);
/** * testValidateSMBJoinOperator validates that the SMB join operator can be vectorized. */ @Test public void testValidateSMBJoinOperator() { SMBMapJoinOperator map = new SMBMapJoinOperator(new CompilationOpContext()); SMBJoinDesc mjdesc = new SMBJoinDesc(); prepareAbstractMapJoin(map, mjdesc); map.setConf(mjdesc); Vectorizer vectorizer = new Vectorizer(); vectorizer.testSetCurrentBaseWork(new MapWork()); // UNDONE // Assert.assertTrue(vectorizer.validateMapWorkOperator(map, null, false)); }
/**
 * Verifies that each vectorized operator reports the same operator name as
 * its row-mode counterpart, so plan display and operator matching behave
 * identically regardless of execution mode.
 */
@Test
public void testOperatorNames() throws Exception {
  String selectName = SelectOperator.getOperatorName();
  assertEquals(selectName, new SelectOperator().getName());
  assertEquals(selectName, new VectorSelectOperator().getName());

  String groupByName = GroupByOperator.getOperatorName();
  assertEquals(groupByName, new GroupByOperator().getName());
  assertEquals(groupByName, new VectorGroupByOperator().getName());

  String filterName = FilterOperator.getOperatorName();
  assertEquals(filterName, new FilterOperator().getName());
  assertEquals(filterName, new VectorFilterOperator().getName());

  String limitName = LimitOperator.getOperatorName();
  assertEquals(limitName, new LimitOperator().getName());
  assertEquals(limitName, new VectorLimitOperator().getName());

  String mapName = MapOperator.getOperatorName();
  assertEquals(mapName, new MapOperator().getName());
  assertEquals(mapName, new VectorMapOperator().getName());

  String mapJoinName = MapJoinOperator.getOperatorName();
  assertEquals(mapJoinName, new MapJoinOperator().getName());
  assertEquals(mapJoinName, new VectorMapJoinOperator().getName());
  // The outer-filtered variant is still a map join as far as naming goes.
  assertEquals(mapJoinName, new VectorMapJoinOuterFilteredOperator().getName());

  String appMasterEventName = AppMasterEventOperator.getOperatorName();
  assertEquals(appMasterEventName, new AppMasterEventOperator().getName());
  assertEquals(appMasterEventName, new VectorAppMasterEventOperator().getName());

  String smbJoinName = SMBMapJoinOperator.getOperatorName();
  assertEquals(smbJoinName, new SMBMapJoinOperator().getName());
  assertEquals(smbJoinName, new VectorSMBMapJoinOperator().getName());

  String sparkHtsName = SparkHashTableSinkOperator.getOperatorName();
  assertEquals(sparkHtsName, new SparkHashTableSinkOperator().getName());
  assertEquals(sparkHtsName, new VectorSparkHashTableSinkOperator().getName());

  String sparkPruningName = SparkPartitionPruningSinkOperator.getOperatorName();
  assertEquals(sparkPruningName, new SparkPartitionPruningSinkOperator().getName());
  assertEquals(sparkPruningName, new VectorSparkPartitionPruningSinkOperator().getName());
}
/**
 * Joins the key group anchored at the table with the smallest current key.
 *
 * The smallest-key table always contributes its candidate rows; every other
 * table contributes rows only when its current key equals the smallest key,
 * and gets a dummy/empty row otherwise. After the joined rows are emitted,
 * the consumed groups are cleared so the next fetch can refill them.
 *
 * @param smallestPos tag of the table holding the smallest current key
 * @return the tags whose candidate rows were consumed and must be refetched
 * @throws HiveException if key comparison or row generation fails
 */
private List<Byte> joinObject(int smallestPos) throws HiveException {
  byte smallest = (byte) smallestPos;
  List<Byte> needFetchList = new ArrayList<Byte>();
  ArrayList<Object> smallestKey = keyWritables[smallestPos];

  // The anchor table always participates in the join.
  needFetchList.add(smallest);
  this.storage.put(smallest, this.candidateStorage[smallestPos]);

  for (Byte alias : order) {
    if (alias == smallest) {
      continue;
    }
    ArrayList<Object> aliasKey = keyWritables[alias];
    if (aliasKey != null && compareKeys(aliasKey, smallestKey) == 0) {
      // Matching key: this table's candidate rows join the group.
      this.storage.put(alias, this.candidateStorage[alias]);
      needFetchList.add(alias);
    } else {
      // No key left, or key differs: emit dummy/empty rows for outer joins.
      putDummyOrEmpty(alias);
    }
  }

  checkAndGenObject();

  // Release the consumed groups.
  for (Byte pos : needFetchList) {
    this.candidateStorage[pos].clear();
    this.keyWritables[pos] = null;
  }
  return needFetchList;
}
SMBMapJoinOperator newSMBJoinOp = getSMBMapJoinOp(currJoinWork); currWork.getMapWork().setLeftInputJoin(originalSMBJoinOp.getConf().isLeftInputJoin()); currWork.getMapWork().setBaseSrc(originalSMBJoinOp.getConf().getBaseSrc()); currWork.getMapWork().setMapAliases(originalSMBJoinOp.getConf().getMapAliases()); currJoinWork.getMapWork().setLeftInputJoin(originalSMBJoinOp.getConf().isLeftInputJoin()); currJoinWork.getMapWork().setBaseSrc(originalSMBJoinOp.getConf().getBaseSrc()); currJoinWork.getMapWork().setMapAliases(originalSMBJoinOp.getConf().getMapAliases()); SMBJoinDesc originalSMBJoinDesc = originalSMBJoinOp.getConf(); Byte[] order = originalSMBJoinDesc.getTagOrder(); int numAliases = order.length; Operator<?> parentOp = originalSMBJoinOp.getParentOperators().get(bigTablePosition); Set<String> aliases = GenMapRedUtils.findAliases(mapWork, parentOp);
/**
 * Points a small table's merge queue at the bucket files that correspond to
 * the big table's current input split.
 *
 * @param alias      the small table's alias
 * @param mergeQueue the fetch queue to (re)initialize for that alias
 * @throws HiveException if context setup fails
 */
private void setUpFetchContexts(String alias, MergeQueue mergeQueue) throws HiveException {
  mergeQueue.clearFetchContext();

  Path inputPath = getExecContext().getCurrentInputPath();
  BucketMapJoinContext bucketCtx = localWork.getBucketMapjoinContext();

  // Derive a file id for this split so downstream code can identify it.
  getExecContext().setFileId(bucketCtx.createFileId(inputPath.toString()));
  if (LOG.isInfoEnabled()) {
    LOG.info("set task id: " + getExecContext().getFileId());
  }

  // Instantiate the configured matcher and resolve the alias's bucket files
  // that pair with the big table's current bucket.
  BucketMatcher matcher = ReflectionUtil.newInstance(bucketCtx.getBucketMatcherClass(), null);
  matcher.setAliasBucketFileNameMapping(bucketCtx.getAliasBucketFileNameMapping());
  List<Path> aliasFiles = matcher.getAliasBucketFiles(inputPath.toString(),
      bucketCtx.getMapJoinBigTableAlias(), alias);

  mergeQueue.setupContext(aliasFiles);
}
/**
 * Returns this operator's display name for the Node interface.
 *
 * @return the operator name shared by all instances of this operator class
 */
@Override
public String getName() {
  return getOperatorName();
}
FetchOperator fetchOp = entry.getValue(); fetchOp.clearFetchContext(); setUpFetchOpContext(fetchOp, alias); fetchNextGroup(t); joinFinalLeftData();
throws SemanticException { SMBJoinDesc smbJoinDesc = smbJoinOp.getConf(); List<ExprNodeDesc> keyCols = smbJoinDesc.getKeys().get(Byte.valueOf((byte) 0)); TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(hconf, PlanUtils mapJoinDesc.setColumnExprMap(smbJoinDesc.getColumnExprMap()); RowSchema joinRS = smbJoinOp.getSchema(); smbJoinOp.getCompilationOpContext(), mapJoinDesc, joinRS, new ArrayList<Operator<? extends OperatorDesc>>()); List<Operator<? extends OperatorDesc>> childOps = smbJoinOp.getChildOperators(); for (Operator<? extends OperatorDesc> childOp : childOps) { childOp.replaceParent(smbJoinOp, mapJoinOp); smbJoinOp.setChildOperators(null); List<Operator<? extends OperatorDesc>> parentOps = smbJoinOp.getParentOperators(); for (Operator<? extends OperatorDesc> parentOp : parentOps) { parentOp.replaceChild(smbJoinOp, mapJoinOp); smbJoinOp.setParentOperators(null);