private PhysicalExec createBestLeftOuterJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { if (isHashOuterJoinFeasible(context, plan.getRightChild())) { // we can implement left outer join using hash join, using the right operand as the build relation LOG.info("Left Outer Join (" + plan.getPID() +") chooses [Hash Join]."); return new HashLeftOuterJoinExec(context, plan, leftExec, rightExec); } else { //the right operand is too large, so we opt for merge join implementation LOG.info("Left Outer Join (" + plan.getPID() +") chooses [Merge Join]."); return createRightOuterMergeJoinPlan(context, plan, rightExec, leftExec); } }
private PhysicalExec createBestLeftOuterJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { if (isHashOuterJoinFeasible(context, plan.getRightChild())) { // we can implement left outer join using hash join, using the right operand as the build relation LOG.info("Left Outer Join (" + plan.getPID() +") chooses [Hash Join]."); return new HashLeftOuterJoinExec(context, plan, leftExec, rightExec); } else { //the right operand is too large, so we opt for merge join implementation LOG.info("Left Outer Join (" + plan.getPID() +") chooses [Merge Join]."); return createRightOuterMergeJoinPlan(context, plan, rightExec, leftExec); } }
private PhysicalExec createBestRightJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { //if the left operand is small enough => implement it as a left outer hash join with exchanged operators (note: // blocking, but merge join is blocking as well) if (isHashOuterJoinFeasible(context, plan.getLeftChild())){ LOG.info("Right Outer Join (" + plan.getPID() +") chooses [Hash Join]."); return new HashLeftOuterJoinExec(context, plan, rightExec, leftExec); } else { return createRightOuterMergeJoinPlan(context, plan, leftExec, rightExec); } }
private PhysicalExec createBestRightJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { //if the left operand is small enough => implement it as a left outer hash join with exchanged operators (note: // blocking, but merge join is blocking as well) if (isHashOuterJoinFeasible(context, plan.getLeftChild())){ LOG.info("Right Outer Join (" + plan.getPID() +") chooses [Hash Join]."); return new HashLeftOuterJoinExec(context, plan, rightExec, leftExec); } else { return createRightOuterMergeJoinPlan(context, plan, leftExec, rightExec); } }
private PhysicalExec createLeftOuterJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { Enforcer enforcer = context.getEnforcer(); EnforceProperty property = getAlgorithmEnforceProperty(enforcer, plan); if (property != null) { JoinAlgorithm algorithm = property.getJoin().getAlgorithm(); switch (algorithm) { case IN_MEMORY_HASH_JOIN: LOG.info("Left Outer Join (" + plan.getPID() +") chooses [Hash Join]."); return new HashLeftOuterJoinExec(context, plan, leftExec, rightExec); case MERGE_JOIN: //the right operand is too large, so we opt for merge join implementation LOG.info("Left Outer Join (" + plan.getPID() +") chooses [Merge Join]."); return createRightOuterMergeJoinPlan(context, plan, rightExec, leftExec); default: LOG.error("Invalid Left Outer Join Algorithm Enforcer: " + algorithm.name()); LOG.error("Choose a fallback to join algorithm: " + JoinAlgorithm.MERGE_JOIN); return createRightOuterMergeJoinPlan(context, plan, rightExec, leftExec); } } else { return createBestLeftOuterJoinPlan(context, plan, leftExec, rightExec); } }
/**
 * Builds the physical operator for a right outer join, honouring an enforced algorithm if any.
 *
 * <p>The enforce property for {@code plan} is looked up through the context's enforcer:
 * <ul>
 *   <li>no property: the choice is delegated to {@code createBestRightJoinPlan};</li>
 *   <li>{@code IN_MEMORY_HASH_JOIN}: a {@code HashLeftOuterJoinExec} with exchanged operands
 *       (rightExec, leftExec);</li>
 *   <li>{@code MERGE_JOIN}: {@code createRightOuterMergeJoinPlan} over (leftExec, rightExec);</li>
 *   <li>anything else: two error log lines, then the same merge join fallback.</li>
 * </ul>
 *
 * @param context   task attempt context supplying the enforcer
 * @param plan      logical join node being planned
 * @param leftExec  physical operator of the left operand
 * @param rightExec physical operator of the right operand
 * @return the selected physical join operator
 * @throws IOException declared by the signature; propagated from the callees
 */
private PhysicalExec createRightOuterJoinPlan(TaskAttemptContext context, JoinNode plan,
                                              PhysicalExec leftExec, PhysicalExec rightExec)
    throws IOException {
  EnforceProperty property = getAlgorithmEnforceProperty(context.getEnforcer(), plan);
  if (property == null) {
    return createBestRightJoinPlan(context, plan, leftExec, rightExec);
  }

  JoinAlgorithm algorithm = property.getJoin().getAlgorithm();
  if (algorithm == JoinAlgorithm.IN_MEMORY_HASH_JOIN) {
    LOG.info("Right Outer Join (" + plan.getPID() + ") chooses [Hash Join].");
    return new HashLeftOuterJoinExec(context, plan, rightExec, leftExec);
  }

  if (algorithm != JoinAlgorithm.MERGE_JOIN) {
    // Unsupported enforcement: report it and fall back to merge join.
    LOG.error("Invalid Right Outer Join Algorithm Enforcer: " + algorithm.name());
    LOG.error("Choose a fallback to join algorithm: " + JoinAlgorithm.MERGE_JOIN);
  }

  return createRightOuterMergeJoinPlan(context, plan, leftExec, rightExec);
}
/**
 * Builds the physical operator for a right outer join, honouring an enforced algorithm if any.
 *
 * <p>The enforce property for {@code plan} is looked up through the context's enforcer:
 * <ul>
 *   <li>no property: the choice is delegated to {@code createBestRightJoinPlan};</li>
 *   <li>{@code IN_MEMORY_HASH_JOIN}: a {@code HashLeftOuterJoinExec} with exchanged operands
 *       (rightExec, leftExec);</li>
 *   <li>{@code MERGE_JOIN}: {@code createRightOuterMergeJoinPlan} over (leftExec, rightExec);</li>
 *   <li>anything else: two error log lines, then the same merge join fallback.</li>
 * </ul>
 *
 * @param context   task attempt context supplying the enforcer
 * @param plan      logical join node being planned
 * @param leftExec  physical operator of the left operand
 * @param rightExec physical operator of the right operand
 * @return the selected physical join operator
 * @throws IOException declared by the signature; propagated from the callees
 */
private PhysicalExec createRightOuterJoinPlan(TaskAttemptContext context, JoinNode plan,
                                              PhysicalExec leftExec, PhysicalExec rightExec)
    throws IOException {
  EnforceProperty property = getAlgorithmEnforceProperty(context.getEnforcer(), plan);
  if (property == null) {
    return createBestRightJoinPlan(context, plan, leftExec, rightExec);
  }

  JoinAlgorithm algorithm = property.getJoin().getAlgorithm();
  if (algorithm == JoinAlgorithm.IN_MEMORY_HASH_JOIN) {
    LOG.info("Right Outer Join (" + plan.getPID() + ") chooses [Hash Join].");
    return new HashLeftOuterJoinExec(context, plan, rightExec, leftExec);
  }

  if (algorithm != JoinAlgorithm.MERGE_JOIN) {
    // Unsupported enforcement: report it and fall back to merge join.
    LOG.error("Invalid Right Outer Join Algorithm Enforcer: " + algorithm.name());
    LOG.error("Choose a fallback to join algorithm: " + JoinAlgorithm.MERGE_JOIN);
  }

  return createRightOuterMergeJoinPlan(context, plan, leftExec, rightExec);
}
private PhysicalExec createBestInnerJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { boolean inMemoryHashJoin = false; if (checkIfInMemoryInnerJoinIsPossible(context, plan.getLeftChild(), true) || checkIfInMemoryInnerJoinIsPossible(context, plan.getRightChild(), false)) { inMemoryHashJoin = true; } if (inMemoryHashJoin) { LOG.info("Join (" + plan.getPID() +") chooses [In-memory Hash Join]"); // returns two PhysicalExec. smaller one is 0, and larger one is 1. PhysicalExec [] orderedChilds = switchJoinSidesIfNecessary(context, plan, leftExec, rightExec); return new HashJoinExec(context, plan, orderedChilds[1], orderedChilds[0]); } else { return createMergeInnerJoin(context, plan, leftExec, rightExec); } }
private PhysicalExec createBestInnerJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { boolean inMemoryHashJoin = false; if (checkIfInMemoryInnerJoinIsPossible(context, plan.getLeftChild(), true) || checkIfInMemoryInnerJoinIsPossible(context, plan.getRightChild(), false)) { inMemoryHashJoin = true; } if (inMemoryHashJoin) { LOG.info("Join (" + plan.getPID() +") chooses [In-memory Hash Join]"); // returns two PhysicalExec. smaller one is 0, and larger one is 1. PhysicalExec [] orderedChilds = switchJoinSidesIfNecessary(context, plan, leftExec, rightExec); return new HashJoinExec(context, plan, orderedChilds[1], orderedChilds[0]); } else { return createMergeInnerJoin(context, plan, leftExec, rightExec); } }
private PhysicalExec createCrossJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { Enforcer enforcer = context.getEnforcer(); EnforceProperty property = getAlgorithmEnforceProperty(enforcer, plan); if (property != null) { JoinAlgorithm algorithm = property.getJoin().getAlgorithm(); switch (algorithm) { default: // fallback algorithm LOG.error("Invalid Cross Join Algorithm Enforcer: " + algorithm.name()); PhysicalExec [] orderedChilds = switchJoinSidesIfNecessary(context, plan, leftExec, rightExec); return new HashJoinExec(context, plan, orderedChilds[1], orderedChilds[0]); } } else { LOG.info("Join (" + plan.getPID() +") chooses [In-memory Hash Join]"); // returns two PhysicalExec. smaller one is 0, and larger one is 1. PhysicalExec [] orderedChilds = switchJoinSidesIfNecessary(context, plan, leftExec, rightExec); return new HashJoinExec(context, plan, orderedChilds[1], orderedChilds[0]); } }
private HashFullOuterJoinExec createFullOuterHashJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { String [] leftLineage = PlannerUtil.getRelationLineage(plan.getLeftChild()); String [] rightLineage = PlannerUtil.getRelationLineage(plan.getRightChild()); long outerSize2 = estimateSizeRecursive(context, leftLineage); long innerSize2 = estimateSizeRecursive(context, rightLineage); PhysicalExec selectedRight; PhysicalExec selectedLeft; // HashJoinExec loads the smaller relation to memory. if (outerSize2 <= innerSize2) { selectedLeft = leftExec; selectedRight = rightExec; } else { selectedLeft = rightExec; selectedRight = leftExec; } LOG.info("Full Outer Join (" + plan.getPID() + ") chooses [Hash Join]"); return new HashFullOuterJoinExec(context, plan, selectedRight, selectedLeft); }
private HashFullOuterJoinExec createFullOuterHashJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { String [] leftLineage = PlannerUtil.getRelationLineage(plan.getLeftChild()); String [] rightLineage = PlannerUtil.getRelationLineage(plan.getRightChild()); long outerSize2 = estimateSizeRecursive(context, leftLineage); long innerSize2 = estimateSizeRecursive(context, rightLineage); PhysicalExec selectedRight; PhysicalExec selectedLeft; // HashJoinExec loads the smaller relation to memory. if (outerSize2 <= innerSize2) { selectedLeft = leftExec; selectedRight = rightExec; } else { selectedLeft = rightExec; selectedRight = leftExec; } LOG.info("Full Outer Join (" + plan.getPID() + ") chooses [Hash Join]"); return new HashFullOuterJoinExec(context, plan, selectedRight, selectedLeft); }
private PhysicalExec createCrossJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { Enforcer enforcer = context.getEnforcer(); EnforceProperty property = getAlgorithmEnforceProperty(enforcer, plan); if (property != null) { JoinAlgorithm algorithm = property.getJoin().getAlgorithm(); switch (algorithm) { default: // fallback algorithm LOG.error("Invalid Cross Join Algorithm Enforcer: " + algorithm.name()); PhysicalExec [] orderedChilds = switchJoinSidesIfNecessary(context, plan, leftExec, rightExec); return new HashJoinExec(context, plan, orderedChilds[1], orderedChilds[0]); } } else { LOG.info("Join (" + plan.getPID() +") chooses [In-memory Hash Join]"); // returns two PhysicalExec. smaller one is 0, and larger one is 1. PhysicalExec [] orderedChilds = switchJoinSidesIfNecessary(context, plan, leftExec, rightExec); return new HashJoinExec(context, plan, orderedChilds[1], orderedChilds[0]); } }
/**
 * Visits a join node: plans both children, marks their execution blocks according to the join
 * type, builds the execution block for the join and registers it under the join node's PID.
 *
 * <p>Marking rules:
 * <ul>
 *   <li>{@code LEFT_OUTER}: left block is row-preserved, right block is null-supplying;</li>
 *   <li>{@code RIGHT_OUTER}: left block is null-supplying, right block is row-preserved;</li>
 *   <li>{@code FULL_OUTER}: both blocks receive both marks;</li>
 *   <li>any other join type: no marks are set.</li>
 * </ul>
 *
 * @param context global plan context holding {@code execBlockMap}
 * @param plan    logical plan being traversed
 * @param block   query block being traversed
 * @param node    join node to process
 * @param stack   traversal stack passed through to the child visits
 * @return {@code node} itself
 * @throws TajoException declared by the signature; propagated from the callees
 */
@Override
public LogicalNode visitJoin(GlobalPlanContext context, LogicalPlan plan,
                             LogicalPlan.QueryBlock block, JoinNode node,
                             Stack<LogicalNode> stack) throws TajoException {
  // Children are visited left first; each block is looked up right after its own visit.
  LogicalNode leftChild = visit(context, plan, block, node.getLeftChild(), stack);
  ExecutionBlock leftBlock = context.execBlockMap.get(leftChild.getPID());
  LogicalNode rightChild = visit(context, plan, block, node.getRightChild(), stack);
  ExecutionBlock rightBlock = context.execBlockMap.get(rightChild.getPID());

  JoinType joinType = node.getJoinType();
  boolean fullOuter = joinType == JoinType.FULL_OUTER;
  boolean leftSidePreserved = fullOuter || joinType == JoinType.LEFT_OUTER;
  boolean rightSidePreserved = fullOuter || joinType == JoinType.RIGHT_OUTER;

  // The side opposite a preserved side supplies the nulls; calls go left block first.
  if (leftSidePreserved) {
    leftBlock.setPreservedRow();
  }
  if (rightSidePreserved) {
    leftBlock.setNullSuppllying();
  }
  if (rightSidePreserved) {
    rightBlock.setPreservedRow();
  }
  if (leftSidePreserved) {
    rightBlock.setNullSuppllying();
  }

  ExecutionBlock joinBlock = buildJoinPlan(context, node, leftBlock, rightBlock);
  context.execBlockMap.put(node.getPID(), joinBlock);
  return node;
}
private PhysicalExec createRightOuterMergeJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { //the left operand is too large, so opt for merge join implementation LOG.info("Right Outer Join (" + plan.getPID() +") chooses [Merge Join]."); SortSpec[][] sortSpecs2 = PlannerUtil.getSortKeysFromJoinQual( plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); SortNode leftSortNode2 = LogicalPlan.createNodeWithoutPID(SortNode.class); leftSortNode2.setSortSpecs(sortSpecs2[0]); leftSortNode2.setInSchema(leftExec.getSchema()); leftSortNode2.setOutSchema(leftExec.getSchema()); ExternalSortExec outerSort2 = new ExternalSortExec(context, leftSortNode2, leftExec); SortNode rightSortNode2 = LogicalPlan.createNodeWithoutPID(SortNode.class); rightSortNode2.setSortSpecs(sortSpecs2[1]); rightSortNode2.setInSchema(rightExec.getSchema()); rightSortNode2.setOutSchema(rightExec.getSchema()); ExternalSortExec innerSort2 = new ExternalSortExec(context, rightSortNode2, rightExec); return new RightOuterMergeJoinExec(context, plan, outerSort2, innerSort2, sortSpecs2[0], sortSpecs2[1]); }
/**
 * Builds a merge-based inner join: each operand is wrapped in an {@code ExternalSortExec} on
 * the sort keys derived from the join qualifier, and the two sorted inputs are fed to a
 * {@code MergeJoinExec}.
 *
 * @param context   task attempt context passed on to the created operators
 * @param plan      logical join node supplying the join qualifier
 * @param leftExec  physical operator of the left operand
 * @param rightExec physical operator of the right operand
 * @return the merge join operator
 * @throws IOException declared by the signature; propagated from the callees
 */
private MergeJoinExec createMergeInnerJoin(TaskAttemptContext context, JoinNode plan,
                                           PhysicalExec leftExec, PhysicalExec rightExec)
    throws IOException {
  // Index 0 holds the sort keys for the left input, index 1 those for the right input.
  SortSpec[][] joinKeys = PlannerUtil.getSortKeysFromJoinQual(
      plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema());

  // The sort nodes keep the schema of their input unchanged.
  SortNode leftSort = LogicalPlan.createNodeWithoutPID(SortNode.class);
  leftSort.setSortSpecs(joinKeys[0]);
  leftSort.setInSchema(leftExec.getSchema());
  leftSort.setOutSchema(leftExec.getSchema());
  ExternalSortExec sortedLeft = new ExternalSortExec(context, leftSort, leftExec);

  SortNode rightSort = LogicalPlan.createNodeWithoutPID(SortNode.class);
  rightSort.setSortSpecs(joinKeys[1]);
  rightSort.setInSchema(rightExec.getSchema());
  rightSort.setOutSchema(rightExec.getSchema());
  ExternalSortExec sortedRight = new ExternalSortExec(context, rightSort, rightExec);

  // The choice is logged only once both sort operators have been created.
  LOG.info("Join (" + plan.getPID() + ") chooses [Merge Join]");
  return new MergeJoinExec(context, plan, sortedLeft, sortedRight, joinKeys[0], joinKeys[1]);
}
/**
 * Builds a merge-based inner join: each operand is wrapped in an {@code ExternalSortExec} on
 * the sort keys derived from the join qualifier, and the two sorted inputs are fed to a
 * {@code MergeJoinExec}.
 *
 * @param context   task attempt context passed on to the created operators
 * @param plan      logical join node supplying the join qualifier
 * @param leftExec  physical operator of the left operand
 * @param rightExec physical operator of the right operand
 * @return the merge join operator
 * @throws IOException declared by the signature; propagated from the callees
 */
private MergeJoinExec createMergeInnerJoin(TaskAttemptContext context, JoinNode plan,
                                           PhysicalExec leftExec, PhysicalExec rightExec)
    throws IOException {
  // Index 0 holds the sort keys for the left input, index 1 those for the right input.
  SortSpec[][] joinKeys = PlannerUtil.getSortKeysFromJoinQual(
      plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema());

  // The sort nodes keep the schema of their input unchanged.
  SortNode leftSort = LogicalPlan.createNodeWithoutPID(SortNode.class);
  leftSort.setSortSpecs(joinKeys[0]);
  leftSort.setInSchema(leftExec.getSchema());
  leftSort.setOutSchema(leftExec.getSchema());
  ExternalSortExec sortedLeft = new ExternalSortExec(context, leftSort, leftExec);

  SortNode rightSort = LogicalPlan.createNodeWithoutPID(SortNode.class);
  rightSort.setSortSpecs(joinKeys[1]);
  rightSort.setInSchema(rightExec.getSchema());
  rightSort.setOutSchema(rightExec.getSchema());
  ExternalSortExec sortedRight = new ExternalSortExec(context, rightSort, rightExec);

  // The choice is logged only once both sort operators have been created.
  LOG.info("Join (" + plan.getPID() + ") chooses [Merge Join]");
  return new MergeJoinExec(context, plan, sortedLeft, sortedRight, joinKeys[0], joinKeys[1]);
}
private PhysicalExec createRightOuterMergeJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { //the left operand is too large, so opt for merge join implementation LOG.info("Right Outer Join (" + plan.getPID() +") chooses [Merge Join]."); SortSpec[][] sortSpecs2 = PlannerUtil.getSortKeysFromJoinQual( plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); SortNode leftSortNode2 = LogicalPlan.createNodeWithoutPID(SortNode.class); leftSortNode2.setSortSpecs(sortSpecs2[0]); leftSortNode2.setInSchema(leftExec.getSchema()); leftSortNode2.setOutSchema(leftExec.getSchema()); ExternalSortExec outerSort2 = new ExternalSortExec(context, leftSortNode2, leftExec); SortNode rightSortNode2 = LogicalPlan.createNodeWithoutPID(SortNode.class); rightSortNode2.setSortSpecs(sortSpecs2[1]); rightSortNode2.setInSchema(rightExec.getSchema()); rightSortNode2.setOutSchema(rightExec.getSchema()); ExternalSortExec innerSort2 = new ExternalSortExec(context, rightSortNode2, rightExec); return new RightOuterMergeJoinExec(context, plan, outerSort2, innerSort2, sortSpecs2[0], sortSpecs2[1]); }
private MergeFullOuterJoinExec createFullOuterMergeJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { // if size too large, full outer merge join implementation LOG.info("Full Outer Join (" + plan.getPID() +") chooses [Merge Join]"); SortSpec[][] sortSpecs3 = PlannerUtil.getSortKeysFromJoinQual(plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); SortNode leftSortNode = LogicalPlan.createNodeWithoutPID(SortNode.class); leftSortNode.setSortSpecs(sortSpecs3[0]); leftSortNode.setInSchema(leftExec.getSchema()); leftSortNode.setOutSchema(leftExec.getSchema()); ExternalSortExec outerSort3 = new ExternalSortExec(context, leftSortNode, leftExec); SortNode rightSortNode = LogicalPlan.createNodeWithoutPID(SortNode.class); rightSortNode.setSortSpecs(sortSpecs3[1]); rightSortNode.setInSchema(rightExec.getSchema()); rightSortNode.setOutSchema(rightExec.getSchema()); ExternalSortExec innerSort3 = new ExternalSortExec(context, rightSortNode, rightExec); return new MergeFullOuterJoinExec(context, plan, outerSort3, innerSort3, sortSpecs3[0], sortSpecs3[1]); }
private MergeFullOuterJoinExec createFullOuterMergeJoinPlan(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) throws IOException { // if size too large, full outer merge join implementation LOG.info("Full Outer Join (" + plan.getPID() +") chooses [Merge Join]"); SortSpec[][] sortSpecs3 = PlannerUtil.getSortKeysFromJoinQual(plan.getJoinQual(), leftExec.getSchema(), rightExec.getSchema()); SortNode leftSortNode = LogicalPlan.createNodeWithoutPID(SortNode.class); leftSortNode.setSortSpecs(sortSpecs3[0]); leftSortNode.setInSchema(leftExec.getSchema()); leftSortNode.setOutSchema(leftExec.getSchema()); ExternalSortExec outerSort3 = new ExternalSortExec(context, leftSortNode, leftExec); SortNode rightSortNode = LogicalPlan.createNodeWithoutPID(SortNode.class); rightSortNode.setSortSpecs(sortSpecs3[1]); rightSortNode.setInSchema(rightExec.getSchema()); rightSortNode.setOutSchema(rightExec.getSchema()); ExternalSortExec innerSort3 = new ExternalSortExec(context, rightSortNode, rightExec); return new MergeFullOuterJoinExec(context, plan, outerSort3, innerSort3, sortSpecs3[0], sortSpecs3[1]); }