public HashJoinExec(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) { super(context, plan, leftExec, rightExec); isCrossJoin = plan.getJoinType().equals(JoinType.CROSS); }
public HashJoinExec(TaskAttemptContext context, JoinNode plan, PhysicalExec leftExec, PhysicalExec rightExec) { super(context, plan, leftExec, rightExec); isCrossJoin = plan.getJoinType().equals(JoinType.CROSS); }
/**
 * Decides whether a tuple may be loaded into the hash table.
 *
 * <p>For a full outer join every tuple is loadable, including tuples that carry null values,
 * because both inputs of the join are preserved-row relations as well as null-supplying relations.
 * For every other join type a tuple is loadable only when none of its values is null.
 *
 * <p>Anti joins get no special treatment in this method; for them the right table is expected
 * to be empty if there are any null values.
 *
 * @param plan  join node whose join type selects the rule
 * @param tuple candidate tuple
 * @return {@code true} if the plan is a full outer join or the tuple contains no null value
 */
private static boolean isLoadable(JoinNode plan, Tuple tuple) {
  if (plan.getJoinType().equals(JoinType.FULL_OUTER)) {
    return true;
  }

  // Any other join type: a single null value disqualifies the tuple.
  for (Datum value : tuple.getValues()) {
    if (value.isNull()) {
      return false;
    }
  }
  return true;
}
/**
 * Appends a parenthesized infix rendering of the join to {@code sb}:
 * an opening parenthesis, the left subtree, the join notation surrounded by single spaces,
 * the right subtree, and a closing parenthesis.
 *
 * <p>The join node is pushed on {@code stack} while its children are visited and popped afterwards.
 *
 * @param sb       output buffer
 * @param plan     logical plan being visited
 * @param block    query block being visited
 * @param joinNode join node to render
 * @param stack    stack of ancestor nodes
 * @return the given {@code joinNode}
 * @throws TajoException if visiting a child fails
 */
@Override
public LogicalNode visitJoin(StringBuilder sb, LogicalPlan plan, LogicalPlan.QueryBlock block, JoinNode joinNode,
                             Stack<LogicalNode> stack) throws TajoException {
  stack.push(joinNode);

  sb.append("(");
  visit(sb, plan, block, joinNode.getLeftChild(), stack);

  sb.append(" ");
  sb.append(getJoinNotation(joinNode.getJoinType()));
  sb.append(" ");

  visit(sb, plan, block, joinNode.getRightChild(), stack);
  sb.append(")");

  stack.pop();
  return joinNode;
}
/**
 * Appends a parenthesized infix rendering of the join to {@code sb}:
 * an opening parenthesis, the left subtree, the join notation surrounded by single spaces,
 * the right subtree, and a closing parenthesis.
 *
 * <p>The join node is pushed on {@code stack} while its children are visited and popped afterwards.
 *
 * @param sb       output buffer
 * @param plan     logical plan being visited
 * @param block    query block being visited
 * @param joinNode join node to render
 * @param stack    stack of ancestor nodes
 * @return the given {@code joinNode}
 * @throws TajoException if visiting a child fails
 */
@Override
public LogicalNode visitJoin(StringBuilder sb, LogicalPlan plan, LogicalPlan.QueryBlock block, JoinNode joinNode,
                             Stack<LogicalNode> stack) throws TajoException {
  stack.push(joinNode);

  sb.append("(");
  visit(sb, plan, block, joinNode.getLeftChild(), stack);

  sb.append(" ");
  sb.append(getJoinNotation(joinNode.getJoinType()));
  sb.append(" ");

  visit(sb, plan, block, joinNode.getRightChild(), stack);
  sb.append(")");

  stack.pop();
  return joinNode;
}
private static Set<EvalNode> extractNonPushableJoinQuals(final LogicalPlan plan, final LogicalPlan.QueryBlock block, final JoinNode joinNode, final Set<EvalNode> onPredicates, final Set<EvalNode> wherePredicates) throws TajoException { Set<EvalNode> nonPushableQuals = new HashSet<>(); // TODO: non-equi theta join quals must not be pushed until TAJO-742 is resolved. nonPushableQuals.addAll(extractNonEquiThetaJoinQuals(wherePredicates, block, joinNode)); nonPushableQuals.addAll(extractNonEquiThetaJoinQuals(onPredicates, block, joinNode)); // for outer joins if (PlannerUtil.isOuterJoinType(joinNode.getJoinType())) { nonPushableQuals.addAll(extractNonPushableOuterJoinQuals(plan, onPredicates, wherePredicates, joinNode)); } return nonPushableQuals; }
private static Set<EvalNode> extractNonPushableJoinQuals(final LogicalPlan plan, final LogicalPlan.QueryBlock block, final JoinNode joinNode, final Set<EvalNode> onPredicates, final Set<EvalNode> wherePredicates) throws TajoException { Set<EvalNode> nonPushableQuals = new HashSet<>(); // TODO: non-equi theta join quals must not be pushed until TAJO-742 is resolved. nonPushableQuals.addAll(extractNonEquiThetaJoinQuals(wherePredicates, block, joinNode)); nonPushableQuals.addAll(extractNonEquiThetaJoinQuals(onPredicates, block, joinNode)); // for outer joins if (PlannerUtil.isOuterJoinType(joinNode.getJoinType())) { nonPushableQuals.addAll(extractNonPushableOuterJoinQuals(plan, onPredicates, wherePredicates, joinNode)); } return nonPushableQuals; }
/**
 * Builds the hash table for the right input, delegating to the cross join builder
 * when the plan's join type is {@link JoinType#CROSS} and to the non-cross join builder otherwise.
 *
 * @return the hash table produced by the selected builder
 * @throws IOException if the selected builder fails
 */
protected TupleMap<TupleList> buildRightToHashTable() throws IOException {
  return plan.getJoinType().equals(JoinType.CROSS)
      ? buildRightToHashTableForCrossJoin()
      : buildRightToHashTableForNonCrossJoin();
}
/**
 * Builds the hash table for the right input, delegating to the cross join builder
 * when the plan's join type is {@link JoinType#CROSS} and to the non-cross join builder otherwise.
 *
 * @return the hash table produced by the selected builder
 * @throws IOException if the selected builder fails
 */
protected TupleMap<TupleList> buildRightToHashTable() throws IOException {
  return plan.getJoinType().equals(JoinType.CROSS)
      ? buildRightToHashTableForCrossJoin()
      : buildRightToHashTableForNonCrossJoin();
}
/**
 * Decides whether a tuple may be loaded into the hash table.
 *
 * <p>For a full outer join every tuple is loadable, including tuples that carry null values,
 * because both inputs of the join are preserved-row relations as well as null-supplying relations.
 * For every other join type a tuple is loadable only when none of its values is null.
 *
 * <p>Anti joins get no special treatment in this method; for them the right table is expected
 * to be empty if there are any null values.
 *
 * @param plan  join node whose join type selects the rule
 * @param tuple candidate tuple
 * @return {@code true} if the plan is a full outer join or the tuple contains no null value
 */
private static boolean isLoadable(JoinNode plan, Tuple tuple) {
  if (plan.getJoinType().equals(JoinType.FULL_OUTER)) {
    return true;
  }

  // Scan until the first null value; the tuple stays loadable only if none is found.
  boolean nullFree = true;
  for (Datum value : tuple.getValues()) {
    if (value.isNull()) {
      nullFree = false;
      break;
    }
  }
  return nullFree;
}
/**
 * Visits both children of a join, marks their execution blocks according to the join type,
 * builds the execution block for the join itself and registers it under the join's PID.
 *
 * <p>Marking rules:
 * <ul>
 *   <li>LEFT_OUTER: left block is preserved-row, right block is null-supplying.</li>
 *   <li>RIGHT_OUTER: left block is null-supplying, right block is preserved-row.</li>
 *   <li>FULL_OUTER: both blocks are preserved-row and null-supplying.</li>
 *   <li>Any other join type: no marking.</li>
 * </ul>
 *
 * @param context global planning context holding the PID-to-execution-block map
 * @param plan    logical plan being visited
 * @param block   query block being visited
 * @param node    join node to plan
 * @param stack   stack of ancestor nodes
 * @return the given {@code node}
 * @throws TajoException if visiting a child or building the join plan fails
 */
@Override
public LogicalNode visitJoin(GlobalPlanContext context, LogicalPlan plan, LogicalPlan.QueryBlock block,
                             JoinNode node, Stack<LogicalNode> stack) throws TajoException {
  LogicalNode visitedLeft = visit(context, plan, block, node.getLeftChild(), stack);
  ExecutionBlock leftChildBlock = context.execBlockMap.get(visitedLeft.getPID());

  LogicalNode visitedRight = visit(context, plan, block, node.getRightChild(), stack);
  ExecutionBlock rightChildBlock = context.execBlockMap.get(visitedRight.getPID());

  final JoinType joinType = node.getJoinType();
  final boolean leftOuter = joinType == JoinType.LEFT_OUTER;
  final boolean rightOuter = joinType == JoinType.RIGHT_OUTER;
  final boolean fullOuter = joinType == JoinType.FULL_OUTER;

  // The four checks are ordered so that setters fire in the same sequence for each join type.
  if (leftOuter || fullOuter) {
    leftChildBlock.setPreservedRow();
  }
  if (rightOuter || fullOuter) {
    leftChildBlock.setNullSuppllying();
  }
  if (rightOuter || fullOuter) {
    rightChildBlock.setPreservedRow();
  }
  if (leftOuter || fullOuter) {
    rightChildBlock.setNullSuppllying();
  }

  ExecutionBlock joinBlock = buildJoinPlan(context, node, leftChildBlock, rightChildBlock);
  context.execBlockMap.put(node.getPID(), joinBlock);
  return node;
}
/**
 * Visits both children of a join, marks their execution blocks according to the join type,
 * builds the execution block for the join itself and registers it under the join's PID.
 *
 * <p>Marking rules:
 * <ul>
 *   <li>LEFT_OUTER: left block is preserved-row, right block is null-supplying.</li>
 *   <li>RIGHT_OUTER: left block is null-supplying, right block is preserved-row.</li>
 *   <li>FULL_OUTER: both blocks are preserved-row and null-supplying.</li>
 *   <li>Any other join type: no marking.</li>
 * </ul>
 *
 * @param context global planning context holding the PID-to-execution-block map
 * @param plan    logical plan being visited
 * @param block   query block being visited
 * @param node    join node to plan
 * @param stack   stack of ancestor nodes
 * @return the given {@code node}
 * @throws TajoException if visiting a child or building the join plan fails
 */
@Override
public LogicalNode visitJoin(GlobalPlanContext context, LogicalPlan plan, LogicalPlan.QueryBlock block,
                             JoinNode node, Stack<LogicalNode> stack) throws TajoException {
  LogicalNode visitedLeft = visit(context, plan, block, node.getLeftChild(), stack);
  ExecutionBlock leftChildBlock = context.execBlockMap.get(visitedLeft.getPID());

  LogicalNode visitedRight = visit(context, plan, block, node.getRightChild(), stack);
  ExecutionBlock rightChildBlock = context.execBlockMap.get(visitedRight.getPID());

  final JoinType joinType = node.getJoinType();
  final boolean leftOuter = joinType == JoinType.LEFT_OUTER;
  final boolean rightOuter = joinType == JoinType.RIGHT_OUTER;
  final boolean fullOuter = joinType == JoinType.FULL_OUTER;

  // The four checks are ordered so that setters fire in the same sequence for each join type.
  if (leftOuter || fullOuter) {
    leftChildBlock.setPreservedRow();
  }
  if (rightOuter || fullOuter) {
    leftChildBlock.setNullSuppllying();
  }
  if (rightOuter || fullOuter) {
    rightChildBlock.setPreservedRow();
  }
  if (leftOuter || fullOuter) {
    rightChildBlock.setNullSuppllying();
  }

  ExecutionBlock joinBlock = buildJoinPlan(context, node, leftChildBlock, rightChildBlock);
  context.execBlockMap.put(node.getPID(), joinBlock);
  return node;
}
/**
 * Creates a {@link JoinType#CROSS} join node over the two given inputs.
 *
 * <p>The join's input schema is the merge of both children's output schemas. Every still
 * unevaluated named expression of the query block that can be turned into an eval node, and that
 * contains neither a distinct aggregation function nor a window function, is marked as evaluated
 * and added to the join's targets after the targets derived from the merged schema.
 *
 * @param context planning context providing the plan and the current query block
 * @param left    left input of the cross join
 * @param right   right input of the cross join
 * @return the newly created join node
 * @throws TajoException if planning fails
 */
private LogicalNode createCartesianProduct(PlanContext context, LogicalNode left, LogicalNode right)
    throws TajoException {
  LogicalPlan plan = context.plan;
  QueryBlock block = context.queryBlock;

  Schema merged = SchemaUtil.merge(left.getOutSchema(), right.getOutSchema());
  JoinNode join = plan.createNode(JoinNode.class);
  join.init(JoinType.CROSS, left, right);
  join.setInSchema(merged);
  block.addJoinType(join.getJoinType());

  List<String> evaluatedAliases = TUtil.newList();
  Iterator<NamedExpr> pending = block.namedExprsMgr.getIteratorForUnevaluatedExprs();
  while (pending.hasNext()) {
    NamedExpr namedExpr = pending.next();
    try {
      EvalNode evalNode = exprAnnotator.createEvalNode(context, namedExpr.getExpr(), NameResolvingMode.LEGACY);

      boolean hasDistinctAgg = EvalTreeUtil.findDistinctAggFunction(evalNode).size() != 0;
      if (!hasDistinctAgg && EvalTreeUtil.findWindowFunction(evalNode).size() == 0) {
        block.namedExprsMgr.markAsEvaluated(namedExpr.getAlias(), evalNode);
        evaluatedAliases.add(namedExpr.getAlias());
      }
    } catch (UndefinedColumnException ignored) {
      // Deliberately ignored: the expression simply stays unevaluated.
      // NOTE(review): presumably it refers to a column not resolvable at this point — confirm.
    }
  }

  // Schema-derived targets come first, followed by the expressions evaluated above.
  List<Target> targets = TUtil.newList(PlannerUtil.schemaToTargets(merged));
  for (String alias : evaluatedAliases) {
    targets.add(block.namedExprsMgr.getTarget(alias, true));
  }
  join.setTargets(targets.toArray(new Target[targets.size()]));

  return join;
}
/**
 * Creates the physical executor for a join by dispatching on the join type to the
 * matching {@code create*JoinPlan} factory method.
 *
 * @param context   task attempt context
 * @param joinNode  logical join node to plan
 * @param leftExec  executor producing the left input
 * @param rightExec executor producing the right input
 * @return the physical executor built for the join
 * @throws IOException if plan creation fails; a {@code PhysicalPlanningException} is thrown
 *                     for a join type that has no matching case
 */
public PhysicalExec createJoinPlan(TaskAttemptContext context, JoinNode joinNode, PhysicalExec leftExec,
                                   PhysicalExec rightExec) throws IOException {
  final JoinType joinType = joinNode.getJoinType();
  final PhysicalExec joinExec;

  switch (joinType) {
    case CROSS:
      joinExec = createCrossJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case INNER:
      joinExec = createInnerJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case LEFT_OUTER:
      joinExec = createLeftOuterJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case RIGHT_OUTER:
      joinExec = createRightOuterJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case FULL_OUTER:
      joinExec = createFullOuterJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case LEFT_SEMI:
      joinExec = createLeftSemiJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case RIGHT_SEMI:
      joinExec = createRightSemiJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case LEFT_ANTI:
      joinExec = createLeftAntiJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case RIGHT_ANTI:
      joinExec = createRightAntiJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    default:
      throw new PhysicalPlanningException("Cannot support join type: " + joinType.name());
  }

  return joinExec;
}
/**
 * Creates the physical executor for a join by dispatching on the join type to the
 * matching {@code create*JoinPlan} factory method.
 *
 * @param context   task attempt context
 * @param joinNode  logical join node to plan
 * @param leftExec  executor producing the left input
 * @param rightExec executor producing the right input
 * @return the physical executor built for the join
 * @throws IOException if plan creation fails; a {@code PhysicalPlanningException} is thrown
 *                     for a join type that has no matching case
 */
public PhysicalExec createJoinPlan(TaskAttemptContext context, JoinNode joinNode, PhysicalExec leftExec,
                                   PhysicalExec rightExec) throws IOException {
  final JoinType joinType = joinNode.getJoinType();
  final PhysicalExec joinExec;

  switch (joinType) {
    case CROSS:
      joinExec = createCrossJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case INNER:
      joinExec = createInnerJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case LEFT_OUTER:
      joinExec = createLeftOuterJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case RIGHT_OUTER:
      joinExec = createRightOuterJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case FULL_OUTER:
      joinExec = createFullOuterJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case LEFT_SEMI:
      joinExec = createLeftSemiJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case RIGHT_SEMI:
      joinExec = createRightSemiJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case LEFT_ANTI:
      joinExec = createLeftAntiJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    case RIGHT_ANTI:
      joinExec = createRightAntiJoinPlan(context, joinNode, leftExec, rightExec);
      break;
    default:
      throw new PhysicalPlanningException("Cannot support join type: " + joinType.name());
  }

  return joinExec;
}
private DataChannel createDataChannelFromJoin(ExecutionBlock leftBlock, ExecutionBlock rightBlock, ExecutionBlock parent, JoinNode join, boolean leftTable) { ExecutionBlock childBlock = leftTable ? leftBlock : rightBlock; DataChannel channel = new DataChannel(childBlock, parent, HASH_SHUFFLE, 32); channel.setDataFormat(dataFormat); if (join.getJoinType() != JoinType.CROSS) { // ShuffleKeys need to not have thea-join condition because Tajo supports only equi-join. Column [][] joinColumns = PlannerUtil.joinJoinKeyForEachTable(join.getJoinQual(), leftBlock.getPlan().getOutSchema(), rightBlock.getPlan().getOutSchema(), false); if (leftTable) { channel.setShuffleKeys(joinColumns[0]); } else { channel.setShuffleKeys(joinColumns[1]); } } return channel; }
private DataChannel createDataChannelFromJoin(ExecutionBlock leftBlock, ExecutionBlock rightBlock, ExecutionBlock parent, JoinNode join, boolean leftTable) { ExecutionBlock childBlock = leftTable ? leftBlock : rightBlock; DataChannel channel = new DataChannel(childBlock, parent, HASH_SHUFFLE, 32); channel.setDataFormat(dataFormat); if (join.getJoinType() != JoinType.CROSS) { // ShuffleKeys need to not have thea-join condition because Tajo supports only equi-join. Column [][] joinColumns = PlannerUtil.joinJoinKeyForEachTable(join.getJoinQual(), leftBlock.getPlan().getOutSchema(), rightBlock.getPlan().getOutSchema(), false); if (leftTable) { channel.setShuffleKeys(joinColumns[0]); } else { channel.setShuffleKeys(joinColumns[1]); } } return channel; }
@Override public LogicalNode visitJoin(SerializeContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, JoinNode join, Stack<LogicalNode> stack) throws TajoException { super.visitJoin(context, plan, block, join, stack); int [] childIds = registerGetChildIds(context, join); // building itself PlanProto.JoinNode.Builder joinBuilder = PlanProto.JoinNode.newBuilder(); joinBuilder.setJoinType(convertJoinType(join.getJoinType())); joinBuilder.setLeftChildSeq(childIds[0]); joinBuilder.setRightChilSeq(childIds[1]); if (join.hasJoinQual()) { joinBuilder.setJoinQual(EvalNodeSerializer.serialize(join.getJoinQual())); } if (join.hasTargets()) { joinBuilder.setExistsTargets(true); joinBuilder.addAllTargets(ProtoUtil.<PlanProto.Target>toProtoObjects(join.getTargets().toArray(new ProtoObject[join.getTargets().size()]))); } else { joinBuilder.setExistsTargets(false); } PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, join); nodeBuilder.setJoin(joinBuilder); context.treeBuilder.addNodes(nodeBuilder); return join; }
@Override public LogicalNode visitJoin(SerializeContext context, LogicalPlan plan, LogicalPlan.QueryBlock block, JoinNode join, Stack<LogicalNode> stack) throws TajoException { super.visitJoin(context, plan, block, join, stack); int [] childIds = registerGetChildIds(context, join); // building itself PlanProto.JoinNode.Builder joinBuilder = PlanProto.JoinNode.newBuilder(); joinBuilder.setJoinType(convertJoinType(join.getJoinType())); joinBuilder.setLeftChildSeq(childIds[0]); joinBuilder.setRightChilSeq(childIds[1]); if (join.hasJoinQual()) { joinBuilder.setJoinQual(EvalNodeSerializer.serialize(join.getJoinQual())); } if (join.hasTargets()) { joinBuilder.setExistsTargets(true); joinBuilder.addAllTargets(ProtoUtil.<PlanProto.Target>toProtoObjects(join.getTargets())); } else { joinBuilder.setExistsTargets(false); } PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, join); nodeBuilder.setJoin(joinBuilder); context.treeBuilder.addNodes(nodeBuilder); return join; }
@Test public final void testNaturalJoinPlan() throws TajoException { QueryContext qc = createQueryContext(); // two relations Expr context = sqlAnalyzer.parse(JOINS[0]); LogicalNode plan = planner.createPlan(qc, context).getRootBlock().getRoot(); assertSchema(expectedJoinSchema, plan.getOutSchema()); assertEquals(NodeType.ROOT, plan.getType()); LogicalRootNode root = (LogicalRootNode) plan; assertEquals(NodeType.PROJECTION, root.getChild().getType()); ProjectionNode proj = root.getChild(); assertEquals(NodeType.JOIN, proj.getChild().getType()); JoinNode join = proj.getChild(); assertEquals(JoinType.INNER, join.getJoinType()); assertEquals(NodeType.SCAN, join.getRightChild().getType()); assertTrue(join.hasJoinQual()); ScanNode scan = join.getRightChild(); assertEquals("default.score", scan.getTableName()); assertEquals(NodeType.JOIN, join.getLeftChild().getType()); join = join.getLeftChild(); assertEquals(JoinType.INNER, join.getJoinType()); assertEquals(NodeType.SCAN, join.getLeftChild().getType()); ScanNode outer = join.getLeftChild(); assertEquals("default.employee", outer.getTableName()); assertEquals(NodeType.SCAN, join.getRightChild().getType()); ScanNode inner = join.getRightChild(); assertEquals("default.dept", inner.getTableName()); }