private boolean checkRepartitionShipStrategies(DualInputPlanNode join, SingleInputPlanNode reducer, SingleInputPlanNode combiner) { if (ShipStrategyType.PARTITION_HASH == join.getInput1().getShipStrategy() && ShipStrategyType.PARTITION_HASH == join.getInput2().getShipStrategy() && ShipStrategyType.FORWARD == reducer.getInput().getShipStrategy()) { // check combiner Assert.assertNull("Plan should not have a combiner", combiner); return true; } else { return false; } }
private boolean checkBroadcastShipStrategies(DualInputPlanNode join, SingleInputPlanNode reducer, SingleInputPlanNode combiner) { if (ShipStrategyType.BROADCAST == join.getInput1().getShipStrategy() && ShipStrategyType.FORWARD == join.getInput2().getShipStrategy() && ShipStrategyType.PARTITION_HASH == reducer.getInput().getShipStrategy()) { // check combiner Assert.assertNotNull("Plan should have a combiner", combiner); Assert.assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy()); return true; } else { return false; } }
/**
 * Creates a plan node with two input channels.
 *
 * <p>Besides storing the inputs, key fields and sort orders, the constructor sets the
 * replication factor of every input whose ship strategy is
 * {@link ShipStrategyType#BROADCAST} to this node's parallelism, and finally merges
 * the branch plan maps of the two input sources.
 *
 * @param template the optimizer node passed on to the super constructor
 * @param nodeName the node name passed on to the super constructor
 * @param input1 the channel of the first input
 * @param input2 the channel of the second input
 * @param diverStrategy the driver strategy passed on to the super constructor
 * @param driverKeyFields1 the key fields stored for the first input
 * @param driverKeyFields2 the key fields stored for the second input
 * @param driverSortOrders the sort orders stored for this node
 */
public DualInputPlanNode(OptimizerNode template, String nodeName, Channel input1, Channel input2,
        DriverStrategy diverStrategy, FieldList driverKeyFields1, FieldList driverKeyFields2,
        boolean[] driverSortOrders) {
    super(template, nodeName, diverStrategy);

    this.input1 = input1;
    this.input2 = input2;
    this.keys1 = driverKeyFields1;
    this.keys2 = driverKeyFields2;
    this.sortOrders = driverSortOrders;

    // Broadcast inputs get a replication factor equal to this node's parallelism;
    // the first input is handled before the second.
    for (Channel channel : new Channel[] {input1, input2}) {
        if (ShipStrategyType.BROADCAST == channel.getShipStrategy()) {
            channel.setReplicationFactor(getParallelism());
        }
    }

    mergeBranchPlanMaps(input1.getSource(), input2.getSource());
}
public void collect(Channel in, List<Channel> inputs) { if (in.getSource() instanceof NAryUnionPlanNode) { // sanity check if (in.getShipStrategy() != ShipStrategyType.FORWARD) { throw new CompilerException("Bug: Plan generation for Unions picked a ship strategy between binary plan operators."); } if (!(in.getLocalStrategy() == null || in.getLocalStrategy() == LocalStrategy.NONE)) { throw new CompilerException("Bug: Plan generation for Unions picked a local strategy between binary plan operators."); } inputs.addAll(((NAryUnionPlanNode) in.getSource()).getListOfInputs()); } else { // is not a collapsed union node, so we take the channel directly inputs.add(in); } } }
/**
 * Checks the ship strategies of a visited {@link DualInputPlanNode}: the first input
 * must be forwarded and the second input must be hash-partitioned.
 *
 * @param visitable the plan node being visited
 * @return {@code false} after a dual-input node has been checked, {@code true} for
 *         every other node
 */
@Override
public boolean preVisit(PlanNode visitable) {
    if (!(visitable instanceof DualInputPlanNode)) {
        return true;
    }

    DualInputPlanNode dualInput = (DualInputPlanNode) visitable;
    Channel first = dualInput.getInput1();
    Channel second = dualInput.getInput2();

    Assert.assertEquals("Incompatible shipping strategy chosen for match",
            ShipStrategyType.FORWARD, first.getShipStrategy());
    Assert.assertEquals("Incompatible shipping strategy chosen for match",
            ShipStrategyType.PARTITION_HASH, second.getShipStrategy());
    return false;
}
/**
 * Checks that a visited inner join forwards both of its inputs.
 *
 * <p>Only nodes that are a {@link DualInputPlanNode} and whose program operator is an
 * {@code InnerJoinOperatorBase} are checked. The assertions use {@code assertEquals}
 * so that a failure reports the ship strategy that was actually chosen.
 *
 * @param visitable the plan node being visited
 * @return {@code false} after a join node has been checked, {@code true} for every
 *         other node
 */
@Override
public boolean preVisit(PlanNode visitable) {
    if (visitable instanceof DualInputPlanNode && visitable.getProgramOperator() instanceof InnerJoinOperatorBase) {
        DualInputPlanNode node = (DualInputPlanNode) visitable;

        final Channel inConn1 = node.getInput1();
        final Channel inConn2 = node.getInput2();

        Assert.assertEquals("Join should just forward the input if it is already partitioned",
                ShipStrategyType.FORWARD, inConn1.getShipStrategy());
        Assert.assertEquals("Join should just forward the input if it is already partitioned",
                ShipStrategyType.FORWARD, inConn2.getShipStrategy());
        return false;
    }
    return true;
}
/**
 * Checks the strategies of the join node obtained for
 * {@code JoinHint.BROADCAST_HASH_SECOND}: the first input is forwarded, the second
 * input is broadcast, and the driver strategy is {@code HYBRIDHASH_BUILD_SECOND}.
 *
 * @throws Exception if building the plan fails; the exception is propagated so the
 *         test report contains its type and full stack trace
 */
@Test
public void testBroadcastHashSecondTest() throws Exception {
    DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.BROADCAST_HASH_SECOND);

    assertEquals(ShipStrategyType.FORWARD, node.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.BROADCAST, node.getInput2().getShipStrategy());
    assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, node.getDriverStrategy());
}
/**
 * Checks the strategies of the join node obtained for
 * {@code JoinHint.BROADCAST_HASH_FIRST}: the first input is broadcast, the second
 * input is forwarded, and the driver strategy is {@code HYBRIDHASH_BUILD_FIRST}.
 *
 * @throws Exception if building the plan fails; the exception is propagated so the
 *         test report contains its type and full stack trace
 */
@Test
public void testBroadcastHashFirstTest() throws Exception {
    DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.BROADCAST_HASH_FIRST);

    assertEquals(ShipStrategyType.BROADCAST, node.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, node.getInput2().getShipStrategy());
    assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, node.getDriverStrategy());
}
/**
 * Checks the strategies of the join node obtained for
 * {@code JoinHint.REPARTITION_HASH_FIRST}: both inputs are hash-partitioned and the
 * driver strategy is {@code HYBRIDHASH_BUILD_FIRST}.
 *
 * @throws Exception if building the plan fails; the exception is propagated so the
 *         test report contains its type and full stack trace
 */
@Test
public void testPartitionHashFirstTest() throws Exception {
    DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_HASH_FIRST);

    assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
    assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, node.getDriverStrategy());
}
/**
 * Checks the strategies of the join node obtained for
 * {@code JoinHint.REPARTITION_HASH_SECOND}: both inputs are hash-partitioned and the
 * driver strategy is {@code HYBRIDHASH_BUILD_SECOND}.
 *
 * @throws Exception if building the plan fails; the exception is propagated so the
 *         test report contains its type and full stack trace
 */
@Test
public void testPartitionHashSecondTest() throws Exception {
    DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_HASH_SECOND);

    assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
    assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, node.getDriverStrategy());
}
/**
 * Checks the strategies of the join node obtained for
 * {@code JoinHint.REPARTITION_SORT_MERGE}: both inputs are hash-partitioned and the
 * driver strategy is {@code INNER_MERGE}.
 *
 * @throws Exception if building the plan fails; the exception is propagated so the
 *         test report contains its type and full stack trace
 */
@Test
public void testPartitionSortMergeTest() throws Exception {
    DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.REPARTITION_SORT_MERGE);

    assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());
    assertEquals(DriverStrategy.INNER_MERGE, node.getDriverStrategy());
}
@Override public boolean preVisit(PlanNode visitable) { if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) { for (Channel inConn : visitable.getInputs()) { Assert.assertTrue("Reduce should just forward the input if it is already partitioned", inConn.getShipStrategy() == ShipStrategyType.FORWARD); } //just check latest ReduceNode return false; } return true; }
/**
 * Checks the strategies of the join node obtained for
 * {@code JoinHint.OPTIMIZER_CHOOSES}: both inputs are hash-partitioned and the driver
 * strategy is one of the two hybrid-hash variants.
 *
 * @throws Exception if building the plan fails; the exception is propagated so the
 *         test report contains its type and full stack trace
 */
@Test
public void testOptimizerChoosesTest() throws Exception {
    DualInputPlanNode node = createPlanAndGetJoinNode(JoinHint.OPTIMIZER_CHOOSES);

    assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput1().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, node.getInput2().getShipStrategy());

    // Either build side is acceptable; name the strategy found if neither matches.
    DriverStrategy driver = node.getDriverStrategy();
    assertTrue("Unexpected driver strategy: " + driver,
            DriverStrategy.HYBRIDHASH_BUILD_FIRST == driver
                    || DriverStrategy.HYBRIDHASH_BUILD_SECOND == driver);
}
@Override public boolean preVisit(PlanNode visitable) { if (visitable instanceof WorksetIterationPlanNode) { PlanNode deltaNode = ((WorksetIterationPlanNode) visitable).getSolutionSetDeltaPlanNode(); //get the CoGroup DualInputPlanNode dpn = (DualInputPlanNode) deltaNode.getInputs().iterator().next().getSource(); Channel in1 = dpn.getInput1(); Channel in2 = dpn.getInput2(); Assert.assertTrue(in1.getLocalProperties().getOrdering() == null); Assert.assertTrue(in2.getLocalProperties().getOrdering() != null); Assert.assertTrue(in2.getLocalProperties().getOrdering().getInvolvedIndexes().contains(0)); Assert.assertTrue(in1.getShipStrategy() == ShipStrategyType.FORWARD); Assert.assertTrue(in2.getShipStrategy() == ShipStrategyType.PARTITION_HASH); return false; } return true; }
/**
 * Builds a program that groups a rebalanced {@code Pojo2} data set by field
 * {@code "a"} with a custom partitioner and reduces it, then checks the ship
 * strategies of the optimized plan: sink input forwarded, reducer input custom
 * partitioned, and the input of the node feeding the reducer (treated as the
 * combiner) forwarded.
 *
 * @throws Exception if creating or compiling the plan fails; the exception is
 *         propagated so the test report contains its type and full stack trace
 */
@Test
public void testCustomPartitioningTupleReduce() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Pojo2> data = env.fromElements(new Pojo2())
            .rebalance().setParallelism(4);

    data.groupBy("a").withPartitioner(new TestPartitionerInt())
            .reduce(new SelectOneReducer<Pojo2>())
            .output(new DiscardingOutputFormat<Pojo2>());

    Plan p = env.createProgramPlan();
    OptimizedPlan op = compileNoStats(p);

    // Walk the plan backwards from the sink: sink <- reducer <- combiner.
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
    SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_CUSTOM, reducer.getInput().getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
}
private void checkStandardStrategies(SingleInputPlanNode map, DualInputPlanNode join, SingleInputPlanNode combiner, SingleInputPlanNode reducer, SinkPlanNode sink) { // check ship strategies that are always fix Assert.assertEquals(ShipStrategyType.FORWARD, map.getInput().getShipStrategy()); Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // check the driver strategies that are always fix Assert.assertEquals(DriverStrategy.FLAT_MAP, map.getDriverStrategy()); Assert.assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reducer.getDriverStrategy()); Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy()); if (combiner != null) { Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combiner.getDriverStrategy()); Assert.assertEquals(LocalStrategy.NONE, combiner.getInput().getLocalStrategy()); } }
/**
 * Joins two CSV inputs that are both range-partitioned on field 0 with
 * {@code TestDistribution} instances created from the same argument (3), and expects
 * the optimized join to forward both inputs.
 *
 * @throws Exception if creating or compiling the plan fails
 */
@Test
public void JoinWithSameDistributionTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> left =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> right =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    TestDistribution leftDistribution = new TestDistribution(3);
    TestDistribution rightDistribution = new TestDistribution(3);

    DataSet<Tuple3<Integer, Integer, Integer>> leftPartitioned =
            DataSetUtils.partitionByRange(left, leftDistribution, 0);
    DataSet<Tuple3<Integer, Integer, Integer>> rightPartitioned =
            DataSetUtils.partitionByRange(right, rightDistribution, 0);

    DataSet<Tuple3<Integer, Integer, Integer>> joined = leftPartitioned
            .join(rightPartitioned)
            .where(0).equalTo(0).with(new JoinFunc());

    joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan program = env.createProgramPlan();
    OptimizedPlan optimized = compileWithStats(program);

    // the join is the node that feeds the only sink
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
    Channel first = join.getInput1();
    Channel second = join.getInput2();

    assertEquals(ShipStrategyType.FORWARD, first.getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, second.getShipStrategy());
}
/**
 * Joins two CSV inputs that are range-partitioned on field 0 with
 * {@code TestDistribution} instances created from different arguments (3 and 4), and
 * expects the optimized join to hash-partition both inputs.
 *
 * @throws Exception if creating or compiling the plan fails
 */
@Test
public void JoinWithDifferentDistributionTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> left =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> right =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    TestDistribution leftDistribution = new TestDistribution(3);
    TestDistribution rightDistribution = new TestDistribution(4);

    DataSet<Tuple3<Integer, Integer, Integer>> leftPartitioned =
            DataSetUtils.partitionByRange(left, leftDistribution, 0);
    DataSet<Tuple3<Integer, Integer, Integer>> rightPartitioned =
            DataSetUtils.partitionByRange(right, rightDistribution, 0);

    DataSet<Tuple3<Integer, Integer, Integer>> joined = leftPartitioned
            .join(rightPartitioned)
            .where(0).equalTo(0).with(new JoinFunc());

    joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan program = env.createProgramPlan();
    OptimizedPlan optimized = compileWithStats(program);

    // the join is the node that feeds the only sink
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
    Channel first = join.getInput1();
    Channel second = join.getInput2();

    assertEquals(ShipStrategyType.PARTITION_HASH, first.getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, second.getShipStrategy());
}
/**
 * Co-groups two CSV inputs that are range-partitioned on field 0 with
 * {@code TestDistribution} instances created from different arguments (3 and 2), and
 * expects the optimized co-group to hash-partition both inputs.
 *
 * @throws Exception if creating or compiling the plan fails
 */
@Test
public void CoGroupWithDifferentDistributionTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> left =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> right =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    TestDistribution leftDistribution = new TestDistribution(3);
    TestDistribution rightDistribution = new TestDistribution(2);

    DataSet<Tuple3<Integer, Integer, Integer>> leftPartitioned =
            DataSetUtils.partitionByRange(left, leftDistribution, 0);
    DataSet<Tuple3<Integer, Integer, Integer>> rightPartitioned =
            DataSetUtils.partitionByRange(right, rightDistribution, 0);

    DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = leftPartitioned
            .coGroup(rightPartitioned)
            .where(0).equalTo(0).with(new CoGroupFunc());

    coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan program = env.createProgramPlan();
    OptimizedPlan optimized = compileWithStats(program);

    // the co-group is the node that feeds the only sink
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    DualInputPlanNode coGroup = (DualInputPlanNode) sink.getInput().getSource();
    Channel first = coGroup.getInput1();
    Channel second = coGroup.getInput2();

    assertEquals(ShipStrategyType.PARTITION_HASH, first.getShipStrategy());
    assertEquals(ShipStrategyType.PARTITION_HASH, second.getShipStrategy());
}
/**
 * Co-groups two CSV inputs that are both range-partitioned on field 0 with
 * {@code TestDistribution} instances created from the same argument (3), and expects
 * the optimized co-group to forward both inputs.
 *
 * @throws Exception if creating or compiling the plan fails
 */
@Test
public void CoGroupWithSameDistributionTest() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> left =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> right =
            env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    TestDistribution leftDistribution = new TestDistribution(3);
    TestDistribution rightDistribution = new TestDistribution(3);

    DataSet<Tuple3<Integer, Integer, Integer>> leftPartitioned =
            DataSetUtils.partitionByRange(left, leftDistribution, 0);
    DataSet<Tuple3<Integer, Integer, Integer>> rightPartitioned =
            DataSetUtils.partitionByRange(right, rightDistribution, 0);

    DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = leftPartitioned
            .coGroup(rightPartitioned)
            .where(0).equalTo(0).with(new CoGroupFunc());

    coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan program = env.createProgramPlan();
    OptimizedPlan optimized = compileWithStats(program);

    // the co-group is the node that feeds the only sink
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    DualInputPlanNode coGroup = (DualInputPlanNode) sink.getInput().getSource();
    Channel first = coGroup.getInput1();
    Channel second = coGroup.getInput2();

    assertEquals(ShipStrategyType.FORWARD, first.getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, second.getShipStrategy());
}