@Test public void testIterationNotPushingWorkOut() throws Exception { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(8); DataSet<Tuple2<Long, Long>> input1 = env.readCsvFile("/some/file/path").types(Long.class).map(new DuplicateValue()); DataSet<Tuple2<Long, Long>> input2 = env.readCsvFile("/some/file/path").types(Long.class, Long.class); // Use input1 as partial solution. Partial solution is used in a single join operation --> it is cheaper // to do the hash partitioning between the partial solution node and the join node // instead of pushing the partitioning out doSimpleBulkIteration(input1, input2).output(new DiscardingOutputFormat<Tuple2<Long,Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); assertEquals(1, op.getDataSinks().size()); assertTrue(op.getDataSinks().iterator().next().getInput().getSource() instanceof BulkIterationPlanNode); BulkIterationPlanNode bipn = (BulkIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource(); // check that work has not been pushed out for (Channel c : bipn.getPartialSolutionPlanNode().getOutgoingChannels()) { assertEquals(ShipStrategyType.PARTITION_HASH, c.getShipStrategy()); } assertEquals(ShipStrategyType.FORWARD, bipn.getInput().getShipStrategy()); new JobGraphGenerator().compileJobGraph(op); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
private JobVertex createDataSinkVertex(SinkPlanNode node) throws CompilerException { final OutputFormatVertex vertex = new OutputFormatVertex(node.getNodeName()); final TaskConfig config = new TaskConfig(vertex.getConfiguration()); vertex.setResources(node.getMinResources(), node.getPreferredResources()); vertex.setInvokableClass(DataSinkTask.class); vertex.setFormatDescription(getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper())); // set user code config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper()); config.setStubParameters(node.getProgramOperator().getParameters()); return vertex; }
outputPlans.add(new SinkPlanNode(this, "DataSink ("+this.getOperator().getName()+")" ,c));
private void checkStandardStrategies(SingleInputPlanNode map, DualInputPlanNode join, SingleInputPlanNode combiner, SingleInputPlanNode reducer, SinkPlanNode sink) { // check ship strategies that are always fix Assert.assertEquals(ShipStrategyType.FORWARD, map.getInput().getShipStrategy()); Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); // check the driver strategies that are always fix Assert.assertEquals(DriverStrategy.FLAT_MAP, map.getDriverStrategy()); Assert.assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reducer.getDriverStrategy()); Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy()); if (combiner != null) { Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combiner.getDriverStrategy()); Assert.assertEquals(LocalStrategy.NONE, combiner.getInput().getLocalStrategy()); } }
assertEquals(reduceNode, sinkNode.getInput().getSource()); assertEquals(1, sinkNode.getParallelism());
/** * Applies the given visitor top down to all nodes, starting at the sinks. * * @param visitor * The visitor to apply to the nodes in this plan. * @see org.apache.flink.util.Visitable#accept(org.apache.flink.util.Visitor) */ @Override public void accept(Visitor<PlanNode> visitor) { for (SinkPlanNode node : this.dataSinks) { node.accept(visitor); } } }
Channel inchannel = sn.getInput(); sn.getSinkNode().getOperator().getName() + "'."); sn.getSinkNode().getOperator().getName() + "'.");
assertTrue(sinks.remove(sink.getProgramOperator()));
String path = ((TextOutputFormat<String>)n.getSinkNode().getOperator() .getFormatWrapper().getUserCodeObject()).getOutputFilePath().toString(); Assert.assertTrue("Invalid data sink.", allSinks.remove(path));
Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy()); Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy()); Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy()); Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy()); Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys()); Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy()); Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy());
node.accept(this);
SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode partitioner = (SingleInputPlanNode) mapper.getInput().getSource(); SingleInputPlanNode balancer = (SingleInputPlanNode) partitioner.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(parallelism, sink.getParallelism());
@Test public void testWorksetIterationNotDependingOnSolutionSet() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>()); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1); DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>()); iteration.closeWith(iterEnd, iterEnd) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource(); assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty()); JobGraphGenerator jgg = new JobGraphGenerator(); jgg.compileJobGraph(op); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
private JobVertex createDataSinkVertex(SinkPlanNode node) throws CompilerException { final OutputFormatVertex vertex = new OutputFormatVertex(node.getNodeName()); final TaskConfig config = new TaskConfig(vertex.getConfiguration()); vertex.setResources(node.getMinResources(), node.getPreferredResources()); vertex.setInvokableClass(DataSinkTask.class); vertex.setFormatDescription(getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper())); // set user code config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper()); config.setStubParameters(node.getProgramOperator().getParameters()); return vertex; }
Assert.assertEquals(DriverStrategy.NONE, sink.getDriverStrategy()); Assert.assertEquals(DriverStrategy.NONE, vertexSource.getDriverStrategy()); Assert.assertEquals(DriverStrategy.NONE, edgesSource.getDriverStrategy()); Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); Assert.assertEquals(ShipStrategyType.PARTITION_HASH, iter.getInitialSolutionSetInput().getShipStrategy()); Assert.assertEquals(set0, iter.getInitialSolutionSetInput().getShipStrategyKeys()); Assert.assertEquals(LocalStrategy.NONE, sink.getInput().getLocalStrategy()); Assert.assertEquals(LocalStrategy.NONE, iter.getInitialSolutionSetInput().getLocalStrategy()); Assert.assertEquals(LocalStrategy.NONE, iter.getInitialWorksetInput().getLocalStrategy());
/** * Applies the given visitor top down to all nodes, starting at the sinks. * * @param visitor * The visitor to apply to the nodes in this plan. * @see org.apache.flink.util.Visitable#accept(org.apache.flink.util.Visitor) */ @Override public void accept(Visitor<PlanNode> visitor) { for (SinkPlanNode node : this.dataSinks) { node.accept(visitor); } } }
SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource(); SingleInputPlanNode partitioner = (SingleInputPlanNode) mapper.getInput().getSource(); SingleInputPlanNode balancer = (SingleInputPlanNode) partitioner.getInput().getSource(); assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy()); assertEquals(parallelism, sink.getParallelism());
outputPlans.add(new SinkPlanNode(this, "DataSink ("+this.getOperator().getName()+")" ,c));
@Test public void testNoBreakerForIndependentVariable() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> source1 = env.fromElements("test"); DataSet<String> source2 = env.fromElements("test"); source1.map(new IdentityMapper<String>()).withBroadcastSet(source2, "some name") .output(new DiscardingOutputFormat<String>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); SinkPlanNode sink = op.getDataSinks().iterator().next(); SingleInputPlanNode mapper = (SingleInputPlanNode) sink.getInput().getSource(); assertEquals(TempMode.NONE, mapper.getInput().getTempMode()); assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode()); assertEquals(DataExchangeMode.PIPELINED, mapper.getInput().getDataExchangeMode()); assertEquals(DataExchangeMode.PIPELINED, mapper.getBroadcastInputs().get(0).getDataExchangeMode()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
private JobVertex createDataSinkVertex(SinkPlanNode node) throws CompilerException { final OutputFormatVertex vertex = new OutputFormatVertex(node.getNodeName()); final TaskConfig config = new TaskConfig(vertex.getConfiguration()); vertex.setResources(node.getMinResources(), node.getPreferredResources()); vertex.setInvokableClass(DataSinkTask.class); vertex.setFormatDescription(getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper())); // set user code config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper()); config.setStubParameters(node.getProgramOperator().getParameters()); return vertex; }