/** * Translates a {@link org.apache.flink.optimizer.plan.OptimizedPlan} into a * {@link org.apache.flink.runtime.jobgraph.JobGraph}. * * @param program Optimized plan that is translated into a JobGraph. * @return JobGraph generated from the plan. */ public JobGraph compileJobGraph(OptimizedPlan program) { return compileJobGraph(program, null); }
public static String getOperatorStrategyString(DriverStrategy strategy) { return getOperatorStrategyString(strategy, "input 1", "input 2"); }
@Override public JobExecutionResult execute(String jobName) throws Exception { OptimizedPlan op = compileProgram(jobName); JobGraphGenerator jgg = new JobGraphGenerator(); JobGraph jobGraph = jgg.compileJobGraph(op); for (Path jarFile: jarFiles) { jobGraph.addJar(jarFile); } jobGraph.setClasspaths(new ArrayList<>(classPaths)); this.lastJobExecutionResult = jobExecutor.executeJobBlocking(jobGraph); return this.lastJobExecutionResult; }
@Test public void testArtifactCompression() throws IOException { Path plainFile1 = tmp.newFile("plainFile1").toPath(); Path plainFile2 = tmp.newFile("plainFile2").toPath(); Path directory1 = tmp.newFolder("directory1").toPath(); Files.createDirectory(directory1.resolve("containedFile1")); Path directory2 = tmp.newFolder("directory2").toPath(); Files.createDirectory(directory2.resolve("containedFile2")); JobGraph jb = new JobGraph(); final String executableFileName = "executableFile"; final String nonExecutableFileName = "nonExecutableFile"; final String executableDirName = "executableDir"; final String nonExecutableDirName = "nonExecutableDIr"; Collection<Tuple2<String, DistributedCache.DistributedCacheEntry>> originalArtifacts = Arrays.asList( Tuple2.of(executableFileName, new DistributedCache.DistributedCacheEntry(plainFile1.toString(), true)), Tuple2.of(nonExecutableFileName, new DistributedCache.DistributedCacheEntry(plainFile2.toString(), false)), Tuple2.of(executableDirName, new DistributedCache.DistributedCacheEntry(directory1.toString(), true)), Tuple2.of(nonExecutableDirName, new DistributedCache.DistributedCacheEntry(directory2.toString(), false)) ); JobGraphGenerator.addUserArtifactEntries(originalArtifacts, jb); Map<String, DistributedCache.DistributedCacheEntry> submittedArtifacts = jb.getUserArtifacts(); DistributedCache.DistributedCacheEntry executableFileEntry = submittedArtifacts.get(executableFileName); assertState(executableFileEntry, true, false); DistributedCache.DistributedCacheEntry nonExecutableFileEntry = submittedArtifacts.get(nonExecutableFileName); assertState(nonExecutableFileEntry, false, false); DistributedCache.DistributedCacheEntry executableDirEntry = submittedArtifacts.get(executableDirName); assertState(executableDirEntry, true, true); DistributedCache.DistributedCacheEntry nonExecutableDirEntry = submittedArtifacts.get(nonExecutableDirName); assertState(nonExecutableDirEntry, false, true); }
private JobVertex createDataSinkVertex(SinkPlanNode node) throws CompilerException { final OutputFormatVertex vertex = new OutputFormatVertex(node.getNodeName()); final TaskConfig config = new TaskConfig(vertex.getConfiguration()); vertex.setResources(node.getMinResources(), node.getPreferredResources()); vertex.setInvokableClass(DataSinkTask.class); vertex.setFormatDescription(getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper())); // set user code config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper()); config.setStubParameters(node.getProgramOperator().getParameters()); return vertex; }
assignLocalStrategyResources(channel, config, inputNum);
private JobVertex createDualInputVertex(DualInputPlanNode node) throws CompilerException { final String taskName = node.getNodeName(); final DriverStrategy ds = node.getDriverStrategy(); final JobVertex vertex = new JobVertex(taskName); final TaskConfig config = new TaskConfig(vertex.getConfiguration()); vertex.setResources(node.getMinResources(), node.getPreferredResources()); vertex.setInvokableClass( (this.currentIteration != null && node.isOnDynamicPath()) ? IterationIntermediateTask.class : BatchTask.class); // set user code config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper()); config.setStubParameters(node.getProgramOperator().getParameters()); // set the driver strategy config.setDriver(ds.getDriverClass()); config.setDriverStrategy(ds); if (node.getComparator1() != null) { config.setDriverComparator(node.getComparator1(), 0); } if (node.getComparator2() != null) { config.setDriverComparator(node.getComparator2(), 1); } if (node.getPairComparator() != null) { config.setDriverPairComparator(node.getPairComparator()); } // assign memory, file-handles, etc. assignDriverResources(node, config); return vertex; }
public static JobGraph getJobGraph(Configuration flinkConfig, FlinkPlan optPlan, List<URL> jarFiles, List<URL> classpaths, SavepointRestoreSettings savepointSettings) { JobGraph job; if (optPlan instanceof StreamingPlan) { job = ((StreamingPlan) optPlan).getJobGraph(); job.setSavepointRestoreSettings(savepointSettings); } else { JobGraphGenerator gen = new JobGraphGenerator(flinkConfig); job = gen.compileJobGraph((OptimizedPlan) optPlan); } for (URL jar : jarFiles) { try { job.addJar(new Path(jar.toURI())); } catch (URISyntaxException e) { throw new RuntimeException("URL is invalid. This should not happen.", e); } } job.setClasspaths(classpaths); return job; }
private InputFormatVertex createDataSourceVertex(SourcePlanNode node) throws CompilerException { final InputFormatVertex vertex = new InputFormatVertex(node.getNodeName()); final TaskConfig config = new TaskConfig(vertex.getConfiguration()); vertex.setResources(node.getMinResources(), node.getPreferredResources()); vertex.setInvokableClass(DataSourceTask.class); vertex.setFormatDescription(getDescriptionForUserCode(node.getProgramOperator().getUserCodeWrapper())); // set user code config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper()); config.setStubParameters(node.getProgramOperator().getParameters()); config.setOutputSerializer(node.getSerializer()); return vertex; }
private JobGraph getJobGraph(final Plan plan) { final Optimizer pc = new Optimizer(new DataStatistics(), getConfiguration()); final OptimizedPlan op = pc.compile(plan); final JobGraphGenerator jgg = new JobGraphGenerator(); return jgg.compileJobGraph(op); } }
/** * Helpers to generate the JobGraph. */ private static JobGraph getJobGraph(Plan plan) { Optimizer pc = new Optimizer(new DataStatistics(), new Configuration()); JobGraphGenerator jgg = new JobGraphGenerator(); OptimizedPlan op = pc.compile(plan); return jgg.compileJobGraph(op); } }
@Test public void testDisjointFlows() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // generate two different flows env.generateSequence(1, 10) .output(new DiscardingOutputFormat<Long>()); env.generateSequence(1, 10) .output(new DiscardingOutputFormat<Long>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); new JobGraphGenerator().compileJobGraph(op); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } } }
@Test public void testUnionReplacement() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> input1 = env.fromElements("test1"); DataSet<String> input2 = env.fromElements("test2"); DataSet<String> union = input1.union(input2); union.output(new DiscardingOutputFormat<String>()); union.output(new DiscardingOutputFormat<String>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = compileNoStats(plan); JobGraphGenerator jobGen = new JobGraphGenerator(); jobGen.compileJobGraph(oPlan); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
@Test public void testIdentityIteration() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(43); IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100); iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); new JobGraphGenerator().compileJobGraph(op); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
/** * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant */ @Test public void testLeftSideCountercheck() { try { Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND); OptimizedPlan oPlan = compileNoStats(plan); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner"); // verify correct join strategy assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND, innerJoin.getDriverStrategy()); assertEquals(TempMode.CACHED, innerJoin.getInput1().getTempMode()); assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode()); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }
/** * This test makes sure that only a HYBRIDHASH on the static path is transformed to the cached variant */ @Test public void testRightSideCountercheck() { try { Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST); OptimizedPlan oPlan = compileNoStats(plan); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner"); // verify correct join strategy assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST, innerJoin.getDriverStrategy()); assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode()); assertEquals(TempMode.CACHED, innerJoin.getInput2().getTempMode()); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }
/** * This tests whether a HYBRIDHASH_BUILD_FIRST is correctly transformed to a HYBRIDHASH_BUILD_FIRST_CACHED * when inside of an iteration an on the static path */ @Test public void testLeftSide() { try { Plan plan = getTestPlanLeftStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_FIRST); OptimizedPlan oPlan = compileNoStats(plan); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner"); // verify correct join strategy assertEquals(DriverStrategy.HYBRIDHASH_BUILD_FIRST_CACHED, innerJoin.getDriverStrategy()); assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode()); assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode()); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }
/** * This tests whether a HYBRIDHASH_BUILD_SECOND is correctly transformed to a HYBRIDHASH_BUILD_SECOND_CACHED * when inside of an iteration an on the static path */ @Test public void testRightSide() { try { Plan plan = getTestPlanRightStatic(Optimizer.HINT_LOCAL_STRATEGY_HASH_BUILD_SECOND); OptimizedPlan oPlan = compileNoStats(plan); OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan); DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner"); // verify correct join strategy assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy()); assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode()); assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode()); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); fail("Test errored: " + e.getMessage()); } }
@Override public JobExecutionResult execute(String jobName) throws Exception { Plan plan = createProgramPlan(jobName); Optimizer pc = new Optimizer(new Configuration()); OptimizedPlan op = pc.compile(plan); JobGraphGenerator jgg = new JobGraphGenerator(); JobGraph jobGraph = jgg.compileJobGraph(op); String jsonPlan = JsonPlanGenerator.generatePlan(jobGraph); // first check that the JSON is valid JsonParser parser = new JsonFactory().createJsonParser(jsonPlan); while (parser.nextToken() != null) {} validator.validateJson(jsonPlan); throw new AbortError(); }
@Test public void testCostComputationWithMultipleDataSinks() { final int SINKS = 5; try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); DataSet<Long> source = env.generateSequence(1, 10000); DataSet<Long> mappedA = source.map(new IdentityMapper<Long>()); DataSet<Long> mappedC = source.map(new IdentityMapper<Long>()); for (int sink = 0; sink < SINKS; sink++) { mappedA.output(new DiscardingOutputFormat<Long>()); mappedC.output(new DiscardingOutputFormat<Long>()); } Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks"); OptimizedPlan oPlan = compileNoStats(plan); new JobGraphGenerator().compileJobGraph(oPlan); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }