jobName = getDefaultName();
Plan plan = translator.translateToPlan(this.sinks, jobName);
if (getParallelism() > 0) {
    plan.setDefaultParallelism(getParallelism());
}
plan.setExecutionConfig(getConfig());
plan.accept(new Visitor<org.apache.flink.api.common.operators.Operator<?>>() {
@Test
public void testCorrectTranslation() {
    final String jobName = "Test JobName";
    final int numIterations = 13;
    final int defaultParallelism = 133;
    final int iterationParallelism = 77;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // ------------ construct the test program ------------------
    {
        env.setParallelism(defaultParallelism);

        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Double, Long, String>> initialDataSet =
            env.fromElements(new Tuple3<>(3.44, 5L, "abc"));

        IterativeDataSet<Tuple3<Double, Long, String>> bulkIteration = initialDataSet.iterate(numIterations);
        bulkIteration.setParallelism(iterationParallelism);

        // test that multiple iteration consumers are supported
        DataSet<Tuple3<Double, Long, String>> identity =
            bulkIteration.map(new IdentityMapper<Tuple3<Double, Long, String>>());

        DataSet<Tuple3<Double, Long, String>> result = bulkIteration.closeWith(identity);

        result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
        result.writeAsText("/dev/null");
    }

    Plan p = env.createProgramPlan(jobName);

    // ------------- validate the plan ----------------
    BulkIterationBase<?> iteration =
        (BulkIterationBase<?>) p.getDataSinks().iterator().next().getInput();

    assertEquals(jobName, p.getJobName());
    assertEquals(defaultParallelism, p.getDefaultParallelism());
    assertEquals(iterationParallelism, iteration.getParallelism());
}
graph.setExecutionConfig(program.getOriginalPlan().getExecutionConfig());
graph.setSessionTimeout(program.getOriginalPlan().getSessionTimeout());

vertex.setInputDependencyConstraint(
    program.getOriginalPlan().getExecutionConfig().getDefaultInputDependencyConstraint());

graph.addVertex(vertex);

program.getOriginalPlan().getCachedFiles().stream()
    .map(entry -> Tuple2.of(entry.getKey(), entry.getValue()))
    .collect(Collectors.toList());
public static OptimizedPlan getOptimizedPlan(Optimizer compiler, Plan p, int parallelism)
        throws CompilerException {
    Logger log = LoggerFactory.getLogger(ClusterClient.class);

    if (parallelism > 0 && p.getDefaultParallelism() <= 0) {
        log.debug("Changing plan default parallelism from {} to {}", p.getDefaultParallelism(), parallelism);
        p.setDefaultParallelism(parallelism);
    }
    log.debug("Set parallelism {}, plan default parallelism {}", parallelism, p.getDefaultParallelism());

    return compiler.compile(p);
}
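A minimal caller sketch, assuming a plan built through an ExecutionEnvironment and the two-argument Optimizer constructor used in getOptimizerPlanAsJSON further down; the job name and the parallelism of 4 are arbitrary example values:

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.fromElements(1, 2, 3).output(new DiscardingOutputFormat<Integer>());
    Plan plan = env.createProgramPlan("example job");

    // the default parallelism of 4 is applied only if the plan does not define one
    Optimizer compiler = new Optimizer(new DataStatistics(), new Configuration());
    OptimizedPlan optimized = getOptimizedPlan(compiler, plan, 4);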
public JobExecutionResult execute(Plan program) throws Exception {
    long startTime = System.currentTimeMillis();

    initCache(program.getCachedFiles());
    Collection<? extends GenericDataSinkBase<?>> sinks = program.getDataSinks();
    for (Operator<?> sink : sinks) {
        execute(sink);
    }

    long endTime = System.currentTimeMillis();
    Map<String, OptionalFailure<Object>> accumulatorResults = AccumulatorHelper.toResultMap(accumulators);
    return new JobExecutionResult(null, endTime - startTime, accumulatorResults);
}
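If this execute method belongs to the collection-based local executor (the enclosing class is not shown in the excerpt), a caller might look like the following sketch; the CollectionExecutor name and its ExecutionConfig constructor are assumptions:

    ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();
    env.fromElements("a", "b").output(new DiscardingOutputFormat<String>());
    Plan plan = env.createProgramPlan("collection job");

    // hypothetical: run the plan eagerly and read back the measured runtime
    JobExecutionResult result = new CollectionExecutor(plan.getExecutionConfig()).execute(plan);
    long runtimeMillis = result.getNetRuntime();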
@Test
public void testUnaryFunctionReadFieldsAnnotation() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
    input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>())
        .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
    Plan plan = env.createProgramPlan();

    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

    SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

    FieldSet read = semantics.getReadFields(0);
    assertNotNull(read);
    assertEquals(2, read.size());
    assertTrue(read.contains(0));
    assertTrue(read.contains(2));
}
@Test
public void testProjectionSemProps1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

    tupleDs.project(1, 3, 2, 0, 3).output(new DiscardingOutputFormat<Tuple>());

    Plan plan = env.createProgramPlan();

    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    PlanProjectOperator<?, ?> projectOperator = ((PlanProjectOperator<?, ?>) sink.getInput());

    SingleInputSemanticProperties props = projectOperator.getSemanticProperties();

    assertEquals(1, props.getForwardingTargetFields(0, 0).size());
    assertEquals(1, props.getForwardingTargetFields(0, 1).size());
    assertEquals(1, props.getForwardingTargetFields(0, 2).size());
    assertEquals(2, props.getForwardingTargetFields(0, 3).size());
    assertTrue(props.getForwardingTargetFields(0, 1).contains(0));
    assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 2).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 0).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 3).contains(4));
}
@Test
public void testJoinWith() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    List<Tuple1<String>> strLi = new ArrayList<Tuple1<String>>();
    strLi.add(new Tuple1<String>("a"));
    strLi.add(new Tuple1<String>("b"));

    DataSet<Tuple1<String>> strs = env.fromCollection(strLi);
    DataSet<Tuple1<String>> strs1 = env.fromCollection(strLi);
    strs.join(strs1).where(0).equalTo(0)
        .with(new FlatJoinFunction<Tuple1<String>, Tuple1<String>, String>() {
            @Override
            public void join(Tuple1<String> first, Tuple1<String> second, Collector<String> out) throws Exception {
                //
            }
        })
        .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();
    plan.accept(new Visitor<Operator<?>>() {
        @Override
        public boolean preVisit(Operator<?> visitable) {
            if (visitable instanceof InnerJoinOperatorBase) {
                Assert.assertEquals("Join at testJoinWith(NamesTest.java:93)", visitable.getName());
            }
            return true;
        }

        @Override
        public void postVisit(Operator<?> visitable) {}
    });
}
private DualInputPlanNode createPlanAndGetJoinNode(JoinHint hint) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> i1 = env.generateSequence(1, 1000);
    DataSet<Long> i2 = env.generateSequence(1, 1000);

    plan.accept(new Visitor<Operator<?>>() {
        @Override
        public boolean preVisit(Operator<?> visitable) {
LOG.debug("Beginning compilation of program '" + program.getJobName() + '\''); final ExecutionMode defaultDataExchangeMode = program.getExecutionConfig().getExecutionMode(); final int defaultParallelism = program.getDefaultParallelism() > 0 ? program.getDefaultParallelism() : this.defaultParallelism; program.accept(graphCreator); OptimizedPlan plan = new PlanFinalizer().createFinalPlan(bestPlanSinks, program.getJobName(), program);
try {
    final int parallelism = 8;
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

    env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));

    Plan p = env.createProgramPlan();
    GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
@Test
public void translateDistinctPlain() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

        initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

        Plan p = env.createProgramPlan();

        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

        // check keys
        assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0));

        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
public int getMaximumParallelism() {
    MaxDopVisitor visitor = new MaxDopVisitor();
    accept(visitor);
    return Math.max(visitor.maxDop, this.defaultParallelism);
}
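The MaxDopVisitor referenced here is not part of this excerpt; a plausible sketch, assuming it simply records the largest explicitly configured operator parallelism while walking the plan (the Visitor preVisit/postVisit shape matches the other snippets in this section):

    private static final class MaxDopVisitor implements Visitor<Operator<?>> {

        // highest parallelism seen so far; -1 means "unset"
        private int maxDop = -1;

        @Override
        public boolean preVisit(Operator<?> visitable) {
            this.maxDop = Math.max(this.maxDop, visitable.getParallelism());
            return true;  // continue descending into the inputs
        }

        @Override
        public void postVisit(Operator<?> visitable) {}
    }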
/**
 * Creates a JSON representation of the given dataflow's execution plan.
 *
 * @param plan The dataflow plan.
 * @return The dataflow's execution plan, as a JSON string.
 * @throws Exception Thrown, if the optimization process that creates the execution plan failed.
 */
@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
    final int parallelism = plan.getDefaultParallelism() == ExecutionConfig.PARALLELISM_DEFAULT
        ? 1
        : plan.getDefaultParallelism();

    Optimizer pc = new Optimizer(new DataStatistics(), this.baseConfiguration);
    pc.setDefaultParallelism(parallelism);
    OptimizedPlan op = pc.compile(plan);

    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(op);
}
int maxParallelism = plan.getMaximumParallelism();
if (maxParallelism > 0) {
    this.taskManagerNumSlots = maxParallelism;
}

plan.setDefaultParallelism(slotsPerTaskManager * numTaskManagers);

JobGraph jobGraph = jgg.compileJobGraph(op, plan.getJobId());
private static List<DataSinkNode> convertPlan(Plan p) {
    GraphCreatingVisitor dagCreator = new GraphCreatingVisitor(17, p.getExecutionConfig().getExecutionMode());

    // create the DAG
    p.accept(dagCreator);
    List<DataSinkNode> sinks = dagCreator.getSinks();

    // build a single root and run the branch tracking logic
    OptimizerNode rootNode;
    if (sinks.size() == 1) {
        rootNode = sinks.get(0);
    } else {
        Iterator<DataSinkNode> iter = sinks.iterator();
        rootNode = iter.next();
        while (iter.hasNext()) {
            rootNode = new SinkJoiner(rootNode, iter.next());
        }
    }
    rootNode.accept(new IdAndEstimatesVisitor(null));
    rootNode.accept(new BranchesVisitor());

    return sinks;
}
@Override
public void postPass(OptimizedPlan plan) {
    executionConfig = plan.getOriginalPlan().getExecutionConfig();
    for (SinkPlanNode sink : plan.getDataSinks()) {
        traverse(sink);
    }
}
int maxParallelism = plan.getMaximumParallelism();
if (maxParallelism > 0) {
    this.taskManagerNumSlots = maxParallelism;
}

JobGraph jobGraph = jgg.compileJobGraph(op, plan.getJobId());
/**
 * Returns the plan without the required jars when the files are already provided by the cluster.
 *
 * @return The plan without attached jar files.
 * @throws ProgramInvocationException Thrown, if the program is using the interactive mode,
 *         in which case no plan can be created without executing the program.
 */
public JobWithJars getPlanWithoutJars() throws ProgramInvocationException {
    if (isUsingProgramEntryPoint()) {
        return new JobWithJars(getPlan(), Collections.<URL>emptyList(), classpaths, userCodeClassLoader);
    } else {
        throw new ProgramInvocationException("Cannot create a " + JobWithJars.class.getSimpleName() +
                " for a program that is using the interactive mode.", getPlan().getJobId());
    }
}
private OptimizerPostPass getPostPassFromPlan(Plan program) {
    final String className = program.getPostPassClassName();
    if (className == null) {
        throw new CompilerException("Optimizer Post Pass class description is null");
    }

    try {
        Class<? extends OptimizerPostPass> clazz = Class.forName(className).asSubclass(OptimizerPostPass.class);
        try {
            return InstantiationUtil.instantiate(clazz, OptimizerPostPass.class);
        } catch (RuntimeException rtex) {
            // unwrap the source exception
            if (rtex.getCause() != null) {
                throw new CompilerException("Cannot instantiate optimizer post pass: " + rtex.getMessage(),
                        rtex.getCause());
            } else {
                throw rtex;
            }
        }
    } catch (ClassNotFoundException cnfex) {
        throw new CompilerException("Cannot load Optimizer post-pass class '" + className + "'.", cnfex);
    } catch (ClassCastException ccex) {
        throw new CompilerException("Class '" + className + "' is not an optimizer post-pass.", ccex);
    }
}
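Because the post pass is instantiated reflectively from the class name stored on the Plan, an implementation needs a public no-argument constructor. A minimal sketch of a conforming class, assuming the OptimizerPostPass interface exposes only the postPass(OptimizedPlan) method shown in the earlier snippet; the name NoOpPostPass and its body are illustrative only:

    // hypothetical no-op post pass
    public class NoOpPostPass implements OptimizerPostPass {

        // InstantiationUtil.instantiate requires a public no-argument constructor
        public NoOpPostPass() {}

        @Override
        public void postPass(OptimizedPlan plan) {
            // nothing to finalize; a real post pass would traverse the data sinks here
        }
    }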