/**
 * Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
 * and operations and how they interact, as an isolated unit that can be executed with a
 * {@link org.apache.flink.api.common.PlanExecutor}. Obtaining a plan and starting it with an
 * executor is an alternative way to run a program and is only possible if the program consists
 * only of distributed operations.
 *
 * <p>This automatically starts a new stage of execution.
 *
 * @return The program's plan.
 */
@Internal
public Plan createProgramPlan() {
    return createProgramPlan(null);
}

/**
 * Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
 * and operations and how they interact, as an isolated unit that can be executed with a
 * {@link org.apache.flink.api.common.PlanExecutor}. Obtaining a plan and starting it with an
 * executor is an alternative way to run a program and is only possible if the program consists
 * only of distributed operations.
 *
 * <p>This automatically starts a new stage of execution.
 *
 * @param jobName The name attached to the plan (displayed in logs and monitoring).
 * @return The program's plan.
 */
@Internal
public Plan createProgramPlan(String jobName) {
    return createProgramPlan(jobName, true);
}

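// A minimal sketch of the "obtain a plan, then start it with an executor" path described in the
// Javadoc above, assuming a DataSet-era Flink version in which
// PlanExecutor.createLocalExecutor(Configuration) and PlanExecutor#executePlan(Plan) are still
// available. The job name "PlanExecutorExample" and this helper method are made up for
// illustration; they do not appear in the Flink sources.
private static JobExecutionResult runPlanWithLocalExecutor() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.fromElements(1, 2, 3).output(new DiscardingOutputFormat<Integer>());

    // Describe the program as an isolated unit instead of calling env.execute().
    Plan plan = env.createProgramPlan("PlanExecutorExample");

    // Hand the plan to a local executor; this runs the same job that env.execute() would.
    PlanExecutor executor = PlanExecutor.createLocalExecutor(new Configuration());
    return executor.executePlan(plan);
}
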
private static Plan getProgram() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.fromElements(1, 2, 3).output(new DiscardingOutputFormat<Integer>());
    return env.createProgramPlan();
}

private Plan getWordCountPlan(File inFile, File outFile, int parallelism) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.readTextFile(inFile.getAbsolutePath())
        .flatMap(new Tokenizer())
        .groupBy(0)
        .sum(1)
        .writeAsCsv(outFile.getAbsolutePath());
    return env.createProgramPlan();
}

@Test
public void testDefaultName() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> strs = env.fromCollection(Arrays.asList("a", "b"));

    // WARNING: The test will fail if the following line is moved within the file
    // (the line number in the expected operator name below is hard-coded).
    strs.filter(new FilterFunction<String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(String value) throws Exception {
            return value.equals("a");
        }
    }).output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();
    testForName("Filter at testDefaultName(NamesTest.java:55)", plan);
}

@Test
public void testGivenName() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<String> strs = env.fromCollection(Arrays.asList("a", "b"));
    strs.filter(new FilterFunction<String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public boolean filter(String value) throws Exception {
            return value.equals("a");
        }
    }).name("GivenName").output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();
    testForName("GivenName", plan);
}

@Test
public void testIdentityIteration() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
        iteration.closeWith(iteration).output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}

public void executeTask(MapFunction<Integer, Integer> mapper) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.createInput(new InfiniteIntegerInputFormat(false))
        .map(mapper)
        .output(new DiscardingOutputFormat<Integer>());
    env.setParallelism(PARALLELISM);
    runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}

@Test
public void testBatch() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataSet<Integer> input = env.fromCollection(inputData);
    input
        .flatMap(new NotifyingMapper())
        .output(new DummyOutputFormat());

    // Extract the job graph and set the job id for the task to notify of accumulator changes.
    JobGraph jobGraph = getJobGraph(env.createProgramPlan());
    submitJobAndVerifyResults(jobGraph);
}

@Test
public void noPreviousPartitioningJoin2() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> set1 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> set2 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    DataSet<Tuple3<Integer, Integer, Integer>> joined = set1
        .join(set2, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
        .where(0, 1).equalTo(2, 1).with(new MockJoin());

    joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();

    checkValidJoinInputProperties(join);
}

@Test
public void noPreviousPartitioningCoGroup2() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> set1 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> set2 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1
        .coGroup(set2)
        .where(0, 1).equalTo(2, 1).with(new MockCoGroup());

    coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode coGroup = (DualInputPlanNode) sink.getInput().getSource();

    checkValidCoGroupInputProperties(coGroup);
}

@Test
public void noPreviousPartitioningJoin1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> set1 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> set2 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    DataSet<Tuple3<Integer, Integer, Integer>> joined = set1
        .join(set2, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
        .where(0, 1).equalTo(0, 1).with(new MockJoin());

    joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();

    checkValidJoinInputProperties(join);
}

@Test
public void noPreviousPartitioningCoGroup1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> set1 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> set2 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1
        .coGroup(set2)
        .where(0, 1).equalTo(0, 1).with(new MockCoGroup());

    coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode coGroup = (DualInputPlanNode) sink.getInput().getSource();

    checkValidCoGroupInputProperties(coGroup);
}

@Test
public void testUnaryFunctionReadFieldsAnnotation() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> input =
        env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
    input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>())
        .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
    Plan plan = env.createProgramPlan();

    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();
    SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

    FieldSet read = semantics.getReadFields(0);
    assertNotNull(read);
    assertEquals(2, read.size());
    assertTrue(read.contains(0));
    assertTrue(read.contains(2));
}

@Test
public void testUnaryFunctionInPlaceForwardedAnnotation() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, String, Integer>> input =
        env.fromElements(new Tuple3<Long, String, Integer>(3L, "test", 42));
    input.map(new IndividualForwardedMapper<Long, String, Integer>())
        .output(new DiscardingOutputFormat<Tuple3<Long, String, Integer>>());
    Plan plan = env.createProgramPlan();

    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();
    SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

    FieldSet fw1 = semantics.getForwardingTargetFields(0, 0);
    FieldSet fw2 = semantics.getForwardingTargetFields(0, 2);
    assertNotNull(fw1);
    assertNotNull(fw2);
    assertTrue(fw1.contains(0));
    assertTrue(fw2.contains(2));
}

private void executeTask(
        JoinFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> joiner,
        boolean slow,
        int parallelism) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Integer, Integer>> input1 = env.createInput(new InfiniteIntegerTupleInputFormat(slow));
    DataSet<Tuple2<Integer, Integer>> input2 = env.createInput(new InfiniteIntegerTupleInputFormat(slow));

    input1.join(input2, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
        .where(0)
        .equalTo(0)
        .with(joiner)
        .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

    env.setParallelism(parallelism);
    runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}

@Test
public void reuseSinglePartitioningCoGroup1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> set1 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> set2 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    DataSet<Tuple3<Integer, Integer, Integer>> coGrouped = set1
        .partitionByHash(0, 1)
        .map(new MockMapper()).withForwardedFields("0;1")
        .coGroup(set2)
        .where(0, 1).equalTo(0, 1).with(new MockCoGroup());

    coGrouped.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    DualInputPlanNode coGroup = (DualInputPlanNode) sink.getInput().getSource();

    checkValidCoGroupInputProperties(coGroup);
}

private void executeTaskWithGenerator(
        JoinFunction<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> joiner,
        int keys,
        int vals,
        int msecsTillCanceling,
        int maxTimeTillCanceled) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple2<Integer, Integer>> input1 = env.createInput(new UniformIntTupleGeneratorInputFormat(keys, vals));
    DataSet<Tuple2<Integer, Integer>> input2 = env.createInput(new UniformIntTupleGeneratorInputFormat(keys, vals));

    input1.join(input2, JoinOperatorBase.JoinHint.REPARTITION_SORT_MERGE)
        .where(0)
        .equalTo(0)
        .with(joiner)
        .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

    env.setParallelism(PARALLELISM);
    runAndCancelJob(env.createProgramPlan(), msecsTillCanceling, maxTimeTillCanceled);
}

private Plan getTestPlanRightStatic(String strategy) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple3<Long, Long, Long>> bigInput =
        env.readCsvFile("file://bigFile").types(Long.class, Long.class, Long.class).name("bigFile");
    DataSet<Tuple3<Long, Long, Long>> smallInput =
        env.readCsvFile("file://smallFile").types(Long.class, Long.class, Long.class).name("smallFile");

    IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);

    Configuration joinStrategy = new Configuration();
    joinStrategy.setString(Optimizer.HINT_SHIP_STRATEGY, Optimizer.HINT_SHIP_STRATEGY_REPARTITION_HASH);

    if (!strategy.equals("")) {
        joinStrategy.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);
    }

    DataSet<Tuple3<Long, Long, Long>> inner = iteration
        .join(smallInput)
        .where(0).equalTo(0)
        .with(new DummyJoiner())
        .name("DummyJoiner")
        .withParameters(joinStrategy);

    DataSet<Tuple3<Long, Long, Long>> output = iteration.closeWith(inner);
    output.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}

private Plan getTestPlanLeftStatic(String strategy) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> bigInput = env.fromElements(
        new Tuple3<Long, Long, Long>(1L, 2L, 3L),
        new Tuple3<Long, Long, Long>(1L, 2L, 3L),
        new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Big");

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> smallInput =
        env.fromElements(new Tuple3<Long, Long, Long>(1L, 2L, 3L)).name("Small");

    IterativeDataSet<Tuple3<Long, Long, Long>> iteration = bigInput.iterate(10);

    Configuration joinStrategy = new Configuration();
    joinStrategy.setString(Optimizer.HINT_LOCAL_STRATEGY, strategy);

    DataSet<Tuple3<Long, Long, Long>> inner = smallInput
        .join(iteration)
        .where(0).equalTo(0)
        .with(new DummyJoiner())
        .name("DummyJoiner")
        .withParameters(joinStrategy);

    DataSet<Tuple3<Long, Long, Long>> output = iteration.closeWith(inner);
    output.output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

    return env.createProgramPlan();
}