jobName = getDefaultName();
Plan plan = translator.translateToPlan(this.sinks, jobName);
if (getParallelism() > 0) {
    plan.setDefaultParallelism(getParallelism());
}
plan.setExecutionConfig(getConfig());
plan.accept(new Visitor<org.apache.flink.api.common.operators.Operator<?>>() {
@Test
public void testCorrectTranslation() {
    final String jobName = "Test JobName";
    final int numIterations = 13;
    final int defaultParallelism = 133;
    final int iterationParallelism = 77;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    // ------------ construct the test program ------------------
    {
        env.setParallelism(defaultParallelism);

        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Double, Long, String>> initialDataSet =
            env.fromElements(new Tuple3<>(3.44, 5L, "abc"));

        IterativeDataSet<Tuple3<Double, Long, String>> bulkIteration = initialDataSet.iterate(numIterations);
        bulkIteration.setParallelism(iterationParallelism);

        // test that multiple iteration consumers are supported
        DataSet<Tuple3<Double, Long, String>> identity =
            bulkIteration.map(new IdentityMapper<Tuple3<Double, Long, String>>());

        DataSet<Tuple3<Double, Long, String>> result = bulkIteration.closeWith(identity);

        result.output(new DiscardingOutputFormat<Tuple3<Double, Long, String>>());
        result.writeAsText("/dev/null");
    }

    Plan p = env.createProgramPlan(jobName);

    // ------------- validate the plan ----------------
    BulkIterationBase<?> iteration =
        (BulkIterationBase<?>) p.getDataSinks().iterator().next().getInput();

    assertEquals(jobName, p.getJobName());
    assertEquals(defaultParallelism, p.getDefaultParallelism());
    assertEquals(iterationParallelism, iteration.getParallelism());
}
graph.setExecutionConfig(program.getOriginalPlan().getExecutionConfig());
graph.setSessionTimeout(program.getOriginalPlan().getSessionTimeout());

vertex.setInputDependencyConstraint(
    program.getOriginalPlan().getExecutionConfig().getDefaultInputDependencyConstraint());

graph.addVertex(vertex);

program.getOriginalPlan().getCachedFiles().stream()
    .map(entry -> Tuple2.of(entry.getKey(), entry.getValue()))
    .collect(Collectors.toList());
public static OptimizedPlan getOptimizedPlan(Optimizer compiler, Plan p, int parallelism)
        throws CompilerException {
    Logger log = LoggerFactory.getLogger(ClusterClient.class);

    if (parallelism > 0 && p.getDefaultParallelism() <= 0) {
        log.debug("Changing plan default parallelism from {} to {}", p.getDefaultParallelism(), parallelism);
        p.setDefaultParallelism(parallelism);
    }
    log.debug("Set parallelism {}, plan default parallelism {}", parallelism, p.getDefaultParallelism());

    return compiler.compile(p);
}
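A minimal caller sketch, assuming a plan built through an ExecutionEnvironment and the two-argument Optimizer constructor used in getOptimizerPlanAsJSON further down; the job name and the parallelism of 4 are arbitrary example values:

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.fromElements(1, 2, 3).output(new DiscardingOutputFormat<Integer>());
    Plan plan = env.createProgramPlan("example job");

    // the default parallelism of 4 is applied only if the plan does not define one
    Optimizer compiler = new Optimizer(new DataStatistics(), new Configuration());
    OptimizedPlan optimized = getOptimizedPlan(compiler, plan, 4);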
public JobExecutionResult execute(Plan program) throws Exception {
    long startTime = System.currentTimeMillis();

    initCache(program.getCachedFiles());
    Collection<? extends GenericDataSinkBase<?>> sinks = program.getDataSinks();
    for (Operator<?> sink : sinks) {
        execute(sink);
    }

    long endTime = System.currentTimeMillis();
    Map<String, OptionalFailure<Object>> accumulatorResults = AccumulatorHelper.toResultMap(accumulators);
    return new JobExecutionResult(null, endTime - startTime, accumulatorResults);
}
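If this execute method belongs to the collection-based local executor (the enclosing class is not shown in the excerpt), a caller might look like the following sketch; the CollectionExecutor name and its ExecutionConfig constructor are assumptions:

    ExecutionEnvironment env = ExecutionEnvironment.createCollectionsEnvironment();
    env.fromElements("a", "b").output(new DiscardingOutputFormat<String>());
    Plan plan = env.createProgramPlan("collection job");

    // hypothetical: run the plan eagerly and read back the measured runtime
    JobExecutionResult result = new CollectionExecutor(plan.getExecutionConfig()).execute(plan);
    long runtimeMillis = result.getNetRuntime();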
@Test
public void testUnaryFunctionReadFieldsAnnotation() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(3L, 2L, 1L));
    input.map(new ReadSetMapper<Tuple3<Long, Long, Long>>())
        .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
    Plan plan = env.createProgramPlan();

    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    MapOperatorBase<?, ?, ?> mapper = (MapOperatorBase<?, ?, ?>) sink.getInput();

    SingleInputSemanticProperties semantics = mapper.getSemanticProperties();

    FieldSet read = semantics.getReadFields(0);
    assertNotNull(read);
    assertEquals(2, read.size());
    assertTrue(read.contains(0));
    assertTrue(read.contains(2));
}
@Test
public void testProjectionSemProps1() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo);

    tupleDs.project(1, 3, 2, 0, 3).output(new DiscardingOutputFormat<Tuple>());

    Plan plan = env.createProgramPlan();

    GenericDataSinkBase<?> sink = plan.getDataSinks().iterator().next();
    PlanProjectOperator<?, ?> projectOperator = ((PlanProjectOperator<?, ?>) sink.getInput());

    SingleInputSemanticProperties props = projectOperator.getSemanticProperties();

    assertEquals(1, props.getForwardingTargetFields(0, 0).size());
    assertEquals(1, props.getForwardingTargetFields(0, 1).size());
    assertEquals(1, props.getForwardingTargetFields(0, 2).size());
    assertEquals(2, props.getForwardingTargetFields(0, 3).size());
    assertTrue(props.getForwardingTargetFields(0, 1).contains(0));
    assertTrue(props.getForwardingTargetFields(0, 3).contains(1));
    assertTrue(props.getForwardingTargetFields(0, 2).contains(2));
    assertTrue(props.getForwardingTargetFields(0, 0).contains(3));
    assertTrue(props.getForwardingTargetFields(0, 3).contains(4));
}
@Test
public void testJoinWith() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    List<Tuple1<String>> strLi = new ArrayList<Tuple1<String>>();
    strLi.add(new Tuple1<String>("a"));
    strLi.add(new Tuple1<String>("b"));

    DataSet<Tuple1<String>> strs = env.fromCollection(strLi);
    DataSet<Tuple1<String>> strs1 = env.fromCollection(strLi);
    strs.join(strs1).where(0).equalTo(0)
        .with(new FlatJoinFunction<Tuple1<String>, Tuple1<String>, String>() {
            @Override
            public void join(Tuple1<String> first, Tuple1<String> second, Collector<String> out) throws Exception {
                //
            }
        })
        .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();
    plan.accept(new Visitor<Operator<?>>() {
        @Override
        public boolean preVisit(Operator<?> visitable) {
            if (visitable instanceof InnerJoinOperatorBase) {
                Assert.assertEquals("Join at testJoinWith(NamesTest.java:93)", visitable.getName());
            }
            return true;
        }

        @Override
        public void postVisit(Operator<?> visitable) {}
    });
}
private DualInputPlanNode createPlanAndGetJoinNode(JoinHint hint) {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> i1 = env.generateSequence(1, 1000);
    DataSet<Long> i2 = env.generateSequence(1, 1000);

    plan.accept(new Visitor<Operator<?>>() {
        @Override
        public boolean preVisit(Operator<?> visitable) {
LOG.debug("Beginning compilation of program '" + program.getJobName() + '\''); final ExecutionMode defaultDataExchangeMode = program.getExecutionConfig().getExecutionMode(); final int defaultParallelism = program.getDefaultParallelism() > 0 ? program.getDefaultParallelism() : this.defaultParallelism; program.accept(graphCreator); OptimizedPlan plan = new PlanFinalizer().createFinalPlan(bestPlanSinks, program.getJobName(), program);
try {
    final int parallelism = 8;
    ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

    env.fromElements(new Tuple3<Double, StringValue, Long>(3.141592, new StringValue("foobar"), Long.valueOf(77)));

    Plan p = env.createProgramPlan();
    GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();
@Test
public void translateDistinctPlain() {
    try {
        final int parallelism = 8;
        ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(parallelism);

        DataSet<Tuple3<Double, StringValue, LongValue>> initialData = getSourceDataSet(env);

        initialData.distinct().output(new DiscardingOutputFormat<Tuple3<Double, StringValue, LongValue>>());

        Plan p = env.createProgramPlan();

        GenericDataSinkBase<?> sink = p.getDataSinks().iterator().next();

        // currently distinct is translated to a Reduce
        ReduceOperatorBase<?, ?> reducer = (ReduceOperatorBase<?, ?>) sink.getInput();

        // check types
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getInputType());
        assertEquals(initialData.getType(), reducer.getOperatorInfo().getOutputType());

        // check keys
        assertArrayEquals(new int[] {0, 1, 2}, reducer.getKeyColumns(0));

        // parallelism was not configured on the operator
        assertTrue(reducer.getParallelism() == 1 || reducer.getParallelism() == -1);

        assertTrue(reducer.getInput() instanceof GenericDataSourceBase<?, ?>);
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail("Test caused an error: " + e.getMessage());
    }
}
public int getMaximumParallelism() {
    MaxDopVisitor visitor = new MaxDopVisitor();
    accept(visitor);
    return Math.max(visitor.maxDop, this.defaultParallelism);
}
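The MaxDopVisitor referenced here is not part of this excerpt; a plausible sketch, assuming it simply records the largest explicitly configured operator parallelism while walking the plan (the Visitor preVisit/postVisit shape matches the other snippets in this section):

    private static final class MaxDopVisitor implements Visitor<Operator<?>> {

        // highest parallelism seen so far; -1 means "unset"
        private int maxDop = -1;

        @Override
        public boolean preVisit(Operator<?> visitable) {
            this.maxDop = Math.max(this.maxDop, visitable.getParallelism());
            return true;  // continue descending into the inputs
        }

        @Override
        public void postVisit(Operator<?> visitable) {}
    }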
/**
 * Creates a JSON representation of the given dataflow's execution plan.
 *
 * @param plan The dataflow plan.
 * @return The dataflow's execution plan, as a JSON string.
 * @throws Exception Thrown, if the optimization process that creates the execution plan failed.
 */
@Override
public String getOptimizerPlanAsJSON(Plan plan) throws Exception {
    final int parallelism = plan.getDefaultParallelism() == ExecutionConfig.PARALLELISM_DEFAULT
        ? 1
        : plan.getDefaultParallelism();

    Optimizer pc = new Optimizer(new DataStatistics(), this.baseConfiguration);
    pc.setDefaultParallelism(parallelism);
    OptimizedPlan op = pc.compile(plan);

    return new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(op);
}
int maxParallelism = plan.getMaximumParallelism();
if (maxParallelism > 0) {
    this.taskManagerNumSlots = maxParallelism;
}

plan.setDefaultParallelism(slotsPerTaskManager * numTaskManagers);

JobGraph jobGraph = jgg.compileJobGraph(op, plan.getJobId());
private static List<DataSinkNode> convertPlan(Plan p) {
    GraphCreatingVisitor dagCreator = new GraphCreatingVisitor(17, p.getExecutionConfig().getExecutionMode());

    // create the DAG
    p.accept(dagCreator);
    List<DataSinkNode> sinks = dagCreator.getSinks();

    // build a single root and run the branch tracking logic
    OptimizerNode rootNode;
    if (sinks.size() == 1) {
        rootNode = sinks.get(0);
    } else {
        Iterator<DataSinkNode> iter = sinks.iterator();
        rootNode = iter.next();
        while (iter.hasNext()) {
            rootNode = new SinkJoiner(rootNode, iter.next());
        }
    }
    rootNode.accept(new IdAndEstimatesVisitor(null));
    rootNode.accept(new BranchesVisitor());

    return sinks;
}
@Override
public void postPass(OptimizedPlan plan) {
    executionConfig = plan.getOriginalPlan().getExecutionConfig();
    for (SinkPlanNode sink : plan.getDataSinks()) {
        traverse(sink);
    }
}
int maxParallelism = plan.getMaximumParallelism();
if (maxParallelism > 0) {
    this.taskManagerNumSlots = maxParallelism;
}

JobGraph jobGraph = jgg.compileJobGraph(op, plan.getJobId());
/**
 * Returns the plan without the required jars when the files are already provided by the cluster.
 *
 * @return The plan without attached jar files.
 * @throws ProgramInvocationException Thrown, if the program is using the interactive mode,
 *         in which case no plan can be created without executing the program.
 */
public JobWithJars getPlanWithoutJars() throws ProgramInvocationException {
    if (isUsingProgramEntryPoint()) {
        return new JobWithJars(getPlan(), Collections.<URL>emptyList(), classpaths, userCodeClassLoader);
    } else {
        throw new ProgramInvocationException("Cannot create a " + JobWithJars.class.getSimpleName() +
                " for a program that is using the interactive mode.", getPlan().getJobId());
    }
}
private OptimizerPostPass getPostPassFromPlan(Plan program) {
    final String className = program.getPostPassClassName();
    if (className == null) {
        throw new CompilerException("Optimizer Post Pass class description is null");
    }

    try {
        Class<? extends OptimizerPostPass> clazz = Class.forName(className).asSubclass(OptimizerPostPass.class);
        try {
            return InstantiationUtil.instantiate(clazz, OptimizerPostPass.class);
        } catch (RuntimeException rtex) {
            // unwrap the source exception
            if (rtex.getCause() != null) {
                throw new CompilerException("Cannot instantiate optimizer post pass: " + rtex.getMessage(),
                        rtex.getCause());
            } else {
                throw rtex;
            }
        }
    } catch (ClassNotFoundException cnfex) {
        throw new CompilerException("Cannot load Optimizer post-pass class '" + className + "'.", cnfex);
    } catch (ClassCastException ccex) {
        throw new CompilerException("Class '" + className + "' is not an optimizer post-pass.", ccex);
    }
}
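Because the post pass is instantiated reflectively from the class name stored on the Plan, an implementation needs a public no-argument constructor. A minimal sketch of a conforming class, assuming the OptimizerPostPass interface exposes only the postPass(OptimizedPlan) method shown in the earlier snippet; the name NoOpPostPass and its body are illustrative only:

    // hypothetical no-op post pass
    public class NoOpPostPass implements OptimizerPostPass {

        // InstantiationUtil.instantiate requires a public no-argument constructor
        public NoOpPostPass() {}

        @Override
        public void postPass(OptimizedPlan plan) {
            // nothing to finalize; a real post pass would traverse the data sinks here
        }
    }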