Refine search
/**
 * Joining an atomic type selected via "*" together with an additional key
 * expression is invalid and must raise {@link InvalidProgramException}.
 */
@Test(expected = InvalidProgramException.class)
public void testJoinKeyInvalidAtomic1() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Integer> atomicInput = env.fromElements(0, 0, 0);
	final DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleInput =
		env.fromCollection(emptyTupleData, tupleTypeInfo);

	// "*" already selects the whole atomic value; a second key expression must be rejected.
	atomicInput.join(tupleInput).where("*", "invalidKey");
}
@Test public void testFaultyMergeAccumulator() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.getConfig().disableSysoutLogging(); // Test Exception forwarding with faulty Accumulator implementation env.generateSequence(0, 10000) .map(new FaultyMergeAccumulatorUsingMapper()) .output(new DiscardingOutputFormat<>()); assertAccumulatorsShouldFail(env.execute()); }
/**
 * Runs a 10-step iteration whose step function reduces over the input with the
 * iteration data set as a broadcast variable, and checks the final value is 8.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	final DataSet<Integer> input = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);

	final IterativeDataSet<Integer> loop = input.iterate(10);
	final DataSet<Integer> step =
		input.reduceGroup(new PickOneAllReduce()).withBroadcastSet(loop, "bc");

	final List<Integer> collected = new ArrayList<Integer>();
	loop.closeWith(step).output(new LocalCollectionOutputFormat<Integer>(collected));
	env.execute();

	Assert.assertEquals(8, collected.get(0).intValue());
}
/**
 * Builds the program input: a semicolon-delimited CSV mapped to
 * {@link StringTriple} POJOs when {@code --input} is given, otherwise the
 * built-in example tuples.
 */
@SuppressWarnings("unchecked")
private static DataSet<StringTriple> getDataSet(ExecutionEnvironment env, ParameterTool params) {
	if (!params.has("input")) {
		System.out.println("Executing EmptyFieldsCountAccumulator example with default input data set.");
		System.out.println("Use --input to specify file input.");
		return env.fromCollection(getExampleInputTuples());
	}
	return env.readCsvFile(params.get("input"))
			.fieldDelimiter(";")
			.pojoType(StringTriple.class);
}
/**
 * Receives one record per known parameter from the streamer and applies it to
 * the environment: DOP (parallelism), RETRY (fixed-delay restart strategy with
 * a 10s delay), and ID (current environment id).
 */
private void receiveParameters(ExecutionEnvironment env) throws IOException {
	final int parameterCount = Parameters.values().length;
	for (int i = 0; i < parameterCount; i++) {
		final Tuple record = (Tuple) streamer.getRecord(true);
		final String name = ((String) record.getField(0)).toUpperCase();
		switch (Parameters.valueOf(name)) {
			case DOP:
				final Integer dop = record.<Integer>getField(1);
				env.setParallelism(dop);
				break;
			case RETRY:
				final int retries = record.<Integer>getField(1);
				env.setRestartStrategy(RestartStrategies.fixedDelayRestart(retries, 10000L));
				break;
			case ID:
				currentEnvironmentID = record.<Integer>getField(1);
				break;
		}
	}
	// A non-positive parallelism means it was never set; fall back to 1.
	if (env.getParallelism() < 0) {
		env.setParallelism(1);
	}
}
/**
 * Ensure that the user can pass a custom configuration object to the LocalEnvironment.
 */
@Test
public void testLocalEnvironmentWithConfig() throws Exception {
	final Configuration conf = new Configuration();
	conf.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, PARALLELISM);

	final ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(conf);
	env.setParallelism(ExecutionConfig.PARALLELISM_AUTO_MAX);
	env.getConfig().disableSysoutLogging();

	// Each parallel subtask emits its own index, so the number of collected
	// records equals the parallelism taken from the configuration.
	final DataSet<Integer> subtaskIndices = env
			.createInput(new ParallelismDependentInputFormat())
			.rebalance()
			.mapPartition(new RichMapPartitionFunction<Integer, Integer>() {
				@Override
				public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
					out.collect(getRuntimeContext().getIndexOfThisSubtask());
				}
			});

	final List<Integer> collected = subtaskIndices.collect();
	assertEquals(PARALLELISM, collected.size());
}
// Configure ZooKeeper-based high availability for the remote environment.
Configuration config = new Configuration();
config.setString(HighAvailabilityOptions.HA_MODE, "ZOOKEEPER");
config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zkQuorum);
config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, zookeeperStoragePath.getAbsolutePath());

ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
	"leader",
	1,
	config);
env.setParallelism(PARALLELISM);
env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));
env.getConfig().setExecutionMode(executionMode);
env.getConfig().disableSysoutLogging();

// FIX: this statement was missing its terminating semicolon (a compile error)
// and the generated sequence had no sink, so execute() had nothing to run.
// Attach a discarding sink to make the program executable.
final DataSet<Long> result = env.generateSequence(1, numElements);
result.output(new DiscardingOutputFormat<Long>());

env.execute();
/**
 * Ensure that the program parallelism can be set even if the configuration is supplied.
 */
@Test
public void testUserSpecificParallelism() throws Exception {
	final Configuration config = new Configuration();
	config.setString(AkkaOptions.STARTUP_TIMEOUT, VALID_STARTUP_TIMEOUT);

	final URI restAddress = MINI_CLUSTER_RESOURCE.getMiniCluster().getRestAddress();
	final ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment(
			restAddress.getHost(),
			restAddress.getPort(),
			config
	);
	env.setParallelism(USER_DOP);
	env.getConfig().disableSysoutLogging();

	// One record per subtask, so the result size reflects the effective parallelism.
	final DataSet<Integer> subtaskIndices = env
			.createInput(new ParallelismDependentInputFormat())
			.rebalance()
			.mapPartition(new RichMapPartitionFunction<Integer, Integer>() {
				@Override
				public void mapPartition(Iterable<Integer> values, Collector<Integer> out) throws Exception {
					out.collect(getRuntimeContext().getIndexOfThisSubtask());
				}
			});

	final List<Integer> collected = subtaskIndices.collect();
	assertEquals(USER_DOP, collected.size());
}
@Override
protected void testProgram() throws Exception {
	// Test passing a configuration object to an input format.
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final Configuration formatConfig = new Configuration();
	formatConfig.setString("prepend", "test");

	final DataSet<String> lines =
		env.createInput(new TestInputFormat(new Path(inputPath))).withParameters(formatConfig);

	final List<String> result = lines.collect();
	final String expectedResult = "ab\n" + "cd\n" + "ef\n";
	compareResultAsText(result, expectedResult);
}
/**
 * An identity mapper whose generic output type cannot be inferred must still
 * work when the type is supplied explicitly via a {@code TypeHint}.
 */
@Test
public void testIdentityMapWithMissingTypesAndStringTypeHint() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	final DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.getSmall3TupleDataSet(env);
	final DataSet<Tuple3<Integer, Long, String>> identity = input
			.map(new Mapper<Tuple3<Integer, Long, String>, Tuple3<Integer, Long, String>>())
			.returns(new TypeHint<Tuple3<Integer, Long, String>>(){});

	final List<Tuple3<Integer, Long, String>> result = identity.collect();
	final String expected = "(2,2,Hello)\n" + "(3,2,Hello world)\n" + "(1,1,Hi)\n";
	compareResultAsText(result, expected);
}
/**
 * Word count: tokenize each line, group by the word (field 0), count per
 * group, and write the result as space-separated CSV.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	env.readTextFile(dataPath)
			.flatMap(new TokenizeLine())
			.groupBy(0)
			.reduceGroup(new CountWords())
			.writeAsCsv(resultPath, "\n", " ");

	this.result = env.execute();
}
public static void main(String[] args) throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.fromElements(1, 2).output(new DiscardingOutputFormat<Integer>()); env.execute().getNetRuntime(); } }
@Override protected void testProgram() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); TestNonRichOutputFormat output = new TestNonRichOutputFormat(); env.createInput(new TestNonRichInputFormat()).output(output); try { env.execute(); } catch (Exception e){ // we didn't break anything by making everything rich. e.printStackTrace(); fail(e.getMessage()); } } }
/**
 * Executes the reduce, grouped-reduce, join, and cross checks once for every
 * parallelism from MAX_PARALLELISM down to 1.
 */
public void run() throws Exception {
	LOG.info("Random seed = {}", RANDOM_SEED);

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().disableSysoutLogging();

	for (int dop = MAX_PARALLELISM; dop > 0; dop--) {
		LOG.info("Parallelism = {}", dop);
		env.setParallelism(dop);

		testReduce(env);
		testGroupedReduce(env);
		testJoin(env);
		testCross(env);
	}
}
/** * Tests compiler fail for join program with replicated data source and changing parallelism. */ @Test(expected = CompilerException.class) public void checkJoinWithReplicatedSourceInputChangingparallelism() { ExecutionEnvironment env = ExecutionEnvironment.createLocalEnvironment(); env.setParallelism(DEFAULT_PARALLELISM); TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class); ReplicatingInputFormat<Tuple1<String>, FileInputSplit> rif = new ReplicatingInputFormat<Tuple1<String>, FileInputSplit>(new TupleCsvInputFormat<Tuple1<String>>(new Path("/some/path"), typeInfo)); DataSet<Tuple1<String>> source1 = env.createInput(rif, new TupleTypeInfo<Tuple1<String>>(BasicTypeInfo.STRING_TYPE_INFO)); DataSet<Tuple1<String>> source2 = env.readCsvFile("/some/otherpath").types(String.class); DataSink<Tuple2<Tuple1<String>, Tuple1<String>>> out = source1 .join(source2).where("*").equalTo("*").setParallelism(DEFAULT_PARALLELISM+2) .writeAsText("/some/newpath"); Plan plan = env.createProgramPlan(); // submit the plan to the compiler OptimizedPlan oPlan = compileNoStats(plan); }
/**
 * Builds a job over an infinite integer source with the given mapper and a
 * discarding sink, then runs it and cancels it.
 */
public void executeTask(MapFunction<Integer, Integer> mapper) throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	env.createInput(new InfiniteIntegerInputFormat(false))
			.map(mapper)
			.output(new DiscardingOutputFormat<Integer>());
	env.setParallelism(PARALLELISM);

	// Cancel after 5 seconds; allow up to 10 seconds for the job to stop.
	runAndCancelJob(env.createProgramPlan(), 5 * 1000, 10 * 1000);
}
/**
 * Joins two unpartitioned CSV sources with a REPARTITION_HASH_FIRST hint and
 * checks that the optimizer produced valid input properties for the join.
 */
@Test
public void noPreviousPartitioningJoin2() {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple3<Integer, Integer, Integer>> left =
		env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
	final DataSet<Tuple3<Integer, Integer, Integer>> right =
		env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

	final DataSet<Tuple3<Integer, Integer, Integer>> joined = left
		.join(right, JoinOperatorBase.JoinHint.REPARTITION_HASH_FIRST)
		.where(0, 1).equalTo(2, 1).with(new MockJoin());
	joined.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

	final Plan plan = env.createProgramPlan();
	final OptimizedPlan optimized = compileWithStats(plan);

	final SinkPlanNode sink = optimized.getDataSinks().iterator().next();
	final DualInputPlanNode join = (DualInputPlanNode) sink.getInput().getSource();
	checkValidJoinInputProperties(join);
}
public FlinkPlanner(List<String> classPath) { super(); this.classPath = classPath; env.getConfig().disableSysoutLogging(); if (env.getParallelism() <= 0) { // load the default parallelism from config GlobalConfiguration.loadConfiguration(new File(CliFrontend.getConfigurationDirectoryFromEnv()).getAbsolutePath()); org.apache.flink.configuration.Configuration configuration = GlobalConfiguration.getConfiguration(); int parallelism = configuration.getInteger(ConfigConstants.DEFAULT_PARALLELISM_KEY, -1); if (parallelism <= 0) { throw new RuntimeException("Please set the default parallelism via the -p command-line flag"); } else { env.setParallelism(parallelism); } } }
@Test(expected = InvalidProgramException.class) public void testArrayOrderFull() { List<Object[]> arrayData = new ArrayList<>(); arrayData.add(new Object[0]); final ExecutionEnvironment env = ExecutionEnvironment .getExecutionEnvironment(); DataSet<Object[]> pojoDs = env .fromCollection(arrayData); // must not work pojoDs.writeAsText("/tmp/willNotHappen") .sortLocalOutput("*", Order.ASCENDING); }
jobName = getDefaultName(); Plan plan = translator.translateToPlan(this.sinks, jobName); if (getParallelism() > 0) { plan.setDefaultParallelism(getParallelism()); plan.setExecutionConfig(getConfig()); if (!config.isAutoTypeRegistrationDisabled()) { plan.accept(new Visitor<org.apache.flink.api.common.operators.Operator<?>>() { registerCachedFilesWithPlan(plan); } catch (Exception e) { throw new RuntimeException("Error while registering cached files: " + e.getMessage(), e); int registeredTypes = config.getRegisteredKryoTypes().size() + config.getRegisteredPojoTypes().size() + config.getRegisteredTypesWithKryoSerializerClasses().size() + config.getRegisteredTypesWithKryoSerializers().size();