eu.stratosphere.api.java.ExecutionEnvironment java code examples

/**
 * Builds and runs the "Spargel Connected Components" example on a small, hard-coded graph
 * and prints the resulting data set.
 *
 * @param args Command line arguments; not read by this program.
 * @throws Exception Propagated from building or executing the program.
 */
public static void main(String[] args) throws Exception {
  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

  // the vertex ids come from a generated sequence, the edges are listed explicitly
  final DataSet<Long> ids = environment.generateSequence(0, 10);
  final DataSet<Tuple2<Long, Long>> graphEdges = environment.fromElements(
      new Tuple2<Long, Long>(0L, 2L),
      new Tuple2<Long, Long>(2L, 4L),
      new Tuple2<Long, Long>(4L, 8L),
      new Tuple2<Long, Long>(1L, 5L),
      new Tuple2<Long, Long>(3L, 7L),
      new Tuple2<Long, Long>(3L, 9L));

  // every id is turned into a (Long, Long) pair by the IdAssigner
  final DataSet<Tuple2<Long, Long>> verticesWithComponent = ids.map(new IdAssigner());

  // NOTE(review): the trailing 100 is presumably the iteration limit - confirm against VertexCentricIteration
  final DataSet<Tuple2<Long, Long>> components = verticesWithComponent.runOperation(
      VertexCentricIteration.withPlainEdges(graphEdges, new CCUpdater(), new CCMessager(), 100));

  components.print();
  environment.execute("Spargel Connected Components");
}

/**
 * Creates the default pages data set: one {@code Tuple1<Long>} for every value from 0 up to
 * (but excluding) {@code numPages}.
 *
 * @param env The environment used to create the data set from the collected tuples.
 * @return The data set created from the generated collection.
 */
public static DataSet<Tuple1<Long>> getDefaultPagesDataSet(ExecutionEnvironment env) {
  final List<Tuple1<Long>> pages = new ArrayList<Tuple1<Long>>();

  long pageId = 0;
  while (pageId < numPages) {
    pages.add(new Tuple1<Long>(pageId));
    pageId++;
  }

  return env.fromCollection(pages);
}

/**
 * Generic method to create an input DataSet with an {@link InputFormat}. The DataSet will not be
 * immediately created - instead, this method returns a DataSet that will be lazily created from
 * the input format once the program is executed.
 * <p>
 * Since all data sets need specific information about their types, this method needs to determine
 * the type of the data produced by the input format. It will attempt to determine the data type
 * by reflection, unless the input format implements the {@link ResultTypeQueryable} interface.
 * In the latter case, this method will invoke the {@link ResultTypeQueryable#getProducedType()}
 * method to determine data type produced by the input format.
 * 
 * @param inputFormat The input format used to create the data set.
 * @return A DataSet that represents the data created by the input format.
 * @throws IllegalArgumentException Thrown, if the input format is {@code null}.
 * @throws InvalidProgramException Thrown, if the produced type could not be determined. The
 *         exception that caused the failure is attached as the cause.
 * 
 * @see #createInput(InputFormat, TypeInformation)
 */
public <X> DataSource<X> createInput(InputFormat<X, ?> inputFormat) {
  if (inputFormat == null) {
    throw new IllegalArgumentException("InputFormat must not be null.");
  }
  
  try {
    return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
  }
  catch (Exception e) {
    InvalidProgramException failure = new InvalidProgramException(
        "The type returned by the input format could not be automatically determined. " +
        "Please specify the TypeInformation of the produced type explicitly.");
    // keep the original exception reachable, so the actual reason is not lost;
    // initCause is used because only the message-only constructor is known to exist here
    failure.initCause(e);
    throw failure;
  }
}

/**
 * Triggers the program execution. The environment will execute all parts of the program that have
 * resulted in a "sink" operation. Sink operations are, for example, printing results
 * ({@link DataSet#print()}), writing results (e.g. {@link DataSet#writeAsText(String)} or
 * {@link DataSet#write(eu.stratosphere.api.common.io.FileOutputFormat, String)}), or other generic
 * data sinks created with {@link DataSet#output(eu.stratosphere.api.common.io.OutputFormat)}.
 * <p>
 * The program execution will be logged and displayed with a generated default name: this method
 * passes the value of {@code getDefaultName()} on to {@code execute(...)}.
 * 
 * @return The result of the job execution, containing elapsed time and accumulators.
 * @throws Exception Thrown if the program execution fails.
 */
public JobExecutionResult execute() throws Exception {
  return execute(getDefaultName());
}

public static void main(String[] args) throws Exception {
  final int numVertices = 100;
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple2<Long, Double>> intialRanks = env.generateSequence(1, numVertices)
              .map(new MapFunction<Long, Tuple2<Long, Double>>() {
                public Tuple2<Long, Double> map(Long value) {
  DataSet<Tuple3<Long, Long, Double>> edgesWithProbability = env.generateSequence(1, numVertices)
              .flatMap(new FlatMapFunction<Long, Tuple3<Long, Long, Double>>() {
                public void flatMap(Long value, Collector<Tuple3<Long, Long, Double>> out) {
  env.execute("Spargel PageRank");

/**
 * Entry point of the WordCount example. Builds the word-count program and executes it under
 * the name "WordCount Example".
 *
 * @param args Command line arguments, handed to {@code parseParameters}.
 * @throws Exception Propagated from building or executing the program.
 */
public static void main(String[] args) throws Exception {

  // nothing to do if the parameters were rejected
  final boolean parametersAccepted = parseParameters(args);
  if (!parametersAccepted) {
    return;
  }

  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  final DataSet<String> lines = getTextDataSet(env);

  // tokenize into (word,1) pairs, group on tuple field 0, sum up tuple field 1
  final DataSet<Tuple2<String, Integer>> wordCounts = lines
      .flatMap(new Tokenizer())
      .groupBy(0)
      .aggregate(Aggregations.SUM, 1);

  // the result is either printed or written as CSV, depending on the fileOutput flag
  if (!fileOutput) {
    wordCounts.print();
  } else {
    wordCounts.writeAsCsv(outputPath, "\n", " ");
  }

  env.execute("WordCount Example");
}

  /**
   * Runs a word count over {@code WordCountData.TEXT} and sends the result to a
   * {@code LocalCollectionOutputFormat} backed by {@code resultsCollected}.
   *
   * @throws Exception Propagated from building or executing the program.
   */
  @Override
  protected void testProgram() throws Exception {
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    final DataSet<String> lines = environment.fromElements(WordCountData.TEXT);

    // tokenize, group on tuple field 0 and sum up tuple field 1 in a single chain
    final DataSet<Tuple2<String, Integer>> counts = lines
        .flatMap(new WordCount.Tokenizer())
        .groupBy(0)
        .aggregate(Aggregations.SUM, 1);

    final LocalCollectionOutputFormat<Tuple2<String, Integer>> collectingSink =
        new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected);
    counts.output(collectingSink);

    environment.execute("Word Count Collection");
  }
}

/**
 * Runs an iteration whose step function reads the iteration's data set only through the
 * broadcast set named "bc", and checks the first collected result value.
 *
 * @throws Exception Propagated from building or executing the program.
 */
@Override
protected void testProgram() throws Exception {

  final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
  environment.setDegreeOfParallelism(1);

  final DataSet<Integer> input = environment.fromElements(1, 2, 3, 4, 5, 6, 7, 8);
  final IterativeDataSet<Integer> loop = input.iterate(10);

  // the group reduce runs on the initial input; the iteration data set is attached as broadcast set
  final DataSet<Integer> stepResult = input
      .reduceGroup(new PickOneAllReduce())
      .withBroadcastSet(loop, "bc");

  final List<Integer> collected = new ArrayList<Integer>();
  final LocalCollectionOutputFormat<Integer> collectingSink = new LocalCollectionOutputFormat<Integer>(collected);
  loop.closeWith(stepResult).output(collectingSink);

  environment.execute();

  final int firstValue = collected.get(0).intValue();
  Assert.assertEquals(8, firstValue);
}

public static String runProgram(String resultPath) throws Exception {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.setDegreeOfParallelism(DOP);
  DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput);
  DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput);
  int keyPosition = 0;
  env.execute();

/**
 * Reads all rows of the "books" table through a {@code JDBCInputFormat} and inserts them into
 * the "newbooks" table through a {@code JDBCOutputFormat}. Both formats use the same driver
 * and database URL.
 *
 * @param args Command line arguments; not read by this program.
 * @throws Exception Propagated from preparing the database or running the program.
 */
public static void main(String[] args) throws Exception {
  prepareTestDb();

  // driver and URL are shared by the input and the output format
  final String driverName = "org.apache.derby.jdbc.EmbeddedDriver";
  final String dbUrl = "jdbc:derby:memory:ebookshop";

  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

  final DataSet<Tuple5> books = env.createInput(
      JDBCInputFormat.buildJDBCInputFormat()
          .setDrivername(driverName)
          .setDBUrl(dbUrl)
          .setQuery("select * from books")
          .finish(),
      new TupleTypeInfo(Tuple5.class, INT_TYPE_INFO, STRING_TYPE_INFO, STRING_TYPE_INFO, DOUBLE_TYPE_INFO, INT_TYPE_INFO));

  books.output(
      JDBCOutputFormat.buildJDBCOutputFormat()
          .setDrivername(driverName)
          .setDBUrl(dbUrl)
          .setQuery("insert into newbooks (id,title,author,price,qty) values (?,?,?,?,?)")
          .finish());

  env.execute();
}

/**
 * Provides the DataSet for the ranks relation (Rank, URL, Avg-Visit-Duration).
 *
 * @param env The environment used to create the data set.
 * @return The data set from {@code WebLogData} if {@code fileOutput} is not set, otherwise the
 *         '|'-delimited CSV file at {@code ranksPath}.
 */
private static DataSet<Tuple3<Integer, String, Integer>> getRanksDataSet(ExecutionEnvironment env) {
  // NOTE(review): the input source is selected by the flag named fileOutput - confirm this is intended
  if (!fileOutput) {
    return WebLogData.getRankDataSet(env);
  }

  final DataSet<Tuple3<Integer, String, Integer>> ranksFromFile = env.readCsvFile(ranksPath)
      .fieldDelimiter('|')
      .types(Integer.class, String.class, Integer.class);
  return ranksFromFile;
}

/**
 * Creates the default vertex data set, holding the ids 1 through 16.
 *
 * @param env The environment used to create the data set.
 * @return The data set with the default vertex ids.
 */
public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {
  final DataSet<Long> defaultVertices = env.fromElements(
      1L, 2L, 3L, 4L,
      5L, 6L, 7L, 8L,
      9L, 10L, 11L, 12L,
      13L, 14L, 15L, 16L);
  return defaultVertices;
}

try {
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Tuple2<Long, Double>> inputData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0));
  DataSet<Tuple2<Long, Double>> result = constructPlan(inputData, 10);
  result.print();
  Plan p = env.createProgramPlan();

/**
 * Creates a new local environment.
 */
public LocalEnvironment() {
  if(!ExecutionEnvironment.localExecutionIsAllowed()) {
    throw new InvalidProgramException("The LocalEnvironment cannot be used when submitting a program through a client.");
  }
}

/**
 * Creates the program's {@link Plan}. The plan is a description of all data sources, data sinks,
 * and operations and how they interact, as an isolated unit that can be executed with a
 * {@link eu.stratosphere.api.common.PlanExecutor}. Obtaining a plan and starting it with an
 * executor is an alternative way to run a program and is only possible if the program consists
 * only of distributed operations.
 * <p>
 * This variant delegates to the one-argument {@code createProgramPlan} and passes {@code null}
 * (presumably meaning that no explicit name is given - confirm against that overload).
 * 
 * @return The program's plan.
 */
public JavaPlan createProgramPlan() {
  return createProgramPlan(null);
}

/**
 * Runs the WordCount example: reads the text, counts the words and either writes the counts
 * as CSV or prints them, then executes the program as "WordCount Example".
 *
 * @param args Command line arguments, handed to {@code parseParameters}.
 * @throws Exception Propagated from building or executing the program.
 */
public static void main(String[] args) throws Exception {

  if (parseParameters(args)) {
    // set up the execution environment and fetch the input
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    final DataSet<String> input = getTextDataSet(environment);

    // split up the lines in pairs (2-tuples) containing: (word,1)
    final DataSet<Tuple2<String, Integer>> wordsWithOne = input.flatMap(new Tokenizer());

    // group by the tuple field "0" and sum up tuple field "1"
    final DataSet<Tuple2<String, Integer>> summed = wordsWithOne.groupBy(0).aggregate(Aggregations.SUM, 1);

    // emit result
    if (fileOutput) {
      summed.writeAsCsv(outputPath, "\n", " ");
    } else {
      summed.print();
    }

    // execute program
    environment.execute("WordCount Example");
  }
}

  /**
   * Executes the plan built by {@code MultipleJoinsWithSolutionSetCompilerTest.constructPlan}
   * on six (id, value) pairs and checks that every resulting value equals its id multiplied
   * by 7 to the power of the number of iterations.
   *
   * @throws Exception Propagated from building or executing the program.
   */
  @Override
  protected void testProgram() throws Exception {

    final int iterations = 4;
    final double factor = (int) Math.pow(7, iterations);

    // this is an artificial program, it does not compute anything sensical
    final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    @SuppressWarnings("unchecked")
    final DataSet<Tuple2<Long, Double>> input = environment.fromElements(
        new Tuple2<Long, Double>(1L, 1.0),
        new Tuple2<Long, Double>(2L, 2.0),
        new Tuple2<Long, Double>(3L, 3.0),
        new Tuple2<Long, Double>(4L, 4.0),
        new Tuple2<Long, Double>(5L, 5.0),
        new Tuple2<Long, Double>(6L, 6.0));

    final DataSet<Tuple2<Long, Double>> planResult =
        MultipleJoinsWithSolutionSetCompilerTest.constructPlan(input, iterations);

    final List<Tuple2<Long, Double>> collected = new ArrayList<Tuple2<Long,Double>>();
    planResult.output(new LocalCollectionOutputFormat<Tuple2<Long,Double>>(collected));

    environment.execute();

    // every collected value must be exactly factor times its key
    for (int i = 0; i < collected.size(); i++) {
      final Tuple2<Long, Double> entry = collected.get(i);
      final double expected = factor * entry.f0;
      final double actual = entry.f1.doubleValue();
      Assert.assertEquals(expected, actual, 0.0);
    }
  }
}

public static void main(String[] args) throws Exception {
  final int NUM_VERTICES = 100;
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  DataSet<Long> vertices = env.generateSequence(1, NUM_VERTICES);
  DataSet<Tuple3<Long, Long, Double>> edgesWithProbability = env.generateSequence(1, NUM_VERTICES)
              .flatMap(new FlatMapFunction<Long, Tuple3<Long, Long, Double>>() {
                public void flatMap(Long value, Collector<Tuple3<Long, Long, Double>> out) {
  env.execute("Spargel PageRank");

/**
 * Creates the orders data set from the '|'-delimited CSV file at {@code ordersPath}. The
 * include mask "101011010" selects the fields that are read into the five tuple positions.
 *
 * @param env The environment used to read the CSV file.
 * @return The orders as 5-tuples of (Integer, String, String, String, Integer).
 */
private static DataSet<Tuple5<Integer, String, String, String, Integer>> getOrdersDataSet(ExecutionEnvironment env) {
  final DataSet<Tuple5<Integer, String, String, String, Integer>> orders =
      env.readCsvFile(ordersPath)
          .fieldDelimiter('|')
          .includeFields("101011010")
          .types(Integer.class, String.class, String.class, String.class, Integer.class);
  return orders;
}

/**
 * Returns a data set with the default vertex ids, the values 1 to 16.
 *
 * @param env The environment the data set is created in.
 * @return The default vertex id data set.
 */
public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {
  final DataSet<Long> vertexIds = env.fromElements(
      1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
      9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L);
  return vertexIds;
}

Most used methods

execute
getExecutionEnvironment
fromCollection
fromElements
createInput
Generic method to create an input DataSet with in InputFormat. The DataSet will not be immediately c
createProgramPlan
Creates the program's Plan. The plan is a description of all data sources, data sinks, and operation
localExecutionIsAllowed
readCsvFile
createLocalEnvironment
Creates a LocalEnvironment. The local execution environment will run the program in a multi-threaded
disableLocalExecution
fromParallelCollection
Creates a new data set that contains elements in the iterator. The iterator is splittable, allowing
generateSequence
Creates a new data set that contains a sequence of numbers. The data set will be created in parallel

Popular in Java

Start an intent from android
scheduleAtFixedRate (ScheduledExecutorService)
startActivity (Activity)
setScale (BigDecimal)
UnknownHostException (java.net)
Thrown when a hostname can not be resolved.
Permission (java.security)
Legacy security code; do not use.
MessageFormat (java.text)
Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
Arrays (java.util)
This class contains various methods for manipulating arrays (such as sorting and searching). This cl
ServletException (javax.servlet)
Defines a general exception a servlet can throw when it encounters difficulty.
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
Best plugins for Eclipse

How to use ExecutionEnvironment in eu.stratosphere.api.java

Best Java code snippets using eu.stratosphere.api.java.ExecutionEnvironment (Showing top 20 results out of 315)

How to use
ExecutionEnvironment
in
eu.stratosphere.api.java