/**
 * Runs the Spargel connected components example on a small, hard-coded graph.
 * <p>
 * Vertex ids are taken from the sequence 0..10, each id is turned into an initial
 * (Long, Long) vertex tuple by {@code IdAssigner}, and a vertex-centric iteration using
 * {@code CCUpdater} and {@code CCMessager} is applied over the fixed edge list. The value
 * 100 is handed to the iteration as its last argument (presumably the maximum number of
 * supersteps - confirm against {@code VertexCentricIteration.withPlainEdges}).
 * The result is printed.
 *
 * @param args Command line arguments; they are not read.
 * @throws Exception Propagated from building or executing the program.
 */
public static void main(String[] args) throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Long> ids = env.generateSequence(0, 10);

	// the fixed edge list of the example graph
	DataSet<Tuple2<Long, Long>> edges = env.fromElements(
			new Tuple2<Long, Long>(0L, 2L),
			new Tuple2<Long, Long>(2L, 4L),
			new Tuple2<Long, Long>(4L, 8L),
			new Tuple2<Long, Long>(1L, 5L),
			new Tuple2<Long, Long>(3L, 7L),
			new Tuple2<Long, Long>(3L, 9L));

	DataSet<Tuple2<Long, Long>> labeledVertices = ids.map(new IdAssigner());

	DataSet<Tuple2<Long, Long>> components = labeledVertices.runOperation(
			VertexCentricIteration.withPlainEdges(edges, new CCUpdater(), new CCMessager(), 100));

	components.print();
	env.execute("Spargel Connected Components");
}
/**
 * Builds the default pages data set: one {@code Tuple1<Long>} for every id in
 * {@code [0, numPages)}, in ascending order.
 *
 * @param env The execution environment used to create the data set.
 * @return A data set created from the generated collection of page ids.
 */
public static DataSet<Tuple1<Long>> getDefaultPagesDataSet(ExecutionEnvironment env) {
	final List<Tuple1<Long>> pages = new ArrayList<Tuple1<Long>>();

	long pageId = 0;
	while (pageId < numPages) {
		pages.add(new Tuple1<Long>(pageId));
		pageId++;
	}

	return env.fromCollection(pages);
}
/**
 * Generic method to create an input DataSet with an {@link InputFormat}. The DataSet will not be
 * immediately created - instead, this method returns a DataSet that will be lazily created from
 * the input format once the program is executed.
 * <p>
 * Since all data sets need specific information about their types, this method needs to determine
 * the type of the data produced by the input format. It will attempt to determine the data type
 * by reflection, unless the input format implements the {@link ResultTypeQueryable} interface.
 * In the latter case, this method will invoke the {@link ResultTypeQueryable#getProducedType()}
 * method to determine data type produced by the input format.
 *
 * @param <X> The type of the elements produced by the input format.
 * @param inputFormat The input format used to create the data set.
 * @return A DataSet that represents the data created by the input format.
 *
 * @throws IllegalArgumentException Thrown, if the input format is {@code null}.
 * @throws InvalidProgramException Thrown, if creating the input with the automatically extracted
 *                                 type fails. The original failure is attached as the cause.
 *
 * @see #createInput(InputFormat, TypeInformation)
 */
public <X> DataSource<X> createInput(InputFormat<X, ?> inputFormat) {
	if (inputFormat == null) {
		throw new IllegalArgumentException("InputFormat must not be null.");
	}

	try {
		return createInput(inputFormat, TypeExtractor.getInputFormatTypes(inputFormat));
	}
	catch (Exception e) {
		InvalidProgramException failure = new InvalidProgramException(
				"The type returned by the input format could not be automatically determined. " +
				"Please specify the TypeInformation of the produced type explicitly.");
		// Attach the original failure so the stack trace shows why the extraction failed.
		// initCause is used because only the message-only constructor is known to exist here.
		failure.initCause(e);
		throw failure;
	}
}
/**
 * Triggers the program execution. The environment will execute all parts of the program that have
 * resulted in a "sink" operation. Sink operations are for example printing results
 * ({@link DataSet#print()}), writing results (e.g. {@link DataSet#writeAsText(String)} or
 * {@link DataSet#write(eu.stratosphere.api.common.io.FileOutputFormat, String)}), or other generic
 * data sinks created with {@link DataSet#output(eu.stratosphere.api.common.io.OutputFormat)}.
 * <p>
 * The job runs under the name returned by {@code getDefaultName()}; this method simply delegates
 * to the name-taking {@code execute} variant with that name.
 *
 * @return The result of the job execution, containing elapsed time and accumulators.
 * @throws Exception Thrown, if the program execution fails.
 */
public JobExecutionResult execute() throws Exception {
	final String jobName = getDefaultName();
	return execute(jobName);
}
public static void main(String[] args) throws Exception { final int numVertices = 100; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Double>> intialRanks = env.generateSequence(1, numVertices) .map(new MapFunction<Long, Tuple2<Long, Double>>() { public Tuple2<Long, Double> map(Long value) { DataSet<Tuple3<Long, Long, Double>> edgesWithProbability = env.generateSequence(1, numVertices) .flatMap(new FlatMapFunction<Long, Tuple3<Long, Long, Double>>() { public void flatMap(Long value, Collector<Tuple3<Long, Long, Double>> out) { env.execute("Spargel PageRank");
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .aggregate(Aggregations.SUM, 1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); } else { counts.print(); } // execute program env.execute("WordCount Example"); }
/**
 * Runs a word count over {@code WordCountData.TEXT} and collects the resulting
 * (word, count) tuples into {@code resultsCollected} through a local collection output format.
 *
 * @throws Exception Propagated from building or executing the program.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// tokenize, group on field 0 and sum up field 1 in a single pipeline
	DataSet<Tuple2<String, Integer>> wordCounts = env.fromElements(WordCountData.TEXT)
			.flatMap(new WordCount.Tokenizer())
			.groupBy(0)
			.aggregate(Aggregations.SUM, 1);

	wordCounts.output(new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected));

	env.execute("Word Count Collection");
}
}
/**
 * Runs a bulk iteration (10 passed to {@code iterate}) over the integers 1..8 in which the
 * iteration's data set is attached as broadcast set "bc" to a group reduce
 * ({@code PickOneAllReduce}) over the original data. The program runs with a degree of
 * parallelism of 1, and the first collected element is asserted to be 8.
 *
 * @throws Exception Propagated from building or executing the program.
 */
@Override
protected void testProgram() throws Exception {
	final List<Integer> collected = new ArrayList<Integer>();

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setDegreeOfParallelism(1);

	DataSet<Integer> numbers = env.fromElements(1, 2, 3, 4, 5, 6, 7, 8);
	IterativeDataSet<Integer> loop = numbers.iterate(10);

	// the step function reduces the original input and sees the loop data set via broadcast
	DataSet<Integer> stepResult = numbers
			.reduceGroup(new PickOneAllReduce())
			.withBroadcastSet(loop, "bc");

	DataSet<Integer> closed = loop.closeWith(stepResult);
	closed.output(new LocalCollectionOutputFormat<Integer>(collected));

	env.execute();

	Assert.assertEquals(8, collected.get(0).intValue());
}
public static String runProgram(String resultPath) throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setDegreeOfParallelism(DOP); DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput); DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput); int keyPosition = 0; env.execute();
/**
 * JDBC example: reads every row of the {@code books} table and inserts the rows into the
 * {@code newbooks} table, both through the embedded Derby driver on the in-memory database
 * {@code ebookshop}.
 * <p>
 * {@code prepareTestDb()} is invoked before the program is assembled (presumably it sets up
 * the tables - it is defined elsewhere).
 *
 * @param args Command line arguments; they are not read.
 * @throws Exception Propagated from the database preparation or the program execution.
 */
public static void main(String[] args) throws Exception {
	// driver and connection URL shared by the source and the sink
	final String driver = "org.apache.derby.jdbc.EmbeddedDriver";
	final String dbUrl = "jdbc:derby:memory:ebookshop";

	prepareTestDb();

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// source: all rows of "books", typed as (int, string, string, double, int)
	DataSet<Tuple5> books = env.createInput(
			JDBCInputFormat.buildJDBCInputFormat()
					.setDrivername(driver)
					.setDBUrl(dbUrl)
					.setQuery("select * from books")
					.finish(),
			new TupleTypeInfo(Tuple5.class, INT_TYPE_INFO, STRING_TYPE_INFO, STRING_TYPE_INFO, DOUBLE_TYPE_INFO, INT_TYPE_INFO)
	);

	// sink: one parameterized insert per record
	books.output(
			JDBCOutputFormat.buildJDBCOutputFormat()
					.setDrivername(driver)
					.setDBUrl(dbUrl)
					.setQuery("insert into newbooks (id,title,author,price,qty) values (?,?,?,?,?)")
					.finish());

	env.execute();
}
private static DataSet<Tuple3<Integer, String, Integer>> getRanksDataSet(ExecutionEnvironment env) { // Create DataSet for ranks relation (Rank, URL, Avg-Visit-Duration) if(fileOutput) { return env.readCsvFile(ranksPath) .fieldDelimiter('|') .types(Integer.class, String.class, Integer.class); } else { return WebLogData.getRankDataSet(env); } }
/**
 * Creates the default vertex data set, holding the vertex ids 1 through 16.
 *
 * @param env The execution environment used to create the data set.
 * @return A data set with the sixteen default vertex ids.
 */
public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {
	final DataSet<Long> vertices = env.fromElements(
			1L, 2L, 3L, 4L,
			5L, 6L, 7L, 8L,
			9L, 10L, 11L, 12L,
			13L, 14L, 15L, 16L);
	return vertices;
}
try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Double>> inputData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0)); DataSet<Tuple2<Long, Double>> result = constructPlan(inputData, 10); result.print(); Plan p = env.createProgramPlan();
/** * Creates a new local environment. */ public LocalEnvironment() { if(!ExecutionEnvironment.localExecutionIsAllowed()) { throw new InvalidProgramException("The LocalEnvironment cannot be used when submitting a program through a client."); } }
/**
 * Creates the program's {@link Plan}. A plan describes all data sources, data sinks, and
 * operations of the program, and how they interact, as one isolated unit that a
 * {@link eu.stratosphere.api.common.PlanExecutor} can run. Obtaining the plan and handing it to
 * an executor is an alternative way of running a program; it is only possible if the program
 * consists only of distributed operations.
 * <p>
 * This variant delegates to the one-argument {@code createProgramPlan}, passing {@code null}.
 *
 * @return The program's plan.
 */
public JavaPlan createProgramPlan() {
	final JavaPlan plan = createProgramPlan(null);
	return plan;
}
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .aggregate(Aggregations.SUM, 1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); } else { counts.print(); } // execute program env.execute("WordCount Example"); }
@Override protected void testProgram() throws Exception { final int NUM_ITERS = 4; final double expectedFactor = (int) Math.pow(7, NUM_ITERS); // this is an artificial program, it does not compute anything sensical ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Double>> initialData = env.fromElements(new Tuple2<Long, Double>(1L, 1.0), new Tuple2<Long, Double>(2L, 2.0), new Tuple2<Long, Double>(3L, 3.0), new Tuple2<Long, Double>(4L, 4.0), new Tuple2<Long, Double>(5L, 5.0), new Tuple2<Long, Double>(6L, 6.0)); DataSet<Tuple2<Long, Double>> result = MultipleJoinsWithSolutionSetCompilerTest.constructPlan(initialData, NUM_ITERS); List<Tuple2<Long, Double>> resultCollector = new ArrayList<Tuple2<Long,Double>>(); result.output(new LocalCollectionOutputFormat<Tuple2<Long,Double>>(resultCollector)); env.execute(); for (Tuple2<Long, Double> tuple : resultCollector) { Assert.assertEquals(expectedFactor * tuple.f0, tuple.f1.doubleValue(), 0.0); } } }
public static void main(String[] args) throws Exception { final int NUM_VERTICES = 100; ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Long> vertices = env.generateSequence(1, NUM_VERTICES); DataSet<Tuple3<Long, Long, Double>> edgesWithProbability = env.generateSequence(1, NUM_VERTICES) .flatMap(new FlatMapFunction<Long, Tuple3<Long, Long, Double>>() { public void flatMap(Long value, Collector<Tuple3<Long, Long, Double>> out) { env.execute("Spargel PageRank");
/**
 * Reads the orders relation from the CSV file at {@code ordersPath}, using '|' as the field
 * delimiter and the include mask "101011010" to select the five fields that are parsed as
 * (Integer, String, String, String, Integer).
 *
 * @param env The execution environment used to create the data set.
 * @return The orders data set as 5-tuples.
 */
private static DataSet<Tuple5<Integer, String, String, String, Integer>> getOrdersDataSet(ExecutionEnvironment env) {
	final DataSet<Tuple5<Integer, String, String, String, Integer>> orders =
			env.readCsvFile(ordersPath)
					.fieldDelimiter('|')
					.includeFields("101011010")
					.types(Integer.class, String.class, String.class, String.class, Integer.class);
	return orders;
}
/**
 * Returns the built-in vertex data set, which consists of the ids 1 to 16.
 *
 * @param env The execution environment used to create the data set.
 * @return A data set containing the sixteen built-in vertex ids.
 */
public static DataSet<Long> getDefaultVertexDataSet(ExecutionEnvironment env) {
	DataSet<Long> defaultVertices = env.fromElements(
			1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
			9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L);

	return defaultVertices;
}