public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .aggregate(Aggregations.SUM, 1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); } else { counts.print(); } // execute program env.execute("WordCount Example"); }
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .aggregate(Aggregations.SUM, 1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); } else { counts.print(); } // execute program env.execute("WordCount Example"); }
@Override protected void testProgram() throws Exception { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<String> text = env.fromElements(WordCountData.TEXT); DataSet<Tuple2<String, Integer>> words = text.flatMap(new WordCount.Tokenizer()); DataSet<Tuple2<String, Integer>> result = words.groupBy(0).aggregate(Aggregations.SUM, 1); result.output(new LocalCollectionOutputFormat<Tuple2<String, Integer>>(resultsCollected)); env.execute("Word Count Collection"); } }
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) { DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration = initialData.iterateDelta(initialData, numIterations, 0); DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet() .join(iteration.getWorkset().flatMap(new Duplicator())).where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1) .groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander()) .join(iteration.getSolutionSet()).where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2); DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1); DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, changes); return result; }
lineitemsOfOrders.groupBy(0,1).aggregate(Aggregations.SUM, 2);
lineitemsOfOrders.groupBy(0,1).aggregate(Aggregations.SUM, 2);
.aggregate(Aggregations.SUM, 1);
.aggregate(Aggregations.SUM, 1);
.groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ComponentIdFilter());
.groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ComponentIdFilter());
.groupBy(0).aggregate(SUM, 1)
.groupBy(0).aggregate(SUM, 1)
.groupBy(0).aggregate(Aggregations.SUM, 1);
.groupBy(0).aggregate(Aggregations.SUM, 1);