/**
 * Creates a FlatMap transformation over this {@link DataSet}.
 * <p>
 * The supplied {@link FlatMapFunction} is invoked for each element of the DataSet;
 * a single invocation may produce any number of result elements, including none.
 *
 * @param flatMapper The FlatMapFunction that is invoked for each element of the DataSet.
 * @return A FlatMapOperator that represents the transformed DataSet.
 * @throws NullPointerException If {@code flatMapper} is {@code null}.
 *
 * @see FlatMapFunction
 * @see FlatMapOperator
 * @see DataSet
 */
public <R> FlatMapOperator<T, R> flatMap(FlatMapFunction<T, R> flatMapper) {
	if (flatMapper != null) {
		return new FlatMapOperator<T, R>(this, flatMapper);
	}

	// reject a missing function eagerly, before any operator is constructed
	throw new NullPointerException("FlatMap function must not be null.");
}
/**
 * Creates a FlatMap operator over the given input data set.
 * <p>
 * The result type handed to the super constructor is obtained from
 * {@code TypeExtractor.getFlatMapReturnTypes} using the function and the input's type.
 *
 * @param input The data set this operator consumes.
 * @param function The flat-map function stored in this operator.
 */
public FlatMapOperator(DataSet<IN> input, FlatMapFunction<IN, OUT> function) {
	super(
		input,
		TypeExtractor.getFlatMapReturnTypes(function, input.getType()));

	this.function = function;

	// semantic annotations are read from the function's runtime class
	extractSemanticAnnotationsFromUdf(this.function.getClass());
}
@Override protected eu.stratosphere.api.common.operators.base.FlatMapOperatorBase<IN, OUT, GenericFlatMap<IN,OUT>> translateToDataFlow(Operator<IN> input) { String name = getName() != null ? getName() : function.getClass().getName(); // create operator FlatMapOperatorBase<IN, OUT, GenericFlatMap<IN, OUT>> po = new FlatMapOperatorBase<IN, OUT, GenericFlatMap<IN, OUT>>(function, new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()), name); // set input po.setInput(input); // set dop if(this.getParallelism() > 0) { // use specified dop po.setDegreeOfParallelism(this.getParallelism()); } else { // if no dop has been specified, use dop of input operator to enable chaining po.setDegreeOfParallelism(input.getDegreeOfParallelism()); } return po; } }
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .aggregate(Aggregations.SUM, 1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); } else { counts.print(); } // execute program env.execute("WordCount Example"); }
out.collect(outTuple); }).withBroadcastSet(ints, "ints"); bcFlatMapDs.writeAsCsv(resultPath); env.execute();
public static void main(String[] args) throws Exception { if(!parseParameters(args)) { return; } // set up the execution environment final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // get input data DataSet<String> text = getTextDataSet(env); DataSet<Tuple2<String, Integer>> counts = // split up the lines in pairs (2-tuples) containing: (word,1) text.flatMap(new Tokenizer()) // group by the tuple field "0" and sum up tuple field "1" .groupBy(0) .aggregate(Aggregations.SUM, 1); // emit result if(fileOutput) { counts.writeAsCsv(outputPath, "\n", " "); } else { counts.print(); } // execute program env.execute("WordCount Example"); }
.groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new DegreeCounter()) .groupBy(EdgeWithDegrees.V1,EdgeWithDegrees.V2).reduce(new DegreeJoiner());
.groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new DegreeCounter()) .groupBy(EdgeWithDegrees.V1,EdgeWithDegrees.V2).reduce(new DegreeJoiner());
.groupBy(0).aggregate(SUM, 1)
.groupBy(0).aggregate(SUM, 1)