/**
 * Applies a Map transformation to this {@link DataSet}.
 * <p>
 * The supplied {@link MapFunction} is invoked for every element of the DataSet, and each
 * invocation yields exactly one result element.
 *
 * @param mapper The MapFunction to invoke for each element of the DataSet.
 * @return A MapOperator representing the transformed DataSet.
 * @throws NullPointerException If {@code mapper} is {@code null}.
 *
 * @see MapFunction
 * @see MapOperator
 * @see DataSet
 */
public <R> MapOperator<T, R> map(MapFunction<T, R> mapper) {
	if (mapper != null) {
		return new MapOperator<T, R>(this, mapper);
	}

	// reject a missing function eagerly, before any operator is constructed
	throw new NullPointerException("Map function must not be null.");
}
/**
 * Creates a map operator that applies the given function to the given input data set.
 * <p>
 * The result type passed to the super constructor is obtained from
 * {@code TypeExtractor.getMapReturnTypes(function, input.getType())}. The function is then
 * stored in this operator, and {@code extractSemanticAnnotationsFromUdf} is invoked with the
 * function's runtime class.
 * <p>
 * Neither argument is null-checked here: {@code input.getType()} and
 * {@code function.getClass()} are dereferenced directly, so a {@code null} argument results
 * in a {@link NullPointerException}.
 *
 * @param input The data set this operator consumes.
 * @param function The map function applied by this operator.
 */
public MapOperator(DataSet<IN> input, MapFunction<IN, OUT> function) { super(input, TypeExtractor.getMapReturnTypes(function, input.getType())); this.function = function; extractSemanticAnnotationsFromUdf(function.getClass()); }
@Override protected eu.stratosphere.api.common.operators.base.MapOperatorBase<IN, OUT, GenericMap<IN, OUT>> translateToDataFlow(Operator<IN> input) { String name = getName() != null ? getName() : function.getClass().getName(); // create operator MapOperatorBase<IN, OUT, GenericMap<IN, OUT>> po = new MapOperatorBase<IN, OUT, GenericMap<IN, OUT>>(function, new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()), name); // set input po.setInput(input); // set dop if(this.getParallelism() > 0) { // use specified dop po.setDegreeOfParallelism(this.getParallelism()); } else { // if no dop has been specified, use dop of input operator to enable chaining po.setDegreeOfParallelism(input.getDegreeOfParallelism()); } return po; }
.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids") .map(new CountAppender()) .groupBy(0).reduce(new CentroidAccumulator()) .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");
.reduce(new ReduceFunction<Long>() { public Long reduce(Long value1, Long value2) { return value1 + value2; return new Tuple2<Long, Double>(value, 1.0/numVertices); }).withBroadcastSet(count, "count");
return out; }).withBroadcastSet(ints, "ints"); bcMapDs.writeAsCsv(resultPath); env.execute(); return value; }).withParameters(conf); bcMapDs.writeAsCsv(resultPath); env.execute();
.groupBy(0).aggregate(Aggregations.SUM, 1);
.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids") .map(new CountAppender()) .groupBy(0).reduce(new CentroidAccumulator()) .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");
.groupBy(0).aggregate(Aggregations.SUM, 1);
.map(new SelectNearestCenter()).withBroadcastSet(loop, "centroids") .map(new CountAppender()) .groupBy(0).reduce(new CentroidAccumulator()) .map(new SelectNearestCenter()).withBroadcastSet(finalCentroids, "centroids");