/**
 * Reduces this non-grouped {@link DataSet} with the given {@link ReduceFunction}.
 *
 * <p>A ReduceFunction merges two elements into one element of the same type; the
 * transformation keeps calling it until a single element is left, and that element
 * is the result.
 *
 * @param reducer The ReduceFunction to apply to the DataSet; must not be {@code null}.
 * @return A ReduceOperator representing the reduced DataSet.
 * @throws NullPointerException if {@code reducer} is {@code null}.
 *
 * @see ReduceFunction
 * @see ReduceOperator
 * @see DataSet
 */
public ReduceOperator<T> reduce(ReduceFunction<T> reducer) {
	if (reducer != null) {
		return new ReduceOperator<T>(this, reducer);
	}
	// Reject a missing function here, before any operator is constructed.
	throw new NullPointerException("Reduce function must not be null.");
}
/**
 * Creates a reduce operator for the reduce-all case (as opposed to the
 * reduce-per-group case): no grouping is recorded, so {@code grouper} is {@code null}.
 *
 * <p>The type of {@code input} is handed to the superclass constructor, and
 * {@code extractSemanticAnnotationsFromUdf} is invoked with the runtime class of
 * {@code function}.
 *
 * @param input The DataSet whose elements are reduced.
 * @param function The ReduceFunction to apply; its class is dereferenced here, so a
 *                 {@code null} value fails with a NullPointerException.
 */
public ReduceOperator(DataSet<IN> input, ReduceFunction<IN> function) {
	super(input, input.getType());

	// No grouping in the reduce-all case.
	this.grouper = null;
	this.function = function;

	extractSemanticAnnotationsFromUdf(function.getClass());
}
@Override protected eu.stratosphere.api.common.operators.SingleInputOperator<?, IN, ?> translateToDataFlow(Operator<IN> input) { String name = getName() != null ? getName() : function.getClass().getName(); UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<IN, IN>(getInputType(), getInputType()); ReduceOperatorBase<IN, GenericReduce<IN>> po = new ReduceOperatorBase<IN, GenericReduce<IN>>(function, operatorInfo, new int[0], name); Keys.SelectorFunctionKeys<IN, ?> selectorKeys = (Keys.SelectorFunctionKeys<IN, ?>) grouper.getKeys(); MapOperatorBase<?, IN, ?> po = translateSelectorFunctionReducer(selectorKeys, function, getInputType(), name, input, this.getParallelism()); return po; UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<IN, IN>(getInputType(), getInputType()); ReduceOperatorBase<IN, GenericReduce<IN>> po = new ReduceOperatorBase<IN, GenericReduce<IN>>(function, operatorInfo, logicalKeyPositions, name); po.setDegreeOfParallelism(this.getParallelism());
.groupBy(0).reduce(new CentroidAccumulator()) .map(new CentroidAverager());
groupBy(1).reduce(new BCTuple3Reduce()).withBroadcastSet(intDs, "ints");
.groupBy(0).reduce(new CentroidAccumulator()) .map(new CentroidAverager());
.groupBy(0).reduce(new CentroidAccumulator()) .map(new CentroidAverager());
/**
 * Creates a reduce operator for the reduce-per-group case: the given grouping is
 * kept as {@code grouper}.
 *
 * <p>The DataSet underlying {@code input} and that DataSet's type are handed to the
 * superclass constructor, and {@code extractSemanticAnnotationsFromUdf} is invoked
 * with the runtime class of {@code function}.
 *
 * @param input The Grouping whose underlying DataSet is reduced.
 * @param function The ReduceFunction to apply; its class is dereferenced here, so a
 *                 {@code null} value fails with a NullPointerException.
 */
public ReduceOperator(Grouping<IN> input, ReduceFunction<IN> function) {
	super(input.getDataSet(), input.getDataSet().getType());

	// Remember the grouping so the grouped case can be told apart from reduce-all.
	this.grouper = input;
	this.function = function;

	extractSemanticAnnotationsFromUdf(function.getClass());
}
/**
 * Reduces each group of this grouped {@link DataSet} with the given {@link ReduceFunction}.
 *
 * <p>A ReduceFunction merges two elements into one element of the same type; for
 * every group the transformation keeps calling it until a single element is left
 * for that group.
 *
 * @param reducer The ReduceFunction to apply to each group of the DataSet; must not be {@code null}.
 * @return A ReduceOperator representing the reduced DataSet.
 * @throws NullPointerException if {@code reducer} is {@code null}.
 *
 * @see ReduceFunction
 * @see ReduceOperator
 * @see DataSet
 */
public ReduceOperator<T> reduce(ReduceFunction<T> reducer) {
	if (reducer != null) {
		return new ReduceOperator<T>(this, reducer);
	}
	// Reject a missing function here, before any operator is constructed.
	throw new NullPointerException("Reduce function must not be null.");
}