/**
 * Runs a GroupReduce transformation over this grouped and sorted {@link DataSet}.
 * <p>
 * The supplied {@link GroupReduceFunction} is called for each group of the DataSet. It may
 * iterate over all elements of the group and emit any number of output elements, including none.
 *
 * @param reducer The GroupReduceFunction that is applied on each group of the DataSet.
 * @return A ReduceGroupOperator that represents the reduced DataSet.
 * @throws NullPointerException If {@code reducer} is {@code null}.
 *
 * @see GroupReduceFunction
 * @see ReduceGroupOperator
 * @see DataSet
 */
public <R> ReduceGroupOperator<T, R> reduceGroup(GroupReduceFunction<T, R> reducer) {
	if (reducer != null) {
		// Wrap this grouping and the user function into the operator.
		final ReduceGroupOperator<T, R> groupReduce = new ReduceGroupOperator<T, R>(this, reducer);
		return groupReduce;
	}
	throw new NullPointerException("GroupReduce function must not be null.");
}
/**
 * Constructor for a grouped reduce.
 * <p>
 * The operator's input is the data set underlying the grouping, and its result type is
 * obtained from {@code TypeExtractor.getGroupReduceReturnTypes} using the function and the
 * type of that data set.
 *
 * @param input The grouped input to be processed group-wise by the groupReduce function.
 * @param function The user-defined GroupReduce function.
 * @throws NullPointerException If {@code input} is {@code null}.
 */
public ReduceGroupOperator(Grouping<IN> input, GroupReduceFunction<IN, OUT> function) {
	// A null grouping can never be accepted here: the result-type extraction below has to
	// dereference it. Fail up front with a descriptive message instead of guarding only the
	// first argument and then hitting a message-less NullPointerException in the second.
	super(requireGrouping(input).getDataSet(),
			TypeExtractor.getGroupReduceReturnTypes(function, input.getDataSet().getType()));

	this.function = function;
	this.grouper = input;

	checkCombinability();
	extractSemanticAnnotationsFromUdf(function.getClass());
}

/**
 * Returns the given grouping unchanged, or throws a NullPointerException if it is null.
 * Static so that it may be called from within the explicit super constructor invocation.
 */
private static <T> Grouping<T> requireGrouping(Grouping<T> input) {
	if (input == null) {
		throw new NullPointerException("Grouped input must not be null.");
	}
	return input;
}
/**
 * Translates this operator into a GroupReduceOperatorBase of the common API.
 * <p>
 * The visible code derives the operator name from {@code getName()}, falling back to the class
 * name of the user function, and then builds the operator in three ways: with an empty key
 * array ({@code new int[0]}), via {@code translateSelectorFunctionReducer} using the grouper's
 * keys cast to {@code Keys.SelectorFunctionKeys}, and with {@code logicalKeyPositions}. The
 * latter two have their degree of parallelism set from {@code getParallelism()}.
 * <p>
 * NOTE(review): as shown, {@code po} is declared three times and {@code operatorInfo} twice in
 * the same scope, {@code logicalKeyPositions} is used without a visible declaration, and the
 * method body is not closed. Presumably these are the bodies of separate branches (non-grouped,
 * key-selector grouping, field-position grouping) whose surrounding conditionals and return
 * statements are missing from this excerpt - TODO confirm against the full source.
 *
 * @param input The operator that provides the input to the translated operator.
 */
@Override protected eu.stratosphere.api.common.operators.base.GroupReduceOperatorBase<?, OUT, ?> translateToDataFlow(Operator<IN> input) { String name = getName() != null ? getName() : function.getClass().getName(); UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()); GroupReduceOperatorBase<IN, OUT, GenericGroupReduce<IN, OUT>> po = new GroupReduceOperatorBase<IN, OUT, GenericGroupReduce<IN, OUT>>(function, operatorInfo, new int[0], name); Keys.SelectorFunctionKeys<IN, ?> selectorKeys = (Keys.SelectorFunctionKeys<IN, ?>) grouper.getKeys(); PlanUnwrappingReduceGroupOperator<IN, OUT, ?> po = translateSelectorFunctionReducer( selectorKeys, function, getInputType(), getResultType(), name, input, isCombinable()); po.setDegreeOfParallelism(this.getParallelism()); UnaryOperatorInformation<IN, OUT> operatorInfo = new UnaryOperatorInformation<IN, OUT>(getInputType(), getResultType()); GroupReduceOperatorBase<IN, OUT, GenericGroupReduce<IN, OUT>> po = new GroupReduceOperatorBase<IN, OUT, GenericGroupReduce<IN, OUT>>(function, operatorInfo, logicalKeyPositions, name); po.setDegreeOfParallelism(this.getParallelism());
.join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());
/**
 * Constructor for a non-grouped reduce (all reduce).
 * <p>
 * The result type is obtained from {@code TypeExtractor.getGroupReduceReturnTypes} using the
 * function and the type of the input data set. No grouping is recorded for this operator.
 *
 * @param input The input data set to the groupReduce function.
 * @param function The user-defined GroupReduce function.
 */
public ReduceGroupOperator(DataSet<IN> input, GroupReduceFunction<IN, OUT> function) {
	super(input, TypeExtractor.getGroupReduceReturnTypes(function, input.getType()));

	// An all-reduce has no grouping.
	this.grouper = null;
	this.function = function;

	checkCombinability();
}
.join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());
/**
 * Runs a GroupReduce transformation over this grouped {@link DataSet}.
 * <p>
 * The supplied {@link GroupReduceFunction} is called for each group of the DataSet. It may
 * iterate over all elements of the group and emit any number of output elements, including none.
 *
 * @param reducer The GroupReduceFunction that is applied on each group of the DataSet.
 * @return A ReduceGroupOperator that represents the reduced DataSet.
 * @throws NullPointerException If {@code reducer} is {@code null}.
 *
 * @see GroupReduceFunction
 * @see ReduceGroupOperator
 * @see DataSet
 */
public <R> ReduceGroupOperator<T, R> reduceGroup(GroupReduceFunction<T, R> reducer) {
	if (reducer != null) {
		// Wrap this grouping and the user function into the operator.
		final ReduceGroupOperator<T, R> groupReduce = new ReduceGroupOperator<T, R>(this, reducer);
		return groupReduce;
	}
	throw new NullPointerException("GroupReduce function must not be null.");
}
/**
 * Runs a GroupReduce transformation over this non-grouped {@link DataSet}.
 * <p>
 * The supplied {@link GroupReduceFunction} is called once with the full DataSet. It may
 * iterate over all elements of the DataSet and emit any number of output elements,
 * including none.
 *
 * @param reducer The GroupReduceFunction that is applied on the DataSet.
 * @return A ReduceGroupOperator that represents the reduced DataSet.
 * @throws NullPointerException If {@code reducer} is {@code null}.
 *
 * @see GroupReduceFunction
 * @see ReduceGroupOperator
 * @see DataSet
 */
public <R> ReduceGroupOperator<T, R> reduceGroup(GroupReduceFunction<T, R> reducer) {
	if (reducer != null) {
		// Wrap this data set and the user function into the operator.
		final ReduceGroupOperator<T, R> allReduce = new ReduceGroupOperator<T, R>(this, reducer);
		return allReduce;
	}
	throw new NullPointerException("GroupReduce function must not be null.");
}