/**
 * Creates an {@link AggregateOperator} that applies a built-in aggregation to one field
 * of this non-grouped {@link Tuple} {@link DataSet}.
 *
 * <p><b>Note: Only Tuple DataSets can be aggregated.</b>
 *
 * <p>Further aggregations on other fields can be chained onto the returned operator via
 * {@link AggregateOperator#and(Aggregations, int)}.
 *
 * @param agg The built-in {@link Aggregations Aggregation} to compute.
 * @param field The index of the Tuple field the aggregation is applied to.
 * @return An AggregateOperator that represents the aggregated DataSet.
 *
 * @see Tuple
 * @see Aggregations
 * @see AggregateOperator
 * @see DataSet
 */
public AggregateOperator<T> aggregate(Aggregations agg, int field) {
	final AggregateOperator<T> aggregated = new AggregateOperator<T>(this, agg, field);
	return aggregated;
}
/**
 * Registers an additional aggregation on this operator and returns the operator itself,
 * so that several aggregations can be chained.
 *
 * @param function The aggregation to add; checked with {@code Validate.notNull}.
 * @param field The index of the Tuple field the aggregation is applied to.
 * @return This operator, with the aggregation function and field index appended.
 * @throws IllegalArgumentException If {@code field} is negative or not smaller than the
 *         arity of the tuple type.
 */
public AggregateOperator<IN> and(Aggregations function, int field) {
	Validate.notNull(function);

	// The operator's type is cast to a tuple type to obtain its arity and field types.
	final TupleTypeInfo<?> tupleType = (TupleTypeInfo<?>) getType();

	final boolean fieldInRange = field >= 0 && field < tupleType.getArity();
	if (!fieldInRange) {
		throw new IllegalArgumentException("Aggregation field position is out of range.");
	}

	// The aggregation function is created for the class of the selected tuple field.
	final AggregationFunctionFactory aggFactory = function.getFactory();
	final AggregationFunction<?> created =
			aggFactory.createAggregationFunction(tupleType.getTypeAt(field).getTypeClass());

	// Function and field index are appended together to their respective collections.
	aggregationFunctions.add(created);
	fields.add(field);
	return this;
}
String name = getName() != null ? getName() : genName.toString(); UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<IN, IN>(getInputType(), getResultType()); GroupReduceOperatorBase<IN, IN, GenericGroupReduce<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GenericGroupReduce<IN, IN>>(function, operatorInfo, new int[0], name); po.setDegreeOfParallelism(this.getParallelism()); UnaryOperatorInformation<IN, IN> operatorInfo = new UnaryOperatorInformation<IN, IN>(getInputType(), getResultType()); GroupReduceOperatorBase<IN, IN, GenericGroupReduce<IN, IN>> po = new GroupReduceOperatorBase<IN, IN, GenericGroupReduce<IN, IN>>(function, operatorInfo, logicalKeyPositions, name); po.setDegreeOfParallelism(this.getParallelism());
.map(new Dampener(DAMPENING_FACTOR, numPages));
DataSet<Tuple2<Integer, Long>> aggregateDs = ds .aggregate(Aggregations.SUM, 0) .and(Aggregations.MAX, 1) .project(0, 1).types(Integer.class, Long.class); DataSet<Tuple2<Long, Integer>> aggregateDs = ds.groupBy(1) .aggregate(Aggregations.SUM, 0) .project(1, 0).types(Long.class, Integer.class); DataSet<Tuple1<Integer>> aggregateDs = ds.groupBy(1) .aggregate(Aggregations.MIN, 0) .aggregate(Aggregations.MIN, 0) .project(0).types(Integer.class);
.join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ComponentIdFilter());
.map(new Dampener(DAMPENING_FACTOR, numPages));
.join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ComponentIdFilter());
/**
 * Assembles a delta-iteration plan on top of the given data set.
 *
 * <p>The iteration is opened with {@code initialData.iterateDelta(initialData, numIterations, 0)},
 * i.e. the same data set is used as receiver and as first argument, and {@code 0} is the
 * key position. Each step joins the solution set with the flat-mapped workset (named
 * {@code JOIN_1}), takes the {@code MIN} of field 1 per key 0, maps the result, and joins it
 * with the solution set again (named {@code JOIN_2}). A {@code SUM} of field 1 per key 0 over
 * that result is passed as the second argument when closing the iteration.
 *
 * <p>NOTE(review): the semantics of {@code Duplicator}, {@code SummingJoin}, {@code Expander}
 * and {@code SummingJoinProject} are not visible here - see their definitions.
 *
 * @param initialData The data set the iteration starts from.
 * @param numIterations The iteration count handed to {@code iterateDelta}.
 * @return The data set returned by closing the delta iteration.
 */
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {

	final DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> deltaIteration =
			initialData.iterateDelta(initialData, numIterations, 0);

	// First closeWith argument: two joins against the solution set around a per-key MIN.
	final DataSet<Tuple2<Long, Double>> stepDelta = deltaIteration.getSolutionSet()
			.join(deltaIteration.getWorkset().flatMap(new Duplicator()))
			.where(0)
			.equalTo(0)
			.with(new SummingJoin())
			.name(JOIN_1)
			.groupBy(0)
			.aggregate(Aggregations.MIN, 1)
			.map(new Expander())
			.join(deltaIteration.getSolutionSet())
			.where(0)
			.equalTo(0)
			.with(new SummingJoinProject())
			.name(JOIN_2);

	// Second closeWith argument: per-key SUM over the step delta.
	final DataSet<Tuple2<Long, Double>> summedDelta =
			stepDelta.groupBy(0).aggregate(Aggregations.SUM, 1);

	return deltaIteration.closeWith(stepDelta, summedDelta);
}
/**
 * Creates an {@link AggregateOperator} that applies a built-in aggregation to one field
 * of each group of this grouped {@link Tuple} {@link DataSet}.
 *
 * <p><b>Note: Only Tuple DataSets can be aggregated.</b>
 *
 * <p>Further aggregations on other fields can be chained onto the returned operator via
 * {@link AggregateOperator#and(Aggregations, int)}.
 *
 * @param agg The built-in {@link Aggregations Aggregation} to compute.
 * @param field The index of the Tuple field the aggregation is applied to.
 * @return An AggregateOperator that represents the aggregated DataSet.
 *
 * @see Tuple
 * @see Aggregations
 * @see AggregateOperator
 * @see DataSet
 */
public AggregateOperator<T> aggregate(Aggregations agg, int field) {
	final AggregateOperator<T> groupedAggregate = new AggregateOperator<T>(this, agg, field);
	return groupedAggregate;
}