/**
 * Shorthand for {@link #aggregate(Aggregations, int)} with {@link Aggregations#SUM} as the
 * aggregation function.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the summed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see org.apache.flink.api.java.operators.AggregateOperator
 */
public AggregateOperator<T> sum(int field) {
    final Aggregations function = Aggregations.SUM;
    return aggregate(function, field);
}
/**
 * Shorthand for {@link #aggregate(Aggregations, int)} with {@link Aggregations#MAX} as the
 * aggregation function.
 *
 * <p><strong>Note:</strong> Do not mistake this operation for {@link #maxBy(int...)}, which
 * selects one element with maximum value at the specified field positions.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the max'ed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see #maxBy(int...)
 */
public AggregateOperator<T> max(int field) {
    final Aggregations function = Aggregations.MAX;
    return aggregate(function, field);
}
/**
 * Shorthand for {@link #aggregate(Aggregations, int)} with {@link Aggregations#MIN} as the
 * aggregation function.
 *
 * <p><strong>Note:</strong> Do not mistake this operation for {@link #minBy(int...)}, which
 * selects one element with the minimum value at the specified field positions.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the min'ed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see #minBy(int...)
 */
public AggregateOperator<T> min(int field) {
    final Aggregations function = Aggregations.MIN;
    return aggregate(function, field);
}
@Test public void testAggregationTypes() { try { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work: multiple aggregates tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4); // should work: nested aggregates tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1); // should not work: average on string try { tupleDs.aggregate(Aggregations.SUM, 2); Assert.fail(); } catch (UnsupportedAggregationTypeException iae) { // we're good here } } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); Assert.fail(e.getMessage()); } } }
@Test public void testFieldsAggregate() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work try { tupleDs.aggregate(Aggregations.SUM, 1); } catch (Exception e) { Assert.fail(); } // should not work: index out of bounds try { tupleDs.aggregate(Aggregations.SUM, 10); Assert.fail(); } catch (IllegalArgumentException iae) { // we're good here } catch (Exception e) { Assert.fail(); } // should not work: not applied to tuple dataset DataSet<Long> longDs = env.fromCollection(emptyLongData, BasicTypeInfo.LONG_TYPE_INFO); try { longDs.aggregate(Aggregations.MIN, 1); Assert.fail(); } catch (InvalidProgramException uoe) { // we're good here } catch (Exception e) { Assert.fail(); } }
/**
 * Full aggregate of mutable value types: sums field 0 and takes the maximum of field 1 over
 * the ungrouped 3-tuple data set, projects fields 0 and 1, and compares the collected result
 * against the single expected row {@code 231,6}.
 */
@Test
public void testFullAggregateOfMutableValueTypes() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<IntValue, LongValue, StringValue>> input =
            ValueCollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<IntValue, LongValue>> sumAndMax =
            input.aggregate(Aggregations.SUM, 0).and(Aggregations.MAX, 1).project(0, 1);

    List<Tuple2<IntValue, LongValue>> collected = sumAndMax.collect();

    compareResultAsTuples(collected, "231,6\n");
}
/**
 * Full aggregate: sums field 0 and takes the maximum of field 1 over the ungrouped 3-tuple
 * data set, projects fields 0 and 1, and compares the collected result against the single
 * expected row {@code 231,6}.
 */
@Test
public void testFullAggregate() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

    DataSet<Tuple2<Integer, Long>> sumAndMax =
            input.aggregate(Aggregations.SUM, 0).and(Aggregations.MAX, 1).project(0, 1);

    List<Tuple2<Integer, Long>> collected = sumAndMax.collect();

    compareResultAsTuples(collected, "231,6\n");
}
/**
 * Convenience form of {@link #aggregate(Aggregations, int)} that always uses
 * {@link Aggregations#SUM} as the aggregation function.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the summed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see org.apache.flink.api.java.operators.AggregateOperator
 */
public AggregateOperator<T> sum(int field) {
    return this.aggregate(Aggregations.SUM, field);
}
/**
 * Applies {@link Aggregations#SUM} to one tuple field; equivalent to calling
 * {@link #aggregate(Aggregations, int)} with {@code Aggregations.SUM}.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the summed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see org.apache.flink.api.java.operators.AggregateOperator
 */
public AggregateOperator<T> sum(int field) {
    final AggregateOperator<T> summed = aggregate(Aggregations.SUM, field);
    return summed;
}
/**
 * Convenience form of {@link #aggregate(Aggregations, int)} that always uses
 * {@link Aggregations#MAX} as the aggregation function.
 *
 * <p><strong>Note:</strong> This is a different operation from {@link #maxBy(int...)}, which
 * selects one element with maximum value at the specified field positions.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the max'ed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see #maxBy(int...)
 */
public AggregateOperator<T> max(int field) {
    return this.aggregate(Aggregations.MAX, field);
}
/**
 * Convenience form of {@link #aggregate(Aggregations, int)} that always uses
 * {@link Aggregations#MIN} as the aggregation function.
 *
 * <p><strong>Note:</strong> This is a different operation from {@link #minBy(int...)}, which
 * selects one element with the minimum value at the specified field positions.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the min'ed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see #minBy(int...)
 */
public AggregateOperator<T> min(int field) {
    return this.aggregate(Aggregations.MIN, field);
}
/**
 * Applies {@link Aggregations#MIN} to one tuple field; equivalent to calling
 * {@link #aggregate(Aggregations, int)} with {@code Aggregations.MIN}.
 *
 * <p><strong>Note:</strong> {@link #minBy(int...)} is a separate operation: it selects one
 * element with the minimum value at the specified field positions.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the min'ed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see #minBy(int...)
 */
public AggregateOperator<T> min(int field) {
    final AggregateOperator<T> minimum = aggregate(Aggregations.MIN, field);
    return minimum;
}
/**
 * Applies {@link Aggregations#MAX} to one tuple field; equivalent to calling
 * {@link #aggregate(Aggregations, int)} with {@code Aggregations.MAX}.
 *
 * <p><strong>Note:</strong> {@link #maxBy(int...)} is a separate operation: it selects one
 * element with maximum value at the specified field positions.
 *
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the max'ed DataSet.
 *
 * @see #aggregate(Aggregations, int)
 * @see #maxBy(int...)
 */
public AggregateOperator<T> max(int field) {
    final AggregateOperator<T> maximum = aggregate(Aggregations.MAX, field);
    return maximum;
}
/**
 * Builds the aggregation described by {@code info} on top of its parent data set and
 * registers the result.
 *
 * <p>The parent data set is looked up in {@code sets} under {@code info.parentID}. Entry 0 of
 * {@code info.aggregates} starts the aggregation; entries {@code 1 .. info.count - 1} are
 * chained onto it with {@code and(...)}. The resulting operator is named
 * {@code "Aggregation"} and stored in {@code sets} under {@code info.setID}.
 *
 * @param info description of the operation; entry 0 of {@code info.aggregates} is read
 *     unconditionally, so at least one aggregate must be present
 * @throws IOException declared by the signature (NOTE(review): no statement here visibly
 *     throws it; confirm whether a callee does before removing it)
 */
private void createAggregationOperation(OperationInfo info) throws IOException {
    // Unbounded wildcards instead of raw types: the element type is unknown at this point,
    // and no call below depends on it.
    DataSet<?> parent = (DataSet<?>) sets.get(info.parentID);
    AggregateOperator<?> aggregation =
            parent.aggregate(info.aggregates[0].agg, info.aggregates[0].field);
    for (int i = 1; i < info.count; i++) {
        aggregation = aggregation.and(info.aggregates[i].agg, info.aggregates[i].field);
    }
    sets.put(info.setID, aggregation.name("Aggregation"));
}