/**
 * Computes the degree of every vertex in the graph.
 *
 * <p>The out-degree and in-degree data sets are unioned, then grouped on
 * field 0 and summed on field 1.
 *
 * @return A DataSet of {@code Tuple2<vertexId, degree>}
 */
public DataSet<Tuple2<K, LongValue>> getDegrees() {
    // Both degree sets are merged into one data set before aggregating.
    DataSet<Tuple2<K, LongValue>> inAndOut = outDegrees()
        .union(inDegrees())
        .name("In- and out-degree");

    // Group on field 0 and add up field 1.
    return inAndOut
        .groupBy(0)
        .sum(1)
        .name("Sum");
}
/**
 * Shorthand for {@code and(Aggregations.MIN, field)}.
 *
 * @param field the field argument handed through to {@code and}
 * @return whatever {@code and(Aggregations.MIN, field)} returns
 */
public AggregateOperator<IN> andMin(int field) {
    return and(Aggregations.MIN, field);
}
/**
 * Creates a new {@link AggregateOperator} that takes this object as its input.
 *
 * @param agg the aggregation to apply
 * @param field the field argument passed to the operator
 * @param callLocationName the call-location name passed to the operator
 * @return the newly constructed operator
 */
private AggregateOperator<T> aggregate(Aggregations agg, int field, String callLocationName) {
    final AggregateOperator<T> operator =
        new AggregateOperator<T>(this, agg, field, callLocationName);
    return operator;
}
/**
 * Full (ungrouped) aggregate: SUM on field 0 combined with MAX on field 1,
 * projected to fields (0, 1).
 */
@Test
public void testFullAggregate() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.get3TupleDataSet(env);

    // The whole chain stays in one assignment so project(...) is typed from the target.
    DataSet<Tuple2<Integer, Long>> projected = input
        .aggregate(Aggregations.SUM, 0)
        .and(Aggregations.MAX, 1)
        .project(0, 1);

    List<Tuple2<Integer, Long>> actual = projected.collect();

    compareResultAsTuples(actual, "231,6\n");
}
@Test public void testAggregationTypes() { try { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> tupleDs = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work: multiple aggregates tupleDs.aggregate(Aggregations.SUM, 0).and(Aggregations.MIN, 4); // should work: nested aggregates tupleDs.aggregate(Aggregations.MIN, 2).aggregate(Aggregations.SUM, 1); // should not work: average on string try { tupleDs.aggregate(Aggregations.SUM, 2); Assert.fail(); } catch (UnsupportedAggregationTypeException iae) { // we're good here } } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(); Assert.fail(e.getMessage()); } } }
.groupBy(0) .aggregate(Aggregations.SUM, 1) .map(new MapFunction<Tuple2<Long, Long>, Long>() { @Override public Long map(Tuple2<Long, Long> value) throws Exception {
public static DataSet<Tuple2<Long, Long>> doBulkIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) { // open a bulk iteration IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20); DataSet<Tuple2<Long, Long>> changes = iteration .join(edges).where(0).equalTo(0).with(new Join222()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration).where(0).equalTo(0) .flatMap(new FlatMapJoin()); // close the bulk iteration return iteration.closeWith(changes); }
/**
 * Builds an aggregation on the data set stored under {@code info.parentID}
 * and stores the result under {@code info.setID}.
 *
 * <p>The first entry of {@code info.aggregates} starts the aggregation;
 * entries 1 to {@code info.count - 1} are chained on with {@code and}. The
 * resulting operator is named "Aggregation".
 *
 * @param info description of the operation (parent id, set id, aggregates, count)
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
private void createAggregationOperation(OperationInfo info) throws IOException {
    DataSet parent = (DataSet) sets.get(info.parentID);

    // The first aggregate creates the operator; the rest are appended to it.
    AggregateOperator aggregation =
        parent.aggregate(info.aggregates[0].agg, info.aggregates[0].field);

    int next = 1;
    while (next < info.count) {
        aggregation = aggregation.and(info.aggregates[next].agg, info.aggregates[next].field);
        next++;
    }

    sets.put(info.setID, aggregation.name("Aggregation"));
}
.name("Average score") .sum(0) .andSum(1);
/**
 * Compiles a grouped sum that uses a custom partitioner and checks the ship
 * strategies in the optimized plan: FORWARD into the sink, PARTITION_CUSTOM
 * into the reducer, FORWARD into the combiner.
 */
@Test
public void testCustomPartitioningTupleAgg() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Tuple2<Integer, Integer>> input = env
            .fromElements(new Tuple2<Integer, Integer>(0, 0))
            .rebalance()
            .setParallelism(4);

        input
            .groupBy(0)
            .withPartitioner(new TestPartitionerInt())
            .sum(1)
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan optimized = compileNoStats(plan);

        // Walk backwards from the sink: sink <- reducer <- combiner.
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode reduceNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, reduceNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());
    } catch (Exception failure) {
        failure.printStackTrace();
        fail(failure.getMessage());
    }
}
.map(new Dampener(DAMPENING_FACTOR, numPages));
.join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ComponentIdFilter());
/**
 * Full (ungrouped) aggregate over mutable value types: SUM on field 0
 * combined with MAX on field 1, projected to fields (0, 1).
 */
@Test
public void testFullAggregateOfMutableValueTypes() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<IntValue, LongValue, StringValue>> input =
        ValueCollectionDataSets.get3TupleDataSet(env);

    // The whole chain stays in one assignment so project(...) is typed from the target.
    DataSet<Tuple2<IntValue, LongValue>> projected = input
        .aggregate(Aggregations.SUM, 0)
        .and(Aggregations.MAX, 1)
        .project(0, 1);

    List<Tuple2<IntValue, LongValue>> actual = projected.collect();

    compareResultAsTuples(actual, "231,6\n");
}
.name("Average score") .sum(0) .andSum(1);
.groupBy(0) .sum(1) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());