/**
 * Closes the iteration and marks the end of the iterative program part.
 *
 * @param iterationResult The data set that is fed back into the next iteration.
 * @return The DataSet representing the iteration's result after the computation has terminated.
 *
 * @see DataSet#iterate(int)
 */
public DataSet<T> closeWith(DataSet<T> iterationResult) {
    BulkIterationResultSet<T> resultSet = new BulkIterationResultSet<T>(
            getExecutionEnvironment(), getType(), this, iterationResult);
    return resultSet;
}
/**
 * Wraps the first input with a key-extracting mapper so the join operates on
 * {@code Tuple2<key, value>} records.
 *
 * @param input1 The operator producing the first input.
 * @param rawKeys1 The selector-function keys of the first input.
 * @return This builder, with the first input replaced by the key-appended mapper.
 */
public <I1, K> JoinOperatorBaseBuilder<OUT> withWrappedInput1(
        Operator<I1> input1,
        SelectorFunctionKeys<I1, ?> rawKeys1) {

    @SuppressWarnings("unchecked")
    SelectorFunctionKeys<I1, K> typedKeys = (SelectorFunctionKeys<I1, K>) rawKeys1;

    // Prepend the extracted key to every record and describe the resulting type.
    Operator<Tuple2<K, I1>> keyedInput = KeyFunctions.appendKeyExtractor(input1, typedKeys);
    TypeInformation<Tuple2<K, I1>> keyedType = KeyFunctions.createTypeWithKey(typedKeys);

    return this.withInput1(keyedInput, keyedType, rawKeys1);
}
/**
 * Wraps the second input with a key-extracting mapper so the join operates on
 * {@code Tuple2<key, value>} records. Mirrors {@code withWrappedInput1} for the
 * second input side.
 *
 * @param input2 The operator producing the second input.
 * @param rawKeys2 The selector-function keys of the second input.
 * @return This builder, with the second input replaced by the key-appended mapper.
 */
public <I2, K> JoinOperatorBaseBuilder<OUT> withWrappedInput2(
        Operator<I2> input2,
        SelectorFunctionKeys<I2, ?> rawKeys2) {
    @SuppressWarnings("unchecked")
    SelectorFunctionKeys<I2, K> keys2 = (SelectorFunctionKeys<I2, K>) rawKeys2;
    TypeInformation<Tuple2<K, I2>> typeInfoWithKey2 = KeyFunctions.createTypeWithKey(keys2);
    Operator<Tuple2<K, I2>> keyMapper2 = KeyFunctions.appendKeyExtractor(input2, keys2);
    // Explicit "this." receiver for consistency with withWrappedInput1.
    return this.withInput2(keyMapper2, typeInfoWithKey2, rawKeys2);
}
/**
 * Builds the delta-iteration test plan: the solution set is joined with the
 * duplicated workset, reduced to per-key minima, and joined back against the
 * solution set; the per-key sums of the delta form the next workset.
 *
 * @param initialData The data set used as both initial solution set and initial workset.
 * @param numIterations The maximum number of iterations.
 * @return The result of the closed delta iteration.
 */
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {
    DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration =
            initialData.iterateDelta(initialData, numIterations, 0);

    // First join: solution set against the duplicated workset, keyed on field 0.
    DataSet<Tuple2<Long, Double>> joined = iteration.getSolutionSet()
            .join(iteration.getWorkset().flatMap(new Duplicator()))
            .where(0).equalTo(0)
            .with(new SummingJoin()).name(JOIN_1);

    // Keep the per-key minimum, expand, and join back against the solution set.
    DataSet<Tuple2<Long, Double>> delta = joined
            .groupBy(0).aggregate(Aggregations.MIN, 1)
            .map(new Expander())
            .join(iteration.getSolutionSet())
            .where(0).equalTo(0)
            .with(new SummingJoinProject()).name(JOIN_2);

    // Per-key sums of the delta feed the next round's workset.
    DataSet<Tuple2<Long, Double>> nextWorkset = delta.groupBy(0).aggregate(Aggregations.SUM, 1);

    return iteration.closeWith(delta, nextWorkset);
}
public static DataSet<Tuple2<Long, Long>> doBulkIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) { // open a bulk iteration IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20); DataSet<Tuple2<Long, Long>> changes = iteration .join(edges).where(0).equalTo(0).with(new Join222()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration).where(0).equalTo(0) .flatMap(new FlatMapJoin()); // close the bulk iteration return iteration.closeWith(changes); }
public static DataSet<Tuple2<Long, Long>> doSimpleBulkIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) { // open a bulk iteration IterativeDataSet<Tuple2<Long, Long>> iteration = vertices.iterate(20); DataSet<Tuple2<Long, Long>> changes = iteration .join(edges).where(0).equalTo(0) .flatMap(new FlatMapJoin()); // close the bulk iteration return iteration.closeWith(changes); }
public void testJoinProjection12() { final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds1 = env.fromCollection(emptyTupleData, tupleTypeInfo); DataSet<Tuple5<Integer, Long, String, Long, Integer>> ds2 = env.fromCollection(emptyTupleData, tupleTypeInfo); // should work ds1.join(ds2).where(0).equalTo(0) .projectSecond(2) .projectFirst(1); }
/**
 * Applies a Python reduce operation to an unsorted grouping: a non-combinable
 * identity group-reduce pre-step, followed by a map-partition that runs the
 * Python user code.
 *
 * @param grouping The grouped input.
 * @param info Operation info (parallelism, name, environment and set ids).
 * @param type Type information of the operation's result.
 * @return The data set produced by the Python map-partition.
 */
private <IN, OUT> DataSet<OUT> applyReduceOperation(UnsortedGrouping<IN> grouping, PythonOperationInfo info, TypeInformation<OUT> type) {
    return grouping
            .reduceGroup(new IdentityGroupReduce<IN>())
            .setCombinable(false)
            .setParallelism(info.parallelism)
            .name("PythonReducePreStep")
            .mapPartition(new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type))
            .setParallelism(info.parallelism)
            .name(info.name);
}
}
/**
 * Applies a Python group-reduce operation to a sorted grouping: a
 * non-combinable identity group-reduce pre-step, followed by a map-partition
 * that runs the Python user code.
 *
 * @param grouping The sorted, grouped input.
 * @param info Operation info (parallelism, name, environment and set ids).
 * @param type Type information of the operation's result.
 * @return The data set produced by the Python map-partition.
 */
private <IN, OUT> DataSet<OUT> applyGroupReduceOperation(SortedGrouping<IN> grouping, PythonOperationInfo info, TypeInformation<OUT> type) {
    return grouping
            .reduceGroup(new IdentityGroupReduce<IN>())
            .setCombinable(false)
            .setParallelism(info.parallelism)
            .name("PythonGroupReducePreStep")
            .mapPartition(new PythonMapPartition<IN, OUT>(operatorConfig, info.envID, info.setID, type))
            .setParallelism(info.parallelism)
            .name(info.name);
}
/**
 * Constructor for a grouped reduce.
 *
 * @param input The grouped input to be processed group-wise by the groupReduce function.
 * @param resultType Type information of the function's result.
 * @param function The user-defined GroupReduce function.
 * @param defaultName The name used if no other name is set.
 */
public GroupReduceOperator(Grouping<IN> input, TypeInformation<OUT> resultType, GroupReduceFunction<IN, OUT> function, String defaultName) {
    // NOTE(review): the null guard here is contradicted by the unconditional
    // grouper.keys access below — a null input would still fail there.
    // Confirm whether callers may ever pass null.
    super(input != null ? input.getInputDataSet() : null, resultType);

    this.grouper = input;
    this.function = function;
    this.defaultName = defaultName;
    // Evaluated after the fields above are set, since checkCombinability()
    // may inspect them.
    this.combinable = checkCombinability();

    UdfOperatorUtils.analyzeSingleInputUdf(this, GroupReduceFunction.class, defaultName, function, grouper.keys);
}
@Override protected DualInputSemanticProperties extractSemanticAnnotationsFromUdf(Class<?> udfClass) { // we do not extract anything, but construct the properties from the projection return SemanticPropUtil.createProjectionPropertiesDual(getFunction().getFields(), getFunction().getIsFromFirst(), getInput1Type(), getInput2Type()); } }
/**
 * Constructor for a grouped reduce.
 *
 * @param input The grouped input to be processed by the reduce function.
 * @param function The user-defined reduce function.
 * @param defaultName The name used if no other name is set.
 */
public ReduceOperator(Grouping<IN> input, ReduceFunction<IN> function, String defaultName) {
    // The result type of a reduce equals its input type.
    super(input.getInputDataSet(), input.getInputDataSet().getType());

    this.grouper = input;
    this.function = function;
    this.defaultName = defaultName;
    // Let the optimizer pick the combine strategy unless set explicitly later.
    this.hint = CombineHint.OPTIMIZER_CHOOSES;

    UdfOperatorUtils.analyzeSingleInputUdf(this, ReduceFunction.class, defaultName, function, grouper.keys);
}
/**
 * Constructor for a grouped combine.
 *
 * @param input The grouped input to be processed group-wise by the combine function.
 * @param resultType Type information of the function's result.
 * @param function The user-defined GroupCombine function.
 * @param defaultName The name used if no other name is set.
 */
public GroupCombineOperator(Grouping<IN> input, TypeInformation<OUT> resultType, GroupCombineFunction<IN, OUT> function, String defaultName) {
    // A null grouping yields a null input data set for the superclass.
    super(input != null ? input.getInputDataSet() : null, resultType);

    this.grouper = input;
    this.function = function;
    this.defaultName = defaultName;
}
/**
 * Creates SplitDataProperties for the given data source.
 *
 * @param source The DataSource for which the SplitDataProperties are created.
 */
public SplitDataProperties(DataSource<T> source) {
    // Only the element type of the source is retained.
    this.type = source.getType();
}
/**
 * Creates a MapOperator that applies the given map function to the input.
 *
 * @param input The input data set.
 * @param resultType Type information of the function's result.
 * @param function The user-defined map function.
 * @param defaultName The name used if no other name is set.
 */
public MapOperator(DataSet<IN> input, TypeInformation<OUT> resultType, MapFunction<IN, OUT> function, String defaultName) {
    super(input, resultType);

    this.function = function;
    this.defaultName = defaultName;

    // A plain map has no keys, so no key information is passed to the analyzer.
    UdfOperatorUtils.analyzeSingleInputUdf(this, MapFunction.class, defaultName, function, null);
}
/**
 * Returns the type of the elements produced by this operator.
 *
 * @return The result type of the operator.
 */
public TypeInformation<OUT> getResultType() {
    return getType();
}
/**
 * Creates a CrossOperator that applies the given cross function to the
 * Cartesian product of the two inputs.
 *
 * @param input1 The first input data set.
 * @param input2 The second input data set.
 * @param function The user-defined cross function.
 * @param returnType Type information of the function's result.
 * @param hint The strategy hint for executing the cross.
 * @param defaultName The name used if no other name is set.
 */
public CrossOperator(DataSet<I1> input1, DataSet<I2> input2,
        CrossFunction<I1, I2, OUT> function,
        TypeInformation<OUT> returnType,
        CrossHint hint,
        String defaultName) {
    super(input1, input2, returnType);

    this.defaultName = defaultName;
    this.function = function;
    this.hint = hint;

    // A cross has no keys on either input, hence null key information.
    UdfOperatorUtils.analyzeDualInputUdf(this, CrossFunction.class, defaultName, function, null, null);
}
/**
 * Creates a FlatMapOperator that applies the given flat-map function to the input.
 *
 * @param input The input data set.
 * @param resultType Type information of the function's result.
 * @param function The user-defined flat-map function.
 * @param defaultName The name used if no other name is set.
 */
public FlatMapOperator(DataSet<IN> input, TypeInformation<OUT> resultType, FlatMapFunction<IN, OUT> function, String defaultName) {
    super(input, resultType);

    this.defaultName = defaultName;
    this.function = function;

    // A flat map has no keys, so no key information is passed to the analyzer.
    UdfOperatorUtils.analyzeSingleInputUdf(this, FlatMapFunction.class, defaultName, function, null);
}