private <T> GenericDataSinkBase<T> translate(DataSink<T> sink) { // translate the input recursively Operator<T> input = translate(sink.getDataSet()); // translate the sink itself and connect it to the input GenericDataSinkBase<T> translatedSink = sink.translateToDataFlow(input); return translatedSink; }
/**
 * Gets the type information of the result of this operator.
 * This is the same value that {@code getType()} returns.
 *
 * @return The result type of the operator.
 */
public TypeInformation<OUT> getResultType() {
	final TypeInformation<OUT> resultType = getType();
	return resultType;
}
/**
 * Creates a union of this DataSet with another DataSet. Both DataSets must have the same data type.
 *
 * @param other The DataSet that is unioned with this DataSet.
 * @return The resulting DataSet.
 */
public UnionOperator<T> union(DataSet<T> other){
	final UnionOperator<T> unioned = new UnionOperator<T>(this, other);
	return unioned;
}
/**
 * Builds a delta iteration plan on top of the given data set.
 *
 * <p>The data set is used both as the receiver and as the first argument of
 * {@code iterateDelta}. Inside the iteration, the solution set is joined on field 0 with the
 * flat-mapped workset ({@code JOIN_1}), grouped on field 0, MIN-aggregated on field 1, mapped,
 * and joined on field 0 with the solution set again ({@code JOIN_2}). The outcome of that chain
 * and its per-key SUM aggregation (grouped on field 0, summed on field 1) are the two arguments
 * that close the iteration.
 *
 * @param initialData The data set the delta iteration is started from.
 * @param numIterations The iteration count passed to {@code iterateDelta}.
 * @return The data set returned by closing the delta iteration.
 */
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {
	// NOTE(review): the trailing 0 is presumably the key field position of the solution set - confirm
	final DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> deltaIteration =
			initialData.iterateDelta(initialData, numIterations, 0);

	final DataSet<Tuple2<Long, Double>> joinedDelta = deltaIteration.getSolutionSet()
			.join(deltaIteration.getWorkset().flatMap(new Duplicator()))
			.where(0).equalTo(0)
			.with(new SummingJoin()).name(JOIN_1)
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.map(new Expander())
			.join(deltaIteration.getSolutionSet())
			.where(0).equalTo(0)
			.with(new SummingJoinProject()).name(JOIN_2);

	final DataSet<Tuple2<Long, Double>> summedChanges =
			joinedDelta.groupBy(0).aggregate(Aggregations.SUM, 1);

	return deltaIteration.closeWith(joinedDelta, summedChanges);
}
/**
 * Initiates a ProjectJoin transformation and projects the second join input.<br/>
 * If the second join input is a {@link Tuple} {@link DataSet}, fields can be selected by their index.
 * If the second join input is not a Tuple DataSet, no parameters should be passed.<br/>
 *
 * Fields of the first and second input can be added by chaining the method calls of
 * {@link JoinProjection#projectFirst(int...)} and {@link JoinProjection#projectSecond(int...)}.
 *
 * @param secondFieldIndexes If the second input is a Tuple DataSet, the indexes of the selected fields.
 * 					   For a non-Tuple DataSet, do not provide parameters.
 * 					   The order of fields in the output tuple is defined by the order of field indexes.
 * @return A JoinProjection that needs to be converted into a {@link ProjectJoin} to complete the
 *           Join transformation by calling {@link JoinProjection#types()}.
 *
 * @see Tuple
 * @see DataSet
 * @see JoinProjection
 * @see ProjectJoin
 */
public JoinProjection<I1, I2> projectSecond(int... secondFieldIndexes) {
	// no fields of the first input are selected at this point, hence the null argument
	final JoinProjection<I1, I2> projection = new JoinProjection<I1, I2>(
			getInput1(), getInput2(),
			getKeys1(), getKeys2(),
			getJoinHint(),
			null, secondFieldIndexes);
	return projection;
}
/**
 * Applies a {@link CustomUnaryOperation} to this data set. Custom operations are typically
 * complex operators that are composed of multiple steps.
 *
 * <p>The operation is checked to be non-null, receives this data set as its input, and is then
 * asked to create its result.
 *
 * @param operation The operation to run.
 * @return The data set produced by the operation.
 */
public <X> DataSet<X> runOperation(CustomUnaryOperation<T, X> operation) {
	Validate.notNull(operation, "The custom operator must not be null.");

	// the input has to be set before the operation can build its result
	operation.setInput(this);
	final DataSet<X> produced = operation.createResult();
	return produced;
}
/**
 * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first co-grouped {@link DataSet}.<br/>
 * The KeySelector function is called for each element of the first DataSet and extracts a single
 * key value on which the DataSet is grouped.<br/>
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation.
 *           Call {@code equalTo} on the returned {@link CoGroupOperatorSetsPredicate} to continue the CoGroup.
 *
 * @see KeySelector
 * @see DataSet
 */
public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) {
	// keys of the first input: the selector function together with the first input's type
	final Keys.SelectorFunctionKeys<I1, K> firstInputKeys =
			new Keys.SelectorFunctionKeys<I1, K>(keyExtractor, input1.getType());
	return new CoGroupOperatorSetsPredicate(firstInputKeys);
}
/**
 * Continues a CoGroup transformation.<br/>
 * Defines the {@link Tuple} fields of the first co-grouped {@link DataSet} that should be used as grouping keys.<br/>
 * <b>Note: Fields can only be selected as grouping keys on Tuple DataSets.</b><br/>
 *
 * @param fields The indexes of the Tuple fields of the first co-grouped DataSet that should be used as keys.
 * @return An incomplete CoGroup transformation.
 *           Call {@code equalTo} on the returned {@link CoGroupOperatorSetsPredicate} to continue the CoGroup.
 *
 * @see Tuple
 * @see DataSet
 */
public CoGroupOperatorSetsPredicate where(int... fields) {
	// keys of the first input: the field positions together with the first input's type
	final Keys.FieldPositionKeys<I1> firstInputKeys =
			new Keys.FieldPositionKeys<I1>(fields, input1.getType());
	return new CoGroupOperatorSetsPredicate(firstInputKeys);
}
/**
 * Continues a Join transformation.<br/>
 * Defines the {@link Tuple} fields of the first join {@link DataSet} that should be used as join keys.<br/>
 * <b>Note: Fields can only be selected as join keys on Tuple DataSets.</b><br/>
 *
 * @param fields The indexes of the Tuple fields of the first join DataSet that should be used as keys.
 * @return An incomplete Join transformation.
 *           Call {@link JoinOperatorSetsPredicate#equalTo(int...)} or {@link JoinOperatorSetsPredicate#equalTo(KeySelector)}
 *           to continue the Join.
 *
 * @see Tuple
 * @see DataSet
 */
public JoinOperatorSetsPredicate where(int... fields) {
	// keys of the first input: the field positions together with the first input's type
	final Keys.FieldPositionKeys<I1> firstInputKeys =
			new Keys.FieldPositionKeys<I1>(fields, input1.getType());
	return new JoinOperatorSetsPredicate(firstInputKeys);
}
/**
 * Continues a Join transformation and defines a {@link KeySelector} function for the first join {@link DataSet}.<br/>
 * The KeySelector function is called for each element of the first DataSet and extracts a single
 * key value on which the DataSet is joined.<br/>
 *
 * @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is joined.
 * @return An incomplete Join transformation.
 *           Call {@link JoinOperatorSetsPredicate#equalTo(int...)} or {@link JoinOperatorSetsPredicate#equalTo(KeySelector)}
 *           to continue the Join.
 *
 * @see KeySelector
 * @see DataSet
 */
public <K extends Comparable<K>> JoinOperatorSetsPredicate where(KeySelector<I1, K> keySelector) {
	// keys of the first input: the selector function together with the first input's type
	final Keys.SelectorFunctionKeys<I1, K> firstInputKeys =
			new Keys.SelectorFunctionKeys<I1, K>(keySelector, input1.getType());
	return new JoinOperatorSetsPredicate(firstInputKeys);
}
/**
 * Creates a reduce operator for a grouped input.
 *
 * <p>The data set behind the grouping, and that data set's type, are handed to the super
 * constructor; a reduce therefore keeps the element type of its input.
 *
 * @param input The grouping whose data set is reduced.
 * @param function The reduce function that is applied.
 */
public ReduceOperator(Grouping<IN> input, ReduceFunction<IN> function) {
	super(input.getDataSet(), input.getDataSet().getType());

	this.grouper = input;
	this.function = function;

	// NOTE(review): presumably picks up semantic annotations declared on the UDF class - confirm
	extractSemanticAnnotationsFromUdf(function.getClass());
}
/**
 * Applies an Aggregate transformation on a non-grouped {@link Tuple} {@link DataSet}.<br/>
 * <b>Note: Only Tuple DataSets can be aggregated.</b>
 * The transformation applies a built-in {@link Aggregations Aggregation} on a specified field
 *   of a Tuple DataSet. Further aggregation functions can be added to the resulting
 *   {@link AggregateOperator} by calling {@link AggregateOperator#and(Aggregations, int)}.
 *
 * @param agg The built-in aggregation function that is computed.
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the aggregated DataSet.
 *
 * @see Tuple
 * @see Aggregations
 * @see AggregateOperator
 * @see DataSet
 */
public AggregateOperator<T> aggregate(Aggregations agg, int field) {
	final AggregateOperator<T> aggregated = new AggregateOperator<T>(this, agg, field);
	return aggregated;
}
/**
 * Gets the logical key positions, as computed by the keys of this object.
 *
 * @return The result of {@code keys.computeLogicalKeyPositions()}.
 */
public int[] getKeyPositions() {
	final int[] logicalPositions = keys.computeLogicalKeyPositions();
	return logicalPositions;
}
/**
 * Checks whether there are no key fields.
 *
 * @return {@code true} if the number of key fields is zero, {@code false} otherwise.
 */
public boolean isEmpty() {
	final int keyFieldCount = getNumberOfKeyFields();
	return keyFieldCount == 0;
}
/**
 * Sorts {@link Tuple} elements within a group on the specified field in the specified {@link Order}.<br/>
 * <b>Note: Only groups of Tuple elements can be sorted.</b><br/>
 * Groups can be sorted by multiple fields by chaining {@link #sortGroup(int, Order)} calls.
 *
 * @param field The Tuple field on which the group is sorted.
 * @param order The Order in which the specified Tuple field is sorted.
 * @return A SortedGrouping with the specified order of group elements.
 *
 * @see Tuple
 * @see Order
 */
public SortedGrouping<T> sortGroup(int field, Order order) {
	// the sorted grouping is built over the same data set and keys as this grouping
	final SortedGrouping<T> sorted = new SortedGrouping<T>(this.dataSet, this.keys, field, order);
	return sorted;
}
/**
 * Initiates a Project transformation on a {@link Tuple} {@link DataSet}.<br/>
 * <b>Note: Only Tuple DataSets can be projected.</b><br/>
 * The transformation projects each Tuple of the DataSet onto a (sub)set of fields.<br/>
 * This method returns a {@link Projection} on which {@link Projection#types()} needs to
 *   be called to complete the transformation.
 *
 * @param fieldIndexes The field indexes of the input tuples that are retained.
 * 					   The order of fields in the output tuple corresponds to the order of field indexes.
 * @return A Projection that needs to be converted into a {@link ProjectOperator} to complete the
 *           Project transformation by calling {@link Projection#types()}.
 *
 * @see Tuple
 * @see DataSet
 * @see Projection
 * @see ProjectOperator
 */
public Projection<T> project(int... fieldIndexes) {
	final Projection<T> projection = new Projection<T>(this, fieldIndexes);
	return projection;
}
/**
 * Creates a cross operator over two inputs.
 *
 * <p>Both inputs and the return type are handed to the super constructor. The cross function
 * is stored in this operator, and its class is passed to
 * {@code extractSemanticAnnotationsFromUdf}.
 *
 * @param input1 The first input data set.
 * @param input2 The second input data set.
 * @param function The cross function of this operator.
 * @param returnType The type information of the operator's result.
 */
protected CrossOperator(DataSet<I1> input1, DataSet<I2> input2,
		CrossFunction<I1, I2, OUT> function,
		TypeInformation<OUT> returnType)
{
	super(input1, input2, returnType);

	this.function = function;
	// NOTE(review): presumably picks up semantic annotations declared on the UDF class - confirm
	extractSemanticAnnotationsFromUdf(function.getClass());
}
private static int[] makeFields(int[] fields, TupleTypeInfo<?> type) { int inLength = type.getArity(); // null parameter means all fields are considered if (fields == null || fields.length == 0) { fields = new int[inLength]; for (int i = 0; i < inLength; i++) { fields[i] = i; } return fields; } else { return rangeCheckAndOrderFields(fields, inLength-1); } }
/**
 * Initiates a ProjectJoin transformation and projects the first join input.<br/>
 * If the first join input is a {@link Tuple} {@link DataSet}, fields can be selected by their index.
 * If the first join input is not a Tuple DataSet, no parameters should be passed.<br/>
 *
 * Fields of the first and second input can be added by chaining the method calls of
 * {@link JoinProjection#projectFirst(int...)} and {@link JoinProjection#projectSecond(int...)}.
 *
 * @param firstFieldIndexes If the first input is a Tuple DataSet, the indexes of the selected fields.
 * 					   For a non-Tuple DataSet, do not provide parameters.
 * 					   The order of fields in the output tuple is defined by the order of field indexes.
 * @return A JoinProjection that needs to be converted into a {@link ProjectJoin} to complete the
 *           Join transformation by calling {@link JoinProjection#types()}.
 *
 * @see Tuple
 * @see DataSet
 * @see JoinProjection
 * @see ProjectJoin
 */
public JoinProjection<I1, I2> projectFirst(int... firstFieldIndexes) {
	// no fields of the second input are selected at this point, hence the null argument
	final JoinProjection<I1, I2> projection = new JoinProjection<I1, I2>(
			getInput1(), getInput2(),
			getKeys1(), getKeys2(),
			getJoinHint(),
			firstFieldIndexes, null);
	return projection;
}
/**
 * Applies an Aggregate transformation on a grouped {@link Tuple} {@link DataSet}.<br/>
 * <b>Note: Only Tuple DataSets can be aggregated.</b>
 * The transformation applies a built-in {@link Aggregations Aggregation} on a specified field
 *   of a Tuple group. Further aggregation functions can be added to the resulting
 *   {@link AggregateOperator} by calling {@link AggregateOperator#and(Aggregations, int)}.
 *
 * @param agg The built-in aggregation function that is computed.
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the aggregated DataSet.
 *
 * @see Tuple
 * @see Aggregations
 * @see AggregateOperator
 * @see DataSet
 */
public AggregateOperator<T> aggregate(Aggregations agg, int field) {
	final AggregateOperator<T> aggregated = new AggregateOperator<T>(this, agg, field);
	return aggregated;
}