eu.stratosphere.api.java.operators Java code examples

/**
 * Translates a data sink into its data flow representation.
 * The data set feeding the sink is translated first (recursively); the translated
 * input is then handed to the sink's own translation, which connects the two.
 *
 * @param sink The sink to translate.
 * @return The translated sink, as returned by {@code sink.translateToDataFlow(...)}.
 */
private <T> GenericDataSinkBase<T> translate(DataSink<T> sink) {
  final Operator<T> translatedInput = translate(sink.getDataSet());
  return sink.translateToDataFlow(translatedInput);
}

/**
 * Gets the type information describing the result of this operator.
 * The value is exactly what {@code getType()} reports.
 *
 * @return The result type of the operator.
 */
public TypeInformation<OUT> getResultType() {
  final TypeInformation<OUT> resultType = getType();
  return resultType;
}

/**
 * Creates a union of this DataSet with another DataSet. Both DataSets must have the same data type.
 *
 * @param other The DataSet that is unioned with the current DataSet.
 * @return The {@link UnionOperator} representing the unioned DataSet.
 */
public UnionOperator<T> union(DataSet<T> other){
  final UnionOperator<T> unioned = new UnionOperator<T>(this, other);
  return unioned;
}

/**
 * Builds a delta iteration over the given data set and returns the closed iteration.
 * Inside the iteration, the solution set is joined with the duplicated workset ({@code JOIN_1}),
 * grouped on field 0 and MIN-aggregated on field 1, expanded, and joined with the
 * solution set once more ({@code JOIN_2}). The result of that chain, together with its
 * per-key SUM aggregate on field 1, is used to close the iteration.
 *
 * @param initialData The data set the delta iteration is started on; it is also passed
 *                    as the first argument of {@code iterateDelta}.
 * @param numIterations The iteration count handed to {@code iterateDelta}.
 * @return The data set returned by closing the delta iteration.
 */
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) {
  final DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> deltaIteration =
      initialData.iterateDelta(initialData, numIterations, 0);

  // first join, MIN aggregation per key, expansion, then a second join against the solution set
  final DataSet<Tuple2<Long, Double>> joined = deltaIteration.getSolutionSet()
      .join(deltaIteration.getWorkset().flatMap(new Duplicator()))
      .where(0).equalTo(0)
      .with(new SummingJoin()).name(JOIN_1)
      .groupBy(0).aggregate(Aggregations.MIN, 1)
      .map(new Expander())
      .join(deltaIteration.getSolutionSet())
      .where(0).equalTo(0)
      .with(new SummingJoinProject()).name(JOIN_2);

  // per-key sum over field 1 of the joined result
  final DataSet<Tuple2<Long, Double>> summed = joined.groupBy(0).aggregate(Aggregations.SUM, 1);

  return deltaIteration.closeWith(joined, summed);
}

/**
 * Initiates a ProjectJoin transformation and projects the second join input.<br/>
 * If the second join input is a {@link Tuple} {@link DataSet}, fields can be selected by their index.
 * If the second join input is not a Tuple DataSet, no parameters should be passed.<br/>
 * 
 * Fields of the first and second input can be added by chaining the method calls of
 * {@link JoinProjection#projectFirst(int...)} and {@link JoinProjection#projectSecond(int...)}.
 * 
 * @param secondFieldIndexes If the second input is a Tuple DataSet, the indexes of the selected fields.
 *                        For a non-Tuple DataSet, do not provide parameters.
 *                        The order of fields in the output tuple is defined by the order of field indexes.
 * @return A JoinProjection that needs to be converted into a {@link ProjectJoin} to complete the 
 *           Join transformation by calling {@link JoinProjection#types()}.
 * 
 * @see Tuple
 * @see DataSet
 * @see JoinProjection
 * @see ProjectJoin
 */
public JoinProjection<I1, I2> projectSecond(int... secondFieldIndexes) {
  // no fields of the first input are selected here, hence the null in the first-index position
  final JoinProjection<I1, I2> projection = new JoinProjection<I1, I2>(
      getInput1(), getInput2(), getKeys1(), getKeys2(), getJoinHint(), null, secondFieldIndexes);
  return projection;
}

/**
 * Runs a {@link CustomUnaryOperation} on the data set. Custom operations are typically complex
 * operators that are composed of multiple steps.
 * The operation is validated to be non-null, receives this data set as its input,
 * and is then asked to create its result.
 *
 * @param operation The operation to run; must not be null.
 * @return The data set produced by the operation.
 */
public <X> DataSet<X> runOperation(CustomUnaryOperation<T, X> operation) {
  Validate.notNull(operation, "The custom operator must not be null.");

  // the input has to be set before the operation can build its result
  operation.setInput(this);
  final DataSet<X> produced = operation.createResult();
  return produced;
}

/**
 * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first co-grouped {@link DataSet}.<br/>
 * The KeySelector function is called for each element of the first DataSet and extracts a single 
 * key value on which the DataSet is grouped.<br/>
 * 
 * @param keyExtractor The KeySelector function which extracts the key values from the DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation. 
 *           Call {@code equalTo} on the returned {@link CoGroupOperatorSetsPredicate} to continue the CoGroup. 
 * 
 * @see KeySelector
 * @see DataSet
 */
public <K> CoGroupOperatorSetsPredicate where(KeySelector<I1, K> keyExtractor) {
  // keys of the first input are defined by the selector function and the first input's type
  final Keys.SelectorFunctionKeys<I1, K> firstInputKeys =
      new Keys.SelectorFunctionKeys<I1, K>(keyExtractor, input1.getType());
  return new CoGroupOperatorSetsPredicate(firstInputKeys);
}

/**
 * Continues a CoGroup transformation.<br/>
 * Defines the {@link Tuple} fields of the first co-grouped {@link DataSet} that should be used as grouping keys.<br/>
 * <b>Note: Fields can only be selected as grouping keys on Tuple DataSets.</b><br/>
 * 
 * @param fields The indexes of the Tuple fields of the first co-grouped DataSet that should be used as keys.
 * @return An incomplete CoGroup transformation. 
 *           Call {@code equalTo} on the returned {@link CoGroupOperatorSetsPredicate} to continue the CoGroup. 
 * 
 * @see Tuple
 * @see DataSet
 */
public CoGroupOperatorSetsPredicate where(int... fields) {
  // keys of the first input are defined by field positions and the first input's type
  final Keys.FieldPositionKeys<I1> firstInputKeys =
      new Keys.FieldPositionKeys<I1>(fields, input1.getType());
  return new CoGroupOperatorSetsPredicate(firstInputKeys);
}

/**
 * Continues a Join transformation.<br/>
 * Defines the {@link Tuple} fields of the first join {@link DataSet} that should be used as join keys.<br/>
 * <b>Note: Fields can only be selected as join keys on Tuple DataSets.</b><br/>
 * 
 * @param fields The indexes of the Tuple fields of the first join DataSet that should be used as keys.
 * @return An incomplete Join transformation. 
 *           Call {@link JoinOperatorSetsPredicate#equalTo(int...)} or {@link JoinOperatorSetsPredicate#equalTo(KeySelector)}
 *           to continue the Join. 
 * 
 * @see Tuple
 * @see DataSet
 */
public JoinOperatorSetsPredicate where(int... fields) {
  // keys of the first input are defined by field positions and the first input's type
  final Keys.FieldPositionKeys<I1> firstInputKeys =
      new Keys.FieldPositionKeys<I1>(fields, input1.getType());
  return new JoinOperatorSetsPredicate(firstInputKeys);
}

/**
 * Continues a Join transformation and defines a {@link KeySelector} function for the first join {@link DataSet}.<br/>
 * The KeySelector function is called for each element of the first DataSet and extracts a single 
 * key value on which the DataSet is joined.<br/>
 * 
 * @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is joined.
 * @return An incomplete Join transformation. 
 *           Call {@link JoinOperatorSetsPredicate#equalTo(int...)} or {@link JoinOperatorSetsPredicate#equalTo(KeySelector)}
 *           to continue the Join. 
 * 
 * @see KeySelector
 * @see DataSet
 */
public <K extends Comparable<K>> JoinOperatorSetsPredicate where(KeySelector<I1, K> keySelector) {
  // keys of the first input are defined by the selector function and the first input's type
  final Keys.SelectorFunctionKeys<I1, K> firstInputKeys =
      new Keys.SelectorFunctionKeys<I1, K>(keySelector, input1.getType());
  return new JoinOperatorSetsPredicate(firstInputKeys);
}

/**
 * Creates a reduce operator for the given grouping and reduce function.
 * The grouping's data set and that data set's type are passed to the superclass constructor;
 * the grouping and the function are stored, and the semantic annotations are extracted
 * from the function's class.
 *
 * @param input The grouping whose data set is reduced.
 * @param function The reduce function; {@code function.getClass()} is invoked, so a null
 *                 function results in a {@link NullPointerException}.
 */
public ReduceOperator(Grouping<IN> input, ReduceFunction<IN> function) {
  super(input.getDataSet(), input.getDataSet().getType());

  this.grouper = input;
  this.function = function;

  extractSemanticAnnotationsFromUdf(function.getClass());
}

/**
 * Applies an Aggregate transformation on a non-grouped {@link Tuple} {@link DataSet}.<br/>
 * <b>Note: Only Tuple DataSets can be aggregated.</b><br/>
 * The transformation applies a built-in {@link Aggregations Aggregation} on a specified field 
 * of a Tuple DataSet. Additional aggregation functions can be added to the resulting 
 * {@link AggregateOperator} by calling {@link AggregateOperator#and(Aggregations, int)}.
 * 
 * @param agg The built-in aggregation function that is computed.
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the aggregated DataSet. 
 * 
 * @see Tuple
 * @see Aggregations
 * @see AggregateOperator
 * @see DataSet
 */
public AggregateOperator<T> aggregate(Aggregations agg, int field) {
  final AggregateOperator<T> aggregated = new AggregateOperator<T>(this, agg, field);
  return aggregated;
}

/**
 * Gets the key positions, as computed by {@code keys.computeLogicalKeyPositions()}.
 *
 * @return The logical key positions of the keys held by this object.
 */
public int [] getKeyPositions() {
  final int[] logicalPositions = keys.computeLogicalKeyPositions();
  return logicalPositions;
}

/**
 * Checks whether there are no key fields.
 *
 * @return {@code true} if {@code getNumberOfKeyFields()} reports zero, {@code false} otherwise.
 */
public boolean isEmpty() {
  return 0 == getNumberOfKeyFields();
}

/**
 * Sorts {@link Tuple} elements within a group on the specified field in the specified {@link Order}.<br/>
 * <b>Note: Only groups of Tuple elements can be sorted.</b><br/>
 * Groups can be sorted by multiple fields by chaining {@link #sortGroup(int, Order)} calls.
 * 
 * @param field The Tuple field on which the group is sorted.
 * @param order The Order in which the specified Tuple field is sorted.
 * @return A SortedGrouping with the specified order of group elements.
 * 
 * @see Tuple
 * @see Order
 */
public SortedGrouping<T> sortGroup(int field, Order order) {
  // the sorted grouping is built from this grouping's data set and keys
  final SortedGrouping<T> sorted = new SortedGrouping<T>(this.dataSet, this.keys, field, order);
  return sorted;
}

/**
 * Initiates a Project transformation on a {@link Tuple} {@link DataSet}.<br/>
 * <b>Note: Only Tuple DataSets can be projected.</b><br/>
 * The transformation projects each Tuple of the DataSet onto a (sub)set of fields.<br/>
 * This method returns a {@link Projection} on which {@link Projection#types()} needs to
 * be called to complete the transformation.
 * 
 * @param fieldIndexes The field indexes of the input tuples that are retained.
 *                        The order of fields in the output tuple corresponds to the order of field indexes.
 * @return A Projection that needs to be converted into a {@link ProjectOperator} to complete the 
 *           Project transformation by calling {@link Projection#types()}.
 * 
 * @see Tuple
 * @see DataSet
 * @see Projection
 * @see ProjectOperator
 */
public Projection<T> project(int... fieldIndexes) {
  final Projection<T> projection = new Projection<T>(this, fieldIndexes);
  return projection;
}

/**
 * Creates a cross operator over the two given inputs.
 * Both inputs and the return type are passed to the superclass constructor; the function
 * is stored and the semantic annotations are extracted from the function's class.
 *
 * @param input1 The first input data set.
 * @param input2 The second input data set.
 * @param function The cross function; {@code function.getClass()} is invoked, so a null
 *                 function results in a {@link NullPointerException}.
 * @param returnType The type information passed to the superclass as the return type.
 */
protected CrossOperator(DataSet<I1> input1, DataSet<I2> input2,
    CrossFunction<I1, I2, OUT> function, TypeInformation<OUT> returnType) {
  super(input1, input2, returnType);

  this.function = function;

  extractSemanticAnnotationsFromUdf(function.getClass());
}

/**
 * Determines the field indexes to use for a tuple type.
 * If no fields are given (null or an empty array), all fields of the tuple type are selected,
 * i.e. the indexes {@code 0 .. arity-1} in ascending order. Otherwise, the given fields are
 * handed to {@code rangeCheckAndOrderFields} and its result is returned.
 *
 * @param fields The requested field indexes, or null / empty to select all fields.
 * @param type The tuple type whose arity bounds the field indexes.
 * @return The field indexes to use.
 */
private static int[] makeFields(int[] fields, TupleTypeInfo<?> type) {
  final int arity = type.getArity();

  // explicitly requested fields: delegate, passing arity - 1 as the second argument
  // (presumably the highest admissible field index - confirm against rangeCheckAndOrderFields)
  if (fields != null && fields.length != 0) {
    return rangeCheckAndOrderFields(fields, arity - 1);
  }

  // a null or empty parameter means all fields are considered
  final int[] allFields = new int[arity];
  for (int pos = 0; pos < arity; pos++) {
    allFields[pos] = pos;
  }
  return allFields;
}

/**
 * Initiates a ProjectJoin transformation and projects the first join input.<br/>
 * If the first join input is a {@link Tuple} {@link DataSet}, fields can be selected by their index.
 * If the first join input is not a Tuple DataSet, no parameters should be passed.<br/>
 * 
 * Fields of the first and second input can be added by chaining the method calls of
 * {@link JoinProjection#projectFirst(int...)} and {@link JoinProjection#projectSecond(int...)}.
 * 
 * @param firstFieldIndexes If the first input is a Tuple DataSet, the indexes of the selected fields.
 *                        For a non-Tuple DataSet, do not provide parameters.
 *                        The order of fields in the output tuple is defined by the order of field indexes.
 * @return A JoinProjection that needs to be converted into a {@link ProjectJoin} to complete the 
 *           Join transformation by calling {@link JoinProjection#types()}.
 * 
 * @see Tuple
 * @see DataSet
 * @see JoinProjection
 * @see ProjectJoin
 */
public JoinProjection<I1, I2> projectFirst(int... firstFieldIndexes) {
  // no fields of the second input are selected here, hence the null in the second-index position
  final JoinProjection<I1, I2> projection = new JoinProjection<I1, I2>(
      getInput1(), getInput2(), getKeys1(), getKeys2(), getJoinHint(), firstFieldIndexes, null);
  return projection;
}

/**
 * Applies an Aggregate transformation on a grouped {@link Tuple} {@link DataSet}.<br/>
 * <b>Note: Only Tuple DataSets can be aggregated.</b><br/>
 * The transformation applies a built-in {@link Aggregations Aggregation} on a specified field 
 * of a Tuple group. Additional aggregation functions can be added to the resulting 
 * {@link AggregateOperator} by calling {@link AggregateOperator#and(Aggregations, int)}.
 * 
 * @param agg The built-in aggregation function that is computed.
 * @param field The index of the Tuple field on which the aggregation function is applied.
 * @return An AggregateOperator that represents the aggregated DataSet. 
 * 
 * @see Tuple
 * @see Aggregations
 * @see AggregateOperator
 * @see DataSet
 */
public AggregateOperator<T> aggregate(Aggregations agg, int field) {
  final AggregateOperator<T> aggregated = new AggregateOperator<T>(this, agg, field);
  return aggregated;
}

How to use eu.stratosphere.api.java.operators

Best Java code snippets using eu.stratosphere.api.java.operators (Showing top 20 results out of 315)