private SortPartitionOperator(DataSet<T> dataSet, String sortLocationName) { super(dataSet, dataSet.getType()); keys = new ArrayList<>(); orders = new ArrayList<>(); this.sortLocationName = sortLocationName; }
public DefaultJoin(DataSet<I1> input1, DataSet<I2> input2, Keys<I1> keys1, Keys<I2> keys2, JoinHint hint, String joinLocationName, JoinType type) { super(input1, input2, keys1, keys2, new DefaultFlatJoinFunction<I1, I2>(), new TupleTypeInfo<Tuple2<I1, I2>>(input1.getType(), input2.getType()), hint, joinLocationName, type); }
public SortedGrouping(DataSet<T> set, Keys<T> keys, String field, Order order) { super(set, keys); if (!Keys.ExpressionKeys.isSortKey(field, inputDataSet.getType())) { throw new InvalidProgramException("Selected sort key is not a sortable type"); } // resolve String-field to int using the expression keys ExpressionKeys<T> ek = new ExpressionKeys<>(field, inputDataSet.getType()); this.groupSortKeyPositions = ek.computeLogicalKeyPositions(); this.groupSortOrders = new Order[groupSortKeyPositions.length]; Arrays.fill(this.groupSortOrders, order); // if field == "*" }
public DefaultCross(DataSet<I1> input1, DataSet<I2> input2, CrossHint hint, String defaultName) { super(input1, input2, new DefaultCrossFunction<I1, I2>(), new TupleTypeInfo<Tuple2<I1, I2>>( Preconditions.checkNotNull(input1, "input1 is null").getType(), Preconditions.checkNotNull(input2, "input2 is null").getType()), hint, defaultName); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> ProjectOperator<T, Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>> projectTuple13() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>> tType = new TupleTypeInfo<Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>>(fTypes); return new ProjectOperator<T, Tuple13<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12>>(this.ds, this.fieldIndexes, tType); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> ProjectOperator<T, Tuple15<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14>> projectTuple15() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple15<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14>> tType = new TupleTypeInfo<Tuple15<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14>>(fTypes); return new ProjectOperator<T, Tuple15<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14>>(this.ds, this.fieldIndexes, tType); }
/** * {@inheritDoc} * * @return An incomplete Join transformation. * Call {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(int...)} or * {@link org.apache.flink.api.java.operators.JoinOperator.JoinOperatorSets.JoinOperatorSetsPredicate#equalTo(KeySelector)} * to continue the Join. */ @Override public JoinOperatorSetsPredicate where(int... fields) { return new JoinOperatorSetsPredicate(new Keys.ExpressionKeys<>(fields, input1.getType())); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2> ProjectOperator<T, Tuple3<T0, T1, T2>> projectTuple3() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple3<T0, T1, T2>> tType = new TupleTypeInfo<Tuple3<T0, T1, T2>>(fTypes); return new ProjectOperator<T, Tuple3<T0, T1, T2>>(this.ds, this.fieldIndexes, tType); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2, T3, T4, T5, T6> ProjectOperator<T, Tuple7<T0, T1, T2, T3, T4, T5, T6>> projectTuple7() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple7<T0, T1, T2, T3, T4, T5, T6>> tType = new TupleTypeInfo<Tuple7<T0, T1, T2, T3, T4, T5, T6>>(fTypes); return new ProjectOperator<T, Tuple7<T0, T1, T2, T3, T4, T5, T6>>(this.ds, this.fieldIndexes, tType); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2, T3, T4, T5, T6, T7, T8> ProjectOperator<T, Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>> projectTuple9() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>> tType = new TupleTypeInfo<Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>>(fTypes); return new ProjectOperator<T, Tuple9<T0, T1, T2, T3, T4, T5, T6, T7, T8>>(this.ds, this.fieldIndexes, tType); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15> ProjectOperator<T, Tuple16<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15>> projectTuple16() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple16<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15>> tType = new TupleTypeInfo<Tuple16<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15>>(fTypes); return new ProjectOperator<T, Tuple16<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15>>(this.ds, this.fieldIndexes, tType); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19> ProjectOperator<T, Tuple20<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19>> projectTuple20() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple20<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19>> tType = new TupleTypeInfo<Tuple20<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19>>(fTypes); return new ProjectOperator<T, Tuple20<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19>>(this.ds, this.fieldIndexes, tType); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> ProjectOperator<T, Tuple23<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22>> projectTuple23() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple23<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22>> tType = new TupleTypeInfo<Tuple23<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22>>(fTypes); return new ProjectOperator<T, Tuple23<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22>>(this.ds, this.fieldIndexes, tType); }
/** * Projects a {@link Tuple} {@link DataSet} to the previously selected fields. * * @return The projected DataSet. * @see Tuple * @see DataSet */ public <T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> ProjectOperator<T, Tuple24<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23>> projectTuple24() { TypeInformation<?>[] fTypes = extractFieldTypes(fieldIndexes, ds.getType()); TupleTypeInfo<Tuple24<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23>> tType = new TupleTypeInfo<Tuple24<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23>>(fTypes); return new ProjectOperator<T, Tuple24<T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23>>(this.ds, this.fieldIndexes, tType); }
public ReduceOperator(Grouping<IN> input, ReduceFunction<IN> function, String defaultName) { super(input.getInputDataSet(), input.getInputDataSet().getType()); this.function = function; this.grouper = input; this.defaultName = defaultName; this.hint = CombineHint.OPTIMIZER_CHOOSES; UdfOperatorUtils.analyzeSingleInputUdf(this, ReduceFunction.class, defaultName, function, grouper.keys); }
/** * Range-partitions a DataSet using the specified key selector function. */ public static <T, K extends Comparable<K>> PartitionOperator<T> partitionByRange(DataSet<T> input, DataDistribution distribution, KeySelector<T, K> keyExtractor) { final TypeInformation<K> keyType = TypeExtractor.getKeySelectorTypes(keyExtractor, input.getType()); return new PartitionOperator<>(input, PartitionOperatorBase.PartitionMethod.RANGE, new Keys.SelectorFunctionKeys<>(input.clean(keyExtractor), input.getType(), keyType), distribution, Utils.getCallLocationName()); }
/** * Hash-partitions a DataSet on the specified key fields. * * <p><b>Important:</b>This operation shuffles the whole DataSet over the network and can take significant amount of time. * * @param fields The field indexes on which the DataSet is hash-partitioned. * @return The partitioned DataSet. */ public PartitionOperator<T> partitionByHash(int... fields) { return new PartitionOperator<>(this, PartitionMethod.HASH, new Keys.ExpressionKeys<>(fields, getType()), Utils.getCallLocationName()); }
/** * Range-partitions a DataSet on the specified key fields. * * <p><b>Important:</b>This operation requires an extra pass over the DataSet to compute the range boundaries and * shuffles the whole DataSet over the network. This can take significant amount of time. * * @param fields The field expressions on which the DataSet is range-partitioned. * @return The partitioned DataSet. */ public PartitionOperator<T> partitionByRange(String... fields) { return new PartitionOperator<>(this, PartitionMethod.RANGE, new Keys.ExpressionKeys<>(fields, getType()), Utils.getCallLocationName()); }
@Override public DataSet<Vertex<K, VV>> run(Graph<K, VV, EV> input) { TypeInformation<VV> valueType = ((TupleTypeInfo<?>) input.getVertices().getType()).getTypeAt(1); // iteratively adopt the most frequent label among the neighbors of each vertex return input .mapEdges(new MapTo<>(NullValue.getInstance())) .runScatterGatherIteration( new SendNewLabelToNeighbors<>(valueType), new UpdateVertexLabel<>(), maxIterations) .getVertices(); }
@Test(expected = TableException.class) public void testGenericRowWithAlias() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); BatchTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env, config()); // use null value the enforce GenericType DataSet<Row> dataSet = env.fromElements(Row.of((Integer) null)); assertTrue(dataSet.getType() instanceof GenericTypeInfo); assertTrue(dataSet.getType().getTypeClass().equals(Row.class)); // Must fail. Cannot import DataSet<Row> with GenericTypeInfo. tableEnv.fromDataSet(dataSet, "nullField"); }