/**
 * KeyBy operation for the connected data stream. Assigns keys to the elements of
 * input1 and input2 using keySelector1 and keySelector2.
 *
 * @param keySelector1
 *            The {@link KeySelector} used for grouping the first input
 * @param keySelector2
 *            The {@link KeySelector} used for grouping the second input
 * @return The partitioned {@link ConnectedStreams}
 */
public ConnectedStreams<IN1, IN2> keyBy(KeySelector<IN1, ?> keySelector1,
		KeySelector<IN2, ?> keySelector2) {
	return new ConnectedStreams<>(environment, inputStream1.keyBy(keySelector1),
			inputStream2.keyBy(keySelector2));
}
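// A minimal usage sketch (not from the original source): both KeySelectors must
// produce the same key type so that matching elements of the two inputs end up
// on the same parallel instance. The Tuple2 element shapes and the method name
// are illustrative assumptions.
public static ConnectedStreams<Tuple2<String, Integer>, Tuple2<String, Long>> keyBothInputs(
		DataStream<Tuple2<String, Integer>> counts, DataStream<Tuple2<String, Long>> totals) {
	return counts.connect(totals).keyBy(
			new KeySelector<Tuple2<String, Integer>, String>() {
				@Override
				public String getKey(Tuple2<String, Integer> value) {
					return value.f0; // key of the first input
				}
			},
			new KeySelector<Tuple2<String, Long>, String>() {
				@Override
				public String getKey(Tuple2<String, Long> value) {
					return value.f0; // same key type for the second input
				}
			});
}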
/**
 * Applies an aggregation that gives the maximum element of every window of
 * the data stream by the given position. If several elements have the same
 * maximum value, the operator returns the first one by default.
 *
 * @param positionToMaxBy
 *            The position to maximize by
 * @return The transformed DataStream.
 */
public SingleOutputStreamOperator<T> maxBy(int positionToMaxBy) {
	return this.maxBy(positionToMaxBy, true);
}
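// A short usage sketch (assumptions: Tuple2<String, Integer> elements keyed by
// field 0, count windows of 5). maxBy(1) emits, per window, the first element
// that carries the maximum value at position 1.
public static void windowedMaxBySketch(DataStream<Tuple2<String, Integer>> input) {
	input.keyBy(value -> value.f0)
			.countWindow(5)
			.maxBy(1) // ties are resolved in favor of the first element
			.print();
}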
/**
 * Creates a join operation. See {@link JoinedStreams} for an example of how the keys
 * and window can be specified.
 */
public <T2> JoinedStreams<T, T2> join(DataStream<T2> otherStream) {
	return new JoinedStreams<>(this, otherStream);
}
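// A hedged sketch of the fluent pattern the Javadoc refers to; the key
// selectors, window assigner, and join function are illustrative assumptions.
public static DataStream<String> joinSketch(
		DataStream<Tuple2<String, Integer>> first, DataStream<Tuple2<String, Long>> second) {
	return first.join(second)
			.where(value -> value.f0)   // key of the first input
			.equalTo(value -> value.f0) // key of the second input
			.window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
			.apply((a, b) -> a.f0 + " -> " + (a.f1 + b.f1));
}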
public static SingleOutputStreamOperator<Integer> createWindowFunction(
		ExecutionMode mode, DataStream<Tuple2<Integer, Integer>> input) {
	return input
			.keyBy(0)
			.countWindow(1)
			.apply(new StatefulWindowFunction(mode))
			.setParallelism(4)
			.uid("window");
}
private static void addSmallBoundedJob(StreamExecutionEnvironment env, int parallelism) {
	DataStream<Long> stream = env.generateSequence(1, 100).setParallelism(parallelism);

	stream
			.filter(ignored -> false).setParallelism(parallelism)
			.startNewChain()
			.print().setParallelism(parallelism);
}
public static SingleOutputStreamOperator<Integer> createSecondStatefulMap(
		ExecutionMode mode, DataStream<Integer> input) {
	return input
			.map(new StatefulStringStoringMap(mode, "second"))
			.setParallelism(4)
			.uid("second");
}
@Test(expected = UnsupportedOperationException.class)
public void testDifferingParallelism() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// introduce dummy mapper to get to correct parallelism
	DataStream<Integer> source = env.fromElements(1, 10)
			.map(noOpIntMap);

	IterativeStream<Integer> iter1 = source.iterate();

	iter1.closeWith(iter1.map(noOpIntMap).setParallelism(parallelism / 2));
}
@Test(expected = UnsupportedOperationException.class)
public void testCoDifferingParallelism() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// introduce dummy mapper to get to correct parallelism
	DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

	ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType(
			Integer.class);

	coIter.closeWith(coIter.map(noOpIntCoMap).setParallelism(parallelism / 2));
}
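// For contrast (a sketch, not part of the original suite): a feedback edge is
// accepted when its parallelism matches the iteration head, so simply not
// overriding the parallelism closes the loop successfully.
private static void closeIterationWithMatchingParallelism(DataStream<Integer> source) {
	IterativeStream<Integer> iter = source.iterate();
	iter.closeWith(iter.map(noOpIntMap)); // same parallelism as the head
}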
@Test
public void testDelegateToCoGrouped() {
	Time lateness = Time.milliseconds(42L);

	JoinedStreams.WithWindow<String, String, String, TimeWindow> withLateness = dataStream1
			.join(dataStream2)
			.where(keySelector)
			.equalTo(keySelector)
			.window(tsAssigner)
			.allowedLateness(lateness);

	withLateness.apply(joinFunction, BasicTypeInfo.STRING_TYPE_INFO);

	Assert.assertEquals(lateness.toMilliseconds(),
			withLateness.getCoGroupedWindowedStream().getAllowedLateness().toMilliseconds());
}
@Test
public void testDelegateToCoGrouped() {
	Time lateness = Time.milliseconds(42L);

	CoGroupedStreams.WithWindow<String, String, String, TimeWindow> withLateness = dataStream1
			.coGroup(dataStream2)
			.where(keySelector)
			.equalTo(keySelector)
			.window(tsAssigner)
			.allowedLateness(lateness);

	withLateness.apply(coGroupFunction, BasicTypeInfo.STRING_TYPE_INFO);

	Assert.assertEquals(lateness.toMilliseconds(),
			withLateness.getWindowedStream().getAllowedLateness());
}
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> iter1 = source.iterate();

	iter1.map(noOpIntMap).print();

	env.execute();
}
@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
	// this test verifies that we cannot close an iteration with a DataStream that does not
	// have the iteration in its predecessors
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// introduce dummy mapper to get to correct parallelism
	DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> iter1 = source.iterate();
	ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType(
			Integer.class);

	coIter.closeWith(iter1.map(noOpIntMap));
}
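// For contrast (a sketch, not part of the original suite): closing a connected
// iteration with a stream that does descend from it is legal, because the
// iteration then appears among the feedback stream's predecessors.
private static void closeCoIterationFromInsideLoop(DataStream<Integer> source) {
	ConnectedIterativeStreams<Integer, Integer> coIter =
			source.iterate().withFeedbackType(Integer.class);
	// the feedback stream is derived from the iteration itself
	coIter.closeWith(coIter.map(noOpIntCoMap));
}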
/**
 * Gets the type of the first input.
 *
 * @return The type of the first input
 */
public TypeInformation<IN1> getType1() {
	return inputStream1.getType();
}
/**
 * A thin wrapper layer over {@link SingleOutputStreamOperator#name(String)}.
 *
 * @param name operator name
 * @return The named operator.
 */
public PythonSingleOutputStreamOperator name(String name) {
	this.stream.name(name);
	return this;
}
/**
 * Applies an aggregation that gives the minimum value of every window
 * of the data stream at the given position.
 *
 * @param positionToMin The position to minimize
 * @return The transformed DataStream.
 */
public SingleOutputStreamOperator<T> min(int positionToMin) {
	return aggregate(new ComparableAggregator<>(positionToMin, input.getType(),
			AggregationFunction.AggregationType.MIN, input.getExecutionConfig()));
}
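// A short sketch (assumed Tuple2<String, Integer> input, count windows of 3):
// unlike minBy, min(1) only guarantees the minimum at position 1; the other
// fields of the emitted element need not come from the minimal record.
public static void windowedMinSketch(DataStream<Tuple2<String, Integer>> input) {
	input.keyBy(value -> value.f0)
			.countWindow(3)
			.min(1)
			.print();
}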
/**
 * Specifies a {@link KeySelector} for elements from the first input with explicit
 * type information for the key type.
 *
 * @param keySelector The KeySelector to be used for extracting the first input's key for partitioning.
 * @param keyType The type information describing the key type.
 */
public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector, TypeInformation<KEY> keyType) {
	requireNonNull(keySelector);
	requireNonNull(keyType);
	return new Where<>(input1.clean(keySelector), keyType);
}
/**
 * Specifies a {@link KeySelector} for elements from the first input with explicit
 * type information.
 *
 * @param keySelector The KeySelector to be used for extracting the first input's key for partitioning.
 * @param keyType The type information describing the key type.
 */
public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector, TypeInformation<KEY> keyType) {
	Preconditions.checkNotNull(keySelector);
	Preconditions.checkNotNull(keyType);
	return new Where<>(input1.clean(keySelector), keyType);
}
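// A hedged usage sketch: explicit TypeInformation is handy when the key type
// cannot be inferred from a lambda, e.g. a generic Tuple2 key. The element
// shapes and the Types.TUPLE key type are illustrative assumptions.
public static void whereWithExplicitKeyTypeSketch(
		DataStream<Tuple3<String, Integer, Double>> first,
		DataStream<Tuple3<String, Integer, Long>> second) {
	first.join(second)
			.where(value -> Tuple2.of(value.f0, value.f1),
					Types.TUPLE(Types.STRING, Types.INT))
			.equalTo(value -> Tuple2.of(value.f0, value.f1),
					Types.TUPLE(Types.STRING, Types.INT))
			.window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
			.apply((a, b) -> a.f0 + ":" + (a.f2 + b.f2));
}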
/**
 * Applies an aggregation that gives the maximum element of every window of
 * the data stream by the given field. If several elements have the same
 * maximum value, the operator returns the first one by default.
 *
 * @param field
 *            The field to maximize by
 * @return The transformed DataStream.
 */
public SingleOutputStreamOperator<T> maxBy(String field) {
	return this.maxBy(field, true);
}
/**
 * Applies an aggregation that gives the minimum element of every window of
 * the data stream by the given field. If several elements have the same
 * minimum value, the operator returns the first one by default.
 *
 * @param field
 *            The field to minimize by
 * @return The transformed DataStream.
 */
public SingleOutputStreamOperator<T> minBy(String field) {
	return this.minBy(field, true);
}
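// A short sketch (assumed Tuple2 input keyed by field 0): with tuples the
// field is addressed by name, e.g. "f1"; per window, the first element holding
// the minimal f1 is emitted in full.
public static void windowedMinBySketch(DataStream<Tuple2<String, Integer>> input) {
	input.keyBy(value -> value.f0)
			.countWindow(10)
			.minBy("f1")
			.print();
}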
@Override
public TypeSerializerSnapshot<TaggedUnion<T1, T2>> snapshotConfiguration() {
	return new UnionSerializerSnapshot<>(this);
}