/**
 * Keys both inputs of this connected stream by field positions. The first input is keyed
 * by {@code keyPositions1} and the second input by {@code keyPositions2}.
 *
 * @param keyPositions1
 *            The field positions used to key the first input stream.
 * @param keyPositions2
 *            The field positions used to key the second input stream.
 * @return A new {@link ConnectedStreams} built from the two keyed inputs
 */
public ConnectedStreams<IN1, IN2> keyBy(int[] keyPositions1, int[] keyPositions2) {
    final ConnectedStreams<IN1, IN2> keyedStreams = new ConnectedStreams<>(
            this.environment,
            this.inputStream1.keyBy(keyPositions1),
            this.inputStream2.keyBy(keyPositions2));
    return keyedStreams;
}
/**
 * Keys both inputs of this connected stream with user-supplied key selectors. The first
 * input is keyed with {@code keySelector1} and the second input with {@code keySelector2}.
 *
 * @param keySelector1
 *            The {@link KeySelector} applied to the first input
 * @param keySelector2
 *            The {@link KeySelector} applied to the second input
 * @return A new {@link ConnectedStreams} built from the two keyed inputs
 */
public ConnectedStreams<IN1, IN2> keyBy(KeySelector<IN1, ?> keySelector1, KeySelector<IN2, ?> keySelector2) {
    final ConnectedStreams<IN1, IN2> keyedStreams = new ConnectedStreams<>(
            this.environment,
            this.inputStream1.keyBy(keySelector1),
            this.inputStream2.keyBy(keySelector2));
    return keyedStreams;
}
/**
 * Keys both inputs of this connected stream by a single field position each. The first
 * input is keyed by {@code keyPosition1} and the second input by {@code keyPosition2}.
 *
 * @param keyPosition1
 *            The field position used to key the elements of the first input stream.
 * @param keyPosition2
 *            The field position used to key the elements of the second input stream.
 * @return A new {@link ConnectedStreams} built from the two keyed inputs
 */
public ConnectedStreams<IN1, IN2> keyBy(int keyPosition1, int keyPosition2) {
    final ConnectedStreams<IN1, IN2> keyedStreams = new ConnectedStreams<>(
            environment,
            inputStream1.keyBy(keyPosition1),
            inputStream2.keyBy(keyPosition2));
    return keyedStreams;
}
/**
 * Keys both inputs of this connected stream using field expressions. The first input is
 * keyed by {@code field1} and the second input by {@code field2}.
 *
 * <p>A field expression is either the name of a public field or a getter method with
 * parentheses of the {@link DataStream}'s underlying type. A dot can be used to drill
 * down into objects, as in {@code "field1.getInnerField2()" }.
 *
 * @param field1
 *            The key expression for the first input
 * @param field2
 *            The key expression for the second input
 * @return A new {@link ConnectedStreams} built from the two keyed inputs
 */
public ConnectedStreams<IN1, IN2> keyBy(String field1, String field2) {
    final ConnectedStreams<IN1, IN2> keyedStreams = new ConnectedStreams<>(
            this.environment,
            this.inputStream1.keyBy(field1),
            this.inputStream2.keyBy(field2));
    return keyedStreams;
}
/**
 * Partitions the operator state of a {@link DataStream} using field expressions.
 *
 * <p>A field expression is either the name of a public field or a getter method with
 * parentheses of the {@link DataStream}'s underlying type. A dot can be used to drill
 * down into objects, as in {@code "field1.getInnerField2()" }.
 *
 * @param fields
 *            One or more field expressions on which the state of the {@link DataStream}
 *            operators will be partitioned.
 * @return The {@link DataStream} with partitioned state (i.e. KeyedStream)
 */
public KeyedStream<T, Tuple> keyBy(String... fields) {
    // Wrap the expressions together with this stream's type and delegate to the Keys-based overload.
    final Keys.ExpressionKeys<T> expressionKeys = new Keys.ExpressionKeys<>(fields, getType());
    return keyBy(expressionKeys);
}
/**
 * A thin wrapper layer over {@link DataStream#keyBy(KeySelector)}. The given selector is
 * wrapped in a {@code PythonKeySelector} before being handed to the underlying stream.
 *
 * @param selector The KeySelector to be used for extracting the key for partitioning
 * @return The {@link PythonDataStream} with partitioned state (i.e. {@link PythonKeyedStream})
 */
public PythonKeyedStream key_by(KeySelector<PyObject, PyKey> selector) throws IOException {
    final PythonKeySelector wrappedSelector = new PythonKeySelector(selector);
    return new PythonKeyedStream(stream.keyBy(wrappedSelector));
}
/**
 * Partitions the operator state of a {@link DataStream} by the given key positions.
 *
 * <p>When the stream's type is a basic or primitive array type, the key is extracted with
 * an array key selector; otherwise the positions are treated as expression keys.
 *
 * @param fields
 *            The position of the fields on which the {@link DataStream}
 *            will be grouped.
 * @return The {@link DataStream} with partitioned state (i.e. KeyedStream)
 */
public KeyedStream<T, Tuple> keyBy(int... fields) {
    final boolean isArrayType =
            getType() instanceof BasicArrayTypeInfo || getType() instanceof PrimitiveArrayTypeInfo;

    if (!isArrayType) {
        return keyBy(new Keys.ExpressionKeys<>(fields, getType()));
    }
    return keyBy(KeySelectorUtil.getSelectorForArray(fields, getType()));
}
@PublicEvolving public AllWindowedStream(DataStream<T> input, WindowAssigner<? super T, W> windowAssigner) { this.input = input.keyBy(new NullByteKeySelector<T>()); this.windowAssigner = windowAssigner; this.trigger = windowAssigner.getDefaultTrigger(input.getExecutionEnvironment()); }
/**
 * Expects a {@code CompositeType.InvalidFieldReferenceException} when summing over the
 * field expression {@code "stats.nonExistingField"} on a keyed stream.
 */
@Test(expected = CompositeType.InvalidFieldReferenceException.class)
public void testFailOnNestedPojoFieldAccessor() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStream<Data> source = env.fromCollection(elements);

    source
            .keyBy("aaa", "stats.count")
            .sum("stats.nonExistingField");
}
/**
 * Builds a keyed count-window operator on the given input.
 *
 * <p>The input is keyed by field 0, windowed with a count window of size 1, and processed
 * by a {@code StatefulWindowFunction}. The resulting operator has parallelism 4 and the
 * uid {@code "window"}.
 *
 * @param mode The execution mode passed to the {@code StatefulWindowFunction}
 * @param input The stream to window
 * @return The configured window operator
 */
public static SingleOutputStreamOperator<Integer> createWindowFunction(ExecutionMode mode, DataStream<Tuple2<Integer, Integer>> input) {
    final SingleOutputStreamOperator<Integer> windowed = input
            .keyBy(0)
            .countWindow(1)
            .apply(new StatefulWindowFunction(mode));

    return windowed
            .setParallelism(4)
            .uid("window");
}
/**
 * Verifies that a POJO containing a nested array can be used as a key when the POJO is
 * returned as the key itself, i.e. that {@code keyBy} accepts it without throwing.
 *
 * <p>NOTE(review): no job execution is visible in this method, so the sink's assertion
 * presumably only runs if the pipeline is executed elsewhere - confirm.
 */
@Test
public void testPOJOWithNestedArrayAndHashCodeWorkAround() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<POJOWithHashCode> input = env.fromElements(
            new POJOWithHashCode(new int[] {1, 2}));

    input.keyBy(new KeySelector<POJOWithHashCode, POJOWithHashCode>() {
        @Override
        public POJOWithHashCode getKey(POJOWithHashCode value) throws Exception {
            return value;
        }
    }).addSink(new SinkFunction<POJOWithHashCode>() {
        @Override
        public void invoke(POJOWithHashCode value) throws Exception {
            // Arrays do not override equals(), so assertEquals would compare references and
            // always fail against a freshly allocated array; compare the contents instead,
            // with the expected value first.
            Assert.assertArrayEquals(new int[] {1, 2}, value.getId());
        }
    });
}
@Test public void testPOJOnoHashCodeKeyRejection() { KeySelector<POJOWithoutHashCode, POJOWithoutHashCode> keySelector = new KeySelector<POJOWithoutHashCode, POJOWithoutHashCode>() { @Override public POJOWithoutHashCode getKey(POJOWithoutHashCode value) throws Exception { return value; } }; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<POJOWithoutHashCode> input = env.fromElements( new POJOWithoutHashCode(new int[] {1, 2})); // adjust the rule expectedException.expect(InvalidProgramException.class); input.keyBy(keySelector); }
/**
 * Expects a {@link NullPointerException} when an interval join is given {@code null} as
 * its upper bound.
 */
@Test(expected = NullPointerException.class)
public void testFailsWithoutUpperBound() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    final DataStream<Tuple2<String, Integer>> left = env.fromElements(Tuple2.of("1", 1));
    final DataStream<Tuple2<String, Integer>> right = env.fromElements(Tuple2.of("1", 1));

    final Tuple2KeyExtractor leftKeyExtractor = new Tuple2KeyExtractor();
    final Tuple2KeyExtractor rightKeyExtractor = new Tuple2KeyExtractor();

    left.keyBy(leftKeyExtractor)
            .intervalJoin(right.keyBy(rightKeyExtractor))
            .between(Time.milliseconds(0), null);
}
/**
 * Expects a {@link NullPointerException} when an interval join is given {@code null} as
 * its lower bound.
 */
@Test(expected = NullPointerException.class)
public void testFailsWithoutLowerBound() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    final DataStream<Tuple2<String, Integer>> left = env.fromElements(Tuple2.of("1", 1));
    final DataStream<Tuple2<String, Integer>> right = env.fromElements(Tuple2.of("1", 1));

    final Tuple2KeyExtractor leftKeyExtractor = new Tuple2KeyExtractor();
    final Tuple2KeyExtractor rightKeyExtractor = new Tuple2KeyExtractor();

    left.keyBy(leftKeyExtractor)
            .intervalJoin(right.keyBy(rightKeyExtractor))
            .between(null, Time.milliseconds(1));
}
@Test public void testPOJOWithNestedArrayNoHashCodeKeyRejection() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<POJOWithHashCode> input = env.fromElements( new POJOWithHashCode(new int[] {1, 2})); TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple1<int[]>>( PrimitiveArrayTypeInfo.INT_PRIMITIVE_ARRAY_TYPE_INFO); // adjust the rule expectedException.expect(InvalidProgramException.class); expectedException.expectMessage(new StringStartsWith("Type " + expectedTypeInfo + " cannot be used as key.")); input.keyBy("id"); }
private <K> void testKeyRejection(KeySelector<Tuple2<Integer[], String>, K> keySelector, TypeInformation<K> expectedKeyType) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<Integer[], String>> input = env.fromElements( new Tuple2<>(new Integer[] {1, 2}, "barfoo") ); Assert.assertEquals(expectedKeyType, TypeExtractor.getKeySelectorTypes(keySelector, input.getType())); // adjust the rule expectedException.expect(InvalidProgramException.class); expectedException.expectMessage(new StringStartsWith("Type " + expectedKeyType + " cannot be used as key.")); input.keyBy(keySelector); }
@Test public void testTupleNestedArrayKeyRejection() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<Integer[], String>> input = env.fromElements( new Tuple2<>(new Integer[] {1, 2}, "test-test")); TypeInformation<?> expectedTypeInfo = new TupleTypeInfo<Tuple2<Integer[], String>>( BasicArrayTypeInfo.INT_ARRAY_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO); // adjust the rule expectedException.expect(InvalidProgramException.class); expectedException.expectMessage(new StringStartsWith("Type " + expectedTypeInfo + " cannot be used as key.")); input.keyBy(new KeySelector<Tuple2<Integer[], String>, Tuple2<Integer[], String>>() { @Override public Tuple2<Integer[], String> getKey(Tuple2<Integer[], String> value) throws Exception { return value; } }); }
/**
 * Tests that the KeyGroupStreamPartitioner are properly set up with the correct value of
 * maximum parallelism.
 *
 * <p>NOTE(review): the partitioner of the first in-edge is looked up but nothing is
 * asserted on it in this method - confirm whether an assertion is missing.
 */
@Test
public void testSetupOfKeyGroupPartitioner() {
    final int maxParallelism = 42;

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setMaxParallelism(maxParallelism);

    final DataStream<Integer> source = env.fromElements(1, 2, 3);
    final DataStream<Integer> keyedResult = source
            .keyBy(value -> value)
            .map(new NoOpIntMap());
    keyedResult.addSink(new DiscardingSink<>());

    final StreamGraph graph = env.getStreamGraph();
    final StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId());

    StreamPartitioner<?> streamPartitioner = keyedResultNode
            .getInEdges()
            .get(0)
            .getPartitioner();
}
/**
 * Expects stream graph generation to fail with an {@link IllegalStateException} when two
 * splits are applied with a {@code keyBy} between them.
 */
@Test
public void testKeybyBetweenConsecutiveSplitRejection() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final DataStreamSource<Integer> src = env.fromElements(0, 0);
    final OutputSelector<Integer> outputSelector = new DummyOutputSelector<>();

    src.split(outputSelector)
            .select("dummy")
            .keyBy(x -> x)
            .split(outputSelector)
            .addSink(new DiscardingSink<>());

    expectedException.expect(IllegalStateException.class);
    expectedException.expectMessage("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");

    env.getStreamGraph();
}
/**
 * .aggregate() does not support RichAggregateFunction, since the AggregationFunction is used internally
 * in a {@code AggregatingState}. The test therefore expects an
 * {@link UnsupportedOperationException} and fails explicitly if none is thrown.
 */
@Test(expected = UnsupportedOperationException.class)
public void testAggregateWithRichFunctionFails() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final DataStream<Tuple2<String, Integer>> source =
            env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    final SlidingEventTimeWindows slidingWindows = SlidingEventTimeWindows.of(
            Time.of(1, TimeUnit.SECONDS),
            Time.of(100, TimeUnit.MILLISECONDS));

    source
            .keyBy(0)
            .window(slidingWindows)
            .aggregate(new DummyRichAggregationFunction<Tuple2<String, Integer>>());

    fail("exception was not thrown");
}