@SuppressWarnings("unchecked") private static <T> SourceFunction<T> getFunctionFromDataSource(DataStreamSource<T> dataStreamSource) { dataStreamSource.addSink(new DiscardingSink<T>()); AbstractUdfStreamOperator<?, ?> operator = (AbstractUdfStreamOperator<?, ?>) getOperatorFromDataStream(dataStreamSource); return (SourceFunction<T>) operator.getUserFunction(); }
@SuppressWarnings("rawtypes,unchecked") private static Integer createDownStreamId(ConnectedStreams dataStream) { SingleOutputStreamOperator<?> coMap = dataStream.map(new CoMapFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Object>() { private static final long serialVersionUID = 1L; @Override public Object map1(Tuple2<Long, Long> value) { return null; } @Override public Object map2(Tuple2<Long, Long> value) { return null; } }); coMap.addSink(new DiscardingSink()); return coMap.getId(); }
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); // define the dataflow StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(2); env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, 1000)); env.readFileStream("input/", 60000, FileMonitoringFunction.WatchType.ONLY_NEW_FILES) .addSink(new DiscardingSink<String>()); // generate a job graph final JobGraph jobGraph = env.getStreamGraph().getJobGraph(); File jobGraphFile = new File(params.get("output", "job.graph")); try (FileOutputStream output = new FileOutputStream(jobGraphFile); ObjectOutputStream obOutput = new ObjectOutputStream(output)){ obOutput.writeObject(jobGraph); } } }
public static JobGraph stoppableJob(final StopJobSignal stopJobSignal) { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.addSource(new InfiniteSourceFunction(stopJobSignal)) .setParallelism(2) .shuffle() .addSink(new DiscardingSink<>()) .setParallelism(2); return env.getStreamGraph().getJobGraph(); }
public static void main(String[] args) throws Exception { final ParameterTool pt = ParameterTool.fromArgs(args); final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); setupEnvironment(env, pt); final int numStates = pt.getInt(NUM_LIST_STATES_PER_OP.key(), NUM_LIST_STATES_PER_OP.defaultValue()); final int numPartitionsPerState = pt.getInt(NUM_PARTITIONS_PER_LIST_STATE.key(), NUM_PARTITIONS_PER_LIST_STATE.defaultValue()); Preconditions.checkState(env.getCheckpointInterval() > 0L, "Checkpointing must be enabled for this test!"); env.addSource(new SimpleEndlessSourceWithBloatedState(numStates, numPartitionsPerState)).setParallelism(env.getParallelism()) .addSink(new DiscardingSink<>()).setParallelism(1); env.execute("HeavyDeploymentStressTestProgram"); }
@Test public void testConsecutiveSplitRejection() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Integer> src = env.fromElements(0, 0); OutputSelector<Integer> outputSelector = new DummyOutputSelector<>(); src.split(outputSelector).split(outputSelector).addSink(new DiscardingSink<>()); expectedException.expect(IllegalStateException.class); expectedException.expectMessage("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs."); env.getStreamGraph(); }
@Test public void testSplitAfterSideOutputRejection() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Integer> src = env.fromElements(0, 0); OutputTag<Integer> outputTag = new OutputTag<Integer>("dummy"){}; OutputSelector<Integer> outputSelector = new DummyOutputSelector<>(); src.getSideOutput(outputTag).split(outputSelector).addSink(new DiscardingSink<>()); expectedException.expect(IllegalStateException.class); expectedException.expectMessage("Split after side-outputs are not supported. Splits are deprecated. Please use side-outputs."); env.getStreamGraph(); }
public void doTestPropagationFromCheckpointConfig(boolean failTaskOnCheckpointErrors) throws Exception { StreamExecutionEnvironment streamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment(); streamExecutionEnvironment.setParallelism(1); streamExecutionEnvironment.getCheckpointConfig().setCheckpointInterval(1000); streamExecutionEnvironment.getCheckpointConfig().setFailOnCheckpointingErrors(failTaskOnCheckpointErrors); streamExecutionEnvironment.addSource(new SourceFunction<Integer>() { @Override public void run(SourceContext<Integer> ctx) throws Exception { } @Override public void cancel() { } }).addSink(new DiscardingSink<>()); StreamGraph streamGraph = streamExecutionEnvironment.getStreamGraph(); JobGraph jobGraph = StreamingJobGraphGenerator.createJobGraph(streamGraph); SerializedValue<ExecutionConfig> serializedExecutionConfig = jobGraph.getSerializedExecutionConfig(); ExecutionConfig executionConfig = serializedExecutionConfig.deserializeValue(Thread.currentThread().getContextClassLoader()); Assert.assertEquals(failTaskOnCheckpointErrors, executionConfig.isFailTaskOnCheckpointError()); } }
@Test public void test() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); env.addSource(new TestSource()).map(new TestMap()).addSink(new DiscardingSink<Integer>()); env.execute(); assertNotEquals(srcContext, mapContext); }
/** * Test whether an {@link OutputTypeConfigurable} implementation gets called with the correct * output type. In this test case the output type must be BasicTypeInfo.INT_TYPE_INFO. */ @Test public void testOutputTypeConfigurationWithOneInputTransformation() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 10); OutputTypeConfigurableOperationWithOneInput outputTypeConfigurableOperation = new OutputTypeConfigurableOperationWithOneInput(); DataStream<Integer> result = source.transform( "Single input and output type configurable operation", BasicTypeInfo.INT_TYPE_INFO, outputTypeConfigurableOperation); result.addSink(new DiscardingSink<>()); env.getStreamGraph(); assertEquals(BasicTypeInfo.INT_TYPE_INFO, outputTypeConfigurableOperation.getTypeInformation()); }
public static void main(String[] args) throws Exception { final ParameterTool pt = ParameterTool.fromArgs(args); final String checkpointDir = pt.getRequired("checkpoint.dir"); final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStateBackend(new FsStateBackend(checkpointDir)); env.setRestartStrategy(RestartStrategies.noRestart()); env.enableCheckpointing(1000L); env.getConfig().disableGenericTypes(); env.addSource(new MySource()).uid("my-source") .keyBy(anInt -> 0) .map(new MyStatefulFunction()).uid("my-map") .addSink(new DiscardingSink<>()).uid("my-sink"); env.execute(); } }
/** * Tests that the KeyGroupStreamPartitioner are properly set up with the correct value of * maximum parallelism. */ @Test public void testSetupOfKeyGroupPartitioner() { int maxParallelism = 42; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setMaxParallelism(maxParallelism); DataStream<Integer> source = env.fromElements(1, 2, 3); DataStream<Integer> keyedResult = source.keyBy(value -> value).map(new NoOpIntMap()); keyedResult.addSink(new DiscardingSink<>()); StreamGraph graph = env.getStreamGraph(); StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId()); StreamPartitioner<?> streamPartitioner = keyedResultNode.getInEdges().get(0).getPartitioner(); }
@Test public void testSelectBetweenConsecutiveSplitRejection() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Integer> src = env.fromElements(0, 0); OutputSelector<Integer> outputSelector = new DummyOutputSelector<>(); src.split(outputSelector).select("dummy").split(outputSelector).addSink(new DiscardingSink<>()); expectedException.expect(IllegalStateException.class); expectedException.expectMessage("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs."); env.getStreamGraph(); }
@Test public void testKeybyBetweenConsecutiveSplitRejection() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Integer> src = env.fromElements(0, 0); OutputSelector<Integer> outputSelector = new DummyOutputSelector<>(); src.split(outputSelector).select("dummy").keyBy(x -> x).split(outputSelector).addSink(new DiscardingSink<>()); expectedException.expect(IllegalStateException.class); expectedException.expectMessage("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs."); env.getStreamGraph(); }
@Test public void testOutputTypeConfigurationWithTwoInputTransformation() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source1 = env.fromElements(1, 10); DataStream<Integer> source2 = env.fromElements(2, 11); ConnectedStreams<Integer, Integer> connectedSource = source1.connect(source2); OutputTypeConfigurableOperationWithTwoInputs outputTypeConfigurableOperation = new OutputTypeConfigurableOperationWithTwoInputs(); DataStream<Integer> result = connectedSource.transform( "Two input and output type configurable operation", BasicTypeInfo.INT_TYPE_INFO, outputTypeConfigurableOperation); result.addSink(new DiscardingSink<>()); env.getStreamGraph(); assertEquals(BasicTypeInfo.INT_TYPE_INFO, outputTypeConfigurableOperation.getTypeInformation()); }
@Test public void testUnionBetweenConsecutiveSplitRejection() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Integer> src = env.fromElements(0, 0); OutputSelector<Integer> outputSelector = new DummyOutputSelector<>(); src.split(outputSelector).select("dummy").union(src.map(x -> x)).split(outputSelector).addSink(new DiscardingSink<>()); expectedException.expect(IllegalStateException.class); expectedException.expectMessage("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs."); env.getStreamGraph(); }
/** * Tests that the max parallelism is properly set for connected * streams. */ @Test public void testMaxParallelismWithConnectedKeyedStream() { int maxParallelism = 42; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> input1 = env.fromElements(1, 2, 3, 4).setMaxParallelism(128); DataStream<Integer> input2 = env.fromElements(1, 2, 3, 4).setMaxParallelism(129); env.getConfig().setMaxParallelism(maxParallelism); DataStream<Integer> keyedResult = input1 .connect(input2) .keyBy(value -> value, value -> value) .map(new NoOpIntCoMap()); keyedResult.addSink(new DiscardingSink<>()); StreamGraph graph = env.getStreamGraph(); StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId()); StreamPartitioner<?> streamPartitioner1 = keyedResultNode.getInEdges().get(0).getPartitioner(); StreamPartitioner<?> streamPartitioner2 = keyedResultNode.getInEdges().get(1).getPartitioner(); }
/** * Creates a streaming JobGraph from the StreamEnvironment. */ private JobGraph createJobGraph( int parallelism, int numberOfRetries, long restartDelay) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); env.disableOperatorChaining(); env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(numberOfRetries, restartDelay)); env.getConfig().disableSysoutLogging(); DataStream<Integer> stream = env .addSource(new InfiniteTestSource()) .shuffle() .map(new StatefulCounter()); stream.addSink(new DiscardingSink<>()); return env.getStreamGraph().getJobGraph(); }
/** * These check whether timestamps are properly ignored when they are disabled. */ @Test public void testDisabledTimestamps() throws Exception { final int numElements = 10; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); env.setParallelism(PARALLELISM); env.getConfig().disableSysoutLogging(); DataStream<Integer> source1 = env.addSource(new MyNonWatermarkingSource(numElements)); DataStream<Integer> source2 = env.addSource(new MyNonWatermarkingSource(numElements)); source1 .map(new IdentityMap()) .connect(source2).map(new IdentityCoMap()) .transform("Custom Operator", BasicTypeInfo.INT_TYPE_INFO, new DisabledTimestampCheckingOperator()) .addSink(new DiscardingSink<Integer>()); env.execute(); }
/** * These check whether timestamps are properly assigned at the sources and handled in * network transmission and between chained operators when timestamps are enabled. */ @Test public void testTimestampHandling() throws Exception { final int numElements = 10; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(PARALLELISM); env.getConfig().disableSysoutLogging(); DataStream<Integer> source1 = env.addSource(new MyTimestampSource(0L, numElements)); DataStream<Integer> source2 = env.addSource(new MyTimestampSource(0L, numElements)); source1 .map(new IdentityMap()) .connect(source2).map(new IdentityCoMap()) .transform("Custom Operator", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator()) .addSink(new DiscardingSink<Integer>()); env.execute(); }