/** * A thin wrapper layer over {@link StreamExecutionEnvironment#setParallelism(int)}. * * @param parallelism The parallelism * @return The same {@code PythonStreamExecutionEnvironment} instance of the caller */ public PythonStreamExecutionEnvironment set_parallelism(int parallelism) { this.env.setParallelism(parallelism); return this; }
@Test public void testCollect() throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); final long n = 10; DataStream<Long> stream = env.generateSequence(1, n); long i = 1; for (Iterator<Long> it = DataStreamUtils.collect(stream); it.hasNext(); ) { long x = it.next(); assertEquals("received wrong element", i, x); i++; } assertEquals("received wrong number of elements", n + 1, i); } }
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); // define the dataflow StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(2); env.setRestartStrategy(RestartStrategies.fixedDelayRestart(10, 1000)); env.readFileStream("input/", 60000, FileMonitoringFunction.WatchType.ONLY_NEW_FILES) .addSink(new DiscardingSink<String>()); // generate a job graph final JobGraph jobGraph = env.getStreamGraph().getJobGraph(); File jobGraphFile = new File(params.get("output", "job.graph")); try (FileOutputStream output = new FileOutputStream(jobGraphFile); ObjectOutputStream obOutput = new ObjectOutputStream(output)){ obOutput.writeObject(jobGraph); } } }
@Test public void testPrimitiveKeyAcceptance() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); env.setMaxParallelism(1); DataStream<Integer> input = env.fromElements(new Integer(10000)); KeyedStream<Integer, Object> keyedStream = input.keyBy(new KeySelector<Integer, Object>() { @Override public Object getKey(Integer value) throws Exception { return value; } }); keyedStream.addSink(new SinkFunction<Integer>() { @Override public void invoke(Integer value) throws Exception { Assert.assertEquals(10000L, (long) value); } }); }
@Test public void testProcessingTimeWithWindow() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); DataStream<Integer> input = env.fromElements(1, 2); Pattern<Integer, ?> pattern = Pattern.<Integer>begin("start").followedByAny("end").within(Time.days(1)); DataStream<Integer> result = CEP.pattern(input, pattern).select(new PatternSelectFunction<Integer, Integer>() { @Override public Integer select(Map<String, List<Integer>> pattern) throws Exception { return pattern.get("start").get(0) + pattern.get("end").get(0); } }); List<Integer> resultList = new ArrayList<>(); DataStreamUtils.collect(result).forEachRemaining(resultList::add); assertEquals(Arrays.asList(3), resultList); }
public static void main(final String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); final String outputPath = params.getRequired("outputPath"); final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); env.enableCheckpointing(5000L); env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10L, TimeUnit.SECONDS))); final StreamingFileSink<Tuple2<Integer, Integer>> sink = StreamingFileSink .forRowFormat(new Path(outputPath), (Encoder<Tuple2<Integer, Integer>>) (element, stream) -> { PrintStream out = new PrintStream(stream); out.println(element.f1); }) .withBucketAssigner(new KeyBucketAssigner()) .withRollingPolicy(OnCheckpointRollingPolicy.build()) .build(); // generate data, shuffle, sink env.addSource(new Generator(10, 10, 60)) .keyBy(0) .addSink(sink); env.execute("StreamingFileSinkProgram"); }
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource()); stream .keyBy(0) .timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS)) .reduce(new SummingReducer()) // alternative: use a apply function which does not pre-aggregate // .keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>()) // .window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS)) // .apply(new SummingWindowFunction()) .addSink(new SinkFunction<Tuple2<Long, Long>>() { @Override public void invoke(Tuple2<Long, Long> value) { } }); env.execute(); }
@Test public void test() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); env.addSource(new TestSource()).map(new TestMap()).addSink(new DiscardingSink<Integer>()); env.execute(); assertNotEquals(srcContext, mapContext); }
/** * Tests that a manual hash at the beginning of a chain is accepted. */ @Test public void testManualHashAssignmentForStartNodeInInChain() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(); env.setParallelism(4); env.addSource(new NoOpSourceFunction()).uid("source") .map(new NoOpMapFunction()) .addSink(new NoOpSinkFunction()); env.getStreamGraph().getJobGraph(); }
@Test public void testStreaming() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); DataStream<Integer> input = env.fromCollection(inputData); input .flatMap(new NotifyingMapper()) .writeUsingOutputFormat(new DummyOutputFormat()).disableChaining(); JobGraph jobGraph = env.getStreamGraph().getJobGraph(); submitJobAndVerifyResults(jobGraph); }
/** * Tests that a manual hash for an intermediate chain node is accepted. */ @Test public void testManualHashAssignmentForIntermediateNodeInChain() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(); env.setParallelism(4); env.addSource(new NoOpSourceFunction()) // Intermediate chained node .map(new NoOpMapFunction()).uid("map") .addSink(new NoOpSinkFunction()); env.getStreamGraph().getJobGraph(); }
/** * This verifies that an event time source works when setting stream time characteristic to * processing time. In this case, the watermarks should just be swallowed. */ @Test public void testEventTimeSourceWithProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(2); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Integer> source1 = env.addSource(new MyTimestampSource(0, 10)); source1 .map(new IdentityMap()) .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(false)); env.execute(); // verify that we don't get any watermarks, the source is used as watermark source in // other tests, so it normally emits watermarks Assert.assertTrue(CustomOperator.finalWatermarks[0].size() == 0); }
private static void runPartitioningProgram(int parallelism) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); env.getConfig().enableObjectReuse(); env.setBufferTimeout(5L); env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE); env .addSource(new TimeStampingSource()) .map(new IdMapper<Tuple2<Long, Long>>()) .keyBy(0) .addSink(new TimestampingSink()); env.execute("Partitioning Program"); }
@Test(expected = NullPointerException.class) public void testFailsWithoutUpperBound() { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(1); DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1)); DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1)); streamOne .keyBy(new Tuple2KeyExtractor()) .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor())) .between(Time.milliseconds(0), null); }
@Test(expected = NullPointerException.class) public void testFailsWithoutLowerBound() { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(1); DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1)); DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1)); streamOne .keyBy(new Tuple2KeyExtractor()) .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor())) .between(null, Time.milliseconds(1)); }
/** * Tests that a collision on the manual hash throws an Exception. */ @Test(expected = IllegalArgumentException.class) public void testManualHashAssignmentCollisionThrowsException() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment(); env.setParallelism(4); env.disableOperatorChaining(); env.addSource(new NoOpSourceFunction()).uid("source") .map(new NoOpMapFunction()).uid("source") // Collision .addSink(new NoOpSinkFunction()); // This call is necessary to generate the job graph env.getStreamGraph().getJobGraph(); }
/** * Creates a streaming JobGraph from the StreamEnvironment. */ private JobGraph createJobGraph( int parallelism, int numberOfRetries, long restartDelay) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); env.disableOperatorChaining(); env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(numberOfRetries, restartDelay)); env.getConfig().disableSysoutLogging(); DataStream<Integer> stream = env .addSource(new InfiniteTestSource()) .shuffle() .map(new StatefulCounter()); stream.addSink(new DiscardingSink<>()); return env.getStreamGraph().getJobGraph(); }
@Test(expected = UnsupportedTimeCharacteristicException.class) public void testExecutionFailsInProcessingTime() throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); env.setParallelism(1); DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1)); DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1)); streamOne.keyBy(new Tuple2KeyExtractor()) .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor())) .between(Time.milliseconds(0), Time.milliseconds(0)) .process(new ProcessJoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String>() { @Override public void processElement(Tuple2<String, Integer> left, Tuple2<String, Integer> right, Context ctx, Collector<String> out) throws Exception { out.collect(left + ":" + right); } }); }
/** * These check whether timestamps are properly ignored when they are disabled. */ @Test public void testDisabledTimestamps() throws Exception { final int numElements = 10; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); env.setParallelism(PARALLELISM); env.getConfig().disableSysoutLogging(); DataStream<Integer> source1 = env.addSource(new MyNonWatermarkingSource(numElements)); DataStream<Integer> source2 = env.addSource(new MyNonWatermarkingSource(numElements)); source1 .map(new IdentityMap()) .connect(source2).map(new IdentityCoMap()) .transform("Custom Operator", BasicTypeInfo.INT_TYPE_INFO, new DisabledTimestampCheckingOperator()) .addSink(new DiscardingSink<Integer>()); env.execute(); }
/** * These check whether timestamps are properly assigned at the sources and handled in * network transmission and between chained operators when timestamps are enabled. */ @Test public void testTimestampHandling() throws Exception { final int numElements = 10; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(PARALLELISM); env.getConfig().disableSysoutLogging(); DataStream<Integer> source1 = env.addSource(new MyTimestampSource(0L, numElements)); DataStream<Integer> source2 = env.addSource(new MyTimestampSource(0L, numElements)); source1 .map(new IdentityMap()) .connect(source2).map(new IdentityCoMap()) .transform("Custom Operator", BasicTypeInfo.INT_TYPE_INFO, new TimestampCheckingOperator()) .addSink(new DiscardingSink<Integer>()); env.execute(); }