public static StreamExecutionEnvironment prepareExecutionEnv(ParameterTool parameterTool) throws Exception {
    if (parameterTool.getNumberOfParameters() < 5) {
        final String usage = "Missing parameters!\n" +
            "Usage: Kafka --input-topic <topic> --output-topic <topic> " +
            "--bootstrap.servers <kafka brokers> " +
            "--zookeeper.connect <zk quorum> --group.id <some id>";
        System.out.println(usage);
        throw new Exception(usage);
    }

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().disableSysoutLogging();
    env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
    env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
    env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    return env;
}
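/*
 * A minimal usage sketch (an assumption for illustration, not part of the original snippet):
 * a main() method that builds the environment via prepareExecutionEnv and reads the
 * Kafka-related parameters. The actual Kafka source/sink wiring is left out because the
 * concrete consumer/producer classes depend on the Flink Kafka connector version in use.
 */
public static void main(String[] args) throws Exception {
    final ParameterTool parameterTool = ParameterTool.fromArgs(args);
    StreamExecutionEnvironment env = prepareExecutionEnv(parameterTool);

    String inputTopic = parameterTool.getRequired("input-topic");
    String outputTopic = parameterTool.getRequired("output-topic");

    // ... add a Kafka source for inputTopic and a Kafka sink for outputTopic here ...

    env.execute("Kafka example");
}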
@Test
public void testApplyWindowAllState() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    DataStream<File> src = env.fromElements(new File("/"));

    SingleOutputStreamOperator<?> result = src
        .timeWindowAll(Time.milliseconds(1000))
        .apply(new AllWindowFunction<File, String, TimeWindow>() {
            @Override
            public void apply(TimeWindow window, Iterable<File> input, Collector<String> out) {}
        });

    validateListStateDescriptorConfigured(result);
}
@Test
public void testProcessAllWindowState() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    DataStream<File> src = env.fromElements(new File("/"));

    SingleOutputStreamOperator<?> result = src
        .timeWindowAll(Time.milliseconds(1000))
        .process(new ProcessAllWindowFunction<File, String, TimeWindow>() {
            @Override
            public void process(Context ctx, Iterable<File> input, Collector<String> out) {}
        });

    validateListStateDescriptorConfigured(result);
}
public static void main(String[] args) throws Exception {
    // parse the parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    final long windowSize = params.getLong("windowSize", 2000);
    final long rate = params.getLong("rate", 3L);

    System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate);
    System.out.println("To customize example, use: WindowJoin " +
        "[--windowSize <window-size-in-millis>] [--rate <elements-per-second>]");

    // obtain execution environment, run this example in "ingestion time"
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create the data sources for both grades and salaries
    DataStream<Tuple2<String, Integer>> grades = GradeSource.getSource(env, rate);
    DataStream<Tuple2<String, Integer>> salaries = SalarySource.getSource(env, rate);

    // run the actual window join program
    // for testability, this functionality is in a separate method
    DataStream<Tuple3<String, Integer, Integer>> joinedStream = runWindowJoin(grades, salaries, windowSize);

    // print the results with a single thread, rather than in parallel
    joinedStream.print().setParallelism(1);

    // execute program
    env.execute("Windowed Join Example");
}
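/*
 * A sketch of what the separate runWindowJoin method could look like (an assumption for
 * illustration; the original method body is not included in this snippet): join grades and
 * salaries on the name field (f0) within tumbling windows of the given size.
 */
public static DataStream<Tuple3<String, Integer, Integer>> runWindowJoin(
        DataStream<Tuple2<String, Integer>> grades,
        DataStream<Tuple2<String, Integer>> salaries,
        long windowSize) {

    return grades.join(salaries)
        .where(new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> value) {
                return value.f0;
            }
        })
        .equalTo(new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> value) {
                return value.f0;
            }
        })
        .window(TumblingEventTimeWindows.of(Time.milliseconds(windowSize)))
        .apply(new JoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple3<String, Integer, Integer>>() {
            @Override
            public Tuple3<String, Integer, Integer> join(Tuple2<String, Integer> grade, Tuple2<String, Integer> salary) {
                // emit (name, grade, salary) for every pair that joins within the window
                return new Tuple3<>(grade.f0, grade.f1, salary.f1);
            }
        });
}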
@Test
public void testFoldWindowAllState() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    DataStream<String> src = env.fromElements("abc");

    SingleOutputStreamOperator<?> result = src
        .timeWindowAll(Time.milliseconds(1000))
        .fold(new File("/"), new FoldFunction<String, File>() {
            @Override
            public File fold(File a, String e) {
                return null;
            }
        });

    validateStateDescriptorConfigured(result);
}
public static void main(String[] args) throws Exception {
    // Checking input parameters
    final ParameterTool params = ParameterTool.fromArgs(args);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    DataStream<Integer> trainingData = env.addSource(new FiniteTrainingDataSource());
    DataStream<Integer> newData = env.addSource(new FiniteNewDataSource());

    // build a new model on every 5 seconds of training data
    DataStream<Double[]> model = trainingData
        .assignTimestampsAndWatermarks(new LinearTimestamp())
        .timeWindowAll(Time.of(5000, TimeUnit.MILLISECONDS))
        .apply(new PartialModelBuilder());

    // use the partial model to predict on newData
    DataStream<Integer> prediction = newData.connect(model).map(new Predictor());

    // emit result
    if (params.has("output")) {
        prediction.writeAsText(params.get("output"));
    } else {
        System.out.println("Printing result to stdout. Use --output to specify output path.");
        prediction.print();
    }

    // execute program
    env.execute("Streaming Incremental Learning");
}
@Test
public void testReduceWindowAllState() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    DataStream<File> src = env.fromElements(new File("/"));

    SingleOutputStreamOperator<?> result = src
        .timeWindowAll(Time.milliseconds(1000))
        .reduce(new ReduceFunction<File>() {
            @Override
            public File reduce(File value1, File value2) {
                return null;
            }
        });

    validateStateDescriptorConfigured(result);
}
public static void main(String[] args) throws Exception {
    ParameterTool params = ParameterTool.fromArgs(args);
    String outputPath = params.getRequired("outputPath");
    int recordsPerSecond = params.getInt("recordsPerSecond", 10);
    int duration = params.getInt("durationInSecond", 60);
    int offset = params.getInt("offsetInSecond", 0);

    StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    sEnv.enableCheckpointing(4000);
    sEnv.getConfig().setAutoWatermarkInterval(1000);

    // execute a simple pass through program.
    PeriodicSourceGenerator generator = new PeriodicSourceGenerator(
        recordsPerSecond, duration, offset);
    DataStream<Tuple> rows = sEnv.addSource(generator);

    DataStream<Tuple> result = rows
        .keyBy(1)
        .timeWindow(Time.seconds(5))
        .sum(0);

    result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE)
        .setParallelism(1);

    sEnv.execute();
}
@Test
public void testProcessWindowState() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    DataStream<File> src = env.fromElements(new File("/"));

    SingleOutputStreamOperator<?> result = src
        .keyBy(new KeySelector<File, String>() {
            @Override
            public String getKey(File value) {
                return null;
            }
        })
        .timeWindow(Time.milliseconds(1000))
        .process(new ProcessWindowFunction<File, String, String, TimeWindow>() {
            @Override
            public void process(String s, Context ctx, Iterable<File> input, Collector<String> out) {}
        });

    validateListStateDescriptorConfigured(result);
}
private void runTest(
        SourceFunction<SessionEvent<Integer, TestEventPayload>> dataSource,
        WindowFunction<SessionEvent<Integer, TestEventPayload>, String, Tuple, TimeWindow> windowFunction) throws Exception {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    WindowedStream<SessionEvent<Integer, TestEventPayload>, Tuple, TimeWindow> windowedStream =
        env.addSource(dataSource).keyBy("sessionKey")
            .window(EventTimeSessionWindows.withGap(Time.milliseconds(MAX_SESSION_EVENT_GAP_MS)));

    if (ALLOWED_LATENESS_MS != Long.MAX_VALUE) {
        windowedStream = windowedStream.allowedLateness(Time.milliseconds(ALLOWED_LATENESS_MS));
    }

    if (PURGE_WINDOW_ON_FIRE) {
        windowedStream = windowedStream.trigger(PurgingTrigger.of(EventTimeTrigger.create()));
    }

    windowedStream.apply(windowFunction).print();
    JobExecutionResult result = env.execute();

    // check that the overall event counts match our expectations. Remember that each late event
    // within the allowed lateness triggers the window again!
    Assert.assertEquals(
        (LATE_EVENTS_PER_SESSION + 1) * NUMBER_OF_SESSIONS * EVENTS_PER_SESSION,
        (long) result.getAccumulatorResult(SESSION_COUNTER_ON_TIME_KEY));
    Assert.assertEquals(
        NUMBER_OF_SESSIONS * (LATE_EVENTS_PER_SESSION * (LATE_EVENTS_PER_SESSION + 1) / 2),
        (long) result.getAccumulatorResult(SESSION_COUNTER_LATE_KEY));
}
@Test
public void testApplyWindowState() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    DataStream<File> src = env.fromElements(new File("/"));

    SingleOutputStreamOperator<?> result = src
        .keyBy(new KeySelector<File, String>() {
            @Override
            public String getKey(File value) {
                return null;
            }
        })
        .timeWindow(Time.milliseconds(1000))
        .apply(new WindowFunction<File, String, String, TimeWindow>() {
            @Override
            public void apply(String s, TimeWindow window, Iterable<File> input, Collector<String> out) {}
        });

    validateListStateDescriptorConfigured(result);
}
@Test
public void testReduceWindowState() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    DataStream<File> src = env.fromElements(new File("/"));

    SingleOutputStreamOperator<?> result = src
        .keyBy(new KeySelector<File, String>() {
            @Override
            public String getKey(File value) {
                return null;
            }
        })
        .timeWindow(Time.milliseconds(1000))
        .reduce(new ReduceFunction<File>() {
            @Override
            public File reduce(File value1, File value2) {
                return null;
            }
        });

    validateStateDescriptorConfigured(result);
}
@Test
public void testFoldWindowState() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
    env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

    DataStream<String> src = env.fromElements("abc");

    SingleOutputStreamOperator<?> result = src
        .keyBy(new KeySelector<String, String>() {
            @Override
            public String getKey(String value) {
                return null;
            }
        })
        .timeWindow(Time.milliseconds(1000))
        .fold(new File("/"), new FoldFunction<String, File>() {
            @Override
            public File fold(File a, String e) {
                return null;
            }
        });

    validateStateDescriptorConfigured(result);
}
/**
 * .aggregate() does not support RichAggregateFunction, since the AggregateFunction is used
 * internally in an {@code AggregatingState}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testAggregateWithRichFunctionFails() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    source
        .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
        .aggregate(new DummyRichAggregationFunction<Tuple2<String, Integer>>());

    fail("exception was not thrown");
}
@Test(expected = NullPointerException.class)
public void testFailsWithoutUpperBound() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

    streamOne
        .keyBy(new Tuple2KeyExtractor())
        .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
        .between(Time.milliseconds(0), null);
}
@Test(expected = NullPointerException.class)
public void testFailsWithoutLowerBound() {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1));
    DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1));

    streamOne
        .keyBy(new Tuple2KeyExtractor())
        .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor()))
        .between(null, Time.milliseconds(1));
}
/**
 * This verifies that an event time source works when setting the stream time characteristic to
 * processing time. In this case, the watermarks should just be swallowed.
 */
@Test
public void testEventTimeSourceWithProcessingTime() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);
    env.getConfig().disableSysoutLogging();
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    DataStream<Integer> source1 = env.addSource(new MyTimestampSource(0, 10));

    source1
        .map(new IdentityMap())
        .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(false));

    env.execute();

    // verify that we don't get any watermarks; the source is used as a watermark source in
    // other tests, so it normally emits watermarks
    Assert.assertEquals(0, CustomOperator.finalWatermarks[0].size());
}
/**
 * .reduce() does not support RichReduceFunction, since the reduce function is used internally
 * in a {@code ReducingState}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testReduceWithRichReducerFails() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    source
        .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
        .reduce(new RichReduceFunction<Tuple2<String, Integer>>() {
            private static final long serialVersionUID = -6448847205314995812L;

            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception {
                return null;
            }
        });

    fail("exception was not thrown");
}
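/**
 * A contrasting sketch (added for illustration, not one of the original tests; the method name is
 * hypothetical): the same windowed pipeline accepts a plain, non-rich ReduceFunction, because a
 * non-rich function can be stored in the {@code ReducingState} that backs the window.
 */
public void reduceWithPlainReducerWorks() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    source
        .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
        .reduce(new ReduceFunction<Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) {
                // sum the counts per window; no runtime context is needed here
                return Tuple2.of(value1.f0, value1.f1 + value2.f1);
            }
        });
}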
/**
 * .aggregate() does not support RichAggregateFunction, since the AggregateFunction is used
 * internally in an {@code AggregatingState}.
 */
@Test(expected = UnsupportedOperationException.class)
public void testAggregateWithRichFunctionFails() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

    source
        .keyBy(0)
        .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
        .aggregate(new DummyRichAggregationFunction<Tuple2<String, Integer>>());

    fail("exception was not thrown");
}
@Test
public void testOneInputOperatorWithoutChaining() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(timeCharacteristic);
    env.setParallelism(1);