@Override public DataStreamSink<Row> addSink(SinkFunction<Row> sinkFunction) { this.sinkFunction = sinkFunction; return super.addSink(sinkFunction); } }
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); int recordsPerSecond = params.getInt("recordsPerSecond", 10); int duration = params.getInt("durationInSecond", 60); int offset = params.getInt("offsetInSecond", 0); StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment(); sEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); sEnv.enableCheckpointing(4000); sEnv.getConfig().setAutoWatermarkInterval(1000); // execute a simple pass through program. PeriodicSourceGenerator generator = new PeriodicSourceGenerator( recordsPerSecond, duration, offset); DataStream<Tuple> rows = sEnv.addSource(generator); DataStream<Tuple> result = rows .keyBy(1) .timeWindow(Time.seconds(5)) .sum(0); result.writeAsText(outputPath + "/result.txt", FileSystem.WriteMode.OVERWRITE) .setParallelism(1); sEnv.execute(); }
@Test(expected = UnsupportedOperationException.class) public void testForwardFailsLowToHighParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> src = env.fromElements(1, 2, 3); // this doesn't work because it goes from 1 to 3 src.forward().map(new NoOpIntMap()); env.execute(); }
/** * Partitions the operator state of a {@link DataStream} using field expressions. * A field expression is either the name of a public field or a getter method with parentheses * of the {@link DataStream}'s underlying type. A dot can be used to drill * down into objects, as in {@code "field1.getInnerField2()" }. * * @param fields * One or more field expressions on which the state of the {@link DataStream} operators will be * partitioned. * @return The {@link DataStream} with partitioned state (i.e. KeyedStream) **/ public KeyedStream<T, Tuple> keyBy(String... fields) { return keyBy(new Keys.ExpressionKeys<>(fields, getType())); }
/** * Applies a Map transformation on a {@link DataStream}. The transformation * calls a {@link MapFunction} for each element of the DataStream. Each * MapFunction call returns exactly one element. The user can also extend * {@link RichMapFunction} to gain access to other features provided by the * {@link org.apache.flink.api.common.functions.RichFunction} interface. * * @param mapper * The MapFunction that is called for each element of the * DataStream. * @param <R> * output type * @return The transformed {@link DataStream}. */ public <R> SingleOutputStreamOperator<R> map(MapFunction<T, R> mapper) { TypeInformation<R> outType = TypeExtractor.getMapReturnTypes(clean(mapper), getType(), Utils.getCallLocationName(), true); return transform("Map", outType, new StreamMap<>(clean(mapper))); }
/** * Applies a FlatMap transformation on a {@link DataStream}. The * transformation calls a {@link FlatMapFunction} for each element of the * DataStream. Each FlatMapFunction call can return any number of elements * including none. The user can also extend {@link RichFlatMapFunction} to * gain access to other features provided by the * {@link org.apache.flink.api.common.functions.RichFunction} interface. * * @param flatMapper * The FlatMapFunction that is called for each element of the * DataStream * * @param <R> * output type * @return The transformed {@link DataStream}. */ public <R> SingleOutputStreamOperator<R> flatMap(FlatMapFunction<T, R> flatMapper) { TypeInformation<R> outType = TypeExtractor.getFlatMapReturnTypes(clean(flatMapper), getType(), Utils.getCallLocationName(), true); return transform("Flat Map", outType, new StreamFlatMap<>(clean(flatMapper))); }
@Test(expected = NullPointerException.class) public void testFailsWithoutUpperBound() { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(1); DataStream<Tuple2<String, Integer>> streamOne = env.fromElements(Tuple2.of("1", 1)); DataStream<Tuple2<String, Integer>> streamTwo = env.fromElements(Tuple2.of("1", 1)); streamOne .keyBy(new Tuple2KeyExtractor()) .intervalJoin(streamTwo.keyBy(new Tuple2KeyExtractor())) .between(Time.milliseconds(0), null); }
/** * This verifies that an event time source works when setting stream time characteristic to * processing time. In this case, the watermarks should just be swallowed. */ @Test public void testEventTimeSourceWithProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(2); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Integer> source1 = env.addSource(new MyTimestampSource(0, 10)); source1 .map(new IdentityMap()) .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(false)); env.execute(); // verify that we don't get any watermarks, the source is used as watermark source in // other tests, so it normally emits watermarks Assert.assertTrue(CustomOperator.finalWatermarks[0].size() == 0); }
@Test @SuppressWarnings({"rawtypes", "unchecked"}) public void testFoldWithEvictor() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple3<String, String, Integer>> window1 = source .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .evictor(CountEvictor.of(100)) .fold(new Tuple3<>("", "", 1), new DummyFolder()); OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple3<String, String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof EvictingWindowOperator); EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?> winOperator = (EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getEvictor() instanceof CountEvictor); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor); winOperator.setOutputType((TypeInformation) window1.getType(), new ExecutionConfig()); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
int checkpointingInterval) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); if (0 < maxParallelism) { env.getConfig().setMaxParallelism(maxParallelism); env.enableCheckpointing(checkpointingInterval); env.setRestartStrategy(RestartStrategies.noRestart()); env.getConfig().setUseSnapshotCompression(true); DataStream<Integer> input = env.addSource(new SubtaskIndexSource( DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements)); result.addSink(new CollectionSink<Tuple2<Integer, Integer>>()); return env.getStreamGraph().getJobGraph();
protected void testProgramWithBackend(AbstractStateBackend stateBackend) throws Exception { assertEquals("Broken test setup", 0, (NUM_STRINGS / 2) % NUM_KEYS); final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(PARALLELISM); env.enableCheckpointing(500); env.getConfig().disableSysoutLogging(); env.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, 0L)); new IntGeneratingSourceFunction(NUM_STRINGS / 2, NUM_STRINGS / 4)); stream1.union(stream2) .keyBy(new IdentityKeySelector<Integer>()) .map(new OnceFailingPartitionedSum(failurePos)) .keyBy(0)
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setGlobalJobParameters(params); env.setParallelism(params.getInt("parallelism", 1)); .flatMap(new SelectEnglishAndTokenizeFlatMap()) tweets.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); tweets.print();
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setGlobalJobParameters(params); if (params.has("input")) { text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WordCount example with default input data set."); text.flatMap(new Tokenizer()) counts.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); counts.print();
/** * Creates a streaming JobGraph from the StreamEnvironment. */ private JobGraph createJobGraph( int parallelism, int numberOfRetries, long restartDelay) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); env.disableOperatorChaining(); env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(numberOfRetries, restartDelay)); env.getConfig().disableSysoutLogging(); DataStream<Integer> stream = env .addSource(new InfiniteTestSource()) .shuffle() .map(new StatefulCounter()); stream.addSink(new DiscardingSink<>()); return env.getStreamGraph().getJobGraph(); }
/** * Tests that the KeyGroupStreamPartitioner are properly set up with the correct value of * maximum parallelism. */ @Test public void testSetupOfKeyGroupPartitioner() { int maxParallelism = 42; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.getConfig().setMaxParallelism(maxParallelism); DataStream<Integer> source = env.fromElements(1, 2, 3); DataStream<Integer> keyedResult = source.keyBy(value -> value).map(new NoOpIntMap()); keyedResult.addSink(new DiscardingSink<>()); StreamGraph graph = env.getStreamGraph(); StreamNode keyedResultNode = graph.getStreamNode(keyedResult.getId()); StreamPartitioner<?> streamPartitioner = keyedResultNode.getInEdges().get(0).getPartitioner(); }
public static void main(String[] args) throws Exception { // Checking input parameters final ParameterTool params = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); DataStream<Integer> trainingData = env.addSource(new FiniteTrainingDataSource()); DataStream<Integer> newData = env.addSource(new FiniteNewDataSource()); // build new model on every second of new data DataStream<Double[]> model = trainingData .assignTimestampsAndWatermarks(new LinearTimestamp()) .timeWindowAll(Time.of(5000, TimeUnit.MILLISECONDS)) .apply(new PartialModelBuilder()); // use partial model for newData DataStream<Integer> prediction = newData.connect(model).map(new Predictor()); // emit result if (params.has("output")) { prediction.writeAsText(params.get("output")); } else { System.out.println("Printing result to stdout. Use --output to specify output path."); prediction.print(); } // execute program env.execute("Streaming Incremental Learning"); }
@Test public void testNestedPojoFieldAccessor() throws Exception { StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment(); see.getConfig().disableObjectReuse(); see.setParallelism(4); DataStream<Data> dataStream = see.fromCollection(elements); DataStream<Data> summedStream = dataStream .keyBy("aaa") .sum("stats.count") .keyBy("aaa") .flatMap(new FlatMapFunction<Data, Data>() { Data[] first = new Data[3]; @Override public void flatMap(Data value, Collector<Data> out) throws Exception { if (first[value.aaa] == null) { first[value.aaa] = value; if (value.stats.count != 123) { throw new RuntimeException("Expected stats.count to be 123"); } } else { if (value.stats.count != 2 * 123) { throw new RuntimeException("Expected stats.count to be 2 * 123"); } } } }); summedStream.print(); see.execute(); }
TestListResultSink<Integer> splitterResultSink2 = new TestListResultSink<Integer>(); StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); env.setBufferTimeout(1); DataStream<Integer> d1 = env.fromElements(0, 2, 4, 6, 8); DataStream<Integer> d2 = env.fromElements(1, 3, 5, 7, 9); d1 = d1.union(d2); d1.split(new OutputSelector<Integer>() { private static final long serialVersionUID = 8354166915727490130L; }).select(">").addSink(splitterResultSink1); d1.split(new OutputSelector<Integer>() { private static final long serialVersionUID = -6822487543355994807L; }).select("yes").addSink(splitterResultSink2); env.execute();
public static void main(String[] args) throws Exception { // parse the parameters final ParameterTool params = ParameterTool.fromArgs(args); final long windowSize = params.getLong("windowSize", 2000); final long rate = params.getLong("rate", 3L); System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate); System.out.println("To customize example, use: WindowJoin [--windowSize <window-size-in-millis>] [--rate <elements-per-second>]"); // obtain execution environment, run this example in "ingestion time" StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); // make parameters available in the web interface env.getConfig().setGlobalJobParameters(params); // create the data sources for both grades and salaries DataStream<Tuple2<String, Integer>> grades = GradeSource.getSource(env, rate); DataStream<Tuple2<String, Integer>> salaries = SalarySource.getSource(env, rate); // run the actual window join program // for testability, this functionality is in a separate method. DataStream<Tuple3<String, Integer, Integer>> joinedStream = runWindowJoin(grades, salaries, windowSize); // print the results with a single thread, rather than in parallel joinedStream.print().setParallelism(1); // execute program env.execute("Windowed Join Example"); }
@Test public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.getConfig().enableObjectReuse(); DataStream<Integer> input = env.fromElements(1, 2, 3); input.flatMap(new FlatMapFunction<Integer, Integer>() { @Override public void flatMap(Integer value, Collector<Integer> out) throws Exception { out.collect(value << 1); } }); env.execute(); } }