/**
 * A thin wrapper layer over {@link DataStream#flatMap(FlatMapFunction)}.
 *
 * @param flat_mapper The FlatMapFunction that is called for each element of the DataStream
 * @return The transformed {@link PythonDataStream}.
 */
public PythonDataStream<SingleOutputStreamOperator<PyObject>> flat_map(
		FlatMapFunction<PyObject, Object> flat_mapper) throws IOException {
	return new PythonSingleOutputStreamOperator(stream.flatMap(new PythonFlatMapFunction(flat_mapper)));
}
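Since flat_map only delegates to DataStream#flatMap, its semantics are those of the plain Java API. A minimal sketch of the underlying call (the env/lines names are illustrative, not part of the wrapper above):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> lines = env.fromElements("to be", "or not");
// Each input element may emit zero or more output elements through the Collector.
DataStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
	@Override
	public void flatMap(String value, Collector<String> out) {
		for (String word : value.split(" ")) {
			out.collect(word);
		}
	}
});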
/**
 * Creates a data stream that contains the contents of the files created while the system watches
 * the given path. The files will be read with the system's default character set.
 *
 * @param filePath
 *		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path/")
 * @param intervalMillis
 *		The interval of file watching in milliseconds
 * @param watchType
 *		The watch type of the file stream. When watchType is
 *		{@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#ONLY_NEW_FILES},
 *		the system processes only new files.
 *		{@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#REPROCESS_WITH_APPENDED}
 *		means that the system re-processes the whole contents of appended files.
 *		{@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#PROCESS_ONLY_APPENDED}
 *		means that the system processes only the appended contents of files.
 * @return The DataStream containing the contents of the watched files.
 *
 * @deprecated Use {@link #readFile(FileInputFormat, String, FileProcessingMode, long)} instead.
 */
@Deprecated
@SuppressWarnings("deprecation")
public DataStream<String> readFileStream(String filePath, long intervalMillis,
		FileMonitoringFunction.WatchType watchType) {
	DataStream<Tuple3<String, Long, Long>> source = addSource(new FileMonitoringFunction(
			filePath, intervalMillis, watchType), "Read File Stream source");
	return source.flatMap(new FileReadFunction());
}
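For migration, a minimal sketch of the readFile(...) replacement recommended by the deprecation note, mirroring the monitoring setup used in the continuous file-reading test further below (the path and the 1000 ms interval are illustrative):

TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path("file:///some/local/dir"));
format.setFilesFilter(FilePathFilter.createDefaultFilter());
// Re-scan the path every second and emit the contents of newly appearing or modified files.
DataStream<String> lines = env.readFile(format, "file:///some/local/dir",
		FileProcessingMode.PROCESS_CONTINUOUSLY, 1000L);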
text.flatMap(new Tokenizer())
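Tokenizer is the usual word-count splitter. A minimal sketch of such a function, assuming the Tuple2<String, Integer> output that the keyBy(0).sum(1) pipelines below expect:

public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
	@Override
	public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
		// Normalize, split into words, and emit a (word, 1) pair per token.
		for (String token : value.toLowerCase().split("\\W+")) {
			if (token.length() > 0) {
				out.collect(new Tuple2<>(token, 1));
			}
		}
	}
}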
.flatMap(new FlatMapFunction<String, WordWithCount>() {
	@Override
	public void flatMap(String value, Collector<WordWithCount> out) {
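The fragment above breaks off mid-body. A minimal completion in the spirit of Flink's socket word-count example, assuming WordWithCount is a POJO with word and count fields:

.flatMap(new FlatMapFunction<String, WordWithCount>() {
	@Override
	public void flatMap(String value, Collector<WordWithCount> out) {
		// Emit each whitespace-separated word with an initial count of 1.
		for (String word : value.split("\\s")) {
			out.collect(new WordWithCount(word, 1L));
		}
	}
})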
text.flatMap(new WordCount.Tokenizer())
.flatMap(new SelectEnglishAndTokenizeFlatMap())
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();

	DataStream<Integer> input = env.fromElements(1, 2, 3);
	input.flatMap(new FlatMapFunction<Integer, Integer>() {
		@Override
		public void flatMap(Integer value, Collector<Integer> out) throws Exception {
			out.collect(value << 1);
		}
	});

	env.execute();
}
@Test
public void testProgram() throws Exception {
	String resultPath = getTempDirPath("result");

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> text = env.fromElements(WordCountData.TEXT);

	DataStream<Tuple2<String, Integer>> counts = text
			.flatMap(new Tokenizer())
			.keyBy(0).sum(1);

	counts.writeAsCsv(resultPath);

	env.execute("WriteAsCsvTest");

	// Strip the parentheses from the expected tuple-style output.
	compareResultsByLinesInMemory(WordCountData.STREAMING_COUNTS_AS_TUPLES
			.replaceAll("[\\(\\)]", ""), resultPath);
}
source.flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {
/**
 * Test that ensures that DeserializationSchema.isEndOfStream() is properly evaluated.
 *
 * @throws Exception
 */
public void runEndOfStreamTest() throws Exception {

	final int elementCount = 300;
	final String topic = writeSequence("testEndOfStream", elementCount, 1, 1);

	// read using custom schema
	final StreamExecutionEnvironment env1 = StreamExecutionEnvironment.getExecutionEnvironment();
	env1.setParallelism(1);
	env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env1.getConfig().disableSysoutLogging();

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);

	DataStream<Tuple2<Integer, Integer>> fromKafka = env1.addSource(kafkaServer.getConsumer(topic,
			new FixedNumberDeserializationSchema(elementCount), props));
	fromKafka.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Void>() {
		@Override
		public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {
			// noop ;)
		}
	});

	tryExecute(env1, "Consume " + elementCount + " elements from Kafka");

	deleteTestTopic(topic);
}
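FixedNumberDeserializationSchema is internal to the Kafka test utilities. A minimal sketch of how a schema can signal end-of-stream after a fixed number of elements (the class and field names here are illustrative, not the real test class):

public static class EndAfterNSchema implements DeserializationSchema<Tuple2<Integer, Integer>> {
	private final int limit; // stop after this many elements
	private int seen;        // elements deserialized so far

	public EndAfterNSchema(int limit) {
		this.limit = limit;
	}

	@Override
	public Tuple2<Integer, Integer> deserialize(byte[] message) {
		seen++;
		return new Tuple2<>(seen, seen);
	}

	@Override
	public boolean isEndOfStream(Tuple2<Integer, Integer> nextElement) {
		// Returning true tells the consumer to stop reading from the topic.
		return seen >= limit;
	}

	@Override
	public TypeInformation<Tuple2<Integer, Integer>> getProducedType() {
		return TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});
	}
}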
@Test
public void testProgram() throws Exception {
	String resultPath = getTempDirPath("result");

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> text = env.fromElements(WordCountData.TEXT);

	DataStream<Tuple2<String, Integer>> counts = text
			.flatMap(new Tokenizer())
			.keyBy(0).sum(1);

	counts.writeAsText(resultPath);

	env.execute("WriteAsTextTest");

	compareResultsByLinesInMemory(WordCountData.STREAMING_COUNTS_AS_TUPLES, resultPath);
}
.flatMap(new FlatMapFunction<Integer, String>() {
@Test
public void testStreaming() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataStream<Integer> input = env.fromCollection(inputData);

	input
			.flatMap(new NotifyingMapper())
			.writeUsingOutputFormat(new DummyOutputFormat()).disableChaining();

	JobGraph jobGraph = env.getStreamGraph().getJobGraph();
	submitJobAndVerifyResults(jobGraph);
}
DataStream<Tuple2<byte[], PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, schema, props));
fromKafka.flatMap(new RichFlatMapFunction<Tuple2<byte[], PojoValue>, Object>() {
	long counter = 0;

	@Override
private static JobGraph createJobGraphWithKeyedAndNonPartitionedOperatorState(
		int parallelism,
		int maxParallelism,
		int fixedParallelism,
		int numberKeys,
		int numberElements,
		boolean terminateAfterEmission,
		int checkpointingInterval) {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().setMaxParallelism(maxParallelism);
	env.enableCheckpointing(checkpointingInterval);
	env.setRestartStrategy(RestartStrategies.noRestart());

	DataStream<Integer> input = env.addSource(new SubtaskIndexNonPartitionedStateSource(
			numberKeys,
			numberElements,
			terminateAfterEmission))
			.setParallelism(fixedParallelism)
			.keyBy(new KeySelector<Integer, Integer>() {
				private static final long serialVersionUID = -7952298871120320940L;

				@Override
				public Integer getKey(Integer value) throws Exception {
					return value;
				}
			});

	SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

	DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements));

	result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

	return env.getStreamGraph().getJobGraph();
}
props.putAll(secureProps);

DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));
fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
	long counter = 0;

	@Override
DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements));
.flatMap(new FlatMapFunction<Integer, String>() {
	private static final long serialVersionUID = 1L;
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// set the restart strategy.
	env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0));
	env.enableCheckpointing(10);

	// create and start the file creating thread.
	fc = new FileCreator();
	fc.start();

	// create the monitoring source along with the necessary readers.
	TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	DataStream<String> inputStream = env.readFile(format, localFsURI,
			FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

	TestingSinkFunction sink = new TestingSinkFunction();

	inputStream.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public void flatMap(String value, Collector<String> out) throws Exception {
			out.collect(value);
		}
	}).addSink(sink).setParallelism(1);
}
try {
	source.flatMap(new TestFlatMap<Long, Long>()).print();
	fail();
} catch (Exception ignored) {}

source.flatMap(new TestFlatMap<Long, Long>()).returns(new TypeHint<Long>(){}).print();

source.connect(source).map(new TestCoMap<Long, Long, Integer>()).returns(BasicTypeInfo.INT_TYPE_INFO).print();
source.connect(source).flatMap(new TestCoFlatMap<Long, Long, Integer>())