/**
 * A thin wrapper layer over {@link DataStream#flatMap(FlatMapFunction)}.
 *
 * @param flat_mapper The FlatMapFunction that is called for each element of the DataStream
 * @return The transformed {@link PythonDataStream}.
 */
public PythonDataStream<SingleOutputStreamOperator<PyObject>> flat_map(
		FlatMapFunction<PyObject, Object> flat_mapper) throws IOException {
	return new PythonSingleOutputStreamOperator(stream.flatMap(new PythonFlatMapFunction(flat_mapper)));
}
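Since flat_map only delegates to DataStream#flatMap, its semantics are those of the plain Java API. A minimal sketch of the underlying call (the env/lines names are illustrative, not part of the wrapper above):

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStream<String> lines = env.fromElements("to be", "or not");
// Each input element may emit zero or more output elements through the Collector.
DataStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
	@Override
	public void flatMap(String value, Collector<String> out) {
		for (String word : value.split(" ")) {
			out.collect(word);
		}
	}
});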
/**
 * Creates a data stream that contains the contents of the files created while the system watches
 * the given path. The files will be read with the system's default character set.
 *
 * @param filePath
 *		The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path/")
 * @param intervalMillis
 *		The interval of file watching in milliseconds
 * @param watchType
 *		The watch type of the file stream. When watchType is
 *		{@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#ONLY_NEW_FILES},
 *		the system processes only new files.
 *		{@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#REPROCESS_WITH_APPENDED}
 *		means that the system re-processes the whole contents of appended files.
 *		{@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#PROCESS_ONLY_APPENDED}
 *		means that the system processes only the appended contents of files.
 * @return The DataStream containing the contents of the watched files.
 *
 * @deprecated Use {@link #readFile(FileInputFormat, String, FileProcessingMode, long)} instead.
 */
@Deprecated
@SuppressWarnings("deprecation")
public DataStream<String> readFileStream(String filePath, long intervalMillis,
		FileMonitoringFunction.WatchType watchType) {
	DataStream<Tuple3<String, Long, Long>> source = addSource(new FileMonitoringFunction(
			filePath, intervalMillis, watchType), "Read File Stream source");
	return source.flatMap(new FileReadFunction());
}
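For migration, a minimal sketch of the readFile(...) replacement recommended by the deprecation note, mirroring the monitoring setup used in the continuous file-reading test further below (the path and the 1000 ms interval are illustrative):

TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path("file:///some/local/dir"));
format.setFilesFilter(FilePathFilter.createDefaultFilter());
// Re-scan the path every second and emit the contents of newly appearing or modified files.
DataStream<String> lines = env.readFile(format, "file:///some/local/dir",
		FileProcessingMode.PROCESS_CONTINUOUSLY, 1000L);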
text.flatMap(new Tokenizer())
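Tokenizer is the usual word-count splitter. A minimal sketch of such a function, assuming the Tuple2<String, Integer> output that the keyBy(0).sum(1) pipelines below expect:

public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
	@Override
	public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
		// Normalize, split into words, and emit a (word, 1) pair per token.
		for (String token : value.toLowerCase().split("\\W+")) {
			if (token.length() > 0) {
				out.collect(new Tuple2<>(token, 1));
			}
		}
	}
}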
.flatMap(new FlatMapFunction<String, WordWithCount>() {
	@Override
	public void flatMap(String value, Collector<WordWithCount> out) {
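The fragment above breaks off mid-body. A minimal completion in the spirit of Flink's socket word-count example, assuming WordWithCount is a POJO with word and count fields:

.flatMap(new FlatMapFunction<String, WordWithCount>() {
	@Override
	public void flatMap(String value, Collector<WordWithCount> out) {
		// Emit each whitespace-separated word with an initial count of 1.
		for (String word : value.split("\\s")) {
			out.collect(new WordWithCount(word, 1L));
		}
	}
})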
text.flatMap(new WordCount.Tokenizer())
.flatMap(new SelectEnglishAndTokenizeFlatMap())
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.getConfig().enableObjectReuse();

	DataStream<Integer> input = env.fromElements(1, 2, 3);
	input.flatMap(new FlatMapFunction<Integer, Integer>() {
		@Override
		public void flatMap(Integer value, Collector<Integer> out) throws Exception {
			out.collect(value << 1);
		}
	});

	env.execute();
}
@Test
public void testProgram() throws Exception {
	String resultPath = getTempDirPath("result");

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> text = env.fromElements(WordCountData.TEXT);

	DataStream<Tuple2<String, Integer>> counts = text
			.flatMap(new Tokenizer())
			.keyBy(0).sum(1);

	counts.writeAsCsv(resultPath);

	env.execute("WriteAsCsvTest");

	// Strip the parentheses from the expected tuple-style output.
	compareResultsByLinesInMemory(WordCountData.STREAMING_COUNTS_AS_TUPLES
			.replaceAll("[\\(\\)]", ""), resultPath);
}
source.flatMap(new RichFlatMapFunction<Tuple2<Integer, Integer>, Integer>() {
/**
 * Test that ensures that DeserializationSchema.isEndOfStream() is properly evaluated.
 *
 * @throws Exception
 */
public void runEndOfStreamTest() throws Exception {

	final int elementCount = 300;
	final String topic = writeSequence("testEndOfStream", elementCount, 1, 1);

	// read using custom schema
	final StreamExecutionEnvironment env1 = StreamExecutionEnvironment.getExecutionEnvironment();
	env1.setParallelism(1);
	env1.getConfig().setRestartStrategy(RestartStrategies.noRestart());
	env1.getConfig().disableSysoutLogging();

	Properties props = new Properties();
	props.putAll(standardProps);
	props.putAll(secureProps);

	DataStream<Tuple2<Integer, Integer>> fromKafka = env1.addSource(kafkaServer.getConsumer(topic,
			new FixedNumberDeserializationSchema(elementCount), props));
	fromKafka.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Void>() {
		@Override
		public void flatMap(Tuple2<Integer, Integer> value, Collector<Void> out) throws Exception {
			// noop ;)
		}
	});

	tryExecute(env1, "Consume " + elementCount + " elements from Kafka");

	deleteTestTopic(topic);
}
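FixedNumberDeserializationSchema is internal to the Kafka test utilities. A minimal sketch of how a schema can signal end-of-stream after a fixed number of elements (the class and field names here are illustrative, not the real test class):

public static class EndAfterNSchema implements DeserializationSchema<Tuple2<Integer, Integer>> {
	private final int limit; // stop after this many elements
	private int seen;        // elements deserialized so far

	public EndAfterNSchema(int limit) {
		this.limit = limit;
	}

	@Override
	public Tuple2<Integer, Integer> deserialize(byte[] message) {
		seen++;
		return new Tuple2<>(seen, seen);
	}

	@Override
	public boolean isEndOfStream(Tuple2<Integer, Integer> nextElement) {
		// Returning true tells the consumer to stop reading from the topic.
		return seen >= limit;
	}

	@Override
	public TypeInformation<Tuple2<Integer, Integer>> getProducedType() {
		return TypeInformation.of(new TypeHint<Tuple2<Integer, Integer>>() {});
	}
}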
@Test
public void testProgram() throws Exception {
	String resultPath = getTempDirPath("result");

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<String> text = env.fromElements(WordCountData.TEXT);

	DataStream<Tuple2<String, Integer>> counts = text
			.flatMap(new Tokenizer())
			.keyBy(0).sum(1);

	counts.writeAsText(resultPath);

	env.execute("WriteAsTextTest");

	compareResultsByLinesInMemory(WordCountData.STREAMING_COUNTS_AS_TUPLES, resultPath);
}
.flatMap(new FlatMapFunction<Integer, String>() {
@Test
public void testStreaming() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataStream<Integer> input = env.fromCollection(inputData);

	input
			.flatMap(new NotifyingMapper())
			.writeUsingOutputFormat(new DummyOutputFormat()).disableChaining();

	JobGraph jobGraph = env.getStreamGraph().getJobGraph();
	submitJobAndVerifyResults(jobGraph);
}
DataStream<Tuple2<byte[], PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, schema, props));
fromKafka.flatMap(new RichFlatMapFunction<Tuple2<byte[], PojoValue>, Object>() {
	long counter = 0;

	@Override
private static JobGraph createJobGraphWithKeyedAndNonPartitionedOperatorState(
		int parallelism,
		int maxParallelism,
		int fixedParallelism,
		int numberKeys,
		int numberElements,
		boolean terminateAfterEmission,
		int checkpointingInterval) {

	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().setMaxParallelism(maxParallelism);
	env.enableCheckpointing(checkpointingInterval);
	env.setRestartStrategy(RestartStrategies.noRestart());

	DataStream<Integer> input = env.addSource(new SubtaskIndexNonPartitionedStateSource(
			numberKeys,
			numberElements,
			terminateAfterEmission))
			.setParallelism(fixedParallelism)
			.keyBy(new KeySelector<Integer, Integer>() {
				private static final long serialVersionUID = -7952298871120320940L;

				@Override
				public Integer getKey(Integer value) throws Exception {
					return value;
				}
			});

	SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

	DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements));

	result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

	return env.getStreamGraph().getJobGraph();
}
props.putAll(secureProps);

DataStream<Tuple2<Long, PojoValue>> fromKafka = env.addSource(kafkaServer.getConsumer(topic, readSchema, props));
fromKafka.flatMap(new RichFlatMapFunction<Tuple2<Long, PojoValue>, Object>() {
	long counter = 0;

	@Override
DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements));
.flatMap(new FlatMapFunction<Integer, String>() {
	private static final long serialVersionUID = 1L;
@Override
public void testProgram(StreamExecutionEnvironment env) {

	// set the restart strategy.
	env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(NO_OF_RETRIES, 0));
	env.enableCheckpointing(10);

	// create and start the file creating thread.
	fc = new FileCreator();
	fc.start();

	// create the monitoring source along with the necessary readers.
	TextInputFormat format = new TextInputFormat(new org.apache.flink.core.fs.Path(localFsURI));
	format.setFilesFilter(FilePathFilter.createDefaultFilter());

	DataStream<String> inputStream = env.readFile(format, localFsURI,
			FileProcessingMode.PROCESS_CONTINUOUSLY, INTERVAL);

	TestingSinkFunction sink = new TestingSinkFunction();

	inputStream.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public void flatMap(String value, Collector<String> out) throws Exception {
			out.collect(value);
		}
	}).addSink(sink).setParallelism(1);
}
try {
	source.flatMap(new TestFlatMap<Long, Long>()).print();
	fail();
} catch (Exception ignored) {}

source.flatMap(new TestFlatMap<Long, Long>()).returns(new TypeHint<Long>(){}).print();

source.connect(source).map(new TestCoMap<Long, Long, Integer>()).returns(BasicTypeInfo.INT_TYPE_INFO).print();
source.connect(source).flatMap(new TestCoFlatMap<Long, Long, Integer>())