private <OUT> DataStreamSource<OUT> createInput(
		InputFormat<OUT, ?> inputFormat,
		TypeInformation<OUT> typeInfo,
		String sourceName) {

	InputFormatSourceFunction<OUT> function =
			new InputFormatSourceFunction<>(inputFormat, typeInfo);
	return addSource(function, sourceName, typeInfo);
}
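A minimal usage sketch of the public createInput(...) overload that this private helper backs. TextInputFormat, Path, and Types.STRING are standard Flink APIs; the file path is made up.

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// Reads text lines from a (hypothetical) file; internally this routes through
// the private createInput helper above via an InputFormatSourceFunction.
DataStreamSource<String> lines =
		env.createInput(new TextInputFormat(new Path("/tmp/input.txt")), Types.STRING);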
@Override
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> sourceFunction) {
	this.sourceFunction = sourceFunction;
	return super.addSource(sourceFunction);
}
private <OUT> DataStreamSource<OUT> fromParallelCollection(
		SplittableIterator<OUT> iterator,
		TypeInformation<OUT> typeInfo,
		String operatorName) {

	return addSource(new FromSplittableIteratorFunction<>(iterator), operatorName, typeInfo);
}
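A hedged sketch of how the matching public fromParallelCollection(...) call looks from user code; NumberSequenceIterator is Flink's stock SplittableIterator, and the 1..1000 range is arbitrary.

// Each parallel source subtask gets a disjoint slice of the sequence, because
// the SplittableIterator is split across the configured parallelism.
DataStreamSource<Long> numbers =
		env.fromParallelCollection(new NumberSequenceIterator(1L, 1000L), Types.LONG);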
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment execEnv) {
	return execEnv.addSource(
			new Generator(numKeys, recordsPerKeyAndSecond, durationSeconds, offsetSeconds));
}
public static SingleOutputStreamOperator<Tuple2<Integer, Integer>> createIntegerTupleSource(
		StreamExecutionEnvironment env, ExecutionMode mode) {
	return env.addSource(new IntegerTupleSource(mode));
}
/**
 * Adds a Java source to the streaming topology. The source is expected to be
 * a Java-based implementation (e.g. a Kafka connector).
 *
 * @param src A native Java source (e.g. PythonFlinkKafkaConsumer09)
 * @return Python data stream
 */
public PythonDataStream add_java_source(SourceFunction<Object> src) {
	return new PythonDataStream<>(env.addSource(src).map(new AdapterMap<>()));
}
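A hedged usage sketch, assuming a (topic, schema, properties)-style constructor on PythonFlinkKafkaConsumer09 (mirroring FlinkKafkaConsumer09) and a hypothetical pythonEnv handle; verify against the actual connector signature.

Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");

// Wrap the Java-based Kafka source so its records flow into a Python topology.
// Constructor shape and pythonEnv are assumptions, not confirmed API.
PythonDataStream stream = pythonEnv.add_java_source(
		new PythonFlinkKafkaConsumer09("my-topic", new SimpleStringSchema(), props));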
/**
 * NOTE: This method is for internal use only for defining a TableSource.
 * Do not use it in Table API programs.
 */
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
	DeserializationSchema<Row> deserializationSchema = getDeserializationSchema();
	// Version-specific Kafka consumer
	FlinkKafkaConsumerBase<Row> kafkaConsumer =
			getKafkaConsumer(topic, properties, deserializationSchema);
	return env.addSource(kafkaConsumer).name(explainSource());
}
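A sketch of what the version-specific getKafkaConsumer hook might return in a Kafka 0.9 subclass; the override shape is an assumption, though the FlinkKafkaConsumer09 (topic, deserializer, properties) constructor is standard Flink API.

@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<Row> deserializationSchema) {
	// Concrete consumer for the 0.9 connector; other versions swap in their class.
	return new FlinkKafkaConsumer09<>(topic, deserializationSchema, properties);
}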
public static SingleOutputStreamOperator<Integer> createSource(
		StreamExecutionEnvironment env, ExecutionMode mode) {
	return env.addSource(new IntegerSource(mode))
			.setParallelism(4);
}
public PythonDataStream create_python_source(SourceFunction<Object> src) throws Exception {
	return new PythonDataStream<>(
			env.addSource(new PythonGeneratorFunction(src)).map(new AdapterMap<>()));
}
/**
 * Creates a Python data stream from the given iterator.
 *
 * <p>Note that this operation will result in a non-parallel data stream source,
 * i.e., a data stream source with a parallelism of one.</p>
 *
 * @param iter The iterator of elements to create the data stream from
 * @return The data stream representing the elements in the iterator
 * @see StreamExecutionEnvironment#fromCollection(java.util.Iterator, org.apache.flink.api.common.typeinfo.TypeInformation)
 */
public PythonDataStream from_collection(Iterator<Object> iter) throws Exception {
	return new PythonDataStream<>(
			env.addSource(new PythonIteratorFunction(iter), TypeExtractor.getForClass(Object.class))
					.map(new AdapterMap<>()));
}
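A minimal call-shape sketch for from_collection; pythonEnv is a made-up handle, and in practice the iterator must be serializable since it is shipped to the cluster inside the source function (the plain list iterator here only illustrates the call shape).

List<Object> elements = Arrays.<Object>asList("a", "b", "c");

// Produces a parallelism-1 source emitting the three elements.
PythonDataStream stream = pythonEnv.from_collection(elements.iterator());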
public static void main(String[] args) throws Exception {
	final ParameterTool pt = ParameterTool.fromArgs(args);
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	setupEnvironment(env, pt);

	final int numStates =
			pt.getInt(NUM_LIST_STATES_PER_OP.key(), NUM_LIST_STATES_PER_OP.defaultValue());
	final int numPartitionsPerState =
			pt.getInt(NUM_PARTITIONS_PER_LIST_STATE.key(), NUM_PARTITIONS_PER_LIST_STATE.defaultValue());

	Preconditions.checkState(env.getCheckpointInterval() > 0L,
			"Checkpointing must be enabled for this test!");

	env.addSource(new SimpleEndlessSourceWithBloatedState(numStates, numPartitionsPerState))
			.setParallelism(env.getParallelism())
			.addSink(new DiscardingSink<>()).setParallelism(1);

	env.execute("HeavyDeploymentStressTestProgram");
}
public static JobGraph stoppableJob(final StopJobSignal stopJobSignal) {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.addSource(new InfiniteSourceFunction(stopJobSignal))
			.setParallelism(2)
			.shuffle()
			.addSink(new DiscardingSink<>())
			.setParallelism(2);
	return env.getStreamGraph().getJobGraph();
}
@Test
public void test() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	env.addSource(new TestSource()).map(new TestMap()).addSink(new DiscardingSink<Integer>());
	env.execute();

	assertNotEquals(srcContext, mapContext);
}
/**
 * Tests that a changed operator name does not affect the hash.
 */
@Test
public void testChangedOperatorName() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.addSource(new NoOpSourceFunction(), "A").map(new NoOpMapFunction());
	JobGraph jobGraph = env.getStreamGraph().getJobGraph();
	JobVertexID expected = jobGraph.getVerticesAsArray()[0].getID();

	env = StreamExecutionEnvironment.createLocalEnvironment();
	env.addSource(new NoOpSourceFunction(), "B").map(new NoOpMapFunction());
	jobGraph = env.getStreamGraph().getJobGraph();
	JobVertexID actual = jobGraph.getVerticesAsArray()[0].getID();

	assertEquals(expected, actual);
}
public static void main(String[] args) throws Exception {
	final ParameterTool pt = ParameterTool.fromArgs(args);
	final String checkpointDir = pt.getRequired("checkpoint.dir");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStateBackend(new FsStateBackend(checkpointDir));
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.enableCheckpointing(1000L);
	env.getConfig().disableGenericTypes();

	env.addSource(new MySource()).uid("my-source")
			.keyBy(anInt -> 0)
			.map(new MyStatefulFunction()).uid("my-map")
			.addSink(new DiscardingSink<>()).uid("my-sink");

	env.execute();
}
/**
 * Tests that a manual hash at the beginning of a chain is accepted.
 */
@Test
public void testManualHashAssignmentForStartNodeInInChain() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(4);

	env.addSource(new NoOpSourceFunction()).uid("source")
			.map(new NoOpMapFunction())
			.addSink(new NoOpSinkFunction());

	env.getStreamGraph().getJobGraph();
}
/**
 * Tests that a manual hash for an intermediate chain node is accepted.
 */
@Test
public void testManualHashAssignmentForIntermediateNodeInChain() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(4);

	env.addSource(new NoOpSourceFunction())
			// Intermediate chained node
			.map(new NoOpMapFunction()).uid("map")
			.addSink(new NoOpSinkFunction());

	env.getStreamGraph().getJobGraph();
}
private static void runPartitioningProgram(int parallelism) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().enableObjectReuse();
	env.setBufferTimeout(5L);
	env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);

	env.addSource(new TimeStampingSource())
			.map(new IdMapper<Tuple2<Long, Long>>())
			.keyBy(0)
			.addSink(new TimestampingSink());

	env.execute("Partitioning Program");
}
/**
 * Tests that a collision on the manual hash throws an Exception.
 */
@Test(expected = IllegalArgumentException.class)
public void testManualHashAssignmentCollisionThrowsException() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(4);
	env.disableOperatorChaining();

	env.addSource(new NoOpSourceFunction()).uid("source")
			.map(new NoOpMapFunction()).uid("source") // Collision
			.addSink(new NoOpSinkFunction());

	// This call is necessary to generate the job graph
	env.getStreamGraph().getJobGraph();
}
@Test
public void testUserProvidedHashingOnChainSupported() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

	env.addSource(new NoOpSourceFunction(), "src").setUidHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
			.map(new NoOpMapFunction()).setUidHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")
			.filter(new NoOpFilterFunction()).setUidHash("cccccccccccccccccccccccccccccccc")
			.keyBy(new NoOpKeySelector())
			.reduce(new NoOpReduceFunction()).name("reduce").setUidHash("dddddddddddddddddddddddddddddddd");

	env.getStreamGraph().getJobGraph();
}