private <OUT> DataStreamSource<OUT> createInput(
		InputFormat<OUT, ?> inputFormat,
		TypeInformation<OUT> typeInfo,
		String sourceName) {

	InputFormatSourceFunction<OUT> function =
			new InputFormatSourceFunction<>(inputFormat, typeInfo);
	return addSource(function, sourceName, typeInfo);
}
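A minimal usage sketch of the public createInput(...) overload that this private helper backs. TextInputFormat, Path, and Types.STRING are standard Flink APIs; the file path is made up.

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

// Reads text lines from a (hypothetical) file; internally this routes through
// the private createInput helper above via an InputFormatSourceFunction.
DataStreamSource<String> lines =
		env.createInput(new TextInputFormat(new Path("/tmp/input.txt")), Types.STRING);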
@Override
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> sourceFunction) {
	this.sourceFunction = sourceFunction;
	return super.addSource(sourceFunction);
}
private <OUT> DataStreamSource<OUT> fromParallelCollection(
		SplittableIterator<OUT> iterator,
		TypeInformation<OUT> typeInfo,
		String operatorName) {

	return addSource(new FromSplittableIteratorFunction<>(iterator), operatorName, typeInfo);
}
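A hedged sketch of how the matching public fromParallelCollection(...) call looks from user code; NumberSequenceIterator is Flink's stock SplittableIterator, and the 1..1000 range is arbitrary.

// Each parallel source subtask gets a disjoint slice of the sequence, because
// the SplittableIterator is split across the configured parallelism.
DataStreamSource<Long> numbers =
		env.fromParallelCollection(new NumberSequenceIterator(1L, 1000L), Types.LONG);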
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment execEnv) {
	return execEnv.addSource(
			new Generator(numKeys, recordsPerKeyAndSecond, durationSeconds, offsetSeconds));
}
public static SingleOutputStreamOperator<Tuple2<Integer, Integer>> createIntegerTupleSource(
		StreamExecutionEnvironment env, ExecutionMode mode) {
	return env.addSource(new IntegerTupleSource(mode));
}
/**
 * Adds a Java source to the streaming topology. The source is expected to be
 * a Java-based implementation (e.g. a Kafka connector).
 *
 * @param src A native Java source (e.g. PythonFlinkKafkaConsumer09)
 * @return Python data stream
 */
public PythonDataStream add_java_source(SourceFunction<Object> src) {
	return new PythonDataStream<>(env.addSource(src).map(new AdapterMap<>()));
}
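A hedged usage sketch, assuming a (topic, schema, properties)-style constructor on PythonFlinkKafkaConsumer09 (mirroring FlinkKafkaConsumer09) and a hypothetical pythonEnv handle; verify against the actual connector signature.

Properties props = new Properties();
props.setProperty("bootstrap.servers", "localhost:9092");

// Wrap the Java-based Kafka source so its records flow into a Python topology.
// Constructor shape and pythonEnv are assumptions, not confirmed API.
PythonDataStream stream = pythonEnv.add_java_source(
		new PythonFlinkKafkaConsumer09("my-topic", new SimpleStringSchema(), props));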
/**
 * NOTE: This method is for internal use only for defining a TableSource.
 * Do not use it in Table API programs.
 */
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
	DeserializationSchema<Row> deserializationSchema = getDeserializationSchema();
	// Version-specific Kafka consumer
	FlinkKafkaConsumerBase<Row> kafkaConsumer =
			getKafkaConsumer(topic, properties, deserializationSchema);
	return env.addSource(kafkaConsumer).name(explainSource());
}
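A sketch of what the version-specific getKafkaConsumer hook might return in a Kafka 0.9 subclass; the override shape is an assumption, though the FlinkKafkaConsumer09 (topic, deserializer, properties) constructor is standard Flink API.

@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(
		String topic,
		Properties properties,
		DeserializationSchema<Row> deserializationSchema) {
	// Concrete consumer for the 0.9 connector; other versions swap in their class.
	return new FlinkKafkaConsumer09<>(topic, deserializationSchema, properties);
}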
public static SingleOutputStreamOperator<Integer> createSource(
		StreamExecutionEnvironment env, ExecutionMode mode) {
	return env.addSource(new IntegerSource(mode))
			.setParallelism(4);
}
public PythonDataStream create_python_source(SourceFunction<Object> src) throws Exception {
	return new PythonDataStream<>(
			env.addSource(new PythonGeneratorFunction(src)).map(new AdapterMap<>()));
}
/**
 * Creates a Python data stream from the given iterator.
 *
 * <p>Note that this operation will result in a non-parallel data stream source,
 * i.e., a data stream source with a parallelism of one.</p>
 *
 * @param iter The iterator of elements to create the data stream from
 * @return The data stream representing the elements in the iterator
 * @see StreamExecutionEnvironment#fromCollection(java.util.Iterator, org.apache.flink.api.common.typeinfo.TypeInformation)
 */
public PythonDataStream from_collection(Iterator<Object> iter) throws Exception {
	return new PythonDataStream<>(
			env.addSource(new PythonIteratorFunction(iter), TypeExtractor.getForClass(Object.class))
					.map(new AdapterMap<>()));
}
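A minimal call-shape sketch for from_collection; pythonEnv is a made-up handle, and in practice the iterator must be serializable since it is shipped to the cluster inside the source function (the plain list iterator here only illustrates the call shape).

List<Object> elements = Arrays.<Object>asList("a", "b", "c");

// Produces a parallelism-1 source emitting the three elements.
PythonDataStream stream = pythonEnv.from_collection(elements.iterator());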
public static void main(String[] args) throws Exception {
	final ParameterTool pt = ParameterTool.fromArgs(args);
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	setupEnvironment(env, pt);

	final int numStates =
			pt.getInt(NUM_LIST_STATES_PER_OP.key(), NUM_LIST_STATES_PER_OP.defaultValue());
	final int numPartitionsPerState =
			pt.getInt(NUM_PARTITIONS_PER_LIST_STATE.key(), NUM_PARTITIONS_PER_LIST_STATE.defaultValue());

	Preconditions.checkState(env.getCheckpointInterval() > 0L,
			"Checkpointing must be enabled for this test!");

	env.addSource(new SimpleEndlessSourceWithBloatedState(numStates, numPartitionsPerState))
			.setParallelism(env.getParallelism())
			.addSink(new DiscardingSink<>()).setParallelism(1);

	env.execute("HeavyDeploymentStressTestProgram");
}
public static JobGraph stoppableJob(final StopJobSignal stopJobSignal) {
	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.addSource(new InfiniteSourceFunction(stopJobSignal))
			.setParallelism(2)
			.shuffle()
			.addSink(new DiscardingSink<>())
			.setParallelism(2);
	return env.getStreamGraph().getJobGraph();
}
@Test
public void test() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	env.addSource(new TestSource()).map(new TestMap()).addSink(new DiscardingSink<Integer>());
	env.execute();

	assertNotEquals(srcContext, mapContext);
}
/**
 * Tests that a changed operator name does not affect the hash.
 */
@Test
public void testChangedOperatorName() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.addSource(new NoOpSourceFunction(), "A").map(new NoOpMapFunction());
	JobGraph jobGraph = env.getStreamGraph().getJobGraph();
	JobVertexID expected = jobGraph.getVerticesAsArray()[0].getID();

	env = StreamExecutionEnvironment.createLocalEnvironment();
	env.addSource(new NoOpSourceFunction(), "B").map(new NoOpMapFunction());
	jobGraph = env.getStreamGraph().getJobGraph();
	JobVertexID actual = jobGraph.getVerticesAsArray()[0].getID();

	assertEquals(expected, actual);
}
public static void main(String[] args) throws Exception {
	final ParameterTool pt = ParameterTool.fromArgs(args);
	final String checkpointDir = pt.getRequired("checkpoint.dir");

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStateBackend(new FsStateBackend(checkpointDir));
	env.setRestartStrategy(RestartStrategies.noRestart());
	env.enableCheckpointing(1000L);
	env.getConfig().disableGenericTypes();

	env.addSource(new MySource()).uid("my-source")
			.keyBy(anInt -> 0)
			.map(new MyStatefulFunction()).uid("my-map")
			.addSink(new DiscardingSink<>()).uid("my-sink");

	env.execute();
}
/**
 * Tests that a manual hash at the beginning of a chain is accepted.
 */
@Test
public void testManualHashAssignmentForStartNodeInInChain() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(4);

	env.addSource(new NoOpSourceFunction()).uid("source")
			.map(new NoOpMapFunction())
			.addSink(new NoOpSinkFunction());

	env.getStreamGraph().getJobGraph();
}
/**
 * Tests that a manual hash for an intermediate chain node is accepted.
 */
@Test
public void testManualHashAssignmentForIntermediateNodeInChain() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(4);

	env.addSource(new NoOpSourceFunction())
			// Intermediate chained node
			.map(new NoOpMapFunction()).uid("map")
			.addSink(new NoOpSinkFunction());

	env.getStreamGraph().getJobGraph();
}
private static void runPartitioningProgram(int parallelism) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	env.getConfig().enableObjectReuse();
	env.setBufferTimeout(5L);
	env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);

	env.addSource(new TimeStampingSource())
			.map(new IdMapper<Tuple2<Long, Long>>())
			.keyBy(0)
			.addSink(new TimestampingSink());

	env.execute("Partitioning Program");
}
/**
 * Tests that a collision on the manual hash throws an Exception.
 */
@Test(expected = IllegalArgumentException.class)
public void testManualHashAssignmentCollisionThrowsException() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(4);
	env.disableOperatorChaining();

	env.addSource(new NoOpSourceFunction()).uid("source")
			.map(new NoOpMapFunction()).uid("source") // Collision
			.addSink(new NoOpSinkFunction());

	// This call is necessary to generate the job graph
	env.getStreamGraph().getJobGraph();
}
@Test
public void testUserProvidedHashingOnChainSupported() {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

	env.addSource(new NoOpSourceFunction(), "src").setUidHash("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
			.map(new NoOpMapFunction()).setUidHash("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb")
			.filter(new NoOpFilterFunction()).setUidHash("cccccccccccccccccccccccccccccccc")
			.keyBy(new NoOpKeySelector())
			.reduce(new NoOpReduceFunction()).name("reduce").setUidHash("dddddddddddddddddddddddddddddddd");

	env.getStreamGraph().getJobGraph();
}