/**
 * KeyBy operation for the connected data stream. Assigns keys to the elements of
 * input1 and input2 using keySelector1 and keySelector2.
 *
 * @param keySelector1
 *            The {@link KeySelector} used for grouping the first input
 * @param keySelector2
 *            The {@link KeySelector} used for grouping the second input
 * @return The partitioned {@link ConnectedStreams}
 */
public ConnectedStreams<IN1, IN2> keyBy(KeySelector<IN1, ?> keySelector1,
		KeySelector<IN2, ?> keySelector2) {
	return new ConnectedStreams<>(environment, inputStream1.keyBy(keySelector1),
			inputStream2.keyBy(keySelector2));
}
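// A minimal usage sketch (not from the original source): both KeySelectors must
// produce the same key type so that matching elements of the two inputs end up
// on the same parallel instance. The Tuple2 element shapes and the method name
// are illustrative assumptions.
public static ConnectedStreams<Tuple2<String, Integer>, Tuple2<String, Long>> keyBothInputs(
		DataStream<Tuple2<String, Integer>> counts, DataStream<Tuple2<String, Long>> totals) {
	return counts.connect(totals).keyBy(
			new KeySelector<Tuple2<String, Integer>, String>() {
				@Override
				public String getKey(Tuple2<String, Integer> value) {
					return value.f0; // key of the first input
				}
			},
			new KeySelector<Tuple2<String, Long>, String>() {
				@Override
				public String getKey(Tuple2<String, Long> value) {
					return value.f0; // same key type for the second input
				}
			});
}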
/**
 * Applies an aggregation that gives the maximum element of every window of
 * the data stream by the given position. If several elements have the same
 * maximum value, the operator returns the first one by default.
 *
 * @param positionToMaxBy
 *            The position to maximize by
 * @return The transformed DataStream.
 */
public SingleOutputStreamOperator<T> maxBy(int positionToMaxBy) {
	return this.maxBy(positionToMaxBy, true);
}
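// A short usage sketch (assumptions: Tuple2<String, Integer> elements keyed by
// field 0, count windows of 5). maxBy(1) emits, per window, the first element
// that carries the maximum value at position 1.
public static void windowedMaxBySketch(DataStream<Tuple2<String, Integer>> input) {
	input.keyBy(value -> value.f0)
			.countWindow(5)
			.maxBy(1) // ties are resolved in favor of the first element
			.print();
}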
/**
 * Creates a join operation. See {@link JoinedStreams} for an example of how the keys
 * and window can be specified.
 */
public <T2> JoinedStreams<T, T2> join(DataStream<T2> otherStream) {
	return new JoinedStreams<>(this, otherStream);
}
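// A hedged sketch of the fluent pattern the Javadoc refers to; the key
// selectors, window assigner, and join function are illustrative assumptions.
public static DataStream<String> joinSketch(
		DataStream<Tuple2<String, Integer>> first, DataStream<Tuple2<String, Long>> second) {
	return first.join(second)
			.where(value -> value.f0)   // key of the first input
			.equalTo(value -> value.f0) // key of the second input
			.window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
			.apply((a, b) -> a.f0 + " -> " + (a.f1 + b.f1));
}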
public static SingleOutputStreamOperator<Integer> createWindowFunction(
		ExecutionMode mode, DataStream<Tuple2<Integer, Integer>> input) {
	return input
			.keyBy(0)
			.countWindow(1)
			.apply(new StatefulWindowFunction(mode))
			.setParallelism(4)
			.uid("window");
}
private static void addSmallBoundedJob(StreamExecutionEnvironment env, int parallelism) {
	DataStream<Long> stream = env.generateSequence(1, 100).setParallelism(parallelism);

	stream
			.filter(ignored -> false).setParallelism(parallelism)
			.startNewChain()
			.print().setParallelism(parallelism);
}
public static SingleOutputStreamOperator<Integer> createSecondStatefulMap(
		ExecutionMode mode, DataStream<Integer> input) {
	return input
			.map(new StatefulStringStoringMap(mode, "second"))
			.setParallelism(4)
			.uid("second");
}
@Test(expected = UnsupportedOperationException.class)
public void testDifferingParallelism() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// introduce dummy mapper to get to correct parallelism
	DataStream<Integer> source = env.fromElements(1, 10)
			.map(noOpIntMap);

	IterativeStream<Integer> iter1 = source.iterate();

	iter1.closeWith(iter1.map(noOpIntMap).setParallelism(parallelism / 2));
}
@Test(expected = UnsupportedOperationException.class)
public void testCoDifferingParallelism() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// introduce dummy mapper to get to correct parallelism
	DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

	ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType(
			Integer.class);

	coIter.closeWith(coIter.map(noOpIntCoMap).setParallelism(parallelism / 2));
}
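// For contrast (a sketch, not part of the original suite): a feedback edge is
// accepted when its parallelism matches the iteration head, so simply not
// overriding the parallelism closes the loop successfully.
private static void closeIterationWithMatchingParallelism(DataStream<Integer> source) {
	IterativeStream<Integer> iter = source.iterate();
	iter.closeWith(iter.map(noOpIntMap)); // same parallelism as the head
}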
@Test
public void testDelegateToCoGrouped() {
	Time lateness = Time.milliseconds(42L);

	JoinedStreams.WithWindow<String, String, String, TimeWindow> withLateness = dataStream1
			.join(dataStream2)
			.where(keySelector)
			.equalTo(keySelector)
			.window(tsAssigner)
			.allowedLateness(lateness);

	withLateness.apply(joinFunction, BasicTypeInfo.STRING_TYPE_INFO);

	Assert.assertEquals(lateness.toMilliseconds(),
			withLateness.getCoGroupedWindowedStream().getAllowedLateness().toMilliseconds());
}
@Test
public void testDelegateToCoGrouped() {
	Time lateness = Time.milliseconds(42L);

	CoGroupedStreams.WithWindow<String, String, String, TimeWindow> withLateness = dataStream1
			.coGroup(dataStream2)
			.where(keySelector)
			.equalTo(keySelector)
			.window(tsAssigner)
			.allowedLateness(lateness);

	withLateness.apply(coGroupFunction, BasicTypeInfo.STRING_TYPE_INFO);

	Assert.assertEquals(lateness.toMilliseconds(),
			withLateness.getWindowedStream().getAllowedLateness());
}
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> iter1 = source.iterate();

	iter1.map(noOpIntMap).print();

	env.execute();
}
@Test(expected = UnsupportedOperationException.class)
public void testCoIterClosingFromOutOfLoop() throws Exception {
	// this test verifies that we cannot close an iteration with a DataStream that does not
	// have the iteration in its predecessors
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	// introduce dummy mapper to get to correct parallelism
	DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

	IterativeStream<Integer> iter1 = source.iterate();
	ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType(
			Integer.class);

	coIter.closeWith(iter1.map(noOpIntMap));
}
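// For contrast (a sketch, not part of the original suite): closing a connected
// iteration with a stream that does descend from it is legal, because the
// iteration then appears among the feedback stream's predecessors.
private static void closeCoIterationFromInsideLoop(DataStream<Integer> source) {
	ConnectedIterativeStreams<Integer, Integer> coIter =
			source.iterate().withFeedbackType(Integer.class);
	// the feedback stream is derived from the iteration itself
	coIter.closeWith(coIter.map(noOpIntCoMap));
}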
/**
 * Gets the type of the first input.
 *
 * @return The type of the first input
 */
public TypeInformation<IN1> getType1() {
	return inputStream1.getType();
}
/**
 * A thin wrapper layer over {@link SingleOutputStreamOperator#name(String)}.
 *
 * @param name operator name
 * @return The named operator.
 */
public PythonSingleOutputStreamOperator name(String name) {
	this.stream.name(name);
	return this;
}
/**
 * Applies an aggregation that gives the minimum value of every window
 * of the data stream at the given position.
 *
 * @param positionToMin The position to minimize
 * @return The transformed DataStream.
 */
public SingleOutputStreamOperator<T> min(int positionToMin) {
	return aggregate(new ComparableAggregator<>(positionToMin, input.getType(),
			AggregationFunction.AggregationType.MIN, input.getExecutionConfig()));
}
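// A short sketch (assumed Tuple2<String, Integer> input, count windows of 3):
// unlike minBy, min(1) only guarantees the minimum at position 1; the other
// fields of the emitted element need not come from the minimal record.
public static void windowedMinSketch(DataStream<Tuple2<String, Integer>> input) {
	input.keyBy(value -> value.f0)
			.countWindow(3)
			.min(1)
			.print();
}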
/**
 * Specifies a {@link KeySelector} for elements from the first input with explicit
 * type information for the key type.
 *
 * @param keySelector The KeySelector to be used for extracting the first input's key for partitioning.
 * @param keyType The type information describing the key type.
 */
public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector, TypeInformation<KEY> keyType) {
	requireNonNull(keySelector);
	requireNonNull(keyType);
	return new Where<>(input1.clean(keySelector), keyType);
}
/**
 * Specifies a {@link KeySelector} for elements from the first input with explicit
 * type information.
 *
 * @param keySelector The KeySelector to be used for extracting the first input's key for partitioning.
 * @param keyType The type information describing the key type.
 */
public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector, TypeInformation<KEY> keyType) {
	Preconditions.checkNotNull(keySelector);
	Preconditions.checkNotNull(keyType);
	return new Where<>(input1.clean(keySelector), keyType);
}
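// A hedged usage sketch: explicit TypeInformation is handy when the key type
// cannot be inferred from a lambda, e.g. a generic Tuple2 key. The element
// shapes and the Types.TUPLE key type are illustrative assumptions.
public static void whereWithExplicitKeyTypeSketch(
		DataStream<Tuple3<String, Integer, Double>> first,
		DataStream<Tuple3<String, Integer, Long>> second) {
	first.join(second)
			.where(value -> Tuple2.of(value.f0, value.f1),
					Types.TUPLE(Types.STRING, Types.INT))
			.equalTo(value -> Tuple2.of(value.f0, value.f1),
					Types.TUPLE(Types.STRING, Types.INT))
			.window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
			.apply((a, b) -> a.f0 + ":" + (a.f2 + b.f2));
}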
/**
 * Applies an aggregation that gives the maximum element of every window of
 * the data stream by the given field. If several elements have the same
 * maximum value, the operator returns the first one by default.
 *
 * @param field
 *            The field to maximize by
 * @return The transformed DataStream.
 */
public SingleOutputStreamOperator<T> maxBy(String field) {
	return this.maxBy(field, true);
}
/**
 * Applies an aggregation that gives the minimum element of every window of
 * the data stream by the given field. If several elements have the same
 * minimum value, the operator returns the first one by default.
 *
 * @param field
 *            The field to minimize by
 * @return The transformed DataStream.
 */
public SingleOutputStreamOperator<T> minBy(String field) {
	return this.minBy(field, true);
}
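// A short sketch (assumed Tuple2 input keyed by field 0): with tuples the
// field is addressed by name, e.g. "f1"; per window, the first element holding
// the minimal f1 is emitted in full.
public static void windowedMinBySketch(DataStream<Tuple2<String, Integer>> input) {
	input.keyBy(value -> value.f0)
			.countWindow(10)
			.minBy("f1")
			.print();
}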
@Override
public TypeSerializerSnapshot<TaggedUnion<T1, T2>> snapshotConfiguration() {
	return new UnionSerializerSnapshot<>(this);
}