public ConnectedIterativeStreams(DataStream<I> input, TypeInformation<F> feedbackType, long waitTime) { super(input.getExecutionEnvironment(), input, new DataStream<>(input.getExecutionEnvironment(), new CoFeedbackTransformation<>(input.getParallelism(), feedbackType, waitTime))); this.coFeedbackTransformation = (CoFeedbackTransformation<F>) getSecondInput().getTransformation(); }
.setParallelism(input1.getParallelism()) .returns(unionType); DataStream<TaggedUnion<T1, T2>> taggedInput2 = input2 .map(new Input2Tagger<T1, T2>()) .setParallelism(input2.getParallelism()) .returns(unionType);
public ConnectedIterativeStreams(DataStream<I> input, TypeInformation<F> feedbackType, long waitTime) { super(input.getExecutionEnvironment(), input, new DataStream<>(input.getExecutionEnvironment(), new CoFeedbackTransformation<>(input.getParallelism(), feedbackType, waitTime))); this.coFeedbackTransformation = (CoFeedbackTransformation<F>) getSecondInput().getTransformation(); }
public ConnectedIterativeStreams(DataStream<I> input, TypeInformation<F> feedbackType, long waitTime) { super(input.getExecutionEnvironment(), input, new DataStream<>(input.getExecutionEnvironment(), new CoFeedbackTransformation<>(input.getParallelism(), feedbackType, waitTime))); this.coFeedbackTransformation = (CoFeedbackTransformation<F>) getSecondInput().getTransformation(); }
public ConnectedIterativeStreams(DataStream<I> input, TypeInformation<F> feedbackType, long waitTime) { super(input.getExecutionEnvironment(), input, new DataStream<>(input.getExecutionEnvironment(), new CoFeedbackTransformation<>(input.getParallelism(), feedbackType, waitTime))); this.coFeedbackTransformation = (CoFeedbackTransformation<F>) getSecondInput().getTransformation(); }
private DataStream<Tuple2<Integer, S>> enhance(DataStream<Tuple2<Integer, S>> input, TypeInformation<Tuple2<Integer, S>> aggType) { if (input.getParallelism() <= 2) { return input; } int nextParal = input.getParallelism() / 2; DataStream<Tuple2<Integer, S>> unpartitionedStream = input.keyBy(new KeySelector<Tuple2<Integer, S>, Integer>() { //collapse two partitions into one @Override public Integer getKey(Tuple2<Integer, S> record) throws Exception { return record.f0 / 2; } }); //repartition stream to p / 2 aggregators KeyedStream<Tuple2<Integer, S>, Integer> repartitionedStream = unpartitionedStream.map(new PartitionReMapper()).returns(aggType) .setParallelism(nextParal) .keyBy(0); //window again on event time and aggregate DataStream<Tuple2<Integer, S>> aggregatedStream = repartitionedStream.timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS)) .reduce(new AggregationWrapper<>(getCombineFun())) .setParallelism(nextParal); return enhance(aggregatedStream, aggType); }
doFnOperator, outputTypeInformation, inputDataStream.getParallelism());
.setParallelism(input1.getParallelism()) .returns(unionType); DataStream<TaggedUnion<T1, T2>> taggedInput2 = input2 .map(new Input2Tagger<T1, T2>()) .setParallelism(input2.getParallelism()) .returns(unionType);
.setParallelism(input1.getParallelism()) .returns(unionType); DataStream<TaggedUnion<T1, T2>> taggedInput2 = input2 .map(new Input2Tagger<T1, T2>()) .setParallelism(input2.getParallelism()) .returns(unionType);
.setParallelism(input1.getParallelism()) .returns(unionType); DataStream<TaggedUnion<T1, T2>> taggedInput2 = input2 .map(new Input2Tagger<T1, T2>()) .setParallelism(input2.getParallelism()) .returns(unionType);
@SuppressWarnings("unchecked") @Override public DataStream<T> run(final DataStream<Edge<K, EV>> edgeStream) { TypeInformation<Tuple2<Integer, Edge<K, EV>>> basicTypeInfo = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, edgeStream.getType()); TupleTypeInfo edgeTypeInfo = (TupleTypeInfo) edgeStream.getType(); TypeInformation<S> partialAggType = TypeExtractor.createTypeInfo(EdgesFold.class, getUpdateFun().getClass(), 2, edgeTypeInfo.getTypeAt(0), edgeTypeInfo.getTypeAt(2)); TypeInformation<Tuple2<Integer, S>> partialTypeInfo = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, partialAggType); degree = (degree == -1) ? edgeStream.getParallelism() : degree; DataStream<S> partialAgg = edgeStream .map(new PartitionMapper<>()).returns(basicTypeInfo) .setParallelism(degree) .keyBy(0) .timeWindow(Time.of(timeMillis, TimeUnit.MILLISECONDS)) .fold(getInitialValue(), new PartialAgg<>(getUpdateFun(), partialAggType)).setParallelism(degree); //split here DataStream<Tuple2<Integer, S>> treeAgg = enhance(partialAgg.map(new PartitionMapper<>()).setParallelism(degree).returns(partialTypeInfo), partialTypeInfo); DataStream<S> resultStream = treeAgg.map(new PartitionStripper<>()).setParallelism(treeAgg.getParallelism()) .timeWindowAll(Time.of(timeMillis, TimeUnit.MILLISECONDS)) .reduce(getCombineFun()) .flatMap(getAggregator(edgeStream)).setParallelism(1); return (getTransform() != null) ? resultStream.map(getTransform()) : (DataStream<T>) resultStream; }
.setParallelism(input.getParallelism()); reduced = (DataStream) windowed.keyBy(new KeyedMultiWindowedElementKeyExtractor()) .transform(operator.getName(), TypeInformation.of(StreamingElement.class),