/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * cannot be derived from it and has to be passed explicitly as type information. Prefer this
 * variant when the element type is generic: in that case a plain type class (as accepted by
 * {@link #fromParallelCollection(org.apache.flink.util.SplittableIterator, Class)}) does not
 * carry the complete type information.
 *
 * <p>This overload delegates to the three-argument variant using the source name
 * {@code "Parallel Collection Source"}.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param typeInfo
 *         The TypeInformation describing the elements of the produced data stream
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, TypeInformation<OUT> typeInfo) {
    // Name handed to the three-argument overload for this source.
    final String sourceName = "Parallel Collection Source";
    return fromParallelCollection(iterator, typeInfo, sourceName);
}
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * has to be passed explicitly, here as a type class (the Java compiler erases generic type
 * information). The class is converted with {@code TypeExtractor.getForClass} and the call is
 * forwarded to the overload that accepts type information.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param type
 *         The class of the elements produced by the iterator. Must not be a generic class.
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, Class<OUT> type) {
    // Derive the type information from the class, then hand off to the TypeInformation overload.
    return this.<OUT>fromParallelCollection(iterator, TypeExtractor.getForClass(type));
}
@Test @SuppressWarnings("unchecked") public void testFromCollectionParallelism() { try { TypeInformation<Integer> typeInfo = BasicTypeInfo.INT_TYPE_INFO; StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Integer> dataStream1 = env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo); try { dataStream1.setParallelism(4); fail("should throw an exception"); } catch (IllegalArgumentException e) { // expected } dataStream1.addSink(new DiscardingSink<Integer>()); DataStreamSource<Integer> dataStream2 = env.fromParallelCollection(new DummySplittableIterator<Integer>(), typeInfo).setParallelism(4); dataStream2.addSink(new DiscardingSink<Integer>()); env.getExecutionPlan(); assertEquals("Parallelism of collection source must be 1.", 1, env.getStreamGraph().getStreamNode(dataStream1.getId()).getParallelism()); assertEquals("Parallelism of parallel collection source must be 4.", 4, env.getStreamGraph().getStreamNode(dataStream2.getId()).getParallelism()); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * cannot be derived from it and has to be passed explicitly as type information. Prefer this
 * variant when the element type is generic: in that case a plain type class (as accepted by
 * {@link #fromParallelCollection(org.apache.flink.util.SplittableIterator, Class)}) does not
 * carry the complete type information.
 *
 * <p>This overload delegates to the three-argument variant using the source name
 * {@code "Parallel Collection Source"}.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param typeInfo
 *         The TypeInformation describing the elements of the produced data stream
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, TypeInformation<OUT> typeInfo) {
    // Name handed to the three-argument overload for this source.
    final String sourceName = "Parallel Collection Source";
    return fromParallelCollection(iterator, typeInfo, sourceName);
}
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * cannot be derived from it and has to be passed explicitly as type information. Prefer this
 * variant when the element type is generic: in that case a plain type class (as accepted by
 * {@link #fromParallelCollection(org.apache.flink.util.SplittableIterator, Class)}) does not
 * carry the complete type information.
 *
 * <p>This overload delegates to the three-argument variant using the source name
 * {@code "Parallel Collection Source"}.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param typeInfo
 *         The TypeInformation describing the elements of the produced data stream
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, TypeInformation<OUT> typeInfo) {
    // Name handed to the three-argument overload for this source.
    final String sourceName = "Parallel Collection Source";
    return fromParallelCollection(iterator, typeInfo, sourceName);
}
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * cannot be derived from it and has to be passed explicitly as type information. Prefer this
 * variant when the element type is generic: in that case a plain type class (as accepted by
 * {@link #fromParallelCollection(org.apache.flink.util.SplittableIterator, Class)}) does not
 * carry the complete type information.
 *
 * <p>This overload delegates to the three-argument variant using the source name
 * {@code "Parallel Collection Source"}.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param typeInfo
 *         The TypeInformation describing the elements of the produced data stream
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, TypeInformation<OUT> typeInfo) {
    // Name handed to the three-argument overload for this source.
    final String sourceName = "Parallel Collection Source";
    return fromParallelCollection(iterator, typeInfo, sourceName);
}
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * cannot be derived from it and has to be passed explicitly as type information. Prefer this
 * variant when the element type is generic: in that case a plain type class (as accepted by
 * {@link #fromParallelCollection(org.apache.flink.util.SplittableIterator, Class)}) does not
 * carry the complete type information.
 *
 * <p>This overload delegates to the three-argument variant using the source name
 * {@code "Parallel Collection Source"}.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param typeInfo
 *         The TypeInformation describing the elements of the produced data stream
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, TypeInformation<OUT> typeInfo) {
    // Name handed to the three-argument overload for this source.
    final String sourceName = "Parallel Collection Source";
    return fromParallelCollection(iterator, typeInfo, sourceName);
}
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * has to be passed explicitly, here as a type class (the Java compiler erases generic type
 * information). The class is converted with {@code TypeExtractor.getForClass} and the call is
 * forwarded to the overload that accepts type information.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param type
 *         The class of the elements produced by the iterator. Must not be a generic class.
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, Class<OUT> type) {
    // Derive the type information from the class, then hand off to the TypeInformation overload.
    return this.<OUT>fromParallelCollection(iterator, TypeExtractor.getForClass(type));
}
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * has to be passed explicitly, here as a type class (the Java compiler erases generic type
 * information). The class is converted with {@code TypeExtractor.getForClass} and the call is
 * forwarded to the overload that accepts type information.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param type
 *         The class of the elements produced by the iterator. Must not be a generic class.
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, Class<OUT> type) {
    // Derive the type information from the class, then hand off to the TypeInformation overload.
    return this.<OUT>fromParallelCollection(iterator, TypeExtractor.getForClass(type));
}
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * has to be passed explicitly, here as a type class (the Java compiler erases generic type
 * information). The class is converted with {@code TypeExtractor.getForClass} and the call is
 * forwarded to the overload that accepts type information.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param type
 *         The class of the elements produced by the iterator. Must not be a generic class.
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, Class<OUT> type) {
    // Derive the type information from the class, then hand off to the TypeInformation overload.
    return this.<OUT>fromParallelCollection(iterator, TypeExtractor.getForClass(type));
}
/**
 * Builds a data stream whose elements are supplied by the given splittable iterator. Since the
 * iterator can be split, the framework is able to create a parallel data stream source that
 * emits the iterator's elements.
 *
 * <p>The iterator is left untouched until the job is actually executed, so the element type
 * has to be passed explicitly, here as a type class (the Java compiler erases generic type
 * information). The class is converted with {@code TypeExtractor.getForClass} and the call is
 * forwarded to the overload that accepts type information.
 *
 * @param iterator
 *         The splittable iterator that yields the elements of the data stream
 * @param type
 *         The class of the elements produced by the iterator. Must not be a generic class.
 * @param <OUT>
 *         The element type of the returned data stream
 * @return A data stream representing the elements in the iterator
 */
public <OUT> DataStreamSource<OUT> fromParallelCollection(SplittableIterator<OUT> iterator, Class<OUT> type) {
    // Derive the type information from the class, then hand off to the TypeInformation overload.
    return this.<OUT>fromParallelCollection(iterator, TypeExtractor.getForClass(type));
}