/**
 * Builds a parallel data source from the elements of a splittable iterator, with the element
 * type described by a plain {@link Class}.
 *
 * <p>The iterator is not touched until the program actually runs, and the compiler erases
 * generic type arguments, so the element type has to be handed in explicitly. The class is
 * turned into a type descriptor with {@code TypeExtractor.getForClass(type)} and the call is
 * forwarded to the overload that accepts that descriptor.
 *
 * @param iterator The splittable iterator that yields the elements of the data set.
 * @param type The class of the elements produced by the iterator. Must not be a generic class.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromParallelCollection(SplittableIterator, TypeInformation)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, Class<X> type) {
    return this.fromParallelCollection(iterator, TypeExtractor.getForClass(type));
}
/**
 * Builds a parallel data source from the elements of a splittable iterator, with the element
 * type described by explicit type information.
 *
 * <p>The iterator is not touched until the program actually runs, so the element type has to be
 * handed in explicitly. Use this variant when the element type is generic: in that case a type
 * class (as accepted by {@link #fromParallelCollection(SplittableIterator, Class)}) does not
 * carry all the type information.
 *
 * @param iterator The splittable iterator that yields the elements of the data set.
 * @param type The TypeInformation for the produced data set.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromParallelCollection(SplittableIterator, Class)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, TypeInformation<X> type) {
    // Resolved directly in this method (not in a helper) so the lookup runs from the same frame as before.
    final String callLocation = Utils.getCallLocationName();
    return fromParallelCollection(iterator, type, callLocation);
}
/**
 * Creates a data set holding every number of a closed interval. The data set is produced in
 * parallel, so the order of its elements is not guaranteed.
 *
 * @param from The first number of the sequence (inclusive).
 * @param to The last number of the sequence (inclusive).
 * @return A DataSet containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
    final NumberSequenceIterator sequence = new NumberSequenceIterator(from, to);
    return fromParallelCollection(
        sequence,
        BasicTypeInfo.LONG_TYPE_INFO,
        Utils.getCallLocationName());
}
@Override public Graph<LongValue, NullValue, NullValue> generate() { // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, this.vertexCount - 1); // Validate ranges Collections.sort(offsetRanges); Iterator<OffsetRange> iter = offsetRanges.iterator(); OffsetRange lastRange = iter.next(); while (iter.hasNext()) { OffsetRange nextRange = iter.next(); if (lastRange.overlaps(nextRange)) { throw new IllegalArgumentException("Overlapping ranges " + lastRange + " and " + nextRange); } lastRange = nextRange; } DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .flatMap(new LinkVertexToOffsets(vertexCount, offsetRanges)) .setParallelism(parallelism) .name("Circulant graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
/**
 * Times a grouped reduce over generated input and optionally prints the elapsed time.
 *
 * <p>The input is a parallel collection backed by a {@code SplittableRandomIterator} constructed
 * from {@code numRecords} and {@code iterator}. Records are grouped by the field expression
 * {@code "0"}, reduced with a {@code SumReducer} using the given combine hint, and the result of
 * {@code count()} is printed. Object reuse is enabled on the environment. Only the
 * {@code count()} call (including printing its result) is inside the timed section.
 *
 * @param iterator  the record generator handed to the splittable iterator
 * @param typeInfo  type information of the generated records
 * @param hint      combine strategy applied to the reduce
 * @param numRecords value passed to the splittable iterator as its first constructor argument
 * @param print     whether to print the timing line
 * @throws Exception if building or running the program fails
 */
private static <T, B extends CopyableIterator<T>> void testReducePerformance
        (B iterator, TypeInformation<T> typeInfo, CombineHint hint, int numRecords, boolean print) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    @SuppressWarnings("unchecked")
    DataSet<T> output =
        env.fromParallelCollection(new SplittableRandomIterator<T, B>(numRecords, iterator), typeInfo)
            .groupBy("0")
            .reduce(new SumReducer()).setCombineHint(hint);

    // Elapsed time is taken from the monotonic clock; wall-clock time can jump (e.g. NTP
    // adjustments) and distort or even negate the measured interval.
    long startNanos = System.nanoTime();

    System.out.println(output.count());

    long elapsedMillis = (System.nanoTime() - startNanos) / 1000000L;

    if (print) {
        System.out.println("=== Time for " + iterator.getClass().getSimpleName() + " with hint " + hint.toString() + ": " + elapsedMillis + "ms ===");
    }
}
@Override public Graph<LongValue, NullValue, NullValue> generate() { Preconditions.checkState(!dimensions.isEmpty(), "No dimensions added to GridGraph"); // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, this.vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .flatMap(new LinkVertexToNeighbors(vertexCount, dimensions)) .setParallelism(parallelism) .name("Grid graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
@Override public Graph<LongValue, NullValue, NullValue> generate() { Preconditions.checkState(vertexCount >= 2); // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(1, this.vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .flatMap(new LinkVertexToCenter()) .setParallelism(parallelism) .name("Star graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
/**
 * Generates {@link Vertex vertices} with sequential, numerical labels.
 *
 * <p>For a count of zero an empty data set is created from an empty collection; otherwise the
 * labels {@code 0} to {@code vertexCount - 1} are produced by a parallel source and mapped to
 * vertices with {@code CreateVertex}.
 *
 * @param env the Flink execution environment
 * @param parallelism operator parallelism
 * @param vertexCount number of sequential vertex labels
 * @return {@link DataSet} of sequentially labeled {@link Vertex vertices}
 * @throws IllegalArgumentException if {@code vertexCount} is negative
 */
public static DataSet<Vertex<LongValue, NullValue>> vertexSequence(ExecutionEnvironment env, int parallelism, long vertexCount) {
    Preconditions.checkArgument(vertexCount >= 0, "Vertex count must be non-negative");

    // Special case first: no labels to generate, so build the result from an empty collection.
    if (vertexCount == 0) {
        TypeInformation<Vertex<LongValue, NullValue>> vertexType =
            TypeInformation.of(new TypeHint<Vertex<LongValue, NullValue>>(){});

        return env
            .fromCollection(Collections.<Vertex<LongValue, NullValue>>emptyList(), vertexType)
            .setParallelism(parallelism)
            .name("Empty vertex set");
    }

    LongValueSequenceIterator labels = new LongValueSequenceIterator(0, vertexCount - 1);

    DataSource<LongValue> labelSource = env
        .fromParallelCollection(labels, LongValue.class)
        .setParallelism(parallelism)
        .name("Vertex indices");

    return labelSource
        .map(new CreateVertex())
        .setParallelism(parallelism)
        .name("Vertex sequence");
}
@Override public Graph<LongValue, NullValue, NullValue> generate() { Preconditions.checkState(vertexPairCount > 0); // Vertices long vertexCount = 2 * vertexPairCount; DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .map(new LinkVertexToSingletonNeighbor()) .setParallelism(parallelism) .name("Complete graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
/**
 * Builds a parallel data source from the elements of a splittable iterator, with the element
 * type described by a plain {@link Class}.
 *
 * <p>The iterator is not touched until the program actually runs, and the compiler erases
 * generic type arguments, so the element type has to be handed in explicitly. The class is
 * turned into a type descriptor with {@code TypeExtractor.getForClass(type)} and the call is
 * forwarded to the overload that accepts that descriptor.
 *
 * @param iterator The splittable iterator that yields the elements of the data set.
 * @param type The class of the elements produced by the iterator. Must not be a generic class.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromParallelCollection(SplittableIterator, TypeInformation)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, Class<X> type) {
    return this.fromParallelCollection(iterator, TypeExtractor.getForClass(type));
}
/**
 * Builds a parallel data source from the elements of a splittable iterator, with the element
 * type described by a plain {@link Class}.
 *
 * <p>The iterator is not touched until the program actually runs, and the compiler erases
 * generic type arguments, so the element type has to be handed in explicitly. The class is
 * turned into a type descriptor with {@code TypeExtractor.getForClass(type)} and the call is
 * forwarded to the overload that accepts that descriptor.
 *
 * @param iterator The splittable iterator that yields the elements of the data set.
 * @param type The class of the elements produced by the iterator. Must not be a generic class.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromParallelCollection(SplittableIterator, TypeInformation)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, Class<X> type) {
    return this.fromParallelCollection(iterator, TypeExtractor.getForClass(type));
}
/**
 * Creates a data set holding every number of a closed interval. The data set is produced in
 * parallel, so the order of its elements is not guaranteed.
 *
 * @param from The first number of the sequence (inclusive).
 * @param to The last number of the sequence (inclusive).
 * @return A DataSet containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
    final NumberSequenceIterator sequence = new NumberSequenceIterator(from, to);
    return fromParallelCollection(
        sequence,
        BasicTypeInfo.LONG_TYPE_INFO,
        Utils.getCallLocationName());
}
/**
 * Creates a data set holding every number of a closed interval. The data set is produced in
 * parallel, so the order of its elements is not guaranteed.
 *
 * @param from The first number of the sequence (inclusive).
 * @param to The last number of the sequence (inclusive).
 * @return A DataSet containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
    final NumberSequenceIterator sequence = new NumberSequenceIterator(from, to);
    return fromParallelCollection(
        sequence,
        BasicTypeInfo.LONG_TYPE_INFO,
        Utils.getCallLocationName());
}
/**
 * Creates a data set holding every number of a closed interval. The data set is produced in
 * parallel, so the order of its elements is not guaranteed.
 *
 * @param from The first number of the sequence (inclusive).
 * @param to The last number of the sequence (inclusive).
 * @return A DataSet containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
    final NumberSequenceIterator sequence = new NumberSequenceIterator(from, to);
    return fromParallelCollection(
        sequence,
        BasicTypeInfo.LONG_TYPE_INFO,
        Utils.getCallLocationName());
}
@Override public Graph<LongValue, NullValue, NullValue> generate() { Preconditions.checkState(!dimensions.isEmpty(), "No dimensions added to GridGraph"); // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, this.vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .flatMap(new LinkVertexToNeighbors(vertexCount, dimensions)) .setParallelism(parallelism) .name("Grid graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
@Override public Graph<LongValue, NullValue, NullValue> generate() { Preconditions.checkState(!dimensions.isEmpty(), "No dimensions added to GridGraph"); // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, this.vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .flatMap(new LinkVertexToNeighbors(vertexCount, dimensions)) .setParallelism(parallelism) .name("Grid graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
@Override public Graph<LongValue, NullValue, NullValue> generate() { // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(1, this.vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .flatMap(new LinkVertexToCenter()) .setParallelism(parallelism) .name("Star graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
@Override public Graph<LongValue, NullValue, NullValue> generate() { // Vertices long vertexCount = 2 * this.vertexPairCount; DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .map(new LinkVertexToSingletonNeighbor()) .setParallelism(parallelism) .name("Complete graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
@Override public Graph<LongValue, NullValue, NullValue> generate() { Preconditions.checkState(vertexCount >= 2); // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(1, this.vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .flatMap(new LinkVertexToCenter()) .setParallelism(parallelism) .name("Star graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }
@Override public Graph<LongValue, NullValue, NullValue> generate() { Preconditions.checkState(vertexCount >= 2); // Vertices DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount); // Edges LongValueSequenceIterator iterator = new LongValueSequenceIterator(1, this.vertexCount - 1); DataSet<Edge<LongValue, NullValue>> edges = env .fromParallelCollection(iterator, LongValue.class) .setParallelism(parallelism) .name("Edge iterators") .flatMap(new LinkVertexToCenter()) .setParallelism(parallelism) .name("Star graph edges"); // Graph return Graph.fromDataSet(vertices, edges, env); }