org.apache.flink.api.java.ExecutionEnvironment.fromParallelCollection Java code examples

/**
 * Builds a parallel data source from a splittable iterator, with the element type given as a class.
 *
 * <p>The iterator is left untouched until the program is actually executed, and the Java compiler
 * erases generic type arguments, so the element type must be handed over explicitly. This overload
 * passes the result of {@code TypeExtractor.getForClass(type)} on to another
 * {@code fromParallelCollection} overload.
 *
 * @param iterator The splittable iterator that yields the elements of the data set.
 * @param type The class of the elements yielded by the iterator. Must not be a generic class.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromParallelCollection(SplittableIterator, TypeInformation)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, Class<X> type) {
  return this.fromParallelCollection(
    iterator,
    TypeExtractor.getForClass(type));
}

/**
 * Builds a parallel data source from a splittable iterator, with the element type given as
 * type information.
 *
 * <p>The iterator is left untouched until the program is actually executed, so the element type
 * must be handed over explicitly. Use this overload when the element type is generic: in that case
 * the plain class (as accepted by {@link #fromParallelCollection(SplittableIterator, Class)}) does
 * not carry all of the type information.
 *
 * @param iterator The splittable iterator that yields the elements of the data set.
 * @param type The TypeInformation for the produced data set.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromParallelCollection(SplittableIterator, Class)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, TypeInformation<X> type) {
  // NOTE(review): presumably the location name is derived from the current call stack, so the
  // lookup is kept directly in this method rather than moved into a helper - confirm in Utils.
  final String callLocation = Utils.getCallLocationName();
  return fromParallelCollection(iterator, type, callLocation);
}

/**
 * Creates a data set holding a sequence of numbers. The data set is produced in parallel, so the
 * order of the elements is not guaranteed.
 *
 * @param from The first number of the sequence (inclusive).
 * @param to The last number of the sequence (inclusive).
 * @return A DataSet containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
  final NumberSequenceIterator sequence = new NumberSequenceIterator(from, to);
  // NOTE(review): presumably derived from the call stack; kept in this method on purpose - confirm in Utils.
  final String callLocation = Utils.getCallLocationName();
  return fromParallelCollection(sequence, BasicTypeInfo.LONG_TYPE_INFO, callLocation);
}

/**
 * Generates the graph: a vertex sequence of {@code vertexCount} labels plus the edges that
 * {@code LinkVertexToOffsets} emits for the configured offset ranges.
 *
 * <p>As a side effect the {@code offsetRanges} list is sorted in place. After sorting, each range
 * is compared with its successor and generation is aborted if the two overlap.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 * @throws IllegalStateException if no offset range has been configured
 * @throws IllegalArgumentException if two neighbouring ranges (after sorting) overlap
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  // Fail with a descriptive message; without this guard the first Iterator.next() below would
  // surface an uninformative NoSuchElementException for an empty range list.
  if (offsetRanges.isEmpty()) {
    throw new IllegalStateException("No offset ranges added to the circulant graph");
  }

  // Vertices
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edges: one source record per vertex label in [0, vertexCount - 1]
  LongValueSequenceIterator iterator = new LongValueSequenceIterator(0, this.vertexCount - 1);

  // Validate ranges: once sorted, only neighbouring ranges need to be compared
  Collections.sort(offsetRanges);
  Iterator<OffsetRange> rangeIterator = offsetRanges.iterator();
  OffsetRange lastRange = rangeIterator.next();
  while (rangeIterator.hasNext()) {
    OffsetRange nextRange = rangeIterator.next();
    if (lastRange.overlaps(nextRange)) {
      throw new IllegalArgumentException("Overlapping ranges " + lastRange + " and " + nextRange);
    }
    lastRange = nextRange;
  }

  DataSet<Edge<LongValue, NullValue>> edges = env
    .fromParallelCollection(iterator, LongValue.class)
      .setParallelism(parallelism)
      .name("Edge iterators")
    .flatMap(new LinkVertexToOffsets(vertexCount, offsetRanges))
      .setParallelism(parallelism)
      .name("Circulant graph edges");

  // Graph
  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Runs one grouped-reduce job and optionally prints how long it took.
 *
 * <p>Object reuse is enabled on the environment. The input is a parallel collection backed by a
 * {@code SplittableRandomIterator} built from {@code numRecords} and {@code iterator}; it is
 * grouped by the field expression {@code "0"} and reduced with {@code SumReducer} using the given
 * combine hint. The measured interval covers printing {@code output.count()} only.
 *
 * @param iterator the iterator handed to {@code SplittableRandomIterator}
 * @param typeInfo type information of the generated records
 * @param hint combine hint applied to the reduce operator
 * @param numRecords record count handed to {@code SplittableRandomIterator}
 * @param print whether to print the elapsed time
 * @throws Exception declared by the method; {@code count()} triggers the job execution
 */
private static <T, B extends CopyableIterator<T>> void testReducePerformance(
    B iterator, TypeInformation<T> typeInfo, CombineHint hint, int numRecords, boolean print) throws Exception {
  ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.getConfig().enableObjectReuse();

  DataSet<T> input =
    env.fromParallelCollection(new SplittableRandomIterator<T, B>(numRecords, iterator), typeInfo);

  @SuppressWarnings("unchecked")
  DataSet<T> output = input
    .groupBy("0")
    .reduce(new SumReducer())
    .setCombineHint(hint);

  long startMillis = System.currentTimeMillis();
  System.out.println(output.count());
  long elapsedMillis = System.currentTimeMillis() - startMillis;

  if (!print) {
    return;
  }
  System.out.println("=== Time for " + iterator.getClass().getSimpleName() + " with hint " + hint.toString() + ": " + elapsedMillis + "ms ===");
}

/**
 * Generates the grid graph from the configured dimensions.
 *
 * <p>Generation is rejected by the {@code checkState} precondition when {@code dimensions} is empty.
 * Vertices come from {@code GraphGeneratorUtils.vertexSequence}; edges are produced by feeding the
 * labels {@code 0 .. vertexCount - 1} through {@code LinkVertexToNeighbors}.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  Preconditions.checkState(!dimensions.isEmpty(), "No dimensions added to GridGraph");

  // Vertex set
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edge set: start from one record per vertex label, then expand each label into its edges
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(0, this.vertexCount - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .flatMap(new LinkVertexToNeighbors(vertexCount, dimensions))
    .setParallelism(parallelism)
    .name("Grid graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Generates the star graph.
 *
 * <p>Generation is rejected by the {@code checkState} precondition unless {@code vertexCount >= 2}.
 * Vertices come from {@code GraphGeneratorUtils.vertexSequence}; edges are produced by feeding the
 * labels {@code 1 .. vertexCount - 1} through {@code LinkVertexToCenter}.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  Preconditions.checkState(vertexCount >= 2);

  // Vertex set
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edge set: the source sequence starts at label 1, so label 0 is not part of it
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(1, this.vertexCount - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .flatMap(new LinkVertexToCenter())
    .setParallelism(parallelism)
    .name("Star graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Creates a data set of {@link Vertex Vertices} labeled {@code 0 .. vertexCount - 1}.
 *
 * <p>A {@code vertexCount} of zero yields a data set built from an empty collection; a negative
 * count is rejected by the {@code checkArgument} precondition.
 *
 * @param env the Flink execution environment
 * @param parallelism parallelism applied to every operator created here
 * @param vertexCount number of sequential vertex labels; must be non-negative
 * @return {@link DataSet} of sequentially labeled {@link Vertex vertices}
 */
public static DataSet<Vertex<LongValue, NullValue>> vertexSequence(ExecutionEnvironment env, int parallelism, long vertexCount) {
  Preconditions.checkArgument(vertexCount >= 0, "Vertex count must be non-negative");

  // Nothing to enumerate: hand back an explicitly typed, empty data set.
  if (vertexCount == 0) {
    TypeInformation<Vertex<LongValue, NullValue>> vertexType =
      TypeInformation.of(new TypeHint<Vertex<LongValue, NullValue>>(){});
    return env
      .fromCollection(Collections.<Vertex<LongValue, NullValue>>emptyList(), vertexType)
      .setParallelism(parallelism)
      .name("Empty vertex set");
  }

  // Enumerate the labels in parallel, then wrap each label in a vertex.
  return env
    .fromParallelCollection(new LongValueSequenceIterator(0, vertexCount - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Vertex indices")
    .map(new CreateVertex())
    .setParallelism(parallelism)
    .name("Vertex sequence");
}

/**
 * Generates a graph with {@code 2 * vertexPairCount} vertices whose edges are produced by mapping
 * every vertex label through {@code LinkVertexToSingletonNeighbor}.
 *
 * <p>Generation is rejected by the {@code checkState} precondition unless {@code vertexPairCount > 0}.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  Preconditions.checkState(vertexPairCount > 0);

  // Vertex set: two vertices per pair
  long totalVertices = 2 * vertexPairCount;
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, totalVertices);

  // Edge set: one output record per vertex label (map, not flatMap)
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(0, totalVertices - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  // NOTE(review): the operator name below mentions a "complete graph"; it looks copied from
  // another generator - confirm before renaming, since the name is visible at runtime.
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .map(new LinkVertexToSingletonNeighbor())
    .setParallelism(parallelism)
    .name("Complete graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Turns a splittable iterator into a parallel data source; the element type is given as a class.
 *
 * <p>Since the iterator is not read before the program actually runs, and generic type arguments
 * are erased by the Java compiler, the caller has to state the element type explicitly. The class
 * is converted with {@code TypeExtractor.getForClass(type)} and handed to another
 * {@code fromParallelCollection} overload.
 *
 * @param iterator The splittable iterator producing the elements of the data set.
 * @param type The class of the produced elements. Must not be a generic class.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromParallelCollection(SplittableIterator, TypeInformation)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, Class<X> type) {
  return this.fromParallelCollection(
    iterator,
    TypeExtractor.getForClass(type));
}

/**
 * Creates a parallel data source over the elements of a splittable iterator, typed by class.
 *
 * <p>The iterator stays unmodified until execution and Java erases generic type information, which
 * is why the element class is required here. It is converted with
 * {@code TypeExtractor.getForClass(type)} before the call is forwarded to another
 * {@code fromParallelCollection} overload.
 *
 * @param iterator The splittable iterator supplying the elements of the data set.
 * @param type The class of the supplied elements. Must not be a generic class.
 * @return A DataSet representing the elements in the iterator.
 *
 * @see #fromParallelCollection(SplittableIterator, TypeInformation)
 */
public <X> DataSource<X> fromParallelCollection(SplittableIterator<X> iterator, Class<X> type) {
  return this.fromParallelCollection(
    iterator,
    TypeExtractor.getForClass(type));
}

/**
 * Creates a data set holding the numbers of a closed interval. The data set is produced in
 * parallel, so the order of the elements is not guaranteed.
 *
 * @param from The first number of the sequence (inclusive).
 * @param to The last number of the sequence (inclusive).
 * @return A DataSet containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
  final NumberSequenceIterator sequence = new NumberSequenceIterator(from, to);
  // NOTE(review): presumably derived from the call stack; kept in this method on purpose - confirm in Utils.
  final String callLocation = Utils.getCallLocationName();
  return fromParallelCollection(sequence, BasicTypeInfo.LONG_TYPE_INFO, callLocation);
}

/**
 * Produces a parallel data set of consecutive numbers. Because the elements are generated in
 * parallel, no ordering of the elements is guaranteed.
 *
 * @param from The number the sequence starts at (inclusive).
 * @param to The number the sequence stops at (inclusive).
 * @return A DataSet containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
  final NumberSequenceIterator sequence = new NumberSequenceIterator(from, to);
  // NOTE(review): presumably derived from the call stack; kept in this method on purpose - confirm in Utils.
  final String callLocation = Utils.getCallLocationName();
  return fromParallelCollection(sequence, BasicTypeInfo.LONG_TYPE_INFO, callLocation);
}

/**
 * Builds a data set from a number sequence. The sequence is materialized in parallel, so callers
 * must not rely on the order of the elements.
 *
 * @param from The lower bound of the sequence (inclusive).
 * @param to The upper bound of the sequence (inclusive).
 * @return A DataSet containing all numbers in the {@code [from, to]} interval.
 */
public DataSource<Long> generateSequence(long from, long to) {
  final NumberSequenceIterator sequence = new NumberSequenceIterator(from, to);
  // NOTE(review): presumably derived from the call stack; kept in this method on purpose - confirm in Utils.
  final String callLocation = Utils.getCallLocationName();
  return fromParallelCollection(sequence, BasicTypeInfo.LONG_TYPE_INFO, callLocation);
}

/**
 * Generates the grid graph from the configured dimensions.
 *
 * <p>The {@code checkState} precondition rejects generation when {@code dimensions} is empty.
 * Vertices are created by {@code GraphGeneratorUtils.vertexSequence}; edges result from passing
 * the labels {@code 0 .. vertexCount - 1} through {@code LinkVertexToNeighbors}.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  Preconditions.checkState(!dimensions.isEmpty(), "No dimensions added to GridGraph");

  // Vertex set
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edge set: start from one record per vertex label, then expand each label into its edges
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(0, this.vertexCount - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .flatMap(new LinkVertexToNeighbors(vertexCount, dimensions))
    .setParallelism(parallelism)
    .name("Grid graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Builds the grid graph for the dimensions added so far.
 *
 * <p>An empty {@code dimensions} collection fails the {@code checkState} precondition. The vertex
 * data set is obtained from {@code GraphGeneratorUtils.vertexSequence}; the edge data set is
 * obtained by running the labels {@code 0 .. vertexCount - 1} through {@code LinkVertexToNeighbors}.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  Preconditions.checkState(!dimensions.isEmpty(), "No dimensions added to GridGraph");

  // Vertex set
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edge set: label source first, edge expansion second
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(0, this.vertexCount - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .flatMap(new LinkVertexToNeighbors(vertexCount, dimensions))
    .setParallelism(parallelism)
    .name("Grid graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Generates the star graph.
 *
 * <p>Vertices come from {@code GraphGeneratorUtils.vertexSequence}; edges are produced by feeding
 * the labels {@code 1 .. vertexCount - 1} through {@code LinkVertexToCenter}. This variant performs
 * no check on {@code vertexCount} itself.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  // Vertex set
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edge set: the source sequence starts at label 1, so label 0 is not part of it
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(1, this.vertexCount - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .flatMap(new LinkVertexToCenter())
    .setParallelism(parallelism)
    .name("Star graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Generates a graph with {@code 2 * vertexPairCount} vertices whose edges are produced by mapping
 * every vertex label through {@code LinkVertexToSingletonNeighbor}.
 *
 * <p>This variant performs no check on {@code vertexPairCount} itself.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  // Vertex set: two vertices per pair
  long totalVertices = 2 * this.vertexPairCount;
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, totalVertices);

  // Edge set: one output record per vertex label (map, not flatMap)
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(0, totalVertices - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  // NOTE(review): the operator name below mentions a "complete graph"; it looks copied from
  // another generator - confirm before renaming, since the name is visible at runtime.
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .map(new LinkVertexToSingletonNeighbor())
    .setParallelism(parallelism)
    .name("Complete graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Generates the star graph.
 *
 * <p>The {@code checkState} precondition rejects generation unless {@code vertexCount >= 2}.
 * Vertices are created by {@code GraphGeneratorUtils.vertexSequence}; edges result from passing
 * the labels {@code 1 .. vertexCount - 1} through {@code LinkVertexToCenter}.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  Preconditions.checkState(vertexCount >= 2);

  // Vertex set
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edge set: the source sequence starts at label 1, so label 0 is not part of it
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(1, this.vertexCount - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .flatMap(new LinkVertexToCenter())
    .setParallelism(parallelism)
    .name("Star graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Builds the star graph.
 *
 * <p>Fewer than two vertices fail the {@code checkState} precondition. The vertex data set is
 * obtained from {@code GraphGeneratorUtils.vertexSequence}; the edge data set is obtained by
 * running the labels {@code 1 .. vertexCount - 1} through {@code LinkVertexToCenter}.
 *
 * @return the graph assembled from the generated vertex and edge data sets
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  Preconditions.checkState(vertexCount >= 2);

  // Vertex set
  DataSet<Vertex<LongValue, NullValue>> vertices = GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edge set: label source (beginning at 1) first, edge expansion second
  DataSet<LongValue> edgeSources = env
    .fromParallelCollection(new LongValueSequenceIterator(1, this.vertexCount - 1), LongValue.class)
    .setParallelism(parallelism)
    .name("Edge iterators");
  DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .flatMap(new LinkVertexToCenter())
    .setParallelism(parallelism)
    .name("Star graph edges");

  return Graph.fromDataSet(vertices, edges, env);
}

Javadoc

Creates a new data set that contains elements in the iterator. The iterator is splittable, allowing the framework to create a parallel data source that returns the elements in the iterator.

Because the iterator will remain unmodified until the actual execution happens, the type of data returned by the iterator must be given explicitly in the form of the type class (this is due to the fact that the Java compiler erases the generic type information).

Popular methods of ExecutionEnvironment

getExecutionEnvironment
Creates an execution environment that represents the context in which the program is currently executed.
execute
Triggers the program execution. The environment will execute all parts of the program that have resu
getConfig
Gets the config object that defines execution parameters.
fromCollection
Creates a DataSet from the given iterator. Because the iterator will remain unmodified until the act
fromElements
Creates a new data set that contains the given elements. The elements must all be of the same type,
setParallelism
Sets the parallelism for operations executed through this environment. Setting a parallelism of x he
createInput
Generic method to create an input DataSet with an InputFormat. The DataSet will not be immediately c…
getParallelism
Gets the parallelism with which operations are executed by default. Operations can individually overr…
createLocalEnvironment
Creates a LocalEnvironment which is used for executing Flink jobs.
readTextFile
Creates a DataSet that represents the Strings produced by reading the given file line wise. The java
getLastJobExecutionResult
Returns the org.apache.flink.api.common.JobExecutionResult of the last executed job.
readCsvFile
Creates a CSV reader to read a comma separated value (CSV) file. The reader has options to define pa

Popular in Java

Reactive rest calls using spring rest template
setContentView (Activity)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
runOnUiThread (Activity)
DecimalFormat (java.text)
A concrete subclass of NumberFormat that formats decimal numbers. It has a variety of features desig
Enumeration (java.util)
A legacy iteration interface. New code should use Iterator instead. Iterator replaces the enumeration…
GregorianCalendar (java.util)
GregorianCalendar is a concrete subclass of Calendar and provides the standard calendar used by most…
Scanner (java.util)
A parser that parses a text string of primitive types and strings with the help of regular expressio
TimeZone (java.util)
TimeZone represents a time zone offset, and also figures out daylight savings. Typically, you get a
BoxLayout (javax.swing)
Best plugins for Eclipse

How to use the fromParallelCollection method in org.apache.flink.api.java.ExecutionEnvironment

Best Java code snippets using org.apache.flink.api.java.ExecutionEnvironment.fromParallelCollection (Showing top 20 results out of 315)

How to use
fromParallelCollection
method
in
org.apache.flink.api.java.ExecutionEnvironment