org.apache.flink.api.java.operators.MapOperator.name Java code examples

/**
 * Maps every vertex of this graph through {@code ExtractVertexIDMapper}.
 *
 * @return The IDs of the vertices as DataSet; the producing map operator is named "Vertex IDs"
 */
public DataSet<K> getVertexIds() {
  final DataSet<K> vertexIds = vertices
    .map(new ExtractVertexIDMapper<>())
      .name("Vertex IDs");
  return vertexIds;
}

/**
 * Maps every edge of this graph through {@code ExtractEdgeIDsMapper}.
 *
 * @return The IDs of the edges as DataSet of Tuple2; the producing map operator is named "Edge IDs"
 */
public DataSet<Tuple2<K, K>> getEdgeIds() {
  final DataSet<Tuple2<K, K>> edgeIds = edges
    .map(new ExtractEdgeIDsMapper<>())
      .name("Edge IDs");
  return edgeIds;
}

/**
 * Reverse the direction of the edges in the graph.
 *
 * <p>The edges are passed through {@code ReverseEdgesMap}; the vertex set and the
 * execution context are handed to the new graph unchanged.
 *
 * @return a new graph with all edges reversed
 * @throws UnsupportedOperationException declared by the signature; no statement in this
 *         method body throws it directly
 */
public Graph<K, VV, EV> reverse() throws UnsupportedOperationException {
  return new Graph<>(
    vertices,
    edges.map(new ReverseEdgesMap<>()).name("Reverse edges"),
    this.context);
}

/**
 * Creates a Graph from CSV input without vertex values or edge values.
 *
 * <p>Both columns of the edge reader are typed with {@code vertexKey}; every record read
 * is then converted by {@code Tuple2ToEdgeMap} before the graph is assembled.
 *
 * @param vertexKey the type of the vertex IDs
 * @param <K> the vertex ID type
 * @return a Graph where the vertex IDs are read from the edges input file.
 * @throws RuntimeException if no edge reader has been set
 */
public <K> Graph<K, NullValue, NullValue> keyType(Class<K> vertexKey) {
  if (null == edgeReader) {
    throw new RuntimeException("The edge input file cannot be null!");
  }

  final DataSet<Edge<K, NullValue>> convertedEdges = edgeReader
    .types(vertexKey, vertexKey)
      .name(GraphCsvReader.class.getName())
    .map(new Tuple2ToEdgeMap<>())
      .name("Type conversion");

  return Graph.fromDataSet(convertedEdges, executionContext);
}

/**
 * Creates a graph from a DataSet of Tuple2 objects for edges.
 * Every Tuple2 is turned into one Edge: the first tuple field is the source ID,
 * the second tuple field is the target ID.
 *
 * <p>Edge value types and Vertex values types will be set to NullValue.
 *
 * @param edges a DataSet of Tuple2.
 * @param context the flink execution environment.
 * @param <K> the vertex ID type
 * @return the newly created graph.
 */
public static <K> Graph<K, NullValue, NullValue> fromTuple2DataSet(DataSet<Tuple2<K, K>> edges,
    ExecutionEnvironment context) {
  // Convert in place and hand the named map operator straight to fromDataSet.
  return fromDataSet(
    edges.map(new Tuple2ToEdgeMap<>()).name("To Edge"),
    context);
}

/**
 * Hash-partitions the parent data set on {@code info.keys}, maps the partitioned records
 * through {@code KeyDiscarder}, and registers the result under {@code info.setID}.
 *
 * @param info operation description supplying the parent set ID, key fields,
 *             parallelism and the ID to register the result under
 * @param <K> tuple type of the first field of each input record
 */
private <K extends Tuple> void createHashPartitionOperation(PythonOperationInfo info) {
  final DataSet<Tuple2<K, byte[]>> parent = sets.getDataSet(info.parentID);
  final String[] keyFields = info.keys.toArray(new String[info.keys.size()]);

  final DataSet<byte[]> partitioned = parent
    .partitionByHash(keyFields)
      .setParallelism(info.parallelism)
    .map(new KeyDiscarder<K>())
      .setParallelism(info.parallelism)
      .name("HashPartitionPostStep");

  sets.add(info.setID, partitioned);
}

/**
 * Returns the graph's edges, converted through {@code EdgeToTuple2Map} when
 * {@code hasNullValueEdges} reports true for them and returned untouched otherwise.
 *
 * @param graph the graph whose edges are emitted
 * @return either the original edge data set or the mapped one
 * @throws Exception as declared by the overridden method
 */
@Override
public DataSet plan(Graph<K, VV, EV> graph) throws Exception {
  final DataSet<Edge<K, EV>> graphEdges = graph.getEdges();

  if (!hasNullValueEdges(graphEdges)) {
    return graphEdges;
  }

  return graphEdges
    .map(new EdgeToTuple2Map<>())
      .name("Edge to Tuple2")
      .setParallelism(parallelism.getValue().intValue());
}

/**
 * Applies {@code distinct} on {@code info.keys} to the parent data set, maps the result
 * through {@code KeyDiscarder}, and registers it under {@code info.setID}.
 *
 * @param info operation description supplying the parent set ID, key fields,
 *             parallelism and the ID to register the result under
 * @param <K> tuple type of the first field of each input record
 */
private <K extends Tuple> void createDistinctOperation(PythonOperationInfo info) {
  final DataSet<Tuple2<K, byte[]>> parent = sets.getDataSet(info.parentID);
  final String[] keyFields = info.keys.toArray(new String[info.keys.size()]);

  final DataSet<byte[]> deduplicated = parent
    .distinct(keyFields)
      .setParallelism(info.parallelism)
      .name("Distinct")
    .map(new KeyDiscarder<K>())
      .setParallelism(info.parallelism)
      .name("DistinctPostStep");

  sets.add(info.setID, deduplicated);
}

/**
 * Creates a CSV source from {@code info.path}, maps its records through
 * {@code SerializerMap}, and registers the result under {@code info.setID}.
 *
 * @param env the execution environment the input is created on
 * @param info operation description supplying path, delimiters, type information,
 *             parallelism and the ID to register the result under
 * @param <T> tuple type of the records read from the CSV file
 * @throws RuntimeException if {@code info.types} is not a {@code TupleTypeInfo}
 */
@SuppressWarnings("unchecked")
private <T extends Tuple> void createCsvSource(ExecutionEnvironment env, PythonOperationInfo info) {
  if (!(info.types instanceof TupleTypeInfo)) {
    // The message announces "the derived type", so print the type itself rather than the whole info object.
    throw new RuntimeException("The output type of a csv source has to be a tuple. The derived type is " + info.types);
  }
  Path path = new Path(info.path);
  String lineD = info.lineDelimiter;
  String fieldD = info.fieldDelimiter;
  // Unchecked cast: only the raw TupleTypeInfo class is verified by the instanceof check above.
  TupleTypeInfo<T> types = (TupleTypeInfo<T>) info.types;
  sets.add(info.setID, env.createInput(new TupleCsvInputFormat<>(path, lineD, fieldD, types), types).setParallelism(info.parallelism).name("CsvSource")
    .map(new SerializerMap<T>()).setParallelism(info.parallelism).name("CsvSourcePostStep"));
}

/**
 * Apply a function to the attribute of each vertex in the graph.
 *
 * <p>The vertex ID ({@code f0}) is copied over unchanged and declared as a forwarded field;
 * the value ({@code f1}) is replaced by the mapper's result. The wrapping function writes
 * into one {@code Vertex} instance that it returns for every record.
 *
 * @param mapper the map function to apply.
 * @param returnType the explicit return type.
 * @param <NV> the new vertex value type
 * @return a new graph
 */
public <NV> Graph<K, NV, EV> mapVertices(final MapFunction<Vertex<K, VV>, NV> mapper, TypeInformation<Vertex<K, NV>> returnType) {
  final MapFunction<Vertex<K, VV>, Vertex<K, NV>> valueReplacer =
      new MapFunction<Vertex<K, VV>, Vertex<K, NV>>() {
        private Vertex<K, NV> reused = new Vertex<>();

        @Override
        public Vertex<K, NV> map(Vertex<K, VV> vertex) throws Exception {
          reused.f0 = vertex.f0;
          reused.f1 = mapper.map(vertex);
          return reused;
        }
      };

  final DataSet<Vertex<K, NV>> remappedVertices = vertices
    .map(valueReplacer)
      .returns(returnType)
      .withForwardedFields("f0")
      .name("Map vertices");

  return new Graph<>(remappedVertices, this.edges, this.context);
}

/**
 * Maps the parent data set through {@code StringDeserializerMap} and sends the resulting
 * strings to a {@code PrintingOutputFormat} constructed with {@code info.toError}.
 *
 * @param info operation description supplying the parent set ID, the {@code toError}
 *             flag and the parallelism used for both the map step and the sink
 */
private void createPrintSink(PythonOperationInfo info) {
  final DataSet<byte[]> input = sets.getDataSet(info.parentID);

  input
    .map(new StringDeserializerMap())
      .setParallelism(info.parallelism)
      .name("PrintSinkPreStep")
    .output(new PrintingOutputFormat<String>(info.toError))
      .setParallelism(info.parallelism);
}

/**
 * Maps the parent data set through {@code StringTupleDeserializerMap} and writes the
 * result as CSV to {@code info.path}.
 *
 * @param info operation description supplying the parent set ID, target path,
 *             line/field delimiters, write mode and parallelism
 */
private void createCsvSink(PythonOperationInfo info) {
  final DataSet<byte[]> input = sets.getDataSet(info.parentID);

  input
    .map(new StringTupleDeserializerMap())
      .setParallelism(info.parallelism)
      .name("CsvSinkPreStep")
    .writeAsCsv(info.path, info.lineDelimiter, info.fieldDelimiter, info.writeMode)
      .setParallelism(info.parallelism)
      .name("CsvSink");
}

/**
 * Reads {@code info.path} as a text file, maps each line through {@code SerializerMap},
 * and registers the result under {@code info.setID}.
 *
 * @param env the execution environment the source is created on
 * @param info operation description supplying the path, parallelism and target set ID
 */
private void createTextSource(ExecutionEnvironment env, PythonOperationInfo info) {
  final DataSet<byte[]> serializedLines = env
    .readTextFile(info.path)
      .setParallelism(info.parallelism)
      .name("TextSource")
    .map(new SerializerMap<String>())
      .setParallelism(info.parallelism)
      .name("TextSourcePostStep");

  sets.add(info.setID, serializedLines);
}

/**
 * Generates the number sequence from {@code info.frm} to {@code info.to}, maps each value
 * through {@code SerializerMap}, and registers the result under {@code info.setID}.
 *
 * @param env the execution environment the source is created on
 * @param info operation description supplying the sequence bounds, parallelism and target set ID
 */
private void createSequenceSource(ExecutionEnvironment env, PythonOperationInfo info) {
  final DataSet<byte[]> serializedNumbers = env
    .generateSequence(info.frm, info.to)
      .setParallelism(info.parallelism)
      .name("SequenceSource")
    .map(new SerializerMap<Long>())
      .setParallelism(info.parallelism)
      .name("SequenceSourcePostStep");

  sets.add(info.setID, serializedNumbers);
}

/**
 * Creates a source from the collection {@code info.values}, maps each element through
 * {@code SerializerMap}, and registers the result under {@code info.setID}.
 *
 * @param env the execution environment the source is created on
 * @param info operation description supplying the values, parallelism and target set ID
 */
private void createValueSource(ExecutionEnvironment env, PythonOperationInfo info) {
  final DataSet<byte[]> serializedValues = env
    .fromCollection(info.values)
      .setParallelism(info.parallelism)
      .name("ValueSource")
    .map(new SerializerMap<>())
      .setParallelism(info.parallelism)
      .name("ValueSourcePostStep");

  sets.add(info.setID, serializedValues);
}

/**
 * Count the number of elements in a DataSet.
 *
 * <p>Each element is first mapped to a {@code LongValue} of 1 ("Emit 1"); the ones are
 * then combined with {@code AddLongValue} in a reduce step ("Sum").
 *
 * @param input DataSet of elements to be counted
 * @param <T> element type
 * @return count
 */
public static <T> DataSet<LongValue> count(DataSet<T> input) {
  final DataSet<LongValue> ones = input
    .map(new MapTo<>(new LongValue(1)))
      .returns(LONG_VALUE_TYPE_INFO)
      .name("Emit 1");

  return ones
    .reduce(new AddLongValue())
      .name("Sum");
}

/**
 * Generates a graph with {@code 2 * vertexPairCount} vertices. The edges are produced by
 * applying {@code LinkVertexToSingletonNeighbor} to the sequence
 * {@code 0 .. vertexCount - 1}.
 *
 * <p>Fails the {@code Preconditions.checkState} check when {@code vertexPairCount} is not positive.
 *
 * @return the generated graph with NullValue vertex and edge values
 */
@Override
public Graph<LongValue, NullValue, NullValue> generate() {
  Preconditions.checkState(vertexPairCount > 0);

  // Vertices: two per pair.
  final long vertexCount = 2 * vertexPairCount;
  final DataSet<Vertex<LongValue, NullValue>> vertices =
    GraphGeneratorUtils.vertexSequence(env, parallelism, vertexCount);

  // Edges: start from one element per vertex ID ...
  final LongValueSequenceIterator vertexIds = new LongValueSequenceIterator(0, vertexCount - 1);
  final DataSet<LongValue> edgeSources = env
    .fromParallelCollection(vertexIds, LongValue.class)
      .setParallelism(parallelism)
      .name("Edge iterators");

  // ... and link each one to its neighbor.
  // NOTE(review): the operator name below mentions a "complete graph" although the mapper
  // is LinkVertexToSingletonNeighbor - looks like a leftover label; confirm before renaming.
  final DataSet<Edge<LongValue, NullValue>> edges = edgeSources
    .map(new LinkVertexToSingletonNeighbor())
      .setParallelism(parallelism)
      .name("Complete graph edges");

  // Graph
  return Graph.fromDataSet(vertices, edges, env);
}

/**
 * Applies {@code first(info.count)} to the parent set and registers the result under
 * {@code info.setID}. The parent may be a plain data set, an unsorted grouping or a
 * sorted grouping; for the two grouping cases the result is additionally mapped through
 * {@code KeyDiscarder}. Nothing is registered when the parent is none of the three.
 *
 * @param info operation description supplying the parent set ID, element count,
 *             parallelism and the ID to register the result under
 * @param <K> tuple type of the first field of each grouped record
 */
private <K extends Tuple> void createFirstOperation(PythonOperationInfo info) {
  if (sets.isDataSet(info.parentID)) {
    final DataSet<byte[]> parent = sets.getDataSet(info.parentID);
    final DataSet<byte[]> firstN = parent
      .first(info.count)
        .setParallelism(info.parallelism)
        .name("First");
    sets.add(info.setID, firstN);
    return;
  }

  if (sets.isUnsortedGrouping(info.parentID)) {
    final UnsortedGrouping<Tuple2<K, byte[]>> grouping = sets.getUnsortedGrouping(info.parentID);
    final DataSet<byte[]> firstN = grouping
      .first(info.count)
        .setParallelism(info.parallelism)
        .name("First")
      .map(new KeyDiscarder<K>())
        .setParallelism(info.parallelism)
        .name("FirstPostStep");
    sets.add(info.setID, firstN);
    return;
  }

  if (sets.isSortedGrouping(info.parentID)) {
    final SortedGrouping<Tuple2<K, byte[]>> grouping = sets.getSortedGrouping(info.parentID);
    final DataSet<byte[]> firstN = grouping
      .first(info.count)
        .setParallelism(info.parallelism)
        .name("First")
      .map(new KeyDiscarder<K>())
        .setParallelism(info.parallelism)
        .name("FirstPostStep");
    sets.add(info.setID, firstN);
  }
}

/**
 * Builds a plan in which one source is consumed by two mappers: one whose result closes
 * the iteration and one that receives the iteration result as a broadcast set. The test
 * passes when {@code compileNoStats} handles the plan without throwing.
 */
@Test
public void testBranchBeforeIteration() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(DEFAULT_PARALLELISM);

  final DataSet<Long> sharedSource = env.generateSequence(0,1);
  final DataSet<Long> loopInput = env.generateSequence(0,1);

  final IterativeDataSet<Long> loopHead = loopInput.iterate(10).name("Loop");
  final DataSet<Long> loopTail = sharedSource
    .map(new IdentityMapper<Long>())
      .withBroadcastSet(loopHead, "BC")
      .name("In-Loop Mapper");
  final DataSet<Long> loopResult = loopHead.closeWith(loopTail);

  final DataSet<Long> postLoop = sharedSource
    .map(new IdentityMapper<Long>())
      .withBroadcastSet(loopResult, "BC")
      .name("Post-Loop Mapper");
  postLoop.output(new DiscardingOutputFormat<Long>());

  final Plan plan = env.createProgramPlan();
  try {
    compileNoStats(plan);
  } catch (Exception e) {
    e.printStackTrace();
    Assert.fail(e.getMessage());
  }
}

/**
 * Builds a plan in which the iteration result is consumed twice: once directly by a sink
 * and once through an extra mapper followed by a second sink. The test passes when
 * {@code compileNoStats} handles the plan without throwing.
 */
@Test
public void testBranchAfterIteration() {
  final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
  env.setParallelism(DEFAULT_PARALLELISM);

  final DataSet<Long> source = env.generateSequence(0,1);

  final IterativeDataSet<Long> loopHead = source.iterate(10);
  final DataSet<Long> loopTail = loopHead
    .map(new IdentityMapper<Long>())
      .name("Mapper");
  final DataSet<Long> loopResult = loopHead.closeWith(loopTail);

  // First consumer: the iteration result goes straight to a sink.
  loopResult.output(new DiscardingOutputFormat<Long>());
  // Second consumer: the same result is mapped once more before its sink.
  loopResult
    .map(new IdentityMapper<Long>())
    .output(new DiscardingOutputFormat<Long>());

  final Plan plan = env.createProgramPlan();
  try {
    compileNoStats(plan);
  } catch (Exception e) {
    e.printStackTrace();
    Assert.fail(e.getMessage());
  }
}

Popular methods of MapOperator

Popular in Java

Parsing JSON documents to java classes using gson
setRequestProperty (URLConnection)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
scheduleAtFixedRate (ScheduledExecutorService)
ByteBuffer (java.nio)
A buffer for bytes. A byte buffer can be created in either one of the following ways: * #allocate
Queue (java.util)
A collection designed for holding elements prior to processing. Besides basic java.util.Collection o
SortedMap (java.util)
A map that has its keys ordered. The sorting is according to either the natural ordering of its keys
DateTimeFormat (org.joda.time.format)
Factory that creates instances of DateTimeFormatter from patterns and styles. Datetime formatting i
GridBagLayout (java.awt)
The GridBagLayout class is a flexible layout manager that aligns components vertically and horizonta
Get (org.apache.hadoop.hbase.client)
Used to perform Get operations on a single row. To get everything for a row, instantiate a Get objec
Top Vim plugins

How to use the name method in org.apache.flink.api.java.operators.MapOperator

Best Java code snippets using org.apache.flink.api.java.operators.MapOperator.name (Showing top 20 results out of 315)

How to use
name
method
in
org.apache.flink.api.java.operators.MapOperator