/**
 * Merges this graph with another graph by taking the union of both vertex
 * sets and both edge sets. Duplicate vertices are eliminated, while
 * duplicate edges are kept.
 *
 * @param graph the graph to perform union with
 * @return a new graph
 */
public Graph<K, VV, EV> union(Graph<K, VV, EV> graph) {
    // Concatenate both vertex sets, then drop duplicate vertices.
    DataSet<Vertex<K, VV>> combinedVertices = graph
        .getVertices()
        .union(this.getVertices())
        .name("Vertices")
        .distinct()
        .name("Vertices");

    // Edges are concatenated as-is; duplicate edges remain by design.
    DataSet<Edge<K, EV>> combinedEdges = graph
        .getEdges()
        .union(this.getEdges())
        .name("Edges");

    return new Graph<>(combinedVertices, combinedEdges, this.context);
}
/**
 * Verifies distinct on tuples when every field is named via the
 * positional key field selector.
 */
@Test
public void testCorrectnessOfDistinctOnTuplesWithKeyFieldSelector() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.getSmall3TupleDataSet(env);
    // Duplicate every record via self-union, then deduplicate on all three fields.
    DataSet<Tuple3<Integer, Long, String>> deduplicated = input.union(input).distinct(0, 1, 2);

    List<Tuple3<Integer, Long, String>> result = deduplicated.collect();

    String expected = "1,1,Hi\n" + "2,2,Hello\n" + "3,2,Hello world\n";

    compareResultAsTuples(result, expected);
}
/**
 * Verifies that parameterless distinct on full tuples removes the
 * duplicates introduced by a self-union.
 */
@Test
public void testCorrectnessOfDistinctOnTuples() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.getSmall3TupleDataSet(env);
    // Self-union doubles each record; distinct() must collapse them again.
    DataSet<Tuple3<Integer, Long, String>> deduplicated = input.union(input).distinct();

    List<Tuple3<Integer, Long, String>> result = deduplicated.collect();

    String expected = "1,1,Hi\n" + "2,2,Hello\n" + "3,2,Hello world\n";

    compareResultAsTuples(result, expected);
}
/**
 * Verifies distinct on tuples when only a subset of the fields is used
 * as the deduplication key.
 */
@Test
public void testCorrectnessOfDistinctOnTuplesWithKeyFieldSelectorWithNotAllFieldsSelected() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> input =
        CollectionDataSets.getSmall5TupleDataSet(env);
    // Deduplicate on field 0 only, then project that field out.
    DataSet<Tuple1<Integer>> deduplicated = input.union(input).distinct(0).project(0);

    List<Tuple1<Integer>> result = deduplicated.collect();

    String expected = "1\n" + "2\n";

    compareResultAsTuples(result, expected);
}
/**
 * Verifies distinct on tuples when the key is given as a field
 * expression ("f0") rather than a positional index.
 */
@Test
public void testCorrectnessOfDistinctOnTuplesWithFieldExpressions() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> input =
        CollectionDataSets.getSmall5TupleDataSet(env);
    // Deduplicate on the "f0" field expression, then project it out.
    DataSet<Tuple1<Integer>> deduplicated = input.union(input).distinct("f0").project(0);

    List<Tuple1<Integer>> result = deduplicated.collect();

    String expected = "1\n" + "2\n";

    compareResultAsTuples(result, expected);
}
/**
 * Verifies distinct on an atomic (non-tuple) String type using the
 * select-all key expression {@code Keys.ExpressionKeys.SELECT_ALL_CHAR}.
 */
@Test
public void testCorrectnessOfDistinctOnAtomicWithSelectAllChar() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = CollectionDataSets.getStringDataSet(env);
    // "*" selects the whole atomic value as the deduplication key.
    DataSet<String> deduplicated = input.union(input).distinct("*");

    List<String> result = deduplicated.collect();

    String expected = "I am fine.\n"
        + "Luke Skywalker\n"
        + "LOL\n"
        + "Hello world, how are you?\n"
        + "Hi\n"
        + "Hello world\n"
        + "Hello\n"
        + "Random comment\n";

    compareResultAsText(result, expected);
}
}
/**
 * Verifies distinct on tuples when the deduplication key is computed by
 * a {@code KeySelector} function.
 */
@Test
public void testCorrectnessOfDistinctOnTuplesWithKeyExtractorFunction() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> input =
        CollectionDataSets.getSmall5TupleDataSet(env);
    // Deduplicate on the key produced by KeySelector1, then project field 0.
    DataSet<Tuple1<Integer>> deduplicated =
        input.union(input).distinct(new KeySelector1()).project(0);

    List<Tuple1<Integer>> result = deduplicated.collect();

    String expected = "1\n" + "2\n";

    compareResultAsTuples(result, expected);
}
@Override protected void testProgram() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // the test data is constructed such that the merge join zig zag // has an early out, leaving elements on the static path input unconsumed DataSet<Path> edges = env.fromElements( new Path(2, 1), new Path(4, 1), new Path(6, 3), new Path(8, 3), new Path(10, 1), new Path(12, 1), new Path(14, 3), new Path(16, 3), new Path(18, 1), new Path(20, 1)); IterativeDataSet<Path> currentPaths = edges.iterate(10); DataSet<Path> newPaths = currentPaths .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from") .with(new PathConnector()) .union(currentPaths).distinct("from", "to"); DataSet<Path> result = currentPaths.closeWith(newPaths); result.output(new DiscardingOutputFormat<Path>()); env.execute(); }
@Override protected void testProgram() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // the test data is constructed such that the merge join zig zag // has an early out, leaving elements on the dynamic path input unconsumed DataSet<Path> edges = env.fromElements( new Path(1, 2), new Path(1, 4), new Path(3, 6), new Path(3, 8), new Path(1, 10), new Path(1, 12), new Path(3, 14), new Path(3, 16), new Path(1, 18), new Path(1, 20)); IterativeDataSet<Path> currentPaths = edges.iterate(10); DataSet<Path> newPaths = currentPaths .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from") .with(new PathConnector()) .union(currentPaths).distinct("from", "to"); DataSet<Path> result = currentPaths.closeWith(newPaths); result.output(new DiscardingOutputFormat<Path>()); env.execute(); }
/**
 * Returns the graph heads of both input collections, deduplicated by
 * graph head identifier.
 */
@Override
protected DataSet<GraphHead> computeNewGraphHeads() {
    DataSet<GraphHead> combined = firstCollection.getGraphHeads()
        .union(secondCollection.getGraphHeads());
    return combined.distinct(new Id<GraphHead>());
}
/**
 * Returns the vertices of both input collections, deduplicated by
 * vertex identifier.
 */
@Override
protected DataSet<Vertex> computeNewVertices(DataSet<GraphHead> newGraphHeads) {
    DataSet<Vertex> combined = firstCollection.getVertices()
        .union(secondCollection.getVertices());
    return combined.distinct(new Id<Vertex>());
}
/**
 * Builds the vertex set of the result: the union of the vertices of both
 * input collections with duplicates (same identifier) removed.
 */
@Override
protected DataSet<Vertex> computeNewVertices(
    DataSet<GraphHead> newGraphHeads) {
    return firstCollection
        .getVertices()
        .union(secondCollection.getVertices())
        .distinct(new Id<Vertex>());
}
/**
 * Builds the edge set of the result: the union of the edges of both
 * input collections with duplicates (same identifier) removed.
 */
@Override
protected DataSet<Edge> computeNewEdges(DataSet<Vertex> newVertices) {
    return firstCollection
        .getEdges()
        .union(secondCollection.getEdges())
        .distinct(new Id<Edge>());
}
/**
 * Returns the edges of both input collections, deduplicated by edge
 * identifier.
 */
@Override
protected DataSet<Edge> computeNewEdges(DataSet<Vertex> newVertices) {
    DataSet<Edge> combined = firstCollection.getEdges()
        .union(secondCollection.getEdges());
    return combined.distinct(new Id<Edge>());
}
}
/**
 * Builds the graph head set of the result: the union of the graph heads
 * of both input collections with duplicates (same identifier) removed.
 */
@Override
protected DataSet<GraphHead> computeNewGraphHeads() {
    return firstCollection
        .getGraphHeads()
        .union(secondCollection.getGraphHeads())
        .distinct(new Id<GraphHead>());
}
/**
 * Creates a new logical graph by union the vertex and edge sets of two
 * input graphs. Vertex and edge equality is based on their respective
 * identifiers.
 *
 * @param firstGraph first input graph
 * @param secondGraph second input graph
 * @return combined graph
 */
@Override
public LogicalGraph execute(LogicalGraph firstGraph, LogicalGraph secondGraph) {
    // Union vertices of both graphs and collapse entries with equal ids.
    DataSet<Vertex> combinedVertices = firstGraph.getVertices()
        .union(secondGraph.getVertices());
    DataSet<Vertex> newVertexSet = combinedVertices.distinct(new Id<Vertex>());

    // Same for the edges.
    DataSet<Edge> combinedEdges = firstGraph.getEdges()
        .union(secondGraph.getEdges());
    DataSet<Edge> newEdgeSet = combinedEdges.distinct(new Id<Edge>());

    return firstGraph.getConfig().getLogicalGraphFactory()
        .fromDataSets(newVertexSet, newEdgeSet);
}
/**
 * Performs union on the vertices and edges sets of the input graphs
 * removing duplicate vertices but maintaining duplicate edges.
 *
 * @param graph the graph to perform union with
 * @return a new graph
 */
public Graph<K, VV, EV> union(Graph<K, VV, EV> graph) {
    // Vertices: union both sets, then eliminate duplicates.
    DataSet<Vertex<K, VV>> vertexUnion =
        graph.getVertices().union(this.getVertices()).name("Vertices");
    DataSet<Vertex<K, VV>> dedupedVertices = vertexUnion.distinct().name("Vertices");

    // Edges: plain union, duplicates are intentionally preserved.
    DataSet<Edge<K, EV>> allEdges =
        graph.getEdges().union(this.getEdges()).name("Edges");

    return new Graph<>(dedupedVertices, allEdges, this.context);
}
/**
 * Returns the subgraph of the given supergraph that is induced by the
 * edges that fulfil the given filter function.
 *
 * @param superGraph supergraph
 * @return edge-induced subgraph
 */
private LG edgeInducedSubgraph(LG superGraph) {
    DataSet<E> retainedEdges = superGraph.getEdges().filter(edgeFilterFunction);

    // Vertices referenced as the source of a retained edge.
    DataSet<V> sourceVertices = retainedEdges
        .join(superGraph.getVertices())
        .where(new SourceId<>()).equalTo(new Id<>())
        .with(new RightSide<>());

    // Vertices referenced as the target of a retained edge.
    DataSet<V> targetVertices = retainedEdges
        .join(superGraph.getVertices())
        .where(new TargetId<>()).equalTo(new Id<>())
        .with(new RightSide<>());

    // A vertex may be both source and target, so deduplicate by id.
    DataSet<V> inducedVertices = sourceVertices
        .union(targetVertices)
        .distinct(new Id<>());

    return superGraph.getFactory().fromDataSets(inducedVertices, retainedEdges);
}
/**
 * Samples the graph by randomly selecting edges and keeping exactly the
 * vertices incident to the selected edges.
 */
@Override
public LogicalGraph sample(LogicalGraph graph) {
    // Randomly retain a subset of the edges.
    DataSet<Edge> sampledEdges = graph.getEdges()
        .filter(new RandomFilter<>(sampleSize, randomSeed));

    // Vertices acting as the source of a sampled edge.
    DataSet<Vertex> sourceVertices = graph.getVertices()
        .join(sampledEdges)
        .where(new Id<>()).equalTo(new SourceId<>())
        .with(new LeftSide<>())
        .distinct(new Id<>());

    // Vertices acting as the target of a sampled edge.
    DataSet<Vertex> targetVertices = graph.getVertices()
        .join(sampledEdges)
        .where(new Id<>()).equalTo(new TargetId<>())
        .with(new LeftSide<>())
        .distinct(new Id<>());

    // A vertex may be both source and target, so deduplicate once more.
    DataSet<Vertex> sampledVertices =
        sourceVertices.union(targetVertices).distinct(new Id<>());

    return graph.getConfig().getLogicalGraphFactory()
        .fromDataSets(sampledVertices, sampledEdges);
}
}
/**
 * Draws a random edge sample and restricts the vertex set to the
 * endpoints of the sampled edges.
 */
@Override
public LogicalGraph sample(LogicalGraph graph) {
    DataSet<Edge> keptEdges =
        graph.getEdges().filter(new RandomFilter<>(sampleSize, randomSeed));

    // Join the vertex set against the kept edges on both endpoint roles.
    DataSet<Vertex> bySource = graph.getVertices()
        .join(keptEdges)
        .where(new Id<>()).equalTo(new SourceId<>())
        .with(new LeftSide<>())
        .distinct(new Id<>());
    DataSet<Vertex> byTarget = graph.getVertices()
        .join(keptEdges)
        .where(new Id<>()).equalTo(new TargetId<>())
        .with(new LeftSide<>())
        .distinct(new Id<>());

    DataSet<Vertex> keptVertices = bySource.union(byTarget).distinct(new Id<>());

    return graph.getConfig().getLogicalGraphFactory()
        .fromDataSets(keptVertices, keptEdges);
}