/**
 * Merges this graph with another graph by taking the union of both vertex
 * sets and both edge sets. Duplicate vertices are eliminated, while
 * duplicate edges are kept.
 *
 * @param graph the graph to perform union with
 * @return a new graph
 */
public Graph<K, VV, EV> union(Graph<K, VV, EV> graph) {
    // Concatenate both vertex sets, then drop duplicate vertices.
    DataSet<Vertex<K, VV>> combinedVertices = graph
        .getVertices()
        .union(this.getVertices())
        .name("Vertices")
        .distinct()
        .name("Vertices");

    // Edges are concatenated as-is; duplicate edges remain by design.
    DataSet<Edge<K, EV>> combinedEdges = graph
        .getEdges()
        .union(this.getEdges())
        .name("Edges");

    return new Graph<>(combinedVertices, combinedEdges, this.context);
}
/**
 * Verifies distinct on tuples when every field is named via the
 * positional key field selector.
 */
@Test
public void testCorrectnessOfDistinctOnTuplesWithKeyFieldSelector() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.getSmall3TupleDataSet(env);
    // Duplicate every record via self-union, then deduplicate on all three fields.
    DataSet<Tuple3<Integer, Long, String>> deduplicated = input.union(input).distinct(0, 1, 2);

    List<Tuple3<Integer, Long, String>> result = deduplicated.collect();

    String expected = "1,1,Hi\n" + "2,2,Hello\n" + "3,2,Hello world\n";

    compareResultAsTuples(result, expected);
}
/**
 * Verifies that parameterless distinct on full tuples removes the
 * duplicates introduced by a self-union.
 */
@Test
public void testCorrectnessOfDistinctOnTuples() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Long, String>> input = CollectionDataSets.getSmall3TupleDataSet(env);
    // Self-union doubles each record; distinct() must collapse them again.
    DataSet<Tuple3<Integer, Long, String>> deduplicated = input.union(input).distinct();

    List<Tuple3<Integer, Long, String>> result = deduplicated.collect();

    String expected = "1,1,Hi\n" + "2,2,Hello\n" + "3,2,Hello world\n";

    compareResultAsTuples(result, expected);
}
/**
 * Verifies distinct on tuples when only a subset of the fields is used
 * as the deduplication key.
 */
@Test
public void testCorrectnessOfDistinctOnTuplesWithKeyFieldSelectorWithNotAllFieldsSelected() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> input =
        CollectionDataSets.getSmall5TupleDataSet(env);
    // Deduplicate on field 0 only, then project that field out.
    DataSet<Tuple1<Integer>> deduplicated = input.union(input).distinct(0).project(0);

    List<Tuple1<Integer>> result = deduplicated.collect();

    String expected = "1\n" + "2\n";

    compareResultAsTuples(result, expected);
}
/**
 * Verifies distinct on tuples when the key is given as a field
 * expression ("f0") rather than a positional index.
 */
@Test
public void testCorrectnessOfDistinctOnTuplesWithFieldExpressions() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> input =
        CollectionDataSets.getSmall5TupleDataSet(env);
    // Deduplicate on the "f0" field expression, then project it out.
    DataSet<Tuple1<Integer>> deduplicated = input.union(input).distinct("f0").project(0);

    List<Tuple1<Integer>> result = deduplicated.collect();

    String expected = "1\n" + "2\n";

    compareResultAsTuples(result, expected);
}
/**
 * Verifies distinct on an atomic (non-tuple) String type using the
 * select-all key expression {@code Keys.ExpressionKeys.SELECT_ALL_CHAR}.
 */
@Test
public void testCorrectnessOfDistinctOnAtomicWithSelectAllChar() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = CollectionDataSets.getStringDataSet(env);
    // "*" selects the whole atomic value as the deduplication key.
    DataSet<String> deduplicated = input.union(input).distinct("*");

    List<String> result = deduplicated.collect();

    String expected = "I am fine.\n"
        + "Luke Skywalker\n"
        + "LOL\n"
        + "Hello world, how are you?\n"
        + "Hi\n"
        + "Hello world\n"
        + "Hello\n"
        + "Random comment\n";

    compareResultAsText(result, expected);
}
}
/**
 * Verifies distinct on tuples when the deduplication key is computed by
 * a {@code KeySelector} function.
 */
@Test
public void testCorrectnessOfDistinctOnTuplesWithKeyExtractorFunction() throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple5<Integer, Long, Integer, String, Long>> input =
        CollectionDataSets.getSmall5TupleDataSet(env);
    // Deduplicate on the key produced by KeySelector1, then project field 0.
    DataSet<Tuple1<Integer>> deduplicated =
        input.union(input).distinct(new KeySelector1()).project(0);

    List<Tuple1<Integer>> result = deduplicated.collect();

    String expected = "1\n" + "2\n";

    compareResultAsTuples(result, expected);
}
@Override protected void testProgram() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // the test data is constructed such that the merge join zig zag // has an early out, leaving elements on the static path input unconsumed DataSet<Path> edges = env.fromElements( new Path(2, 1), new Path(4, 1), new Path(6, 3), new Path(8, 3), new Path(10, 1), new Path(12, 1), new Path(14, 3), new Path(16, 3), new Path(18, 1), new Path(20, 1)); IterativeDataSet<Path> currentPaths = edges.iterate(10); DataSet<Path> newPaths = currentPaths .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from") .with(new PathConnector()) .union(currentPaths).distinct("from", "to"); DataSet<Path> result = currentPaths.closeWith(newPaths); result.output(new DiscardingOutputFormat<Path>()); env.execute(); }
@Override protected void testProgram() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // the test data is constructed such that the merge join zig zag // has an early out, leaving elements on the dynamic path input unconsumed DataSet<Path> edges = env.fromElements( new Path(1, 2), new Path(1, 4), new Path(3, 6), new Path(3, 8), new Path(1, 10), new Path(1, 12), new Path(3, 14), new Path(3, 16), new Path(1, 18), new Path(1, 20)); IterativeDataSet<Path> currentPaths = edges.iterate(10); DataSet<Path> newPaths = currentPaths .join(edges, JoinHint.REPARTITION_SORT_MERGE).where("to").equalTo("from") .with(new PathConnector()) .union(currentPaths).distinct("from", "to"); DataSet<Path> result = currentPaths.closeWith(newPaths); result.output(new DiscardingOutputFormat<Path>()); env.execute(); }
/**
 * Returns the graph heads of both input collections, deduplicated by
 * graph head identifier.
 */
@Override
protected DataSet<GraphHead> computeNewGraphHeads() {
    DataSet<GraphHead> combined = firstCollection.getGraphHeads()
        .union(secondCollection.getGraphHeads());
    return combined.distinct(new Id<GraphHead>());
}
/**
 * Returns the vertices of both input collections, deduplicated by
 * vertex identifier.
 */
@Override
protected DataSet<Vertex> computeNewVertices(DataSet<GraphHead> newGraphHeads) {
    DataSet<Vertex> combined = firstCollection.getVertices()
        .union(secondCollection.getVertices());
    return combined.distinct(new Id<Vertex>());
}
/**
 * Builds the vertex set of the result: the union of the vertices of both
 * input collections with duplicates (same identifier) removed.
 */
@Override
protected DataSet<Vertex> computeNewVertices(
    DataSet<GraphHead> newGraphHeads) {
    return firstCollection
        .getVertices()
        .union(secondCollection.getVertices())
        .distinct(new Id<Vertex>());
}
/**
 * Builds the edge set of the result: the union of the edges of both
 * input collections with duplicates (same identifier) removed.
 */
@Override
protected DataSet<Edge> computeNewEdges(DataSet<Vertex> newVertices) {
    return firstCollection
        .getEdges()
        .union(secondCollection.getEdges())
        .distinct(new Id<Edge>());
}
/**
 * Returns the edges of both input collections, deduplicated by edge
 * identifier.
 */
@Override
protected DataSet<Edge> computeNewEdges(DataSet<Vertex> newVertices) {
    DataSet<Edge> combined = firstCollection.getEdges()
        .union(secondCollection.getEdges());
    return combined.distinct(new Id<Edge>());
}
}
/**
 * Builds the graph head set of the result: the union of the graph heads
 * of both input collections with duplicates (same identifier) removed.
 */
@Override
protected DataSet<GraphHead> computeNewGraphHeads() {
    return firstCollection
        .getGraphHeads()
        .union(secondCollection.getGraphHeads())
        .distinct(new Id<GraphHead>());
}
/**
 * Creates a new logical graph by union the vertex and edge sets of two
 * input graphs. Vertex and edge equality is based on their respective
 * identifiers.
 *
 * @param firstGraph first input graph
 * @param secondGraph second input graph
 * @return combined graph
 */
@Override
public LogicalGraph execute(LogicalGraph firstGraph, LogicalGraph secondGraph) {
    // Union vertices of both graphs and collapse entries with equal ids.
    DataSet<Vertex> combinedVertices = firstGraph.getVertices()
        .union(secondGraph.getVertices());
    DataSet<Vertex> newVertexSet = combinedVertices.distinct(new Id<Vertex>());

    // Same for the edges.
    DataSet<Edge> combinedEdges = firstGraph.getEdges()
        .union(secondGraph.getEdges());
    DataSet<Edge> newEdgeSet = combinedEdges.distinct(new Id<Edge>());

    return firstGraph.getConfig().getLogicalGraphFactory()
        .fromDataSets(newVertexSet, newEdgeSet);
}
/**
 * Performs union on the vertices and edges sets of the input graphs
 * removing duplicate vertices but maintaining duplicate edges.
 *
 * @param graph the graph to perform union with
 * @return a new graph
 */
public Graph<K, VV, EV> union(Graph<K, VV, EV> graph) {
    // Vertices: union both sets, then eliminate duplicates.
    DataSet<Vertex<K, VV>> vertexUnion =
        graph.getVertices().union(this.getVertices()).name("Vertices");
    DataSet<Vertex<K, VV>> dedupedVertices = vertexUnion.distinct().name("Vertices");

    // Edges: plain union, duplicates are intentionally preserved.
    DataSet<Edge<K, EV>> allEdges =
        graph.getEdges().union(this.getEdges()).name("Edges");

    return new Graph<>(dedupedVertices, allEdges, this.context);
}
/**
 * Returns the subgraph of the given supergraph that is induced by the
 * edges that fulfil the given filter function.
 *
 * @param superGraph supergraph
 * @return edge-induced subgraph
 */
private LG edgeInducedSubgraph(LG superGraph) {
    DataSet<E> retainedEdges = superGraph.getEdges().filter(edgeFilterFunction);

    // Vertices referenced as the source of a retained edge.
    DataSet<V> sourceVertices = retainedEdges
        .join(superGraph.getVertices())
        .where(new SourceId<>()).equalTo(new Id<>())
        .with(new RightSide<>());

    // Vertices referenced as the target of a retained edge.
    DataSet<V> targetVertices = retainedEdges
        .join(superGraph.getVertices())
        .where(new TargetId<>()).equalTo(new Id<>())
        .with(new RightSide<>());

    // A vertex may be both source and target, so deduplicate by id.
    DataSet<V> inducedVertices = sourceVertices
        .union(targetVertices)
        .distinct(new Id<>());

    return superGraph.getFactory().fromDataSets(inducedVertices, retainedEdges);
}
/**
 * Samples the graph by randomly selecting edges and keeping exactly the
 * vertices incident to the selected edges.
 */
@Override
public LogicalGraph sample(LogicalGraph graph) {
    // Randomly retain a subset of the edges.
    DataSet<Edge> sampledEdges = graph.getEdges()
        .filter(new RandomFilter<>(sampleSize, randomSeed));

    // Vertices acting as the source of a sampled edge.
    DataSet<Vertex> sourceVertices = graph.getVertices()
        .join(sampledEdges)
        .where(new Id<>()).equalTo(new SourceId<>())
        .with(new LeftSide<>())
        .distinct(new Id<>());

    // Vertices acting as the target of a sampled edge.
    DataSet<Vertex> targetVertices = graph.getVertices()
        .join(sampledEdges)
        .where(new Id<>()).equalTo(new TargetId<>())
        .with(new LeftSide<>())
        .distinct(new Id<>());

    // A vertex may be both source and target, so deduplicate once more.
    DataSet<Vertex> sampledVertices =
        sourceVertices.union(targetVertices).distinct(new Id<>());

    return graph.getConfig().getLogicalGraphFactory()
        .fromDataSets(sampledVertices, sampledEdges);
}
}
/**
 * Draws a random edge sample and restricts the vertex set to the
 * endpoints of the sampled edges.
 */
@Override
public LogicalGraph sample(LogicalGraph graph) {
    DataSet<Edge> keptEdges =
        graph.getEdges().filter(new RandomFilter<>(sampleSize, randomSeed));

    // Join the vertex set against the kept edges on both endpoint roles.
    DataSet<Vertex> bySource = graph.getVertices()
        .join(keptEdges)
        .where(new Id<>()).equalTo(new SourceId<>())
        .with(new LeftSide<>())
        .distinct(new Id<>());
    DataSet<Vertex> byTarget = graph.getVertices()
        .join(keptEdges)
        .where(new Id<>()).equalTo(new TargetId<>())
        .with(new LeftSide<>())
        .distinct(new Id<>());

    DataSet<Vertex> keptVertices = bySource.union(byTarget).distinct(new Id<>());

    return graph.getConfig().getLogicalGraphFactory()
        .fromDataSets(keptVertices, keptEdges);
}