@Override protected void testProgram() throws Exception { // set up execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read vertex and edge data DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n")) .map(new VertexParser()); DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n")) .flatMap(new EdgeParser()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> changes = iteration .getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin()) .coGroup(iteration.getSolutionSet()).where(0).equalTo(0) .with(new MinIdAndUpdate()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes); // emit result List<Tuple2<Long, Long>> resutTuples = new ArrayList<>(); result.output(new LocalCollectionOutputFormat<>(resutTuples)); env.execute(); }
vertices.map(new DuplicateValue<Long>());
vertices.map(new DuplicateValue<Long>());
vertices.map(new DuplicateValue<Long>());
vertices.map(new DuplicateValue<Long>());
DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>());
.map(new ConnectedComponents.DuplicateValue<>());
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception { env.setParallelism(PARALLELISM); env.getConfig().disableSysoutLogging(); // read vertex and edge data DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env) .rebalance(); DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env) .rebalance() .flatMap(new ConnectedComponents.UndirectEdge()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices .map(new ConnectedComponents.DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); // apply the step logic: join with the edges, select the minimum neighbor, // update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges) .where(0).equalTo(0) .with(new ConnectedComponents.NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()) .where(0).equalTo(0) .with(new ConnectedComponents.ComponentIdFilter()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes); result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); env.execute(); }