/**
 * Invokes the EnumTriangles program, handing it the edge input path and the
 * result path held by this test.
 */
@Override
protected void testProgram() throws Exception {
    final String[] programArgs = {
        "--edges", edgePath,
        "--output", resultPath
    };
    EnumTriangles.main(programArgs);
}
/**
 * Invokes the ConnectedComponents program with this test's vertex, edge and
 * result paths and an iteration argument of 100.
 */
@Override
protected void testProgram() throws Exception {
    final String[] programArgs = {
        "--vertices", verticesPath,
        "--edges", edgesPath,
        "--output", resultPath,
        "--iterations", "100"
    };
    ConnectedComponents.main(programArgs);
}
/**
 * Invokes the TransitiveClosureNaive program with this test's edge and result
 * paths and an iteration argument of 5.
 */
@Override
protected void testProgram() throws Exception {
    final String[] programArgs = {
        "--edges", edgesPath,
        "--output", resultPath,
        "--iterations", "5"
    };
    TransitiveClosureNaive.main(programArgs);
}
DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env) .rebalance(); DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env) .rebalance() .flatMap(new ConnectedComponents.UndirectEdge()); .map(new ConnectedComponents.DuplicateValue<>()); .with(new ConnectedComponents.NeighborWithComponentIDJoin()) .with(new ConnectedComponents.ComponentIdFilter());
final int numPages = params.getInt("numPages", PageRankData.getNumberOfPages()); final int maxIterations = params.getInt("iterations", 10); DataSet<Long> pagesInput = getPagesDataSet(env, params); DataSet<Tuple2<Long, Long>> linksInput = getLinksDataSet(env, params); map(new RankAssigner((1.0d / numPages))); linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList()); .join(adjacencyListInput).where(0).equalTo(0).flatMap(new JoinVertexWithEdgesMatch()) .map(new Dampener(DAMPENING_FACTOR, numPages)); newRanks.join(iteration).where(0).equalTo(0) .filter(new EpsilonFilter()));
DataSet<Long> vertices = getVertexDataSet(env, params); DataSet<Tuple2<Long, Long>> edges = getEdgeDataSet(env, params).flatMap(new UndirectEdge()); vertices.map(new DuplicateValue<Long>()); DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ComponentIdFilter());
@Override protected void testProgram() throws Exception { // set up execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read vertex and edge data DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class); DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class) .flatMap(new ConnectedComponents.UndirectEdge()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ConnectedComponents.ComponentIdFilter()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes); result.writeAsCsv(resultPath, "\n", " "); // execute program env.execute("Connected Components Example"); }
map(new RankAssigner((1.0d / 10))); linksInput.groupBy(0).reduceGroup(new BuildOutgoingEdgeList()); .flatMap(new JoinVertexWithEdgesMatch()) .map(new Dampener(0.85, 10)); newRanks.join(iteration).where(0).equalTo(0) .filter(new EpsilonFilter()));
.includeFields(true, true) .types(Integer.class, Integer.class) .map(new TupleEdgeConverter()); } else { System.out.println("Executing EnumTriangles example with default edges data set."); System.out.println("Use --edges to specify file input."); edges = EnumTrianglesData.getDefaultEdgeDataSet(env); .map(new EdgeByIdProjector()); .groupBy(Edge.V1).sortGroup(Edge.V2, Order.ASCENDING).reduceGroup(new TriadBuilder()) .join(edgesById).where(Triad.V2, Triad.V3).equalTo(Edge.V1, Edge.V2).with(new TriadFilter());
@Override protected void testProgram() throws Exception { // set up execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read vertex and edge data DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class); DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class) .flatMap(new ConnectedComponents.UndirectEdge()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset() .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1); DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet() .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds); result.writeAsCsv(resultPath, "\n", " "); // execute program env.execute("Connected Components Example"); }
/**
 * Runs PageRank with an iteration cap of 1000 and expects the ranks recorded in
 * {@code PageRankData.RANKS_AFTER_EPSILON_0_0001_CONVERGENCE}.
 *
 * @throws Exception if the PageRank program fails
 */
@Test
public void testPageRankWithConvergenceCriterion() throws Exception {
    PageRank.main(new String[]{
        "--pages", verticesPath,
        "--links", edgesPath,
        "--output", resultPath,
        "--numPages", PageRankData.NUM_VERTICES + "",
        // The cap has to be passed as "--iterations", the same option the
        // small-iteration-count test uses. It was previously passed as "--vertices",
        // which is not an iteration option, so the value 1000 never took effect.
        "--iterations", "1000"});
    expected = PageRankData.RANKS_AFTER_EPSILON_0_0001_CONVERGENCE;
}
}
private static void runConnectedComponents(ExecutionEnvironment env) throws Exception { env.setParallelism(PARALLELISM); env.getConfig().disableSysoutLogging(); // read vertex and edge data DataSet<Long> vertices = ConnectedComponentsData.getDefaultVertexDataSet(env) .rebalance(); DataSet<Tuple2<Long, Long>> edges = ConnectedComponentsData.getDefaultEdgeDataSet(env) .rebalance() .flatMap(new ConnectedComponents.UndirectEdge()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices .map(new ConnectedComponents.DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); // apply the step logic: join with the edges, select the minimum neighbor, // update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges) .where(0).equalTo(0) .with(new ConnectedComponents.NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()) .where(0).equalTo(0) .with(new ConnectedComponents.ComponentIdFilter()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes); result.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); env.execute(); }
@Override protected void testProgram() throws Exception { // set up execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read vertex and edge data DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class); DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class) .flatMap(new UndirectEdge()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); iteration.setSolutionSetUnManaged(true); // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ComponentIdFilter()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes); result.writeAsCsv(resultPath, "\n", " "); // execute program env.execute("Connected Components Example"); }
.flatMap(new ConnectedComponents.UndirectEdge()); .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()).where(0).equalTo(0)
@Test public void dumpBulkIterationKMeans() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { ConnectedComponents.main(new String[] { "--vertices", IN_FILE, "--edges", IN_FILE, "--output", OUT_FILE, "--iterations", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("ConnectedComponents failed with an exception"); } dump(env.getPlan()); }
/**
 * Runs PageRank with an iteration argument of 3 and expects the ranks recorded
 * in {@code PageRankData.RANKS_AFTER_3_ITERATIONS}.
 */
@Test
public void testPageRankSmallNumberOfIterations() throws Exception {
    final String[] programArgs = {
        "--pages", verticesPath,
        "--links", edgesPath,
        "--output", resultPath,
        "--numPages", PageRankData.NUM_VERTICES + "",
        "--iterations", "3"
    };
    PageRank.main(programArgs);

    expected = PageRankData.RANKS_AFTER_3_ITERATIONS;
}
@Test public void dumpBulkIterationKMeans() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { ConnectedComponents.main(new String[] { "--vertices", IN_FILE, "--edges", IN_FILE, "--output", OUT_FILE, "--iterations", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("ConnectedComponents failed with an exception"); } dump(env.getPlan()); }
@Test public void dumpPageRank() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { PageRank.main(new String[]{ "--pages", IN_FILE, "--links", IN_FILE, "--output", OUT_FILE, "--numPages", "10", "--iterations", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("PageRank failed with an exception"); } dump(env.getPlan()); }
@Test public void dumpPageRank() { // prepare the test environment PreviewPlanEnvironment env = new PreviewPlanEnvironment(); env.setAsContext(); try { // --pages <path> --links <path> --output <path> --numPages <n> --iterations <n> PageRank.main(new String[]{ "--pages", IN_FILE, "--links", IN_FILE, "--output", OUT_FILE, "--numPages", "10", "--iterations", "123"}); } catch (OptimizerPlanEnvironment.ProgramAbortException pae) { // all good. } catch (Exception e) { e.printStackTrace(); Assert.fail("PageRank failed with an exception"); } dump(env.getPlan()); }