@Override protected void testProgram() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20).map(new Dupl()); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 20, 0); iter.closeWith(iter.getWorkset(), iter.getWorkset()) .output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(result)); env.execute(); }
@Override protected void testProgram() throws Exception { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(1); DataSet<Tuple2<Long, Long>> input = env.generateSequence(0, 9).map(new Duplicator<Long>()); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 5, 1); iteration.closeWith(iteration.getWorkset(), iteration.getWorkset().map(new TestMapper())) .output(new LocalCollectionOutputFormat<Tuple2<Long, Long>>(result)); env.execute(); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
@Test public void testEmptyWorksetIteration() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(43); DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20) .map(new MapFunction<Long, Tuple2<Long, Long>>() { @Override public Tuple2<Long, Long> map(Long value){ return null; } }); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0); iter.closeWith(iter.getWorkset(), iter.getWorkset()) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); new JobGraphGenerator().compileJobGraph(op); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
@Test public void testSinkOnWorksetDeltaIteration() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); @SuppressWarnings("unchecked") DataSet<Tuple2<Long, Long>> input = env.fromElements(new Tuple2<Long, Long>(0L,0L)); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 10, 0); DataSet<Tuple2<Long, Long>> mapped = iteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>()); mapped.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); try { env.createProgramPlan(); fail("should throw an exception"); } catch (InvalidProgramException e) { // expected } } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
@Test public void testWorksetIterationNotDependingOnSolutionSet() { try { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 100).map(new Duplicator<Long>()); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 100, 1); DataSet<Tuple2<Long, Long>> iterEnd = iteration.getWorkset().map(new TestMapper<Tuple2<Long,Long>>()); iteration.closeWith(iterEnd, iterEnd) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan p = env.createProgramPlan(); OptimizedPlan op = compileNoStats(p); WorksetIterationPlanNode wipn = (WorksetIterationPlanNode) op.getDataSinks().iterator().next().getInput().getSource(); assertTrue(wipn.getSolutionSetPlanNode().getOutgoingChannels().isEmpty()); JobGraphGenerator jgg = new JobGraphGenerator(); jgg.compileJobGraph(op); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } }
private <D, W> DeltaIterationBase<D, W> translateDeltaIteration(DeltaIterationResultSet<?, ?> untypedIterationEnd) { @SuppressWarnings("unchecked") DeltaIterationResultSet<D, W> iterationEnd = (DeltaIterationResultSet<D, W>) untypedIterationEnd; DeltaIteration<D, W> iterationHead = iterationEnd.getIterationHead(); String name = iterationHead.getName() == null ? "Unnamed Delta Iteration" : iterationHead.getName(); DeltaIterationBase<D, W> iterationOperator = new DeltaIterationBase<>(new BinaryOperatorInformation<>(iterationEnd.getType(), iterationEnd.getWorksetType(), iterationEnd.getType()), iterationEnd.getKeyPositions(), name); iterationOperator.setMaximumNumberOfIterations(iterationEnd.getMaxIterations()); if (iterationHead.getParallelism() > 0) { iterationOperator.setParallelism(iterationHead.getParallelism()); } DeltaIteration.SolutionSetPlaceHolder<D> solutionSetPlaceHolder = iterationHead.getSolutionSet(); DeltaIteration.WorksetPlaceHolder<W> worksetPlaceHolder = iterationHead.getWorkset(); translated.put(solutionSetPlaceHolder, iterationOperator.getSolutionSet()); translated.put(worksetPlaceHolder, iterationOperator.getWorkset()); Operator<D> translatedSolutionSet = translate(iterationEnd.getNextSolutionSet()); Operator<W> translatedWorkset = translate(iterationEnd.getNextWorkset()); iterationOperator.setNextWorkset(translatedWorkset); iterationOperator.setSolutionSetDelta(translatedSolutionSet); iterationOperator.setInitialSolutionSet(translate(iterationHead.getInitialSolutionSet())); iterationOperator.setInitialWorkset(translate(iterationHead.getInitialWorkset())); // register all aggregators iterationOperator.getAggregators().addAll(iterationHead.getAggregators()); iterationOperator.setSolutionSetUnManaged(iterationHead.isSolutionSetUnManaged()); return iterationOperator; }
DataSet<Tuple2<Long, Long>> inFirst = firstIteration.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>()); DataSet<Tuple2<Long, Long>> joined = mainIteration.getWorkset().join(firstResult).where(0).equalTo(0) .projectFirst(0).projectSecond(0);
@Test public void testAggregatorWithParameterForIterateDelta() throws Exception { /* * Test aggregator with parameter for iterateDelta */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap()); DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta( initialSolutionSet, MAX_ITERATIONS, 0); // register aggregator LongSumAggregator aggr = new LongSumAggregatorWithParameter(4); iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr); DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta()); DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet()) .where(0).equalTo(0).flatMap(new UpdateFilter()); DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements); List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect(); Collections.sort(result); List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5); assertEquals(result, expected); }
@Test public void testAggregatorWithoutParameterForIterateDelta() throws Exception { /* * Test aggregator without parameter for iterateDelta */ final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); DataSet<Tuple2<Integer, Integer>> initialSolutionSet = CollectionDataSets.getIntegerDataSet(env).map(new TupleMakerMap()); DeltaIteration<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> iteration = initialSolutionSet.iterateDelta( initialSolutionSet, MAX_ITERATIONS, 0); // register aggregator LongSumAggregator aggr = new LongSumAggregator(); iteration.registerAggregator(NEGATIVE_ELEMENTS_AGGR, aggr); DataSet<Tuple2<Integer, Integer>> updatedDs = iteration.getWorkset().map(new AggregateMapDelta()); DataSet<Tuple2<Integer, Integer>> newElements = updatedDs.join(iteration.getSolutionSet()) .where(0).equalTo(0).flatMap(new UpdateFilter()); DataSet<Tuple2<Integer, Integer>> iterationRes = iteration.closeWith(newElements, newElements); List<Integer> result = iterationRes.map(new ProjectSecondMapper()).collect(); Collections.sort(result); List<Integer> expected = Arrays.asList(1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5); assertEquals(expected, result); }
@Override protected void testProgram() throws Exception { // set up execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read vertex and edge data DataSet<Long> vertices = env.fromElements(ConnectedComponentsData.getEnumeratingVertices(NUM_VERTICES).split("\n")) .map(new VertexParser()); DataSet<Tuple2<Long, Long>> edges = env.fromElements(ConnectedComponentsData.getRandomOddEvenEdges(NUM_EDGES, NUM_VERTICES, SEED).split("\n")) .flatMap(new EdgeParser()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> changes = iteration .getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin()) .coGroup(iteration.getSolutionSet()).where(0).equalTo(0) .with(new MinIdAndUpdate()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes); // emit result List<Tuple2<Long, Long>> resutTuples = new ArrayList<>(); result.output(new LocalCollectionOutputFormat<>(resutTuples)); env.execute(); }
DataSet<Tuple2<Long, Long>> next = iteration.getWorkset().map(new IdentityMapper<Tuple2<Long,Long>>());
public static void connectedComponentsWithCoGroup(String[] args) throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(Integer.parseInt(args[0])); DataSet<Tuple1<Long>> initialVertices = env.readCsvFile(args[1]).types(Long.class).name(VERTEX_SOURCE); DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(args[2]).types(Long.class, Long.class).name(EDGES_SOURCE); DataSet<Tuple2<Long, Long>> verticesWithId = initialVertices.flatMap(new DummyMapFunction()); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithId.iterateDelta(verticesWithId, Integer.parseInt(args[4]), 0).name(ITERATION_NAME); DataSet<Tuple2<Long, Long>> joinWithNeighbors = iteration.getWorkset().join(edges) .where(0).equalTo(0) .with(new DummyJoinFunction()).name(JOIN_NEIGHBORS_MATCH); DataSet<Tuple2<Long, Long>> minAndUpdate = joinWithNeighbors.coGroup(iteration.getSolutionSet()) .where(0).equalTo(0) .with(new DummyCoGroupFunction()).name(MIN_ID_AND_UPDATE); iteration.closeWith(minAndUpdate, minAndUpdate).writeAsCsv(args[3]).name(SINK); env.execute(); }
@Test(expected = InvalidProgramException.class) public void testRangePartitionInIteration() throws Exception { // does not apply for collection execution if (super.mode == TestExecutionMode.COLLECTION) { throw new InvalidProgramException("Does not apply for collection execution"); } final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSource<Long> source = env.generateSequence(0, 10000); DataSet<Tuple2<Long, String>> tuples = source.map(new MapFunction<Long, Tuple2<Long, String>>() { @Override public Tuple2<Long, String> map(Long v) throws Exception { return new Tuple2<>(v, Long.toString(v)); } }); DeltaIteration<Tuple2<Long, String>, Tuple2<Long, String>> it = tuples.iterateDelta(tuples, 10, 0); DataSet<Tuple2<Long, String>> body = it.getWorkset() .partitionByRange(1) // Verify that range partition is not allowed in iteration .join(it.getSolutionSet()) .where(0).equalTo(0).projectFirst(0).projectSecond(1); DataSet<Tuple2<Long, String>> result = it.closeWith(body, body); result.collect(); // should fail }
@Test public void testDeltaConnectedComponentsWithParametrizableConvergence() throws Exception { // name of the aggregator that checks for convergence final String updatedElements = "updated.elements.aggr"; // the iteration stops if less than this number of elements change value final long convergenceThreshold = 3; final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Long>> initialSolutionSet = env.fromCollection(verticesInput); DataSet<Tuple2<Long, Long>> edges = env.fromCollection(edgesInput); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = initialSolutionSet.iterateDelta(initialSolutionSet, 10, 0); // register the convergence criterion iteration.registerAggregationConvergenceCriterion(updatedElements, new LongSumAggregator(), new UpdatedElementsConvergenceCriterion(convergenceThreshold)); DataSet<Tuple2<Long, Long>> verticesWithNewComponents = iteration.getWorkset().join(edges).where(0).equalTo(0) .with(new NeighborWithComponentIDJoin()) .groupBy(0).min(1); DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents.join(iteration.getSolutionSet()).where(0).equalTo(0) .flatMap(new MinimumIdFilter(updatedElements)); List<Tuple2<Long, Long>> result = iteration.closeWith(updatedComponentId, updatedComponentId).collect(); Collections.sort(result, new TestBaseUtils.TupleComparator<Tuple2<Long, Long>>()); assertEquals(expectedResult, result); }
public static DataSet<Tuple2<Long, Double>> constructPlan(DataSet<Tuple2<Long, Double>> initialData, int numIterations) { DeltaIteration<Tuple2<Long, Double>, Tuple2<Long, Double>> iteration = initialData.iterateDelta(initialData, numIterations, 0); DataSet<Tuple2<Long, Double>> delta = iteration.getSolutionSet() .join(iteration.getWorkset().flatMap(new Duplicator())).where(0).equalTo(0).with(new SummingJoin()).name(JOIN_1) .groupBy(0).aggregate(Aggregations.MIN, 1).map(new Expander()) .join(iteration.getSolutionSet()).where(0).equalTo(0).with(new SummingJoinProject()).name(JOIN_2); DataSet<Tuple2<Long, Double>> changes = delta.groupBy(0).aggregate(Aggregations.SUM, 1); DataSet<Tuple2<Long, Double>> result = iteration.closeWith(delta, changes); return result; }
@Test public void testTempInIterationTest() throws Exception { ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); DataSet<Tuple2<Long, Long>> input = env.readCsvFile("file:///does/not/exist").types(Long.class, Long.class); DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = input.iterateDelta(input, 1, 0); DataSet<Tuple2<Long, Long>> update = iteration.getWorkset() .join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>()); iteration.closeWith(update, update) .output(new DiscardingOutputFormat<Tuple2<Long, Long>>()); Plan plan = env.createProgramPlan(); OptimizedPlan oPlan = (new Optimizer(new Configuration())).compile(plan); JobGraphGenerator jgg = new JobGraphGenerator(); JobGraph jg = jgg.compileJobGraph(oPlan); boolean solutionSetUpdateChecked = false; for(JobVertex v : jg.getVertices()) { if(v.getName().equals("SolutionSet Delta")) { // check if input of solution set delta is temped TaskConfig tc = new TaskConfig(v.getConfiguration()); assertTrue(tc.isInputAsynchronouslyMaterialized(0)); solutionSetUpdateChecked = true; } } assertTrue(solutionSetUpdateChecked); }
public static DataSet<Tuple2<Long, Long>> doDeltaIteration(DataSet<Tuple2<Long, Long>> vertices, DataSet<Tuple2<Long, Long>> edges) { DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> depIteration = vertices.iterateDelta(vertices, 100, 0); DataSet<Tuple1<Long>> candidates = depIteration.getWorkset().join(edges).where(0).equalTo(0) .projectSecond(1); DataSet<Tuple1<Long>> grouped = candidates.groupBy(0).reduceGroup(new Reduce101()); DataSet<Tuple2<Long, Long>> candidatesDependencies = grouped.join(edges).where(0).equalTo(1).projectSecond(0, 1); DataSet<Tuple2<Long, Long>> verticesWithNewComponents = candidatesDependencies.join(depIteration.getSolutionSet()).where(0).equalTo(0) .with(new Join222()) .groupBy(0).aggregate(Aggregations.MIN, 1); DataSet<Tuple2<Long, Long>> updatedComponentId = verticesWithNewComponents.join(depIteration.getSolutionSet()).where(0).equalTo(0) .flatMap(new FlatMapJoin()); DataSet<Tuple2<Long, Long>> depResult = depIteration.closeWith(updatedComponentId, updatedComponentId); return depResult; }
@Override protected void testProgram() throws Exception { // set up execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read vertex and edge data DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class); DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class) .flatMap(new ConnectedComponents.UndirectEdge()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ConnectedComponents.ComponentIdFilter()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes); result.writeAsCsv(resultPath, "\n", " "); // execute program env.execute("Connected Components Example"); }
@Override protected void testProgram() throws Exception { // set up execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read vertex and edge data DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class); DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class) .flatMap(new ConnectedComponents.UndirectEdge()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> minNeighbor = iteration.getWorkset() .join(edges).where(0).equalTo(0).with(new ConnectedComponents.NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1); DataSet<Tuple2<Long, Long>> updatedIds = iteration.getSolutionSet() .join(minNeighbor).where(0).equalTo(0).with(new UpdateComponentIdMatchMirrored()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(updatedIds, updatedIds); result.writeAsCsv(resultPath, "\n", " "); // execute program env.execute("Connected Components Example"); }
@Override protected void testProgram() throws Exception { // set up execution environment ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment(); // read vertex and edge data DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class); DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class) .flatMap(new UndirectEdge()); // assign the initial components (equal to the vertex id) DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>()); // open a delta iteration DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration = verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0); iteration.setSolutionSetUnManaged(true); // apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin()) .groupBy(0).aggregate(Aggregations.MIN, 1) .join(iteration.getSolutionSet()).where(0).equalTo(0) .with(new ComponentIdFilter()); // close the delta iteration (delta and new workset are identical) DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes); result.writeAsCsv(resultPath, "\n", " "); // execute program env.execute("Connected Components Example"); }