/**
 * Builds a test plan: a 2-step bulk iteration whose step function is an identity
 * map over the union of the partial solution with itself ("Iteration with union test").
 *
 * @param numSubTasks default parallelism for the plan
 * @param input       path read by the point source (source itself runs with DOP 1)
 * @param output      path written by the point sink
 * @return the assembled {@link Plan}
 */
private static Plan getPlan(int numSubTasks, String input, String output) {
    // Single-task source so the input is read exactly once.
    FileDataSource source = new FileDataSource(new PointInFormat(), input, "Input");
    source.setDegreeOfParallelism(1);

    // Bulk iteration capped at two supersteps.
    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(source);
    loop.setMaximumNumberOfIterations(2);

    // Identity map over the partial solution unioned with itself (two identical inputs).
    @SuppressWarnings("unchecked")
    MapOperator unionMap = MapOperator.builder(new IdentityMapper())
            .input(loop.getPartialSolution(), loop.getPartialSolution())
            .name("map")
            .build();
    loop.setNextPartialSolution(unionMap);

    FileDataSink sink = new FileDataSink(new PointOutFormat(), output, loop, "Output");

    Plan result = new Plan(sink, "Iteration with union test");
    result.setDefaultParallelism(numSubTasks);
    return result;
}
@Override public boolean preVisit(Operator<?> visitable) { if (this.seen.add(visitable)) { // add to the map final String name = visitable.getName(); List<Operator<?>> list = this.map.get(name); if (list == null) { list = new ArrayList<Operator<?>>(2); this.map.put(name, list); } list.add(visitable); // recurse into bulk iterations if (visitable instanceof BulkIteration) { ((BulkIteration) visitable).getNextPartialSolution().accept(this); } else if (visitable instanceof DeltaIteration) { ((DeltaIteration) visitable).getSolutionSetDelta().accept(this); ((DeltaIteration) visitable).getNextWorkset().accept(this); } return true; } else { return false; } }
/**
 * Builds a test plan with a keyless (all-)reduce inside a bulk iteration:
 * the partial solution is summed each superstep for {@code NUM_ITERATIONS} rounds.
 *
 * @param numSubTasks default parallelism for the plan
 * @param input       text-file input path
 * @param output      CSV output path
 * @return the assembled {@link Plan}
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    FileDataSource source = new FileDataSource(TextInputFormat.class, input, "input");

    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(source);
    loop.setMaximumNumberOfIterations(NUM_ITERATIONS);

    // Keyless reduce: a single group spanning the whole partial solution.
    ReduceOperator sum = ReduceOperator.builder(new SumReducer())
            .input(loop.getPartialSolution())
            .name("Compute sum (Reduce)")
            .build();
    loop.setNextPartialSolution(sum);

    // Unchecked: generic varargs in the CsvOutputFormat constructor.
    @SuppressWarnings("unchecked")
    FileDataSink sink =
            new FileDataSink(new CsvOutputFormat("\n", " ", StringValue.class), output, loop, "Output");

    Plan result = new Plan(sink, "Iteration with AllReducer (keyless Reducer)");
    result.setDefaultParallelism(numSubTasks);
    return result;
}
/**
 * Builds a test plan with a keyless reducer inside a 5-step bulk iteration.
 * Includes sanity asserts that both the iteration count and the default
 * parallelism exceed one, since the test is only meaningful in that setting.
 *
 * @param numSubTasks default parallelism for the plan (asserted &gt; 1)
 * @param input       text-file input path
 * @param output      CSV output path
 * @return the assembled {@link Plan}
 */
private static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    FileDataSource source = new FileDataSource(TextInputFormat.class, input, "input");

    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(source);
    loop.setMaximumNumberOfIterations(5);
    Assert.assertTrue(loop.getMaximumNumberOfIterations() > 1);

    // Keyless reduce over the whole partial solution.
    ReduceOperator picked = ReduceOperator.builder(new PickOneReducer())
            .input(loop.getPartialSolution())
            .name("Compute sum (Reduce)")
            .build();
    loop.setNextPartialSolution(picked);

    FileDataSink sink = new FileDataSink(CsvOutputFormat.class, output, loop, "Output");
    CsvOutputFormat.configureRecordFormat(sink)
            .recordDelimiter('\n')
            .fieldDelimiter(' ')
            .field(StringValue.class, 0);

    Plan result = new Plan(sink, "Iteration with AllReducer (keyless Reducer)");
    result.setDefaultParallelism(numSubTasks);
    Assert.assertTrue(result.getDefaultParallelism() > 1);
    return result;
}
// NOTE(review): incomplete fragment — the builder invocations that should precede the
// orphaned .input()/.name()/.build() chains, and the declarations of sumReduce and
// terminationMapper, are not visible here; this line is not valid standalone Java.
// What IS visible: a BulkIteration "Loop" capped at 5 iterations, whose next partial
// solution is a reduce ("Compute sum (Reduce)") and whose termination criterion is a
// map — presumably built by the missing builder calls; confirm against the full file.
BulkIteration iteration = new BulkIteration("Loop"); iteration.setInput(initialInput); iteration.setMaximumNumberOfIterations(5); Assert.assertTrue(iteration.getMaximumNumberOfIterations() > 1); .input(iteration.getPartialSolution()) .name("Compute sum (Reduce)") .build(); iteration.setNextPartialSolution(sumReduce); .input(iteration.getPartialSolution()) .name("Compute termination criterion (Map)") .build(); iteration.setTerminationCriterion(terminationMapper);
// NOTE(review): incomplete fragment — the builder invocations for sumReduce and
// terminationMapper are cut off (orphaned .input()/.build() chains); not valid
// standalone Java. Visible intent: a 5-step BulkIteration "Loop" with a reduce as the
// next partial solution and a separately-built termination criterion — verify against
// the surrounding method in the full file.
BulkIteration iteration = new BulkIteration("Loop"); iteration.setInput(initialInput); iteration.setMaximumNumberOfIterations(5); Assert.assertTrue(iteration.getMaximumNumberOfIterations() > 1); .input(iteration.getPartialSolution()) .name("Compute sum (Reduce)") .build(); iteration.setNextPartialSolution(sumReduce); .build(); iteration.setTerminationCriterion(terminationMapper);
/**
 * Builds a test plan whose bulk-iteration step function is a reduce followed by a
 * (chainable) identity map ("Iteration with chained map test"), capped at 2 supersteps.
 *
 * @param numSubTasks default parallelism for the plan
 * @param input       path read by the point source (source itself runs with DOP 1)
 * @param output      path written by the point sink
 * @return the assembled {@link Plan}
 */
static Plan getTestPlan(int numSubTasks, String input, String output) {
    // Single-task source so the input is read exactly once.
    FileDataSource source = new FileDataSource(new PointInFormat(), input, "Input");
    source.setDegreeOfParallelism(1);

    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(source);
    loop.setMaximumNumberOfIterations(2);

    // Reduce on the first IntValue field, then an identity map that can be chained to it.
    ReduceOperator reduce = ReduceOperator.builder(new DummyReducer(), IntValue.class, 0)
            .input(loop.getPartialSolution())
            .name("Reduce something")
            .build();
    MapOperator chainedMap = MapOperator.builder(new IdentityMapper()).input(reduce).build();
    loop.setNextPartialSolution(chainedMap);

    FileDataSink sink = new FileDataSink(new PointOutFormat(), output, loop, "Output");

    Plan result = new Plan(sink, "Iteration with chained map test");
    result.setDefaultParallelism(numSubTasks);
    return result;
}
}
// NOTE(review): incomplete fragment — the join and co-group builder invocations are cut
// off (orphaned .input1()/.input2()/.name() chains) and the declarations of
// adjacencyListInput, join, and rankAggregation are off-screen; not valid standalone
// Java. Visible intent: a PageRank bulk iteration joining the partial solution with the
// edge list, aggregating ranks, and registering an aggregator-based convergence
// criterion — confirm against the full method.
pageWithRankInput.getParameters().setLong(DanglingPageRankInputFormat.NUM_VERTICES_PARAMETER, numVertices); BulkIteration iteration = new BulkIteration("Page Rank Loop"); iteration.setInput(pageWithRankInput); .input1(iteration.getPartialSolution()) .input2(adjacencyListInput) .name("Join with Edges") .input1(iteration.getPartialSolution()) .input2(join) .name("Rank Aggregation") rankAggregation.getParameters().setLong(DotProductCoGroup.NUM_DANGLING_VERTICES_PARAMETER, numDanglingVertices); iteration.setNextPartialSolution(rankAggregation); iteration.setMaximumNumberOfIterations(numIterations); iteration.getAggregators().registerAggregationConvergenceCriterion(DotProductCoGroup.AGGREGATOR_NAME, PageRankStatsAggregator.class, DiffL1NormConvergenceCriterion.class);
// NOTE(review): incomplete fragment — the builder invocations producing rankAggregation
// and termination are cut off (orphaned .input1()/.input2() chains) and
// adjacencyListInput is declared off-screen; not valid standalone Java. Visible intent:
// a PageRank bulk iteration joining the partial solution with the edges and using a
// join of old and new ranks as termination criterion — confirm against the full method.
pageWithRankInput.getParameters().setLong(NUM_VERTICES_CONFIG_PARAM, numVertices); BulkIteration iteration = new BulkIteration("Page Rank Loop"); iteration.setInput(pageWithRankInput); .input1(iteration.getPartialSolution()) .input2(adjacencyListInput) .name("Join with Edges") .build(); iteration.setNextPartialSolution(rankAggregation); iteration.setMaximumNumberOfIterations(numIterations); .input1(iteration.getPartialSolution()) .input2(rankAggregation) .name("Join Old and New") .build(); iteration.setTerminationCriterion(termination);
// NOTE(review): incomplete fragment — the builder invocation producing
// recomputeClusterCenter is cut off (orphaned .setBroadcastVariable()/.input() chain)
// and clusterPoints/dataPoints are declared off-screen; not valid standalone Java.
// Visible intent: a k-means bulk iteration whose partial solution (the centers) is
// broadcast under the name "centers" to an operator over the data points — confirm
// against the full method.
BulkIteration iter = new BulkIteration("k-means loop"); iter.setInput(clusterPoints); iter.setMaximumNumberOfIterations(numIterations); .setBroadcastVariable("centers", iter.getPartialSolution()) .input(dataPoints) .name("Find Nearest Centers") .build(); iter.setNextPartialSolution(recomputeClusterCenter);
// NOTE(review): incomplete fragment — the builder invocations for the distance
// computation and recomputeClusterCenter are cut off (orphaned .input2()/.name()
// chains) and initialClusterPoints is declared off-screen; not valid standalone Java.
// Visible intent: a k-means bulk iteration seeded with single-task initial centers,
// computing distances against the partial solution and recomputing center positions
// each superstep — confirm against the full method.
initialClusterPoints.setDegreeOfParallelism(1); BulkIteration iteration = new BulkIteration("K-Means Loop"); iteration.setInput(initialClusterPoints); iteration.setMaximumNumberOfIterations(numIterations); .input2(iteration.getPartialSolution()) .name("Compute Distances") .build(); .name("Recompute Center Positions") .build(); iteration.setNextPartialSolution(recomputeClusterCenter);