// Source for the page/rank records. The vertex count is stored in the source's
// parameters under DanglingPageRankInputFormat.NUM_VERTICES_PARAMETER.
FileDataSource pageWithRankInput = new FileDataSource(
    new DanglingPageRankInputFormat(),
    pageWithRankInputPath,
    "DanglingPageWithRankInput");
pageWithRankInput
    .getParameters()
    .setLong(DanglingPageRankInputFormat.NUM_VERTICES_PARAMETER, numVertices);

// Source for the adjacency lists.
FileDataSource adjacencyListInput = new FileDataSource(
    new ImprovedAdjacencyListInputFormat(),
    adjacencyListInputPath,
    "AdjancencyListInput");
@Override public Plan getPlan(String... args) throws IllegalArgumentException { // parse job parameters final int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); final String input = (args.length > 1 ? args[1] : ""); final String output = (args.length > 2 ? args[2] : ""); // This task will read the input data and generate the key/value pairs final FileDataSource source = new FileDataSource(new TeraInputFormat(), input, "Data Source"); source.setDegreeOfParallelism(numSubTasks); // This task writes the sorted data back to disk final FileDataSink sink = new FileDataSink(new TeraOutputFormat(), output, "Data Sink"); sink.setDegreeOfParallelism(numSubTasks); sink.setGlobalOrder(new Ordering(0, TeraKey.class, Order.ASCENDING), new TeraDistribution()); sink.setInput(source); return new Plan(sink, "TeraSort"); } }
/**
 * Builds the "WordCount Example" plan: text source, then a map using TokenizeLine,
 * then a reduce using CountWords keyed on position 0 (StringValue), then a CSV sink.
 *
 * @param numSubTasks default parallelism set on the returned plan
 * @param input       path handed to the text source
 * @param output      path handed to the file sink
 * @return the assembled plan
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    FileDataSource lines = new FileDataSource(new TextInputFormat(), input, "Input Lines");
    lines.setParameter(TextInputFormat.CHARSET_NAME, "ASCII");

    MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
        .input(lines)
        .name("Tokenize Lines")
        .build();

    ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
        .input(tokenizer)
        .name("Count Words")
        .build();

    // CSV output configured with "\n" and " " as its two string arguments and
    // (StringValue, IntValue) as field types.
    @SuppressWarnings("unchecked")
    CsvOutputFormat csv = new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class);
    FileDataSink sink = new FileDataSink(csv, output, counter, "Word Counts");

    Plan wordCount = new Plan(sink, "WordCount Example");
    wordCount.setDefaultParallelism(numSubTasks);
    return wordCount;
}
@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); FileDataSource source = new FileDataSource(new TextInputFormat(), dataInput, "Input Lines"); MapOperator mapper = MapOperator.builder(new TokenizeLine()) .input(source) .name("Tokenize Lines") .build(); ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0) .input(mapper) .name("Count Words") .build(); @SuppressWarnings("unchecked") FileDataSink out = new FileDataSink(new CsvOutputFormat("\n", " ", StringValue.class, IntValue.class), output, reducer, "Word Counts"); Plan plan = new Plan(out, "WordCount Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
/**
 * Creates a configuration builder that can be used to set the input format's parameters to the config in a
 * fluent fashion.
 *
 * @param target The file data source that is passed to the builder together with its own parameters
 *               (as returned by {@code target.getParameters()}).
 * @return A config builder for setting parameters.
 */
public static ConfigBuilder configureRecordFormat(FileDataSource target) { return new ConfigBuilder(target, target.getParameters()); }
// Initial solution set: CSV with ' ' as the first constructor argument and
// (LongValue, DoubleValue) field types.
FileDataSource initialSolutionSet = new FileDataSource(
    new CsvInputFormat(' ', LongValue.class, DoubleValue.class),
    solutionSetInput,
    "Initial Solution Set");

// Initial delta set: same CSV layout as the solution set.
FileDataSource initialDeltaSet = new FileDataSource(
    new CsvInputFormat(' ', LongValue.class, DoubleValue.class),
    deltasInput,
    "Initial DeltaSet");

// Dependency set: CSV with three LongValue fields.
FileDataSource dependencySet = new FileDataSource(
    new CsvInputFormat(' ', LongValue.class, LongValue.class, LongValue.class),
    dependencySetInput,
    "Dependency Set");
/**
 * Builds the union test plan: two file sources are both added as inputs of one map
 * operator, whose output goes to a single file sink.
 *
 * Each input path is {@code emptyInPath} when the corresponding config entry
 * ("UnionTest#Input1Path" / "UnionTest#Input2Path") equals "empty", otherwise {@code inPath}.
 *
 * @return the plan rooted at the sink
 */
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
protected Plan getTestJob() {
    final boolean firstIsEmpty = config.getString("UnionTest#Input1Path", "").equals("empty");
    final boolean secondIsEmpty = config.getString("UnionTest#Input2Path", "").equals("empty");
    final String firstPath = firstIsEmpty ? emptyInPath : inPath;
    final String secondPath = secondIsEmpty ? emptyInPath : inPath;

    // Parallelism shared by both sources and the mapper.
    final int subtasks = config.getInteger("UnionTest#NoSubtasks", 1);

    final FileDataSource first = new FileDataSource(new ContractITCaseInputFormat(), firstPath);
    DelimitedInputFormat.configureDelimitedFormat(first).recordDelimiter('\n');
    first.setDegreeOfParallelism(subtasks);

    final FileDataSource second = new FileDataSource(new ContractITCaseInputFormat(), secondPath);
    DelimitedInputFormat.configureDelimitedFormat(second).recordDelimiter('\n');
    second.setDegreeOfParallelism(subtasks);

    final MapOperator unionMapper = MapOperator.builder(new TestMapper()).build();
    unionMapper.setDegreeOfParallelism(subtasks);

    // The sink always runs with parallelism 1.
    final FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    sink.setDegreeOfParallelism(1);
    sink.setInput(unionMapper);

    // Both sources feed the same mapper.
    unionMapper.addInput(first);
    unionMapper.addInput(second);

    return new Plan(sink);
}
/**
 * Builds the "WordCount Example" plan, configuring the CSV sink through
 * {@code CsvOutputFormat.configureRecordFormat}.
 *
 * @param args optional positional arguments: {@code [0]} number of subtasks (default 1),
 *             {@code [1]} input path (default ""), {@code [2]} output path (default "")
 * @return the assembled plan
 */
@Override
public Plan getPlan(String... args) {
    int parallelism = 1;
    if (args.length > 0) {
        parallelism = Integer.parseInt(args[0]);
    }
    String inputPath = "";
    if (args.length > 1) {
        inputPath = args[1];
    }
    String outputPath = "";
    if (args.length > 2) {
        outputPath = args[2];
    }

    FileDataSource lines = new FileDataSource(new TextInputFormat(), inputPath, "Input Lines");

    MapOperator tokenizer = MapOperator.builder(new TokenizeLine())
        .input(lines)
        .name("Tokenize Lines")
        .build();

    ReduceOperator counter = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
        .input(tokenizer)
        .name("Count Words")
        .build();

    // Sink records: '\n' record delimiter, ' ' field delimiter,
    // StringValue at position 0 and IntValue at position 1.
    FileDataSink sink = new FileDataSink(new CsvOutputFormat(), outputPath, counter, "Word Counts");
    CsvOutputFormat.configureRecordFormat(sink)
        .recordDelimiter('\n')
        .fieldDelimiter(' ')
        .field(StringValue.class, 0)
        .field(IntValue.class, 1);

    Plan wordCount = new Plan(sink, "WordCount Example");
    wordCount.setDefaultParallelism(parallelism);
    return wordCount;
}
// Source for the page/rank records. The vertex count is stored in the source's
// parameters under NUM_VERTICES_CONFIG_PARAM.
FileDataSource pageWithRankInput = new FileDataSource(
    new DanglingPageRankInputFormat(),
    pageWithRankInputPath,
    "PageWithRank Input");
pageWithRankInput
    .getParameters()
    .setLong(NUM_VERTICES_CONFIG_PARAM, numVertices);

// Source for the adjacency lists.
FileDataSource adjacencyListInput = new FileDataSource(
    new ImprovedAdjacencyListInputFormat(),
    adjacencyListInputPath,
    "AdjancencyListInput");
// Edge source; the delimiter is passed to the source under
// EdgeInputFormat.ID_DELIMITER_CHAR.
FileDataSource edges = new FileDataSource(
    new EdgeInputFormat(),
    edgeInput,
    "Input Edges");
edges.setParameter(EdgeInputFormat.ID_DELIMITER_CHAR, delimiter);
@Override public Plan getPlan(String... args) throws IllegalArgumentException { // parse program parameters int numSubtasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String recordsPath = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); FileDataSource source = new FileDataSource(CsvInputFormat.class, recordsPath); source.setDegreeOfParallelism(numSubtasks); CsvInputFormat.configureRecordFormat(source) .recordDelimiter('\n') .fieldDelimiter('|') .field(IntValue.class, 0); FileDataSink sink = new FileDataSink(CsvOutputFormat.class, output); sink.setDegreeOfParallelism(numSubtasks); CsvOutputFormat.configureRecordFormat(sink) .recordDelimiter('\n') .fieldDelimiter('|') .lenient(true) .field(IntValue.class, 0); sink.setGlobalOrder(new Ordering(0, IntValue.class, Order.ASCENDING), new UniformIntegerDistribution(Integer.MIN_VALUE, Integer.MAX_VALUE)); sink.setInput(source); return new Plan(sink); }
/**
 * Builds a plan in which two file sources each pass through their own map operator
 * and both mappers are added as inputs of one sink, then compiles it to a job graph.
 *
 * Each source reads {@code textInput} when its config flag ("input1PathHasData" /
 * "input2PathHasData", default false) is true, otherwise {@code emptyInput}.
 *
 * @return the job graph produced by NepheleJobGraphGenerator from the optimized plan
 * @throws Exception declared by the signature; anything thrown while compiling propagates
 */
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
protected JobGraph getJobGraph() throws Exception {
    final String firstPath = config.getBoolean("input1PathHasData", false) ? textInput : emptyInput;
    final String secondPath = config.getBoolean("input2PathHasData", false) ? textInput : emptyInput;

    final FileDataSource firstSource = new FileDataSource(new ContractITCaseInputFormat(), firstPath);
    final FileDataSource secondSource = new FileDataSource(new ContractITCaseInputFormat(), secondPath);

    final MapOperator firstMapper = MapOperator.builder(new TestMapper()).build();
    final MapOperator secondMapper = MapOperator.builder(new TestMapper()).build();

    final FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultDir);

    // Wire source -> mapper for each branch, then add both mappers to the sink.
    firstMapper.setInput(firstSource);
    secondMapper.setInput(secondSource);
    sink.addInput(firstMapper);
    sink.addInput(secondMapper);

    final Plan unionPlan = new Plan(sink);
    unionPlan.setDefaultParallelism(4);

    // Optimize the plan, then translate the optimized plan into a job graph.
    final PactCompiler compiler = new PactCompiler(new DataStatistics());
    final OptimizedPlan optimized = compiler.compile(unionPlan);

    final NepheleJobGraphGenerator generator = new NepheleJobGraphGenerator();
    return generator.compileJobGraph(optimized);
}
// Third positional argument is the output location; empty string when absent.
String output = args.length > 2 ? args[2] : "";

// Edge source; '|' is set for VERTEX_DELIMITER_CHAR and ',' for DEGREE_DELIMITER_CHAR.
FileDataSource edges = new FileDataSource(
    new EdgeWithDegreesInputFormat(),
    edgeInput,
    "Input Edges with Degrees");
edges.setParameter(EdgeWithDegreesInputFormat.VERTEX_DELIMITER_CHAR, '|');
edges.setParameter(EdgeWithDegreesInputFormat.DEGREE_DELIMITER_CHAR, ',');
/**
 * Builds the "Iteration with union test" plan: a bulk iteration (at most 2 iterations)
 * whose step is a map that receives the partial solution twice as its inputs.
 *
 * @param numSubTasks default parallelism set on the returned plan
 * @param input       path handed to the initial source
 * @param output      path handed to the sink
 * @return the assembled plan
 */
private static Plan getPlan(int numSubTasks, String input, String output) {
    // The initial source is pinned to parallelism 1.
    FileDataSource points = new FileDataSource(new PointInFormat(), input, "Input");
    points.setDegreeOfParallelism(1);

    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(points);
    loop.setMaximumNumberOfIterations(2);

    // The partial solution is passed as both inputs of the identity map.
    @SuppressWarnings("unchecked")
    MapOperator identity = MapOperator.builder(new IdentityMapper())
        .input(loop.getPartialSolution(), loop.getPartialSolution())
        .name("map")
        .build();
    loop.setNextPartialSolution(identity);

    FileDataSink sink = new FileDataSink(new PointOutFormat(), output, loop, "Output");

    Plan result = new Plan(sink, "Iteration with union test");
    result.setDefaultParallelism(numSubTasks);
    return result;
}
/**
 * Builds the "Iteration with AllReducer (keyless Reducer)" plan: a bulk iteration of
 * at most NUM_ITERATIONS rounds whose step is a reduce built from SumReducer without
 * key arguments, applied to the partial solution.
 *
 * @param numSubTasks default parallelism set on the returned plan
 * @param input       path handed to the text source
 * @param output      path handed to the CSV sink
 * @return the assembled plan
 */
static Plan getTestPlanPlan(int numSubTasks, String input, String output) {
    FileDataSource textSource = new FileDataSource(TextInputFormat.class, input, "input");

    BulkIteration loop = new BulkIteration("Loop");
    loop.setInput(textSource);
    loop.setMaximumNumberOfIterations(NUM_ITERATIONS);

    ReduceOperator summer = ReduceOperator.builder(new SumReducer())
        .input(loop.getPartialSolution())
        .name("Compute sum (Reduce)")
        .build();
    loop.setNextPartialSolution(summer);

    // CSV output with "\n" and " " as its string arguments and one StringValue field.
    @SuppressWarnings("unchecked")
    CsvOutputFormat csv = new CsvOutputFormat("\n", " ", StringValue.class);
    FileDataSink sink = new FileDataSink(csv, output, loop, "Output");

    Plan result = new Plan(sink, "Iteration with AllReducer (keyless Reducer)");
    result.setDefaultParallelism(numSubTasks);
    return result;
}
// Edge source; the delimiter is passed to the source under
// EdgeInputFormat.ID_DELIMITER_CHAR.
FileDataSource edges = new FileDataSource(
    new EdgeInputFormat(),
    edgeInput,
    "Input Edges");
edges.setParameter(EdgeInputFormat.ID_DELIMITER_CHAR, delimiter);
/**
 * Builds the map test plan: one file source, one map operator, one file sink.
 *
 * The source and the mapper use the parallelism from config key "MapTest#NoSubtasks"
 * (default 1); the sink uses parallelism 1.
 *
 * @return the plan rooted at the sink
 */
@Override
protected Plan getTestJob() {
    final int subtasks = config.getInteger("MapTest#NoSubtasks", 1);

    final FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), inPath);
    DelimitedInputFormat.configureDelimitedFormat(source).recordDelimiter('\n');
    source.setDegreeOfParallelism(subtasks);

    final MapOperator mapper = MapOperator.builder(new TestMapper()).build();
    mapper.setDegreeOfParallelism(subtasks);

    final FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    sink.setDegreeOfParallelism(1);

    // Wire sink <- mapper <- source.
    sink.setInput(mapper);
    mapper.setInput(source);

    return new Plan(sink);
}
@Override public Plan getPlan(String... args) { // parse job parameters int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String dataInput = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); @SuppressWarnings("unchecked") CsvInputFormat format = new CsvInputFormat(' ', IntValue.class, IntValue.class); FileDataSource input = new FileDataSource(format, dataInput, "Input"); // create the reduce contract and sets the key to the first field ReduceOperator sorter = ReduceOperator.builder(new IdentityReducer(), IntValue.class, 0) .input(input) .name("Reducer") .build(); // sets the group sorting to the second field sorter.setGroupOrder(new Ordering(1, IntValue.class, Order.ASCENDING)); // create and configure the output format FileDataSink out = new FileDataSink(new CsvOutputFormat(), output, sorter, "Sorted Output"); CsvOutputFormat.configureRecordFormat(out) .recordDelimiter('\n') .fieldDelimiter(' ') .field(IntValue.class, 0) .field(IntValue.class, 1); Plan plan = new Plan(out, "SecondarySort Example"); plan.setDefaultParallelism(numSubTasks); return plan; }
/**
 * Builds the co-group test plan: a left and a right file source feed a co-group
 * operator keyed on position 0 of both inputs (StringValue), which writes to one sink.
 *
 * Parallelism for the sources and the co-group comes from config key
 * "CoGroupTest#NoSubtasks" (default 1); the sink uses parallelism 1. The config values
 * "CoGroupTest#LocalStrategy" and "CoGroupTest#ShipStrategy" (default "") are stored in
 * the operator's parameters under PactCompiler.HINT_LOCAL_STRATEGY and
 * PactCompiler.HINT_SHIP_STRATEGY.
 *
 * @return the plan rooted at the sink
 */
@Override
protected Plan getTestJob() {
    final int subtasks = config.getInteger("CoGroupTest#NoSubtasks", 1);

    final FileDataSource left = new FileDataSource(new CoGroupTestInFormat(), leftInPath);
    DelimitedInputFormat.configureDelimitedFormat(left).recordDelimiter('\n');
    left.setDegreeOfParallelism(subtasks);

    final FileDataSource right = new FileDataSource(new CoGroupTestInFormat(), rightInPath);
    DelimitedInputFormat.configureDelimitedFormat(right).recordDelimiter('\n');
    right.setDegreeOfParallelism(subtasks);

    final CoGroupOperator coGroup = CoGroupOperator
        .builder(new TestCoGrouper(), StringValue.class, 0, 0)
        .build();
    coGroup.setDegreeOfParallelism(subtasks);

    // Strategy hints taken from the test configuration.
    final String localStrategy = config.getString("CoGroupTest#LocalStrategy", "");
    coGroup.getParameters().setString(PactCompiler.HINT_LOCAL_STRATEGY, localStrategy);
    final String shipStrategy = config.getString("CoGroupTest#ShipStrategy", "");
    coGroup.getParameters().setString(PactCompiler.HINT_SHIP_STRATEGY, shipStrategy);

    final FileDataSink sink = new FileDataSink(new CoGroupOutFormat(), resultPath);
    sink.setDegreeOfParallelism(1);

    // Wire sink <- co-group <- (left, right).
    sink.setInput(coGroup);
    coGroup.setFirstInput(left);
    coGroup.setSecondInput(right);

    return new Plan(sink);
}
// CSV source with ',' as the first constructor argument and three IntValue fields.
FileDataSource source = new FileDataSource(
    new CsvInputFormat(',', IntValue.class, IntValue.class, IntValue.class),
    recordsPath);