/**
 * Creates a FileDataSink that encodes its data with the given {@link FileOutputFormat} class,
 * carries the given name, and writes to the file indicated by the given path.
 * The given contract is attached as the sink's input.
 *
 * @param f The {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contract to use as the input.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 */
public FileDataSink(Class<? extends FileOutputFormat<Record>> f,
        String filePath,
        Operator<Record> input,
        String name) {
    // Format, path and name are handled by the three-argument constructor.
    this(f, filePath, name);
    // Only the input wiring is specific to this overload.
    this.setInput(input);
}
/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} instance and the default name,
 * writing to the file indicated by the given path. The given contract is attached as the sink's input.
 *
 * @param f The {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contract to use as the input.
 */
public FileDataSink(FileOutputFormat<Record> f,
        String filePath,
        Operator<Record> input) {
    // Format and path are handled by the two-argument constructor.
    this(f, filePath);
    // Only the input wiring is specific to this overload.
    this.setInput(input);
}
/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} instance and the given name,
 * writing to the file indicated by the given path. The given contract is attached as the sink's input.
 *
 * @param f The {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contract to use as the input.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 */
public FileDataSink(FileOutputFormat<Record> f,
        String filePath,
        Operator<Record> input,
        String name) {
    // Format, path and name are handled by the three-argument constructor.
    this(f, filePath, name);
    // Only the input wiring is specific to this overload.
    this.setInput(input);
}
/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} instance and the given name,
 * writing to the file indicated by the given path. The given contracts are combined through
 * {@code Operator.createUnionCascade} and the result is attached as the sink's input.
 *
 * @param f The {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contracts to use as the input; must not be null.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 * @deprecated This method will be removed in future versions. Use the {@link eu.stratosphere.api.common.operators.Union} operator instead.
 */
@Deprecated
public FileDataSink(FileOutputFormat<Record> f,
        String filePath,
        List<Operator<Record>> input,
        String name) {
    this(f, filePath, name);

    // Reject a missing input list before building the union cascade from it.
    Validate.notNull(input, "The input must not be null.");
    this.setInput(Operator.createUnionCascade(input));
}
/**
 * Creates a FileDataSink with the provided {@link FileOutputFormat} class and the given name,
 * writing to the file indicated by the given path. The given contracts are combined through
 * {@code Operator.createUnionCascade} and the result is attached as the sink's input.
 *
 * @param f The {@link FileOutputFormat} implementation used to encode the data.
 * @param filePath The path to the file to write the contents to.
 * @param input The contracts to use as the input; must not be null.
 * @param name The given name for the sink, used in plans, logs and progress messages.
 * @deprecated This method will be removed in future versions. Use the {@link eu.stratosphere.api.common.operators.Union} operator instead.
 */
@Deprecated
public FileDataSink(Class<? extends FileOutputFormat<Record>> f,
        String filePath,
        List<Operator<Record>> input,
        String name) {
    this(f, filePath, name);

    // Reject a missing input list before building the union cascade from it.
    Validate.notNull(input, "The inputs must not be null.");
    this.setInput(Operator.createUnionCascade(input));
}
@Override public Plan getPlan(String... args) throws IllegalArgumentException { // parse job parameters final int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); final String input = (args.length > 1 ? args[1] : ""); final String output = (args.length > 2 ? args[2] : ""); // This task will read the input data and generate the key/value pairs final FileDataSource source = new FileDataSource(new TeraInputFormat(), input, "Data Source"); source.setDegreeOfParallelism(numSubTasks); // This task writes the sorted data back to disk final FileDataSink sink = new FileDataSink(new TeraOutputFormat(), output, "Data Sink"); sink.setDegreeOfParallelism(numSubTasks); sink.setGlobalOrder(new Ordering(0, TeraKey.class, Order.ASCENDING), new TeraDistribution()); sink.setInput(source); return new Plan(sink, "TeraSort"); } }
@Override public Plan getPlan(String... args) throws IllegalArgumentException { // parse program parameters int numSubtasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1); String recordsPath = (args.length > 1 ? args[1] : ""); String output = (args.length > 2 ? args[2] : ""); FileDataSource source = new FileDataSource(CsvInputFormat.class, recordsPath); source.setDegreeOfParallelism(numSubtasks); CsvInputFormat.configureRecordFormat(source) .recordDelimiter('\n') .fieldDelimiter('|') .field(IntValue.class, 0); FileDataSink sink = new FileDataSink(CsvOutputFormat.class, output); sink.setDegreeOfParallelism(numSubtasks); CsvOutputFormat.configureRecordFormat(sink) .recordDelimiter('\n') .fieldDelimiter('|') .lenient(true) .field(IntValue.class, 0); sink.setGlobalOrder(new Ordering(0, IntValue.class, Order.ASCENDING), new UniformIntegerDistribution(Integer.MIN_VALUE, Integer.MAX_VALUE)); sink.setInput(source); return new Plan(sink); }
.appendOrdering(2, IntValue.class, Order.DESCENDING), new TripleIntDistribution(Order.DESCENDING, Order.ASCENDING, Order.DESCENDING)); sink.setInput(source);
/**
 * Builds the union test plan: two file sources are both added as inputs of one map operator,
 * and the mapper's output goes to a file sink with a degree of parallelism of 1.
 * Each source reads from the empty input path when its config entry equals "empty",
 * and from the regular input path otherwise.
 *
 * @return The plan rooted at the output sink.
 */
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
protected Plan getTestJob() {
    // Resolve which file each source reads from.
    String firstPath = inPath;
    if (config.getString("UnionTest#Input1Path", "").equals("empty")) {
        firstPath = emptyInPath;
    }
    String secondPath = inPath;
    if (config.getString("UnionTest#Input2Path", "").equals("empty")) {
        secondPath = emptyInPath;
    }

    // First source.
    FileDataSource firstSource = new FileDataSource(new ContractITCaseInputFormat(), firstPath);
    DelimitedInputFormat.configureDelimitedFormat(firstSource).recordDelimiter('\n');
    firstSource.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

    // Second source.
    FileDataSource secondSource = new FileDataSource(new ContractITCaseInputFormat(), secondPath);
    DelimitedInputFormat.configureDelimitedFormat(secondSource).recordDelimiter('\n');
    secondSource.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

    // Mapper that receives both sources.
    MapOperator mapper = MapOperator.builder(new TestMapper()).build();
    mapper.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

    // Sink.
    FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    sink.setDegreeOfParallelism(1);

    // Wire the data flow: sink <- mapper <- (firstSource, secondSource).
    sink.setInput(mapper);
    mapper.addInput(firstSource);
    mapper.addInput(secondSource);

    return new Plan(sink);
}
/**
 * Builds the map test plan: one file source feeds one map operator, whose output is written
 * by a file sink with a degree of parallelism of 1.
 *
 * @return The plan rooted at the output sink.
 */
@Override
protected Plan getTestJob() {
    // Source: newline-delimited records.
    FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), inPath);
    DelimitedInputFormat.configureDelimitedFormat(source).recordDelimiter('\n');
    source.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    // Mapper under test.
    MapOperator mapper = MapOperator.builder(new TestMapper()).build();
    mapper.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    // Sink.
    FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    sink.setDegreeOfParallelism(1);

    // Wire the data flow: sink <- mapper <- source.
    sink.setInput(mapper);
    mapper.setInput(source);

    return new Plan(sink);
}
result.setInput(groupByReturnFlag);
@Override protected JobGraph getJobGraph() throws Exception { // init data source FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath); // init (working) map task MapOperator testMapper = MapOperator.builder(TestMapper.class).build(); // init data sink FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath); // compose working program output.setInput(testMapper); testMapper.setInput(input); // generate plan Plan plan = new Plan(output); plan.setDefaultParallelism(4); // optimize and compile plan PactCompiler pc = new PactCompiler(new DataStatistics()); OptimizedPlan op = pc.compile(plan); // return job graph of working job NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator(); return jgg.compileJobGraph(op); }
@Override protected JobGraph getFailingJobGraph() throws Exception { // init data source FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath); // init failing map task MapOperator testMapper = MapOperator.builder(FailingMapper.class).build(); // init data sink FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath); // compose failing program output.setInput(testMapper); testMapper.setInput(input); // generate plan Plan plan = new Plan(output); plan.setDefaultParallelism(4); // optimize and compile plan PactCompiler pc = new PactCompiler(new DataStatistics()); OptimizedPlan op = pc.compile(plan); // return job graph of failing job NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator(); return jgg.compileJobGraph(op); }
result.setDegreeOfParallelism(numSubTasks); result.setInput(findShortestPaths); findShortestPaths.setFirstInput(pathsInput); findShortestPaths.setSecondInput(concatPaths);
.field(StringValue.class, 2); triangles.setInput(closeTriads); closeTriads.setSecondInput(edges); closeTriads.setFirstInput(buildTriads);
output.setDegreeOfParallelism(1); output.setInput(testCross); testCross.setFirstInput(input_left); testCross.setSecondInput(input_right);
output.setDegreeOfParallelism(1); output.setInput(testMatcher); testMatcher.setFirstInput(input_left); testMatcher.setSecondInput(input_right);
/**
 * Builds the job graph of the reduce test: one file source feeds a reduce operator keyed on
 * field 0 ({@code StringValue}), whose output is written by a file sink with a degree of
 * parallelism of 1. Local and ship strategy hints are taken from the test configuration.
 *
 * @return The job graph generated from the optimized plan.
 * @throws Exception Declared by the method signature.
 */
@Override
protected JobGraph getJobGraph() throws Exception {
    // Source: newline-delimited records.
    FileDataSource source = new FileDataSource(new ContractITCaseInputFormat(), inPath);
    DelimitedInputFormat.configureDelimitedFormat(source).recordDelimiter('\n');
    source.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));

    // Reducer under test, with the configured compiler hints.
    ReduceOperator reducer = ReduceOperator
            .builder(new TestReducer(), StringValue.class, 0)
            .build();
    reducer.setDegreeOfParallelism(config.getInteger("ReduceTest#NoSubtasks", 1));
    reducer.getParameters().setString(
            PactCompiler.HINT_LOCAL_STRATEGY,
            config.getString("ReduceTest#LocalStrategy", ""));
    reducer.getParameters().setString(
            PactCompiler.HINT_SHIP_STRATEGY,
            config.getString("ReduceTest#ShipStrategy", ""));

    // Sink.
    FileDataSink sink = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    sink.setDegreeOfParallelism(1);

    // Wire the data flow: sink <- reducer <- source.
    sink.setInput(reducer);
    reducer.setInput(source);

    // Optimize the plan and generate the job graph from it.
    Plan reducePlan = new Plan(sink);
    OptimizedPlan optimized = new PactCompiler(new DataStatistics()).compile(reducePlan);
    return new NepheleJobGraphGenerator().compileJobGraph(optimized);
}
/**
 * Builds the co-group test plan: a left and a right file source feed a co-group operator keyed on
 * field 0 of both inputs ({@code StringValue}), whose output is written by a file sink with a
 * degree of parallelism of 1. Local and ship strategy hints are taken from the test configuration.
 *
 * @return The plan rooted at the output sink.
 */
@Override
protected Plan getTestJob() {
    // Left source: newline-delimited records.
    FileDataSource leftSource = new FileDataSource(new CoGroupTestInFormat(), leftInPath);
    DelimitedInputFormat.configureDelimitedFormat(leftSource).recordDelimiter('\n');
    leftSource.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));

    // Right source: newline-delimited records.
    FileDataSource rightSource = new FileDataSource(new CoGroupTestInFormat(), rightInPath);
    DelimitedInputFormat.configureDelimitedFormat(rightSource).recordDelimiter('\n');
    rightSource.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));

    // Co-grouper under test, with the configured compiler hints.
    CoGroupOperator coGrouper = CoGroupOperator
            .builder(new TestCoGrouper(), StringValue.class, 0, 0)
            .build();
    coGrouper.setDegreeOfParallelism(config.getInteger("CoGroupTest#NoSubtasks", 1));
    coGrouper.getParameters().setString(
            PactCompiler.HINT_LOCAL_STRATEGY,
            config.getString("CoGroupTest#LocalStrategy", ""));
    coGrouper.getParameters().setString(
            PactCompiler.HINT_SHIP_STRATEGY,
            config.getString("CoGroupTest#ShipStrategy", ""));

    // Sink.
    FileDataSink sink = new FileDataSink(new CoGroupOutFormat(), resultPath);
    sink.setDegreeOfParallelism(1);

    // Wire the data flow: sink <- coGrouper <- (leftSource, rightSource).
    sink.setInput(coGrouper);
    coGrouper.setFirstInput(leftSource);
    coGrouper.setSecondInput(rightSource);

    return new Plan(sink);
}
@Override protected Plan getTestJob() { // Sc1 generates M parameters a,b,c for second degree polynomials P(x) = ax^2 + bx + c identified by id FileDataSource sc1 = new FileDataSource(new CsvInputFormat(), sc1Path); CsvInputFormat.configureRecordFormat(sc1).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1) .field(IntValue.class, 2).field(IntValue.class, 3); // Sc2 generates N x values to be evaluated with the polynomial identified by id FileDataSource sc2 = new FileDataSource(new CsvInputFormat(), sc2Path); CsvInputFormat.configureRecordFormat(sc2).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1); // Sc3 generates N y values to be evaluated with the polynomial identified by id FileDataSource sc3 = new FileDataSource(new CsvInputFormat(), sc3Path); CsvInputFormat.configureRecordFormat(sc3).fieldDelimiter(' ').field(StringValue.class, 0).field(IntValue.class, 1); // Jn1 matches x and y values on id and emits (id, x, y) triples JoinOperator jn1 = JoinOperator.builder(Jn1.class, StringValue.class, 0, 0).input1(sc2).input2(sc3).build(); // Jn2 matches polynomial and arguments by id, computes p = min(P(x),P(y)) and emits (id, p) tuples JoinOperator jn2 = JoinOperator.builder(Jn2.class, StringValue.class, 0, 0).input1(jn1).input2(sc1).build(); // Mp1 selects (id, x, y) triples where x = y and broadcasts z (=x=y) to Mp2 MapOperator mp1 = MapOperator.builder(Mp1.class).input(jn1).build(); // Mp2 filters out all p values which can be divided by z MapOperator mp2 = MapOperator.builder(Mp2.class).setBroadcastVariable("z", mp1).input(jn2).build(); FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath); output.setDegreeOfParallelism(1); output.setInput(mp2); return new Plan(output); }