@Override
public Plan getPlan(String... args) {
    // parse job parameters
    int numSubTasks = (args.length > 0 ? Integer.parseInt(args[0]) : 1);
    String dataInput = (args.length > 1 ? args[1] : "");
    String output = (args.length > 2 ? args[2] : "");

    HadoopDataSource<LongWritable, Text> source = new HadoopDataSource<LongWritable, Text>(
            new TextInputFormat(), new JobConf(), "Input Lines");
    TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));

    MapOperator mapper = MapOperator.builder(new TokenizeLine())
            .input(source)
            .name("Tokenize Lines")
            .build();
    ReduceOperator reducer = ReduceOperator.builder(CountWords.class, StringValue.class, 0)
            .input(mapper)
            .name("Count Words")
            .build();

    HadoopDataSink<Text, IntWritable> out = new HadoopDataSink<Text, IntWritable>(
            new TextOutputFormat<Text, IntWritable>(), new JobConf(), "Hadoop TextOutputFormat",
            reducer, Text.class, IntWritable.class);
    TextOutputFormat.setOutputPath(out.getJobConf(), new Path(output));

    Plan plan = new Plan(out, "Hadoop OutputFormat Example");
    plan.setDefaultParallelism(numSubTasks);
    return plan;
}
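// A minimal driver sketch for the plan above, assuming the LocalExecutor
// used by other examples in this codebase. The wrapping class name and the
// file paths are hypothetical placeholders, not part of the original code.
public static void main(String[] args) throws Exception {
    Plan plan = new WordCountWithHadoopFormats().getPlan(
            "4", "file:///tmp/words.txt", "file:///tmp/wordcount-result");
    LocalExecutor.execute(plan);
}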
/**
 * The constructor that is invoked only from the {@link Builder}.
 *
 * @param builder The builder holding the operator's configuration.
 */
protected MapOperator(Builder builder) {
    super(builder.udf, OperatorInfoHelper.unary(), builder.name);

    if (builder.inputs != null && !builder.inputs.isEmpty()) {
        setInput(Operator.createUnionCascade(builder.inputs));
    }

    setBroadcastVariables(builder.broadcastInputs);
    setSemanticProperties(FunctionAnnotation.readSingleConstantAnnotations(builder.udf));
}
/**
 * Creates and returns a MapOperator using the values given to the builder.
 *
 * @return The created operator.
 */
public MapOperator build() {
    if (name == null) {
        name = udf.getUserCodeClass().getName();
    }
    return new MapOperator(this);
}
}
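// Usage sketch for the builder above, assuming the Record-API MapFunction,
// Record, StringValue, and Collector types used by the surrounding examples.
// ToUpperCaseMapper and its single-StringValue field layout are hypothetical,
// for illustration only.
public static class ToUpperCaseMapper extends MapFunction {
    @Override
    public void map(Record record, Collector<Record> out) {
        StringValue line = record.getField(0, StringValue.class);
        line.setValue(line.getValue().toUpperCase());
        record.setField(0, line);
        out.collect(record);
    }
}

// Without an explicit name(), build() falls back to the UDF's class name.
MapOperator upperCaser = MapOperator.builder(new ToUpperCaseMapper())
        .input(source)  // 'source' stands for any upstream operator
        .build();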
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
protected JobGraph getJobGraph() throws Exception {
    String path1 = config.getBoolean("input1PathHasData", false) ? textInput : emptyInput;
    String path2 = config.getBoolean("input2PathHasData", false) ? textInput : emptyInput;

    FileDataSource input1 = new FileDataSource(new ContractITCaseInputFormat(), path1);
    FileDataSource input2 = new FileDataSource(new ContractITCaseInputFormat(), path2);

    MapOperator testMapper1 = MapOperator.builder(new TestMapper()).build();
    MapOperator testMapper2 = MapOperator.builder(new TestMapper()).build();

    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultDir);

    testMapper1.setInput(input1);
    testMapper2.setInput(input2);
    output.addInput(testMapper1);
    output.addInput(testMapper2);

    Plan plan = new Plan(output);
    plan.setDefaultParallelism(4);

    PactCompiler pc = new PactCompiler(new DataStatistics());
    OptimizedPlan op = pc.compile(plan);

    NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
    return jgg.compileJobGraph(op);
}
@Override
protected Plan getTestJob() {
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inPath);
    DelimitedInputFormat.configureDelimitedFormat(input)
            .recordDelimiter('\n');
    input.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    MapOperator testMapper = MapOperator.builder(new TestMapper()).build();
    testMapper.setDegreeOfParallelism(config.getInteger("MapTest#NoSubtasks", 1));

    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    output.setDegreeOfParallelism(1);

    output.setInput(testMapper);
    testMapper.setInput(input);

    return new Plan(output);
}
MapOperator filterO = MapOperator.builder(new FilterO())
        .input(orders)
        .name("FilterO")
        .build();
filterO.setParameter(YEAR_FILTER, 1993);
filterO.setParameter(PRIO_FILTER, "5");
filterO.getCompilerHints().setFilterFactor(0.05f);
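// Sketch of the consuming side (hypothetical): a stub can read the operator
// parameters set above through the Configuration passed to its open() hook.
// The default values and the pass-through map() body are placeholders, not
// taken from the original TPC-H query code.
public static class FilterO extends MapFunction {
    private int yearFilter;
    private String prioFilter;

    @Override
    public void open(Configuration parameters) {
        this.yearFilter = parameters.getInteger(YEAR_FILTER, 1990);
        this.prioFilter = parameters.getString(PRIO_FILTER, "0");
    }

    @Override
    public void map(Record record, Collector<Record> out) {
        // The actual year/priority comparison on the record's fields is
        // omitted; this sketch only shows how the parameters arrive.
        out.collect(record);
    }
}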
@SuppressWarnings({ "deprecation", "unchecked" })
@Override
protected Plan getTestJob() {
    String input1Path = config.getString("UnionTest#Input1Path", "").equals("empty") ? emptyInPath : inPath;
    String input2Path = config.getString("UnionTest#Input2Path", "").equals("empty") ? emptyInPath : inPath;

    FileDataSource input1 = new FileDataSource(new ContractITCaseInputFormat(), input1Path);
    DelimitedInputFormat.configureDelimitedFormat(input1)
            .recordDelimiter('\n');
    input1.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

    FileDataSource input2 = new FileDataSource(new ContractITCaseInputFormat(), input2Path);
    DelimitedInputFormat.configureDelimitedFormat(input2)
            .recordDelimiter('\n');
    input2.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

    MapOperator testMapper = MapOperator.builder(new TestMapper()).build();
    testMapper.setDegreeOfParallelism(config.getInteger("UnionTest#NoSubtasks", 1));

    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);
    output.setDegreeOfParallelism(1);

    output.setInput(testMapper);
    testMapper.addInput(input1);
    testMapper.addInput(input2);

    return new Plan(output);
}
MapOperator filterDocs = MapOperator.builder(new FilterDocs())
        .input(docs)
        .name("Filter Docs")
        .build();
filterDocs.getCompilerHints().setFilterFactor(0.15f);

MapOperator filterRanks = MapOperator.builder(new FilterRanks())
        .input(ranks)
        .name("Filter Ranks")
        .build();
filterRanks.getCompilerHints().setFilterFactor(0.25f);

MapOperator filterVisits = MapOperator.builder(new FilterVisits())
        .input(visits)
        .name("Filter Visits")
        .build();
filterVisits.getCompilerHints().setFilterFactor(0.2f);
@Override
public Class<? extends Key<?>>[] getKeyClasses() {
    return emptyClassArray();
}
setSourceStatistics(ordersSource, orderSize, 100f);
setSourceStatistics(lineItemSource, lineitemSize, 140f);

mapper.getCompilerHints().setAvgOutputRecordSize(16f);
mapper.getCompilerHints().setFilterFactor(orderSelectivity);
joiner.getCompilerHints().setFilterFactor(joinSelectivity);
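// Note on the hints above: the filter factor is the optimizer's estimate of
// the fraction of input records an operator emits, so a factor of 0.05f
// declares that roughly 5% of records survive (1,000,000 inputs would be
// estimated as ~50,000 outputs downstream). The third argument to
// setSourceStatistics is presumably the average record width in bytes.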
@Override
protected JobGraph getJobGraph() throws Exception {
    // init data source
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);

    // init (working) map task
    MapOperator testMapper = MapOperator.builder(TestMapper.class).build();

    // init data sink
    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);

    // compose working program
    output.setInput(testMapper);
    testMapper.setInput(input);

    // generate plan
    Plan plan = new Plan(output);
    plan.setDefaultParallelism(4);

    // optimize and compile plan
    PactCompiler pc = new PactCompiler(new DataStatistics());
    OptimizedPlan op = pc.compile(plan);

    // return job graph of working job
    NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
    return jgg.compileJobGraph(op);
}
MapOperator lineItemFilter = MapOperator.builder(new LineItemFilter())
        .name("LineItem Filter")
        .build();
lineItemFilter.setDegreeOfParallelism(this.degreeOfParallelism);

lineItemFilter.setInput(lineItems);
groupByReturnFlag.setInput(lineItemFilter);
result.setInput(groupByReturnFlag);
MapOperator filterO1 = MapOperator.builder(new FilterO())
        .name("FilterO")
        .input(orders1)
        .build();
filterO1.setParameter(TPCHQuery3.YEAR_FILTER, 1993);
filterO1.setParameter(TPCHQuery3.PRIO_FILTER, "5");
filterO1.getCompilerHints().setFilterFactor(0.05f);

MapOperator filterO2 = MapOperator.builder(new FilterO())
        .name("FilterO")
        .input(orders2)
        .build();
filterO2.setParameter(TPCHQuery3.YEAR_FILTER, 1993);
filterO2.setParameter(TPCHQuery3.PRIO_FILTER, "5");
TextInputFormat.addInputPath(source.getJobConf(), new Path(dataInput));

MapOperator mapper = MapOperator.builder(new TokenizeLine())
        .input(source)
        .name("Tokenize Lines")
        .build();
@Override
protected JobGraph getFailingJobGraph() throws Exception {
    // init data source
    FileDataSource input = new FileDataSource(new ContractITCaseInputFormat(), inputPath);

    // init failing map task
    MapOperator testMapper = MapOperator.builder(FailingMapper.class).build();

    // init data sink
    FileDataSink output = new FileDataSink(new ContractITCaseOutputFormat(), resultPath);

    // compose failing program
    output.setInput(testMapper);
    testMapper.setInput(input);

    // generate plan
    Plan plan = new Plan(output);
    plan.setDefaultParallelism(4);

    // optimize and compile plan
    PactCompiler pc = new PactCompiler(new DataStatistics());
    OptimizedPlan op = pc.compile(plan);

    // return job graph of failing job
    NepheleJobGraphGenerator jgg = new NepheleJobGraphGenerator();
    return jgg.compileJobGraph(op);
}
MapOperator lineFilter = MapOperator.builder(LiFilter.class)
        .name("LineItemFilter")
        .build();
lineFilter.setDegreeOfParallelism(degreeOfParallelism);

MapOperator ordersFilter = MapOperator.builder(OFilter.class)
        .name("OrdersFilter")
        .build();
ordersFilter.setDegreeOfParallelism(degreeOfParallelism);

aggregation.setDegreeOfParallelism(this.degreeOfParallelism);

lineFilter.setInput(lineItems);
ordersFilter.setInput(orders);
join.setFirstInput(ordersFilter);
join.setSecondInput(lineFilter);
public static void main(String[] args) throws Exception {
    GenericDataSource<UserGeneratingInputFormat> source =
            new GenericDataSource<UserGeneratingInputFormat>(UserGeneratingInputFormat.class);

    MapOperator mapper = MapOperator.builder(new NumberExtractingMapper())
            .input(source).name("le mapper").build();

    ReduceOperator reducer = ReduceOperator.builder(new ConcatenatingReducer(), IntValue.class, 1)
            .input(mapper).name("le reducer").build();

    GenericDataSink sink = new GenericDataSink(PrintingOutputFormat.class, reducer);

    Plan p = new Plan(sink);
    p.setDefaultParallelism(4);
    LocalExecutor.execute(p);
}
MapOperator mapO = MapOperator.builder(FilterO.class)
        .name("FilterO")
        .build();
MapOperator mapLi = MapOperator.builder(FilterLI.class)
        .name("FilterLi")
        .build();
MapOperator projectC = MapOperator.builder(ProjectC.class)
        .name("ProjectC")
        .build();
MapOperator projectN = MapOperator.builder(ProjectN.class)
        .name("ProjectN")
        .build();

joinOL.setSecondInput(mapLi);
projectC.setInput(customers);
projectN.setInput(nations);
mapLi.setInput(lineitems);
mapO.setInput(orders);
public void testMapPriorToFirstRecordReading() throws Exception {
    GenericDataSource<InfiniteIntegerInputFormat> source =
            new GenericDataSource<InfiniteIntegerInputFormat>(new InfiniteIntegerInputFormat(), "Source");

    MapOperator mapper = MapOperator.builder(StuckInOpenIdentityMapper.class)
            .input(source)
            .name("Stuck-In-Open Mapper")
            .build();

    GenericDataSink sink = new GenericDataSink(new DiscardingOutputFormat(), mapper, "Sink");

    Plan p = new Plan(sink);
    p.setDefaultParallelism(4);

    runAndCancelJob(p, 10 * 1000, 10 * 1000);
}
MapOperator filterPart = MapOperator.builder(PartFilter.class)
        .name("filterParts")
        .build();
MapOperator mapPartsupp = MapOperator.builder(PartsuppMap.class)
        .name("mapPartsupp")
        .build();
MapOperator mapOrder = MapOperator.builder(OrderMap.class)
        .name("mapOrder")
        .build();
MapOperator mapLineItem = MapOperator.builder(LineItemMap.class)
        .name("projectLineItem")
        .build();
MapOperator mapSupplier = MapOperator.builder(SupplierMap.class)
        .name("projectSupplier")
        .build();

filterPart.setInput(partInput);
mapPartsupp.setInput(partSuppInput);
mapOrder.setInput(ordersInput);
mapLineItem.setInput(lineItemInput);
mapSupplier.setInput(supplierInput);