private void setupReducer(Path output, int numberOfReducers) throws IOException { job.setReducerClass(UHCDictionaryReducer.class); job.setPartitionerClass(UHCDictionaryPartitioner.class); job.setNumReduceTasks(numberOfReducers); MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class); FileOutputFormat.setOutputPath(job, output); job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString()); //prevent to create zero-sized default output LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); deletePath(job.getConfiguration(), output); }
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException { FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance()); int numberOfReducers = reducerMapping.getTotalReducerNum(); logger.info("{} has reducers {}.", this.getClass().getName(), numberOfReducers); if (numberOfReducers > 250) { throw new IllegalArgumentException( "The max reducer number for FactDistinctColumnsJob is 250, but now it is " + numberOfReducers + ", decrease 'kylin.engine.mr.uhc-reducer-count'"); } job.setReducerClass(FactDistinctColumnsReducer.class); job.setPartitionerClass(FactDistinctColumnPartitioner.class); job.setNumReduceTasks(numberOfReducers); // make each reducer output to respective dir MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class); MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class); MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class, NullWritable.class, LongWritable.class); FileOutputFormat.setOutputPath(job, output); job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString()); // prevent to create zero-sized default output LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); deletePath(job.getConfiguration(), output); }
// Register SequenceFileOutputFormat through the LazyOutputFormat wrapper (Hadoop's lazy output
// creation), then set `output` as the job's file output path.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, output);
// Register SequenceFileOutputFormat through the LazyOutputFormat wrapper (Hadoop's lazy output
// creation). The output directory is read from the OPTION_OUTPUT_PATH option value and set as
// the job's file output path.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH)); FileOutputFormat.setOutputPath(job, output);
// Register SequenceFileOutputFormat through the LazyOutputFormat wrapper (Hadoop's lazy output
// creation), then set `output` as the job's file output path.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, output);
// Register SequenceFileOutputFormat as the job's output format, wrapped in LazyOutputFormat
// (Hadoop's lazy output creation).
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
private void setOutputFormat(org.apache.hadoop.mapreduce.Job job) { // the OutputFormat we report to Hadoop is always PigOutputFormat which // can be wrapped with LazyOutputFormat provided if PigConfiguration.PIG_OUTPUT_LAZY is set if ("true".equalsIgnoreCase(job.getConfiguration().get(PigConfiguration.PIG_OUTPUT_LAZY))) { LazyOutputFormat.setOutputFormatClass(job,PigOutputFormatTez.class); } else { job.setOutputFormatClass(PigOutputFormatTez.class); } }
public static void setOutputFormat(org.apache.hadoop.mapreduce.Job job) { // the OutputFormat we report to Hadoop is always PigOutputFormat which // can be wrapped with LazyOutputFormat provided if PigConfiguration.PIG_OUTPUT_LAZY is set if ("true".equalsIgnoreCase(job.getConfiguration().get(PigConfiguration.PIG_OUTPUT_LAZY))) { LazyOutputFormat.setOutputFormatClass(job,PigOutputFormat.class); } else { job.setOutputFormatClass(PigOutputFormat.class); } }
private void setupReducer(Path output, int numberOfReducers) throws IOException { job.setReducerClass(UHCDictionaryReducer.class); job.setPartitionerClass(UHCDictionaryPartitioner.class); job.setNumReduceTasks(numberOfReducers); MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class); FileOutputFormat.setOutputPath(job, output); job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString()); //prevent to create zero-sized default output LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); deletePath(job.getConfiguration(), output); }
/**
 * Builds and runs the "Test-Lazy-Output" job, asserting that it completes successfully.
 *
 * @param conf         configuration the job is created from
 * @param output       directory the job writes to
 * @param numReducers  number of reduce tasks
 * @param createLazily when {@code true}, {@code TextOutputFormat} is registered through
 *                     {@code LazyOutputFormat}; otherwise it is set on the job directly
 * @throws Exception if job setup or execution fails
 */
private static void runTestLazyOutput(Configuration conf, Path output,
        int numReducers, boolean createLazily) throws Exception {
    Job job = Job.getInstance(conf, "Test-Lazy-Output");

    // Input and output locations.
    FileInputFormat.setInputPaths(job, INPUT);
    FileOutputFormat.setOutputPath(job, output);

    // Job classes and record types.
    job.setJarByClass(TestMapReduceLazyOutput.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(numReducers);
    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);

    // The only difference between the two modes is whether the output format is lazily wrapped.
    if (!createLazily) {
        job.setOutputFormatClass(TextOutputFormat.class);
    } else {
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    }

    boolean succeeded = job.waitForCompletion(true);
    assertTrue(succeeded);
}
// Job wiring: NoSplitTextInputFormat as the input format, numReducers reduce tasks,
// TextOutputFormat registered through the LazyOutputFormat wrapper, XMLParserMapper as the
// mapper and GenerateDNBlockInfosReducer as the reducer.
job.setInputFormatClass(NoSplitTextInputFormat.class); job.setNumReduceTasks(numReducers); LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class); job.setMapperClass(XMLParserMapper.class); job.setReducerClass(GenerateDNBlockInfosReducer.class);
/**
 * Set up the MapReduce job to output a schema (TBox).
 *
 * <p>The output path is obtained from {@code MRReasoningUtils.getSchemaPath} using the job's
 * configuration. Output records are {@code NullWritable} keys with {@code SchemaWritable}
 * values, written as sequence files through {@code LazyOutputFormat}. Two named outputs are
 * registered: "schemaobj" (sequence file) and {@code MRReasoningUtils.DEBUG_OUT} (text), and
 * counters for named outputs are enabled.
 */
protected void configureSchemaOutput() {
    Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
    SequenceFileOutputFormat.setOutputPath(job, outPath);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(SchemaWritable.class);

    // LazyOutputFormat.setOutputFormatClass installs LazyOutputFormat as the job's output
    // format and records SequenceFileOutputFormat as the wrapped format, so a separate
    // job.setOutputFormatClass(SequenceFileOutputFormat.class) would just be overwritten.
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    MultipleOutputs.addNamedOutput(job, "schemaobj",
            SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
            TextOutputFormat.class, Text.class, Text.class);
    MultipleOutputs.setCountersEnabled(job, true);
}
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException { FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance()); int numberOfReducers = reducerMapping.getTotalReducerNum(); logger.info("{} has reducers {}.", this.getClass().getName(), numberOfReducers); if (numberOfReducers > 250) { throw new IllegalArgumentException( "The max reducer number for FactDistinctColumnsJob is 250, but now it is " + numberOfReducers + ", decrease 'kylin.engine.mr.uhc-reducer-count'"); } job.setReducerClass(FactDistinctColumnsReducer.class); job.setPartitionerClass(FactDistinctColumnPartitioner.class); job.setNumReduceTasks(numberOfReducers); // make each reducer output to respective dir MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class); MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class); MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class, NullWritable.class, LongWritable.class); FileOutputFormat.setOutputPath(job, output); job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString()); // prevent to create zero-sized default output LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); deletePath(job.getConfiguration(), output); }
/**
 * Builds and runs the "Test-Lazy-Output" job, asserting that it completes successfully.
 *
 * @param conf         configuration the job is created from
 * @param output       directory the job writes to
 * @param numReducers  number of reduce tasks
 * @param createLazily when {@code true}, {@code TextOutputFormat} is registered through
 *                     {@code LazyOutputFormat}; otherwise it is set on the job directly
 * @throws Exception if job setup or execution fails
 */
private static void runTestLazyOutput(Configuration conf, Path output,
        int numReducers, boolean createLazily) throws Exception {
    Job job = Job.getInstance(conf, "Test-Lazy-Output");

    // Input and output locations.
    FileInputFormat.setInputPaths(job, INPUT);
    FileOutputFormat.setOutputPath(job, output);

    // Job classes and record types.
    job.setJarByClass(TestMapReduceLazyOutput.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(numReducers);
    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);

    // The only difference between the two modes is whether the output format is lazily wrapped.
    if (!createLazily) {
        job.setOutputFormatClass(TextOutputFormat.class);
    } else {
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    }

    boolean succeeded = job.waitForCompletion(true);
    assertTrue(succeeded);
}
// Register SequenceFileOutputFormat through the LazyOutputFormat wrapper (Hadoop's lazy output
// creation), then set `output` as the job's file output path.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, output);
// Register SequenceFileOutputFormat through the LazyOutputFormat wrapper (Hadoop's lazy output
// creation). The output directory is read from the OPTION_OUTPUT_PATH option value and set as
// the job's file output path.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH)); FileOutputFormat.setOutputPath(job, output);
// Register SequenceFileOutputFormat through the LazyOutputFormat wrapper (Hadoop's lazy output
// creation), then set `output` as the job's file output path.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, output);
/**
 * Set up a MapReduce job to output human-readable text.
 *
 * <p>The output path is obtained from {@code MRReasoningUtils.getOutputPath} for the given
 * destination. {@code TextOutputFormat} is registered through {@code LazyOutputFormat}, five
 * text named outputs are declared (intermediate, terminal, schema, inconsistent, debug), and
 * counters for named outputs are enabled.
 *
 * @param destination passed to {@code MRReasoningUtils.getOutputPath} to determine the output path
 */
protected void configureTextOutput(String destination) {
    Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
    TextOutputFormat.setOutputPath(job, outPath);
    LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

    // These four named outputs share the same shape: text files of NullWritable -> Text.
    String[] valueOnlyOutputs = {
        MRReasoningUtils.INTERMEDIATE_OUT,
        MRReasoningUtils.TERMINAL_OUT,
        MRReasoningUtils.SCHEMA_OUT,
        MRReasoningUtils.INCONSISTENT_OUT,
    };
    for (String namedOutput : valueOnlyOutputs) {
        MultipleOutputs.addNamedOutput(job, namedOutput,
                TextOutputFormat.class, NullWritable.class, Text.class);
    }

    // The debug output differs: it carries Text keys as well as Text values.
    MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
            TextOutputFormat.class, Text.class, Text.class);

    MultipleOutputs.setCountersEnabled(job, true);
}
.get(PigConfiguration.PIG_OUTPUT_LAZY))) { Job storeJob = new Job(jobConf); LazyOutputFormat.setOutputFormatClass(storeJob, PigOutputFormat.class); jobConf = (JobConf) storeJob.getConfiguration();
// Register SequenceFileOutputFormat through the LazyOutputFormat wrapper (Hadoop's lazy output
// creation), and declare the named output MRReasoningUtils.INTERMEDIATE_OUT as a sequence file
// with Fact keys and NullWritable values.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class); MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT, SequenceFileOutputFormat.class, Fact.class, NullWritable.class);