org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat.setOutputFormatClass java code examples

/**
 * Configures the reduce side of the job: reducer and partitioner classes, reducer count,
 * output location and the named dictionary output.
 *
 * @param output job output path; also stored in the configuration under
 *     {@code BatchConstants.CFG_OUTPUT_PATH}
 * @param numberOfReducers number of reduce tasks to run
 * @throws IOException declared by the setup calls made here (e.g. setting the output path)
 */
private void setupReducer(Path output, int numberOfReducers) throws IOException {
  // Reduce-side wiring.
  job.setNumReduceTasks(numberOfReducers);
  job.setPartitionerClass(UHCDictionaryPartitioner.class);
  job.setReducerClass(UHCDictionaryReducer.class);

  // Output location, both for Hadoop and for code reading it back from the configuration.
  FileOutputFormat.setOutputPath(job, output);
  job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

  // Records go to the named output; the lazy wrapper keeps the job from creating
  // zero-sized default output files.
  LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
  MultipleOutputs.addNamedOutput(
      job,
      BatchConstants.CFG_OUTPUT_DICT,
      SequenceFileOutputFormat.class,
      NullWritable.class,
      ArrayPrimitiveWritable.class);

  // NOTE(review): presumably removes a pre-existing output directory; confirm in deletePath.
  deletePath(job.getConfiguration(), output);
}

/**
 * Configures the reduce side of the fact-distinct-columns job.
 *
 * <p>The reducer count comes from {@code FactDistinctColumnsReducerMapping}; more than 250
 * reducers is rejected. Four named outputs (column, dict, statistics, partition) are
 * registered.
 *
 * @param output job output path; also stored under {@code BatchConstants.CFG_OUTPUT_PATH}
 * @param cubeSeg segment whose cube instance determines the reducer mapping
 * @throws IOException declared by the setup calls made here
 * @throws IllegalArgumentException if the computed reducer count exceeds 250
 */
private void setupReducer(Path output, CubeSegment cubeSeg)
    throws IOException {
  int reducerCount =
      new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance()).getTotalReducerNum();
  logger.info("{} has reducers {}.", this.getClass().getName(), reducerCount);

  if (reducerCount > 250) {
    String message = "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
        + reducerCount
        + ", decrease 'kylin.engine.mr.uhc-reducer-count'";
    throw new IllegalArgumentException(message);
  }

  // Reduce-side wiring.
  job.setNumReduceTasks(reducerCount);
  job.setPartitionerClass(FactDistinctColumnPartitioner.class);
  job.setReducerClass(FactDistinctColumnsReducer.class);

  // Output location, both for Hadoop and for code reading it back from the configuration.
  FileOutputFormat.setOutputPath(job, output);
  job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

  // The lazy wrapper keeps the job from creating zero-sized default output files.
  LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

  // One named output per kind of result, so each reducer writes to its respective dir.
  MultipleOutputs.addNamedOutput(
      job, BatchConstants.CFG_OUTPUT_COLUMN,
      SequenceFileOutputFormat.class, NullWritable.class, Text.class);
  MultipleOutputs.addNamedOutput(
      job, BatchConstants.CFG_OUTPUT_DICT,
      SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
  MultipleOutputs.addNamedOutput(
      job, BatchConstants.CFG_OUTPUT_STATISTICS,
      SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
  MultipleOutputs.addNamedOutput(
      job, BatchConstants.CFG_OUTPUT_PARTITION,
      TextOutputFormat.class, NullWritable.class, LongWritable.class);

  // NOTE(review): presumably removes a pre-existing output directory; confirm in deletePath.
  deletePath(job.getConfiguration(), output);
}

// Excerpt: set SequenceFileOutputFormat as the underlying output format for LazyOutputFormat,
// then set the job's output path.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
FileOutputFormat.setOutputPath(job, output);

// Excerpt: same setup, with the output path built from the OPTION_OUTPUT_PATH option value.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
FileOutputFormat.setOutputPath(job, output);

// Excerpt: lazy SequenceFileOutputFormat plus output path (`output` is defined outside this excerpt).
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
FileOutputFormat.setOutputPath(job, output);

// Excerpt: only the LazyOutputFormat registration is shown.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

/**
 * Selects the output format class for the job.
 *
 * <p>{@code PigOutputFormatTez} is always the format doing the work; when the
 * {@code PigConfiguration.PIG_OUTPUT_LAZY} setting equals "true" (ignoring case) it is set as
 * the underlying format of {@code LazyOutputFormat} instead of being set on the job directly.
 *
 * @param job job whose output format is configured
 */
private void setOutputFormat(org.apache.hadoop.mapreduce.Job job) {
  String lazyFlag = job.getConfiguration().get(PigConfiguration.PIG_OUTPUT_LAZY);
  // parseBoolean is a null-safe, case-insensitive comparison against "true".
  if (!Boolean.parseBoolean(lazyFlag)) {
    job.setOutputFormatClass(PigOutputFormatTez.class);
    return;
  }
  LazyOutputFormat.setOutputFormatClass(job, PigOutputFormatTez.class);
}

/**
 * Selects the output format class for the job.
 *
 * <p>{@code PigOutputFormat} is always the format doing the work; when the
 * {@code PigConfiguration.PIG_OUTPUT_LAZY} setting equals "true" (ignoring case) it is set as
 * the underlying format of {@code LazyOutputFormat} instead of being set on the job directly.
 *
 * @param job job whose output format is configured
 */
public static void setOutputFormat(org.apache.hadoop.mapreduce.Job job) {
  String lazyFlag = job.getConfiguration().get(PigConfiguration.PIG_OUTPUT_LAZY);
  // parseBoolean is a null-safe, case-insensitive comparison against "true".
  if (!Boolean.parseBoolean(lazyFlag)) {
    job.setOutputFormatClass(PigOutputFormat.class);
    return;
  }
  LazyOutputFormat.setOutputFormatClass(job, PigOutputFormat.class);
}

/**
 * Configures the reduce side of the job: reducer and partitioner classes, reducer count,
 * output location and the named dictionary output.
 *
 * @param output job output path; also stored in the configuration under
 *     {@code BatchConstants.CFG_OUTPUT_PATH}
 * @param numberOfReducers number of reduce tasks to run
 * @throws IOException declared by the setup calls made here (e.g. setting the output path)
 */
private void setupReducer(Path output, int numberOfReducers) throws IOException {
  // Reduce-side wiring.
  job.setNumReduceTasks(numberOfReducers);
  job.setPartitionerClass(UHCDictionaryPartitioner.class);
  job.setReducerClass(UHCDictionaryReducer.class);

  // Output location, both for Hadoop and for code reading it back from the configuration.
  FileOutputFormat.setOutputPath(job, output);
  job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

  // Records go to the named output; the lazy wrapper keeps the job from creating
  // zero-sized default output files.
  LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
  MultipleOutputs.addNamedOutput(
      job,
      BatchConstants.CFG_OUTPUT_DICT,
      SequenceFileOutputFormat.class,
      NullWritable.class,
      ArrayPrimitiveWritable.class);

  // NOTE(review): presumably removes a pre-existing output directory; confirm in deletePath.
  deletePath(job.getConfiguration(), output);
}

/**
 * Builds and runs the "Test-Lazy-Output" job and asserts that it completes successfully.
 *
 * @param conf configuration the job is created from
 * @param output output path of the job
 * @param numReducers number of reduce tasks
 * @param createLazily when true, TextOutputFormat is set as the underlying format of
 *     LazyOutputFormat; otherwise TextOutputFormat is set on the job directly
 * @throws Exception propagated from job setup or execution
 */
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) throws Exception {
  Job testJob = Job.getInstance(conf, "Test-Lazy-Output");
  testJob.setJarByClass(TestMapReduceLazyOutput.class);

  // Input side.
  testJob.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.setInputPaths(testJob, INPUT);
  testJob.setMapperClass(TestMapper.class);

  // Reduce and output side.
  testJob.setReducerClass(TestReducer.class);
  testJob.setNumReduceTasks(numReducers);
  testJob.setOutputKeyClass(LongWritable.class);
  testJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(testJob, output);

  // The only difference between the two modes is whether the format is wrapped lazily.
  if (!createLazily) {
    testJob.setOutputFormatClass(TextOutputFormat.class);
  } else {
    LazyOutputFormat.setOutputFormatClass(testJob, TextOutputFormat.class);
  }

  assertTrue(testJob.waitForCompletion(true));
}

// Excerpt of a job setup: input format, reducer count, output format, then mapper and reducer.
job.setInputFormatClass(NoSplitTextInputFormat.class);
job.setNumReduceTasks(numReducers);
// TextOutputFormat becomes the underlying output format for LazyOutputFormat.
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
job.setMapperClass(XMLParserMapper.class);
job.setReducerClass(GenerateDNBlockInfosReducer.class);

/**
 * Set up the MapReduce job to output a schema (TBox).
 *
 * <p>The output path comes from {@code MRReasoningUtils.getSchemaPath}. The job's key/value
 * classes are {@code NullWritable}/{@code SchemaWritable}, and output is written through
 * {@code LazyOutputFormat} wrapping {@code SequenceFileOutputFormat}. Two named outputs are
 * registered: "schemaobj" (sequence file) and {@code MRReasoningUtils.DEBUG_OUT} (text), with
 * MultipleOutputs counters enabled.
 */
protected void configureSchemaOutput() {
  Path outPath = MRReasoningUtils.getSchemaPath(job.getConfiguration());
  SequenceFileOutputFormat.setOutputPath(job, outPath);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(SchemaWritable.class);
  // LazyOutputFormat.setOutputFormatClass registers the lazy wrapper as the job's output
  // format and records SequenceFileOutputFormat as the underlying format, so a separate
  // job.setOutputFormatClass(SequenceFileOutputFormat.class) call would only be overwritten.
  LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
  MultipleOutputs.addNamedOutput(job, "schemaobj",
    SequenceFileOutputFormat.class, NullWritable.class, SchemaWritable.class);
  MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
    TextOutputFormat.class, Text.class, Text.class);
  MultipleOutputs.setCountersEnabled(job, true);
}

/**
 * Configures the reduce side of the fact-distinct-columns job.
 *
 * <p>The reducer count comes from {@code FactDistinctColumnsReducerMapping}; more than 250
 * reducers is rejected. Four named outputs (column, dict, statistics, partition) are
 * registered.
 *
 * @param output job output path; also stored under {@code BatchConstants.CFG_OUTPUT_PATH}
 * @param cubeSeg segment whose cube instance determines the reducer mapping
 * @throws IOException declared by the setup calls made here
 * @throws IllegalArgumentException if the computed reducer count exceeds 250
 */
private void setupReducer(Path output, CubeSegment cubeSeg)
    throws IOException {
  int reducerCount =
      new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance()).getTotalReducerNum();
  logger.info("{} has reducers {}.", this.getClass().getName(), reducerCount);

  if (reducerCount > 250) {
    String message = "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
        + reducerCount
        + ", decrease 'kylin.engine.mr.uhc-reducer-count'";
    throw new IllegalArgumentException(message);
  }

  // Reduce-side wiring.
  job.setNumReduceTasks(reducerCount);
  job.setPartitionerClass(FactDistinctColumnPartitioner.class);
  job.setReducerClass(FactDistinctColumnsReducer.class);

  // Output location, both for Hadoop and for code reading it back from the configuration.
  FileOutputFormat.setOutputPath(job, output);
  job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

  // The lazy wrapper keeps the job from creating zero-sized default output files.
  LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

  // One named output per kind of result, so each reducer writes to its respective dir.
  MultipleOutputs.addNamedOutput(
      job, BatchConstants.CFG_OUTPUT_COLUMN,
      SequenceFileOutputFormat.class, NullWritable.class, Text.class);
  MultipleOutputs.addNamedOutput(
      job, BatchConstants.CFG_OUTPUT_DICT,
      SequenceFileOutputFormat.class, NullWritable.class, ArrayPrimitiveWritable.class);
  MultipleOutputs.addNamedOutput(
      job, BatchConstants.CFG_OUTPUT_STATISTICS,
      SequenceFileOutputFormat.class, LongWritable.class, BytesWritable.class);
  MultipleOutputs.addNamedOutput(
      job, BatchConstants.CFG_OUTPUT_PARTITION,
      TextOutputFormat.class, NullWritable.class, LongWritable.class);

  // NOTE(review): presumably removes a pre-existing output directory; confirm in deletePath.
  deletePath(job.getConfiguration(), output);
}

/**
 * Builds and runs the "Test-Lazy-Output" job and asserts that it completes successfully.
 *
 * @param conf configuration the job is created from
 * @param output output path of the job
 * @param numReducers number of reduce tasks
 * @param createLazily when true, TextOutputFormat is set as the underlying format of
 *     LazyOutputFormat; otherwise TextOutputFormat is set on the job directly
 * @throws Exception propagated from job setup or execution
 */
private static void runTestLazyOutput(Configuration conf, Path output,
    int numReducers, boolean createLazily) throws Exception {
  Job testJob = Job.getInstance(conf, "Test-Lazy-Output");
  testJob.setJarByClass(TestMapReduceLazyOutput.class);

  // Input side.
  testJob.setInputFormatClass(TextInputFormat.class);
  FileInputFormat.setInputPaths(testJob, INPUT);
  testJob.setMapperClass(TestMapper.class);

  // Reduce and output side.
  testJob.setReducerClass(TestReducer.class);
  testJob.setNumReduceTasks(numReducers);
  testJob.setOutputKeyClass(LongWritable.class);
  testJob.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(testJob, output);

  // The only difference between the two modes is whether the format is wrapped lazily.
  if (!createLazily) {
    testJob.setOutputFormatClass(TextOutputFormat.class);
  } else {
    LazyOutputFormat.setOutputFormatClass(testJob, TextOutputFormat.class);
  }

  assertTrue(testJob.waitForCompletion(true));
}

// Excerpt: set SequenceFileOutputFormat as the underlying output format for LazyOutputFormat,
// then set the job's output path.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
FileOutputFormat.setOutputPath(job, output);

// Excerpt: same setup, with the output path built from the OPTION_OUTPUT_PATH option value.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
FileOutputFormat.setOutputPath(job, output);

// Excerpt: lazy SequenceFileOutputFormat plus output path (`output` is defined outside this excerpt).
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
FileOutputFormat.setOutputPath(job, output);

/**
 * Configure the MapReduce job to write human-readable text.
 *
 * <p>The output path is resolved by {@code MRReasoningUtils.getOutputPath} from the job
 * configuration and {@code destination}. Output goes through {@code LazyOutputFormat} wrapping
 * {@code TextOutputFormat}, with five named text outputs and MultipleOutputs counters enabled.
 *
 * @param destination passed to {@code MRReasoningUtils.getOutputPath} to resolve the path
 */
protected void configureTextOutput(String destination) {
  Path outPath = MRReasoningUtils.getOutputPath(job.getConfiguration(), destination);
  TextOutputFormat.setOutputPath(job, outPath);
  LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);

  // These four outputs share the same NullWritable key / Text value layout.
  String[] valueOnlyOutputs = {
    MRReasoningUtils.INTERMEDIATE_OUT,
    MRReasoningUtils.TERMINAL_OUT,
    MRReasoningUtils.SCHEMA_OUT,
    MRReasoningUtils.INCONSISTENT_OUT
  };
  for (String namedOutput : valueOnlyOutputs) {
    MultipleOutputs.addNamedOutput(job, namedOutput,
      TextOutputFormat.class, NullWritable.class, Text.class);
  }

  // The debug output uses Text for both key and value.
  MultipleOutputs.addNamedOutput(job, MRReasoningUtils.DEBUG_OUT,
    TextOutputFormat.class, Text.class, Text.class);
  MultipleOutputs.setCountersEnabled(job, true);
}

  .get(PigConfiguration.PIG_OUTPUT_LAZY))) {
Job storeJob = new Job(jobConf);
LazyOutputFormat.setOutputFormatClass(storeJob,
    PigOutputFormat.class);
jobConf = (JobConf) storeJob.getConfiguration();

// Excerpt: SequenceFileOutputFormat is set as the underlying output format for LazyOutputFormat,
// and the INTERMEDIATE_OUT named output is registered with Fact keys and NullWritable values.
LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);
MultipleOutputs.addNamedOutput(job, MRReasoningUtils.INTERMEDIATE_OUT,
  SequenceFileOutputFormat.class, Fact.class, NullWritable.class);

Javadoc

Set the underlying output format for LazyOutputFormat.

Popular methods of LazyOutputFormat

getBaseOutputFormat

Popular in Java

Reading from database using SQL prepared statement
getSupportFragmentManager (FragmentActivity)
scheduleAtFixedRate (Timer)
setScale (BigDecimal)
BufferedReader (java.io)
Wraps an existing Reader and buffers the input. Expensive interaction with the underlying reader is
UnknownHostException (java.net)
Thrown when a hostname can not be resolved.
BitSet (java.util)
The BitSet class implements a bit array [http://en.wikipedia.org/wiki/Bit_array]. Each element is either true or false.
SortedSet (java.util)
SortedSet is a Set which iterates over its elements in a sorted order. The order is determined eithe
VirtualMachine (com.sun.tools.attach)
A Java virtual machine. A VirtualMachine represents a Java virtual machine to which this Java vir
JList (javax.swing)
Best plugins for Eclipse

How to use the setOutputFormatClass method in org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat

Best Java code snippets using org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat.setOutputFormatClass (Showing top 20 results out of 315)

How to use the setOutputFormatClass method in org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat