org.apache.hadoop.mapred.JobConf.setMapperClass java code examples

Refine search

/**
 * Configures and runs the "generate-data" job.
 *
 * <p>Expects exactly three arguments: the input file, the output directory and the value size.
 * An existing output directory is deleted recursively before the job is submitted.
 *
 * @param args {@code input-file output-dir value-size}
 * @return 0 after {@code JobClient.runJob} has returned
 * @throws Exception if {@code value-size} is not an integer (NumberFormatException), or if
 *     file-system access or the job itself fails
 */
public int run(String[] args) throws Exception {
  if(args.length != 3) {
    Utils.croak("USAGE: GenerateData input-file output-dir value-size");
  }

  // Parse first: a malformed value-size must fail before the old output
  // directory is deleted, otherwise data is destroyed and no job is run.
  final int valueSize = Integer.parseInt(args[2]);

  JobConf conf = new JobConf(getConf(), GenerateData.class);
  conf.setJobName("generate-data");
  conf.setMapperClass(GenerateDataMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setNumReduceTasks(0);
  conf.setInputFormat(TextInputFormat.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  // Output key/value classes are set once; the earlier Text/IntWritable
  // assignment was always overwritten by these two calls.
  conf.setOutputKeyClass(BytesWritable.class);
  conf.setOutputValueClass(BytesWritable.class);

  Path inputPath = new Path(args[0]);
  FileInputFormat.setInputPaths(conf, inputPath);

  Path outputPath = new Path(args[1]);
  // delete output path if it already exists
  FileSystem fs = outputPath.getFileSystem(conf);
  if(fs.exists(outputPath)) {
    fs.delete(outputPath, true);
  }
  FileOutputFormat.setOutputPath(conf, outputPath);

  conf.setInt("value.size", valueSize);
  JobClient.runJob(conf);
  return 0;
}

  /**
   * Selectively installs the word-count task classes on a job configuration.
   * Each flag is independent; a flag that is {@code false} leaves the
   * corresponding settings untouched.
   *
   * @param jobConf Configuration to modify.
   * @param setMapper When {@code true}, sets {@code HadoopWordCount1Map} as mapper and
   *     {@code TextInputFormat} as input format.
   * @param setCombiner When {@code true}, sets {@code HadoopWordCount1Reduce} as combiner.
   * @param setReducer When {@code true}, sets {@code HadoopWordCount1Reduce} as reducer and
   *     {@code TextOutputFormat} as output format.
   */
  public static void setTasksClasses(JobConf jobConf, boolean setMapper, boolean setCombiner, boolean setReducer) {
    // Map side: input format plus mapper.
    if (setMapper) {
      jobConf.setInputFormat(TextInputFormat.class);
      jobConf.setMapperClass(HadoopWordCount1Map.class);
    }

    // Combiner reuses the reducer implementation.
    if (setCombiner) {
      jobConf.setCombinerClass(HadoopWordCount1Reduce.class);
    }

    // Reduce side: output format plus reducer.
    if (setReducer) {
      jobConf.setOutputFormat(TextOutputFormat.class);
      jobConf.setReducerClass(HadoopWordCount1Reduce.class);
    }
  }
}

/**
 * Runs a job whose input is a non-Avro sequence file and whose mapper emits
 * Avro output, then checks the first output part file.
 */
@Test
public void testNonAvroMapper() throws Exception {
 JobConf conf = new JobConf();
 Path outDir = new Path(OUTPUT_DIR.getRoot().getPath());
 // clear whatever is at the output location before running
 outDir.getFileSystem(conf).delete(outDir);

 // input side: non-Avro sequence file
 conf.setInputFormat(SequenceFileInputFormat.class);
 String inputUri = file().toURI().toString();
 FileInputFormat.setInputPaths(conf, inputUri);

 // map side: a hadoop mapper that emits Avro output;
 // reducer is default, identity
 conf.setMapperClass(NonAvroMapper.class);

 // output side: avro, described by SCHEMA
 FileOutputFormat.setOutputPath(conf, outDir);
 AvroJob.setOutputSchema(conf, SCHEMA);

 JobClient.runJob(conf);

 File firstPart = new File(outDir.toString() + "/part-00000.avro");
 checkFile(new DataFileReader<>(firstPart, new SpecificDatumReader<>()));
}

/**
 * Builds a multi-table snapshot map/reduce job for the given scans, runs it,
 * and asserts that it finished successfully.
 *
 * @param jobName name given to the job; also used as the output path
 * @param c not read by this implementation
 * @param scans scans passed to {@code getSnapshotScanMapping}
 */
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
  throws IOException, InterruptedException, ClassNotFoundException {
 final JobConf conf = new JobConf(TEST_UTIL.getConfiguration());
 conf.setJobName(jobName);
 conf.setMapperClass(Mapper.class);
 conf.setReducerClass(Reducer.class);

 TableMapReduceUtil.initMultiTableSnapshotMapperJob(
   getSnapshotScanMapping(scans),
   Mapper.class,
   ImmutableBytesWritable.class,
   ImmutableBytesWritable.class,
   conf,
   true,
   restoreDir);
 TableMapReduceUtil.addDependencyJars(conf);

 conf.setReducerClass(Reducer.class);
 conf.setNumReduceTasks(1); // one to get final "first" and "last" key

 final Path jobOutput = new Path(conf.getJobName());
 FileOutputFormat.setOutputPath(conf, jobOutput);

 LOG.info("Started " + conf.getJobName());
 final RunningJob handle = JobClient.runJob(conf);
 handle.waitForCompletion();
 assertTrue(handle.isSuccessful());
 LOG.info("After map/reduce completion - job " + jobName);
}

// NOTE(review): this excerpt appears to have lines elided. The `if(!isAvro) {` block is
// never closed here, and the ByteBuffer settings at the end presumably belong to an Avro
// branch whose opening line is not visible. Confirm against the full source.
if(!isAvro) {
  // Non-Avro path: BytesWritable map output with the store-builder partitioner and reducer.
  conf.setPartitionerClass(HadoopStoreBuilderPartitioner.class);
  conf.setMapperClass(mapperClass);
  conf.setMapOutputKeyClass(BytesWritable.class);
  conf.setMapOutputValueClass(BytesWritable.class);
  conf.setReducerClass(HadoopStoreBuilderReducer.class);
// Input/output formats, output key/value classes, job jar and speculative-execution flag.
conf.setInputFormat(inputFormatClass);
conf.setOutputFormat(SequenceFileOutputFormat.class);
conf.setOutputKeyClass(BytesWritable.class);
conf.setOutputValueClass(BytesWritable.class);
conf.setJarByClass(getClass());
conf.setReduceSpeculativeExecution(false);
FileInputFormat.setInputPaths(conf, inputPath);
// The job's output path is tempDir; outputDir is only recorded under "final.output.dir".
conf.set("final.output.dir", outputDir.toString());
conf.set(VoldemortBuildAndPushJob.CHECKSUM_TYPE, CheckSum.toString(checkSumType));
conf.set("dfs.umaskmode", "002");
FileOutputFormat.setOutputPath(conf, tempDir);
  // ByteBuffer-typed settings with the Avro store-builder reducer.
  conf.setMapOutputValueClass(ByteBuffer.class);
  conf.setInputFormat(inputFormatClass);
  conf.setOutputKeyClass(ByteBuffer.class);
  conf.setOutputValueClass(ByteBuffer.class);
  conf.setReducerClass(AvroStoreBuilderReducer.class);

/**
 * Runs a job with zero reduce tasks whose input is a non-Avro sequence file
 * and whose mapper emits Avro output, then checks the first output part file.
 */
@Test
public void testNonAvroMapOnly() throws Exception {
 JobConf conf = new JobConf();
 Path outDir = new Path(OUTPUT_DIR.getRoot().getPath());
 // clear whatever is at the output location before running
 outDir.getFileSystem(conf).delete(outDir);

 // input side: non-Avro sequence file
 conf.setInputFormat(SequenceFileInputFormat.class);
 String inputUri = file().toURI().toString();
 FileInputFormat.setInputPaths(conf, inputUri);

 // map side: a hadoop mapper that emits Avro output
 conf.setMapperClass(NonAvroOnlyMapper.class);

 // output side: avro, with no reduce tasks (map-only)
 conf.setNumReduceTasks(0);
 FileOutputFormat.setOutputPath(conf, outDir);
 AvroJob.setOutputSchema(conf, SCHEMA);

 JobClient.runJob(conf);

 File firstPart = new File(outDir.toString() + "/part-00000.avro");
 checkFile(new DataFileReader<>(firstPart, new SpecificDatumReader<>()));
}

// Creates a JobConf from the given HiveConf and applies the compactor job settings:
// job name, NullWritable output key/value, CompactorMR jar, CompactorMap mapper, zero
// reduce tasks, CompactorInputFormat input, NullOutputFormat output and
// CompactorOutputCommitter.
// NOTE(review): the excerpt ends before the method's return statement and closing brace;
// t, sd, writeIds and ci are not used in the visible lines.
private JobConf createBaseJobConf(HiveConf conf, String jobName, Table t, StorageDescriptor sd,
                 ValidWriteIdList writeIds, CompactionInfo ci) {
 JobConf job = new JobConf(conf);
 job.setJobName(jobName);
 job.setOutputKeyClass(NullWritable.class);
 job.setOutputValueClass(NullWritable.class);
 job.setJarByClass(CompactorMR.class);
 LOG.debug("User jar set to " + job.getJar());
 job.setMapperClass(CompactorMap.class);
 job.setNumReduceTasks(0);
 job.setInputFormat(CompactorInputFormat.class);
 job.setOutputFormat(NullOutputFormat.class);
 job.setOutputCommitter(CompactorOutputCommitter.class);

 /**
  * Runs a job with zero reduce tasks that reads text from the "in" directory
  * under DIR, maps it with AvroTestConverter, and writes Avro pairs to "out".
  */
 @Test
 public void testJob() throws Exception {
  JobConf conf = new JobConf();
  Path outDir = new Path(DIR.getRoot().getPath() + "/out");
  // clear whatever is at the output location before running
  outDir.getFileSystem(conf).delete(outDir);

  // input side: text files
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, DIR.getRoot().getPath() + "/in");

  // map side: converter mapper, no reduce tasks
  conf.setMapperClass(AvroTestConverter.class);
  conf.setNumReduceTasks(0);

  // output side: pairs of (long, createSchema()) written by AvroOutputFormat
  FileOutputFormat.setOutputPath(conf, outDir);
  System.out.println(createSchema());
  Schema pairSchema =
      Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema());
  AvroJob.setOutputSchema(conf, pairSchema);
  conf.setOutputFormat(AvroOutputFormat.class);

  JobClient.runJob(conf);
 }
}

// Creates a JobConf from the given HiveConf and applies the compactor job settings:
// job name, NullWritable output key/value, CompactorMR jar, CompactorMap mapper, zero
// reduce tasks, CompactorInputFormat input, NullOutputFormat output and
// CompactorOutputCommitter.
// NOTE(review): the excerpt ends before the method's return statement and closing brace;
// t, sd, txns and ci are not used in the visible lines.
private JobConf createBaseJobConf(HiveConf conf, String jobName, Table t, StorageDescriptor sd,
                 ValidTxnList txns, CompactionInfo ci) {
 JobConf job = new JobConf(conf);
 job.setJobName(jobName);
 job.setOutputKeyClass(NullWritable.class);
 job.setOutputValueClass(NullWritable.class);
 job.setJarByClass(CompactorMR.class);
 LOG.debug("User jar set to " + job.getJar());
 job.setMapperClass(CompactorMap.class);
 job.setNumReduceTasks(0);
 job.setInputFormat(CompactorInputFormat.class);
 job.setOutputFormat(NullOutputFormat.class);
 job.setOutputCommitter(CompactorOutputCommitter.class);

// NOTE(review): truncated excerpt — the `try {` that opens before the loadClass call and
// the closing brace of the catch block are not visible here.
// Prepares job output, routes it through HiveOutputFormatImpl and takes the mapper from `work`.
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());
 // Input format class is loaded by name; a missing class is rethrown unchecked with its cause.
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);

// Excerpt: merge-file job setup. Input format comes from `work`, output goes through
// HiveOutputFormatImpl, the mapper is MergeFileMapper, map-output and job-output
// key/value classes are all NullWritable, and the job has zero reduce tasks.
job.setInputFormat(work.getInputformatClass());
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(MergeFileMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);

// NOTE(review): truncated excerpt — the `try {` that opens before the loadClass call and
// the closing brace of the catch block are not visible here.
// Prepares job output, routes it through HiveOutputFormatImpl and takes the mapper from `work`.
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());
 // Input format class is loaded by name; a missing class is rethrown unchecked with its cause.
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);

// Excerpt: merge-file job setup. Input format comes from `work`, output goes through
// HiveOutputFormatImpl, the mapper is MergeFileMapper, map-output and job-output
// key/value classes are all NullWritable, and the job has zero reduce tasks.
job.setInputFormat(work.getInputformatClass());
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(MergeFileMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);

// NOTE(review): truncated excerpt — the `try {` that opens before the loadClass call and
// the closing brace of the catch block are not visible here.
// Uses ExecMapper/ExecReducer as the task classes and Text for output key and value.
job.setMapperClass(ExecMapper.class);
job.setReducerClass(ExecReducer.class);
 // Input format class is loaded by name; a missing class is rethrown unchecked with its cause.
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

// NOTE(review): truncated excerpt — the `try {` that opens before the loadClass call and
// the closing brace of the catch block are not visible here.
// Uses ExecMapper/ExecReducer as the task classes and Text for output key and value.
job.setMapperClass(ExecMapper.class);
job.setReducerClass(ExecReducer.class);
 // Input format class is loaded by name; a missing class is rethrown unchecked with its cause.
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

// NOTE(review): truncated excerpt — the `try {` that opens before the loadClass call and
// the closing brace of the catch block are not visible here.
// Prepares job output, routes it through HiveOutputFormatImpl and takes the mapper from `work`.
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());
 // Input format class is loaded by name; a missing class is rethrown unchecked with its cause.
 job.setInputFormat(JavaUtils.loadClass(inpFormat));
} catch (ClassNotFoundException e) {
 throw new RuntimeException(e.getMessage(), e);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);

/**
 * Runs one I/O test job: reads the control files as a sequence file, applies the
 * given mapper, and aggregates with AccumulatingReducer in a single reduce task.
 *
 * @param mapperClass mapper implementation to run
 * @param outputDir directory the job writes its Text/Text output to
 * @throws IOException propagated from job setup or execution
 */
private static void runIOTest(Class<? extends Mapper> mapperClass, Path outputDir)
  throws IOException {
 final JobConf conf = new JobConf(fsConfig, TestDFSIO.class);

 // input side
 FileInputFormat.setInputPaths(conf, CONTROL_DIR);
 conf.setInputFormat(SequenceFileInputFormat.class);

 // task classes
 conf.setMapperClass(mapperClass);
 conf.setReducerClass(AccumulatingReducer.class);

 // output side
 FileOutputFormat.setOutputPath(conf, outputDir);
 conf.setOutputKeyClass(Text.class);
 conf.setOutputValueClass(Text.class);
 conf.setNumReduceTasks(1);

 JobClient.runJob(conf);
}

/**
 * Builds the job configuration for "ConCmptBlock_pass2": MapStage2/RedStage2
 * reading from tempbm_path and writing IntWritable/Text output to nextbm_path.
 *
 * @return the configured, not yet submitted, job configuration
 */
protected JobConf configStage2 () throws Exception
{
  final JobConf stage2 = new JobConf(getConf(), ConCmptBlock.class);
  stage2.setJobName("ConCmptBlock_pass2");
  stage2.set("block_width", "" + block_width);

  // task classes
  stage2.setMapperClass(MapStage2.class);
  stage2.setReducerClass(RedStage2.class);
  stage2.setNumReduceTasks(nreducers);

  // input and output locations
  FileInputFormat.setInputPaths(stage2, tempbm_path);
  FileOutputFormat.setOutputPath(stage2, nextbm_path);

  // output record types
  stage2.setOutputKeyClass(IntWritable.class);
  stage2.setOutputValueClass(Text.class);

  return stage2;
}

/**
 * Builds the job configuration for "ConCmptBlock_pass1": MapStage1/RedStage1
 * reading from edge_path and curbm_path and writing IntWritable/Text output
 * to tempbm_path.
 *
 * @return the configured, not yet submitted, job configuration
 */
protected JobConf configStage1() throws Exception
{
  final JobConf stage1 = new JobConf(getConf(), ConCmptBlock.class);
  stage1.setJobName("ConCmptBlock_pass1");
  stage1.set("block_width", "" + block_width);
  stage1.set("recursive_diagmult", "" + recursive_diagmult);

  // task classes
  stage1.setMapperClass(MapStage1.class);
  stage1.setReducerClass(RedStage1.class);
  stage1.setNumReduceTasks(nreducers);

  // input and output locations
  FileInputFormat.setInputPaths(stage1, edge_path, curbm_path);
  FileOutputFormat.setOutputPath(stage1, tempbm_path);

  // output record types
  stage1.setOutputKeyClass(IntWritable.class);
  stage1.setOutputValueClass(Text.class);

  return stage1;
}

  /**
   * Builds the job configuration for "PagerankPrep_Stage1": MapStage1/RedStage1
   * reading from edge_path and writing IntWritable/Text output to output_path.
   *
   * @return the configured, not yet submitted, job configuration
   */
  protected JobConf configStage1() throws Exception
  {
    final JobConf stage1 = new JobConf(getConf(), PagerankPrep.class);
    stage1.setJobName("PagerankPrep_Stage1");
    stage1.set("make_symmetric", "" + make_symmetric);

    // task classes
    stage1.setMapperClass(MapStage1.class);
    stage1.setReducerClass(RedStage1.class);
    stage1.setNumReduceTasks(nreducers);

    // input and output locations
    FileInputFormat.setInputPaths(stage1, edge_path);
    FileOutputFormat.setOutputPath(stage1, output_path);

    // output record types
    stage1.setOutputKeyClass(IntWritable.class);
    stage1.setOutputValueClass(Text.class);

    return stage1;
  }
}

Javadoc

Set the Mapper class for the job.

Popular methods of JobConf

<init>
A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
set
get
setInputFormat
Set the InputFormat implementation for the map-reduce job.
setOutputFormat
Set the OutputFormat implementation for the map-reduce job.
getInt
setOutputKeyClass
Set the key class for the job output data.
setOutputValueClass
Set the value class for job outputs.
setReducerClass
Set the Reducer class for the job.
setNumReduceTasks
Set the requisite number of reduce tasks for this job. How many reduces? The right number of reduces…
setBoolean
setJobName
Set the user-specified job name.

Popular in Java

Parsing JSON documents to java classes using gson
getSharedPreferences (Context)
getSystemService (Context)
getExternalFilesDir (Context)
SocketException (java.net)
This SocketException may be thrown during socket creation or setting options, and is the superclass
ServletException (javax.servlet)
Defines a general exception a servlet can throw when it encounters difficulty.
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in the Public Domain, and comes with NO WARRANTY.
Container (java.awt)
A generic Abstract Window Toolkit(AWT) container object is a component that can contain other AWT co
ImageIO (javax.imageio)
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
CodeWhisperer alternatives

How to use the setMapperClass method in org.apache.hadoop.mapred.JobConf

Best Java code snippets using org.apache.hadoop.mapred.JobConf.setMapperClass (Showing top 20 results out of 756)

Refine search

How to use
setMapperClass
method
in
org.apache.hadoop.mapred.JobConf