protected void configureReducer(Job job) throws IOException {
  job.setOutputFormatClass(AvroKeyCompactorOutputFormat.class);
  job.setReducerClass(AvroKeyDedupReducer.class);
  job.setOutputKeyClass(AvroKey.class);
  job.setOutputValueClass(NullWritable.class);
  setNumberOfReducers(job);
}
public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJobName("Convert Text");
  job.setJarByClass(Mapper.class);

  job.setMapperClass(Mapper.class); // identity mapper
  job.setReducerClass(Reducer.class); // identity reducer

  // Increase if you need sorting or a special number of files.
  job.setNumReduceTasks(0);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);

  TextInputFormat.addInputPath(job, new Path("/lol"));
  SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));

  // Submit and wait for completion.
  job.waitForCompletion(true);
}
Job job = Job.getInstance(conf,
    conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
job.setJarByClass(MapReduceHFileSplitterJob.class);
job.setInputFormatClass(HFileInputFormat.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
TableName tableName = TableName.valueOf(tabName);
job.setMapperClass(HFileCellMapper.class);
job.setReducerClass(CellSortReducer.class);
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
job.setMapOutputValueClass(MapReduceExtendedCell.class);
protected void runJob(String jobName, Configuration c, List<Scan> scans)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(c, jobName);

  initJob(scans, job);
  job.setReducerClass(ScanReducer.class);
  job.setNumReduceTasks(1); // one to get final "first" and "last" key
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));

  LOG.info("Started " + job.getJobName());
  job.waitForCompletion(true);
  assertTrue(job.isSuccessful());
  LOG.info("After map/reduce completion - job " + jobName);
}
setupFileSystems(job.getConfiguration());

job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);

job.setInputFormatClass(InFormat.class);

FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT + "1"));
private int doVerify(Path outputDir, int numReducers)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = new Job(getConf());
  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());

  setJobScannerConf(job);

  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);
  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));

  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);

  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);

  Scan scan = new Scan();

  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);

  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);

  assertTrue(job.waitForCompletion(true));

  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
Job job = new Job(conf, "testWritingPEData");
setupRandomGeneratorMapper(job, false);
job.setReducerClass(CellSortReducer.class);
job.setOutputFormatClass(HFileOutputFormat2.class);
job.setNumReduceTasks(4);

assertTrue(job.waitForCompletion(false));

FileStatus[] files = fs.listStatus(testDir);
assertTrue(files.length > 0);
    boolean outputCompression) {
  if (setMapper) {
    job.setMapperClass(HadoopWordCount2Mapper.class);
    job.setInputFormatClass(TextInputFormat.class);

    job.setReducerClass(HadoopWordCount2Reducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
Job job = new Job(conf);
job.setMapperClass(Mapper.class); // identity mapper
job.setReducerClass(Reducer.class); // identity reducer
job.setJarByClass(Mapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);

SequenceFileInputFormat.addInputPath(job, new Path("files/toMap/"));
Path out = new Path("files/out/processed/");
fs.delete(out, true); // clear any previous output
FileOutputFormat.setOutputPath(job, out); // write results to the freshly cleared directory

job.waitForCompletion(true);
/**
 * Tests an MR Scan initialized from properties set in the Configuration.
 */
protected void testScanFromConfiguration(String start, String stop, String last)
    throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "ScanFromConfig" +
      (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
      (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());

  c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString());
  c.set(TableInputFormat.SCAN_COLUMN_FAMILY,
      Bytes.toString(INPUT_FAMILYS[0]) + ", " + Bytes.toString(INPUT_FAMILYS[1]));
  c.set(KEY_STARTROW, start != null ? start : "");
  c.set(KEY_LASTROW, last != null ? last : "");

  if (start != null) {
    c.set(TableInputFormat.SCAN_ROW_START, start);
  }
  if (stop != null) {
    c.set(TableInputFormat.SCAN_ROW_STOP, stop);
  }

  Job job = Job.getInstance(c, jobName);
  job.setMapperClass(ScanMapper.class);
  job.setReducerClass(ScanReducer.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setInputFormatClass(TableInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  TableMapReduceUtil.addDependencyJars(job);
  assertTrue(job.waitForCompletion(true));
}
Job job = Job.getInstance(conf,
    conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis()));
job.setJarByClass(WALPlayer.class);
if (hfileOutPath != null) {
  // Bulk output: sort cells in the reducer and write HFiles for bulk load.
  job.setMapperClass(WALKeyValueMapper.class);
  job.setReducerClass(CellSortReducer.class);
  Path outputDir = new Path(hfileOutPath);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setMapOutputValueClass(MapReduceExtendedCell.class);
} else {
  // No bulk output directory: replay edits directly into live tables.
  job.setMapperClass(WALMapper.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  TableMapReduceUtil.addDependencyJars(job);
setupFileSystems(job.getConfiguration());

job.setMapperClass(TestMapper.class);
job.setReducerClass(TestReducer.class);
job.setCombinerClass(TestCombiner.class);

job.setInputFormatClass(InFormat.class);

FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT + "2"));
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
  int hllShardBase = MapReduceUtil.getCuboidHLLCounterReducerNum(cubeSeg.getCubeInstance());

  job.setReducerClass(CalculateStatsFromBaseCuboidReducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(hllShardBase);

  FileOutputFormat.setOutputPath(job, output);
  job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());
  deletePath(job.getConfiguration(), output);
}
  Job job = new Job(util.getConfiguration());

  Scan scan = new Scan(startRow, endRow); // limit the scan

  job.setJarByClass(util.getClass());
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
      TestTableSnapshotInputFormat.class);

  job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(NullOutputFormat.class);

  Assert.assertTrue(job.waitForCompletion(true));
} finally {
  if (!shutdownCluster) {
public static void main(String[] args) throws Exception {
  CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);

  Configuration configuration = new Configuration();
  if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) {
    configuration.setBoolean(USE_THROTTLING_SERVER, true);
    String resourceLimited = cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest");
    configuration.set(RESOURCE_ID, resourceLimited);
    configuration.set(BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(),
        new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME), null,
        SharedRestClientFactory.SERVER_URI_KEY),
        cli.getOptionValue(THROTTLING_SERVER_URI.getOpt()));
  }
  if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) {
    configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt()));
  }

  Job job = Job.getInstance(configuration, "ThrottlingStressTest");
  job.getConfiguration().setBoolean("mapreduce.job.user.classpath.first", true);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.getConfiguration().set(NUM_MAPPERS,
      cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
  StressTestUtils.populateConfigFromCli(job.getConfiguration(), cli);

  job.setJarByClass(MRStressTest.class);
  job.setMapperClass(StresserMapper.class);
  job.setReducerClass(AggregatorReducer.class);
  job.setInputFormatClass(MyInputFormat.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(DoubleWritable.class);
  FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public void checkOutputFormat() throws Exception {
  Job job = new Job();

  WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyTest", "part-r-00000");
  wordCountUtil.writeLinesFile();

  AvroJob.setInputKeySchema(job, STRING);
  AvroJob.setOutputKeySchema(job, Pair.getPairSchema(STRING, LONG));

  job.setMapperClass(WordCountMapper.class);
  job.setReducerClass(WordCountReducer.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(LongWritable.class);

  FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
  FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
  FileOutputFormat.setCompressOutput(job, true);

  job.setInputFormatClass(AvroKeyInputFormat.class);
  job.setOutputFormatClass(AvroTrevniKeyOutputFormat.class);

  job.waitForCompletion(true);

  wordCountUtil.validateCountsFile();
}
// Bulk-load with large results: total-order the cells, then write HFiles.
try (Connection conn = ConnectionFactory.createConnection(conf);
    Table table = conn.getTable(tableName);
    RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
  HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
  job.setMapperClass(CellSortImporter.class);
  job.setReducerClass(CellReducer.class);
  Path outputDir = new Path(hfileOutPath);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setMapOutputKeyClass(CellWritableComparable.class);
  job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
      CellWritableComparable.CellWritableComparator.class, RawComparator.class);
  Path partitionsPath = new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
  FileSystem fs = FileSystem.get(job.getConfiguration());
  fs.deleteOnExit(partitionsPath);
}

// Bulk-load without large results: let CellSortReducer order the cells.
job.setMapperClass(CellImporter.class);
try (Connection conn = ConnectionFactory.createConnection(conf);
    Table table = conn.getTable(tableName);
    RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
  job.setReducerClass(CellSortReducer.class);
  Path outputDir = new Path(hfileOutPath);
  FileOutputFormat.setOutputPath(job, outputDir);
}

// No bulk output: write straight to the table from the mapper, no reduce phase.
job.setMapperClass(Importer.class);
TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
job.setNumReduceTasks(0);
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls)
    throws IOException, UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(cls);

  // Choose the sorting reducer that matches the job's map output value type.
  if (KeyValue.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(KeyValueSortReducer.class);
  } else if (Put.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(PutSortReducer.class);
  } else if (Text.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(TextSortReducer.class);
  } else {
    LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
  }
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  String tableName = args[0];
  Path outputDir = new Path(args[1]);
  String reportSeparatorString = (args.length > 2) ? args[2] : ":";
  conf.set("ReportSeparator", reportSeparatorString);

  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJarByClass(CellCounter.class);
  Scan scan = getConfiguredScanForJob(conf, args);
  TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class,
      ImmutableBytesWritable.class, Result.class, job);
  job.setNumReduceTasks(1);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setReducerClass(IntSumReducer.class);
  return job;
}