public static void useSortableBytesAsMapOutputKey(Job job, Class<? extends Partitioner> partitionerClass) {
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setPartitionerClass(partitionerClass);
}
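A partitioner passed into the helper above would typically partition on the sortable prefix of the BytesWritable key. A minimal sketch, assuming a hypothetical key layout in which the first four bytes carry the partition-relevant prefix (the class name and prefix width are illustrative, not from the original source):

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical sketch: partition on a prefix of the raw key bytes so keys
// sharing the prefix meet in the same reducer, while the grouping/sort
// comparators configured above control reduce-side ordering.
public class SortableBytesPartitioner extends Partitioner<BytesWritable, Writable> {
    @Override
    public int getPartition(BytesWritable key, Writable value, int numPartitions) {
        int hash = 0;
        int prefixLen = Math.min(4, key.getLength()); // assumed prefix width
        for (int i = 0; i < prefixLen; i++) {
            hash = 31 * hash + key.getBytes()[i];
        }
        return (hash & Integer.MAX_VALUE) % numPartitions;
    }
}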
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
public Job createSubmittableJob(String[] args) throws IOException {
    Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
    generatePartitions(partitionsPath);

    Job job = Job.getInstance(getConf(),
            getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
    Configuration jobConf = job.getConfiguration();
    jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
    job.setJarByClass(HashTable.class);

    TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
            HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

    // use a TotalOrderPartitioner and reducers to group region output into hash files
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
    job.setReducerClass(Reducer.class); // identity reducer
    job.setNumReduceTasks(tableHash.numHashFiles);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

    return job;
}
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
        throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    FileSystem fs = FileSystem.get(conf);
    Path partitionsPath = new Path(conf.get("hbase.fs.tmp.dir"), "partitions_" + RandomUtil.randomUUID());
    partitionsPath = fs.makeQualified(partitionsPath); // makeQualified returns a new Path; assign the result
    writePartitions(conf, partitionsPath, splitPoints);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints,
        boolean writeMultipleTables) throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    FileSystem fs = FileSystem.get(conf);
    String hbaseTmpFsDir = conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
            fs.getHomeDirectory() + "/hbase-staging");
    Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
    partitionsPath = fs.makeQualified(partitionsPath); // makeQualified returns a new Path; assign the result
    writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
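The writePartitions helper referenced above is not shown. A minimal sketch of what it plausibly does (assumed, not the verbatim source): TotalOrderPartitioner reads a SequenceFile of N-1 sorted split keys to route keys into N reducers, so the helper sorts the start keys, drops the implicit empty first key, and writes the rest:

import java.io.IOException;
import java.util.List;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

// Sketch: write sorted split points as a SequenceFile for TotalOrderPartitioner.
static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // sort and drop the first (empty) start key; the rest are the split points
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
    sorted.remove(sorted.first());

    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(partitionsPath),
            SequenceFile.Writer.keyClass(ImmutableBytesWritable.class),
            SequenceFile.Writer.valueClass(NullWritable.class));
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}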
job.setOutputValueClass(Writable.class);
if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
        // cap reducers at the region count when using HRegionPartitioner
        job.setNumReduceTasks(regions);
    }
} else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
}
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
    FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
    int numberOfReducers = reducerMapping.getTotalReducerNum();
    logger.info("{} has reducers {}.", this.getClass().getName(), numberOfReducers);
    if (numberOfReducers > 250) {
        throw new IllegalArgumentException(
                "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
                        + numberOfReducers + ", decrease 'kylin.engine.mr.uhc-reducer-count'");
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    // make each reducer output to its respective dir
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
            LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
FileSystem fs = FileSystem.get(job.getConfiguration());
fs.deleteOnExit(partitionsPath);

job.setPartitionerClass(CellWritableComparablePartitioner.class);
job.setNumReduceTasks(regionLocator.getStartKeys().length);
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
// truncated excerpt: the branch condition is not shown in the source fragment
if (/* simple partitioning chosen (condition elided) */) {
    job.setPartitionerClass(TeraSort.SimplePartitioner.class);
} else {
    long start = System.currentTimeMillis();
    // ... partition-file sampling and distributed-cache setup elided in the excerpt ...
    LOG.info("Partition file added to distributed cache: " + partUri); // log-call prefix reconstructed from the dangling string
    job.setPartitionerClass(getTeraSortTotalOrderPartitioner() /* TeraSort.TotalOrderPartitioner.class */);
}
Arrays.fill(endKey, (byte) 0xff);
job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
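SimpleTotalOrderPartitioner divides a key range evenly across reducers, so both bounds must be placed in the job configuration before submission. A usage sketch of the likely surrounding code (the key width and start key are assumptions for illustration):

import java.util.Arrays;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.mapreduce.SimpleTotalOrderPartitioner;

// Sketch: register the [startKey, endKey) range the partitioner will split evenly.
byte[] startKey = HConstants.EMPTY_BYTE_ARRAY;
byte[] endKey = new byte[8]; // assumed key width
Arrays.fill(endKey, (byte) 0xff);
SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
job.setPartitionerClass(SimpleTotalOrderPartitioner.class);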
FileSystem fs = FileSystem.get(conf);
// Mockito: assert the partitioner was configured on the mocked Job
verify(job).setPartitionerClass(TotalOrderPartitioner.class);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
groupByJob.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
if (!config.getSegmentGranularIntervals().isPresent()) {
    groupByJob.setNumReduceTasks(1);
}
job.setJobName(jobName);
job.setPartitionerClass(NaturalKeyPartitioner.class);
job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
job.setSortComparatorClass(CompositeKeyComparator.class);
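The three calls above are the standard secondary-sort wiring: the partitioner and grouping comparator consider only the natural key, while the sort comparator orders by the full composite key. A minimal sketch of such a partitioner, assuming a hypothetical CompositeKey type with a getNaturalKey() accessor:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Sketch (CompositeKey and getNaturalKey() are assumed names): partitioning
// on the natural key alone guarantees all records for that key reach the same
// reducer, while the sort comparator orders them by the full composite key.
public class NaturalKeyPartitioner extends Partitioner<CompositeKey, Text> {
    @Override
    public int getPartition(CompositeKey key, Text value, int numPartitions) {
        return (key.getNaturalKey().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}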
job.setPartitionerClass(CustomV2Partitioner.class);
job.setInputFormatClass(CustomV2InputFormat.class);
job.setOutputFormatClass(CustomV2OutputFormat.class);
private void setupMapper(String intermediateTable) throws IOException {
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapperClass(InvertedIndexMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(ImmutableBytesWritable.class);
    job.setPartitionerClass(InvertedIndexPartitioner.class);
}
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "count-dns-bases");
    job.setJarByClass(FastaCountBaseDriver.class);
    job.setMapperClass(FastaCountBaseMapper.class);
    job.setReducerClass(FastaCountBaseReducer.class);
    job.setNumReduceTasks(5); // worked
    //job.setCombinerClass(FastaCountBaseCombiner.class);
    job.setInputFormatClass(FastaInputFormat.class);
    job.setPartitionerClass(BasePartitioner.class); // worked
    job.setSortComparatorClass(BaseComparator.class); // worked
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean status = job.waitForCompletion(true);
    theLogger.info("run(): status=" + status);
    return status ? 0 : 1;
}
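With five reducers and single-character base keys, a partitioner like BasePartitioner plausibly maps each base (A, C, G, T, N) to its own reducer. A hypothetical sketch, not the original implementation:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical sketch: route each base to a fixed reducer so that
// setNumReduceTasks(5) yields one output file per base.
public class BasePartitioner extends Partitioner<Text, LongWritable> {
    private static final String BASES = "ACGTN"; // assumed base alphabet

    @Override
    public int getPartition(Text key, LongWritable value, int numPartitions) {
        int idx = BASES.indexOf(Character.toUpperCase(key.toString().charAt(0)));
        if (idx < 0) {
            idx = BASES.length() - 1; // unknown bases share the last partition
        }
        return idx % numPartitions;
    }
}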
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, Set<TableRowkeyPair> tablesStartKeys)
        throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    Path partitionsPath = new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
    FileSystem fs = partitionsPath.getFileSystem(conf);
    partitionsPath = fs.makeQualified(partitionsPath); // makeQualified returns a new Path; assign the result
    writePartitions(conf, partitionsPath, tablesStartKeys);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJarByClass(SecondarySortDriver.class);
    job.setJobName("SecondarySortDriver");

    // args[0] = input directory
    // args[1] = output directory
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setOutputKeyClass(DateTemperaturePair.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(SecondarySortMapper.class);
    job.setReducerClass(SecondarySortReducer.class);
    job.setPartitionerClass(DateTemperaturePartitioner.class);
    job.setGroupingComparatorClass(DateTemperatureGroupingComparator.class);

    boolean status = job.waitForCompletion(true);
    theLogger.info("run(): status=" + status);
    return status ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
    int neighborWindow = Integer.parseInt(args[0]);
    Path inputPath = new Path(args[1]);
    Path outputPath = new Path(args[2]);

    Job job = new Job(new Configuration(), "RelativeFrequencyDriver");
    job.setJarByClass(RelativeFrequencyDriver.class);
    job.setJobName("RelativeFrequencyDriver");

    // delete the output directory if it already exists
    FileSystem.get(getConf()).delete(outputPath, true);

    job.getConfiguration().setInt("neighbor.window", neighborWindow);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // (key, value) generated by map()
    job.setMapOutputKeyClass(PairOfWords.class);
    job.setMapOutputValueClass(IntWritable.class);

    // (key, value) generated by reduce()
    job.setOutputKeyClass(PairOfWords.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(RelativeFrequencyMapper.class);
    job.setReducerClass(RelativeFrequencyReducer.class);
    job.setCombinerClass(RelativeFrequencyCombiner.class);
    job.setPartitionerClass(OrderInversionPartitioner.class);
    job.setNumReduceTasks(3);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    THE_LOGGER.info("Job Finished in milliseconds: " + (System.currentTimeMillis() - startTime));
    return 0;
}
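For the order-inversion pattern to work, the partitioner must route a word's special (word, *) total-count marker and all its (word, neighbor) pairs to the same reducer, which means partitioning on the left word only. A sketch, assuming PairOfWords exposes its left element via a getWord() accessor (an assumption, not confirmed by the source):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

// Sketch (getWord() is an assumed accessor): partition on the left word only,
// so (word, *) and every (word, neighbor) pair share a reducer and the '*'
// marker arrives first under the sort order, enabling relative frequencies.
public class OrderInversionPartitioner extends Partitioner<PairOfWords, IntWritable> {
    @Override
    public int getPartition(PairOfWords pair, IntWritable count, int numPartitions) {
        return (pair.getWord().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}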
job.setPartitionerClass(SecondarySortPartitioner.class);