org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner.setPartitionFile java code examples

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param splitPoints the split points passed on to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException {
  Configuration conf = job.getConfiguration();
  // create the partitions file
  FileSystem fs = FileSystem.get(conf);
  // makeQualified returns a new Path instead of modifying its argument, so the
  // result must be kept; otherwise the call has no effect on the path we use.
  Path partitionsPath = fs.makeQualified(
      new Path(conf.get("hbase.fs.tmp.dir"), "partitions_" + RandomUtil.randomUUID()));
  writePartitions(conf, partitionsPath, splitPoints);
  fs.deleteOnExit(partitionsPath);
  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Generates the partitions file under <code>destPath</code> and builds the hash-table job:
 * a table mapper ({@code HashMapper}), a TotalOrderPartitioner reading that partitions
 * file, an identity reducer, and MapFile output under <code>destPath</code>.
 *
 * @param args not read by this method
 * @return the configured job; this method does not submit it
 * @throws IOException if generating the partitions or creating the job fails
 */
public Job createSubmittableJob(String[] args) throws IOException {
  final Path partitionFile = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionFile);

  final String jobName =
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName);
  final Job job = Job.getInstance(getConf(), jobName);
  final Configuration conf = job.getConfiguration();
  conf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);

  // map side: scan the table with HashMapper
  TableMapReduceUtil.initTableMapperJob(
      tableHash.tableName,
      tableHash.initScan(),
      HashMapper.class,
      ImmutableBytesWritable.class,
      ImmutableBytesWritable.class,
      job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
  job.setReducerClass(Reducer.class);  // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);

  // output side
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  final Path hashDataDir = new Path(destPath, HASH_DATA_DIR);
  FileOutputFormat.setOutputPath(job, hashDataDir);
  return job;
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param splitPoints the split points passed on to <code>writePartitions</code>
 * @param writeMultipleTables passed through unchanged to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
    writeMultipleTables)
  throws IOException {
 Configuration conf = job.getConfiguration();
 // create the partitions file
 FileSystem fs = FileSystem.get(conf);
 // fall back to "<home directory>/hbase-staging" when no temporary directory is configured
 String hbaseTmpFsDir =
   conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
     fs.getHomeDirectory() + "/hbase-staging");
 // makeQualified returns a new Path instead of modifying its argument, so the
 // result must be kept; otherwise the call has no effect on the path we use.
 Path partitionsPath =
   fs.makeQualified(new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID()));
 writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
 fs.deleteOnExit(partitionsPath);
 // configure job to use it
 job.setPartitionerClass(TotalOrderPartitioner.class);
 TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Check whether a partition file for hfile exists. If it does, register it as the job's
 * partition file and set the number of reduce tasks to the number of partition keys plus
 * one. If it does not, only an info message is logged and the job is left unchanged.
 *
 * @param conf the job configuration
 * @param path the hfile partition file
 * @throws IOException if the file system cannot be accessed or the partition file
 *     cannot be read
 */
@SuppressWarnings("deprecation")
private void reconfigurePartitions(Configuration conf, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  if (!fs.exists(path)) {
    // the closing quote after the path was missing in the original message
    logger.info("File '" + path + "' doesn't exist, will not reconfigure hfile Partitions");
    return;
  }

  try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf)) {
    // count the records in the file; each record holds one partition key
    int partitionCount = 0;
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    while (reader.next(key, value)) {
      partitionCount++;
    }
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), path);
    // The reduce tasks should be one more than partition keys
    job.setNumReduceTasks(partitionCount + 1);
  }
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>tablesStartKeys</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param tablesStartKeys the start keys passed on to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, Set<TableRowkeyPair> tablesStartKeys)
    throws IOException {

  Configuration conf = job.getConfiguration();
  // create the partitions file
  Path unqualifiedPath =
      new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
  FileSystem fs = unqualifiedPath.getFileSystem(conf);
  // makeQualified returns a new Path instead of modifying its argument, so the
  // result must be kept; otherwise the call has no effect on the path we use.
  Path partitionsPath = fs.makeQualified(unqualifiedPath);
  writePartitions(conf, partitionsPath, tablesStartKeys);
  fs.deleteOnExit(partitionsPath);
  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

// Store partitionsPath in the job's configuration as the partition file for TotalOrderPartitioner.
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);

/**
 * Records the location of the SequenceFile that stores the sorted partition keyset.
 * For <tt>R</tt> reduces, the SequenceFile must contain <tt>R-1</tt> keys.
 *
 * @param job the job configuration in which the path is recorded
 * @param p path to the partition SequenceFile
 * @deprecated Use {@link #setPartitionFile(Configuration, Path)} instead
 */
@Deprecated
public static void setPartitionFile(JobConf job, Path p) {
  // Pure delegation to the implementation in the mapreduce.lib.partition package.
  org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner.setPartitionFile(job, p);
}

/**
 * Records the location of the SequenceFile that stores the sorted partition keyset.
 * For <tt>R</tt> reduces, the SequenceFile must contain <tt>R-1</tt> keys.
 *
 * @param job the job configuration in which the path is recorded
 * @param p path to the partition SequenceFile
 * @deprecated Use {@link #setPartitionFile(Configuration, Path)} instead
 */
@Deprecated
public static void setPartitionFile(JobConf job, Path p) {
  // Pure delegation to the implementation in the mapreduce.lib.partition package.
  org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner.setPartitionFile(job, p);
}

/**
 * Records the location of the SequenceFile that stores the sorted partition keyset.
 * For <tt>R</tt> reduces, the SequenceFile must contain <tt>R-1</tt> keys.
 *
 * @param job the job configuration in which the path is recorded
 * @param p path to the partition SequenceFile
 * @deprecated Use {@link #setPartitionFile(Configuration, Path)} instead
 */
@Deprecated
public static void setPartitionFile(JobConf job, Path p) {
  // Pure delegation to the implementation in the mapreduce.lib.partition package.
  org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner.setPartitionFile(job, p);
}

/**
 * Records the location of the SequenceFile that stores the sorted partition keyset.
 * For <tt>R</tt> reduces, the SequenceFile must contain <tt>R-1</tt> keys.
 *
 * @param job the job configuration in which the path is recorded
 * @param p path to the partition SequenceFile
 * @deprecated Use {@link #setPartitionFile(Configuration, Path)} instead
 */
@Deprecated
public static void setPartitionFile(JobConf job, Path p) {
  // Pure delegation to the implementation in the mapreduce.lib.partition package.
  org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner.setPartitionFile(job, p);
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param splitPoints the split points passed on to <code>writePartitions</code>
 * @param writeMultipleTables passed through unchanged to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
    writeMultipleTables)
  throws IOException {
 Configuration conf = job.getConfiguration();
 // create the partitions file
 FileSystem fs = FileSystem.get(conf);
 String hbaseTmpFsDir =
   conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
    HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
 // makeQualified returns a new Path instead of modifying its argument, so the
 // result must be kept; otherwise the call has no effect on the path we use.
 Path partitionsPath =
   fs.makeQualified(new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID()));
 writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
 fs.deleteOnExit(partitionsPath);
 // configure job to use it
 job.setPartitionerClass(TotalOrderPartitioner.class);
 TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>tablesStartKeys</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param tablesStartKeys the start keys passed on to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, Set<TableRowkeyPair> tablesStartKeys)
    throws IOException {

  Configuration conf = job.getConfiguration();
  // create the partitions file
  Path unqualifiedPath =
      new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
  FileSystem fs = unqualifiedPath.getFileSystem(conf);
  // makeQualified returns a new Path instead of modifying its argument, so the
  // result must be kept; otherwise the call has no effect on the path we use.
  Path partitionsPath = fs.makeQualified(unqualifiedPath);
  writePartitions(conf, partitionsPath, tablesStartKeys);
  fs.deleteOnExit(partitionsPath);
  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>tablesStartKeys</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param tablesStartKeys the start keys passed on to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, Set<TableRowkeyPair> tablesStartKeys)
    throws IOException {

  Configuration conf = job.getConfiguration();
  // create the partitions file
  Path unqualifiedPath =
      new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
  FileSystem fs = unqualifiedPath.getFileSystem(conf);
  // makeQualified returns a new Path instead of modifying its argument, so the
  // result must be kept; otherwise the call has no effect on the path we use.
  Path partitionsPath = fs.makeQualified(unqualifiedPath);
  writePartitions(conf, partitionsPath, tablesStartKeys);
  fs.deleteOnExit(partitionsPath);
  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Samples the input at <code>inputPath</code> with a {@code RandomSampler} and writes the
 * resulting partition file to <code>outputFile</code>.
 *
 * @param inputPath input path added to the sampling job
 * @param outputFile path the partition file is written to
 * @param frequency first argument given to the {@code RandomSampler} constructor
 * @param samplesCnt second argument given to the {@code RandomSampler} constructor
 * @return always 0
 * @throws IOException if the job cannot be created or the partition file cannot be written
 * @throws ClassNotFoundException propagated from {@code InputSampler.writePartitionFile}
 * @throws InterruptedException propagated from {@code InputSampler.writePartitionFile}
 */
private int createParitionFile(String inputPath, String outputFile, float frequency, int samplesCnt)
    throws IOException, ClassNotFoundException, InterruptedException {
  Path input = new Path(inputPath);
  // The Job constructors are deprecated; the static factory is the supported replacement.
  Job sampler = Job.getInstance(getConf());

  TextInputFormat.addInputPath(sampler, input);
  InputSampler.Sampler<LongWritable, Text> inputSampler =
      new InputSampler.RandomSampler<LongWritable, Text>(frequency, samplesCnt);
  Path partitionFile = new Path(outputFile);
  // writePartitionFile is only given the job, so the target path is set in its configuration first
  TotalOrderPartitioner.setPartitionFile(sampler.getConfiguration(), partitionFile);
  InputSampler.writePartitionFile(sampler, inputSampler);
  return 0;
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param splitPoints the split points passed on to <code>writePartitions</code>
 * @param writeMultipleTables passed through unchanged to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints, boolean
    writeMultipleTables)
  throws IOException {
 Configuration conf = job.getConfiguration();
 // create the partitions file
 FileSystem fs = FileSystem.get(conf);
 String hbaseTmpFsDir =
   conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
    HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
 // makeQualified returns a new Path instead of modifying its argument, so the
 // result must be kept; otherwise the call has no effect on the path we use.
 Path partitionsPath =
   fs.makeQualified(new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID()));
 writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
 fs.deleteOnExit(partitionsPath);
 // configure job to use it
 job.setPartitionerClass(TotalOrderPartitioner.class);
 TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Samples the SequenceFile input at <code>sequenceFileInput</code> with a
 * {@code RandomSampler} and writes the resulting partition file. Sampler frequency, number
 * of samples and the partition file name are read from the job configuration, with the
 * corresponding <code>*_DEFAULT_VALUE</code> constants as fallbacks.
 *
 * @param sequenceFileInput input path added to the sampling job
 * @param regionCnt number of reduce tasks set on the sampling job
 * @return always 0
 * @throws IOException if the job cannot be created or the partition file cannot be written
 * @throws ClassNotFoundException propagated from {@code InputSampler.writePartitionFile}
 * @throws InterruptedException propagated from {@code InputSampler.writePartitionFile}
 */
private int createParitionFile(String sequenceFileInput, int regionCnt)
    throws IOException, ClassNotFoundException, InterruptedException {
  Path input = new Path(sequenceFileInput);
  // The Job constructors are deprecated; the static factory is the supported replacement.
  Job sampler = Job.getInstance(getConf());
  sampler.setNumReduceTasks(regionCnt);
  sampler.setInputFormatClass(SequenceFileInputFormat.class);
  sampler.setOutputFormatClass(SequenceFileOutputFormat.class);
  sampler.setOutputKeyClass(BytesWritable.class);
  SequenceFileInputFormat.addInputPath(sampler, input);
  Configuration config = sampler.getConfiguration();
  InputSampler.Sampler<BytesWritable, NullWritable> inputSampler =
      new InputSampler.RandomSampler<BytesWritable, NullWritable>(
      config.getFloat(SAMPLER_FREQUENCY_KEY, SAMPLER_FREQUENCY_DEFAULT_VALUE),
      config.getInt(SAMPLER_NUM_SAMPLES_KEY, SAMPLER_NUM_SAMPLES_DEFAULT_VALUE));
  Path partitionFile = new Path(config.get(OUTPUT_KEYS_FILE_NAME_KEY, OUTPUT_KEYS_FILE_NAME_DEFAULT_VALUE));
  // writePartitionFile is only given the job, so the target path is set in its configuration first
  TotalOrderPartitioner.setPartitionFile(config, partitionFile);
  InputSampler.<BytesWritable, NullWritable>writePartitionFile(sampler, inputSampler);
  return 0;
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param splitPoints the split points passed on to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
  throws IOException {
 Configuration conf = job.getConfiguration();
 // create the partitions file
 FileSystem fs = FileSystem.get(conf);
 String hbaseTmpFsDir =
   conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
    HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
 // makeQualified returns a new Path instead of modifying its argument, so the
 // result must be kept; otherwise the call has no effect on the path we use.
 Path partitionsPath =
   fs.makeQualified(new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID()));
 writePartitions(conf, partitionsPath, splitPoints);
 fs.deleteOnExit(partitionsPath);
 // configure job to use it
 job.setPartitionerClass(TotalOrderPartitioner.class);
 TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after job exits
 * (the file is registered through <code>FileSystem.deleteOnExit</code>).
 *
 * @param job the job whose configuration receives the partitioner settings
 * @param splitPoints the split points passed on to <code>writePartitions</code>
 * @throws IOException if the file system cannot be obtained or the partitions file
 *     cannot be written
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints) throws IOException {
  Configuration conf = job.getConfiguration();
  // create the partitions file
  FileSystem fs = FileSystem.get(conf);
  // makeQualified returns a new Path instead of modifying its argument, so the
  // result must be kept; otherwise the call has no effect on the path we use.
  Path partitionsPath = fs.makeQualified(
      new Path(conf.get("hbase.fs.tmp.dir"), "partitions_" + RandomUtil.randomUUID()));
  writePartitions(conf, partitionsPath, splitPoints);
  fs.deleteOnExit(partitionsPath);
  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}

/**
 * Generates the partitions file under <code>destPath</code> and builds the hash-table job:
 * a table mapper ({@code HashMapper}), a TotalOrderPartitioner reading that partitions
 * file, an identity reducer, and MapFile output under <code>destPath</code>.
 *
 * @param args not read by this method
 * @return the configured job; this method does not submit it
 * @throws IOException if generating the partitions or creating the job fails
 */
public Job createSubmittableJob(String[] args) throws IOException {
  final Path partitionFile = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionFile);

  final String jobName =
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName);
  final Job job = Job.getInstance(getConf(), jobName);
  final Configuration conf = job.getConfiguration();
  conf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);

  // map side: scan the table with HashMapper
  TableMapReduceUtil.initTableMapperJob(
      tableHash.tableName,
      tableHash.initScan(),
      HashMapper.class,
      ImmutableBytesWritable.class,
      ImmutableBytesWritable.class,
      job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
  job.setReducerClass(Reducer.class);  // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);

  // output side
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  final Path hashDataDir = new Path(destPath, HASH_DATA_DIR);
  FileOutputFormat.setOutputPath(job, hashDataDir);
  return job;
}

/**
 * Generates the partitions file under <code>destPath</code> and builds the hash-table job:
 * a table mapper ({@code HashMapper}), a TotalOrderPartitioner reading that partitions
 * file, an identity reducer, and MapFile output under <code>destPath</code>.
 *
 * @param args not read by this method
 * @return the configured job; this method does not submit it
 * @throws IOException if generating the partitions or creating the job fails
 */
public Job createSubmittableJob(String[] args) throws IOException {
  final Path partitionFile = new Path(destPath, PARTITIONS_FILE_NAME);
  generatePartitions(partitionFile);

  final String jobName =
      getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName);
  final Job job = Job.getInstance(getConf(), jobName);
  final Configuration conf = job.getConfiguration();
  conf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
  job.setJarByClass(HashTable.class);

  // map side: scan the table with HashMapper
  TableMapReduceUtil.initTableMapperJob(
      tableHash.tableName,
      tableHash.initScan(),
      HashMapper.class,
      ImmutableBytesWritable.class,
      ImmutableBytesWritable.class,
      job);

  // use a TotalOrderPartitioner and reducers to group region output into hash files
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
  job.setReducerClass(Reducer.class);  // identity reducer
  job.setNumReduceTasks(tableHash.numHashFiles);

  // output side
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(ImmutableBytesWritable.class);
  job.setOutputFormatClass(MapFileOutputFormat.class);
  final Path hashDataDir = new Path(destPath, HASH_DATA_DIR);
  FileOutputFormat.setOutputPath(job, hashDataDir);
  return job;
}

Javadoc

Set the path to the SequenceFile storing the sorted partition keyset. It must be the case that for R reduces, there are R-1 keys in the SequenceFile.

Popular methods of TotalOrderPartitioner

getPartitionFile
Get the path to the SequenceFile storing the sorted partition keyset.
setConf
Read in the partition file and build indexing data structures. If the keytype is org.apache.hadoop.i
LeafTrieNodeFactory
buildTrie
Given a sorted set of cut points, build a trie that will find the correct partition quickly.
buildTrieRec
This is the core of buildTrie. The interface, and stub, above, just adds an empty CarriedTrieNodeRef
readPartitions
Read the cut points from the given IFile.
<init>
getPartition

Popular in Java

Creating JSON documents from java classes using gson
getResourceAsStream (ClassLoader)
putExtra (Intent)
onRequestPermissionsResult (Fragment)
PrintWriter (java.io)
Wraps either an existing OutputStream or an existing Writer and provides convenience methods for prin
String (java.lang)
BigDecimal (java.math)
An immutable arbitrary-precision signed decimal.A value is represented by an arbitrary-precision "un
Selector (java.nio.channels)
A controller for the selection of SelectableChannel objects. Selectable channels can be registered w
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
GridBagLayout (java.awt)
The GridBagLayout class is a flexible layout manager that aligns components vertically and horizonta
From CI to AI: The AI layer in your organization

How to use the setPartitionFile method in org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner

Best Java code snippets using org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner.setPartitionFile (Showing top 20 results out of 315)

How to use the setPartitionFile method in org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner