public static void useSortableBytesAsMapOutputKey(Job job, Class<? extends Partitioner> partitionerClass) {
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setPartitionerClass(partitionerClass);
}
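A partitioner passed into the helper above would typically partition on the sortable prefix of the BytesWritable key. A minimal sketch, assuming a hypothetical key layout in which the first four bytes carry the partition-relevant prefix (the class name and prefix width are illustrative, not from the original source):

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical sketch: partition on a prefix of the raw key bytes so keys
// sharing the prefix meet in the same reducer, while the grouping/sort
// comparators configured above control reduce-side ordering.
public class SortableBytesPartitioner extends Partitioner<BytesWritable, Writable> {
    @Override
    public int getPartition(BytesWritable key, Writable value, int numPartitions) {
        int hash = 0;
        int prefixLen = Math.min(4, key.getLength()); // assumed prefix width
        for (int i = 0; i < prefixLen; i++) {
            hash = 31 * hash + key.getBytes()[i];
        }
        return (hash & Integer.MAX_VALUE) % numPartitions;
    }
}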
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
public Job createSubmittableJob(String[] args) throws IOException {
    Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
    generatePartitions(partitionsPath);

    Job job = Job.getInstance(getConf(),
            getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
    Configuration jobConf = job.getConfiguration();
    jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
    job.setJarByClass(HashTable.class);

    TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
            HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

    // use a TotalOrderPartitioner and reducers to group region output into hash files
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
    job.setReducerClass(Reducer.class); // identity reducer
    job.setNumReduceTasks(tableHash.numHashFiles);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

    return job;
}
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
        throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    FileSystem fs = FileSystem.get(conf);
    Path partitionsPath = new Path(conf.get("hbase.fs.tmp.dir"), "partitions_" + RandomUtil.randomUUID());
    partitionsPath = fs.makeQualified(partitionsPath); // makeQualified returns a new Path; assign the result
    writePartitions(conf, partitionsPath, splitPoints);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints,
        boolean writeMultipleTables) throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    FileSystem fs = FileSystem.get(conf);
    String hbaseTmpFsDir = conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
            fs.getHomeDirectory() + "/hbase-staging");
    Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
    partitionsPath = fs.makeQualified(partitionsPath); // makeQualified returns a new Path; assign the result
    writePartitions(conf, partitionsPath, splitPoints, writeMultipleTables);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
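The writePartitions helper referenced above is not shown. A minimal sketch of what it plausibly does (assumed, not the verbatim source): TotalOrderPartitioner reads a SequenceFile of N-1 sorted split keys to route keys into N reducers, so the helper sorts the start keys, drops the implicit empty first key, and writes the rest:

import java.io.IOException;
import java.util.List;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

// Sketch: write sorted split points as a SequenceFile for TotalOrderPartitioner.
static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // sort and drop the first (empty) start key; the rest are the split points
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
    sorted.remove(sorted.first());

    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(partitionsPath),
            SequenceFile.Writer.keyClass(ImmutableBytesWritable.class),
            SequenceFile.Writer.valueClass(NullWritable.class));
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}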
job.setOutputValueClass(Writable.class);
if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions = MetaTableAccessor.getRegionCount(conf, TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
        // cap reducers at the region count when using HRegionPartitioner
        job.setNumReduceTasks(regions);
    }
} else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
}
private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException {
    FactDistinctColumnsReducerMapping reducerMapping = new FactDistinctColumnsReducerMapping(cubeSeg.getCubeInstance());
    int numberOfReducers = reducerMapping.getTotalReducerNum();
    logger.info("{} has reducers {}.", this.getClass().getName(), numberOfReducers);
    if (numberOfReducers > 250) {
        throw new IllegalArgumentException(
                "The max reducer number for FactDistinctColumnsJob is 250, but now it is "
                        + numberOfReducers + ", decrease 'kylin.engine.mr.uhc-reducer-count'");
    }

    job.setReducerClass(FactDistinctColumnsReducer.class);
    job.setPartitionerClass(FactDistinctColumnPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    // make each reducer output to its respective dir
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_COLUMN, SequenceFileOutputFormat.class,
            NullWritable.class, Text.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
            NullWritable.class, ArrayPrimitiveWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_STATISTICS, SequenceFileOutputFormat.class,
            LongWritable.class, BytesWritable.class);
    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_PARTITION, TextOutputFormat.class,
            NullWritable.class, LongWritable.class);

    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
FileSystem fs = FileSystem.get(job.getConfiguration());
fs.deleteOnExit(partitionsPath);

job.setPartitionerClass(CellWritableComparablePartitioner.class);
job.setNumReduceTasks(regionLocator.getStartKeys().length);
TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(),
// truncated excerpt: the branch condition is not shown in the source fragment
if (/* simple partitioning chosen (condition elided) */) {
    job.setPartitionerClass(TeraSort.SimplePartitioner.class);
} else {
    long start = System.currentTimeMillis();
    // ... partition-file sampling and distributed-cache setup elided in the excerpt ...
    LOG.info("Partition file added to distributed cache: " + partUri); // log-call prefix reconstructed from the dangling string
    job.setPartitionerClass(getTeraSortTotalOrderPartitioner() /* TeraSort.TotalOrderPartitioner.class */);
}
Arrays.fill(endKey, (byte) 0xff);
job.setPartitionerClass(SimpleTotalOrderPartitioner.class);
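SimpleTotalOrderPartitioner divides a key range evenly across reducers, so both bounds must be placed in the job configuration before submission. A usage sketch of the likely surrounding code (the key width and start key are assumptions for illustration):

import java.util.Arrays;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.mapreduce.SimpleTotalOrderPartitioner;

// Sketch: register the [startKey, endKey) range the partitioner will split evenly.
byte[] startKey = HConstants.EMPTY_BYTE_ARRAY;
byte[] endKey = new byte[8]; // assumed key width
Arrays.fill(endKey, (byte) 0xff);
SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);
job.setPartitionerClass(SimpleTotalOrderPartitioner.class);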
FileSystem fs = FileSystem.get(conf);
// Mockito: assert the partitioner was configured on the mocked Job
verify(job).setPartitionerClass(TotalOrderPartitioner.class);
groupByJob.setOutputValueClass(NullWritable.class);
groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
groupByJob.setPartitionerClass(DetermineHashedPartitionsPartitioner.class);
if (!config.getSegmentGranularIntervals().isPresent()) {
    groupByJob.setNumReduceTasks(1);
}
job.setJobName(jobName);
job.setPartitionerClass(NaturalKeyPartitioner.class);
job.setGroupingComparatorClass(NaturalKeyGroupingComparator.class);
job.setSortComparatorClass(CompositeKeyComparator.class);
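The three calls above are the standard secondary-sort wiring: the partitioner and grouping comparator consider only the natural key, while the sort comparator orders by the full composite key. A minimal sketch of such a partitioner, assuming a hypothetical CompositeKey type with a getNaturalKey() accessor:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Sketch (CompositeKey and getNaturalKey() are assumed names): partitioning
// on the natural key alone guarantees all records for that key reach the same
// reducer, while the sort comparator orders them by the full composite key.
public class NaturalKeyPartitioner extends Partitioner<CompositeKey, Text> {
    @Override
    public int getPartition(CompositeKey key, Text value, int numPartitions) {
        return (key.getNaturalKey().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}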
job.setPartitionerClass(CustomV2Partitioner.class);
job.setInputFormatClass(CustomV2InputFormat.class);
job.setOutputFormatClass(CustomV2OutputFormat.class);
private void setupMapper(String intermediateTable) throws IOException {
    String[] dbTableNames = HadoopUtil.parseHiveTableName(intermediateTable);
    HCatInputFormat.setInput(job, dbTableNames[0], dbTableNames[1]);

    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapperClass(InvertedIndexMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(ImmutableBytesWritable.class);
    job.setPartitionerClass(InvertedIndexPartitioner.class);
}
public int run(String[] args) throws Exception {
    Job job = new Job(getConf(), "count-dns-bases");
    job.setJarByClass(FastaCountBaseDriver.class);
    job.setMapperClass(FastaCountBaseMapper.class);
    job.setReducerClass(FastaCountBaseReducer.class);
    job.setNumReduceTasks(5); // worked
    //job.setCombinerClass(FastaCountBaseCombiner.class);
    job.setInputFormatClass(FastaInputFormat.class);
    job.setPartitionerClass(BasePartitioner.class); // worked
    job.setSortComparatorClass(BaseComparator.class); // worked
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean status = job.waitForCompletion(true);
    theLogger.info("run(): status=" + status);
    return status ? 0 : 1;
}
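With five reducers and single-character base keys, a partitioner like BasePartitioner plausibly maps each base (A, C, G, T, N) to its own reducer. A hypothetical sketch, not the original implementation:

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical sketch: route each base to a fixed reducer so that
// setNumReduceTasks(5) yields one output file per base.
public class BasePartitioner extends Partitioner<Text, LongWritable> {
    private static final String BASES = "ACGTN"; // assumed base alphabet

    @Override
    public int getPartition(Text key, LongWritable value, int numPartitions) {
        int idx = BASES.indexOf(Character.toUpperCase(key.toString().charAt(0)));
        if (idx < 0) {
            idx = BASES.length() - 1; // unknown bases share the last partition
        }
        return idx % numPartitions;
    }
}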
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, Set<TableRowkeyPair> tablesStartKeys)
        throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    Path partitionsPath = new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
    FileSystem fs = partitionsPath.getFileSystem(conf);
    partitionsPath = fs.makeQualified(partitionsPath); // makeQualified returns a new Path; assign the result
    writePartitions(conf, partitionsPath, tablesStartKeys);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJarByClass(SecondarySortDriver.class);
    job.setJobName("SecondarySortDriver");

    // args[0] = input directory
    // args[1] = output directory
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setOutputKeyClass(DateTemperaturePair.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(SecondarySortMapper.class);
    job.setReducerClass(SecondarySortReducer.class);
    job.setPartitionerClass(DateTemperaturePartitioner.class);
    job.setGroupingComparatorClass(DateTemperatureGroupingComparator.class);

    boolean status = job.waitForCompletion(true);
    theLogger.info("run(): status=" + status);
    return status ? 0 : 1;
}
@Override
public int run(String[] args) throws Exception {
    int neighborWindow = Integer.parseInt(args[0]);
    Path inputPath = new Path(args[1]);
    Path outputPath = new Path(args[2]);

    Job job = new Job(new Configuration(), "RelativeFrequencyDriver");
    job.setJarByClass(RelativeFrequencyDriver.class);
    job.setJobName("RelativeFrequencyDriver");

    // delete the output directory if it already exists
    FileSystem.get(getConf()).delete(outputPath, true);

    job.getConfiguration().setInt("neighbor.window", neighborWindow);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // (key, value) generated by map()
    job.setMapOutputKeyClass(PairOfWords.class);
    job.setMapOutputValueClass(IntWritable.class);

    // (key, value) generated by reduce()
    job.setOutputKeyClass(PairOfWords.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setMapperClass(RelativeFrequencyMapper.class);
    job.setReducerClass(RelativeFrequencyReducer.class);
    job.setCombinerClass(RelativeFrequencyCombiner.class);
    job.setPartitionerClass(OrderInversionPartitioner.class);
    job.setNumReduceTasks(3);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    THE_LOGGER.info("Job Finished in milliseconds: " + (System.currentTimeMillis() - startTime));
    return 0;
}
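For the order-inversion pattern to work, the partitioner must route a word's special (word, *) total-count marker and all its (word, neighbor) pairs to the same reducer, which means partitioning on the left word only. A sketch, assuming PairOfWords exposes its left element via a getWord() accessor (an assumption, not confirmed by the source):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Partitioner;

// Sketch (getWord() is an assumed accessor): partition on the left word only,
// so (word, *) and every (word, neighbor) pair share a reducer and the '*'
// marker arrives first under the sort order, enabling relative frequencies.
public class OrderInversionPartitioner extends Partitioner<PairOfWords, IntWritable> {
    @Override
    public int getPartition(PairOfWords pair, IntWritable count, int numPartitions) {
        return (pair.getWord().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}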
job.setPartitionerClass(SecondarySortPartitioner.class);