/** * Runs the job to invert the email domain popularity map and sort by popularity. * * @param inputPath The map from email domains to their popularity. * @param outputPath The output path for the sorted map of popularity to email domains. * @param numSplits The number of output file shards to write. * @param conf Base Hadoop configuration. * @return Whether the job was successful. * @throws Exception If there is an exception. */ private boolean invertAndSortByPopularity( Path inputPath, Path outputPath, int numSplits, Configuration conf) throws Exception { LOG.info("Configuring a fiji mapreduce job..."); FijiMapReduceJobBuilder jobBuilder = FijiMapReduceJobBuilder.create() .withConf(conf) .withInput(MapReduceJobInputs.newSequenceFileMapReduceJobInput(inputPath)) .withMapper(InvertCountMapper.class) .withReducer(TextListReducer.class) .withOutput(MapReduceJobOutputs.newAvroKeyValueMapReduceJobOutput(outputPath, numSplits)); LOG.info("Building the transform job..."); FijiMapReduceJob job = jobBuilder.build(); // Configure the job to sort by decreasing key, so the most popular email domain is first. job.getHadoopJob().setSortComparatorClass(DescendingIntWritableComparator.class); LOG.info("Running the transform job..."); return job.run(); }
@Override protected void configure(FijiMapReduceJobBuilder jobBuilder) throws ClassNotFoundException, IOException { // Configure lib jars and KV stores: super.configure(jobBuilder); jobBuilder .withConf(getConf()) .withInput(MapReduceJobInputFactory.create().fromSpaceSeparatedMap(mInputFlag)) .withOutput(MapReduceJobOutputFactory.create().fromSpaceSeparatedMap(mOutputFlag)) .withMapper(FijiMappers.forName(mMapperName)); if (!mCombinerName.isEmpty()) { jobBuilder.withCombiner(FijiReducers.forName(mCombinerName)); } if (!mReducerName.isEmpty()) { jobBuilder.withReducer(FijiReducers.forName(mReducerName)); } }