/**
 * Configures the Hadoop MapReduce job.
 *
 * @return Instance of the Hadoop MapReduce job.
 * @throws IOException If failed.
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
    Job jobCfg = new Job();
    Configuration cfg = jobCfg.getConfiguration();

    // Use explicit configuration of distributed file system, if provided.
    cfg.addResource(U.resolveIgniteUrl(DFS_CFG));

    jobCfg.setJobName("HadoopPopularWordExample");
    jobCfg.setJarByClass(HadoopPopularWords.class);
    jobCfg.setInputFormatClass(TextInputFormat.class);
    jobCfg.setOutputKeyClass(Text.class);
    jobCfg.setOutputValueClass(IntWritable.class);
    jobCfg.setMapperClass(TokenizingMapper.class);
    jobCfg.setReducerClass(TopNWordsReducer.class);

    FileInputFormat.setInputPaths(jobCfg, BOOKS_DFS_DIR);
    FileOutputFormat.setOutputPath(jobCfg, RESULT_DFS_DIR);

    // The local job tracker allows only one task per wave, but the text input
    // format overrides that with a value calculated from the split size options.
    if ("local".equals(cfg.get("mapred.job.tracker", "local"))) {
        // Split the job into tasks using a 32MB split size.
        FileInputFormat.setMinInputSplitSize(jobCfg, 32L * 1024 * 1024);
        FileInputFormat.setMaxInputSplitSize(jobCfg, Long.MAX_VALUE);
    }

    return jobCfg;
}
public static Job createJob() throws IOException {
    final Configuration conf = new Configuration();
    final Job baseJob = Job.getInstance(new Cluster(conf), conf);
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(NewMapTokenizer.class);
    baseJob.setCombinerClass(NewSummer.class);
    baseJob.setReducerClass(NewSummer.class);
    baseJob.setNumReduceTasks(1);
    baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
    baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
    baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
        baseJob, Long.MAX_VALUE);
    return baseJob;
}
@Override
public List<BoundedDataSource<Pair<K, V>>> split(long desiredSplitSizeBytes) {
    final Job job = newJob();
    // Clamp the requested size to the supported minimum, and log the value
    // that is actually applied (not the raw request).
    long splitSize = Math.max(MIN_SPLIT_SIZE, desiredSplitSizeBytes);
    LOG.info(String.format("%s's max and min input split size will be set to %,d.",
        FileInputFormat.class.getSimpleName(), splitSize));
    FileInputFormat.setMinInputSplitSize(job, splitSize);
    FileInputFormat.setMaxInputSplitSize(job, splitSize);
    return doSplit(job);
}
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 1;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraValidate");
    job.setJarByClass(TeraValidate.class);
    job.setMapperClass(ValidateMapper.class);
    job.setReducerClass(ValidateReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    // force a single split
    FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
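The Long.MAX_VALUE trick above works because FileInputFormat clamps the block size between the configured minimum and maximum split size. The sketch below mirrors the arithmetic of Hadoop's FileInputFormat.computeSplitSize; the 128MB block size and the demo values are illustrative assumptions, not taken from the examples above.

// Sketch of Hadoop's split-size arithmetic (mirrors
// FileInputFormat.computeSplitSize); demo values are assumed.
public class SplitSizeDemo {
    // Effective split size: the block size clamped into [minSize, maxSize].
    static long computeSplitSize(long blockSize, long minSize, long maxSize) {
        return Math.max(minSize, Math.min(maxSize, blockSize));
    }

    public static void main(String[] args) {
        long blockSize = 128L * 1024 * 1024; // assume a 128MB block

        // TeraValidate-style: min = Long.MAX_VALUE dominates, so each
        // input file becomes exactly one split.
        System.out.println(
            computeSplitSize(blockSize, Long.MAX_VALUE, Long.MAX_VALUE));

        // HadoopPopularWords-style: min = 32MB with an unbounded max, so
        // the block size wins whenever it exceeds 32MB.
        System.out.println(
            computeSplitSize(blockSize, 32L * 1024 * 1024, Long.MAX_VALUE));
    }
}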
FileInputFormat.setInputPaths(job, inputAPaths);
if (minSplitSize > 0) {
    FileInputFormat.setMinInputSplitSize(job, minSplitSize);
}
public static Job createJob() throws IOException {
    final Configuration conf = new Configuration();
    final Job baseJob = Job.getInstance(conf);
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(NewMapTokenizer.class);
    baseJob.setCombinerClass(NewSummer.class);
    baseJob.setReducerClass(NewSummer.class);
    baseJob.setNumReduceTasks(1);
    baseJob.getConfiguration().setInt(JobContext.IO_SORT_MB, 1);
    baseJob.getConfiguration().set(JobContext.MAP_SORT_SPILL_PERCENT, "0.50");
    baseJob.getConfiguration().setInt(JobContext.MAP_COMBINE_MIN_SPILLS, 3);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setMinInputSplitSize(
        baseJob, Long.MAX_VALUE);
    return baseJob;
}
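For context on the two createJob() variants above: the three configuration knobs shrink the map-side sort buffer and lower its spill threshold so that spills, and therefore the combiner, trigger even on tiny test inputs, while the Long.MAX_VALUE minimum split size keeps the whole input in a single map task. A minimal sketch using the string keys those JobContext constants resolve to (assuming Hadoop 2.x property names):

import org.apache.hadoop.conf.Configuration;

public class SpillTuningSketch {
    public static Configuration tunedConf() {
        Configuration conf = new Configuration();
        // JobContext.IO_SORT_MB: in-memory map-side sort buffer, in MB.
        conf.setInt("mapreduce.task.io.sort.mb", 1);
        // JobContext.MAP_SORT_SPILL_PERCENT: buffer fill ratio that triggers a spill.
        conf.set("mapreduce.map.sort.spill.percent", "0.50");
        // JobContext.MAP_COMBINE_MIN_SPILLS: minimum number of spill files
        // before the combiner is run again during the merge.
        conf.setInt("mapreduce.map.combine.minspills", 3);
        return conf;
    }
}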
FileInputFormat.setInputPaths(job, inputPaths);
if (minSplitSize > 0) {
    FileInputFormat.setMinInputSplitSize(job, minSplitSize);
}
protected List<InputSplit> computeSplits(long desiredBundleSizeBytes)
        throws IOException, IllegalAccessException, InstantiationException {
    Job job = jobInstance();
    FileInputFormat.setMinInputSplitSize(job, desiredBundleSizeBytes);
    FileInputFormat.setMaxInputSplitSize(job, desiredBundleSizeBytes);
    return createFormat(job).getSplits(job);
}
private List<InputSplit> computeSplits(long desiredBundleSizeBytes,
        SerializableConfiguration serializableConfiguration)
        throws IOException, IllegalAccessException, InstantiationException {
    Job job = SerializableConfiguration.newJob(serializableConfiguration);
    FileInputFormat.setMinInputSplitSize(job, desiredBundleSizeBytes);
    FileInputFormat.setMaxInputSplitSize(job, desiredBundleSizeBytes);
    return createFormat(job).getSplits(job);
}
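The computeSplits variants above pin the minimum and maximum split size to the same value, which asks getSplits(...) for near-uniform bundles of that size (subject to file and block boundaries). A standalone sketch of the same pattern; the input path and the 64MB target are assumptions for illustration:

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class UniformSplitsDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Hypothetical input path; replace with a real one.
        FileInputFormat.setInputPaths(job, new Path("/data/in"));

        // Pin min == max so every split is pushed toward the desired size.
        long desired = 64L * 1024 * 1024; // target 64MB bundles
        FileInputFormat.setMinInputSplitSize(job, desired);
        FileInputFormat.setMaxInputSplitSize(job, desired);

        List<InputSplit> splits = new TextInputFormat().getSplits(job);
        System.out.println("Split count: " + splits.size());
    }
}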
FileInputFormat.setInputPaths(job, inputPathA);
if (minSplitSize > 0) {
    FileInputFormat.setMinInputSplitSize(job, minSplitSize);
}