FileInputFormat.addInputPaths(job, inputPaths);
FileOutputFormat.setOutputPath(job, outputPath);
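Throughout these examples, addInputPaths(Job, String) treats its String argument as a comma-separated list of paths, which is why a single args[0] can carry several input directories. A minimal sketch of how it differs from the single-path addInputPath; the class name and directory names below are hypothetical:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class InputPathsSketch {
    static void configureInputs(Job job) throws IOException {
        // one call, several inputs: the string is split on commas
        FileInputFormat.addInputPaths(job, "/data/2023,/data/2024");

        // addInputPath registers exactly one Path per call
        FileInputFormat.addInputPath(job, new Path("/data/2025"));
    }
}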
@Override
public Job addInputPaths(HadoopDruidIndexerConfig config, Job job) throws IOException {
    // 'paths' is this path spec's comma-separated input path list
    // (a field of the enclosing class, not shown in the excerpt)
    log.info("Adding paths[%s]", paths);
    FileInputFormat.addInputPaths(job, paths);
    return job;
}
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "duplicate remover MR";
    job.setJobName(jobName);
    job.setJarByClass(DuplicateRemover.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(DuplicateRemover.DuplicateMapper.class);
    job.setReducerClass(DuplicateRemover.DuplicateReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    Utility.setConfiguration(job.getConfiguration());

    // the job-specific reducer count ("dur.num.reducer") wins; otherwise
    // fall back to the global "num.reducer", defaulting to 1
    int numReducer = job.getConfiguration().getInt("dur.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
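run(String[]) methods like the one above follow Hadoop's Tool contract (the getConf() call gives this away), so they are normally launched through ToolRunner, which consumes the generic options (-D key=value, -files, ...) before the remaining arguments reach run(). A hypothetical launcher sketch, assuming DuplicateRemover extends Configured implements Tool:

import org.apache.hadoop.util.ToolRunner;

// Hypothetical driver; any of the other run() implementations in this
// listing would be launched the same way.
public class DuplicateRemoverDriver {
    public static void main(String[] args) throws Exception {
        // ToolRunner strips Hadoop's generic options, then passes the
        // remaining args -- input path(s) and output path -- to run()
        int exitCode = ToolRunner.run(new DuplicateRemover(), args);
        System.exit(exitCode);
    }
}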
private void splitRealFiles(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance();
    FileSystem fs = FileSystem.get(conf);
    if (!(fs instanceof DistributedFileSystem)) {
        throw new IOException("Wrong file system: " + fs.getClass().getName());
    }
    int blockSize = conf.getInt("dfs.block.size", 128 * 1024 * 1024);

    DummyInputFormat inFormat = new DummyInputFormat();
    for (int i = 0; i < args.length; i++) {
        FileInputFormat.addInputPaths(job, args[i]);
    }
    // combine files into splits of up to 10 blocks, and don't build a
    // rack-local split smaller than one block
    inFormat.setMinSplitSizeRack(blockSize);
    inFormat.setMaxSplitSize(10 * blockSize);

    List<InputSplit> splits = inFormat.getSplits(job);
    System.out.println("Total number of splits " + splits.size());
    for (int i = 0; i < splits.size(); ++i) {
        CombineFileSplit fileSplit = (CombineFileSplit) splits.get(i);
        System.out.println("Split[" + i + "] " + fileSplit);
    }
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "record set mutator MR";
    job.setJobName(jobName);
    job.setJarByClass(RecordSetBulkMutator.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(RecordSetBulkMutator.BulkMutatorMapper.class);
    job.setReducerClass(RecordSetBulkMutator.BulkMutatorReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    int numReducer = job.getConfiguration().getInt("rsbm.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "SetMerger MR";
    job.setJobName(jobName);
    job.setJarByClass(SetOperator.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration());

    job.setMapperClass(SetOperator.MergerMapper.class);
    job.setReducerClass(SetOperator.MergerReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    int numReducer = job.getConfiguration().getInt("seo.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "record set modifier MR";
    job.setJobName(jobName);
    job.setJarByClass(RecordSetModifier.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(RecordSetModifier.ModifierMapper.class);
    job.setReducerClass(RecordSetModifier.ModifierReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    int numReducer = job.getConfiguration().getInt("rsm.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "MultiJoiner MR";
    job.setJobName(jobName);
    job.setJarByClass(MultiJoiner.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration());

    job.setMapperClass(MultiJoiner.JoinerMapper.class);
    job.setReducerClass(MultiJoiner.JoinerReducer.class);

    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    int numReducer = job.getConfiguration().getInt("muj.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    String jobName = "Joiner MR";
    job.setJobName(jobName);
    job.setJarByClass(Joiner.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration());

    job.setMapperClass(Joiner.JoinerMapper.class);
    job.setReducerClass(Joiner.JoinerReducer.class);

    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    job.setGroupingComparatorClass(SecondarySort.TextIntIdPairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TextIntIdPairTuplePartitioner.class);

    int numReducer = job.getConfiguration().getInt("joi.num.reducer", -1);
    numReducer = -1 == numReducer ? job.getConfiguration().getInt("num.reducer", 1) : numReducer;
    job.setNumReduceTasks(numReducer);

    int status = job.waitForCompletion(true) ? 0 : 1;
    return status;
}
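Several of the jobs above install a grouping comparator plus a custom partitioner, which is the classic secondary-sort join setup: partition and group on the join key alone, but sort on (key, tag) so each reduce call sees records from both inputs in a fixed order. A self-contained sketch of the pattern; TextIntPair and the helper classes below are hypothetical stand-ins, not chombo's actual TextInt/SecondarySort implementations:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Partitioner;

// Composite key: the join key plus an int tag marking which input a
// record came from.
class TextIntPair implements WritableComparable<TextIntPair> {
    private final Text key = new Text();
    private int tag;

    void set(String k, int t) { key.set(k); tag = t; }
    Text getKey() { return key; }

    @Override public void write(DataOutput out) throws IOException {
        key.write(out);
        out.writeInt(tag);
    }

    @Override public void readFields(DataInput in) throws IOException {
        key.readFields(in);
        tag = in.readInt();
    }

    // Full ordering: join key first, then tag, so one input's records
    // always precede the other's within a reduce group.
    @Override public int compareTo(TextIntPair o) {
        int c = key.compareTo(o.key);
        return c != 0 ? c : Integer.compare(tag, o.tag);
    }
}

// Partition on the join key only, so matching records from both inputs
// reach the same reducer regardless of tag.
class JoinKeyPartitioner extends Partitioner<TextIntPair, Text> {
    @Override public int getPartition(TextIntPair key, Text value, int numPartitions) {
        return (key.getKey().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}

// Group on the join key only, so a single reduce() call sees both sides
// of the join, already sorted by tag.
class JoinKeyGroupingComparator extends WritableComparator {
    JoinKeyGroupingComparator() { super(TextIntPair.class, true); }

    @Override public int compare(WritableComparable a, WritableComparable b) {
        return ((TextIntPair) a).getKey().compareTo(((TextIntPair) b).getKey());
    }
}

These would be wired in exactly as in the listing, via job.setPartitionerClass(JoinKeyPartitioner.class) and job.setGroupingComparatorClass(JoinKeyGroupingComparator.class).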
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }

    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);

    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);

    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);

    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
job.setMapOutputValueClass(Text.class);
FileInputFormat.addInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setOutputPath(job, new Path(argv[++i]));
} else if ("-indir".equals(argv[i])) {
    FileInputFormat.addInputPaths(job, argv[++i]);
} else if ("-inFormatIndirect".equals(argv[i])) {
    job.getConfiguration().setClass(INDIRECT_INPUT_FORMAT,
// zero reducers make this a map-only job: mapper output goes straight
// to the output format, with no shuffle or sort
job.setNumReduceTasks(0);
FileInputFormat.addInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
FileInputFormat.addInputPaths(theJob, inputDir);
// variant of splitRealFiles above that takes the block size from the
// file system instead of from configuration
private void splitRealFiles(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance();
    FileSystem fs = FileSystem.get(conf);
    if (!(fs instanceof DistributedFileSystem)) {
        throw new IOException("Wrong file system: " + fs.getClass().getName());
    }
    long blockSize = fs.getDefaultBlockSize();

    DummyInputFormat inFormat = new DummyInputFormat();
    for (int i = 0; i < args.length; i++) {
        FileInputFormat.addInputPaths(job, args[i]);
    }
    inFormat.setMinSplitSizeRack(blockSize);
    inFormat.setMaxSplitSize(10 * blockSize);

    List<InputSplit> splits = inFormat.getSplits(job);
    System.out.println("Total number of splits " + splits.size());
    for (int i = 0; i < splits.size(); ++i) {
        CombineFileSplit fileSplit = (CombineFileSplit) splits.get(i);
        System.out.println("Split[" + i + "] " + fileSplit);
    }
}
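Both splitRealFiles variants reference a DummyInputFormat that the excerpt never defines. In Hadoop's own CombineFileInputFormat tests this is a trivial subclass whose record reader is never exercised, since only getSplits() is called; a minimal sketch under that assumption (the visibility-widening overrides let a caller outside the package reach the protected split-size setters):

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;

// Minimal stand-in: splitRealFiles only calls getSplits() and the two
// split-size setters, so no working record reader is required.
class DummyInputFormat extends CombineFileInputFormat<Text, Text> {

    @Override
    public void setMinSplitSizeRack(long size) {
        super.setMinSplitSizeRack(size);
    }

    @Override
    public void setMaxSplitSize(long size) {
        super.setMaxSplitSize(size);
    }

    @Override
    public RecordReader<Text, Text> createRecordReader(
            InputSplit split, TaskAttemptContext context) throws IOException {
        return null; // never used here: only split computation is exercised
    }
}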
public JavaRDDLike<?, ?> getJavaRDDLike(SparkRuntime runtime) {
    try {
        Job job = new Job(runtime.getConfiguration());
        FileInputFormat.addInputPaths(job, "/tmp"); // placeholder
        source.configureSource(job, 0);
        Converter converter = source.getConverter();
        JavaPairRDD<?, ?> input = runtime.getSparkContext().newAPIHadoopRDD(
            job.getConfiguration(),
            CrunchInputFormat.class,
            converter.getKeyClass(),
            converter.getValueClass());
        input.rdd().setName(getName());
        MapFn mapFn = converter.applyPTypeTransforms()
            ? source.getType().getInputMapFn()
            : IdentityFn.getInstance();
        return input
            .map(new InputConverterFunction(source.getConverter()))
            .map(new MapFunction(mapFn, runtime.getRuntimeContext()));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
}