/**
 * Get paths from a Hive location using the provided input format.
 *
 * <p>The location is registered as the single input path of a fresh {@link JobConf}, the
 * input format is asked for its splits, and the file path of every split is collected.
 *
 * @param inputFormat input format used to compute the splits
 * @param location location whose files should be listed
 * @return the distinct file paths reported by the computed splits
 * @throws IOException if the splits cannot be computed, or if any split is not a
 *     {@link FileSplit}
 */
public static Set<Path> getPaths(InputFormat<?, ?> inputFormat, Path location)
    throws IOException {
  JobConf jobConf = new JobConf(getHadoopConfiguration());
  // Register the Path object itself. addInputPaths(JobConf, String) interprets its argument
  // as a comma-separated list, so a location containing a comma would be split into several
  // bogus input paths.
  FileInputFormat.addInputPath(jobConf, location);

  // 1000 is the requested number of splits handed to the input format.
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1000);

  Set<Path> paths = Sets.newHashSet();
  for (InputSplit split : splits) {
    if (!(split instanceof FileSplit)) {
      throw new IOException("Not a file split. Found " + split.getClass().getName());
    }
    paths.add(((FileSplit) split).getPath());
  }
  return paths;
}
FileInputFormat.addInputPaths(job, line.getOptionValue("in")); FileOutputFormat.setOutputPath(job,new Path (line.getOptionValue("out")));
job.setStrings("io.serializations", job.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName()); FileInputFormat.addInputPaths(job, table); job.set(TableInputFormat.COLUMN_LIST, columns); if (addDependencyJars) {
execargs.add("org.apache.avro.mapred.tether.WordCountTask"); FileInputFormat.addInputPaths(job, inputPath.toString()); FileOutputFormat.setOutputPath(job, outputPath); TetherJob.setExecutable(job, exec, execargs, false);
/**
 * Get paths from a Hive location using the provided input format.
 *
 * <p>The location is registered as the single input path of a fresh {@link JobConf}, the
 * input format is asked for its splits, and the file path of every split is collected.
 *
 * @param inputFormat input format used to compute the splits
 * @param location location whose files should be listed
 * @return the distinct file paths reported by the computed splits
 * @throws IOException if the splits cannot be computed, or if any split is not a
 *     {@link FileSplit}
 */
public static Set<Path> getPaths(InputFormat<?, ?> inputFormat, Path location)
    throws IOException {
  JobConf jobConf = new JobConf(getHadoopConfiguration());
  // Register the Path object itself. addInputPaths(JobConf, String) interprets its argument
  // as a comma-separated list, so a location containing a comma would be split into several
  // bogus input paths.
  FileInputFormat.addInputPath(jobConf, location);

  // 1000 is the requested number of splits handed to the input format.
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1000);

  Set<Path> paths = Sets.newHashSet();
  for (InputSplit split : splits) {
    if (!(split instanceof FileSplit)) {
      throw new IOException("Not a file split. Found " + split.getClass().getName());
    }
    paths.add(((FileSplit) split).getPath());
  }
  return paths;
}
/**
 * Get paths from a Hive location using the provided input format.
 *
 * <p>The location is registered as the single input path of a fresh {@link JobConf}, the
 * input format is asked for its splits, and the file path of every split is collected.
 *
 * @param inputFormat input format used to compute the splits
 * @param location location whose files should be listed
 * @return the distinct file paths reported by the computed splits
 * @throws IOException if the splits cannot be computed, or if any split is not a
 *     {@link FileSplit}
 */
public static Set<Path> getPaths(InputFormat<?, ?> inputFormat, Path location)
    throws IOException {
  JobConf jobConf = new JobConf(getHadoopConfiguration());
  // Register the Path object itself. addInputPaths(JobConf, String) interprets its argument
  // as a comma-separated list, so a location containing a comma would be split into several
  // bogus input paths.
  FileInputFormat.addInputPath(jobConf, location);

  // 1000 is the requested number of splits handed to the input format.
  InputSplit[] splits = inputFormat.getSplits(jobConf, 1000);

  Set<Path> paths = Sets.newHashSet();
  for (InputSplit split : splits) {
    if (!(split instanceof FileSplit)) {
      throw new IOException("Not a file split. Found " + split.getClass().getName());
    }
    paths.add(((FileSplit) split).getPath());
  }
  return paths;
}
FileOutputFormat.setOutputPath(job, new Path(argv[++i])); } else if ("-indir".equals(argv[i])) { FileInputFormat.addInputPaths(job, argv[++i]); } else if ("-inFormatIndirect".equals(argv[i])) { job.setClass(org.apache.hadoop.mapreduce.
FileInputFormat.addInputPaths(job, path); LOG.info("Add a non-native table " + path); return numEmptyPaths; Text.class, false, props, null); recWriter.close(false); FileInputFormat.addInputPaths(job, onefile); return numEmptyPaths;
job.setStrings("io.serializations", job.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName()); FileInputFormat.addInputPaths(job, table); job.set(TableInputFormat.COLUMN_LIST, columns); if (addDependencyJars) {
job.setStrings("io.serializations", job.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName()); FileInputFormat.addInputPaths(job, table); job.set(TableInputFormat.COLUMN_LIST, columns); if (addDependencyJars) {
job.setStrings("io.serializations", job.get("io.serializations"), MutationSerialization.class.getName(), ResultSerialization.class.getName()); FileInputFormat.addInputPaths(job, table); job.set(TableInputFormat.COLUMN_LIST, columns); if (addDependencyJars) {
job.setMapOutputKeyClass(outputKeyClass); job.setMapperClass(mapper); FileInputFormat.addInputPaths(job, table); job.set(TableInputFormat.COLUMN_LIST, columns); if (addDependencyJars) {
FileInputFormat.addInputPaths(theJob, inputDir);
FileOutputFormat.setOutputPath(job, new Path(argv[++i])); } else if ("-indir".equals(argv[i])) { FileInputFormat.addInputPaths(job, argv[++i]); } else if ("-inFormatIndirect".equals(argv[i])) { job.setClass(org.apache.hadoop.mapreduce.
public int run(String[] args) throws Exception { if (args.length < 2) { printUsage(); return 1; } JobConf job = new JobConf(getConf(), MultiFileWordCount.class); job.setJobName("MultiFileWordCount"); //set the InputFormat of the job to our InputFormat job.setInputFormat(MyInputFormat.class); // the keys are words (strings) job.setOutputKeyClass(Text.class); // the values are counts (ints) job.setOutputValueClass(LongWritable.class); //use the defined mapper job.setMapperClass(MapClass.class); //use the WordCount Reducer job.setCombinerClass(LongSumReducer.class); job.setReducerClass(LongSumReducer.class); FileInputFormat.addInputPaths(job, args[0]); FileOutputFormat.setOutputPath(job, new Path(args[1])); JobClient.runJob(job); return 0; }
FileInputFormat.addInputPaths(theJob, inputDir);
FileInputFormat.addInputPaths(theJob, inputDir);
FileInputFormat.addInputPaths(theJob, inputDir);
pathStr.append(StringUtils.COMMA); pathStr.append(str5); FileInputFormat.addInputPaths(jobConf, pathStr.toString()); paths = FileInputFormat.getInputPaths(jobConf); assertEquals(5, paths.length);
FileInputFormat.addInputPaths(job, in.value(opts)); FileOutputFormat.setOutputPath(job, out.value(opts)); TetherJob.setExecutable(job, exec.value(opts));