org.apache.hadoop.mapred.FileInputFormat.addInputPaths Java code examples

/**
 * Collects the file paths behind a Hive location, as reported by the given input format.
 *
 * <p>A {@link JobConf} is created from {@code getHadoopConfiguration()}, the location is
 * registered as job input, and the input format is asked for its splits (1000 is passed as
 * the second argument of {@code getSplits}). The path of every returned split is gathered
 * into a set, so a file covered by several splits appears once.
 *
 * @param inputFormat input format used to compute the splits
 * @param location location whose string form is registered as the job input
 * @return the set of paths taken from the returned file splits
 * @throws IOException if a returned split is not a {@link FileSplit}
 */
public static Set<Path> getPaths(InputFormat<?, ?> inputFormat, Path location) throws IOException {
 JobConf conf = new JobConf(getHadoopConfiguration());
 // NOTE(review): addInputPaths takes a comma separated list; a location whose string form
 // contains a comma would presumably be treated as several inputs - confirm.
 FileInputFormat.addInputPaths(conf, location.toString());

 Set<Path> result = Sets.newHashSet();
 for (InputSplit candidate : inputFormat.getSplits(conf, 1000)) {
  if (candidate instanceof FileSplit) {
   result.add(((FileSplit) candidate).getPath());
   continue;
  }
  // Only file-backed splits carry a path; anything else is rejected.
  throw new IOException("Not a file split. Found " + candidate.getClass().getName());
 }
 return result;
}

FileInputFormat.addInputPaths(job, line.getOptionValue("in"));
FileOutputFormat.setOutputPath(job,new Path (line.getOptionValue("out")));

job.setStrings("io.serializations", job.get("io.serializations"),
  MutationSerialization.class.getName(), ResultSerialization.class.getName());
FileInputFormat.addInputPaths(job, table);
job.set(TableInputFormat.COLUMN_LIST, columns);
if (addDependencyJars) {

execargs.add("org.apache.avro.mapred.tether.WordCountTask");
FileInputFormat.addInputPaths(job, inputPath.toString());
FileOutputFormat.setOutputPath(job, outputPath);
TetherJob.setExecutable(job, exec, execargs, false);

/**
 * Resolves a Hive location to the set of file paths that the supplied input format would read.
 *
 * <p>The location is added as input to a fresh {@link JobConf} built from
 * {@code getHadoopConfiguration()}; the splits produced by the input format are then walked
 * and each split's path is recorded.
 *
 * @param inputFormat input format whose {@code getSplits} is invoked
 * @param location location to add as job input (via its string form)
 * @return the distinct paths of all returned file splits
 * @throws IOException if any returned split is not a {@link FileSplit}
 */
public static Set<Path> getPaths(InputFormat<?, ?> inputFormat, Path location) throws IOException {
 JobConf conf = new JobConf(getHadoopConfiguration());
 FileInputFormat.addInputPaths(conf, location.toString());
 // 1000 is the second argument handed to getSplits.
 InputSplit[] allSplits = inputFormat.getSplits(conf, 1000);

 Set<Path> found = Sets.newHashSet();
 for (int i = 0; i < allSplits.length; i++) {
  InputSplit current = allSplits[i];
  boolean isFileSplit = current instanceof FileSplit;
  if (!isFileSplit) {
   throw new IOException("Not a file split. Found " + current.getClass().getName());
  }
  found.add(((FileSplit) current).getPath());
 }
 return found;
}

/**
 * Lists the paths of the file splits that an input format produces for a Hive location.
 *
 * @param inputFormat input format asked for splits
 * @param location location registered (as a string) as the job's input
 * @return a set holding the path of each returned split
 * @throws IOException if the input format returns a split that is not a {@link FileSplit}
 */
public static Set<Path> getPaths(InputFormat<?, ?> inputFormat, Path location) throws IOException {
 Set<Path> collected = Sets.newHashSet();

 // Point a job configuration at the location so the input format can compute splits for it.
 JobConf splitConf = new JobConf(getHadoopConfiguration());
 String inputLocation = location.toString();
 FileInputFormat.addInputPaths(splitConf, inputLocation);

 for (InputSplit s : inputFormat.getSplits(splitConf, 1000)) {
  if (!(s instanceof FileSplit)) {
   String foundType = s.getClass().getName();
   throw new IOException("Not a file split. Found " + foundType);
  }
  Path splitPath = ((FileSplit) s).getPath();
  collected.add(splitPath);
 }
 return collected;
}

 FileOutputFormat.setOutputPath(job, new Path(argv[++i]));
} else if ("-indir".equals(argv[i])) {
 FileInputFormat.addInputPaths(job, argv[++i]);
} else if ("-inFormatIndirect".equals(argv[i])) {
 job.setClass(org.apache.hadoop.mapreduce.

 FileInputFormat.addInputPaths(job, path);
 LOG.info("Add a non-native table " + path);
 return numEmptyPaths;
  Text.class, false, props, null);
recWriter.close(false);
FileInputFormat.addInputPaths(job, onefile);
return numEmptyPaths;

job.setStrings("io.serializations", job.get("io.serializations"),
  MutationSerialization.class.getName(), ResultSerialization.class.getName());
FileInputFormat.addInputPaths(job, table);
job.set(TableInputFormat.COLUMN_LIST, columns);
if (addDependencyJars) {

job.setStrings("io.serializations", job.get("io.serializations"),
  MutationSerialization.class.getName(), ResultSerialization.class.getName());
FileInputFormat.addInputPaths(job, table);
job.set(TableInputFormat.COLUMN_LIST, columns);
if (addDependencyJars) {

job.setStrings("io.serializations", job.get("io.serializations"),
  MutationSerialization.class.getName(), ResultSerialization.class.getName());
FileInputFormat.addInputPaths(job, table);
job.set(TableInputFormat.COLUMN_LIST, columns);
if (addDependencyJars) {

job.setMapOutputKeyClass(outputKeyClass);
job.setMapperClass(mapper);
FileInputFormat.addInputPaths(job, table);
job.set(TableInputFormat.COLUMN_LIST, columns);
if (addDependencyJars) {

FileInputFormat.addInputPaths(theJob, inputDir);

 FileOutputFormat.setOutputPath(job, new Path(argv[++i]));
} else if ("-indir".equals(argv[i])) {
 FileInputFormat.addInputPaths(job, argv[++i]);
} else if ("-inFormatIndirect".equals(argv[i])) {
 job.setClass(org.apache.hadoop.mapreduce.

/**
 * Configures and runs the "MultiFileWordCount" job.
 *
 * <p>Expects at least two arguments: {@code args[0]} is passed to
 * {@code FileInputFormat.addInputPaths} as the job input and {@code args[1]} becomes the
 * output path.
 *
 * @param args command-line arguments: input path(s), then output path
 * @return 1 after printing usage when fewer than two arguments are given; 0 once
 *     {@code JobClient.runJob} has returned
 * @throws Exception whatever job setup or {@code JobClient.runJob} throws
 */
public int run(String[] args) throws Exception {
  final boolean hasInputAndOutput = args.length >= 2;
  if (!hasInputAndOutput) {
    printUsage();
    return 1;
  }

  JobConf wordCountJob = new JobConf(getConf(), MultiFileWordCount.class);
  wordCountJob.setJobName("MultiFileWordCount");

  // Input side: read through the job's own InputFormat.
  wordCountJob.setInputFormat(MyInputFormat.class);

  // Output types: word (Text) -> count (LongWritable).
  wordCountJob.setOutputKeyClass(Text.class);
  wordCountJob.setOutputValueClass(LongWritable.class);

  // Processing: custom mapper, with LongSumReducer as both combiner and reducer.
  wordCountJob.setMapperClass(MapClass.class);
  wordCountJob.setCombinerClass(LongSumReducer.class);
  wordCountJob.setReducerClass(LongSumReducer.class);

  // Wire up the locations taken from the command line.
  String inputPaths = args[0];
  Path outputDir = new Path(args[1]);
  FileInputFormat.addInputPaths(wordCountJob, inputPaths);
  FileOutputFormat.setOutputPath(wordCountJob, outputDir);

  JobClient.runJob(wordCountJob);
  return 0;
}

FileInputFormat.addInputPaths(theJob, inputDir);

FileInputFormat.addInputPaths(theJob, inputDir);

FileInputFormat.addInputPaths(theJob, inputDir);

pathStr.append(StringUtils.COMMA);
pathStr.append(str5);
FileInputFormat.addInputPaths(jobConf, pathStr.toString());
paths = FileInputFormat.getInputPaths(jobConf);
assertEquals(5, paths.length);

FileInputFormat.addInputPaths(job, in.value(opts));
FileOutputFormat.setOutputPath(job, out.value(opts));
TetherJob.setExecutable(job, exec.value(opts));

Javadoc

Add the given comma separated paths to the list of inputs for the map-reduce job.

Popular methods of FileInputFormat

setInputPaths
Set the array of Paths as the list of inputs for the map-reduce job.
getInputPaths
Get the list of input Paths for the map-reduce job.
addInputPath
Add a Path to the list of inputs for the map-reduce job.
listStatus
List input directories. Subclasses may override to, e.g., select only files matching a regular expression.
getSplits
Splits files returned by #listStatus(JobConf) when they're too big.
getRecordReader
isSplitable
Is the given filename splitable? Usually, true, but if the file is stream compressed, it will not be.
computeSplitSize
getBlockIndex
getInputPathFilter
Get a PathFilter instance of the filter set for the input paths.
getPathStrings
addInputPathRecursively
Add files in the input path recursively into the results.

Popular in Java

Reactive rest calls using spring rest template
setContentView (Activity)
runOnUiThread (Activity)
findViewById (Activity)
InputStream (java.io)
A readable source of bytes. Most clients will use input streams that read data from the file system (FileInputStream), the network, or an in-memory byte array.
String (java.lang)
URLConnection (java.net)
A connection to a URL for reading or writing. For HTTP connections, see HttpURLConnection for documentation of HTTP-specific features.
PriorityQueue (java.util)
A PriorityQueue holds elements on a priority heap, which orders the elements according to their natural order or according to the comparator specified at construction time.
LoggerFactory (org.slf4j)
The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for log4j, logback and JDK 1.4 logging.
JFileChooser (javax.swing)
Top Sublime Text plugins

How to use the addInputPaths method in org.apache.hadoop.mapred.FileInputFormat

Best Java code snippets using org.apache.hadoop.mapred.FileInputFormat.addInputPaths (Showing top 20 results out of 315)

How to use the addInputPaths method in org.apache.hadoop.mapred.FileInputFormat