@Override
public List<InputSplit> getSplits(JobContext cx) throws IOException {
  // Recompute splits against a copy of the job so the caller's context is untouched.
  Job modifiedJob = Job.getInstance(cx.getConfiguration());
  setSplitSize(modifiedJob);
  FileInputFormat.setInputDirRecursive(modifiedJob, true);
  return cleanSplits(super.getSplits(modifiedJob));
}
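The setSplitSize and cleanSplits helpers called above are not shown in the excerpt. A minimal sketch of what they might look like, assuming setSplitSize caps the split size from the job's own configuration and cleanSplits drops empty splits; both bodies are assumptions, not the original implementation:

// Hypothetical helpers assumed by the getSplits() override above.
private void setSplitSize(Job job) {
  // Assumption: cap the split size, falling back to the same 60000-byte
  // default used by the driver code later in this listing.
  long maxSize = job.getConfiguration().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L);
  FileInputFormat.setMaxInputSplitSize(job, maxSize);
}

private List<InputSplit> cleanSplits(List<InputSplit> splits) throws IOException {
  // Assumption: filter out zero-length splits so no mapper is scheduled for empty work.
  List<InputSplit> cleaned = new ArrayList<>();
  for (InputSplit split : splits) {
    try {
      if (split.getLength() > 0) {
        cleaned.add(split);
      }
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }
  return cleaned;
}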
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Job.getInstance(...) replaces the deprecated new Job(...) constructor.
  Job job = Job.getInstance(getConf(), "HDFS Sync job");
  job.setJarByClass(getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(HdfsSyncMapper.class);
  job.setReducerClass(HdfsSyncReducer.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  // Pass the sync endpoints to the tasks through the job configuration.
  job.getConfiguration().set(SRC_PATH_CONF, source.toString());
  job.getConfiguration().set(DST_PATH_CONF, destination.toString());
  job.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  FileOutputFormat.setOutputPath(job, output); // output is already a Path
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
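HdfsSyncMapper and HdfsSyncReducer are referenced but not shown. A minimal sketch of the mapper side, assuming it reads the paths back out of the configuration keys set by the driver; the class body and emitted values are assumptions, not the original code:

// Hypothetical sketch of the mapper wired in above; not the original class.
// Input types match TextInputFormat; output types match the job's key/value classes.
public static class HdfsSyncMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
  private Path srcPath;
  private Path dstPath;

  @Override
  protected void setup(Context context) {
    // The driver stored these under SRC_PATH_CONF / DST_PATH_CONF.
    Configuration conf = context.getConfiguration();
    srcPath = new Path(conf.get(SRC_PATH_CONF));
    dstPath = new Path(conf.get(DST_PATH_CONF));
  }

  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    // Assumption: each input line names one entry to compare between srcPath and dstPath.
    context.write(key, value);
  }
}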
private int runHdfsCopyJob(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {
  LOG.info("Starting job for step 2...");
  Job job = Job.getInstance(this.getConf(), "Stage 2: HDFS Copy Job");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage2DirectoryCopyMapper.class);
  job.setReducerClass(Stage2DirectoryCopyReducer.class);

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  // Copy parallelism is tunable; defaults to 150 reducers.
  job.setNumReduceTasks(getConf().getInt(ConfigurationKeys.BATCH_JOB_COPY_PARALLELISM, 150));

  boolean success = job.waitForCompletion(true);
  if (success) {
    LOG.info("Job for step 2 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP2_HQL_TEMPLATE, velocityContext) + "\n");
  }
  return success ? 0 : 1;
}
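Both drivers read FileInputFormat.SPLIT_MAXSIZE (the "mapreduce.input.fileinputformat.split.maxsize" property) with an unusually small 60000-byte default. The inputs here are presumably text listings where each line triggers real work (a file copy or a metastore call), so a tight cap on split size fans that work out across many mappers. To change the cap for one run, set it on the configuration before the job is built; a minimal sketch, where the 120000 value is an arbitrary example:

// Raising the split-size cap before the job object is created.
// FileInputFormat.SPLIT_MAXSIZE == "mapreduce.input.fileinputformat.split.maxsize".
getConf().setLong(FileInputFormat.SPLIT_MAXSIZE, 120000L); // arbitrary example value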
private int runCommitChangeJob(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {
  LOG.info("Starting job for step 3...");
  Job job = Job.getInstance(this.getConf(), "Stage 3: Commit Change Job");
  job.setJarByClass(this.getClass());
  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(Stage3CommitChangeMapper.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  // The original called setNumReduceTasks(0) and then overrode it here; only the
  // last call takes effect, so the dead setNumReduceTasks(0) has been dropped.
  // No reducer class is set, so the identity reducer runs at this parallelism.
  job.setNumReduceTasks(getConf().getInt(ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM, 150));

  boolean success = job.waitForCompletion(true);
  if (success) {
    LOG.info("Job for step 3 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + VelocityUtils.renderTemplate(STEP3_HQL_TEMPLATE, velocityContext));
  }
  return success ? 0 : 1;
}
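The three stage methods are presumably chained from a single Tool driver. A hypothetical sketch of such a run() method; the argument order and intermediate path layout under tmpDir are assumptions:

// Hypothetical driver chaining the three stages; path layout and args are assumed.
@Override
public int run(String[] args) throws Exception {
  Path tmpDir = new Path(args[0]);
  Path step1Out = new Path(tmpDir, "step1");
  Path step2Out = new Path(tmpDir, "step2");
  Path step3Out = new Path(tmpDir, "step3");

  // Each stage consumes the previous stage's output listing.
  int ret = runSyncJob(new Path(args[1]), new Path(args[2]), tmpDir,
      new Path(args[3]), step1Out);
  if (ret == 0) {
    ret = runHdfsCopyJob(step1Out, step2Out);
  }
  if (ret == 0) {
    ret = runCommitChangeJob(step2Out, step3Out);
  }
  return ret;
}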
job.setMapOutputValueClass(LongWritable.class);
job.setInputFormatClass(RioFileInputFormat.class);
FileInputFormat.setInputDirRecursive(job, true);
FileInputFormat.setInputPaths(job, source);
TableMapReduceUtil.addDependencyJars(job);
try (HTable hTable = HalyardTableUtils.getTable(getConf(), target, true,
    getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
  HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(),
      hTable.getRegionLocator());
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setInputPaths(job, source);
  FileOutputFormat.setOutputPath(job, new Path(workdir));
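The excerpt above ends mid-try-block: configureIncrementalLoad only arranges for the job to write HFiles under workdir, and those files still have to be handed to the region servers once the job succeeds. A sketch of how the block plausibly continues, assuming an HBase 1.x-era LoadIncrementalHFiles (the class has moved and been renamed in later HBase versions):

  // Sketch of the likely continuation: run the job, then bulk-load the HFiles.
  // LoadIncrementalHFiles is version-dependent (org.apache.hadoop.hbase.mapreduce in 1.x).
  if (job.waitForCompletion(true)) {
    new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(workdir), hTable);
    return 0;
  }
  return -1;
}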
FileInputFormat.setInputPathFilter(job, RegexPathFilter.class);
FileInputFormat.setInputDirRecursive(job, config.shouldReadRecursively());
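RegexPathFilter here is application code, not part of Hadoop. A minimal sketch of how such a filter is typically written, assuming the pattern arrives through the job configuration; the property name and the directory handling are assumptions:

// Hypothetical filter; the "regex.path.filter.pattern" key is invented for this sketch.
// FileInputFormat instantiates the class via ReflectionUtils, which calls setConf()
// on Configurable implementations, so extending Configured exposes the job conf.
public class RegexPathFilter extends Configured implements PathFilter {
  @Override
  public boolean accept(Path path) {
    String pattern = getConf().get("regex.path.filter.pattern", ".*");
    // Note: with recursive listing the filter also sees directories; a fuller
    // version would accept directories unconditionally so recursion can descend.
    return path.getName().matches(pattern);
  }
}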