@Override
public Path[] getInputPathsShim(JobConf conf) {
  try {
    return FileInputFormat.getInputPaths(conf);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // only gather base statistics for FileInputFormats
  if (!(mapredInputFormat instanceof FileInputFormat)) {
    return null;
  }

  final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics)
      ? (FileBaseStatistics) cachedStats
      : null;

  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(this.jobConf);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Could not determine statistics due to an io error: " + ioex.getMessage());
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: " + t.getMessage(), t);
    }
  }

  // no statistics available
  return null;
}
public void validateInput(JobConf job) throws IOException {
  // expecting exactly one path
  Path[] tableNames = FileInputFormat.getInputPaths(job);
  if (tableNames == null || tableNames.length > 1) {
    throw new IOException("expecting one table name");
  }

  // connected to table?
  if (getTable() == null) {
    throw new IOException("could not connect to table '" + tableNames[0].getName() + "'");
  }

  // expecting at least one column
  String colArg = job.get(COLUMN_LIST);
  if (colArg == null || colArg.length() == 0) {
    throw new IOException("expecting at least one column");
  }
}
/**
 * setInputPaths adds all the paths in the provided list to the Job conf object
 * as input paths for the job.
 *
 * @param job
 * @param pathsToAdd
 */
public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
  Path[] addedPaths = FileInputFormat.getInputPaths(job);
  if (addedPaths == null) {
    addedPaths = EMPTY_PATH;
  }

  Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
  System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);

  int i = 0;
  for (Path p : pathsToAdd) {
    combined[addedPaths.length + (i++)] = p;
  }
  FileInputFormat.setInputPaths(job, combined);
}
@Override
protected void initialize(JobConf job) throws IOException {
  Path[] tableNames = FileInputFormat.getInputPaths(job);
  String colArg = job.get(COLUMN_LIST);
  String[] colNames = colArg.split(" ");
  byte[][] m_cols = new byte[colNames.length][];
  for (int i = 0; i < m_cols.length; i++) {
    m_cols[i] = Bytes.toBytes(colNames[i]);
  }
  setInputColumns(m_cols);
  Connection connection = ConnectionFactory.createConnection(job);
  initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
}
/**
 * setInputPaths adds all the paths in the provided list to the Job conf object
 * as input paths for the job.
 *
 * @param job
 * @param pathsToAdd
 */
public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
  Path[] addedPaths = FileInputFormat.getInputPaths(job);
  if (addedPaths == null) {
    addedPaths = new Path[0];
  }

  Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
  System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);

  int i = 0;
  for (Path p : pathsToAdd) {
    combined[addedPaths.length + (i++)] = p;
  }
  FileInputFormat.setInputPaths(job, combined);
}
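A minimal usage sketch for the setInputPaths helper shown in the two variants above; the class name and the /tmp/in/* paths are invented for illustration, and the helper body is copied in so the sketch stands alone.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SetInputPathsSketch {

  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Seed the job with one input path, then append two more through the helper;
    // the helper reads the existing paths back and re-sets the combined array.
    FileInputFormat.setInputPaths(job, new Path("/tmp/in/seed"));
    setInputPaths(job, Arrays.asList(new Path("/tmp/in/a"), new Path("/tmp/in/b")));
    System.out.println(Arrays.toString(FileInputFormat.getInputPaths(job)));
  }

  // Copied from the helper above so the sketch is self-contained.
  public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
    Path[] addedPaths = FileInputFormat.getInputPaths(job);
    if (addedPaths == null) {
      addedPaths = new Path[0];
    }
    Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
    System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);
    int i = 0;
    for (Path p : pathsToAdd) {
      combined[addedPaths.length + (i++)] = p;
    }
    FileInputFormat.setInputPaths(job, combined);
  }
}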
Path[] getInputPaths(JobConf job) throws IOException {
  Path[] dirs;
  if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
    dirs = mrwork.getPathToPartitionInfo().keySet().toArray(new Path[]{});
  } else {
    dirs = FileInputFormat.getInputPaths(job);
    if (dirs.length == 0) {
      // on tez we avoid duplicating the file info in FileInputFormat.
      if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        try {
          List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
          dirs = paths.toArray(new Path[paths.size()]);
        } catch (Exception e) {
          throw new IOException("Could not create input files", e);
        }
      } else {
        throw new IOException("No input paths specified in job");
      }
    }
  }
  StringInternUtils.internUriStringsInPathArray(dirs);
  return dirs;
}
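The branch taken in getInputPaths above is driven by Hive's execution engine setting. As a rough sketch, assuming the standard key name behind HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, the value can be read straight off the JobConf:

import org.apache.hadoop.mapred.JobConf;

public class ExecutionEngineSketch {
  public static void main(String[] args) {
    // "hive.execution.engine" is the configuration key behind HiveConf.ConfVars.HIVE_EXECUTION_ENGINE;
    // values such as "mr", "tez", or "spark" decide which branch getInputPaths above takes.
    JobConf job = new JobConf();
    job.set("hive.execution.engine", "tez");
    System.out.println(job.get("hive.execution.engine"));
  }
}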
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setColumns(job);

  // hive depends on FileSplits, so wrap in HBaseSplit
  Path[] tablePaths = FileInputFormat.getInputPaths(job);

  InputSplit[] results = delegate.getSplits(job, numSplits);
  for (int i = 0; i < results.length; i++) {
    results[i] = new HBaseSplit(results[i], tablePaths[0]);
  }
  return results;
}
Path[] tablePaths = org.apache.hadoop.mapred.FileInputFormat.getInputPaths(jobConf);
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  Path[] symlinksDirs = FileInputFormat.getInputPaths(job);
  if (symlinksDirs.length == 0) {
    throw new IOException("No input paths specified in job.");
    InputSplit inputSplit, JobConf entries, Reporter reporter) throws IOException {
  if (in == null) {
    Path[] paths = FileInputFormat.getInputPaths(entries);
    in = new FSDataInputStream[paths.length];
    FileSystem fs = paths[0].getFileSystem(entries);
Path[] tablePaths = FileInputFormat.getInputPaths(job);

LOGGER.debug("Creating 1 input split");
splits = new InputSplit[1];
splits[0] = new JdbcInputSplit(FileInputFormat.getInputPaths(job)[0]);
LOGGER.info("Creating 1 input split " + splits[0]);
return splits;
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException {
  super.init(job);

  Path[] dirs = FileInputFormat.getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }
  JobConf newjob = new JobConf(job);
  ArrayList<InputSplit> result = new ArrayList<InputSplit>();

  // for each dir, get the InputFormat, and do getSplits.
  PartitionDesc part;
  for (Path dir : dirs) {
    part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true);
    // create a new InputFormat instance if this is the first time we see this class
    Class inputFormatClass = part.getInputFileFormatClass();
    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);

    FileInputFormat.setInputPaths(newjob, dir);
    newjob.setInputFormat(inputFormat.getClass());
    InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
    for (InputSplit is : iss) {
      result.add(new HiveInputSplit(is, inputFormatClass.getName()));
    }
  }
  return result.toArray(new HiveInputSplit[result.size()]);
}
private static IntWritable deduceInputFile(JobConf job) {
  Path[] inputPaths = FileInputFormat.getInputPaths(job);
  Path inputFile = new Path(job.get(JobContext.MAP_INPUT_FILE));

  // value == one for sort-input; value == two for sort-output
  return (inputFile.getParent().equals(inputPaths[0])) ? sortInput : sortOutput;
}
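A small sketch of the parent-path comparison deduceInputFile relies on; the HDFS URIs are made up for illustration.

import org.apache.hadoop.fs.Path;

public class ParentPathSketch {
  public static void main(String[] args) {
    // Hypothetical layout: the job's first input path is the sort-input directory,
    // and the current map task is reading one of its part files.
    Path inputDir = new Path("hdfs://nn:8020/user/test/sort-input");
    Path mapInputFile = new Path("hdfs://nn:8020/user/test/sort-input/part-00000");
    // getParent() strips the last path component, so this prints true,
    // which corresponds to the sortInput case in deduceInputFile above.
    System.out.println(mapInputFile.getParent().equals(inputDir));
  }
}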
protected static void verifyNoDuplicates( Configuration conf ) {
  Path[] inputPaths = FileInputFormat.getInputPaths( HadoopUtil.asJobConfInstance( conf ) );
  Set<Path> paths = new HashSet<Path>( (int) ( inputPaths.length / .75f ) );

  for( Path inputPath : inputPaths ) {
    if( !paths.add( inputPath ) )
      throw new TapException( "may not add duplicate paths, found: " + inputPath );
  }
}