@Override
public Path[] getInputPathsShim(JobConf conf) {
  try {
    return FileInputFormat.getInputPaths(conf);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
  // only gather base statistics for FileInputFormats
  if (!(mapredInputFormat instanceof FileInputFormat)) {
    return null;
  }

  final FileBaseStatistics cachedFileStats = (cachedStats instanceof FileBaseStatistics)
      ? (FileBaseStatistics) cachedStats
      : null;

  try {
    final org.apache.hadoop.fs.Path[] paths = FileInputFormat.getInputPaths(this.jobConf);
    return getFileStats(cachedFileStats, paths, new ArrayList<FileStatus>(1));
  } catch (IOException ioex) {
    if (LOG.isWarnEnabled()) {
      LOG.warn("Could not determine statistics due to an io error: " + ioex.getMessage());
    }
  } catch (Throwable t) {
    if (LOG.isErrorEnabled()) {
      LOG.error("Unexpected problem while getting the file statistics: " + t.getMessage(), t);
    }
  }

  // no statistics available
  return null;
}
public void validateInput(JobConf job) throws IOException {
  // expecting exactly one path
  Path[] tableNames = FileInputFormat.getInputPaths(job);
  if (tableNames == null || tableNames.length > 1) {
    throw new IOException("expecting one table name");
  }

  // connected to table?
  if (getTable() == null) {
    throw new IOException("could not connect to table '" + tableNames[0].getName() + "'");
  }

  // expecting at least one column
  String colArg = job.get(COLUMN_LIST);
  if (colArg == null || colArg.length() == 0) {
    throw new IOException("expecting at least one column");
  }
}
/**
 * setInputPaths adds all the paths in the provided list to the Job conf object
 * as input paths for the job.
 *
 * @param job
 * @param pathsToAdd
 */
public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
  Path[] addedPaths = FileInputFormat.getInputPaths(job);
  if (addedPaths == null) {
    addedPaths = EMPTY_PATH;
  }

  Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
  System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);

  int i = 0;
  for (Path p : pathsToAdd) {
    combined[addedPaths.length + (i++)] = p;
  }
  FileInputFormat.setInputPaths(job, combined);
}
@Override
protected void initialize(JobConf job) throws IOException {
  Path[] tableNames = FileInputFormat.getInputPaths(job);
  String colArg = job.get(COLUMN_LIST);
  String[] colNames = colArg.split(" ");
  byte[][] m_cols = new byte[colNames.length][];
  for (int i = 0; i < m_cols.length; i++) {
    m_cols[i] = Bytes.toBytes(colNames[i]);
  }
  setInputColumns(m_cols);
  Connection connection = ConnectionFactory.createConnection(job);
  initializeTable(connection, TableName.valueOf(tableNames[0].getName()));
}
/**
 * setInputPaths adds all the paths in the provided list to the Job conf object
 * as input paths for the job.
 *
 * @param job
 * @param pathsToAdd
 */
public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
  Path[] addedPaths = FileInputFormat.getInputPaths(job);
  if (addedPaths == null) {
    addedPaths = new Path[0];
  }

  Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
  System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);

  int i = 0;
  for (Path p : pathsToAdd) {
    combined[addedPaths.length + (i++)] = p;
  }
  FileInputFormat.setInputPaths(job, combined);
}
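A minimal usage sketch for the setInputPaths helper shown in the two variants above; the class name and the /tmp/in/* paths are invented for illustration, and the helper body is copied in so the sketch stands alone.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class SetInputPathsSketch {

  public static void main(String[] args) {
    JobConf job = new JobConf();
    // Seed the job with one input path, then append two more through the helper;
    // the helper reads the existing paths back and re-sets the combined array.
    FileInputFormat.setInputPaths(job, new Path("/tmp/in/seed"));
    setInputPaths(job, Arrays.asList(new Path("/tmp/in/a"), new Path("/tmp/in/b")));
    System.out.println(Arrays.toString(FileInputFormat.getInputPaths(job)));
  }

  // Copied from the helper above so the sketch is self-contained.
  public static void setInputPaths(JobConf job, List<Path> pathsToAdd) {
    Path[] addedPaths = FileInputFormat.getInputPaths(job);
    if (addedPaths == null) {
      addedPaths = new Path[0];
    }
    Path[] combined = new Path[addedPaths.length + pathsToAdd.size()];
    System.arraycopy(addedPaths, 0, combined, 0, addedPaths.length);
    int i = 0;
    for (Path p : pathsToAdd) {
      combined[addedPaths.length + (i++)] = p;
    }
    FileInputFormat.setInputPaths(job, combined);
  }
}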
Path[] getInputPaths(JobConf job) throws IOException {
  Path[] dirs;
  if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
    dirs = mrwork.getPathToPartitionInfo().keySet().toArray(new Path[]{});
  } else {
    dirs = FileInputFormat.getInputPaths(job);
    if (dirs.length == 0) {
      // on tez we avoid duplicating the file info in FileInputFormat.
      if (HiveConf.getVar(job, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
        try {
          List<Path> paths = Utilities.getInputPathsTez(job, mrwork);
          dirs = paths.toArray(new Path[paths.size()]);
        } catch (Exception e) {
          throw new IOException("Could not create input files", e);
        }
      } else {
        throw new IOException("No input paths specified in job");
      }
    }
  }
  StringInternUtils.internUriStringsInPathArray(dirs);
  return dirs;
}
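The branch taken in getInputPaths above is driven by Hive's execution engine setting. As a rough sketch, assuming the standard key name behind HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, the value can be read straight off the JobConf:

import org.apache.hadoop.mapred.JobConf;

public class ExecutionEngineSketch {
  public static void main(String[] args) {
    // "hive.execution.engine" is the configuration key behind HiveConf.ConfVars.HIVE_EXECUTION_ENGINE;
    // values such as "mr", "tez", or "spark" decide which branch getInputPaths above takes.
    JobConf job = new JobConf();
    job.set("hive.execution.engine", "tez");
    System.out.println(job.get("hive.execution.engine"));
  }
}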
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  setColumns(job);

  // hive depends on FileSplits, so wrap in HBaseSplit
  Path[] tablePaths = FileInputFormat.getInputPaths(job);

  InputSplit[] results = delegate.getSplits(job, numSplits);
  for (int i = 0; i < results.length; i++) {
    results[i] = new HBaseSplit(results[i], tablePaths[0]);
  }
  return results;
}
Path[] tablePaths = org.apache.hadoop.mapred.FileInputFormat.getInputPaths(jobConf);
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
  Path[] symlinksDirs = FileInputFormat.getInputPaths(job);
  if (symlinksDirs.length == 0) {
    throw new IOException("No input paths specified in job.");
    InputSplit inputSplit, JobConf entries, Reporter reporter) throws IOException {
  if (in == null) {
    Path[] paths = FileInputFormat.getInputPaths(entries);
    in = new FSDataInputStream[paths.length];
    FileSystem fs = paths[0].getFileSystem(entries);
Path[] tablePaths = FileInputFormat.getInputPaths(job);

LOGGER.debug("Creating 1 input split");
splits = new InputSplit[1];
splits[0] = new JdbcInputSplit(FileInputFormat.getInputPaths(job)[0]);
LOGGER.info("Creating 1 input split " + splits[0]);
return splits;
public InputSplit[] doGetSplits(JobConf job, int numSplits) throws IOException {
  super.init(job);

  Path[] dirs = FileInputFormat.getInputPaths(job);
  if (dirs.length == 0) {
    throw new IOException("No input paths specified in job");
  }
  JobConf newjob = new JobConf(job);
  ArrayList<InputSplit> result = new ArrayList<InputSplit>();

  // for each dir, get the InputFormat, and do getSplits.
  PartitionDesc part;
  for (Path dir : dirs) {
    part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
        pathToPartitionInfo, dir, IOPrepareCache.get().allocatePartitionDescMap(), true);
    // create a new InputFormat instance if this is the first time we see this class
    Class inputFormatClass = part.getInputFileFormatClass();
    InputFormat inputFormat = getInputFormatFromCache(inputFormatClass, job);
    Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), newjob);

    FileInputFormat.setInputPaths(newjob, dir);
    newjob.setInputFormat(inputFormat.getClass());
    InputSplit[] iss = inputFormat.getSplits(newjob, numSplits / dirs.length);
    for (InputSplit is : iss) {
      result.add(new HiveInputSplit(is, inputFormatClass.getName()));
    }
  }
  return result.toArray(new HiveInputSplit[result.size()]);
}
private static IntWritable deduceInputFile(JobConf job) {
  Path[] inputPaths = FileInputFormat.getInputPaths(job);
  Path inputFile = new Path(job.get(JobContext.MAP_INPUT_FILE));

  // value == one for sort-input; value == two for sort-output
  return (inputFile.getParent().equals(inputPaths[0])) ? sortInput : sortOutput;
}
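A small sketch of the parent-path comparison deduceInputFile relies on; the HDFS URIs are made up for illustration.

import org.apache.hadoop.fs.Path;

public class ParentPathSketch {
  public static void main(String[] args) {
    // Hypothetical layout: the job's first input path is the sort-input directory,
    // and the current map task is reading one of its part files.
    Path inputDir = new Path("hdfs://nn:8020/user/test/sort-input");
    Path mapInputFile = new Path("hdfs://nn:8020/user/test/sort-input/part-00000");
    // getParent() strips the last path component, so this prints true,
    // which corresponds to the sortInput case in deduceInputFile above.
    System.out.println(mapInputFile.getParent().equals(inputDir));
  }
}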
protected static void verifyNoDuplicates( Configuration conf ) {
  Path[] inputPaths = FileInputFormat.getInputPaths( HadoopUtil.asJobConfInstance( conf ) );
  Set<Path> paths = new HashSet<Path>( (int) ( inputPaths.length / .75f ) );

  for( Path inputPath : inputPaths ) {
    if( !paths.add( inputPath ) )
      throw new TapException( "may not add duplicate paths, found: " + inputPath );
  }
}