/** * A simple hook to filter files and directories from the input. * The method may be overridden. Hadoop's FileInputFormat has a similar mechanism and applies the * same filters by default. * * @param fileStatus * @return true, if the given file or directory is accepted */ protected boolean acceptFile(FileStatus fileStatus) { final String name = fileStatus.getPath().getName(); return !name.startsWith("_") && !name.startsWith("."); }
/** * A simple hook to filter files and directories from the input. * The method may be overridden. Hadoop's FileInputFormat has a similar mechanism and applies the * same filters by default. * * @param fileStatus * @return true, if the given file or directory is accepted */ protected boolean acceptFile(FileStatus fileStatus) { final String name = fileStatus.getPath().getName(); return !name.startsWith("_") && !name.startsWith("."); }
private boolean testForUnsplittable(FileStatus pathFile) { if(pathFile.getPath().getName().endsWith(DEFLATE_SUFFIX)) { unsplittable = true; return true; } return false; }
private boolean testForUnsplittable(FileStatus pathFile) { if(pathFile.getPath().getName().endsWith(DEFLATE_SUFFIX)) { unsplittable = true; return true; } return false; }
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[0]); }
@Override public FileInputSplit[] createInputSplits(final int minNumSplits) throws IOException { final List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (final FileStatus file : files) { final long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { final long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); final FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } return inputSplits.toArray(new FileInputSplit[0]); }
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[0]); }
public static void copy(Path sourcePath, Path targetPath, boolean executable) throws IOException { FileSystem sFS = sourcePath.getFileSystem(); FileSystem tFS = targetPath.getFileSystem(); if (!tFS.exists(targetPath)) { if (sFS.getFileStatus(sourcePath).isDir()) { tFS.mkdirs(targetPath); FileStatus[] contents = sFS.listStatus(sourcePath); for (FileStatus content : contents) { String distPath = content.getPath().toString(); if (content.isDir()) { if (distPath.endsWith("/")) { distPath = distPath.substring(0, distPath.length() - 1); } } String localPath = targetPath.toString() + distPath.substring(distPath.lastIndexOf("/")); copy(content.getPath(), new Path(localPath), executable); } } else { try { FSDataOutputStream lfsOutput = tFS.create(targetPath, false); FSDataInputStream fsInput = sFS.open(sourcePath); IOUtils.copyBytes(fsInput, lfsOutput); new File(targetPath.toString()).setExecutable(executable); } catch (IOException ioe) { } } } }
List<F> formats = new ArrayList<F>(); for (int index = 0; index < list.length; index++) { formats.add(openInput(inputFormatClass, list[index].getPath().toString(), configuration));
List<F> formats = new ArrayList<F>(); for (int index = 0; index < list.length; index++) { formats.add(openInput(inputFormatClass, list[index].getPath().toString(), configuration));
FileInputSplit split = new FileInputSplit(0, file.getPath(), offset, file.getLen() - offset, null);
final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, splitSize, blocks[blockIndex] .getHosts()); final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, bytesUnassigned, blocks[blockIndex].getHosts()); hosts = new String[0]; final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, 0, hosts); inputSplits.add(fis);
final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, splitSize, blocks[blockIndex] .getHosts()); final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, bytesUnassigned, blocks[blockIndex].getHosts()); hosts = new String[0]; final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, 0, hosts); inputSplits.add(fis);
/** * Fill in the statistics. The last modification time and the total input size are prefilled. * * @param files * The files that are associated with this block input format. * @param stats * The pre-filled statistics. */ protected SequentialStatistics createStatistics(final List<FileStatus> files, final FileBaseStatistics stats) throws IOException { if (files.isEmpty()) return null; final BlockInfo blockInfo = this.createBlockInfo(); long totalCount = 0; for (final FileStatus file : files) { // invalid file if (file.getLen() < blockInfo.getInfoSize()) continue; final FSDataInputStream fdis = file.getPath().getFileSystem().open(file.getPath(), blockInfo.getInfoSize()); fdis.seek(file.getLen() - blockInfo.getInfoSize()); final DataInputStream input = new DataInputStream(fdis); blockInfo.read(input); totalCount += blockInfo.getAccumulatedRecordCount(); } final float avgWidth = totalCount == 0 ? 0 : (float) stats.getTotalInputSize() / totalCount; return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth, totalCount); }
/** * Fill in the statistics. The last modification time and the total input size are prefilled. * * @param files * The files that are associated with this block input format. * @param stats * The pre-filled statistics. */ protected SequentialStatistics createStatistics(List<FileStatus> files, FileBaseStatistics stats) throws IOException { if (files.isEmpty()) { return null; } BlockInfo blockInfo = this.createBlockInfo(); long totalCount = 0; for (FileStatus file : files) { // invalid file if (file.getLen() < blockInfo.getInfoSize()) { continue; } FSDataInputStream fdis = file.getPath().getFileSystem().open(file.getPath(), blockInfo.getInfoSize()); fdis.seek(file.getLen() - blockInfo.getInfoSize()); DataInputStream input = new DataInputStream(fdis); blockInfo.read(input); totalCount += blockInfo.getAccumulatedRecordCount(); } final float avgWidth = totalCount == 0 ? 0 : ((float) stats.getTotalInputSize() / totalCount); return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth, totalCount); }
/** * Fill in the statistics. The last modification time and the total input size are prefilled. * * @param files * The files that are associated with this block input format. * @param stats * The pre-filled statistics. */ protected SequentialStatistics createStatistics(List<FileStatus> files, FileBaseStatistics stats) throws IOException { if (files.isEmpty()) { return null; } BlockInfo blockInfo = this.createBlockInfo(); long totalCount = 0; for (FileStatus file : files) { // invalid file if (file.getLen() < blockInfo.getInfoSize()) { continue; } FSDataInputStream fdis = file.getPath().getFileSystem().open(file.getPath(), blockInfo.getInfoSize()); fdis.seek(file.getLen() - blockInfo.getInfoSize()); DataInputStream input = new DataInputStream(fdis); blockInfo.read(input); totalCount += blockInfo.getAccumulatedRecordCount(); } final float avgWidth = totalCount == 0 ? 0 : ((float) stats.getTotalInputSize() / totalCount); return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth, totalCount); }
/** * Returns the number of blocks this file/directory consists of * assuming the file system's standard block size. * * @param file * the file * @return the number of block's the file/directory consists of * @throws IOException */ public int getNumberOfBlocks(final FileStatus file) throws IOException { int numberOfBlocks = 0; if (file == null) { return 0; } // For a file, this is easy if (!file.isDir()) { return getNumberOfBlocks(file.getLen(), file.getBlockSize()); } // file is a directory final FileStatus[] files = this.listStatus(file.getPath()); for (int i = 0; i < files.length; i++) { if (!files[i].isDir()) { numberOfBlocks += getNumberOfBlocks(files[i].getLen(), files[i].getBlockSize()); } } return numberOfBlocks; }
/** * Returns the number of blocks this file/directory consists of * assuming the file system's standard block size. * * @param file * the file * @return the number of block's the file/directory consists of * @throws IOException */ public int getNumberOfBlocks(final FileStatus file) throws IOException { int numberOfBlocks = 0; if (file == null) { return 0; } // For a file, this is easy if (!file.isDir()) { return getNumberOfBlocks(file.getLen(), file.getBlockSize()); } // file is a directory final FileStatus[] files = this.listStatus(file.getPath()); for (int i = 0; i < files.length; i++) { if (!files[i].isDir()) { numberOfBlocks += getNumberOfBlocks(files[i].getLen(), files[i].getBlockSize()); } } return numberOfBlocks; }
if (delete(entry.getPath(), true)) { retVal = true;
if (delete(entry.getPath(), true)) { retVal = true;