/** * Copies data from a src {@link Path} to a dst {@link Path}. * * <p> * This method should be used in preference to * {@link FileUtil#copy(FileSystem, Path, FileSystem, Path, boolean, boolean, Configuration)}, which does not handle * clean up of incomplete files if there is an error while copying data. * </p> * * <p> * TODO this method does not handle cleaning up any local files leftover by writing to S3. * </p> * * @param srcFs the source {@link FileSystem} where the src {@link Path} exists * @param src the {@link Path} to copy from the source {@link FileSystem} * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created * @param dst the {@link Path} to copy data to */ public static void copyPath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, Configuration conf) throws IOException { copyPath(srcFs, src, dstFs, dst, false, false, conf); }
/** * Copies data from a src {@link Path} to a dst {@link Path}. * * <p> * This method should be used in preference to * {@link FileUtil#copy(FileSystem, Path, FileSystem, Path, boolean, boolean, Configuration)}, which does not handle * clean up of incomplete files if there is an error while copying data. * </p> * * <p> * TODO this method does not handle cleaning up any local files leftover by writing to S3. * </p> * * @param srcFs the source {@link FileSystem} where the src {@link Path} exists * @param src the {@link Path} to copy from the source {@link FileSystem} * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created * @param dst the {@link Path} to copy data to * @param overwrite true if the destination should be overwritten; otherwise, false */ public static void copyPath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite, Configuration conf) throws IOException { copyPath(srcFs, src, dstFs, dst, false, overwrite, conf); }
public static void moveSelectFiles(String extension, String source, String destination) throws IOException { FileSystem fs = getFileSystem(); fs.mkdirs(new Path(destination)); FileStatus[] fileStatuses = fs.listStatus(new Path(source)); for (FileStatus fileStatus : fileStatuses) { Path path = fileStatus.getPath(); if (!fileStatus.isDirectory() && path.toString().toLowerCase().endsWith(extension.toLowerCase())) { HadoopUtils.deleteIfExists(fs, new Path(destination), true); HadoopUtils.copyPath(fs, path, fs, new Path(destination), getConfiguration()); } } } }
/** * Moves a src {@link Path} from a srcFs {@link FileSystem} to a dst {@link Path} on a dstFs {@link FileSystem}. If * the srcFs and the dstFs have the same scheme, and neither of them or S3 schemes, then the {@link Path} is simply * renamed. Otherwise, the data is from the src {@link Path} to the dst {@link Path}. So this method can handle copying * data between different {@link FileSystem} implementations. * * @param srcFs the source {@link FileSystem} where the src {@link Path} exists * @param src the source {@link Path} which will me moved * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created * @param dst the {@link Path} to move data to * @param overwrite true if the destination should be overwritten; otherwise, false */ public static void movePath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite, Configuration conf) throws IOException { if (srcFs.getUri().getScheme().equals(dstFs.getUri().getScheme()) && !FS_SCHEMES_NON_ATOMIC.contains(srcFs.getUri().getScheme()) && !FS_SCHEMES_NON_ATOMIC.contains(dstFs.getUri().getScheme())) { renamePath(srcFs, src, dst); } else { copyPath(srcFs, src, dstFs, dst, true, overwrite, conf); } }
@Override public Void call() throws Exception { Path convertedFilePath = MRCompactorJobRunner.this.outputRecordCountProvider.convertPath( LateFileRecordCountProvider.restoreFilePath(filePath), MRCompactorJobRunner.this.outputExtension, MRCompactorJobRunner.this.inputRecordCountProvider); String targetFileName = convertedFilePath.getName(); Path outPath = MRCompactorJobRunner.this.lateOutputRecordCountProvider.constructLateFilePath(targetFileName, MRCompactorJobRunner.this.fs, outputDirectory); HadoopUtils.copyPath (MRCompactorJobRunner.this.fs, filePath, MRCompactorJobRunner.this.fs, outPath, true, MRCompactorJobRunner.this.fs.getConf()); LOG.debug(String.format("Copied %s to %s.", filePath, outPath)); return null; } });
/** * Copies data from a src {@link Path} to a dst {@link Path}. * * <p> * This method should be used in preference to * {@link FileUtil#copy(FileSystem, Path, FileSystem, Path, boolean, boolean, Configuration)}, which does not handle * clean up of incomplete files if there is an error while copying data. * </p> * * <p> * TODO this method does not handle cleaning up any local files leftover by writing to S3. * </p> * * @param srcFs the source {@link FileSystem} where the src {@link Path} exists * @param src the {@link Path} to copy from the source {@link FileSystem} * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created * @param dst the {@link Path} to copy data to */ public static void copyPath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, Configuration conf) throws IOException { copyPath(srcFs, src, dstFs, dst, false, false, conf); }
/** * Copies data from a src {@link Path} to a dst {@link Path}. * * <p> * This method should be used in preference to * {@link FileUtil#copy(FileSystem, Path, FileSystem, Path, boolean, boolean, Configuration)}, which does not handle * clean up of incomplete files if there is an error while copying data. * </p> * * <p> * TODO this method does not handle cleaning up any local files leftover by writing to S3. * </p> * * @param srcFs the source {@link FileSystem} where the src {@link Path} exists * @param src the {@link Path} to copy from the source {@link FileSystem} * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created * @param dst the {@link Path} to copy data to * @param overwrite true if the destination should be overwritten; otherwise, false */ public static void copyPath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite, Configuration conf) throws IOException { copyPath(srcFs, src, dstFs, dst, false, overwrite, conf); }
public static void moveSelectFiles(String extension, String source, String destination) throws IOException { FileSystem fs = getFileSystem(); fs.mkdirs(new Path(destination)); FileStatus[] fileStatuses = fs.listStatus(new Path(source)); for (FileStatus fileStatus : fileStatuses) { Path path = fileStatus.getPath(); if (!fileStatus.isDirectory() && path.toString().toLowerCase().endsWith(extension.toLowerCase())) { HadoopUtils.deleteIfExists(fs, new Path(destination), true); HadoopUtils.copyPath(fs, path, fs, new Path(destination), getConfiguration()); } } } }
/** * Moves a src {@link Path} from a srcFs {@link FileSystem} to a dst {@link Path} on a dstFs {@link FileSystem}. If * the srcFs and the dstFs have the same scheme, and neither of them or S3 schemes, then the {@link Path} is simply * renamed. Otherwise, the data is from the src {@link Path} to the dst {@link Path}. So this method can handle copying * data between different {@link FileSystem} implementations. * * @param srcFs the source {@link FileSystem} where the src {@link Path} exists * @param src the source {@link Path} which will me moved * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created * @param dst the {@link Path} to move data to * @param overwrite true if the destination should be overwritten; otherwise, false */ public static void movePath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite, Configuration conf) throws IOException { if (srcFs.getUri().getScheme().equals(dstFs.getUri().getScheme()) && !FS_SCHEMES_NON_ATOMIC.contains(srcFs.getUri().getScheme()) && !FS_SCHEMES_NON_ATOMIC.contains(dstFs.getUri().getScheme())) { renamePath(srcFs, src, dst); } else { copyPath(srcFs, src, dstFs, dst, true, overwrite, conf); } }
@Override public Void call() throws Exception { Path convertedFilePath = MRCompactorJobRunner.this.outputRecordCountProvider.convertPath( LateFileRecordCountProvider.restoreFilePath(filePath), MRCompactorJobRunner.this.outputExtension, MRCompactorJobRunner.this.inputRecordCountProvider); String targetFileName = convertedFilePath.getName(); Path outPath = MRCompactorJobRunner.this.lateOutputRecordCountProvider.constructLateFilePath(targetFileName, MRCompactorJobRunner.this.fs, outputDirectory); HadoopUtils.copyPath (MRCompactorJobRunner.this.fs, filePath, MRCompactorJobRunner.this.fs, outPath, true, MRCompactorJobRunner.this.fs.getConf()); LOG.debug(String.format("Copied %s to %s.", filePath, outPath)); return null; } });