/**
 * Builds the writer {@link Configuration} from the given {@link State} by delegating to
 * {@link HadoopUtils#getConfFromState}, passing {@code WRITER_ENCRYPTED_CONFIG_PATH} so that
 * properties under that path are handled specially (presumably decrypted -- confirm against
 * HadoopUtils.getConfFromState).
 *
 * @param state the job/task {@link State} whose properties populate the configuration
 * @return the resulting {@link Configuration}
 */
public static Configuration getFsConfiguration(State state) { return HadoopUtils.getConfFromState(state, Optional.of(WRITER_ENCRYPTED_CONFIG_PATH)); } }
/**
 * Copies every regular file directly under {@code source} whose name ends with
 * {@code extension} (case-insensitive) into the {@code destination} directory, replacing any
 * same-named file already present there. The destination directory is created if missing.
 *
 * <p>NOTE(review): despite the name, this method copies rather than moves -- the source files
 * are left in place (copyPath is invoked without delete-source semantics).</p>
 *
 * @param extension   file-name suffix to match, compared case-insensitively
 * @param source      directory to scan (non-recursive)
 * @param destination directory to copy matching files into
 * @throws IOException on any filesystem failure
 */
public static void moveSelectFiles(String extension, String source, String destination) throws IOException {
  FileSystem fs = getFileSystem();
  fs.mkdirs(new Path(destination));
  FileStatus[] fileStatuses = fs.listStatus(new Path(source));
  for (FileStatus fileStatus : fileStatuses) {
    Path path = fileStatus.getPath();
    if (!fileStatus.isDirectory() && path.toString().toLowerCase().endsWith(extension.toLowerCase())) {
      // Delete only the colliding target file, not the whole destination directory.
      // Deleting new Path(destination) recursively here (as the code previously did) wiped
      // out files copied by earlier iterations of this loop, leaving only the last match.
      HadoopUtils.deleteIfExists(fs, new Path(destination, path.getName()), true);
      HadoopUtils.copyPath(fs, path, fs, new Path(destination), getConfiguration());
    }
  }
} }
/**
 * Gets the source {@link FileSystem}, built from a freshly created Hadoop configuration
 * (so it reflects default/site config rather than any job {@link State}).
 *
 * @return the source {@link FileSystem}
 * @throws IOException if the {@link FileSystem} cannot be obtained
 */
private static FileSystem getSourceFs() throws IOException { return FileSystem.get(HadoopUtils.newConfiguration()); }
/**
 * Builds the source {@link FileSystem} for the given {@link State}, optionally wrapped with
 * throttling by {@link HadoopUtils#getOptionallyThrottledFileSystem}. The filesystem URI is
 * read from {@code ConfigurationKeys.SOURCE_FILEBASED_FS_URI}, defaulting to the local FS.
 *
 * @param state the job/task {@link State} supplying configuration and the FS URI
 * @return the (possibly throttled) source {@link FileSystem}
 * @throws IOException if the {@link FileSystem} cannot be obtained
 */
protected FileSystem getSourceFileSystem(State state) throws IOException {
  Configuration conf = HadoopUtils.getConfFromState(state);
  String fsUri = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI);
  FileSystem rawFs = FileSystem.get(URI.create(fsUri), conf);
  return HadoopUtils.getOptionallyThrottledFileSystem(rawFs, state);
} }
/**
 * Moves a src {@link Path} from a srcFs {@link FileSystem} to a dst {@link Path} on a dstFs
 * {@link FileSystem}. If the srcFs and the dstFs have the same scheme, and neither of them is a
 * non-atomic (e.g. S3) scheme, then the {@link Path} is simply renamed. Otherwise, the data is
 * copied from the src {@link Path} to the dst {@link Path} (with the source deleted afterwards).
 * So this method can handle moving data between different {@link FileSystem} implementations.
 *
 * @param srcFs the source {@link FileSystem} where the src {@link Path} exists
 * @param src the source {@link Path} which will be moved
 * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created
 * @param dst the {@link Path} to move data to
 * @param overwrite true if the destination should be overwritten; otherwise, false
 * @param conf the {@link Configuration} used when a byte-copy (rather than a rename) is required
 */
public static void movePath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean overwrite,
    Configuration conf) throws IOException {
  // Rename is only atomic within a single filesystem whose scheme supports it; otherwise
  // fall back to copy-then-delete (deleteSource = true).
  if (srcFs.getUri().getScheme().equals(dstFs.getUri().getScheme())
      && !FS_SCHEMES_NON_ATOMIC.contains(srcFs.getUri().getScheme())
      && !FS_SCHEMES_NON_ATOMIC.contains(dstFs.getUri().getScheme())) {
    renamePath(srcFs, src, dst);
  } else {
    copyPath(srcFs, src, dstFs, dst, true, overwrite, conf);
  }
}
/**
 * Returns the single {@link FileAwareInputStream} record for this extractor's file: a metered
 * input stream opened against the file's origin filesystem. Subsequent calls return
 * {@code null}, signalling that the one and only record has been consumed.
 *
 * @param reuse ignored (deprecated reuse parameter)
 * @return the file's record on the first call; {@code null} thereafter
 */
@Override
public FileAwareInputStream readRecord(@Deprecated FileAwareInputStream reuse)
    throws DataRecordException, IOException {
  if (this.recordRead) {
    return null;
  }
  // Prefer the state-derived configuration when a state is available.
  Configuration conf =
      this.state == null ? HadoopUtils.newConfiguration() : HadoopUtils.getConfFromState(this.state);
  FileSystem fsFromFile = this.file.getOrigin().getPath().getFileSystem(conf);
  this.recordRead = true;
  return new FileAwareInputStream(this.file,
      MeteredInputStream.builder().in(fsFromFile.open(this.file.getFileStatus().getPath())).build());
}
// NOTE(review): this span is an incomplete fragment of a copy-via-tmp-file routine -- the
// enclosing method signature and several closing braces are not visible here, so it cannot be
// safely restructured in isolation. Two things to verify against the full method:
// (1) `renamePath(dstFs, tmp, dst)` appears twice in a row, which looks like a duplicated line;
// (2) the rename sits in a `finally` after `deletePath(dstFs, dst, true)`, so it would run even
//     when the delete of the existing dst failed -- confirm that is intended.
copyFile(srcFs, src, dstFs, dst, overwriteDst, conf); } else { copyFile(srcFs, src, dstFs, tmp, overwriteDst, conf); try { boolean renamed = false; if (overwriteDst && dstFs.exists(dst)) { try { deletePath(dstFs, dst, true); } finally { renamePath(dstFs, tmp, dst); renamed = true; renamePath(dstFs, tmp, dst); deletePath(dstFs, tmp, true);
/** * Writes the <code>config</code> to {@link #storeMetadataFilePath}. Creates a backup file at * <code>storeMetadataFilePath + ".bkp"</code> to recover old metadata in case of unexpected deployment failures * * @param config to be serialized * @throws IOException if there was any problem writing the <code>config</code> to the store metadata file. */ void writeMetadata(Config config) throws IOException { Path storeMetadataFileBkpPath = new Path(this.storeMetadataFilePath.getParent(), this.storeMetadataFilePath.getName() + ".bkp"); // Delete old backup file if exists HadoopUtils.deleteIfExists(this.fs, storeMetadataFileBkpPath, true); // Move current storeMetadataFile to backup if (this.fs.exists(this.storeMetadataFilePath)) { HadoopUtils.renamePath(this.fs, this.storeMetadataFilePath, storeMetadataFileBkpPath); } // Write new storeMetadataFile try (FSDataOutputStream outputStream = FileSystem.create(this.fs, this.storeMetadataFilePath, FsDeploymentConfig.DEFAULT_STORE_PERMISSIONS);) { outputStream.write(config.root().render(ConfigRenderOptions.concise()).getBytes(Charsets.UTF_8)); } catch (Exception e) { // Restore from backup HadoopUtils.deleteIfExists(this.fs, this.storeMetadataFilePath, true); HadoopUtils.renamePath(this.fs, storeMetadataFileBkpPath, this.storeMetadataFilePath); throw new IOException( String.format("Failed to write store metadata at %s. Restored existing store metadata file from backup", this.storeMetadataFilePath), e); } }
// NOTE(review): fragment of a cleanup routine -- the enclosing method signature is not visible
// here. Deletes the staging/output dirs and their immediate parents; deleting the parents
// assumes they are job-specific directories -- confirm against the path layout used by the
// caller.
HadoopUtils.deletePath(fs, jobStagingPath, true);
HadoopUtils.deletePath(fs, jobStagingPath.getParent(), true);
HadoopUtils.deletePath(fs, jobOutputPath, true);
HadoopUtils.deletePath(fs, jobOutputPath.getParent(), true);
// The err directory may legitimately be absent, hence deleteIfExists rather than deletePath.
Path jobErrPath = new Path(state.getProp(ConfigurationKeys.ROW_LEVEL_ERR_FILE));
log.info("Cleaning up err directory : " + jobErrPath);
HadoopUtils.deleteIfExists(fs, jobErrPath, true);
/**
 * Recursively deletes the temporary {@link #schemaDir}; a no-op when the directory is absent.
 *
 * @throws IOException if the directory exists but cannot be deleted
 */
public void cleanupTempSchemas() throws IOException {
  final boolean recursive = true;
  HadoopUtils.deleteIfExists(this.fs, this.schemaDir, recursive);
}
/**
 * A wrapper around {@link FileSystem#rename(Path, Path)} which throws {@link IOException} if
 * {@link FileSystem#rename(Path, Path)} returns {@code false}.
 *
 * @param fs the {@link FileSystem} on which to perform the rename
 * @param oldName the existing {@link Path}
 * @param newName the target {@link Path}
 * @throws IOException if the rename fails or returns {@code false}
 */
public static void renamePath(FileSystem fs, Path oldName, Path newName) throws IOException {
  // Delegates to the four-argument overload with its extra flag disabled (presumably
  // "overwrite" -- confirm against that overload's signature).
  renamePath(fs, oldName, newName, false);
}
// NOTE(review): fragment of an anonymous Callable submitted to ParallelRunner -- the enclosing
// expression is cut off at both ends, so it is left byte-identical here.
@Override public Void call() throws Exception {
  // Serialize concurrent operations on the same source path via a per-path lock.
  Lock lock = ParallelRunner.this.locks.get(src.toString());
  lock.lock();
  try {
    if (ParallelRunner.this.fs.exists(src)) {
      HadoopUtils.renamePath(ParallelRunner.this.fs, src, dst);
      if (group.isPresent()) {
        HadoopUtils.setGroup(ParallelRunner.this.fs, dst, group.get());
      }
    }
    return null;
  } catch (FileAlreadyExistsException e) {
    // A pre-existing dst is treated as benign (another runner may have won the race): log, don't fail.
    LOGGER.warn(String.format("Failed to rename %s to %s: dst already exists", src, dst), e);
    return null;
  } finally {
    lock.unlock();
  }
} }), "Rename " + src + " to " + dst));
/**
 * Builds the target {@link FileSystem} for the given {@link State}, optionally wrapped with
 * throttling by {@link HadoopUtils#getOptionallyThrottledFileSystem}.
 *
 * @param state the job/task {@link State} used to construct and possibly throttle the FS
 * @return the (possibly throttled) writer {@link FileSystem}
 * @throws IOException if the {@link FileSystem} cannot be obtained
 */
private static FileSystem getTargetFileSystem(State state) throws IOException {
  // Hard-coded (1, 0) args to getWriterFS -- presumably numBranches=1, branchId=0; confirm
  // against WriterUtils.getWriterFS.
  FileSystem writerFs = WriterUtils.getWriterFS(state, 1, 0);
  return HadoopUtils.getOptionallyThrottledFileSystem(writerFs, state);
}
/**
 * A wrapper around {@link FileSystem#delete(Path, boolean)} that deletes the given {@link Path}
 * only when it is present on the given {@link FileSystem}; a missing path is a no-op rather
 * than an error.
 *
 * @param fs the {@link FileSystem} to delete from
 * @param path the {@link Path} to delete
 * @param recursive whether to delete directories recursively
 * @throws IOException if the existence check or the delete fails
 */
public static void deleteIfExists(FileSystem fs, Path path, boolean recursive) throws IOException {
  if (!fs.exists(path)) {
    return;
  }
  deletePath(fs, path, recursive);
}
/**
 * Recursively deletes each of the given paths, also removing any ancestor directories that
 * become empty as a result.
 *
 * @param paths the paths to delete
 * @throws IOException if any delete fails
 */
private void deleteFilesByPaths(Set<Path> paths) throws IOException {
  for (final Path toDelete : paths) {
    HadoopUtils.deletePathAndEmptyAncestors(this.fs, toDelete, true);
  }
}
/**
 * Copies data from a src {@link Path} to a dst {@link Path} without deleting the source and
 * without overwriting an existing destination.
 *
 * <p>
 * This method should be used in preference to
 * {@link FileUtil#copy(FileSystem, Path, FileSystem, Path, boolean, boolean, Configuration)},
 * which does not handle clean up of incomplete files if there is an error while copying data.
 * </p>
 *
 * <p>
 * TODO this method does not handle cleaning up any local files leftover by writing to S3.
 * </p>
 *
 * @param srcFs the source {@link FileSystem} where the src {@link Path} exists
 * @param src the {@link Path} to copy from the source {@link FileSystem}
 * @param dstFs the destination {@link FileSystem} where the dst {@link Path} should be created
 * @param dst the {@link Path} to copy data to
 * @param conf the {@link Configuration} used by the underlying copy
 */
public static void copyPath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, Configuration conf)
    throws IOException {
  final boolean deleteSource = false;
  final boolean overwrite = false;
  copyPath(srcFs, src, dstFs, dst, deleteSource, overwrite, conf);
}
/**
 * Constructs an embedded Gobblin launcher for a job with the given name: installs the Gobblin
 * site configuration, initializes the spec builder and the config/jar bookkeeping maps, and
 * sets a no-op jar-distribution hook (subclasses/launch modes may replace it).
 */
@CliObjectSupport(argumentNames = {"jobName"}) public EmbeddedGobblin(String name) {
  // Register gobblin-site config resources before anything reads Hadoop configuration.
  HadoopUtils.addGobblinSite();
  this.specBuilder = new JobSpec.Builder(name);
  this.userConfigMap = Maps.newHashMap();
  this.builtConfigMap = Maps.newHashMap();
  this.sysConfigOverrides = Maps.newHashMap();
  this.defaultSysConfig = getDefaultSysConfig();
  this.distributedJars = Maps.newHashMap();
  // Must run after distributedJars is initialized, since it populates that map.
  loadCoreGobblinJarsToDistributedJars();
  this.distributeJarsFunction = new Runnable() {
    @Override public void run() {
      // NOOP -- local embedded runs need no jar distribution; other modes override this hook.
    }
  };
}
@Override public void createAlias(String storeName, String original, String alias) throws IOException { Path originalTablePath = new Path(new Path(this.storeRootDir, storeName), original); if (!this.fs.exists(originalTablePath)) { throw new IOException(String.format("State file %s does not exist for table %s", originalTablePath, original)); } Path aliasTablePath = new Path(new Path(this.storeRootDir, storeName), alias); Path tmpAliasTablePath = new Path(aliasTablePath.getParent(), new Path(TMP_FILE_PREFIX, aliasTablePath.getName())); // Make a copy of the original table as a work-around because // Hadoop version 1.2.1 has no support for symlink yet. HadoopUtils.copyFile(this.fs, originalTablePath, this.fs, aliasTablePath, tmpAliasTablePath, true, this.conf); }
/**
 * Builds the source {@link FileSystem} for the given {@link State}, optionally wrapped with
 * throttling by {@link HadoopUtils#getOptionallyThrottledFileSystem}. The filesystem URI comes
 * from {@code ConfigurationKeys.SOURCE_FILEBASED_FS_URI}, defaulting to the local FS URI.
 *
 * @param state the job/task {@link State} supplying configuration and the FS URI
 * @return the (possibly throttled) source {@link FileSystem}
 * @throws IOException if the {@link FileSystem} cannot be obtained
 */
protected FileSystem getSourceFileSystem(State state) throws IOException {
  final Configuration conf = HadoopUtils.getConfFromState(state);
  final String fsUri = state.getProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, ConfigurationKeys.LOCAL_FS_URI);
  return HadoopUtils.getOptionallyThrottledFileSystem(FileSystem.get(URI.create(fsUri), conf), state);
}
/**
 * Copies src on srcFs to dst on dstFs, optionally deleting the source and/or overwriting an
 * existing destination. On any failure, best-effort deletes whatever partial dst was written
 * before rethrowing the original error.
 *
 * @param srcFs the source {@link FileSystem}; src must exist on it
 * @param src the {@link Path} to copy
 * @param dstFs the destination {@link FileSystem}
 * @param dst the {@link Path} to copy to; must not exist unless {@code overwrite} is true
 * @param deleteSource whether to remove src after a successful copy
 * @param overwrite whether an existing dst may be replaced
 * @param conf the {@link Configuration} passed to the underlying copy
 * @throws IOException if the copy fails or reports failure
 */
private static void copyPath(FileSystem srcFs, Path src, FileSystem dstFs, Path dst, boolean deleteSource,
    boolean overwrite, Configuration conf) throws IOException {
  Preconditions.checkArgument(srcFs.exists(src),
      String.format("Cannot copy from %s to %s because src does not exist", src, dst));
  Preconditions.checkArgument(overwrite || !dstFs.exists(dst),
      String.format("Cannot copy from %s to %s because dst exists", src, dst));
  try {
    // Local sources take the copyFromLocalFile fast path; its IOExceptions are re-wrapped to
    // include src/dst context.
    boolean isSourceFileSystemLocal = srcFs instanceof LocalFileSystem || srcFs instanceof RawLocalFileSystem;
    if (isSourceFileSystemLocal) {
      try {
        dstFs.copyFromLocalFile(deleteSource, overwrite, src, dst);
      } catch (IOException e) {
        throw new IOException(String.format("Failed to copy %s to %s", src, dst), e);
      }
    } else if (!FileUtil.copy(srcFs, src, dstFs, dst, deleteSource, overwrite, conf)) {
      // FileUtil.copy signals some failures via a false return rather than an exception.
      throw new IOException(String.format("Failed to copy %s to %s", src, dst));
    }
  } catch (Throwable t1) {
    // Best-effort cleanup of a partially-written dst; the original failure t1 always wins.
    try {
      deleteIfExists(dstFs, dst, true);
    } catch (Throwable t2) {
      // Do nothing -- cleanup failure must not mask the original error t1.
    }
    throw t1;
  }
}