/**
 * Cleanup the job-level staging and output data of a previous run. The directories to delete are derived
 * from {@link ConfigurationKeys#WRITER_STAGING_DIR} and {@link ConfigurationKeys#WRITER_OUTPUT_DIR}:
 * one parent level up when the directory was explicitly provided, two levels up otherwise.
 *
 * @param state a {@link State} instance storing job configuration properties
 * @param logger a {@link Logger} used for logging
 * @param stagingDirProvided whether {@link ConfigurationKeys#WRITER_STAGING_DIR} was explicitly provided
 * @param outputDirProvided whether {@link ConfigurationKeys#WRITER_OUTPUT_DIR} was explicitly provided
 */
public static void cleanUpOldJobData(State state, Logger logger, boolean stagingDirProvided, boolean outputDirProvided)
    throws IOException {
  Set<Path> jobPaths = new HashSet<>();
  String writerFsUri = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  FileSystem fs = FileSystem.get(URI.create(writerFsUri), WriterUtils.getFsConfiguration(state));

  Path jobPath;
  if (stagingDirProvided) {
    jobPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR)).getParent();
  } else {
    jobPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR)).getParent().getParent();
  }
  jobPaths.add(jobPath);

  if (outputDirProvided) {
    jobPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)).getParent();
  } else {
    jobPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)).getParent().getParent();
  }
  jobPaths.add(jobPath);

  for (Path jobPathToDelete : jobPaths) {
    logger.info("Cleaning up old job directory " + jobPathToDelete);
    HadoopUtils.deletePath(fs, jobPathToDelete, true);
  }
}
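// A usage sketch (hypothetical call site; the paths and logger name are illustrative assumptions,
// not from the original source). With stagingDirProvided == true the parent of the staging dir is
// taken as the job path; with outputDirProvided == false the grandparent of the output dir is used,
// so both resolve to the same job directory here and the Set dedupes them before deletion.
State state = new State();
state.setProp(ConfigurationKeys.WRITER_STAGING_DIR, "/data/jobs/job123/staging");
state.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, "/data/jobs/job123/output/task");
cleanUpOldJobData(state, LoggerFactory.getLogger("cleanup"), true, false);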
/**
 * Cleanup staging data of a Gobblin task.
 *
 * @param state a {@link State} instance storing task configuration properties
 * @param logger a {@link Logger} used for logging
 */
public static void cleanTaskStagingData(State state, Logger logger) throws IOException {
  int numBranches = state.getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1);

  for (int branchId = 0; branchId < numBranches; branchId++) {
    String writerFsUri = state.getProp(
        ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branchId),
        ConfigurationKeys.LOCAL_FS_URI);
    FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));

    Path stagingPath = WriterUtils.getWriterStagingDir(state, numBranches, branchId);
    if (fs.exists(stagingPath)) {
      logger.info("Cleaning up staging directory " + stagingPath.toUri().getPath());
      if (!fs.delete(stagingPath, true)) {
        throw new IOException("Clean up staging directory " + stagingPath.toUri().getPath() + " failed");
      }
    }

    Path outputPath = WriterUtils.getWriterOutputDir(state, numBranches, branchId);
    if (fs.exists(outputPath)) {
      logger.info("Cleaning up output directory " + outputPath.toUri().getPath());
      if (!fs.delete(outputPath, true)) {
        throw new IOException("Clean up output directory " + outputPath.toUri().getPath() + " failed");
      }
    }
  }
}
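// A usage sketch (hypothetical, single-branch task on the default local FS; property values are
// illustrative). With FORK_BRANCHES_KEY left at its default of 1, the un-suffixed writer properties
// are read, and WriterUtils is assumed to resolve the per-task staging/output paths beneath them.
State taskState = new State();
taskState.setProp(ConfigurationKeys.WRITER_STAGING_DIR, "/tmp/gobblin/task-staging");
taskState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, "/tmp/gobblin/task-output");
taskState.setProp(ConfigurationKeys.WRITER_FILE_PATH, "example/table");
cleanTaskStagingData(taskState, LoggerFactory.getLogger("task-cleanup"));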
/**
 * Get the {@link FileSystem} the writer of the given branch should write to, proxying as another user
 * (via token or Kerberos keytab authentication) if {@link ConfigurationKeys#SHOULD_FS_PROXY_AS_USER} is set.
 */
public static FileSystem getWriterFS(State state, int numBranches, int branchId) throws IOException {
  URI uri = URI.create(state.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branchId),
      ConfigurationKeys.LOCAL_FS_URI));
  Configuration hadoopConf = getFsConfiguration(state);

  if (state.getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER,
      ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER)) {
    // Initialize file system for a proxy user.
    String authMethod =
        state.getProp(ConfigurationKeys.FS_PROXY_AUTH_METHOD, ConfigurationKeys.DEFAULT_FS_PROXY_AUTH_METHOD);
    if (authMethod.equalsIgnoreCase(ConfigurationKeys.TOKEN_AUTH)) {
      return getWriterFsUsingToken(state, uri);
    } else if (authMethod.equalsIgnoreCase(ConfigurationKeys.KERBEROS_AUTH)) {
      return getWriterFsUsingKeytab(state, uri);
    }
  }
  // Initialize file system as the current user.
  return FileSystem.get(uri, hadoopConf);
}
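// A usage sketch (hypothetical configuration; the URI is a placeholder). Enabling
// SHOULD_FS_PROXY_AS_USER with TOKEN_AUTH routes through getWriterFsUsingToken above;
// leaving it unset falls through to a plain FileSystem.get as the current user.
State state = new State();
state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "hdfs://namenode:8020");
state.setProp(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER, Boolean.TRUE.toString());
state.setProp(ConfigurationKeys.FS_PROXY_AUTH_METHOD, ConfigurationKeys.TOKEN_AUTH);
FileSystem writerFs = getWriterFS(state, 1, 0);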
String writerFsUri = state.getProp(
    ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branchId),
    ConfigurationKeys.LOCAL_FS_URI);
FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));
ParallelRunner parallelRunner = getParallelRunner(fs, closer, parallelRunnerThreads, parallelRunners);
// Resolve the writer FS URI from configuration, defaulting to the local FS.
String uriStr = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
Configuration conf = WriterUtils.getFsConfiguration(state);
URI uri = URI.create(uriStr);
this.fs = FileSystem.get(uri, conf);
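// The pattern shared by the call sites above, distilled into a self-contained sketch (the method
// name and signature are illustrative, not from the original source): resolve the possibly
// branch-scoped writer FS URI, fall back to the local FS, and open the FileSystem with the
// Hadoop Configuration derived from the State.
private static FileSystem resolveWriterFs(State state, int numBranches, int branchId) throws IOException {
  String writerFsUri = state.getProp(
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branchId),
      ConfigurationKeys.LOCAL_FS_URI);
  return FileSystem.get(URI.create(writerFsUri), WriterUtils.getFsConfiguration(state));
}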
/**
 * Build a new {@link CopyDataPublisher} from {@link State}. The constructor expects the following to be set in the
 * {@link State}:
 * <ul>
 *   <li>{@link ConfigurationKeys#WRITER_OUTPUT_DIR}
 *   <li>{@link ConfigurationKeys#WRITER_FILE_SYSTEM_URI}
 * </ul>
 */
public CopyDataPublisher(State state) throws IOException {
  super(state);

  // Extract LineageInfo from state
  if (state instanceof SourceState) {
    lineageInfo = LineageInfo.getLineageInfo(((SourceState) state).getBroker());
  } else if (state instanceof WorkUnitState) {
    lineageInfo = LineageInfo.getLineageInfo(((WorkUnitState) state).getTaskBrokerNullable());
  } else {
    lineageInfo = Optional.absent();
  }

  String uri = this.state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  this.fs = FileSystem.get(URI.create(uri), WriterUtils.getFsConfiguration(state));

  FileAwareInputStreamDataWriterBuilder.setJobSpecificOutputPaths(state);

  this.writerOutputDir = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));

  MetricContext metricContext =
      Instrumented.getMetricContext(state, CopyDataPublisher.class, GobblinMetrics.getCustomTagsFromState(state));
  this.eventSubmitter = new EventSubmitter.Builder(metricContext, "org.apache.gobblin.copy.CopyDataPublisher").build();

  this.recoveryHelper = new RecoveryHelper(this.fs, state);
  this.recoveryHelper.purgeOldPersistedFile();
}
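// A usage sketch (hypothetical; the state contents and work-unit collection are placeholders, and
// the full distcp-ng job configuration is elided). Assuming DataPublisher is Closeable, as in
// Gobblin, try-with-resources is the natural shape for publish-and-close.
State publisherState = new State();
publisherState.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "hdfs://namenode:8020");
publisherState.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, "/gobblin/copy/task-output");
try (CopyDataPublisher publisher = new CopyDataPublisher(publisherState)) {
  publisher.publish(workUnitStates); // Collection<? extends WorkUnitState> of completed copy tasks
}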