/** * Get the staging {@link Path} for {@link gobblin.writer.DataWriter} that has attemptId in the path. */ public static Path getWriterStagingDir(State state, int numBranches, int branchId, String attemptId) { Preconditions.checkArgument(attemptId != null && !attemptId.isEmpty(), "AttemptId cannot be null or empty: " + attemptId); return new Path(getWriterStagingDir(state, numBranches, branchId), attemptId); }
/** * Cleanup staging data of a Gobblin task. * * @param state a {@link State} instance storing task configuration properties * @param logger a {@link Logger} used for logging */ public static void cleanTaskStagingData(State state, Logger logger) throws IOException { int numBranches = state.getPropAsInt(ConfigurationKeys.FORK_BRANCHES_KEY, 1); for (int branchId = 0; branchId < numBranches; branchId++) { String writerFsUri = state.getProp( ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branchId), ConfigurationKeys.LOCAL_FS_URI); FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state)); Path stagingPath = WriterUtils.getWriterStagingDir(state, numBranches, branchId); if (fs.exists(stagingPath)) { logger.info("Cleaning up staging directory " + stagingPath.toUri().getPath()); if (!fs.delete(stagingPath, true)) { throw new IOException("Clean up staging directory " + stagingPath.toUri().getPath() + " failed"); } } Path outputPath = WriterUtils.getWriterOutputDir(state, numBranches, branchId); if (fs.exists(outputPath)) { logger.info("Cleaning up output directory " + outputPath.toUri().getPath()); if (!fs.delete(outputPath, true)) { throw new IOException("Clean up output directory " + outputPath.toUri().getPath() + " failed"); } } } }
public FileAwareInputStreamDataWriter(State state, int numBranches, int branchId, String writerAttemptId) throws IOException { super(state); if (numBranches > 1) { throw new IOException("Distcp can only operate with one branch."); } if (!(state instanceof WorkUnitState)) { throw new RuntimeException(String.format("Distcp requires a %s on construction.", WorkUnitState.class.getSimpleName())); } this.state = (WorkUnitState) state; this.taskBroker = this.state.getTaskBroker(); this.writerAttemptIdOptional = Optional.fromNullable(writerAttemptId); String uri = this.state.getProp( ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, numBranches, branchId), ConfigurationKeys.LOCAL_FS_URI); this.fs = FileSystem.get(URI.create(uri), WriterUtils.getFsConfiguration(state)); this.stagingDir = this.writerAttemptIdOptional.isPresent() ? WriterUtils .getWriterStagingDir(state, numBranches, branchId, this.writerAttemptIdOptional.get()) : WriterUtils.getWriterStagingDir(state, numBranches, branchId); this.outputDir = getOutputDir(state); this.copyableDatasetMetadata = CopyableDatasetMetadata.deserialize(state.getProp(CopySource.SERIALIZED_COPYABLE_DATASET)); this.recoveryHelper = new RecoveryHelper(this.fs, state); this.actualProcessedCopyableFile = Optional.absent(); this.copySpeedMeter = getMetricContext().meter(GOBBLIN_COPY_BYTES_COPIED_METER); this.bufferSize = state.getPropAsInt(CopyConfiguration.BUFFER_SIZE, StreamCopier.DEFAULT_BUFFER_SIZE); this.encryptionConfig = EncryptionConfigParser .getConfigForBranch(EncryptionConfigParser.EntityType.WRITER, this.state, numBranches, branchId); }
ParallelRunner parallelRunner = getParallelRunner(fs, closer, parallelRunnerThreads, parallelRunners); Path stagingPath = WriterUtils.getWriterStagingDir(state, numBranches, branchId); if (fs.exists(stagingPath)) { logger.info("Cleaning up staging directory " + stagingPath.toUri().getPath());