/**
 * Resolve the directory into which a {@link org.apache.gobblin.writer.DataWriter} writes its output data.
 *
 * <p>The result is the value of the branch-specific {@link ConfigurationKeys#WRITER_OUTPUT_DIR} property
 * used as the parent, with the path returned by
 * {@link WriterUtils#getWriterFilePath(State, int, int)} as the child.
 *
 * @param state the {@link State} carrying the writer configuration.
 * @param numBranches the total number of branches for the given {@link State}.
 * @param branchId the id of the branch the {@link org.apache.gobblin.writer.DataWriter} writes to.
 * @return the output directory {@link Path} for the given branch.
 * @throws IllegalArgumentException if the branch-specific output directory property is not set.
 */
public static Path getWriterOutputDir(State state, int numBranches, int branchId) {
  final String branchKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, numBranches, branchId);
  final boolean keyPresent = state.contains(branchKey);
  Preconditions.checkArgument(keyPresent, "Missing required property " + branchKey);

  final String outputRoot = state.getProp(branchKey);
  final Path relativePath = WriterUtils.getWriterFilePath(state, numBranches, branchId);
  return new Path(outputRoot, relativePath);
}
/**
 * Get the {@link Path} corresponding to the directory in which a given
 * {@link org.apache.gobblin.writer.DataWriter} should write its staging data. The staging data directory
 * is determined by combining the branch-specific {@link ConfigurationKeys#WRITER_STAGING_DIR} with the
 * path returned by {@link WriterUtils#getWriterFilePath(State, int, int)}.
 *
 * @param state is the {@link State} corresponding to a specific {@link org.apache.gobblin.writer.DataWriter}.
 * @param numBranches is the total number of branches for the given {@link State}.
 * @param branchId is the id for the specific branch that the {@link org.apache.gobblin.writer.DataWriter} will write to.
 * @return a {@link Path} specifying the staging directory the {@link org.apache.gobblin.writer.DataWriter} will write to.
 * @throws IllegalArgumentException if the branch-specific staging directory property is not set.
 */
public static Path getWriterStagingDir(State state, int numBranches, int branchId) {
  String writerStagingDirKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, numBranches, branchId);
  Preconditions.checkArgument(state.contains(writerStagingDirKey),
      "Missing required property " + writerStagingDirKey);

  // Read the property through the key that was just validated rather than resolving the key a second time.
  return new Path(state.getProp(writerStagingDirKey),
      WriterUtils.getWriterFilePath(state, numBranches, branchId));
}
/**
 * Checks {@link WriterUtils#getWriterFilePath} when the path is configured explicitly through
 * {@link ConfigurationKeys#WRITER_FILE_PATH}, and when it is derived from
 * {@link ConfigurationKeys#WRITER_FILE_PATH_TYPE}.
 */
@Test
public void testGetWriterFilePath() {
  WorkUnit workUnit =
      WorkUnit.create(new Extract(TableType.SNAPSHOT_ONLY, "org.apache.gobblin.dbNamespace", "tableName"));

  // An explicitly configured file path is expected back unchanged.
  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH, TEST_WRITER_FILE_PATH);
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 0, 0), TEST_WRITER_FILE_PATH);

  // Also set the ".0"-suffixed key; the same configured path is expected for numBranches = 1, branchId = 1.
  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH + ".0", TEST_WRITER_FILE_PATH);
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 1, 1), TEST_WRITER_FILE_PATH);

  // With the un-suffixed path removed, the configured path type determines the result.
  workUnit.removeProp(ConfigurationKeys.WRITER_FILE_PATH);

  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH_TYPE, "tablename");
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 0, 0), new Path("tableName"));

  workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH_TYPE, "namespace_table");
  Assert.assertEquals(WriterUtils.getWriterFilePath(workUnit, 0, 0),
      new Path("org/apache/gobblin/dbNamespace/tableName"));
}
/**
 * Checks the path {@link WriterUtils#getWriterFilePath} returns for a {@link WorkUnit} when no writer
 * file path properties are set: the extract's output file path, with a fork-branch sub-directory
 * appended when two branches are requested.
 */
@Test
public void testGetDefaultWriterFilePath() {
  String namespace = "gobblin.test";
  String tableName = "test-table";

  Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, namespace, tableName);
  WorkUnit workUnit = WorkUnit.create(extract);

  // No branching: the extract's output file path is expected as-is.
  Assert.assertEquals(
      WriterUtils.getWriterFilePath(workUnit, 0, 0),
      new Path(workUnit.getExtract().getOutputFilePath()));

  // Two branches, branch 0: expect the default fork branch name suffixed with the branch id as a child.
  Assert.assertEquals(
      WriterUtils.getWriterFilePath(workUnit, 2, 0),
      new Path(workUnit.getExtract().getOutputFilePath(), ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0"));
}
/**
 * Get the {@link Path} corresponding to the directory to which a given
 * {@link org.apache.gobblin.publisher.BaseDataPublisher} should commit its output data.
 *
 * <p>The base of the result is the branch-specific {@link ConfigurationKeys#DATA_PUBLISHER_FINAL_DIR}.
 * When {@link ConfigurationKeys#DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR} is true (its default is
 * {@link ConfigurationKeys#DEFAULT_DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR}), the path returned by
 * {@link WriterUtils#getWriterFilePath(State, int, int)} is appended as a child; otherwise the final
 * directory is returned on its own.
 *
 * @param state is the {@link State} holding the publisher and writer configuration.
 * @param numBranches is the total number of branches for the given {@link State}.
 * @param branchId is the id for the specific branch that the {@link org.apache.gobblin.publisher.BaseDataPublisher} will publish.
 * @return a {@link Path} specifying the directory where the {@link org.apache.gobblin.publisher.BaseDataPublisher} will publish.
 * @throws IllegalArgumentException if the branch-specific final directory property is not set.
 */
public static Path getDataPublisherFinalDir(State state, int numBranches, int branchId) {
  String dataPublisherFinalDirKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, numBranches, branchId);
  Preconditions.checkArgument(state.contains(dataPublisherFinalDirKey),
      "Missing required property " + dataPublisherFinalDirKey);

  // Read the property once through the validated key instead of re-resolving the key in each branch.
  String finalDir = state.getProp(dataPublisherFinalDirKey);

  if (state.getPropAsBoolean(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR,
      ConfigurationKeys.DEFAULT_DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR)) {
    return new Path(finalDir, WriterUtils.getWriterFilePath(state, numBranches, branchId));
  }
  return new Path(finalDir);
}
/**
 * Same checks as the default writer file path test, but passing a {@link WorkUnitState} wrapped around
 * the {@link WorkUnit} to {@link WriterUtils#getWriterFilePath}.
 */
@Test
public void testGetDefaultWriterFilePathWithWorkUnitState() {
  String namespace = "gobblin.test";
  String tableName = "test-table";

  Extract extract = new Extract(new SourceState(), TableType.APPEND_ONLY, namespace, tableName);
  WorkUnitState workUnitState = new WorkUnitState(WorkUnit.create(extract));

  // No branching: the extract's output file path is expected as-is.
  Assert.assertEquals(
      WriterUtils.getWriterFilePath(workUnitState, 0, 0),
      new Path(workUnitState.getExtract().getOutputFilePath()));

  // Two branches, branch 0: expect the default fork branch name suffixed with the branch id as a child.
  Assert.assertEquals(
      WriterUtils.getWriterFilePath(workUnitState, 2, 0),
      new Path(workUnitState.getExtract().getOutputFilePath(),
          ConfigurationKeys.DEFAULT_FORK_BRANCH_NAME + "0"));
}
/**
 * Resolve the directory into which a {@link org.apache.gobblin.writer.DataWriter} writes its output data.
 *
 * <p>The result is the value of the branch-specific {@link ConfigurationKeys#WRITER_OUTPUT_DIR} property
 * used as the parent, with the path returned by
 * {@link WriterUtils#getWriterFilePath(State, int, int)} as the child.
 *
 * @param state the {@link State} carrying the writer configuration.
 * @param numBranches the total number of branches for the given {@link State}.
 * @param branchId the id of the branch the {@link org.apache.gobblin.writer.DataWriter} writes to.
 * @return the output directory {@link Path} for the given branch.
 * @throws IllegalArgumentException if the branch-specific output directory property is not set.
 */
public static Path getWriterOutputDir(State state, int numBranches, int branchId) {
  final String branchKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, numBranches, branchId);
  final boolean keyPresent = state.contains(branchKey);
  Preconditions.checkArgument(keyPresent, "Missing required property " + branchKey);

  final String outputRoot = state.getProp(branchKey);
  final Path relativePath = WriterUtils.getWriterFilePath(state, numBranches, branchId);
  return new Path(outputRoot, relativePath);
}
/**
 * Get the {@link Path} corresponding to the directory in which a given
 * {@link org.apache.gobblin.writer.DataWriter} should write its staging data. The staging data directory
 * is determined by combining the branch-specific {@link ConfigurationKeys#WRITER_STAGING_DIR} with the
 * path returned by {@link WriterUtils#getWriterFilePath(State, int, int)}.
 *
 * @param state is the {@link State} corresponding to a specific {@link org.apache.gobblin.writer.DataWriter}.
 * @param numBranches is the total number of branches for the given {@link State}.
 * @param branchId is the id for the specific branch that the {@link org.apache.gobblin.writer.DataWriter} will write to.
 * @return a {@link Path} specifying the staging directory the {@link org.apache.gobblin.writer.DataWriter} will write to.
 * @throws IllegalArgumentException if the branch-specific staging directory property is not set.
 */
public static Path getWriterStagingDir(State state, int numBranches, int branchId) {
  String writerStagingDirKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, numBranches, branchId);
  Preconditions.checkArgument(state.contains(writerStagingDirKey),
      "Missing required property " + writerStagingDirKey);

  // Read the property through the key that was just validated rather than resolving the key a second time.
  return new Path(state.getProp(writerStagingDirKey),
      WriterUtils.getWriterFilePath(state, numBranches, branchId));
}
/**
 * Get the {@link Path} corresponding to the directory to which a given
 * {@link org.apache.gobblin.publisher.BaseDataPublisher} should commit its output data.
 *
 * <p>The base of the result is the branch-specific {@link ConfigurationKeys#DATA_PUBLISHER_FINAL_DIR}.
 * When {@link ConfigurationKeys#DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR} is true (its default is
 * {@link ConfigurationKeys#DEFAULT_DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR}), the path returned by
 * {@link WriterUtils#getWriterFilePath(State, int, int)} is appended as a child; otherwise the final
 * directory is returned on its own.
 *
 * @param state is the {@link State} holding the publisher and writer configuration.
 * @param numBranches is the total number of branches for the given {@link State}.
 * @param branchId is the id for the specific branch that the {@link org.apache.gobblin.publisher.BaseDataPublisher} will publish.
 * @return a {@link Path} specifying the directory where the {@link org.apache.gobblin.publisher.BaseDataPublisher} will publish.
 * @throws IllegalArgumentException if the branch-specific final directory property is not set.
 */
public static Path getDataPublisherFinalDir(State state, int numBranches, int branchId) {
  String dataPublisherFinalDirKey =
      ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, numBranches, branchId);
  Preconditions.checkArgument(state.contains(dataPublisherFinalDirKey),
      "Missing required property " + dataPublisherFinalDirKey);

  // Read the property once through the validated key instead of re-resolving the key in each branch.
  String finalDir = state.getProp(dataPublisherFinalDirKey);

  if (state.getPropAsBoolean(ConfigurationKeys.DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR,
      ConfigurationKeys.DEFAULT_DATA_PUBLISHER_APPEND_EXTRACT_TO_FINAL_DIR)) {
    return new Path(finalDir, WriterUtils.getWriterFilePath(state, numBranches, branchId));
  }
  return new Path(finalDir);
}