/**
 * Publishes metadata for a single work unit that carries writer metadata and checks that the
 * metadata file for branch 0 does not exist afterwards. Per the assertion message, writer
 * metadata must not be written out when no merger is set in the configuration.
 */
@Test
public void testNoOutputWhenDisabled() throws IOException {
  State jobState = buildDefaultState(1);

  WorkUnitState workUnit = new WorkUnitState();
  addStateToWorkunit(jobState, workUnit);
  workUnit.setProp(ConfigurationKeys.WRITER_METADATA_KEY, "abcdefg");

  BaseDataPublisher dataPublisher = new BaseDataPublisher(jobState);
  dataPublisher.publishMetadata(Collections.singletonList(workUnit));

  File metadataFile = openMetadataFile(jobState, 1, 0);
  boolean metadataWritten = metadataFile.exists();
  Assert.assertFalse(metadataWritten,
      "Internal metadata from writer should not be written out if no merger is set in config");
}
/**
 * Convenience overload that forwards to the five-argument {@code buildWriterMetrics}, supplying
 * {@code 0} as the fourth argument.
 *
 * NOTE(review): the fourth argument of the five-argument overload is presumably a branch id;
 * confirm against that overload's signature.
 */
private FsWriterMetrics buildWriterMetrics(String fileName, String partitionKey, int writerId, int numRecords) {
  final int defaultFourthArg = 0;
  FsWriterMetrics metrics = buildWriterMetrics(fileName, partitionKey, writerId, defaultFourthArg, numRecords);
  return metrics;
}
State s = buildDefaultState(1); String md = new GlobalMetadata().toJson(); FsWriterMetrics metrics1 = buildWriterMetrics("newfile.json", null, 0, 90); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); addStateToWorkunit(s, wuState1); checkMetadata(new File(publishPath.getAbsolutePath(), "metadata.json"), 4,185, new FsWriterMetrics.FileInfo("foo3.json", 30), new FsWriterMetrics.FileInfo("foo1.json", 10),
/**
 * Verifies that nothing is written under the publish directory when the metadata output
 * directory and metadata output file properties have been removed from the job state, even
 * though the work unit carries writer metadata, partition paths and writer metrics.
 */
@Test
public void testNoOutputWhenDisabledWithPartitions() throws IOException {
  File publishPath = Files.createTempDir();

  State s = buildDefaultState(1);
  // Strip the metadata output settings so only the final publish dir is configured.
  s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
  s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE);
  s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());

  WorkUnitState wuState = new WorkUnitState();
  addStateToWorkunit(s, wuState);
  wuState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, "abcdefg");

  FsWriterMetrics metrics1 = buildWriterMetrics("foo1.json", "1-2-3-4", 0, 10);
  FsWriterMetrics metrics2 = buildWriterMetrics("foo1.json", "5-6-7-8", 10, 20);
  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson());
  // NOTE(review): the metrics key suffixes below contain a leading space (" _0", " _1") while the
  // partition-path suffixes do not ("_0", "_1"). This looks like a typo, but it is left untouched
  // here because the publisher's key-matching rules are not visible from this test - confirm.
  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_0", "1-2-3-4");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _0", metrics2.toJson());
  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_1", "5-6-7-8");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _1", metrics2.toJson());

  BaseDataPublisher publisher = new BaseDataPublisher(s);
  publisher.publishMetadata(Collections.singletonList(wuState));

  String[] filesInPublishDir = publishPath.list();
  // File.list() returns null if the path is not a directory or an I/O error occurs; report that
  // as an assertion failure rather than a NullPointerException on the next line.
  Assert.assertNotNull(filesInPublishDir, "Unable to list contents of publish path " + publishPath);
  // TestNG's assertEquals takes (actual, expected, message); keep that order so a failure
  // reports the values the right way round.
  Assert.assertEquals(filesInPublishDir.length, 0, "Expected 0 files to be output to publish path");
}
/**
 * A merger class name that does not implement MetadataMerger (here {@code java.lang.String})
 * must be rejected: the test expects an {@link IllegalArgumentException} to be thrown.
 */
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBogusMetadataMerger() throws IOException {
  State jobState = buildDefaultState(1);

  // Enable writer-metadata publishing, but point the merger at a class that is not a merger.
  jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_KEY, "true");
  jobState.setProp(ConfigurationKeys.DATA_PUBLISH_WRITER_METADATA_MERGER_NAME_KEY, "java.lang.String");
  jobState.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");

  WorkUnitState workUnit = new WorkUnitState();
  addStateToWorkunit(jobState, workUnit);

  BaseDataPublisher dataPublisher = new BaseDataPublisher(jobState);
  dataPublisher.publishMetadata(Collections.singletonList(workUnit));
}
int numPartitionsPerBranch = 2; WorkUnitState state = buildTaskState(numBranches); LineageInfo lineageInfo = LineageInfo.getLineageInfo(state.getTaskBroker()).get(); DatasetDescriptor source = new DatasetDescriptor("kafka", "testTopic"); LineageEventBuilder event = find(events, "partition" + i + j); Assert.assertTrue(null != event); Assert.assertEquals(event.getSource(), source);
/**
 * Publishing two two-branch task states in one call should leave a lineage destination
 * property for branch 0 and branch 1 on each of the task states.
 */
@Test
public void testPublishMultiTasks() throws IOException {
  WorkUnitState firstTask = buildTaskState(2);
  WorkUnitState secondTask = buildTaskState(2);

  LineageInfo lineage = LineageInfo.getLineageInfo(firstTask.getTaskBroker()).get();
  DatasetDescriptor kafkaSource = new DatasetDescriptor("kafka", "testTopic");
  lineage.setSource(kafkaSource, firstTask);
  lineage.setSource(kafkaSource, secondTask);

  ImmutableList<WorkUnitState> tasks = ImmutableList.of(firstTask, secondTask);
  BaseDataPublisher dataPublisher = new BaseDataPublisher(firstTask);
  dataPublisher.publishData(tasks);

  String[] destinationKeys = {
      "gobblin.event.lineage.branch.0.destination",
      "gobblin.event.lineage.branch.1.destination"
  };
  // Same check order as before: every key on the first task, then every key on the second.
  for (WorkUnitState task : tasks) {
    for (String destinationKey : destinationKeys) {
      Assert.assertTrue(task.contains(destinationKey));
    }
  }
}
File publishPath = Files.createTempDir(); try { State s = buildDefaultState(1); String md = new GlobalMetadata().toJson(); FsWriterMetrics metrics1 = buildWriterMetrics("foo1.json", null, 0, 10); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); addStateToWorkunit(s, wuState1); FsWriterMetrics metrics3 = buildWriterMetrics("foo3.json", null, 1, 30); wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); wuState2.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics3.toJson()); addStateToWorkunit(s, wuState2); FsWriterMetrics metrics4 = buildWriterMetrics("foo4.json", null, 2, 55); wuState3.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); wuState3.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics4.toJson()); addStateToWorkunit(s, wuState3); checkMetadata(new File(publishPath.getAbsolutePath(), "metadata.json"), 3, 95, new FsWriterMetrics.FileInfo("foo3.json", 30), new FsWriterMetrics.FileInfo("foo1.json", 10),
part2.mkdir(); State s = buildDefaultState(1); String md = new GlobalMetadata().toJson(); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4"); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); addStateToWorkunit(s, wuState1); addStateToWorkunit(s, wuState2);
/**
 * Publishing a single one-branch task state should set the lineage destination property for
 * branch 0 and leave the one for branch 1 unset.
 */
@Test
public void testPublishSingleTask() throws IOException {
  WorkUnitState taskState = buildTaskState(1);

  LineageInfo lineage = LineageInfo.getLineageInfo(taskState.getTaskBroker()).get();
  DatasetDescriptor kafkaSource = new DatasetDescriptor("kafka", "testTopic");
  lineage.setSource(kafkaSource, taskState);

  BaseDataPublisher dataPublisher = new BaseDataPublisher(taskState);
  dataPublisher.publishData(taskState);

  boolean branch0HasDestination = taskState.contains("gobblin.event.lineage.branch.0.destination");
  Assert.assertTrue(branch0HasDestination);

  boolean branch1HasDestination = taskState.contains("gobblin.event.lineage.branch.1.destination");
  Assert.assertFalse(branch1HasDestination);
}
State s = buildDefaultState(2); String md = new GlobalMetadata().toJson(); FsWriterMetrics metrics1 = buildWriterMetrics("foo1.json", "1-2-3-4", 0, 10); FsWriterMetrics metrics2 = buildWriterMetrics("foo1.json", "5-6-7-8",10, 20); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0", "1-2-3-4"); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0", metrics1.toJson()); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0_1", metrics2.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".0", md); addStateToWorkunit(s, wuState1); FsWriterMetrics metrics3 = buildWriterMetrics("foo3.json", "1-2-3-4", 1, 1, 30); wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".1", "1-2-3-4"); wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".1", md); wuState2.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".1", metrics3.toJson()); addStateToWorkunit(s, wuState2); FsWriterMetrics metrics4 = buildWriterMetrics("foo4.json", "5-6-7-8", 2, 55); wuState3.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0", "5-6-7-8"); wuState3.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".0", md); wuState3.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0", metrics4.toJson()); addStateToWorkunit(s, wuState3); checkMetadata(new File(branchPaths.get(0)[0], "metadata.json.0"), 1, 10, new FsWriterMetrics.FileInfo("foo1.json", 10)); checkMetadata(new File(branchPaths.get(0)[1], "metadata.json.0"), 2, 75, new FsWriterMetrics.FileInfo("foo1.json", 20), new FsWriterMetrics.FileInfo("foo4.json", 55));
/**
 * Exercises DATA_PUBLISHER_METADATA_STR with a single branch: the string placed in the
 * configuration ("foobar") is expected to be read back verbatim from the metadata file.
 */
@Test
public void testMetadataStrOneBranch() throws IOException {
  State jobState = buildDefaultState(1);

  WorkUnitState workUnit = new WorkUnitState();
  workUnit.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");
  addStateToWorkunit(jobState, workUnit);

  BaseDataPublisher dataPublisher = new BaseDataPublisher(jobState);
  dataPublisher.publishMetadata(workUnit);

  File metadataFile = openMetadataFile(jobState, 1, 0);
  try (InputStream metadataIn = new FileInputStream(metadataFile)) {
    String metadataText = IOUtils.toString(metadataIn, StandardCharsets.UTF_8);
    Assert.assertEquals(metadataText, "foobar", "Expected to read back metadata from string");
  }
}
State s = buildDefaultState(1); String md = new GlobalMetadata().toJson(); FsWriterMetrics metrics1 = buildWriterMetrics("foo1.json", "1-2-3-4", 0, 10); FsWriterMetrics metrics2 = buildWriterMetrics("foo1.json", "5-6-7-8",10, 20); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4"); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson()); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _1", metrics2.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); addStateToWorkunit(s, wuState1); FsWriterMetrics metrics3 = buildWriterMetrics("foo3.json", "1-2-3-4", 1, 30); wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4"); wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); wuState2.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics3.toJson()); addStateToWorkunit(s, wuState2); FsWriterMetrics metrics4 = buildWriterMetrics("foo4.json", "5-6-7-8", 2, 55); wuState3.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "5-6-7-8"); wuState3.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); wuState3.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics4.toJson()); addStateToWorkunit(s, wuState3); checkMetadata(new File(part1, "metadata.json"), 2, 40, new FsWriterMetrics.FileInfo("foo3.json", 30), new FsWriterMetrics.FileInfo("foo1.json", 10)); checkMetadata(new File(part2, "metadata.json"), 2, 75, new FsWriterMetrics.FileInfo("foo1.json", 20),
/**
 * Exercises DATA_PUBLISHER_METADATA_STR with three branches: one work unit is created per
 * branch index, all are published together, and the metadata file of every branch is expected
 * to contain the configured string ("foobar").
 */
@Test
public void testMetadataStrMultipleWorkUnitsAndBranches() throws IOException {
  final int numBranches = 3;
  State jobState = buildDefaultState(numBranches);

  List<WorkUnitState> pendingWorkUnits = new ArrayList<>();
  for (int created = 0; created < numBranches; created++) {
    WorkUnitState workUnit = new WorkUnitState();
    workUnit.setProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_STR, "foobar");
    addStateToWorkunit(jobState, workUnit);
    pendingWorkUnits.add(workUnit);
  }

  BaseDataPublisher dataPublisher = new BaseDataPublisher(jobState);
  dataPublisher.publishMetadata(pendingWorkUnits);

  for (int branchId = 0; branchId < numBranches; branchId++) {
    File metadataFile = openMetadataFile(jobState, numBranches, branchId);
    try (InputStream metadataIn = new FileInputStream(metadataFile)) {
      String metadataText = IOUtils.toString(metadataIn, StandardCharsets.UTF_8);
      Assert.assertEquals(metadataText, "foobar", "Expected to read back metadata from string");
    }
  }
}
final int numWorkUnits = 10; State s = buildDefaultState(numBranches); for (int workUnitId = 0; workUnitId < numWorkUnits; workUnitId++) { WorkUnitState wuState = new WorkUnitState(); addStateToWorkunit(s, wuState); try (InputStream mdStream = new FileInputStream(openMetadataFile(s, numBranches, branch))) { String mdBytes = IOUtils.toString(mdStream, StandardCharsets.UTF_8); Assert.assertEquals(mdBytes, String.valueOf(expectedSum), "Expected to read back correctly merged metadata from string");