/**
 * Serializes an {@link FsWriterMetrics} to JSON, parses it back, and checks that the
 * writer id, partition info and file infos of the parsed object equal the inputs.
 *
 * @throws IOException propagated from the JSON conversion calls
 */
@Test
public void testSerialization() throws IOException {
  final String expectedWriterId = "foobar123";
  final PartitionIdentifier expectedPartition = new PartitionIdentifier("_partitionInfo", 3);
  final Set<FsWriterMetrics.FileInfo> expectedFiles = ImmutableSet.of(
      new FsWriterMetrics.FileInfo("file1", 1234),
      new FsWriterMetrics.FileInfo("file2", 4321));

  // Round trip: object -> JSON string -> object.
  FsWriterMetrics source = new FsWriterMetrics(expectedWriterId, expectedPartition, expectedFiles);
  String json = source.toJson();
  FsWriterMetrics roundTripped = FsWriterMetrics.fromJson(json);

  Assert.assertEquals(roundTripped.writerId, expectedWriterId);
  Assert.assertEquals(roundTripped.partitionInfo, expectedPartition);
  Assert.assertEquals(roundTripped.fileInfos, expectedFiles);
}
} // closes the enclosing scope that was opened before this excerpt
/**
 * Folds one writer's metrics into {@code mergedMetadata}.
 *
 * <p>Starting from the record and file counts already held by {@code mergedMetadata}, each
 * file info adds its record count to the running total, bumps the file count by one, and
 * stores its record count as file-level metadata under {@code GlobalMetadata.NUM_RECORDS_KEY}.
 * The updated totals are written back once all file infos have been visited.
 *
 * @param metrics writer metrics whose file infos are merged in
 */
@Override
public void update(FsWriterMetrics metrics) {
  long totalRecords = mergedMetadata.getNumRecords();
  int totalFiles = mergedMetadata.getNumFiles();

  for (FsWriterMetrics.FileInfo info : metrics.getFileInfos()) {
    totalRecords += info.getNumRecords();
    totalFiles++;
    mergedMetadata.setFileMetadata(
        info.getFileName(),
        GlobalMetadata.NUM_RECORDS_KEY,
        Long.valueOf(info.getNumRecords()));
  }

  mergedMetadata.setNumRecords(totalRecords);
  mergedMetadata.setNumOutputFiles(totalFiles);
}
} else if (((String) property.getKey()).startsWith(FsDataWriter.FS_WRITER_METRICS_KEY)) { try { FsWriterMetrics parsedMetrics = FsWriterMetrics.fromJson((String) property.getValue()); partitionPaths.add(parsedMetrics.getPartitionInfo().getPartitionKey()); Set<FsWriterMetrics> metricsForPartition = metricsByPartition.computeIfAbsent(parsedMetrics.getPartitionInfo(), k -> new HashSet<>()); metricsForPartition.add(parsedMetrics); } catch (IOException e) {
// Build a metrics summary for this writer from its id, a PartitionIdentifier made of its
// partition key and branch id, and a single FileInfo (output file name plus recordsWritten()),
// then store the summary as JSON under FS_WRITER_METRICS_KEY in this.properties.
FsWriterMetrics metrics = new FsWriterMetrics( this.id, new PartitionIdentifier(this.partitionKey, this.branchId), ImmutableSet.of(new FsWriterMetrics.FileInfo(this.outputFile.getName(), recordsWritten())) ); this.properties.setProp(FS_WRITER_METRICS_KEY, metrics.toJson());
/**
 * Checks that {@code BaseDataPublisher.publishMetadata} leaves the publish directory empty
 * when the metadata output dir and output file properties have been removed from the state,
 * even though the work unit state carries writer metadata, partition paths and writer metrics.
 *
 * @throws IOException declared for the calls made in the test body
 */
@Test
public void testNoOutputWhenDisabledWithPartitions() throws IOException {
  File publishPath = Files.createTempDir();

  // Remove both metadata output settings; only the final publish dir stays configured.
  State s = buildDefaultState(1);
  s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_DIR);
  s.removeProp(ConfigurationKeys.DATA_PUBLISHER_METADATA_OUTPUT_FILE);
  s.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, publishPath.getAbsolutePath());

  WorkUnitState wuState = new WorkUnitState();
  addStateToWorkunit(s, wuState);
  wuState.setProp(ConfigurationKeys.WRITER_METADATA_KEY, "abcdefg");

  FsWriterMetrics metrics1 = buildWriterMetrics("foo1.json", "1-2-3-4", 0, 10);
  FsWriterMetrics metrics2 = buildWriterMetrics("foo1.json", "5-6-7-8",10, 20);

  // NOTE(review): the metrics keys below use the suffixes " _0" / " _1" (with a leading
  // space) while the partition path keys use "_0" / "_1". This looks like a typo; the
  // literals are left untouched here -- confirm and fix together with other usages.
  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson());
  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_0", "1-2-3-4");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _0", metrics2.toJson());
  wuState.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_1", "5-6-7-8");
  wuState.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _1", metrics2.toJson());

  BaseDataPublisher publisher = new BaseDataPublisher(s);
  publisher.publishMetadata(Collections.singletonList(wuState));

  // File.list() returns null when the path cannot be listed; fail with a clear message
  // instead of a NullPointerException on the length access below.
  String[] filesInPublishDir = publishPath.list();
  Assert.assertNotNull(filesInPublishDir, "Could not list publish path " + publishPath);

  // The observed value goes first and the expected value second, so a failure message
  // reports them the right way round.
  Assert.assertEquals(filesInPublishDir.length, 0, "Expected 0 files to be output to publish path");
}
/**
 * Creates an {@link FsWriterMetrics} describing a single output file.
 *
 * @param fileName     name given to the one {@code FileInfo} entry
 * @param partitionKey partition key passed to the {@code PartitionIdentifier}
 * @param writerId     number formatted into the writer id as {@code "writer<writerId>"}
 * @param branchId     branch id passed to the {@code PartitionIdentifier}
 * @param numRecords   record count given to the one {@code FileInfo} entry
 * @return the assembled metrics object
 */
private FsWriterMetrics buildWriterMetrics(String fileName, String partitionKey, int writerId, int branchId,
    int numRecords) {
  String writerName = String.format("writer%d", writerId);
  PartitionIdentifier partition = new PartitionIdentifier(partitionKey, branchId);
  FsWriterMetrics.FileInfo onlyFile = new FsWriterMetrics.FileInfo(fileName, numRecords);
  return new FsWriterMetrics(writerName, partition, ImmutableList.of(onlyFile));
}
// Parse the JSON stored under FsDataWriter.FS_WRITER_METRICS_KEY back into FsWriterMetrics,
// assert that it holds exactly one file entry, and take that entry from the collection.
FsWriterMetrics metrics = FsWriterMetrics.fromJson(properties.getProp(FsDataWriter.FS_WRITER_METRICS_KEY)); Assert.assertEquals(metrics.fileInfos.size(),1); FsWriterMetrics.FileInfo fileInfo = metrics.fileInfos.iterator().next();
/**
 * Closes this writer and records its final output in {@code properties}.
 *
 * <p>After {@code closer} is closed, the output path is appended to the set property named
 * by {@code allOutputFilesPropName}: the path returned by {@code addRecordCountToFileName()}
 * when {@code shouldIncludeRecordCountInFileName} is set, otherwise {@code getOutputFilePath()}.
 * Finally a {@link FsWriterMetrics} summary (writer id, partition key and branch id, output
 * file name and records written) is stored as JSON under {@code FS_WRITER_METRICS_KEY}.
 *
 * @throws IOException if closing resources or producing the path/metrics fails
 */
@Override
public void close() throws IOException {
  this.closer.close();

  // Record where the output ended up, with or without the record count in the name.
  if (!this.shouldIncludeRecordCountInFileName) {
    this.properties.appendToSetProp(this.allOutputFilesPropName, getOutputFilePath());
  } else {
    String pathWithCount = addRecordCountToFileName();
    this.properties.appendToSetProp(this.allOutputFilesPropName, pathWithCount);
  }

  // Publish a JSON summary of what this writer produced.
  PartitionIdentifier partition = new PartitionIdentifier(this.partitionKey, this.branchId);
  FsWriterMetrics.FileInfo writtenFile =
      new FsWriterMetrics.FileInfo(this.outputFile.getName(), recordsWritten());
  FsWriterMetrics summary = new FsWriterMetrics(this.id, partition, ImmutableSet.of(writtenFile));
  this.properties.setProp(FS_WRITER_METRICS_KEY, summary.toJson());
}
// Store metrics1 (as JSON) and the md value on wuState1 under the writer metrics and writer
// metadata keys, then hand s and wuState1 to addStateToWorkunit.
wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); addStateToWorkunit(s, wuState1);
// Populate three work unit states using suffixed property keys:
//  - wuState1: partition paths and metrics under ".0", ".0_0" and ".0_1", metadata under ".0";
//  - wuState2: partition path "1-2-3-4", metadata and metrics3 under ".1";
//  - wuState3: partition path "5-6-7-8", metadata and metrics4 under ".0".
// Each state is passed to addStateToWorkunit together with s after its properties are set.
// NOTE(review): the ".N" suffix presumably denotes a branch and "_N" a partition writer -- confirm.
// NOTE(review): metrics2 (built with "5-6-7-8") is stored under both ".0_0" (paired with
// partition path "1-2-3-4") and ".0_1" -- confirm this is intended.
FsWriterMetrics metrics2 = buildWriterMetrics("foo1.json", "5-6-7-8",10, 20); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0", "1-2-3-4"); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0", metrics1.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0_0", "1-2-3-4"); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0_0", metrics2.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0" + "_1", "5-6-7-8"); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0_1", metrics2.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".0", md); addStateToWorkunit(s, wuState1); wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".1", "1-2-3-4"); wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".1", md); wuState2.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".1", metrics3.toJson()); addStateToWorkunit(s, wuState2); wuState3.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + ".0", "5-6-7-8"); wuState3.setProp(ConfigurationKeys.WRITER_METADATA_KEY + ".0", md); wuState3.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + ".0", metrics4.toJson()); addStateToWorkunit(s, wuState3);
} else if (((String) property.getKey()).startsWith(FsDataWriter.FS_WRITER_METRICS_KEY)) { try { FsWriterMetrics parsedMetrics = FsWriterMetrics.fromJson((String) property.getValue()); partitionPaths.add(parsedMetrics.getPartitionInfo().getPartitionKey()); Set<FsWriterMetrics> metricsForPartition = metricsByPartition.computeIfAbsent(parsedMetrics.getPartitionInfo(), k -> new HashSet<>()); metricsForPartition.add(parsedMetrics); } catch (IOException e) {
/**
 * Merges the given writer metrics into {@code mergedMetadata}.
 *
 * <p>The current record and file counts are read from {@code mergedMetadata}; every file
 * info then contributes its record count to the total, counts as one more file, and has its
 * record count stored as file-level metadata under {@code GlobalMetadata.NUM_RECORDS_KEY}.
 * Both totals are written back after the loop.
 *
 * @param metrics writer metrics whose file infos are merged in
 */
@Override
public void update(FsWriterMetrics metrics) {
  long recordTotal = mergedMetadata.getNumRecords();
  int fileTotal = mergedMetadata.getNumFiles();

  for (FsWriterMetrics.FileInfo entry : metrics.getFileInfos()) {
    recordTotal += entry.getNumRecords();
    fileTotal++;
    mergedMetadata.setFileMetadata(
        entry.getFileName(),
        GlobalMetadata.NUM_RECORDS_KEY,
        Long.valueOf(entry.getNumRecords()));
  }

  mergedMetadata.setNumRecords(recordTotal);
  mergedMetadata.setNumOutputFiles(fileTotal);
}
// Populate three work unit states with partition path, writer metrics and writer metadata:
//  - wuState1: the unsuffixed keys plus "_0"/"_1" partition path keys and " _0"/" _1" metrics keys;
//  - wuState2: partition path "1-2-3-4", md and metrics3 under the unsuffixed keys;
//  - wuState3: partition path "5-6-7-8", md and metrics4 under the unsuffixed keys.
// Each state is passed to addStateToWorkunit together with s after its properties are set.
// NOTE(review): the metrics key suffixes " _0" and " _1" contain a leading space, unlike the
// "_0"/"_1" partition path suffixes -- this looks like a typo; confirm before relying on it.
FsWriterMetrics metrics2 = buildWriterMetrics("foo1.json", "5-6-7-8",10, 20); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4"); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_0", "1-2-3-4"); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _0", metrics2.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY + "_1", "5-6-7-8"); wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY + " _1", metrics2.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); addStateToWorkunit(s, wuState1); wuState2.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "1-2-3-4"); wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); wuState2.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics3.toJson()); addStateToWorkunit(s, wuState2); wuState3.setProp(ConfigurationKeys.WRITER_PARTITION_PATH_KEY, "5-6-7-8"); wuState3.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); wuState3.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics4.toJson()); addStateToWorkunit(s, wuState3);
// Give each of the three work unit states the md metadata value and its own writer metrics
// JSON (metrics1, metrics3, metrics4), then pass each with s to addStateToWorkunit.
// metrics3 and metrics4 are built with null as the second buildWriterMetrics argument.
// NOTE(review): that null is presumably the partition key (non-partitioned case) -- confirm.
wuState1.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics1.toJson()); wuState1.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); addStateToWorkunit(s, wuState1); FsWriterMetrics metrics3 = buildWriterMetrics("foo3.json", null, 1, 30); wuState2.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); wuState2.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics3.toJson()); addStateToWorkunit(s, wuState2); FsWriterMetrics metrics4 = buildWriterMetrics("foo4.json", null, 2, 55); wuState3.setProp(ConfigurationKeys.WRITER_METADATA_KEY, md); wuState3.setProp(FsDataWriter.FS_WRITER_METRICS_KEY, metrics4.toJson()); addStateToWorkunit(s, wuState3);