/**
 * Initializes this run's starting partition from the previously persisted checkpoint.
 *
 * @param metadataManager metadata store queried for {@code MetadataConstants.CHECKPOINT_KEY}
 * @throws JobRuntimeException if the checkpoint cannot be read or the next partition resolved
 */
@Override
public void initPreviousRunState(@NonNull final IMetadataManager<StringValue> metadataManager) {
    try {
        final Optional<StringValue> latestCheckpoint = metadataManager.get(MetadataConstants.CHECKPOINT_KEY);
        // Fixed log text: previously read "Get latest change point".
        log.info("Got latest checkpoint: {}", latestCheckpoint);
        this.nextPartition = this.partitionManager.getNextPartition(latestCheckpoint);
    } catch (final IOException e) {
        // Bug fix: the old message appended this.nextPartition as the "Error message",
        // which is not an error detail at all. The cause carries the real failure.
        throw new JobRuntimeException("Unable to get the next partition", e);
    }
}
/***
 * Returns the metadata for the given DAG
 * @param key metadata key to look up
 * @return the deserialized metadata map, or absent when no entry exists
 * @throws IOException if the stored JSON value cannot be deserialized
 */
public Optional<Map<String, String>> get(@NotEmpty final String key) throws IOException {
    final Optional<StringValue> rawValue = this.metadataManager.get(key);
    // Guard clause: nothing stored under this key.
    if (!rawValue.isPresent()) {
        return Optional.absent();
    }
    final Map<String, String> parsed = mapper.readValue(rawValue.get().getValue(), typeRef);
    return Optional.of(parsed);
}
/***
 * Checks if metadata for a given DAG already exists
 * @param key the DAG key to look up
 * @return true if a metadata entry exists for the key
 */
public boolean contains(@NotEmpty final String key) {
    // isPresent() already yields the boolean; the former "? true : false" was redundant.
    return this.metadataManager.get(key).isPresent();
}
@Test public void testGetNextPartitionCheckpointIsLargerThanPartition() throws InterruptedException, IOException { final Path partition2Path = new Path(RAW_DATA_PATH, PARTITION2); this.fileSystem.mkdirs(new Path(partition2Path, FILE1)); final StringValue val1 = new StringValue(PARTITION2); final HDFSPartitionManager pm = new HDFSPartitionManager(JOBNAME, HDFSTestConstants.BASE_METADATA_PATH, RAW_DATA_PATH, this.fileSystem); final HDFSMetadataManager metadataManager = new HDFSMetadataManager(this.fileSystem, new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(), new AtomicBoolean(true)); metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val1); metadataManager.saveChanges(); final Path partition1Path = new Path(RAW_DATA_PATH, PARTITION1); this.fileSystem.mkdirs(new Path(partition1Path, FILE1)); // Checkpoint value is greater than the partitions in the data folder so nothing new to process Assert.assertFalse(pm.getNextPartition(getLatestCheckpoint(metadataManager)).isPresent()); }
@Test
public void testHDFSOverwriteCheckpointValue() throws IOException, InterruptedException {
    // Write the checkpoint twice; the second value must overwrite the first.
    this.metadataManager.set(MetadataConstants.CHECKPOINT_KEY, new StringValue("testVal"));
    this.metadataManager.set(MetadataConstants.CHECKPOINT_KEY, new StringValue("testVal2"));
    final Optional<StringValue> stored = this.metadataManager.get(MetadataConstants.CHECKPOINT_KEY);
    Assert.assertTrue(stored.isPresent());
    Assert.assertEquals("testVal2", stored.get().getValue());
    // Persist, then confirm the serialized file round-trips the in-memory map.
    this.metadataManager.saveChanges();
    final Optional<FileStatus> latestFile = this.metadataManager.getLatestMetadataFile();
    Assert.assertTrue(latestFile.isPresent());
    final Map<String, StringValue> reloaded = this.metadataManager.loadMetadata(latestFile.get().getPath());
    validateDeserializedMapEqualsInMemoryMap(reloaded);
}
// Verifies that removing a key is reflected both in memory and in the next
// serialized metadata file, surviving a full manager reload in between.
@Test
public void testDeletionIsPropagated() throws Exception {
    // Seed a checkpoint and persist it.
    final StringValue val1 = new StringValue("testVal");
    this.metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val1);
    this.metadataManager.saveChanges();
    Optional<FileStatus> fs = this.metadataManager.getLatestMetadataFile();
    Assert.assertTrue(fs.isPresent());
    Map<String, StringValue> loadedMap = this.metadataManager.loadMetadata(fs.get().getPath());
    validateDeserializedMapEqualsInMemoryMap(loadedMap);
    // reload the configuration
    setupTest();
    // After the reload the persisted checkpoint must still be visible, then removed.
    Assert.assertTrue(this.metadataManager.get(MetadataConstants.CHECKPOINT_KEY).isPresent());
    this.metadataManager.remove(MetadataConstants.CHECKPOINT_KEY);
    Assert.assertFalse(this.metadataManager.get(MetadataConstants.CHECKPOINT_KEY).isPresent());
    // Persist the deletion and confirm the newly written file no longer contains the key.
    this.metadataManager.saveChanges();
    fs = this.metadataManager.getLatestMetadataFile();
    Assert.assertFalse(this.metadataManager.get(MetadataConstants.CHECKPOINT_KEY).isPresent());
    loadedMap = this.metadataManager.loadMetadata(fs.get().getPath());
    validateDeserializedMapEqualsInMemoryMap(loadedMap);
}
@Test
public void testGetExistingPartitionsOnlyFilesExist() throws IOException {
    // A bare file (not a directory) sits at the partition location.
    this.fileSystem.create(new Path(RAW_DATA_PATH, PARTITION0));
    final HDFSPartitionManager partitionManager = new HDFSPartitionManager(JOBNAME,
            HDFSTestConstants.BASE_METADATA_PATH,
            RAW_DATA_PATH,
            this.fileSystem);
    final HDFSMetadataManager manager = new HDFSMetadataManager(this.fileSystem,
            new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(), new AtomicBoolean(true));
    Assert.assertEquals(1, partitionManager.getExistingPartitions().size());
    // With only files present, the raw data path itself is returned as the next partition.
    Assert.assertEquals(RAW_DATA_PATH, partitionManager.getNextPartition(getLatestCheckpoint(manager)).get());
}
/**
 * Lists the existing partitions that sort strictly after the given checkpoint.
 *
 * @param checkpoint lower bound; when absent every existing partition is returned
 * @return partitions whose names are lexicographically greater than the checkpoint value
 * @throws IOException if listing the existing partitions fails
 */
private List<String> listPartitionsAfterCheckpoint(final Optional<StringValue> checkpoint) throws IOException {
    // No checkpoint: everything is new.
    if (!checkpoint.isPresent()) {
        return getExistingPartitions();
    }
    final String lowerBound = checkpoint.get().getValue();
    return getExistingPartitions().stream()
            .filter(partition -> partition.compareTo(lowerBound) > 0)
            .collect(Collectors.toList());
}
// Asserts that the deserialized map and the manager's in-memory state are identical:
// every deserialized entry matches the stored value, and the key sets are equal
// (so neither side has extra or missing keys).
private void validateDeserializedMapEqualsInMemoryMap(final Map<String, StringValue> deserializedMap) {
    deserializedMap.forEach((key, value) -> {
        final Optional<StringValue> inMemory = this.metadataManager.get(key);
        Assert.assertTrue(inMemory.isPresent());
        Assert.assertEquals(inMemory.get().getValue(), value.getValue());
    });
    Assert.assertEquals(this.metadataManager.getAllKeys(), deserializedMap.keySet());
}
/**
 * Set the metadata for this DAG, if not empty
 * @param key metadata key to store under
 * @param value map serialized to JSON; an empty map is silently ignored
 * @throws MetadataException if the map cannot be serialized to JSON
 */
public void set(@NotEmpty final String key, @NonNull final Map<String, String> value) {
    try {
        if (!value.isEmpty()) {
            this.metadataManager.set(key, new StringValue(mapper.writeValueAsString(value)));
        }
    } catch (JsonProcessingException e) {
        // Bug fix: propagate the cause — the original wrapped the exception
        // without it, losing the serialization failure details.
        throw new MetadataException("Unable to set the JobManager metadata for key :" + key, e);
    }
}
/**
 * Returns true when the stored checkpoint sorts lexicographically after the next partition.
 *
 * NOTE(review): this dereferences {@code this.nextPartition.get()} without a presence
 * check — presumably callers only invoke it when a next partition exists; verify.
 *
 * @param checkPoint checkpoint to compare, possibly absent
 * @return true if the checkpoint is present and greater than the next partition
 */
private boolean checkpointGreaterThanNextPartition(@NonNull final Optional<StringValue> checkPoint) {
    // Return the condition directly; the former if/return-true/return-false was redundant.
    return checkPoint.isPresent()
            && checkPoint.get().getValue().compareTo(this.nextPartition.get()) > 0;
}
}
/**
 * Creates an HDFS-backed metadata manager rooted at the given base path.
 *
 * @param fs filesystem used to read and write metadata files
 * @param baseMetadataPath base directory under which this job's metadata lives
 * @param shouldSaveChanges flag presumably consulted by saveChanges() to decide
 *        whether to persist — confirm against saveChanges() implementation
 * @throws IOException if previously persisted metadata cannot be loaded
 */
public HDFSMetadataManager(@NonNull final FileSystem fs, @NotEmpty final String baseMetadataPath,
        @NonNull final AtomicBoolean shouldSaveChanges) throws IOException {
    this.fileSystem = fs;
    this.baseMetadataPath = baseMetadataPath;
    this.shouldSaveChanges = shouldSaveChanges;
    // Eagerly load any persisted metadata so reads reflect the last saved state.
    this.metadataMap = loadMetadata();
}
/**
 * Creates a Hoodie-backed metadata manager, eagerly reading existing metadata.
 *
 * @param shouldSaveChanges {@link AtomicBoolean} which {@link #saveChanges} will use to determine if
 * it should create new commit and save changes or not. It will save changes into new commit only if CAS operation
 * succeeds in setting it to true (from false).
 * @param hoodieConf {@link HoodieConfiguration}
 * @param jsc Spark context used for metadata operations
 * @throws IOException if existing metadata cannot be read from the Hoodie table
 */
public HoodieBasedMetadataManager(@NonNull final HoodieConfiguration hoodieConf,
        @NonNull final AtomicBoolean shouldSaveChanges, @NonNull final JavaSparkContext jsc) throws IOException {
    this.hoodieConf = hoodieConf;
    this.saveChanges = shouldSaveChanges;
    this.jsc = Optional.of(jsc);
    // Load any metadata already committed for this configuration.
    this.metadataMap = readMetadataInfo(this.hoodieConf);
}
/**
 * Test double for the Hoodie sink; simply delegates to the real superclass constructor.
 *
 * NOTE(review): {@code metadataMgr} is declared as a raw {@code IMetadataManager};
 * consider parameterizing it (e.g. {@code IMetadataManager<StringValue>}) for type safety.
 */
public MockHoodieSink(@NonNull final HoodieConfiguration hoodieConf,
        @NonNull final HoodieSinkDataConverter hoodieKeyGenerator,
        @NonNull final JavaSparkContext jsc,
        @NonNull final HoodieSinkOp op,
        @NonNull final IMetadataManager metadataMgr) {
    super(hoodieConf, hoodieKeyGenerator, jsc, op, metadataMgr);
}
@Test
public void testGetNextPartitionSinglePartition() throws IOException, InterruptedException {
    // One data file under PARTITION2; checkpoint is PARTITION1, which sorts before it.
    this.fileSystem.create(new Path(new Path(RAW_DATA_PATH, PARTITION2), FILE1));
    final HDFSPartitionManager partitionManager = new HDFSPartitionManager(JOBNAME,
            HDFSTestConstants.BASE_METADATA_PATH,
            RAW_DATA_PATH,
            this.fileSystem);
    final HDFSMetadataManager manager = new HDFSMetadataManager(this.fileSystem,
            new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(), new AtomicBoolean(true));
    manager.set(MetadataConstants.CHECKPOINT_KEY, new StringValue(PARTITION1));
    manager.saveChanges();
    // PARTITION2 is the only partition after the checkpoint.
    final Optional<String> next = partitionManager.getNextPartition(getLatestCheckpoint(manager));
    Assert.assertTrue(next.isPresent());
    Assert.assertEquals(PARTITION2, next.get());
}
@Test public void testHDFSReadWriteSingleMetadataFile() throws IOException { // Test in memory final StringValue val = new StringValue("testVal"); this.metadataManager.set(MetadataConstants.CHECKPOINT_KEY, val); final Optional<StringValue> readValue = this.metadataManager.get(MetadataConstants.CHECKPOINT_KEY); Assert.assertTrue(readValue.isPresent()); Assert.assertTrue(readValue.get().getValue().equals("testVal")); this.metadataManager.set("foo", new StringValue("bar")); // Serialize the metadata map to a file this.metadataManager.saveChanges(); final Optional<FileStatus> fs = this.metadataManager.getLatestMetadataFile(); Assert.assertTrue(fs.isPresent()); // Deserialize the metadata map and check contents are the same final Map<String, StringValue> loadedMap = this.metadataManager.loadMetadata(fs.get().getPath()); validateDeserializedMapEqualsInMemoryMap(loadedMap); }
@Test
public void testGetNextPartitionWithNonExistentCheckpoint() throws InterruptedException, IOException {
    // One partition on disk and no checkpoint ever persisted.
    this.fileSystem.create(new Path(new Path(RAW_DATA_PATH, PARTITION1), FILE1));
    final HDFSPartitionManager partitionManager = new HDFSPartitionManager(JOBNAME,
            HDFSTestConstants.BASE_METADATA_PATH,
            RAW_DATA_PATH,
            this.fileSystem);
    final HDFSMetadataManager manager = new HDFSMetadataManager(this.fileSystem,
            new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(), new AtomicBoolean(true));
    // Without a checkpoint the first existing partition is returned.
    final Optional<String> partition = partitionManager.getNextPartition(getLatestCheckpoint(manager));
    Assert.assertTrue(partition.isPresent());
    Assert.assertEquals(PARTITION1, partition.get());
}
// Restores previously persisted sink stats, if any, into the in-memory queue.
public void init() {
    final Optional<StringValue> serialisedStats = this.metadataManager.get(getMetakey());
    // Guard clause: nothing persisted yet.
    if (!serialisedStats.isPresent()) {
        return;
    }
    final Map<String, String> statHistory = MapUtil.deserializeMap(serialisedStats.get().getValue());
    // Entries are keyed by their ordinal position ("0", "1", ...); replay them in order.
    for (int index = 0; index < statHistory.size(); index++) {
        this.sinkStatQ.add(SinkStat.deserialize(statHistory.get(Integer.toString(index))));
    }
}
/**
 * Loads the persisted metadata and extracts the checkpoint entry, if one exists.
 *
 * @param metadataManager manager whose persisted metadata is read
 * @return the checkpoint value, or absent when none was saved
 * @throws IOException if the metadata cannot be loaded
 */
private Optional<StringValue> getLatestCheckpoint(@NonNull HDFSMetadataManager metadataManager) throws IOException {
    final Map<String, StringValue> persisted = metadataManager.loadMetadata();
    if (persisted.containsKey(MetadataConstants.CHECKPOINT_KEY)) {
        return Optional.of(persisted.get(MetadataConstants.CHECKPOINT_KEY));
    }
    return Optional.absent();
}
}
@Test
public void testGetNextPartitionMultipleDataPartitions() throws IOException, InterruptedException {
    // Two partitions on disk; the checkpoint points at PARTITION1, which precedes both.
    this.fileSystem.create(new Path(new Path(RAW_DATA_PATH, PARTITION2), FILE1));
    this.fileSystem.create(new Path(new Path(RAW_DATA_PATH, PARTITION3), FILE1));
    final HDFSPartitionManager partitionManager = new HDFSPartitionManager(JOBNAME,
            HDFSTestConstants.BASE_METADATA_PATH,
            RAW_DATA_PATH,
            this.fileSystem);
    final HDFSMetadataManager manager = new HDFSMetadataManager(this.fileSystem,
            new Path(HDFSTestConstants.BASE_METADATA_PATH, JOBNAME).toString(), new AtomicBoolean(true));
    manager.set(MetadataConstants.CHECKPOINT_KEY, new StringValue(PARTITION1));
    manager.saveChanges();
    // The smallest partition after the checkpoint must be chosen.
    final Optional<String> next = partitionManager.getNextPartition(getLatestCheckpoint(manager));
    Assert.assertTrue(next.isPresent());
    Assert.assertEquals(PARTITION2, next.get());
}