/**
 * Initializes the side input storage manager.
 *
 * Reads the offsets from the local offset files and the oldest offsets, computes the starting offsets from
 * the two, seeds the last processed offsets with the file offsets and then initializes the store directories.
 */
public void init() {
  LOG.info("Initializing side input stores.");

  Map<SystemStreamPartition, String> fileOffsets = getFileOffsets();
  LOG.info("File offsets for the task {}: {}", taskName, fileOffsets);

  Map<SystemStreamPartition, String> oldestOffsets = getOldestOffsets();
  LOG.info("Oldest offsets for the task {}: {}", taskName, oldestOffsets);

  startingOffsets = getStartingOffsets(fileOffsets, oldestOffsets);
  LOG.info("Starting offsets for the task {}: {}", taskName, startingOffsets);

  lastProcessedOffsets.putAll(fileOffsets);
  LOG.info("Last processed offsets for the task {}: {}", taskName, lastProcessedOffsets);

  // Directories are set up last, after the offset files have been read above.
  initializeStoreDirectories();
}
/**
 * Initializes the store directories for all the stores:
 *  1. Cleans up the directories for invalid stores.
 *  2. Ensures that the directories exist for persisted stores.
 *
 * A failure to create a directory is logged as a warning rather than being silently ignored.
 */
private void initializeStoreDirectories() {
  LOG.info("Initializing side input store directories.");

  stores.keySet().forEach(storeName -> {
    File storeLocation = getStoreLocation(storeName);
    String storePath = storeLocation.toPath().toString();

    if (!isValidSideInputStore(storeName, storeLocation)) {
      LOG.info("Cleaning up the store directory at {} for {}", storePath, storeName);
      FileUtil.rm(storeLocation);
    }

    if (isPersistedStore(storeName) && !storeLocation.exists()) {
      LOG.info("Creating {} as the store directory for the side input store {}", storePath, storeName);
      // mkdirs() also returns false when the directory already exists, so re-check before warning.
      if (!storeLocation.mkdirs() && !storeLocation.exists()) {
        LOG.warn("Failed to create {} as the store directory for the side input store {}", storePath, storeName);
      }
    }
  });
}
/**
 * Flushes every underlying side input store and then writes the offset files to disk.
 */
public void flush() {
  LOG.info("Flushing the side input stores.");

  for (StorageEngine engine : stores.values()) {
    engine.flush();
  }

  writeOffsetFiles();
}
/**
 * Verifies that {@code flush()} flushes the underlying store, writes the offset files, and that the offset
 * recorded for the side input SSP can be read back from the offset file.
 */
@Test
public void testFlush() {
  final String storeName = "test-flush-store";
  final String taskName = "test-flush-task";
  final SystemStreamPartition ssp = new SystemStreamPartition("test-system", "test-stream", new Partition(0));
  final String offset = "123";

  TaskSideInputStorageManager testSideInputStorageManager =
      new MockTaskSideInputStorageManagerBuilder(taskName, LOGGED_STORE_DIR)
          .addLoggedStore(storeName, ImmutableSet.of(ssp))
          .build();

  initializeSideInputStorageManager(testSideInputStorageManager);
  testSideInputStorageManager.updateLastProcessedOffset(ssp, offset);
  testSideInputStorageManager.flush();

  // Verify the store held by the manager itself; iterating a locally created empty map verified nothing.
  // NOTE(review): assumes the builder registers a Mockito mock for the logged store - confirm.
  verify(testSideInputStorageManager.getStore(storeName)).flush();
  verify(testSideInputStorageManager).writeOffsetFiles();

  File storeDir = testSideInputStorageManager.getStoreLocation(storeName);
  assertTrue("Store directory: " + storeDir.getPath() + " is missing.", storeDir.exists());

  Map<SystemStreamPartition, String> fileOffsets = testSideInputStorageManager.getFileOffsets();
  assertTrue("Failed to get offset for ssp: " + ssp.toString() + " from file.", fileOffsets.containsKey(ssp));
  // Expected value first, actual second, so that a failure message reads correctly.
  assertEquals("Mismatch between last processed offset and file offset.", offset, fileOffsets.get(ssp));
}
/** * Writes the offset files for all side input stores one by one. There is one offset file per store. * Its contents are a JSON encoded mapping from each side input SSP to its last processed offset, and a checksum. */ @VisibleForTesting void writeOffsetFiles() { storeToSSps.entrySet().stream() .filter(entry -> isPersistedStore(entry.getKey())) // filter out in-memory side input stores .forEach((entry) -> { String storeName = entry.getKey(); Map<SystemStreamPartition, String> offsets = entry.getValue().stream() .filter(lastProcessedOffsets::containsKey) .collect(Collectors.toMap(Function.identity(), lastProcessedOffsets::get)); try { String fileContents = OBJECT_WRITER.writeValueAsString(offsets); File offsetFile = new File(getStoreLocation(storeName), OFFSET_FILE); FileUtil.writeWithChecksum(offsetFile, fileContents); } catch (Exception e) { throw new SamzaException("Failed to write offset file for side input store: " + storeName, e); } }); }
/**
 * Collects the side input SSP offsets stored in the local offset files of all valid side input stores.
 * Stores that are not valid are skipped; a failure to read or parse a store's offset file is logged
 * as a warning and that store contributes no offsets.
 *
 * @return a {@link Map} of {@link SystemStreamPartition} to offset in the offset files.
 */
@SuppressWarnings("unchecked")
@VisibleForTesting
Map<SystemStreamPartition, String> getFileOffsets() {
  LOG.info("Loading initial offsets from the file for side input stores.");

  Map<SystemStreamPartition, String> offsetsFromFiles = new HashMap<>();
  for (String storeName : stores.keySet()) {
    LOG.debug("Reading local offsets for store: {}", storeName);

    File storeDir = getStoreLocation(storeName);
    if (!isValidSideInputStore(storeName, storeDir)) {
      continue;
    }

    try {
      String serializedOffsets = StorageManagerUtil.readOffsetFile(storeDir, OFFSET_FILE);
      Map<SystemStreamPartition, String> storeOffsets =
          OBJECT_MAPPER.readValue(serializedOffsets, OFFSETS_TYPE_REFERENCE);
      offsetsFromFiles.putAll(storeOffsets);
    } catch (Exception e) {
      LOG.warn("Failed to load the offset file for side input store:" + storeName, e);
    }
  }

  return offsetsFromFiles;
}
/**
 * Verifies that offsets written through {@code writeOffsetFiles()} for several SSPs of a logged store
 * are all returned by {@code getFileOffsets()}.
 */
@Test
public void testGetFileOffsets() {
  final String storeName = "test-get-file-offsets-store";
  final String taskName = "test-get-file-offsets-task";
  final String offset = "123";

  Set<SystemStreamPartition> ssps = IntStream.range(1, 6)
      .mapToObj(idx -> new SystemStreamPartition("test-system", "test-stream", new Partition(idx)))
      .collect(Collectors.toSet());

  TaskSideInputStorageManager testSideInputStorageManager =
      new MockTaskSideInputStorageManagerBuilder(taskName, LOGGED_STORE_DIR)
          .addLoggedStore(storeName, ssps)
          .build();

  initializeSideInputStorageManager(testSideInputStorageManager);
  ssps.forEach(ssp -> testSideInputStorageManager.updateLastProcessedOffset(ssp, offset));
  testSideInputStorageManager.writeOffsetFiles();

  Map<SystemStreamPartition, String> fileOffsets = testSideInputStorageManager.getFileOffsets();

  ssps.forEach(ssp -> {
    assertTrue("Failed to get offset for ssp: " + ssp.toString() + " from file.", fileOffsets.containsKey(ssp));
    // Expected value first, actual second, so that a failure message reads correctly.
    assertEquals("Mismatch between last processed offset and file offset.", offset, fileOffsets.get(ssp));
  });
}
@Test public void testWriteOffsetFilesForNonPersistedStore() { final String storeName = "test-write-offset-non-persisted-store"; final String taskName = "test-write-offset-for-non-persisted-task"; TaskSideInputStorageManager testSideInputStorageManager = new MockTaskSideInputStorageManagerBuilder(taskName, NON_LOGGED_STORE_DIR) .addInMemoryStore(storeName, ImmutableSet.of()) .build(); initializeSideInputStorageManager(testSideInputStorageManager); testSideInputStorageManager.writeOffsetFiles(); // should be no-op File storeDir = testSideInputStorageManager.getStoreLocation(storeName); assertFalse("Store directory: " + storeDir.getPath() + " should not be created for non-persisted store", storeDir.exists()); }
/**
 * Decides whether the given side input store is valid: it must be a persisted store, must not be
 * reported as stale by {@code StorageManagerUtil.isStaleStore}, and its offset file must be reported
 * as valid by {@code StorageManagerUtil.isOffsetFileValid}. The checks run in that order and stop at
 * the first one that fails.
 *
 * @param storeName name of the side input store
 * @param storeLocation directory of the store
 * @return true if all three checks pass, false otherwise
 */
private boolean isValidSideInputStore(String storeName, File storeLocation) {
  if (!isPersistedStore(storeName)) {
    return false;
  }

  boolean stale = StorageManagerUtil.isStaleStore(
      storeLocation, OFFSET_FILE, STORE_DELETE_RETENTION_MS, clock.currentTimeMillis());
  if (stale) {
    return false;
  }

  return StorageManagerUtil.isOffsetFileValid(storeLocation, OFFSET_FILE);
}
/**
 * Creates a side input storage manager for a task.
 *
 * Stores the given collaborators, validates the store configuration and builds the reverse mapping
 * from each side input SSP to the names of the stores that consume it.
 *
 * @param taskName name of the task
 * @param streamMetadataCache stream metadata cache kept by this manager
 * @param storeBaseDir base directory of the stores
 * @param sideInputStores mapping from store name to its storage engine
 * @param storesToProcessor mapping from store name to its side inputs processor
 * @param storesToSSPs mapping from store name to its side input SSPs
 * @param systemAdmins system admins kept by this manager
 * @param config job config; not read by this constructor
 * @param clock clock kept by this manager
 */
public TaskSideInputStorageManager(
    TaskName taskName,
    StreamMetadataCache streamMetadataCache,
    String storeBaseDir,
    Map<String, StorageEngine> sideInputStores,
    Map<String, SideInputsProcessor> storesToProcessor,
    Map<String, Set<SystemStreamPartition>> storesToSSPs,
    SystemAdmins systemAdmins,
    Config config,
    Clock clock) {
  this.clock = clock;
  this.stores = sideInputStores;
  this.storeBaseDir = storeBaseDir;
  this.storeToSSps = storesToSSPs;
  this.streamMetadataCache = streamMetadataCache;
  this.systemAdmins = systemAdmins;
  this.taskName = taskName;
  this.storeToProcessor = storesToProcessor;

  validateStoreConfiguration();

  this.sspsToStores = new HashMap<>();
  storesToSSPs.forEach((store, ssps) -> {
    for (SystemStreamPartition ssp : ssps) {
      // computeIfAbsent returns the existing or newly created set, so a single call suffices.
      sspsToStores.computeIfAbsent(ssp, key -> new HashSet<>()).add(store);
    }
  });
}
/**
 * Checks that stopping the storage manager stops the underlying store and writes the offset files.
 */
@Test
public void testStop() {
  final String inMemoryStoreName = "test-stop-store";
  final String task = "test-stop-task";

  MockTaskSideInputStorageManagerBuilder builder =
      new MockTaskSideInputStorageManagerBuilder(task, NON_LOGGED_STORE_DIR);
  TaskSideInputStorageManager storageManager = builder
      .addInMemoryStore(inMemoryStoreName, ImmutableSet.of())
      .build();

  initializeSideInputStorageManager(storageManager);
  storageManager.stop();

  // The store must have been stopped and the offsets persisted as part of stop().
  verify(storageManager.getStore(inMemoryStoreName)).stop();
  verify(storageManager).writeOffsetFiles();
}
/**
 * Stubs {@code getStartingOffsets} on the given storage manager to return an empty map and then
 * runs {@code init()} on it.
 *
 * @param testSideInputStorageManager the storage manager under test; must accept Mockito stubbing
 */
private void initializeSideInputStorageManager(TaskSideInputStorageManager testSideInputStorageManager) {
  Map<SystemStreamPartition, String> noStartingOffsets = new HashMap<>();
  doReturn(noStartingOffsets)
      .when(testSideInputStorageManager)
      .getStartingOffsets(any(), any());

  testSideInputStorageManager.init();
}
/**
 * Checks that initializing the storage manager creates the directory of a logged store.
 */
@Test
public void testInit() {
  final String loggedStoreName = "test-init-store";
  final String task = "test-init-task";

  MockTaskSideInputStorageManagerBuilder builder =
      new MockTaskSideInputStorageManagerBuilder(task, LOGGED_STORE_DIR);
  TaskSideInputStorageManager storageManager = builder
      .addLoggedStore(loggedStoreName, ImmutableSet.of())
      .build();

  initializeSideInputStorageManager(storageManager);

  File storeDirectory = storageManager.getStoreLocation(loggedStoreName);
  assertTrue("Store directory: " + storeDirectory.getPath() + " is missing.", storeDirectory.exists());
}
/**
 * Verifies that {@code getStartingOffsets} returns a starting offset for every side input SSP when
 * given file offsets and oldest offsets that differ per partition (for even partitions the file offset
 * is the larger value, for odd partitions the oldest offset is).
 */
@Test
public void testGetStartingOffsets() {
  final String storeName = "test-get-starting-offset-store";
  final String taskName = "test-get-starting-offset-task";

  Set<SystemStreamPartition> ssps = IntStream.range(1, 6)
      .mapToObj(idx -> new SystemStreamPartition("test-system", "test-stream", new Partition(idx)))
      .collect(Collectors.toSet());

  TaskSideInputStorageManager testSideInputStorageManager =
      new MockTaskSideInputStorageManagerBuilder(taskName, LOGGED_STORE_DIR)
          .addLoggedStore(storeName, ssps)
          .build();

  initializeSideInputStorageManager(testSideInputStorageManager);

  Map<SystemStreamPartition, String> fileOffsets = ssps.stream()
      .collect(Collectors.toMap(Function.identity(), ssp -> {
        int partitionId = ssp.getPartition().getPartitionId();
        int offset = partitionId % 2 == 0 ? partitionId + 10 : partitionId;
        return String.valueOf(offset);
      }));

  Map<SystemStreamPartition, String> oldestOffsets = ssps.stream()
      .collect(Collectors.toMap(Function.identity(), ssp -> {
        int partitionId = ssp.getPartition().getPartitionId();
        int offset = partitionId % 2 == 0 ? partitionId : partitionId + 10;
        return String.valueOf(offset);
      }));

  // The helper above stubbed getStartingOffsets; restore the real implementation for these arguments.
  doCallRealMethod().when(testSideInputStorageManager).getStartingOffsets(fileOffsets, oldestOffsets);

  Map<SystemStreamPartition, String> startingOffsets =
      testSideInputStorageManager.getStartingOffsets(fileOffsets, oldestOffsets);

  // assertEquals reports the actual size on failure; the expected count is derived from the input set.
  assertEquals("Failed to get starting offsets for all ssps", ssps.size(), startingOffsets.size());
}
/**
 * Verifies that {@code writeOffsetFiles()} persists the last processed offsets of two logged stores,
 * that both store directories exist, and that both offsets can be read back from the offset files.
 */
@Test
public void testWriteOffsetFilesForPersistedStore() {
  final String storeName = "test-write-offset-persisted-store";
  final String storeName2 = "test-write-offset-persisted-store-2";
  final String taskName = "test-write-offset-for-persisted-task";
  final String offset = "123";
  final SystemStreamPartition ssp = new SystemStreamPartition("test-system", "test-stream", new Partition(0));
  final SystemStreamPartition ssp2 = new SystemStreamPartition("test-system2", "test-stream2", new Partition(0));

  TaskSideInputStorageManager testSideInputStorageManager =
      new MockTaskSideInputStorageManagerBuilder(taskName, LOGGED_STORE_DIR)
          .addLoggedStore(storeName, ImmutableSet.of(ssp))
          .addLoggedStore(storeName2, ImmutableSet.of(ssp2))
          .build();

  initializeSideInputStorageManager(testSideInputStorageManager);
  testSideInputStorageManager.updateLastProcessedOffset(ssp, offset);
  testSideInputStorageManager.updateLastProcessedOffset(ssp2, offset);
  testSideInputStorageManager.writeOffsetFiles();

  File storeDir = testSideInputStorageManager.getStoreLocation(storeName);
  assertTrue("Store directory: " + storeDir.getPath() + " is missing.", storeDir.exists());

  // The second store gets its own directory and offset file, so check it as well.
  File storeDir2 = testSideInputStorageManager.getStoreLocation(storeName2);
  assertTrue("Store directory: " + storeDir2.getPath() + " is missing.", storeDir2.exists());

  Map<SystemStreamPartition, String> fileOffsets = testSideInputStorageManager.getFileOffsets();

  // Expected value first, actual second, so that a failure message reads correctly.
  assertTrue("Failed to get offset for ssp: " + ssp.toString() + " from file.", fileOffsets.containsKey(ssp));
  assertEquals("Mismatch between last processed offset and file offset.", offset, fileOffsets.get(ssp));

  assertTrue("Failed to get offset for ssp: " + ssp2.toString() + " from file.", fileOffsets.containsKey(ssp2));
  assertEquals("Mismatch between last processed offset and file offset.", offset, fileOffsets.get(ssp2));
}
/** * Writes the offset files for all side input stores one by one. There is one offset file per store. * Its contents are a JSON encoded mapping from each side input SSP to its last processed offset, and a checksum. */ @VisibleForTesting void writeOffsetFiles() { storeToSSps.entrySet().stream() .filter(entry -> isPersistedStore(entry.getKey())) // filter out in-memory side input stores .forEach((entry) -> { String storeName = entry.getKey(); Map<SystemStreamPartition, String> offsets = entry.getValue().stream() .filter(lastProcessedOffsets::containsKey) .collect(Collectors.toMap(Function.identity(), lastProcessedOffsets::get)); try { String fileContents = OBJECT_WRITER.writeValueAsString(offsets); File offsetFile = new File(getStoreLocation(storeName), OFFSET_FILE); FileUtil.writeWithChecksum(offsetFile, fileContents); } catch (Exception e) { throw new SamzaException("Failed to write offset file for side input store: " + storeName, e); } }); }
/**
 * Collects the side input SSP offsets stored in the local offset files of all valid side input stores.
 * Stores that are not valid are skipped; a failure to read or parse a store's offset file is logged
 * as a warning and that store contributes no offsets.
 *
 * @return a {@link Map} of {@link SystemStreamPartition} to offset in the offset files.
 */
@SuppressWarnings("unchecked")
@VisibleForTesting
Map<SystemStreamPartition, String> getFileOffsets() {
  LOG.info("Loading initial offsets from the file for side input stores.");

  Map<SystemStreamPartition, String> offsetsFromFiles = new HashMap<>();
  for (String storeName : stores.keySet()) {
    LOG.debug("Reading local offsets for store: {}", storeName);

    File storeDir = getStoreLocation(storeName);
    if (!isValidSideInputStore(storeName, storeDir)) {
      continue;
    }

    try {
      String serializedOffsets = StorageManagerUtil.readOffsetFile(storeDir, OFFSET_FILE);
      Map<SystemStreamPartition, String> storeOffsets =
          OBJECT_MAPPER.readValue(serializedOffsets, OFFSETS_TYPE_REFERENCE);
      offsetsFromFiles.putAll(storeOffsets);
    } catch (Exception e) {
      LOG.warn("Failed to load the offset file for side input store:" + storeName, e);
    }
  }

  return offsetsFromFiles;
}
/**
 * Decides whether the given side input store is valid: it must be a persisted store, must not be
 * reported as stale by {@code StorageManagerUtil.isStaleStore}, and its offset file must be reported
 * as valid by {@code StorageManagerUtil.isOffsetFileValid}. The checks run in that order and stop at
 * the first one that fails.
 *
 * @param storeName name of the side input store
 * @param storeLocation directory of the store
 * @return true if all three checks pass, false otherwise
 */
private boolean isValidSideInputStore(String storeName, File storeLocation) {
  if (!isPersistedStore(storeName)) {
    return false;
  }

  boolean stale = StorageManagerUtil.isStaleStore(
      storeLocation, OFFSET_FILE, STORE_DELETE_RETENTION_MS, clock.currentTimeMillis());
  if (stale) {
    return false;
  }

  return StorageManagerUtil.isOffsetFileValid(storeLocation, OFFSET_FILE);
}
/**
 * Creates a side input storage manager for a task.
 *
 * Stores the given collaborators, validates the store configuration and builds the reverse mapping
 * from each side input SSP to the names of the stores that consume it.
 *
 * @param taskName name of the task
 * @param streamMetadataCache stream metadata cache kept by this manager
 * @param storeBaseDir base directory of the stores
 * @param sideInputStores mapping from store name to its storage engine
 * @param storesToProcessor mapping from store name to its side inputs processor
 * @param storesToSSPs mapping from store name to its side input SSPs
 * @param systemAdmins system admins kept by this manager
 * @param config job config; not read by this constructor
 * @param clock clock kept by this manager
 */
public TaskSideInputStorageManager(
    TaskName taskName,
    StreamMetadataCache streamMetadataCache,
    String storeBaseDir,
    Map<String, StorageEngine> sideInputStores,
    Map<String, SideInputsProcessor> storesToProcessor,
    Map<String, Set<SystemStreamPartition>> storesToSSPs,
    SystemAdmins systemAdmins,
    Config config,
    Clock clock) {
  this.clock = clock;
  this.stores = sideInputStores;
  this.storeBaseDir = storeBaseDir;
  this.storeToSSps = storesToSSPs;
  this.streamMetadataCache = streamMetadataCache;
  this.systemAdmins = systemAdmins;
  this.taskName = taskName;
  this.storeToProcessor = storesToProcessor;

  validateStoreConfiguration();

  this.sspsToStores = new HashMap<>();
  storesToSSPs.forEach((store, ssps) -> {
    for (SystemStreamPartition ssp : ssps) {
      // computeIfAbsent returns the existing or newly created set, so a single call suffices.
      sspsToStores.computeIfAbsent(ssp, key -> new HashSet<>()).add(store);
    }
  });
}
/**
 * Initializes the side input storage manager.
 *
 * Reads the offsets from the local offset files and the oldest offsets, computes the starting offsets from
 * the two, seeds the last processed offsets with the file offsets and then initializes the store directories.
 */
public void init() {
  LOG.info("Initializing side input stores.");

  Map<SystemStreamPartition, String> fileOffsets = getFileOffsets();
  LOG.info("File offsets for the task {}: {}", taskName, fileOffsets);

  Map<SystemStreamPartition, String> oldestOffsets = getOldestOffsets();
  LOG.info("Oldest offsets for the task {}: {}", taskName, oldestOffsets);

  startingOffsets = getStartingOffsets(fileOffsets, oldestOffsets);
  LOG.info("Starting offsets for the task {}: {}", taskName, startingOffsets);

  lastProcessedOffsets.putAll(fileOffsets);
  LOG.info("Last processed offsets for the task {}: {}", taskName, lastProcessedOffsets);

  // Directories are set up last, after the offset files have been read above.
  initializeStoreDirectories();
}