/**
 * Builds a new cache manager that reuses this instance's {@code indexIO} and {@code jsonMapper}
 * but is driven by the supplied configuration.
 *
 * @param config configuration handed to the new manager
 * @return a freshly constructed manager; this instance is not modified
 */
public SegmentLoaderLocalCacheManager withConfig(SegmentLoaderConfig config)
{
  final SegmentLoaderLocalCacheManager reconfigured =
      new SegmentLoaderLocalCacheManager(indexIO, config, jsonMapper);
  return reconfigured;
}
@Test public void testGetAndCleanSegmentFiles() throws Exception { final File localStorageFolder = tmpFolder.newFolder("local_storage_folder"); final DataSegment segmentToDownload = dataSegmentWithInterval("2014-10-20T00:00:00Z/P1D").withLoadSpec( ImmutableMap.of( "type", "local", "path", localStorageFolder.getCanonicalPath() + "/test_segment_loader" + "/2014-10-20T00:00:00.000Z_2014-10-21T00:00:00.000Z/2015-05-27T03:38:35.683Z" + "/0/index.zip" ) ); // manually create a local segment under localStorageFolder final File localSegmentFile = new File( localStorageFolder, "test_segment_loader/2014-10-20T00:00:00.000Z_2014-10-21T00:00:00.000Z/2015-05-27T03:38:35.683Z/0" ); localSegmentFile.mkdirs(); final File indexZip = new File(localSegmentFile, "index.zip"); indexZip.createNewFile(); Assert.assertFalse("Expect cache miss before downloading segment", manager.isSegmentLoaded(segmentToDownload)); manager.getSegmentFiles(segmentToDownload); Assert.assertTrue("Expect cache hit after downloading segment", manager.isSegmentLoaded(segmentToDownload)); manager.cleanup(segmentToDownload); Assert.assertFalse("Expect cache miss after dropping segment", manager.isSegmentLoaded(segmentToDownload)); }
/**
 * Derives a segment loader from {@code loader} whose only storage location is the given directory.
 *
 * @param storageDir directory used as the single storage location
 * @return the loader returned by {@code loader.withConfig(...)} for that single-location config
 */
public SegmentLoader manufacturate(File storageDir)
{
  final StorageLocationConfig onlyLocation = new StorageLocationConfig().setPath(storageDir);
  final SegmentLoaderConfig singleLocationConfig =
      new SegmentLoaderConfig().withLocations(Collections.singletonList(onlyLocation));
  return loader.withConfig(singleLocationConfig);
}
}
/**
 * Returns the local directory for the given segment, loading the segment first if no
 * storage location already contains its directory.
 *
 * @param segment segment whose files are requested
 * @return the segment's directory under the chosen storage location
 * @throws SegmentLoadingException if the segment had to be loaded and loading failed
 */
@Override
public File getSegmentFiles(DataSegment segment) throws SegmentLoadingException
{
  final StorageLocation alreadyLoadedAt = findStorageLocationIfLoaded(segment);
  final String relativeDir = DataSegmentPusher.getDefaultStorageDir(segment, false);

  // Reuse the location that already has the directory; otherwise load into one.
  final StorageLocation target =
      alreadyLoadedAt != null ? alreadyLoadedAt : loadSegmentWithRetry(segment, relativeDir);

  // The segment is registered with the location on both paths (found or freshly loaded).
  target.addSegment(segment);
  return new File(target.getPath(), relativeDir);
}
@Override public void cleanup(DataSegment segment) { if (!config.isDeleteOnRemove()) { return; } StorageLocation loc = findStorageLocationIfLoaded(segment); if (loc == null) { log.info("Asked to cleanup something[%s] that didn't exist. Skipping.", segment); return; } // If storageDir.mkdirs() success, but downloadStartMarker.createNewFile() failed, // in this case, findStorageLocationIfLoaded() will think segment is located in the failed storageDir which is actually not. // So we should always clean all possible locations here for (StorageLocation location : getSortedList(locations)) { File localStorageDir = new File(location.getPath(), DataSegmentPusher.getDefaultStorageDir(segment, false)); if (localStorageDir.exists()) { // Druid creates folders of the form dataSource/interval/version/partitionNum. // We need to clean up all these directories if they are all empty. cleanupCacheFiles(location.getPath(), localStorageDir); location.removeSegment(segment); } } }
/**
 * Tries each storage location in sorted order until the segment loads into one of them.
 * A location may fail because of an IO failure, most likely in two cases:<p>
 * 1. Druid does not have write access to the location, most likely because it is misconfigured<p>
 * 2. disk failure: Druid can no longer read from or write to this disk
 *
 * @param segment       segment to load
 * @param storageDirStr segment directory, relative to a storage location's path
 * @return the location the segment was loaded into
 * @throws SegmentLoadingException if no location could load the segment
 */
private StorageLocation loadSegmentWithRetry(DataSegment segment, String storageDirStr)
    throws SegmentLoadingException
{
  for (StorageLocation candidate : getSortedList(locations)) {
    if (!candidate.canHandle(segment)) {
      continue;
    }

    final File targetDir = new File(candidate.getPath(), storageDirStr);
    try {
      loadInLocationWithStartMarker(segment, targetDir);
      return candidate;
    }
    catch (SegmentLoadingException e) {
      final String failedLocation = candidate.getPath().getAbsolutePath();
      log.makeAlert(e, "Failed to load segment in current location %s, try next location if any", failedLocation)
         .addData("location", failedLocation)
         .emit();

      // Remove whatever the failed attempt left behind before moving to the next location.
      cleanupCacheFiles(candidate.getPath(), targetDir);
    }
  }
  throw new SegmentLoadingException("Failed to load segment %s in all locations.", segment.getId());
}
locations.add(locationConfig2); manager = new SegmentLoaderLocalCacheManager( TestHelper.getTestIndexIO(), new SegmentLoaderConfig().withLocations(locations), indexZip.createNewFile(); Assert.assertFalse("Expect cache miss before downloading segment", manager.isSegmentLoaded(segmentToDownload)); File segmentFile = manager.getSegmentFiles(segmentToDownload); Assert.assertTrue(segmentFile.getAbsolutePath().contains("/local_storage_folder2/")); Assert.assertTrue("Expect cache hit after downloading segment", manager.isSegmentLoaded(segmentToDownload)); manager.cleanup(segmentToDownload); Assert.assertFalse("Expect cache miss after dropping segment", manager.isSegmentLoaded(segmentToDownload));
/**
 * Checks that {@code fetchSegments} maps each requested segment to the file returned by the
 * (mocked) segment loader.
 */
@Test
public void testFetchSegments() throws SegmentLoadingException, IOException
{
  final File expectedFile = temporaryFolder.newFile();

  // The mocked loader returns the expected file for any segment ...
  EasyMock.expect(mockSegmentLoaderLocalCacheManager.getSegmentFiles(EasyMock.anyObject()))
          .andReturn(expectedFile)
          .anyTimes();
  // ... and returns itself whenever it is asked for a re-configured copy.
  EasyMock.expect(mockSegmentLoaderLocalCacheManager.withConfig(EasyMock.anyObject()))
          .andReturn(mockSegmentLoaderLocalCacheManager)
          .anyTimes();
  EasyMock.replay(mockSegmentLoaderLocalCacheManager);

  final DataSegment dataSegment = DataSegment
      .builder()
      .dataSource("source")
      .interval(Intervals.of("2012-01-01/P1D"))
      .version("1")
      .size(1)
      .build();
  final List<DataSegment> segments = ImmutableList.of(dataSegment);

  final Map actualFetchedSegment = taskToolbox.build(task).fetchSegments(segments);
  Assert.assertEquals(expectedFile, actualFetchedSegment.get(dataSegment));
}

@Test
/**
 * Obtains the segment's local files and turns them into a {@link Segment}.
 * If the segment directory contains a {@code factory.json}, the {@link SegmentizerFactory}
 * is deserialized from it; otherwise a {@link MMappedQueryableSegmentizerFactory} is used.
 *
 * @param segment segment to materialize
 * @return the segment produced by the chosen factory
 * @throws SegmentLoadingException if the files cannot be obtained or {@code factory.json} cannot be read
 */
@Override
public Segment getSegment(DataSegment segment) throws SegmentLoadingException
{
  final File segmentDir = getSegmentFiles(segment);
  final File factoryDescriptor = new File(segmentDir, "factory.json");

  final SegmentizerFactory segmentizer;
  if (!factoryDescriptor.exists()) {
    segmentizer = new MMappedQueryableSegmentizerFactory(indexIO);
  } else {
    try {
      segmentizer = jsonMapper.readValue(factoryDescriptor, SegmentizerFactory.class);
    }
    catch (IOException e) {
      throw new SegmentLoadingException(e, "%s", e.getMessage());
    }
  }

  return segmentizer.factorize(segment, segmentDir);
}
/**
 * Reports whether some storage location already has a directory for the given segment.
 *
 * @param segment segment to look for
 * @return {@code true} if {@code findStorageLocationIfLoaded} finds a location, {@code false} otherwise
 */
@Override
public boolean isSegmentLoaded(final DataSegment segment)
{
  final StorageLocation holder = findStorageLocationIfLoaded(segment);
  return holder != null;
}
/**
 * Walks the storage locations in sorted order and returns the first one whose segment
 * directory exists on disk. Only the directory's existence is checked, not its contents.
 *
 * @param segment segment to look for
 * @return the first location containing the segment's directory, or {@code null} if none does
 */
private StorageLocation findStorageLocationIfLoaded(final DataSegment segment)
{
  for (StorageLocation candidate : getSortedList(locations)) {
    final File locationRoot = candidate.getPath();
    final String relativeDir = DataSegmentPusher.getDefaultStorageDir(segment, false);
    final File segmentDir = new File(locationRoot, relativeDir);
    if (!segmentDir.exists()) {
      continue;
    }
    return candidate;
  }
  return null;
}
private void loadInLocationWithStartMarker(DataSegment segment, File storageDir) throws SegmentLoadingException { // We use a marker to prevent the case where a segment is downloaded, but before the download completes, // the parent directories of the segment are removed final File downloadStartMarker = new File(storageDir, "downloadStartMarker"); synchronized (lock) { if (!storageDir.mkdirs()) { log.debug("Unable to make parent file[%s]", storageDir); } try { if (!downloadStartMarker.createNewFile()) { throw new SegmentLoadingException("Was not able to create new download marker for [%s]", storageDir); } } catch (IOException e) { throw new SegmentLoadingException(e, "Unable to create marker file for [%s]", storageDir); } } loadInLocation(segment, storageDir); if (!downloadStartMarker.delete()) { throw new SegmentLoadingException("Unable to remove marker file for [%s]", storageDir); } }
/**
 * Deletes {@code cacheFile} and then walks upwards, deleting each parent directory that has
 * become empty, stopping when {@code baseFile} is reached ({@code baseFile} itself is never deleted).
 * <p>
 * Deletion is best-effort: a failure is logged, together with the causing exception, and not
 * rethrown. A directory that could not be deleted leaves its parent non-empty, which ends the
 * upward walk.
 *
 * @param baseFile  root directory at which the cleanup stops
 * @param cacheFile directory to delete
 */
private void cleanupCacheFiles(File baseFile, File cacheFile)
{
  if (cacheFile.equals(baseFile)) {
    return;
  }

  synchronized (lock) {
    log.info("Deleting directory[%s]", cacheFile);
    try {
      FileUtils.deleteDirectory(cacheFile);
    }
    catch (Exception e) {
      // Pass the exception to the logger so the reason for the failure is not lost.
      log.error(e, "Unable to remove file[%s]", cacheFile);
    }
  }

  final File parent = cacheFile.getParentFile();
  if (parent != null) {
    final File[] children = parent.listFiles();
    // listFiles() returns null when the parent is not a listable directory (e.g. already gone);
    // that case is treated like an empty directory and the walk continues upwards.
    if (children == null || children.length == 0) {
      cleanupCacheFiles(baseFile, parent);
    }
  }
}
/**
 * Checks that {@code isSegmentLoaded} reports a hit for a segment whose directory exists under
 * {@code localSegmentCacheFolder} and a miss for a segment with no directory.
 */
@Test
public void testIfSegmentIsLoaded()
{
  final DataSegment cachedSegment = dataSegmentWithInterval("2014-10-20T00:00:00Z/P1D");

  // Creating the segment's directory is enough for the manager to report it as loaded.
  final File cachedSegmentDir = new File(
      localSegmentCacheFolder,
      "test_segment_loader/2014-10-20T00:00:00.000Z_2014-10-21T00:00:00.000Z/2015-05-27T03:38:35.683Z/0"
  );
  cachedSegmentDir.mkdirs();

  final boolean hit = manager.isSegmentLoaded(cachedSegment);
  Assert.assertTrue("Expect cache hit", hit);

  // A segment for a different interval has no directory, so it must be a miss.
  final DataSegment uncachedSegment = dataSegmentWithInterval("2014-10-21T00:00:00Z/P1D");
  final boolean miss = manager.isSegmentLoaded(uncachedSegment);
  Assert.assertFalse("Expect cache miss", miss);
}
locations.add(locationConfig2); manager = new SegmentLoaderLocalCacheManager( TestHelper.getTestIndexIO(), new SegmentLoaderConfig().withLocations(locations), indexZip.createNewFile(); Assert.assertFalse("Expect cache miss before downloading segment", manager.isSegmentLoaded(segmentToDownload)); File segmentFile = manager.getSegmentFiles(segmentToDownload); Assert.assertTrue(segmentFile.getAbsolutePath().contains("/local_storage_folder/")); Assert.assertTrue("Expect cache hit after downloading segment", manager.isSegmentLoaded(segmentToDownload)); indexZip2.createNewFile(); File segmentFile2 = manager.getSegmentFiles(segmentToDownload2); Assert.assertTrue(segmentFile2.getAbsolutePath().contains("/local_storage_folder2/")); Assert.assertTrue("Expect cache hit after downloading segment", manager.isSegmentLoaded(segmentToDownload2)); manager.cleanup(segmentToDownload2); Assert.assertFalse("Expect cache miss after dropping segment", manager.isSegmentLoaded(segmentToDownload2));
/**
 * Fetches the local files for a segment and builds a {@link Segment} from them.
 * A {@code factory.json} inside the segment directory, when present, selects the
 * {@link SegmentizerFactory}; without one, {@link MMappedQueryableSegmentizerFactory} is used.
 *
 * @param segment segment to materialize
 * @return the segment built by the selected factory
 * @throws SegmentLoadingException if the files cannot be fetched or {@code factory.json} cannot be read
 */
@Override
public Segment getSegment(DataSegment segment) throws SegmentLoadingException
{
  final File localDir = getSegmentFiles(segment);
  final File factorySpec = new File(localDir, "factory.json");
  final boolean hasCustomFactory = factorySpec.exists();

  final SegmentizerFactory chosenFactory;
  if (hasCustomFactory) {
    try {
      chosenFactory = jsonMapper.readValue(factorySpec, SegmentizerFactory.class);
    }
    catch (IOException e) {
      throw new SegmentLoadingException(e, "%s", e.getMessage());
    }
  } else {
    chosenFactory = new MMappedQueryableSegmentizerFactory(indexIO);
  }

  final Segment result = chosenFactory.factorize(segment, localDir);
  return result;
}
@Override public void cleanup(DataSegment segment) { if (!config.isDeleteOnRemove()) { return; } StorageLocation loc = findStorageLocationIfLoaded(segment); if (loc == null) { log.info("Asked to cleanup something[%s] that didn't exist. Skipping.", segment); return; } // If storageDir.mkdirs() success, but downloadStartMarker.createNewFile() failed, // in this case, findStorageLocationIfLoaded() will think segment is located in the failed storageDir which is actually not. // So we should always clean all possible locations here for (StorageLocation location : getSortedList(locations)) { File localStorageDir = new File(location.getPath(), DataSegmentPusher.getDefaultStorageDir(segment, false)); if (localStorageDir.exists()) { // Druid creates folders of the form dataSource/interval/version/partitionNum. // We need to clean up all these directories if they are all empty. cleanupCacheFiles(location.getPath(), localStorageDir); location.removeSegment(segment); } } }
/**
 * Attempts to load the segment into each storage location, in sorted order, returning the
 * first location that succeeds. A location may fail because of an IO failure, most likely in two cases:<p>
 * 1. Druid has no write access to the location, most likely because the administrator misconfigured it<p>
 * 2. disk failure: Druid cannot read from or write to this disk anymore
 *
 * @param segment       segment to load
 * @param storageDirStr segment directory, relative to a storage location's path
 * @return the location that now holds the segment
 * @throws SegmentLoadingException if every location was skipped or failed
 */
private StorageLocation loadSegmentWithRetry(DataSegment segment, String storageDirStr)
    throws SegmentLoadingException
{
  for (StorageLocation attempt : getSortedList(locations)) {
    final boolean eligible = attempt.canHandle(segment);
    if (!eligible) {
      continue;
    }

    final File destination = new File(attempt.getPath(), storageDirStr);
    try {
      loadInLocationWithStartMarker(segment, destination);
      return attempt;
    }
    catch (SegmentLoadingException e) {
      final String locationPath = attempt.getPath().getAbsolutePath();
      log.makeAlert(e, "Failed to load segment in current location %s, try next location if any", locationPath)
         .addData("location", locationPath)
         .emit();

      // Clear out the partial download so this location does not look like it holds the segment.
      cleanupCacheFiles(attempt.getPath(), destination);
    }
  }
  throw new SegmentLoadingException("Failed to load segment %s in all locations.", segment.getIdentifier());
}
/**
 * Resolves the local directory holding the given segment. When no storage location has the
 * segment's directory yet, the segment is loaded via {@code loadSegmentWithRetry} first.
 *
 * @param segment segment whose files are requested
 * @return the segment's directory under the storage location that holds it
 * @throws SegmentLoadingException if a required load fails
 */
@Override
public File getSegmentFiles(DataSegment segment) throws SegmentLoadingException
{
  StorageLocation holder = findStorageLocationIfLoaded(segment);
  final String segmentRelativePath = DataSegmentPusher.getDefaultStorageDir(segment, false);

  final boolean needsLoad = holder == null;
  if (needsLoad) {
    holder = loadSegmentWithRetry(segment, segmentRelativePath);
  }

  // Registered with the location whether it was found or just loaded.
  holder.addSegment(segment);

  final File segmentDir = new File(holder.getPath(), segmentRelativePath);
  return segmentDir;
}
/**
 * Tells whether the segment's directory is present in any storage location.
 *
 * @param segment segment to look for
 * @return {@code false} when {@code findStorageLocationIfLoaded} yields {@code null}, {@code true} otherwise
 */
@Override
public boolean isSegmentLoaded(final DataSegment segment)
{
  if (findStorageLocationIfLoaded(segment) == null) {
    return false;
  }
  return true;
}