/**
 * Produces a fresh iterator for the given inputs.
 *
 * @param compactionConfigs compaction config per dataSource name
 * @param dataSources       segment timeline per dataSource name
 * @param skipIntervals     intervals to skip per dataSource name
 * @return a newly constructed {@link NewestSegmentFirstIterator} built from the three arguments
 */
@Override
public CompactionSegmentIterator reset(
    Map<String, DataSourceCompactionConfig> compactionConfigs,
    Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources,
    Map<String, List<Interval>> skipIntervals
)
{
  return new NewestSegmentFirstIterator(
      compactionConfigs,
      dataSources,
      skipIntervals
  );
}
}
/**
 * Builds the iterator and primes the queue.
 *
 * <p>First pass: for every dataSource that has a compaction config and a non-empty timeline, the initial
 * search intervals are computed by {@code findInitialSearchInterval}; a cursor is registered in
 * {@code timelineIterators} only when at least one search interval remains.
 *
 * <p>Second pass: {@code updateQueue} is invoked once for every entry of {@code compactionConfigs}.
 *
 * @param compactionConfigs compaction config per dataSource name
 * @param dataSources       segment timeline per dataSource name
 * @param skipIntervals     intervals to skip per dataSource name; the per-dataSource lookup result is
 *                          passed to {@code findInitialSearchInterval} as is
 * @throws ISE if {@code compactionConfigs} maps a dataSource name to {@code null}
 */
NewestSegmentFirstIterator(
    Map<String, DataSourceCompactionConfig> compactionConfigs,
    Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources,
    Map<String, List<Interval>> skipIntervals
)
{
  this.compactionConfigs = compactionConfigs;
  this.dataSources = dataSources;
  this.timelineIterators = new HashMap<>(dataSources.size());

  for (Entry<String, VersionedIntervalTimeline<String, DataSegment>> timelineEntry : dataSources.entrySet()) {
    final String name = timelineEntry.getKey();
    final VersionedIntervalTimeline<String, DataSegment> segmentTimeline = timelineEntry.getValue();
    final DataSourceCompactionConfig dataSourceConfig = compactionConfigs.get(name);

    // Nothing to iterate when the dataSource is not configured for compaction or has no segments.
    if (dataSourceConfig == null || segmentTimeline.isEmpty()) {
      continue;
    }

    final List<Interval> intervalsToSearch = findInitialSearchInterval(
        segmentTimeline,
        dataSourceConfig.getSkipOffsetFromLatest(),
        skipIntervals.get(name)
    );

    // No cursor is registered when there is no interval left to search.
    if (intervalsToSearch.isEmpty()) {
      continue;
    }

    timelineIterators.put(name, new CompactibleTimelineObjectHolderCursor(segmentTimeline, intervalsToSearch));
  }

  for (Entry<String, DataSourceCompactionConfig> configEntry : compactionConfigs.entrySet()) {
    final String name = configEntry.getKey();
    final DataSourceCompactionConfig dataSourceConfig = configEntry.getValue();

    if (dataSourceConfig == null) {
      throw new ISE("Unknown dataSource[%s]", name);
    }

    updateQueue(name, dataSourceConfig);
  }
}
/**
 * Looks up the next batch of segments to compact for {@code dataSourceName} and enqueues it.
 *
 * <p>The cursor registered in {@link #timelineIterators} for the dataSource is handed to
 * {@code findSegmentsToCompact}, so {@link #timelineIterators} is updated according to the found segments:
 * they are removed from the timeline of the given dataSource. A batch is added to the queue only when it
 * holds more than one segment.
 *
 * <p>When no cursor is registered for the dataSource, a warning is logged and nothing is enqueued.
 *
 * @param dataSourceName name of the dataSource to search
 * @param config         compaction config forwarded to {@code findSegmentsToCompact}
 */
private void updateQueue(String dataSourceName, DataSourceCompactionConfig config)
{
  final CompactibleTimelineObjectHolderCursor cursor = timelineIterators.get(dataSourceName);

  if (cursor != null) {
    final SegmentsToCompact candidates = findSegmentsToCompact(cursor, config);

    // Only batches with more than one segment are queued.
    if (candidates.getNumSegments() > 1) {
      queue.add(new QueueEntry(candidates.segments));
    }
  } else {
    log.warn("Cannot find timeline for dataSource[%s]. Skip this dataSource", dataSourceName);
  }
}
/**
 * Returns the segments of the entry at the head of the queue and refills the queue for the same dataSource.
 *
 * <p>The dataSource is taken from the first segment of the returned list; {@code updateQueue} is then
 * called for it with the config looked up in {@code compactionConfigs}.
 *
 * @return the segments of the polled queue entry; the list must be non-empty or the
 *         {@code Preconditions.checkState} check fails
 * @throws NoSuchElementException if {@code hasNext()} is false or the queue yields no entry
 */
@Override
public List<DataSegment> next()
{
  if (!hasNext()) {
    throw new NoSuchElementException();
  }

  final QueueEntry head = queue.poll();
  if (head == null) {
    throw new NoSuchElementException();
  }

  final List<DataSegment> segmentsToReturn = head.segments;
  Preconditions.checkState(!segmentsToReturn.isEmpty(), "Queue entry must not be empty");

  // Refill the queue for the dataSource that was just consumed.
  final String dataSource = segmentsToReturn.get(0).getDataSource();
  final DataSourceCompactionConfig dataSourceConfig = compactionConfigs.get(dataSource);
  updateQueue(dataSource, dataSourceConfig);

  return segmentsToReturn;
}
final List<Interval> fullSkipIntervals = sortAndAddSkipIntervalFromLatest( last.getInterval().getEnd(), skipOffset, final List<Interval> filteredInterval = filterSkipIntervals(totalInterval, fullSkipIntervals); final List<Interval> searchIntervals = new ArrayList<>();
/**
 * Checks {@code sortAndAddSkipIntervalFromLatest} with an unsorted input list and a skip offset measured
 * back from 2019-01-01.
 *
 * <p>The expected output is in ascending order, and the explicit 2018-12-30/2018-12-31 interval does not
 * appear separately: the result contains 2018-12-29/2019-01-01 instead.
 */
@Test
public void testAddSkipIntervalFromLatestAndSort()
{
  // Deliberately given in descending order.
  final List<Interval> unsortedSkipIntervals = ImmutableList.of(
      Intervals.of("2018-12-30/2018-12-31"),
      Intervals.of("2018-12-24/2018-12-25")
  );

  // NOTE(review): presumably Joda-Time's Period(hours, minutes, seconds, millis), i.e. 72 hours, which
  // matches the expected 2018-12-29/2019-01-01 interval below; confirm against the imports.
  final List<Interval> actual = NewestSegmentFirstIterator.sortAndAddSkipIntervalFromLatest(
      DateTimes.of("2019-01-01"),
      new Period(72, 0, 0, 0),
      unsortedSkipIntervals
  );

  final List<Interval> expected = ImmutableList.of(
      Intervals.of("2018-12-24/2018-12-25"),
      Intervals.of("2018-12-29/2019-01-01")
  );

  Assert.assertEquals(expected, actual);
}
}
/**
 * Checks {@code filterSkipIntervals} for the total interval 2018-01-01/2019-01-01.
 *
 * <p>The expected intervals are the parts of the total interval that are not covered by the supplied skip
 * intervals; the first skip interval starts before the total interval begins.
 */
@Test
public void testFilterSkipIntervals()
{
  final Interval totalInterval = Intervals.of("2018-01-01/2019-01-01");

  final List<Interval> intervalsToSkip = Lists.newArrayList(
      Intervals.of("2017-12-01/2018-01-15"),
      Intervals.of("2018-03-02/2018-07-23"),
      Intervals.of("2018-10-01/2018-10-02"),
      Intervals.of("2018-12-25/2018-12-31")
  );

  final List<Interval> remaining = NewestSegmentFirstIterator.filterSkipIntervals(totalInterval, intervalsToSkip);

  final List<Interval> expectedRemaining = ImmutableList.of(
      Intervals.of("2018-01-15/2018-03-02"),
      Intervals.of("2018-07-23/2018-10-01"),
      Intervals.of("2018-10-02/2018-12-25"),
      Intervals.of("2018-12-31/2019-01-01")
  );

  Assert.assertEquals(expectedRemaining, remaining);
}
/**
 * Returns the segments of the entry at the head of the queue and refills the queue for the same dataSource.
 *
 * <p>The dataSource is taken from the first segment of the returned list; {@code updateQueue} is then
 * called for it with the config looked up in {@code compactionConfigs}.
 *
 * @return the segments of the polled queue entry; the list must be non-empty or the
 *         {@code Preconditions.checkState} check fails
 * @throws NoSuchElementException if {@code hasNext()} is false or the queue yields no entry
 */
@Override
public List<DataSegment> next()
{
  if (!hasNext()) {
    throw new NoSuchElementException();
  }

  final QueueEntry head = queue.poll();
  if (head == null) {
    throw new NoSuchElementException();
  }

  final List<DataSegment> segmentsToReturn = head.segments;
  Preconditions.checkState(!segmentsToReturn.isEmpty(), "Queue entry must not be empty");

  // Refill the queue for the dataSource that was just consumed.
  final String dataSource = segmentsToReturn.get(0).getDataSource();
  final DataSourceCompactionConfig dataSourceConfig = compactionConfigs.get(dataSource);
  updateQueue(dataSource, dataSourceConfig);

  return segmentsToReturn;
}
/**
 * Builds the iterator and primes the queue.
 *
 * <p>First pass: for every dataSource that has a compaction config and a non-empty timeline, a single
 * initial search interval is computed by {@code findInitialSearchInterval} and a cursor over it is
 * registered in {@code timelineIterators}.
 *
 * <p>Second pass: {@code updateQueue} is invoked once for every entry of {@code compactionConfigs}.
 *
 * @param compactionConfigs compaction config per dataSource name
 * @param dataSources       segment timeline per dataSource name
 * @throws ISE if {@code compactionConfigs} maps a dataSource name to {@code null}
 */
NewestSegmentFirstIterator(
    Map<String, DataSourceCompactionConfig> compactionConfigs,
    Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources
)
{
  this.compactionConfigs = compactionConfigs;
  this.dataSources = dataSources;
  this.timelineIterators = new HashMap<>(dataSources.size());

  for (Entry<String, VersionedIntervalTimeline<String, DataSegment>> timelineEntry : dataSources.entrySet()) {
    final String name = timelineEntry.getKey();
    final VersionedIntervalTimeline<String, DataSegment> segmentTimeline = timelineEntry.getValue();
    final DataSourceCompactionConfig dataSourceConfig = compactionConfigs.get(name);

    // Nothing to iterate when the dataSource is not configured for compaction or has no segments.
    if (dataSourceConfig == null || segmentTimeline.isEmpty()) {
      continue;
    }

    final Interval intervalToSearch = findInitialSearchInterval(
        segmentTimeline,
        dataSourceConfig.getSkipOffsetFromLatest()
    );
    timelineIterators.put(name, new CompactibleTimelineObjectHolderCursor(segmentTimeline, intervalToSearch));
  }

  for (Entry<String, DataSourceCompactionConfig> configEntry : compactionConfigs.entrySet()) {
    final String name = configEntry.getKey();
    final DataSourceCompactionConfig dataSourceConfig = configEntry.getValue();

    if (dataSourceConfig == null) {
      throw new ISE("Unknown dataSource[%s]", name);
    }

    updateQueue(name, dataSourceConfig);
  }
}
/**
 * Produces a fresh iterator for the given inputs.
 *
 * @param compactionConfigs compaction config per dataSource name
 * @param dataSources       segment timeline per dataSource name
 * @return a newly constructed {@link NewestSegmentFirstIterator} built from the two arguments
 */
@Override
public CompactionSegmentIterator reset(
    Map<String, DataSourceCompactionConfig> compactionConfigs,
    Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources
)
{
  return new NewestSegmentFirstIterator(
      compactionConfigs,
      dataSources
  );
}
}
/**
 * Looks up the next batch of segments to compact for {@code dataSourceName} and enqueues it.
 *
 * <p>The cursor registered in {@link #timelineIterators} for the dataSource is handed to
 * {@code findSegmentsToCompact}, so {@link #timelineIterators} is updated according to the found segments:
 * they are removed from the timeline of the given dataSource. A batch is added to the queue only when it
 * holds more than one segment.
 *
 * <p>When no cursor is registered for the dataSource, a warning is logged and nothing is enqueued.
 *
 * @param dataSourceName name of the dataSource to search
 * @param config         compaction config forwarded to {@code findSegmentsToCompact}
 */
private void updateQueue(String dataSourceName, DataSourceCompactionConfig config)
{
  final CompactibleTimelineObjectHolderCursor cursor = timelineIterators.get(dataSourceName);

  if (cursor != null) {
    final SegmentsToCompact candidates = findSegmentsToCompact(cursor, config);

    // Only batches with more than one segment are queued.
    if (candidates.getNumSegments() > 1) {
      queue.add(new QueueEntry(candidates.segments));
    }
  } else {
    log.warn("Cannot find timeline for dataSource[%s]. Skip this dataSource", dataSourceName);
  }
}