/**
 * Builds, from scratch, the per-key accumulators of the window whose last
 * frame is {@code frameTs}.
 * <p>
 * Visits frame timestamps from
 * {@code frameTs - windowSize + frameSize} up to and including
 * {@code frameTs}, stepping by the frame size. For every key found in a
 * visited frame, the frame's accumulator is combined into that key's window
 * accumulator using {@code combineFn}. A timestamp with no stored frame
 * contributes nothing.
 *
 * @param frameTs timestamp of the last frame included in the window
 * @return a new map from key to the combined accumulator; empty if none of
 *         the visited frames holds any key
 */
private Map<K, A> recomputeWindow(long frameTs) {
    Map<K, A> combined = new HashMap<>();
    long cursor = frameTs - winPolicy.windowSize() + winPolicy.frameSize();
    while (cursor <= frameTs) {
        assert combineFn != null : "combineFn == null";
        Map<K, A> frame = tsToKeyToAcc.getOrDefault(cursor, emptyMap());
        for (Entry<K, A> keyAndAcc : frame.entrySet()) {
            // Lazily create the key's window accumulator on first sight of the key
            A target = combined.computeIfAbsent(keyAndAcc.getKey(), k -> aggrOp.createFn().get());
            combineFn.accept(target, keyAndAcc.getValue());
        }
        cursor += winPolicy.frameSize();
    }
    return combined;
}
private void completeWindow(long frameTs) { long frameToEvict = frameTs - winPolicy.windowSize() + winPolicy.frameSize(); Map<K, A> evictedFrame = tsToKeyToAcc.remove(frameToEvict); if (evictedFrame != null) { lazyAdd(totalKeysInFrames, -evictedFrame.size()); lazyAdd(totalFrames, -1); if (!winPolicy.isTumbling() && aggrOp.deductFn() != null) { // deduct trailing-edge frame patchSlidingWindow(aggrOp.deductFn(), evictedFrame); } } assert tsToKeyToAcc.values().stream().mapToInt(Map::size).sum() == totalKeysInFrames.get() : "totalKeysInFrames mismatch, expected=" + tsToKeyToAcc.values().stream().mapToInt(Map::size).sum() + ", actual=" + totalKeysInFrames.get(); }
/**
 * Emits the windows still pending in the stored frames.
 * <p>
 * On the first call with stored frames, a traverser is created by
 * {@code windowTraverserAndEvictor} for the limit
 * {@code topTs + windowSize - frameSize} and kept in {@code flushTraverser}.
 * The traverser resets that field to {@code null} when it first returns
 * {@code null}. Subsequent calls continue emitting from the kept traverser.
 *
 * @return {@code true} if there are no stored frames and no traverser in
 *         progress; otherwise the result of {@code emitFromTraverser}
 */
private boolean flushBuffers() {
    if (flushTraverser != null) {
        // A flush is already in progress, keep draining it
        return emitFromTraverser(flushTraverser);
    }
    if (tsToKeyToAcc.isEmpty()) {
        return true;
    }
    long flushLimit = topTs + winPolicy.windowSize() - winPolicy.frameSize();
    flushTraverser = windowTraverserAndEvictor(flushLimit)
            .onFirstNull(() -> flushTraverser = null);
    return emitFromTraverser(flushTraverser);
}
private Traverser<Object> windowTraverserAndEvictor(long wm) { long rangeStart; if (nextWinToEmit != Long.MIN_VALUE) { rangeStart = nextWinToEmit; } else { if (tsToKeyToAcc.isEmpty()) { // no item was observed, but initialize nextWinToEmit to the next window return Traversers.empty(); } // This is the first watermark we are acting upon. Find the lowest frame // timestamp that can be emitted: at most the top existing timestamp lower // than wm, but even lower than that if there are older frames on record. // The above guarantees that the sliding window can be correctly // initialized using the "add leading/deduct trailing" approach because we // start from a window that covers at most one existing frame -- the lowest // one on record. long bottomTs = tsToKeyToAcc .keySet().stream() .min(naturalOrder()) .orElseThrow(() -> new AssertionError("Failed to find the min key in a non-empty map")); rangeStart = min(bottomTs, winPolicy.floorFrameTs(wm)); } return traverseStream(range(rangeStart, wm, winPolicy.frameSize()).boxed()) .flatMap(winEnd -> traverseIterable(computeWindow(winEnd).entrySet()) .map(e -> mapToOutputFn.apply( winEnd - winPolicy.windowSize(), winEnd, e.getKey(), aggrOp.finishFn().apply(e.getValue()))) .onFirstNull(() -> completeWindow(winEnd))); }
@Override public boolean finishSnapshotRestore() { // In the first stage we should theoretically have saved `nextWinToEmit` // to the snapshot. We don't bother since the first stage is effectively a // tumbling window and it makes no difference in that case. So we don't // restore and remain at MIN_VALUE. if (isLastStage) { // if nextWinToEmit is not on frame boundary, push it to next boundary nextWinToEmit = minRestoredNextWinToEmit > Long.MIN_VALUE ? winPolicy.higherFrameTs(minRestoredNextWinToEmit - 1) : minRestoredNextWinToEmit; logFine(getLogger(), "Restored nextWinToEmit from snapshot to: %s", nextWinToEmit); // Delete too old restored frames. This can happen when restoring from exported state and new job // has smaller window size if (nextWinToEmit > Long.MIN_VALUE + winPolicy.windowSize()) { for (long ts = minRestoredFrameTs; ts <= nextWinToEmit - winPolicy.windowSize(); ts += winPolicy.frameSize()) { Map<K, A> removed = tsToKeyToAcc.remove(ts); if (removed != null) { lazyAdd(totalFrames, -1); lazyAdd(totalKeysInFrames, -removed.size()); } } } } return true; }