/**
 * Returns the fine partition that should have unprocessed entries in the Sweepable Cells table.
 *
 * @param shardStrategy desired shard and strategy
 * @param lastSweptTs exclusive minimum timestamp to check for
 * @param sweepTs exclusive maximum timestamp to check for
 * @return Optional containing the fine partition, or Optional.empty() if there are no more candidates before
 * sweepTs
 */
Optional<Long> nextSweepableTimestampPartition(ShardAndStrategy shardStrategy, long lastSweptTs, long sweepTs) {
    long minFineInclusive = SweepQueueUtils.tsPartitionFine(lastSweptTs + 1);
    long maxFineInclusive = SweepQueueUtils.tsPartitionFine(sweepTs - 1);
    return nextSweepablePartition(shardStrategy, minFineInclusive, maxFineInclusive);
}
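For orientation, a hedged usage sketch: callers would pass the previous progress timestamp and the current sweep timestamp, both treated as exclusive bounds. Here processPartition and sweepOneIteration are hypothetical names for illustration only, not taken from this section.

// Hedged usage sketch; processPartition is hypothetical and only for illustration.
void sweepOneIteration(ShardAndStrategy shardStrategy, long lastSweptTs, long sweepTs) {
    nextSweepableTimestampPartition(shardStrategy, lastSweptTs, sweepTs)
            .ifPresent(finePartition -> processPartition(shardStrategy, finePartition));
}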
private SweepableTimestampsTable.SweepableTimestampsColumn computeColumn(PartitionInfo info) {
    return SweepableTimestampsTable.SweepableTimestampsColumn.of(
            SweepQueueUtils.tsPartitionFine(info.timestamp()));
}
private long getTimestampOrPartition(PartitionInfo info, boolean isDedicatedRow) {
    return isDedicatedRow ? info.timestamp() : SweepQueueUtils.tsPartitionFine(info.timestamp());
}
private long endOfFinePartitionForTs(long timestamp) {
    return SweepQueueUtils.maxTsForFinePartition(tsPartitionFine(timestamp));
}
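These helpers lean on SweepQueueUtils' partition arithmetic. A minimal sketch of what that arithmetic presumably looks like; the granularity constant is an assumption for illustration, and the real constant and implementations live in SweepQueueUtils.

// Minimal sketch of the fine-partition arithmetic, assuming a granularity of 50_000;
// the actual constant and implementations live in SweepQueueUtils.
static final long TS_FINE_GRANULARITY = 50_000L; // assumed value

static long tsPartitionFine(long timestamp) {
    return timestamp / TS_FINE_GRANULARITY; // fine partition containing the timestamp
}

static long minTsForFinePartition(long finePartition) {
    return finePartition * TS_FINE_GRANULARITY; // first timestamp in the partition
}

static long maxTsForFinePartition(long finePartition) {
    return minTsForFinePartition(finePartition + 1) - 1; // last timestamp in the partition
}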
private void cleanSweepableCells(ShardAndStrategy shardStrategy, long oldProgress, long newProgress) {
    if (firstIterationOfSweep(oldProgress)) {
        return;
    }
    long lastSweptPartitionPreviously = SweepQueueUtils.tsPartitionFine(oldProgress);
    long minimumSweepPartitionNextIteration = SweepQueueUtils.tsPartitionFine(newProgress + 1);
    if (minimumSweepPartitionNextIteration > lastSweptPartitionPreviously) {
        // This is present for backcompat; we now clean up dedicated rows early, but this is
        // still needed for rows written before that change.
        cleanDedicatedRows(shardStrategy, lastSweptPartitionPreviously);
        cleanNonDedicatedRow(shardStrategy, lastSweptPartitionPreviously);
        log.info(
                "Deleted persisted sweep queue information in table {} for partition {}.",
                LoggingArgs.tableRef(TargetedSweepTableFactory.of()
                        .getSweepableCellsTable(null)
                        .getTableRef()),
                SafeArg.of("partition", lastSweptPartitionPreviously));
    }
}
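A plausible shape for the firstIterationOfSweep guard referenced above; the sentinel value is an assumption for illustration, and the real predicate and constant live elsewhere in this class and SweepQueueUtils.

// Hedged sketch: assumes progress starts at a -1 sentinel before any sweep has run.
private boolean firstIterationOfSweep(long oldProgress) {
    return oldProgress == -1L; // assumed "no progress recorded yet" sentinel
}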
@Test
public void fineToCoarseIsCorrect() {
    long prime = 674506111L;
    Assertions.assertThat(SweepQueueUtils.partitionFineToCoarse(SweepQueueUtils.tsPartitionFine(prime)))
            .isEqualTo(SweepQueueUtils.tsPartitionCoarse(prime));
}
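As a worked check under assumed granularities (fine = 50_000, coarse = 10_000_000; both illustrative, not taken from this section):

// Worked check under the assumed granularities:
//   tsPartitionFine(674506111L)   = 674506111 / 50_000     = 13490
//   partitionFineToCoarse(13490)  = 13490 / 200            = 67
//   tsPartitionCoarse(674506111L) = 674506111 / 10_000_000 = 67
// Both paths agree, which is exactly what the test asserts.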
private void assertSweepableCellsHasNoEntriesInPartitionOfTimestamp(long timestamp) {
    SweepBatch batch = sweepableCells.getBatchForPartition(
            ShardAndStrategy.conservative(CONS_SHARD), tsPartitionFine(timestamp), -1L, Long.MAX_VALUE);
    assertThat(batch.writes()).isEmpty();
}
@Test
public void getCorrectNextTimestampWhenMultipleCandidates() {
    for (long timestamp = 1000L; tsPartitionFine(timestamp) < 10L; timestamp += TS_FINE_GRANULARITY / 5) {
        writeToDefaultCellCommitted(sweepableTimestamps, timestamp, TABLE_CONS);
    }
    assertThat(readConservative(shardCons)).contains(tsPartitionFine(1000L));

    progress.updateLastSweptTimestamp(conservative(shardCons), 2L * TS_FINE_GRANULARITY);
    assertThat(readConservative(shardCons)).contains(tsPartitionFine(2L * TS_FINE_GRANULARITY + 1000L));

    setSweepTimestampAndGet(4 * TS_FINE_GRANULARITY);
    assertThat(readConservative(shardCons)).contains(tsPartitionFine(2L * TS_FINE_GRANULARITY + 1000L));
}
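Under an assumed TS_FINE_GRANULARITY of 50_000 (an assumption for illustration), the loop above writes five entries into each of fine partitions 0 through 9:

// Worked trace under the assumed granularity (50_000; loop step = 10_000):
//   timestamps 1000, 11000, 21000, 31000, 41000  -> fine partition 0
//   timestamps 51000, ..., 91000                 -> fine partition 1
//   ...
//   timestamps 451000, ..., 491000               -> fine partition 9
// After progress moves to 2 * TS_FINE_GRANULARITY, the next candidate partition is
// tsPartitionFine(2 * TS_FINE_GRANULARITY + 1000) = 2, matching the second assertion.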
@Test
public void canReadForAbortedTransactionMultipleTimes() {
    long timestamp = 2 * TS_FINE_GRANULARITY + 1L;
    writeToDefaultCellAborted(sweepableTimestamps, timestamp, TABLE_CONS);
    assertThat(readConservative(CONS_SHARD)).contains(tsPartitionFine(timestamp));
    assertThat(readConservative(CONS_SHARD)).contains(tsPartitionFine(timestamp));
}
@Test
public void canReadForUncommittedTransactionMultipleTimes() {
    long timestamp = 3 * TS_FINE_GRANULARITY + 1L;
    writeToDefaultCellUncommitted(sweepableTimestamps, timestamp, TABLE_CONS);
    assertThat(readConservative(CONS_SHARD)).contains(tsPartitionFine(timestamp));
    assertThat(readConservative(CONS_SHARD)).contains(tsPartitionFine(timestamp));
}
@Test
public void noNextTimestampWhenImmutableTsInSmallerPartitionForEitherSweepStrategy() {
    immutableTs = TS - TS_FINE_GRANULARITY;

    assertThat(tsPartitionFine(immutableTs)).isLessThan(TS_FINE_PARTITION);
    assertThat(readConservative(shardCons)).isEmpty();

    assertThat(tsPartitionFine(immutableTs)).isLessThan(TS2_FINE_PARTITION);
    assertThat(readThorough(shardThor)).isEmpty();
}
@Test
public void noNextTimestampWhenUnreadableTsInSmallerPartitionForConservativeOnly() {
    unreadableTs = TS - TS_FINE_GRANULARITY;

    assertThat(tsPartitionFine(unreadableTs)).isLessThan(TS_FINE_PARTITION);
    assertThat(readConservative(shardCons)).isEmpty();

    assertThat(tsPartitionFine(unreadableTs)).isLessThan(TS2_FINE_PARTITION);
    assertThat(readThorough(shardThor)).contains(TS2_FINE_PARTITION);
}
private void assertSweepableCellsHasEntryForTimestamp(long timestamp) {
    SweepBatch batch = sweepableCells.getBatchForPartition(
            ShardAndStrategy.conservative(CONS_SHARD), tsPartitionFine(timestamp), -1L, timestamp + 1);
    assertThat(batch.writes()).containsExactly(WriteInfo.write(TABLE_CONS, DEFAULT_CELL, timestamp));
}
@Test
public void canReadNextTsForTombstone() {
    long timestamp = 10L;
    putTombstoneToDefaultCommitted(sweepableTimestamps, timestamp, TABLE_CONS);
    assertThat(readConservative(CONS_SHARD)).contains(tsPartitionFine(timestamp));
}
@Test
public void noNextTimestampWhenSweepTimestampInSamePartitionAndLower() {
    immutableTs = minTsForFinePartition(TS_FINE_PARTITION);

    assertThat(tsPartitionFine(getSweepTsCons())).isEqualTo(TS_FINE_PARTITION);
    assertThat(getSweepTsCons()).isLessThan(TS);
    assertThat(readConservative(shardCons)).isEmpty();
}
@Test
public void canReadNextTimestampWhenSweepTimestampInSamePartitionAndGreater() {
    immutableTs = maxTsForFinePartition(TS_FINE_PARTITION);

    assertThat(tsPartitionFine(getSweepTsCons())).isEqualTo(TS_FINE_PARTITION);
    assertThat(getSweepTsCons()).isGreaterThan(TS);
    assertThat(readConservative(shardCons)).contains(TS_FINE_PARTITION);
}
@Test
public void multipleSweepersSweepDifferentShardsAndCallUnlockAfterwards() throws InterruptedException {
    int shards = 128;
    int sweepers = 8;
    int threads = shards / sweepers;
    TimelockService stickyLockService = createStickyLockService();
    createAndInitializeSweepersAndWaitForOneBackgroundIteration(sweepers, shards, threads, stickyLockService);

    for (int i = 0; i < shards; i++) {
        assertProgressUpdatedToTimestamp(maxTsForFinePartition(tsPartitionFine(unreadableTs - 1)), i);
        verify(stickyLockService, times(1)).unlock(ImmutableSet.of(LockToken.of(new UUID(i, 0L))));
    }

    // minimum: all threads on one host succeed, then on another, etc.:
    // threads + threads * 2 + ... + threads * sweepers
    verify(stickyLockService, atLeast(threads * sweepers * (sweepers - 1) / 2))
            .lock(any(LockRequest.class));
    // maximum: all but one succeed on each host, and only then those succeed:
    // shards + shards - 1 + ... + shards - (sweepers - 1)
    verify(stickyLockService, atMost(sweepers * shards - sweepers * (sweepers - 1) / 2))
            .lock(any(LockRequest.class));
}
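Plugging the test's constants into the bounds above (shards = 128, sweepers = 8, threads = 128 / 8 = 16):

// Worked numbers for the lock-call bounds verified above:
//   minimum: threads * sweepers * (sweepers - 1) / 2     = 16 * 8 * 7 / 2 = 448
//   maximum: sweepers * shards - sweepers * (sweepers - 1) / 2 = 1024 - 28 = 996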
@Test
public void sweepableTimestampsGetsScrubbedWhenLastSweptProgressesInNewCoarsePartition() {
    long tsSecondPartitionFine = LOW_TS + TS_FINE_GRANULARITY;
    long largestFirstPartitionCoarse = TS_COARSE_GRANULARITY - 1L;
    long thirdPartitionCoarse = 2 * TS_COARSE_GRANULARITY;
    enqueueWriteCommitted(TABLE_CONS, LOW_TS);
    enqueueWriteCommitted(TABLE_CONS, tsSecondPartitionFine);
    enqueueWriteCommitted(TABLE_CONS, largestFirstPartitionCoarse);
    enqueueWriteCommitted(TABLE_CONS, thirdPartitionCoarse);

    sweepQueue.sweepNextBatch(ShardAndStrategy.conservative(CONS_SHARD));
    assertLowestFinePartitionInSweepableTimestampsEquals(tsPartitionFine(LOW_TS));

    sweepQueue.sweepNextBatch(ShardAndStrategy.conservative(CONS_SHARD));
    assertLowestFinePartitionInSweepableTimestampsEquals(tsPartitionFine(LOW_TS));

    // after this sweep we progress to thirdPartitionCoarse - 1
    sweepQueue.sweepNextBatch(ShardAndStrategy.conservative(CONS_SHARD));
    assertLowestFinePartitionInSweepableTimestampsEquals(tsPartitionFine(thirdPartitionCoarse));
}
@Test
public void sweepableTimestampsGetsScrubbedWhenNoMoreToSweepButSweepTsInNewCoarsePartition() {
    long tsSecondPartitionFine = LOW_TS + TS_FINE_GRANULARITY;
    long largestFirstPartitionCoarse = TS_COARSE_GRANULARITY - 1L;
    enqueueWriteCommitted(TABLE_CONS, LOW_TS);
    enqueueWriteCommitted(TABLE_CONS, tsSecondPartitionFine);
    enqueueWriteCommitted(TABLE_CONS, largestFirstPartitionCoarse);

    sweepQueue.sweepNextBatch(ShardAndStrategy.conservative(CONS_SHARD));
    assertLowestFinePartitionInSweepableTimestampsEquals(tsPartitionFine(LOW_TS));

    sweepQueue.sweepNextBatch(ShardAndStrategy.conservative(CONS_SHARD));
    assertLowestFinePartitionInSweepableTimestampsEquals(tsPartitionFine(LOW_TS));

    // after this sweep we progress to sweepTsConservative - 1
    sweepQueue.sweepNextBatch(ShardAndStrategy.conservative(CONS_SHARD));
    assertNoEntriesInSweepableTimestampsBeforeSweepTimestamp();
}
@Test
public void sweepProgressesToEndOfPartitionWhenFewValuesAndSweepTsLarge() {
    long writeTs = getSweepTsCons() - 3 * TS_FINE_GRANULARITY;
    enqueueWriteCommitted(TABLE_CONS, writeTs);
    enqueueWriteCommitted(TABLE_CONS, writeTs + 5);

    sweepQueue.sweepNextBatch(ShardAndStrategy.conservative(CONS_SHARD));
    assertProgressUpdatedToTimestamp(maxTsForFinePartition(tsPartitionFine(writeTs)));

    sweepQueue.sweepNextBatch(ShardAndStrategy.conservative(CONS_SHARD));
    assertProgressUpdatedToTimestamp(getSweepTsCons() - 1L);
}