/**
 * Builds the two exception handlers exercised by the tests. Both are given the same per-host and total retry
 * limits and their own freshly constructed {@link Blacklist}; they differ only in the third supplier, which
 * yields {@code false} for the legacy handler and {@code true} for the conservative handler.
 */
@Before
public void setup() {
    // Legacy flavour: third supplier always answers false.
    handlerLegacy = new CassandraRequestExceptionHandler(
            () -> MAX_RETRIES_PER_HOST,
            () -> MAX_RETRIES_TOTAL,
            () -> false,
            new Blacklist(config));

    // Conservative flavour: identical limits, third supplier always answers true.
    handlerConservative = new CassandraRequestExceptionHandler(
            () -> MAX_RETRIES_PER_HOST,
            () -> MAX_RETRIES_TOTAL,
            () -> true,
            new Blacklist(config));
}
} catch (Exception e) { completelyUnresponsiveHosts.put(host, e); blacklist.add(host);
void checkAndUpdate(Map<InetSocketAddress, CassandraClientPoolingContainer> pools) { // Check blacklist and re-integrate or continue to wait as necessary for (Map.Entry<InetSocketAddress, Long> blacklistedEntry : blacklist.entrySet()) { if (coolOffPeriodExpired(blacklistedEntry)) { InetSocketAddress host = blacklistedEntry.getKey(); if (isHostHealthy(pools.get(host))) { blacklist.remove(host); log.info("Added host {} back into the pool after a waiting period and successful health check.", SafeArg.of("host", CassandraLogHelper.host(host))); } } } }
/**
 * Chooses a host to serve a request for {@code key}.
 *
 * <p>If {@code getHostsFor(key)} returns {@code null}, a random good host is used. Otherwise the hosts for
 * the key are filtered through the blacklist and, when any remain, one is selected via
 * {@code getRandomHostByActiveConnections}. If that yields nothing, a warning is logged and a random good
 * host is used instead.
 *
 * @param key the key whose hosts should be preferred
 * @return the address of the selected host
 */
public InetSocketAddress getRandomHostForKey(byte[] key) {
    List<InetSocketAddress> owners = getHostsFor(key);
    if (owners == null) {
        log.info("We attempted to route your query to a cassandra host that already contains the relevant data."
                + " However, the mapping of which host contains which data is not available yet."
                + " We will choose a random host instead.");
        return getRandomGoodHost().getHost();
    }

    Set<InetSocketAddress> liveOwners = blacklist.filterBlacklistedHostsFrom(owners);
    // Only consult the connection-count based selection when there is at least one live owner.
    Optional<InetSocketAddress> selected = liveOwners.isEmpty()
            ? Optional.empty()
            : getRandomHostByActiveConnections(liveOwners);
    if (selected.isPresent()) {
        return selected.get();
    }

    log.warn("Perf / cluster stability issue. Token aware query routing has failed because there are no known "
            + "live hosts that claim ownership of the given range. Falling back to choosing a random live node."
            + " Current host blacklist is {}."
            + " Current state logged at TRACE",
            SafeArg.of("blacklistedHosts", blacklist.blacklistDetails()));
    log.trace("Current ring view is: {}.",
            SafeArg.of("tokenMap", getRingViewDescription()));
    return getRandomGoodHost().getHost();
}
/**
 * With a single-host pool: a failing request must leave the host blacklisted, and a subsequent successful
 * request against the same host must take it off the blacklist again.
 */
@Test
public void successfulRequestCausesHostToBeRemovedFromBlacklist() {
    CassandraClientPool clientPool = clientPoolWithServersInCurrentPool(ImmutableSet.of(HOST_1));
    CassandraClientPoolingContainer hostOneContainer = clientPool.getCurrentPools().get(HOST_1);

    // Toggle that decides whether the host currently times out.
    AtomicBoolean shouldFail = new AtomicBoolean(true);
    setConditionalTimeoutFailureForHost(hostOneContainer, unused -> shouldFail.get());

    // Phase 1: the host fails, the request gives up, and the host ends up blacklisted.
    assertThatThrownBy(() -> runNoopWithRetryOnHost(HOST_1, clientPool))
            .isInstanceOf(AtlasDbDependencyException.class);
    assertThat(blacklist.contains(HOST_1), is(true));

    // Phase 2: the host recovers and a successful request clears the blacklisting.
    shouldFail.set(false);
    runNoopWithRetryOnHost(HOST_1, clientPool);
    assertThat(blacklist.contains(HOST_1), is(false));
}
/**
 * Performs one refresh pass: re-evaluates the blacklist against the current pools, updates the set of
 * servers in the pool, and debug-logs the resulting pool state.
 *
 * <p>The server set comes from {@code cassandra.refreshTokenRangesAndGetServers()} when
 * {@code config.autoRefreshNodes()} is enabled, and from {@code config.servers()} otherwise.
 */
private synchronized void refreshPool() {
    blacklist.checkAndUpdate(cassandra.getPools());

    if (!config.autoRefreshNodes()) {
        // Auto refresh disabled: stick to the statically configured servers.
        setServersInPoolTo(config.servers());
    } else {
        setServersInPoolTo(cassandra.refreshTokenRangesAndGetServers());
    }

    cassandra.debugLogStateOfPool();
}
/**
 * Removes a server from the pool: clears it from the blacklist, attempts to shut down its pooling
 * container, and then drops it from {@code currentPools}.
 *
 * <p>Any exception thrown while looking up or shutting down the container is logged at WARN and does not
 * prevent the server from being removed from {@code currentPools}.
 *
 * @param removedServerAddress address of the server being removed
 */
public void removePool(InetSocketAddress removedServerAddress) {
    blacklist.remove(removedServerAddress);

    try {
        // Best-effort cleanup; the lookup stays inside the try so that any failure here is only logged.
        currentPools.get(removedServerAddress).shutdownPooling();
    } catch (Exception shutdownFailure) {
        log.warn("While removing a host ({}) from the pool, we were unable to gently cleanup resources.",
                SafeArg.of("removedServerAddress", CassandraLogHelper.host(removedServerAddress)),
                shutdownFailure);
    }

    currentPools.remove(removedServerAddress);
}
/**
 * Emits a multi-line description of the pool at DEBUG level: the blacklist description, the hosts currently
 * pooled, and for each pooled host its active checkouts and pool size. Does nothing when DEBUG logging is
 * disabled.
 *
 * <p>Non-positive active checkout counts are rendered as {@code (unknown)} and non-positive pool sizes as
 * {@code (not bounded)}.
 */
public void debugLogStateOfPool() {
    if (!log.isDebugEnabled()) {
        // Skip building the report entirely when nobody will see it.
        return;
    }

    StringBuilder report = new StringBuilder();
    report.append(
            String.format("POOL STATUS: Current blacklist = %s,%n current hosts in pool = %s%n",
                    blacklist.describeBlacklistedHosts(),
                    currentPools.keySet().toString()));

    for (Map.Entry<InetSocketAddress, CassandraClientPoolingContainer> hostAndPool : currentPools.entrySet()) {
        CassandraClientPoolingContainer container = hostAndPool.getValue();
        int checkedOut = container.getActiveCheckouts();
        int capacity = container.getPoolSize();

        String checkedOutText = checkedOut > 0 ? Integer.toString(checkedOut) : "(unknown)";
        String capacityText = capacity > 0 ? Integer.toString(capacity) : "(not bounded)";

        report.append(
                String.format("\tPOOL STATUS: Pooled host %s has %s out of %s connections checked out.%n",
                        hostAndPool.getKey(),
                        checkedOutText,
                        capacityText));
    }

    log.debug("Current pool state: {}", report.toString());
}
/**
 * Selects a pooling container for a host that satisfies {@code predicate}, preferring hosts that are not
 * blacklisted.
 *
 * <p>If no pooled host matches the predicate, an empty {@link Optional} is returned. If every matching host
 * is blacklisted, the selection is made among all matching hosts instead. The result is also empty when
 * {@code getRandomHostByActiveConnections} yields no host or the selected host has no container in the map.
 *
 * @param predicate filter applied to the addresses of the currently pooled hosts
 * @return the container for the selected host, if any
 */
public Optional<CassandraClientPoolingContainer> getRandomGoodHostForPredicate(
        Predicate<InetSocketAddress> predicate) {
    // Read the field once so that filtering and the final lookup go through the same map reference.
    Map<InetSocketAddress, CassandraClientPoolingContainer> poolsByHost = currentPools;

    Set<InetSocketAddress> matchingHosts = poolsByHost.keySet().stream()
            .filter(predicate)
            .collect(Collectors.toSet());
    if (matchingHosts.isEmpty()) {
        log.info("No hosts match the provided predicate.");
        return Optional.empty();
    }

    Set<InetSocketAddress> candidates = blacklist.filterBlacklistedHostsFrom(matchingHosts);
    if (candidates.isEmpty()) {
        log.info("There are no known live hosts in the connection pool matching the predicate. We're choosing"
                + " one at random in a last-ditch attempt at forward progress.");
        candidates = matchingHosts;
    }

    // Optional.map turns a null lookup result into an empty Optional.
    return getRandomHostByActiveConnections(candidates).map(poolsByHost::get);
}
/**
 * Simulates a rolling restart across a two-host pool: first HOST_1 times out, then HOST_2 does. A request
 * aimed at the currently failing host must still complete, HOST_1 must be blacklisted after the first
 * phase, and it must no longer be blacklisted after the second.
 */
@Test
public void resilientToRollingRestarts() {
    CassandraClientPool clientPool = clientPoolWithServersInCurrentPool(ImmutableSet.of(HOST_1, HOST_2));

    // Whichever host this reference points at is the one that currently times out.
    AtomicReference<InetSocketAddress> hostCurrentlyDown = new AtomicReference<>(HOST_1);
    clientPool.getCurrentPools().values().forEach(hostPool -> setConditionalTimeoutFailureForHost(
            hostPool, candidate -> candidate.getHost().equals(hostCurrentlyDown.get())));

    // Phase 1: HOST_1 is down.
    runNoopWithRetryOnHost(HOST_1, clientPool);
    assertThat(blacklist.contains(HOST_1), is(true));

    // Phase 2: HOST_1 is back and HOST_2 goes down.
    hostCurrentlyDown.set(HOST_2);
    runNoopWithRetryOnHost(HOST_2, clientPool);
    assertThat(blacklist.contains(HOST_1), is(false));
}
void checkAndUpdate(Map<InetSocketAddress, CassandraClientPoolingContainer> pools) { // Check blacklist and re-integrate or continue to wait as necessary for (Map.Entry<InetSocketAddress, Long> blacklistedEntry : blacklist.entrySet()) { if (coolOffPeriodExpired(blacklistedEntry)) { InetSocketAddress host = blacklistedEntry.getKey(); if (isHostHealthy(pools.get(host))) { blacklist.remove(host); log.info("Added host {} back into the pool after a waiting period and successful health check.", SafeArg.of("host", CassandraLogHelper.host(host))); } } } }
/**
 * Chooses a host to serve a request for {@code key}.
 *
 * <p>If {@code getHostsFor(key)} returns {@code null}, a random good host is used. Otherwise the hosts for
 * the key are filtered through the blacklist and, when any remain, one is selected via
 * {@code getRandomHostByActiveConnections}. If that yields nothing, a warning is logged and a random good
 * host is used instead.
 *
 * @param key the key whose hosts should be preferred
 * @return the address of the selected host
 */
public InetSocketAddress getRandomHostForKey(byte[] key) {
    List<InetSocketAddress> owners = getHostsFor(key);
    if (owners == null) {
        log.info("We attempted to route your query to a cassandra host that already contains the relevant data."
                + " However, the mapping of which host contains which data is not available yet."
                + " We will choose a random host instead.");
        return getRandomGoodHost().getHost();
    }

    Set<InetSocketAddress> liveOwners = blacklist.filterBlacklistedHostsFrom(owners);
    // Only consult the connection-count based selection when there is at least one live owner.
    Optional<InetSocketAddress> selected = liveOwners.isEmpty()
            ? Optional.empty()
            : getRandomHostByActiveConnections(liveOwners);
    if (selected.isPresent()) {
        return selected.get();
    }

    log.warn("Perf / cluster stability issue. Token aware query routing has failed because there are no known "
            + "live hosts that claim ownership of the given range. Falling back to choosing a random live node."
            + " Current host blacklist is {}."
            + " Current state logged at TRACE",
            SafeArg.of("blacklistedHosts", blacklist.blacklistDetails()));
    log.trace("Current ring view is: {}.",
            SafeArg.of("tokenMap", getRingViewDescription()));
    return getRandomGoodHost().getHost();
}
/**
 * Performs one refresh pass: re-evaluates the blacklist against the current pools, updates the set of
 * servers in the pool, and debug-logs the resulting pool state.
 *
 * <p>The server set comes from {@code cassandra.refreshTokenRangesAndGetServers()} when
 * {@code config.autoRefreshNodes()} is enabled, and from {@code config.servers()} otherwise.
 */
private synchronized void refreshPool() {
    blacklist.checkAndUpdate(cassandra.getPools());

    if (!config.autoRefreshNodes()) {
        // Auto refresh disabled: stick to the statically configured servers.
        setServersInPoolTo(config.servers());
    } else {
        setServersInPoolTo(cassandra.refreshTokenRangesAndGetServers());
    }

    cassandra.debugLogStateOfPool();
}
/**
 * Removes a server from the pool: clears it from the blacklist, attempts to shut down its pooling
 * container, and then drops it from {@code currentPools}.
 *
 * <p>Any exception thrown while looking up or shutting down the container is logged at WARN and does not
 * prevent the server from being removed from {@code currentPools}.
 *
 * @param removedServerAddress address of the server being removed
 */
public void removePool(InetSocketAddress removedServerAddress) {
    blacklist.remove(removedServerAddress);

    try {
        // Best-effort cleanup; the lookup stays inside the try so that any failure here is only logged.
        currentPools.get(removedServerAddress).shutdownPooling();
    } catch (Exception shutdownFailure) {
        log.warn("While removing a host ({}) from the pool, we were unable to gently cleanup resources.",
                SafeArg.of("removedServerAddress", CassandraLogHelper.host(removedServerAddress)),
                shutdownFailure);
    }

    currentPools.remove(removedServerAddress);
}
/**
 * Emits a multi-line description of the pool at DEBUG level: the blacklist description, the hosts currently
 * pooled, and for each pooled host its active checkouts and pool size. Does nothing when DEBUG logging is
 * disabled.
 *
 * <p>Non-positive active checkout counts are rendered as {@code (unknown)} and non-positive pool sizes as
 * {@code (not bounded)}.
 */
public void debugLogStateOfPool() {
    if (!log.isDebugEnabled()) {
        // Skip building the report entirely when nobody will see it.
        return;
    }

    StringBuilder report = new StringBuilder();
    report.append(
            String.format("POOL STATUS: Current blacklist = %s,%n current hosts in pool = %s%n",
                    blacklist.describeBlacklistedHosts(),
                    currentPools.keySet().toString()));

    for (Map.Entry<InetSocketAddress, CassandraClientPoolingContainer> hostAndPool : currentPools.entrySet()) {
        CassandraClientPoolingContainer container = hostAndPool.getValue();
        int checkedOut = container.getActiveCheckouts();
        int capacity = container.getPoolSize();

        String checkedOutText = checkedOut > 0 ? Integer.toString(checkedOut) : "(unknown)";
        String capacityText = capacity > 0 ? Integer.toString(capacity) : "(not bounded)";

        report.append(
                String.format("\tPOOL STATUS: Pooled host %s has %s out of %s connections checked out.%n",
                        hostAndPool.getKey(),
                        checkedOutText,
                        capacityText));
    }

    log.debug("Current pool state: {}", report.toString());
}
/**
 * Selects a pooling container for a host that satisfies {@code predicate}, preferring hosts that are not
 * blacklisted.
 *
 * <p>If no pooled host matches the predicate, an empty {@link Optional} is returned. If every matching host
 * is blacklisted, the selection is made among all matching hosts instead. The result is also empty when
 * {@code getRandomHostByActiveConnections} yields no host or the selected host has no container in the map.
 *
 * @param predicate filter applied to the addresses of the currently pooled hosts
 * @return the container for the selected host, if any
 */
public Optional<CassandraClientPoolingContainer> getRandomGoodHostForPredicate(
        Predicate<InetSocketAddress> predicate) {
    // Read the field once so that filtering and the final lookup go through the same map reference.
    Map<InetSocketAddress, CassandraClientPoolingContainer> poolsByHost = currentPools;

    Set<InetSocketAddress> matchingHosts = poolsByHost.keySet().stream()
            .filter(predicate)
            .collect(Collectors.toSet());
    if (matchingHosts.isEmpty()) {
        log.info("No hosts match the provided predicate.");
        return Optional.empty();
    }

    Set<InetSocketAddress> candidates = blacklist.filterBlacklistedHostsFrom(matchingHosts);
    if (candidates.isEmpty()) {
        log.info("There are no known live hosts in the connection pool matching the predicate. We're choosing"
                + " one at random in a last-ditch attempt at forward progress.");
        candidates = matchingHosts;
    }

    // Optional.map turns a null lookup result into an empty Optional.
    return getRandomHostByActiveConnections(candidates).map(poolsByHost::get);
}
/**
 * Creates a {@link CassandraKeyValueService} wired for tests: test metrics, a client pool built via
 * {@code createImplForTest} with startup checks set to {@code RUN} and a new {@link Blacklist}, the
 * legacy-mode mutation timestamp provider, and the default async-initialization setting.
 *
 * @param config the key value service configuration, also used to construct the blacklist
 * @param leaderConfig optional leader configuration, passed through unchanged
 * @return the result of {@code createOrShutdownClientPool} for the assembled components
 */
public static CassandraKeyValueService createForTesting(
        CassandraKeyValueServiceConfig config,
        Optional<LeaderConfig> leaderConfig) {
    MetricsManager testMetrics = MetricsManagers.createForTests();

    Blacklist freshBlacklist = new Blacklist(config);
    CassandraClientPool testPool = CassandraClientPoolImpl.createImplForTest(
            testMetrics,
            config,
            CassandraClientPoolImpl.StartupChecks.RUN,
            freshBlacklist);

    return createOrShutdownClientPool(
            testMetrics,
            config,
            testPool,
            leaderConfig,
            CassandraMutationTimestampProviders.legacyModeForTestsOnly(),
            LoggerFactory.getLogger(CassandraKeyValueService.class),
            AtlasDbConstants.DEFAULT_INITIALIZE_ASYNC);
}
/**
 * Resolves the pooling container a request should run against.
 *
 * <p>The request's preferred host is used when it is not blacklisted, has a pool, and the request has not
 * given up on it. Otherwise a replacement is chosen: first among good hosts the request has not already
 * tried, and failing that any random good host. Every such redirect is logged at WARN.
 *
 * @param req the request being routed
 * @return the container for the preferred host, or for the randomly chosen replacement
 */
private <V, K extends Exception> CassandraClientPoolingContainer getPreferredHostOrFallBack(
        RetryableCassandraRequest<V, K> req) {
    CassandraClientPoolingContainer preferredPool = cassandra.getPools().get(req.getPreferredHost());

    // Same checks, in the same order, as the redirect condition - just expressed as "may we stay?".
    boolean canStayOnPreferredHost = !blacklist.contains(req.getPreferredHost())
            && preferredPool != null
            && !req.shouldGiveUpOnPreferredHost();
    if (canStayOnPreferredHost) {
        return preferredPool;
    }

    // When there is no pool for the preferred host, report the requested address itself.
    InetSocketAddress previousHost = preferredPool == null
            ? req.getPreferredHost()
            : preferredPool.getHost();

    CassandraClientPoolingContainer replacementPool = cassandra
            .getRandomGoodHostForPredicate(address -> !req.alreadyTriedOnHost(address))
            .orElseGet(cassandra::getRandomGoodHost);

    log.warn("Randomly redirected a query intended for host {} to {}.",
            SafeArg.of("previousHost", CassandraLogHelper.host(previousHost)),
            SafeArg.of("randomHost", CassandraLogHelper.host(replacementPool.getHost())));
    return replacementPool;
}