/**
 * Creates fresh targets before every test: a plain target built with a max backoff of 1,
 * and a Mockito spy around a target built with a max backoff of 100 so that calls to its
 * methods can be verified.
 */
@Before
public void setup() {
    this.normalTarget = new FailoverFeignTarget<>(SERVERS, 1, Object.class);
    this.spiedTarget = spy(
            new FailoverFeignTarget<>(SERVERS, 100, Object.class));
}
@Override public void continueOrPropagate(RetryableException ex) { ExceptionRetryBehaviour retryBehaviour = ExceptionRetryBehaviour.getRetryBehaviourForException(ex); synchronized (this) { // Only fail over if this failure was to the current server. // This means that no one on another thread has failed us over already. if (mostRecentServerIndex.get() != null && mostRecentServerIndex.get() == failoverCount.get()) { long failures = failuresSinceLastSwitch.incrementAndGet(); if (shouldSwitchNode(retryBehaviour, failures)) { failoverToNextNode(retryBehaviour); } else if (retryBehaviour.shouldRetryInfinitelyManyTimes()) { failuresSinceLastSwitch.set(0); } } } checkAndHandleFailure(ex); if (retryBehaviour.shouldBackoffAndTryOtherNodes()) { int numFailovers = failoverCount.get(); if (numFailovers > 0 && numFailovers % servers.size() == 0) { // We implement some randomness around the expected value of BACKOFF_BEFORE_ROUND_ROBIN_RETRY_MILLIS. // Even though this is not exponential backoff, should be enough to avoid a thundering herd problem. long pauseTimeWithJitter = ThreadLocalRandom.current() .nextLong(BACKOFF_BEFORE_ROUND_ROBIN_RETRY_MILLIS / 2, (BACKOFF_BEFORE_ROUND_ROBIN_RETRY_MILLIS * 3) / 2); pauseForBackoff(ex, pauseTimeWithJitter); } } else { pauseForBackoff(ex); } }
/**
 * A single blocking-timeout failure must leave the target pointing at the same URL.
 */
@Test
public void doesNotFailOverOnBlockingTimeoutException() {
    String urlBeforeFailure = normalTarget.url();

    normalTarget.continueOrPropagate(BLOCKING_TIMEOUT_EXCEPTION);

    String urlAfterFailure = normalTarget.url();
    assertThat(urlAfterFailure).isEqualTo(urlBeforeFailure);
}
/**
 * Repeated blocking-timeout failures must each result in a zero-length pause: after the
 * k-th failure, exactly k calls to {@code pauseForBackoff} with a pause of 0 are expected.
 */
@Test
public void blockingTimeoutExceptionsDoNotBackoff() {
    for (int callsSoFar = 1; callsSoFar <= ITERATIONS; callsSoFar++) {
        simulateRequest(spiedTarget);
        spiedTarget.continueOrPropagate(BLOCKING_TIMEOUT_EXCEPTION);

        verify(spiedTarget, times(callsSoFar)).pauseForBackoff(any(), eq(0L));
    }
}
/**
 * Builds a Feign proxy of {@code type} backed by a {@link FailoverFeignTarget} over the given
 * endpoints. The same failover target is used as the Feign target, as the retryer, and to
 * wrap the refreshing OkHttp client.
 *
 * @param trustContext forwarded to the refreshing OkHttp client factory
 * @param proxySelector forwarded to the refreshing OkHttp client factory
 * @param endpointUris endpoints handed to the failover target
 * @param feignOptions request options applied to the Feign builder
 * @param maxBackoffMillis max backoff handed to the failover target
 * @param type interface class to proxy
 * @param userAgent forwarded to the refreshing OkHttp client factory
 * @return the proxy produced by the Feign builder
 */
private static <T> T createProxyWithFailover(
        Optional<TrustContext> trustContext,
        Optional<ProxySelector> proxySelector,
        Collection<String> endpointUris,
        Request.Options feignOptions,
        int maxBackoffMillis,
        Class<T> type,
        String userAgent) {
    FailoverFeignTarget<T> target = new FailoverFeignTarget<>(endpointUris, maxBackoffMillis, type);

    // Wrap the underlying client through the target (see wrapClient for what the wrapper does).
    Client wrappedClient = target.wrapClient(
            FeignOkHttpClients.newRefreshingOkHttpClient(trustContext, proxySelector, userAgent));

    return Feign.builder()
            .contract(contract)
            .encoder(encoder)
            .decoder(decoder)
            .errorDecoder(errorDecoder)
            .client(wrappedClient)
            .retryer(target)
            .options(feignOptions)
            .target(target);
}
private void simulateRequest(FailoverFeignTarget target) { // This method is called as a part of a request being invoked. // We need to update the mostRecentServerIndex, for the FailoverFeignTarget to track failures properly. target.url(); }
private void pauseForBackoff(RetryableException ex) { double exponentialPauseTime = Math.pow( GOLDEN_RATIO, numSwitches.get() * failuresBeforeSwitching + failuresSinceLastSwitch.get()); long cappedPauseTime = Math.min(maxBackoffMillis, Math.round(exponentialPauseTime)); // We use the Full Jitter (https://www.awsarchitectureblog.com/2015/03/backoff.html). // We prioritize a low server load over completion time. long pauseTimeWithJitter = ThreadLocalRandom.current().nextLong(cappedPauseTime); pauseForBackoff(ex, pauseTimeWithJitter); }
/**
 * Decorates a Feign client so that every response with a 2xx status is reported through
 * {@code sucessfulCall()} before being handed back to the caller unchanged.
 *
 * @param client the client that actually executes requests
 * @return a client delegating to {@code client}
 */
public Client wrapClient(final Client client) {
    return (request, options) -> {
        Response response = client.execute(request, options);
        int status = response.status();
        boolean isSuccessStatus = status >= 200 && status < 300;
        if (isSuccessStatus) {
            sucessfulCall();
        }
        return response;
    };
}
}
/**
 * Feeding the target up to {@code FAILOVERS} consecutive failures without a retry-after must
 * eventually cause the very same exception instance to be thrown back to the caller.
 */
@Test
public void rethrowsExceptionWithoutRetryAfterWhenLimitExceeded() {
    assertThatThrownBy(() -> {
        for (int attempt = 1; attempt <= FAILOVERS; attempt++) {
            simulateRequest(normalTarget);
            normalTarget.continueOrPropagate(EXCEPTION_WITHOUT_RETRY_AFTER);
        }
    }).isEqualTo(EXCEPTION_WITHOUT_RETRY_AFTER);
}
/**
 * Failures without a retry-after must back off within an exponentially growing bound: after
 * the k-th failure, exactly k recorded pauses must satisfy
 * {@code isWithinBounds(0, round(GOLDEN_RATIO^k))}.
 */
@Test
public void exceptionsWithoutRetryAfterBackoffExponentially() {
    int totalFailures = 10;
    for (int failuresSoFar = 1; failuresSoFar <= totalFailures; failuresSoFar++) {
        simulateRequest(spiedTarget);
        spiedTarget.continueOrPropagate(EXCEPTION_WITHOUT_RETRY_AFTER);

        long upperBound = Math.round(Math.pow(GOLDEN_RATIO, failuresSoFar));
        verify(spiedTarget, times(failuresSoFar))
                .pauseForBackoff(any(), longThat(isWithinBounds(0L, upperBound)));
    }
}
/**
 * Builds a Feign proxy of {@code type} backed by a {@link FailoverFeignTarget} over the given
 * endpoints. The same failover target is used as the Feign target, as the retryer, and to
 * wrap the refreshing OkHttp client.
 *
 * @param trustContext forwarded to the refreshing OkHttp client factory
 * @param proxySelector forwarded to the refreshing OkHttp client factory
 * @param endpointUris endpoints handed to the failover target
 * @param feignOptions request options applied to the Feign builder
 * @param maxBackoffMillis max backoff handed to the failover target
 * @param type interface class to proxy
 * @param userAgent forwarded to the refreshing OkHttp client factory
 * @return the proxy produced by the Feign builder
 */
private static <T> T createProxyWithFailover(
        Optional<TrustContext> trustContext,
        Optional<ProxySelector> proxySelector,
        Collection<String> endpointUris,
        Request.Options feignOptions,
        int maxBackoffMillis,
        Class<T> type,
        String userAgent) {
    FailoverFeignTarget<T> target = new FailoverFeignTarget<>(endpointUris, maxBackoffMillis, type);

    // Wrap the underlying client through the target (see wrapClient for what the wrapper does).
    Client wrappedClient = target.wrapClient(
            FeignOkHttpClients.newRefreshingOkHttpClient(trustContext, proxySelector, userAgent));

    return Feign.builder()
            .contract(contract)
            .encoder(encoder)
            .decoder(decoder)
            .errorDecoder(errorDecoder)
            .client(wrappedClient)
            .retryer(target)
            .options(feignOptions)
            .target(target);
}
/**
 * Turns a request template into a request, prefixing the template with this target's
 * current {@code url()} unless the template's URL already begins with {@code "http"}.
 *
 * @param input the template to resolve; modified in place when the prefix is inserted
 * @return the request built from the (possibly prefixed) template
 */
@Override
public Request apply(RequestTemplate input) {
    boolean alreadyAbsolute = input.url().startsWith("http");
    if (!alreadyAbsolute) {
        input.insert(0, url());
    }
    return input.request();
}
private void pauseForBackoff(RetryableException ex) { double exponentialPauseTime = Math.pow( GOLDEN_RATIO, numSwitches.get() * failuresBeforeSwitching + failuresSinceLastSwitch.get()); long cappedPauseTime = Math.min(maxBackoffMillis, Math.round(exponentialPauseTime)); // We use the Full Jitter (https://www.awsarchitectureblog.com/2015/03/backoff.html). // We prioritize a low server load over completion time. long pauseTimeWithJitter = ThreadLocalRandom.current().nextLong(cappedPauseTime); pauseForBackoff(ex, pauseTimeWithJitter); }
/**
 * Decorates a Feign client so that every response with a 2xx status is reported through
 * {@code sucessfulCall()} before being handed back to the caller unchanged.
 *
 * @param client the client that actually executes requests
 * @return a client delegating to {@code client}
 */
public Client wrapClient(final Client client) {
    return (request, options) -> {
        Response response = client.execute(request, options);
        int status = response.status();
        boolean isSuccessStatus = status >= 200 && status < 300;
        if (isSuccessStatus) {
            sucessfulCall();
        }
        return response;
    };
}
}
@Override public void continueOrPropagate(RetryableException ex) { ExceptionRetryBehaviour retryBehaviour = ExceptionRetryBehaviour.getRetryBehaviourForException(ex); synchronized (this) { // Only fail over if this failure was to the current server. // This means that no one on another thread has failed us over already. if (mostRecentServerIndex.get() != null && mostRecentServerIndex.get() == failoverCount.get()) { long failures = failuresSinceLastSwitch.incrementAndGet(); if (shouldSwitchNode(retryBehaviour, failures)) { failoverToNextNode(retryBehaviour); } else if (retryBehaviour.shouldRetryInfinitelyManyTimes()) { failuresSinceLastSwitch.set(0); } } } checkAndHandleFailure(ex); if (retryBehaviour.shouldBackoffAndTryOtherNodes()) { int numFailovers = failoverCount.get(); if (numFailovers > 0 && numFailovers % servers.size() == 0) { // We implement some randomness around the expected value of BACKOFF_BEFORE_ROUND_ROBIN_RETRY_MILLIS. // Even though this is not exponential backoff, should be enough to avoid a thundering herd problem. long pauseTimeWithJitter = ThreadLocalRandom.current() .nextLong(BACKOFF_BEFORE_ROUND_ROBIN_RETRY_MILLIS / 2, (BACKOFF_BEFORE_ROUND_ROBIN_RETRY_MILLIS * 3) / 2); pauseForBackoff(ex, pauseTimeWithJitter); } } else { pauseForBackoff(ex); } }
@Test public void failsOverMultipleTimesWithFailingLeader() { String initialUrl = normalTarget.url(); for (int i = 0; i < FAILOVERS; i++) { // The 'leader' is the initial node, and fails with non fast-failover exceptions (so without retry after). // The other nodes fail with retry afters. normalTarget.continueOrPropagate( normalTarget.url().equals(initialUrl) ? EXCEPTION_WITHOUT_RETRY_AFTER : EXCEPTION_WITH_RETRY_AFTER); } }
/**
 * After one retry-after failure per node in the cluster, exactly one pause satisfying
 * {@code isWithinBounds(LOWER_BACKOFF_BOUND, UPPER_BACKOFF_BOUND)} must have been taken.
 */
@Test
public void exceptionsWithRetryAfterBacksOffAfterQueryingAllNodesInTheCluster() {
    for (int nodesTried = 1; nodesTried <= CLUSTER_SIZE; nodesTried++) {
        simulateRequest(spiedTarget);
        spiedTarget.continueOrPropagate(EXCEPTION_WITH_RETRY_AFTER);
    }

    int expectedBackoffs = 1;
    verify(spiedTarget, times(expectedBackoffs))
            .pauseForBackoff(any(), longThat(isWithinBounds(LOWER_BACKOFF_BOUND, UPPER_BACKOFF_BOUND)));
}
/**
 * Turns a request template into a request, prefixing the template with this target's
 * current {@code url()} unless the template's URL already begins with {@code "http"}.
 *
 * @param input the template to resolve; modified in place when the prefix is inserted
 * @return the request built from the (possibly prefixed) template
 */
@Override
public Request apply(RequestTemplate input) {
    boolean alreadyAbsolute = input.url().startsWith("http");
    if (!alreadyAbsolute) {
        input.insert(0, url());
    }
    return input.request();
}
/**
 * However many blocking-timeout failures are reported (up to {@code FAILOVERS}), the target
 * must keep returning the URL it started with after each one.
 */
@Test
public void doesNotFailOverOnMultipleBlockingTimeoutExceptions() {
    String urlBeforeFailures = normalTarget.url();
    for (int failure = 1; failure <= FAILOVERS; failure++) {
        normalTarget.continueOrPropagate(BLOCKING_TIMEOUT_EXCEPTION);

        String currentUrl = normalTarget.url();
        assertThat(currentUrl).isEqualTo(urlBeforeFailures);
    }
}
/**
 * After three full passes of retry-after failures over the cluster, exactly three pauses
 * satisfying {@code isWithinBounds(LOWER_BACKOFF_BOUND, UPPER_BACKOFF_BOUND)} must have
 * been taken.
 */
@Test
public void multipleExceptionsWithRetryAfterBackOffAfterQueryingAllNodesInTheCluster() {
    int totalRequests = 3 * CLUSTER_SIZE;
    for (int request = 1; request <= totalRequests; request++) {
        simulateRequest(spiedTarget);
        spiedTarget.continueOrPropagate(EXCEPTION_WITH_RETRY_AFTER);
    }

    int expectedBackoffs = 3;
    verify(spiedTarget, times(expectedBackoffs))
            .pauseForBackoff(any(), longThat(isWithinBounds(LOWER_BACKOFF_BOUND, UPPER_BACKOFF_BOUND)));
}