/**
 * Waits for {@link #exitValue()} to produce a value and returns it.
 *
 * <p>The call to {@code exitValue()} is driven through {@code Retries.supplyWithRetries} with a
 * fixed 100 millisecond delay strategy. The predicate handed to the retry helper accepts only
 * {@link IllegalThreadStateException}.
 *
 * @return the value returned by {@code exitValue()}
 * @throws InterruptedException declared by the signature; presumably raised when the retry wait
 *     is interrupted (verify against {@code Retries})
 */
@Override
public int waitFor() throws InterruptedException {
  RetryStrategy pollingStrategy = RetryStrategies.fixDelay(100, TimeUnit.MILLISECONDS);
  return Retries.supplyWithRetries(
    this::exitValue,
    pollingStrategy,
    failure -> failure instanceof IllegalThreadStateException);
}
/**
 * Waits for {@link #exitValue()} to produce a value and returns it.
 *
 * <p>The call to {@code exitValue()} is driven through {@code Retries.supplyWithRetries} with a
 * fixed 100 millisecond delay strategy. The predicate handed to the retry helper accepts only
 * {@link IllegalThreadStateException}.
 *
 * @return the value returned by {@code exitValue()}
 * @throws InterruptedException declared by the signature; presumably raised when the retry wait
 *     is interrupted (verify against {@code Retries})
 */
@Override
public int waitFor() throws InterruptedException {
  RetryStrategy pollingStrategy = RetryStrategies.fixDelay(100, TimeUnit.MILLISECONDS);
  return Retries.supplyWithRetries(
    this::exitValue,
    pollingStrategy,
    failure -> failure instanceof IllegalThreadStateException);
}
/**
 * Deletes a dataset instance through {@code Retries.callWithRetries}, using a fixed one second
 * delay strategy.
 *
 * <p>Every attempt checks the {@code stopping} flag first. Once the flag is set, the attempt
 * throws an {@link InterruptedException} instead of calling the dataset framework.
 *
 * @param datasetFramework framework on which {@code deleteInstance} is invoked
 * @param datasetId id of the dataset instance to delete
 * @return {@code null}; the callable carries no result
 * @throws Exception if the deletion fails; an {@link InterruptedException} is thrown when we are
 *     stopping
 */
static Void deleteInstanceWithRetry(final DatasetFramework datasetFramework,
                                    final DatasetId datasetId) throws Exception {
  Retries.Callable<Void, Exception> deleteAttempt = new Retries.Callable<Void, Exception>() {
    @Override
    public Void call() throws Exception {
      // Give up as soon as shutdown has begun rather than keep retrying.
      if (stopping) {
        throw new InterruptedException(
          String.format("Giving up delete dataset retry for %s, as we are stopping", datasetId.getDataset()));
      }
      datasetFramework.deleteInstance(datasetId);
      return null;
    }
  };
  return Retries.callWithRetries(deleteAttempt, RetryStrategies.fixDelay(1, TimeUnit.SECONDS));
}
/**
 * Checks whether a dataset instance exists, going through {@code Retries.callWithRetries} with a
 * fixed one second delay strategy.
 *
 * <p>Every attempt checks the {@code stopping} flag first. Once the flag is set, the attempt
 * throws an {@link InterruptedException} instead of calling the dataset framework.
 *
 * <p>The return type is a primitive {@code boolean} and can never be {@code null}, so this method
 * carries no nullability annotation.
 *
 * @param datasetFramework framework on which {@code hasInstance} is invoked
 * @param datasetId id of the dataset instance to look up
 * @return the result of {@code datasetFramework.hasInstance(datasetId)}
 * @throws Exception if the check fails; an {@link InterruptedException} is thrown when we are
 *     stopping
 */
static boolean hasInstanceWithRetry(final DatasetFramework datasetFramework,
                                    final DatasetId datasetId) throws Exception {
  Retries.Callable<Boolean, Exception> existsAttempt = new Retries.Callable<Boolean, Exception>() {
    @Override
    public Boolean call() throws Exception {
      // Give up as soon as shutdown has begun rather than keep retrying.
      if (stopping) {
        throw new InterruptedException(
          String.format("Giving up hasInstance dataset retry for %s, as we are stopping", datasetId.getDataset()));
      }
      return datasetFramework.hasInstance(datasetId);
    }
  };
  return Retries.callWithRetries(existsAttempt, RetryStrategies.fixDelay(1, TimeUnit.SECONDS));
}
/**
 * Fetches a dataset as a {@code MetricsTable}, going through {@code Retries.callWithRetries} with
 * a fixed one second delay strategy.
 *
 * <p>Every attempt checks the {@code stopping} flag first. Once the flag is set, the attempt
 * throws an {@link InterruptedException} instead of calling the dataset framework. The dataset is
 * requested with an empty argument map and a {@code null} third argument.
 *
 * @param datasetFramework framework on which {@code getDataset} is invoked
 * @param datasetId id of the dataset to fetch
 * @return the value returned by {@code datasetFramework.getDataset}; annotated as possibly
 *     {@code null}
 * @throws Exception if the lookup fails; an {@link InterruptedException} is thrown when we are
 *     stopping
 */
@Nullable
static MetricsTable getDatasetWithRetry(final DatasetFramework datasetFramework,
                                        final DatasetId datasetId) throws Exception {
  Retries.Callable<MetricsTable, Exception> fetchAttempt = new Retries.Callable<MetricsTable, Exception>() {
    @Override
    public MetricsTable call() throws Exception {
      // Give up as soon as shutdown has begun rather than keep retrying.
      if (stopping) {
        throw new InterruptedException(
          String.format("Giving up get dataset retry for %s, as we are stopping", datasetId.getDataset()));
      }
      return datasetFramework.getDataset(datasetId, Collections.<String, String>emptyMap(), null);
    }
  };
  return Retries.callWithRetries(fetchAttempt, RetryStrategies.fixDelay(1, TimeUnit.SECONDS));
}
/**
 * Waits, bounded by the given timeout, for {@link #exitValue()} to produce a value.
 *
 * <p>The call to {@code exitValue()} is driven through {@code Retries.supplyWithRetries} using a
 * fixed 100 millisecond delay strategy wrapped in {@code RetryStrategies.timeLimit(timeout, unit, ...)}.
 * The predicate handed to the retry helper accepts only {@link IllegalThreadStateException}.
 *
 * @param timeout the limit passed to the time-limited retry strategy
 * @param unit the unit of {@code timeout}
 * @return the value returned by {@code exitValue()}
 * @throws TimeoutException if an {@link IllegalThreadStateException} still propagates out of the
 *     retry helper; that exception is attached as the cause
 * @throws InterruptedException declared by the signature; presumably raised when the retry wait
 *     is interrupted (verify against {@code Retries})
 */
@Override
public int waitFor(long timeout, TimeUnit unit) throws TimeoutException, InterruptedException {
  RetryStrategy retry = RetryStrategies.timeLimit(timeout, unit,
                                                  RetryStrategies.fixDelay(100, TimeUnit.MILLISECONDS));
  try {
    return Retries.supplyWithRetries(this::exitValue, retry, IllegalThreadStateException.class::isInstance);
  } catch (IllegalThreadStateException e) {
    // TimeoutException has no (message, cause) constructor, so the cause is attached explicitly
    // to keep the original stack trace available for diagnosis.
    TimeoutException timeoutException = new TimeoutException("Process is still running");
    timeoutException.initCause(e);
    throw timeoutException;
  }
}
/**
 * Waits, bounded by the given timeout, for {@link #exitValue()} to produce a value.
 *
 * <p>The call to {@code exitValue()} is driven through {@code Retries.supplyWithRetries} using a
 * fixed 100 millisecond delay strategy wrapped in {@code RetryStrategies.timeLimit(timeout, unit, ...)}.
 * The predicate handed to the retry helper accepts only {@link IllegalThreadStateException}.
 *
 * @param timeout the limit passed to the time-limited retry strategy
 * @param unit the unit of {@code timeout}
 * @return the value returned by {@code exitValue()}
 * @throws TimeoutException if an {@link IllegalThreadStateException} still propagates out of the
 *     retry helper; that exception is attached as the cause
 * @throws InterruptedException declared by the signature; presumably raised when the retry wait
 *     is interrupted (verify against {@code Retries})
 */
@Override
public int waitFor(long timeout, TimeUnit unit) throws TimeoutException, InterruptedException {
  RetryStrategy retry = RetryStrategies.timeLimit(timeout, unit,
                                                  RetryStrategies.fixDelay(100, TimeUnit.MILLISECONDS));
  try {
    return Retries.supplyWithRetries(this::exitValue, retry, IllegalThreadStateException.class::isInstance);
  } catch (IllegalThreadStateException e) {
    // TimeoutException has no (message, cause) constructor, so the cause is attached explicitly
    // to keep the original stack trace available for diagnosis.
    TimeoutException timeoutException = new TimeoutException("Process is still running");
    timeoutException.initCause(e);
    throw timeoutException;
  }
}
/**
 * Deletes one workflow local dataset instance unless it must be kept.
 *
 * <p>The run id is taken to be the last dot-separated segment of {@code datasetName}. Nothing is
 * deleted when that run id is in {@code activeRuns}, or when the
 * {@code Constants.AppFabric.WORKFLOW_KEEP_LOCAL} property parses to {@code true}. Otherwise the
 * deletion goes through {@code Retries.callWithRetries} with a fixed delay strategy. Any exception
 * from the deletion is logged as a warning and not rethrown.
 *
 * @param namespaceName namespace of the dataset
 * @param datasetName name of the local dataset; its last dot-separated segment is the run id
 * @param activeRuns run ids whose datasets must not be deleted
 * @param properties properties consulted for the keep-local flag
 * @throws Exception declared by the signature
 */
private void deleteLocalDataset(final String namespaceName, final String datasetName,
                                Set<String> activeRuns, Map<String, String> properties) throws Exception {
  String[] nameSegments = datasetName.split("\\.");
  String runId = nameSegments[nameSegments.length - 1];

  // The owning run is still active: leave its dataset alone.
  if (activeRuns.contains(runId)) {
    return;
  }
  // The dataset was explicitly marked to be kept.
  if (Boolean.parseBoolean(properties.get(Constants.AppFabric.WORKFLOW_KEEP_LOCAL))) {
    return;
  }

  final DatasetId datasetId = new DatasetId(namespaceName, datasetName);
  Retries.Callable<Void, Exception> deleteAttempt = new Retries.Callable<Void, Exception>() {
    @Override
    public Void call() throws Exception {
      datasetFramework.deleteInstance(datasetId);
      LOG.info("Deleted local dataset instance {}", datasetId);
      return null;
    }
  };

  try {
    Retries.callWithRetries(
      deleteAttempt,
      RetryStrategies.fixDelay(Constants.Retry.LOCAL_DATASET_OPERATION_RETRY_DELAY_SECONDS, TimeUnit.SECONDS));
  } catch (Exception e) {
    LOG.warn("Failed to delete the Workflow local dataset instance {}", datasetId, e);
  }
}
}
private RetryStrategy getRetryStrategy() { if (isRunning()) { return retryStrategy; } // If failure happen during shutdown, use a retry strategy that only retry fixed number of times return RetryStrategies.timeLimit(5, TimeUnit.SECONDS, RetryStrategies.fixDelay(200, TimeUnit.MILLISECONDS)); } }
private RetryStrategy getRetryStrategy() { if (isRunning()) { return retryStrategy; } // If failure happen during shutdown, use a retry strategy that only retry fixed number of times return RetryStrategies.timeLimit(5, TimeUnit.SECONDS, RetryStrategies.fixDelay(200, TimeUnit.MILLISECONDS)); } }
private boolean isBootstrappedWithRetries() { return Retries.callWithRetries(this::isBootstrapped, RetryStrategies.fixDelay(6, TimeUnit.SECONDS), t -> { // don't retry if we were interrupted, or if the service is not running // normally this is only called when the service is starting, but it can be running in unit test State serviceState = state(); if (serviceState != State.STARTING && serviceState != State.RUNNING) { return false; } if (t instanceof InterruptedException) { return false; } // Otherwise always retry, but log unexpected types of failures // We expect things like SocketTimeoutException or ConnectException // when talking to Dataset Service during startup Throwable rootCause = Throwables.getRootCause(t); if (!(rootCause instanceof SocketTimeoutException || rootCause instanceof ConnectException)) { SAMPLING_LOG.warn("Error checking bootstrap state. " + "Bootstrap steps will not be run until state can be checked.", t); } return true; }); } }
@Inject RemotePluginFinder(CConfiguration cConf, DiscoveryServiceClient discoveryServiceClient, AuthenticationContext authenticationContext, LocationFactory locationFactory) { this.remoteClient = new RemoteClient(discoveryServiceClient, Constants.Service.APP_FABRIC_HTTP, new DefaultHttpRequestConfig(false), String.format("%s", Constants.Gateway.API_VERSION_3)); this.authorizationEnabled = cConf.getBoolean(Constants.Security.Authorization.ENABLED); this.authenticationContext = authenticationContext; this.locationFactory = locationFactory; this.retryStrategy = RetryStrategies.limit(30, RetryStrategies.fixDelay(2, TimeUnit.SECONDS)); }
/**
 * Deletes every workflow local dataset instance that is not marked to be kept.
 *
 * <p>Iterates the dataset name mapping of {@code datasetFramework}. Entries whose key satisfies
 * {@code keepLocal} are skipped. For the rest, the mapped instance name is deleted in the
 * namespace of {@code workflowRunId} through {@code Retries.runWithRetries} with a fixed delay
 * strategy. A failure for one instance is logged as a warning and does not stop the loop.
 */
private void deleteLocalDatasets() {
  for (final Map.Entry<String, String> mapping : datasetFramework.getDatasetNameMapping().entrySet()) {
    if (!keepLocal(mapping.getKey())) {
      final String localInstanceName = mapping.getValue();
      final DatasetId instanceId = new DatasetId(workflowRunId.getNamespace(), localInstanceName);
      LOG.debug("Deleting Workflow local dataset instance: {}", localInstanceName);
      try {
        Retries.runWithRetries(
          () -> datasetFramework.deleteInstance(instanceId),
          RetryStrategies.fixDelay(Constants.Retry.LOCAL_DATASET_OPERATION_RETRY_DELAY_SECONDS, TimeUnit.SECONDS));
      } catch (Exception e) {
        // Best effort: keep going with the remaining datasets.
        LOG.warn("Failed to delete the Workflow local dataset instance {}", localInstanceName, e);
      }
    }
  }
}
@Inject RemotePluginFinder(CConfiguration cConf, DiscoveryServiceClient discoveryServiceClient, AuthenticationContext authenticationContext, LocationFactory locationFactory) { this.remoteClient = new RemoteClient(discoveryServiceClient, Constants.Service.APP_FABRIC_HTTP, new DefaultHttpRequestConfig(false), String.format("%s", Constants.Gateway.API_VERSION_3)); this.authorizationEnabled = cConf.getBoolean(Constants.Security.Authorization.ENABLED); this.authenticationContext = authenticationContext; this.locationFactory = locationFactory; this.retryStrategy = RetryStrategies.limit(30, RetryStrategies.fixDelay(2, TimeUnit.SECONDS)); }
/**
 * Deletes every workflow local dataset instance that is not marked to be kept.
 *
 * <p>Iterates the dataset name mapping of {@code datasetFramework}. Entries whose key satisfies
 * {@code keepLocal} are skipped. For the rest, the mapped instance name is deleted in the
 * namespace of {@code workflowRunId} through {@code Retries.runWithRetries} with a fixed delay
 * strategy. A failure for one instance is logged as a warning and does not stop the loop.
 */
private void deleteLocalDatasets() {
  for (final Map.Entry<String, String> mapping : datasetFramework.getDatasetNameMapping().entrySet()) {
    if (!keepLocal(mapping.getKey())) {
      final String localInstanceName = mapping.getValue();
      final DatasetId instanceId = new DatasetId(workflowRunId.getNamespace(), localInstanceName);
      LOG.debug("Deleting Workflow local dataset instance: {}", localInstanceName);
      try {
        Retries.runWithRetries(
          () -> datasetFramework.deleteInstance(instanceId),
          RetryStrategies.fixDelay(Constants.Retry.LOCAL_DATASET_OPERATION_RETRY_DELAY_SECONDS, TimeUnit.SECONDS));
      } catch (Exception e) {
        // Best effort: keep going with the remaining datasets.
        LOG.warn("Failed to delete the Workflow local dataset instance {}", localInstanceName, e);
      }
    }
  }
}
/**
 * Verifies that a {@code RetryOnStartFailureService} with a fixed 10 millisecond retry delay
 * eventually counts down the start latch.
 */
@Test
public void testRetrySucceed() throws InterruptedException {
  CountDownLatch started = new CountDownLatch(1);
  // The second latch is required by the supplier factory but is not observed by this test.
  CountDownLatch unobservedLatch = new CountDownLatch(1);

  // NOTE(review): the first argument (3) presumably is the number of failed starts before
  // success; confirm against createServiceSupplier.
  Service service = new RetryOnStartFailureService(
    createServiceSupplier(3, started, unobservedLatch, false),
    RetryStrategies.fixDelay(10, TimeUnit.MILLISECONDS));
  service.startAndWait();

  boolean startedInTime = started.await(1, TimeUnit.SECONDS);
  Assert.assertTrue(startedInTime);
}
@Test public void testStopWhileRetrying() throws InterruptedException { // This test the service can be stopped during failure retry CountDownLatch failureLatch = new CountDownLatch(1); Service service = new RetryOnStartFailureService( createServiceSupplier(1000, new CountDownLatch(1), failureLatch, false), RetryStrategies.fixDelay(10, TimeUnit.MILLISECONDS)); service.startAndWait(); Assert.assertTrue(failureLatch.await(1, TimeUnit.SECONDS)); service.stopAndWait(); }
/**
 * Verifies that an {@code AbstractRetryableScheduledService} keeps running its task after
 * failures: the task alternates between throwing and succeeding, and the test waits for three
 * successful runs.
 */
@Test
public void testFailureRetry() throws InterruptedException {
  CountDownLatch successes = new CountDownLatch(3);
  Service service = new AbstractRetryableScheduledService(RetryStrategies.fixDelay(1L, TimeUnit.MILLISECONDS)) {

    private int countdown = 5;

    @Override
    protected long runTask() throws Exception {
      // Decrement first, then fail whenever the new value is even (4, 2, 0, ...).
      countdown--;
      boolean failThisRun = countdown % 2 == 0;
      if (failThisRun) {
        throw new Exception("Task failed");
      }
      successes.countDown();
      return 1L;
    }
  };

  service.start();
  boolean allSucceeded = successes.await(3, TimeUnit.SECONDS);
  Assert.assertTrue(allSucceeded);
  service.stopAndWait();
}
}
/**
 * Verifies that a {@code RetryOnStartFailureService} whose retry strategy is limited to 10
 * attempts reports failure to its listener, and that the start latch is never counted down.
 */
@Test
public void testRetryFail() throws InterruptedException {
  CountDownLatch started = new CountDownLatch(1);
  // The supplier's own failure latch is required by the factory but is not observed here.
  CountDownLatch unobservedLatch = new CountDownLatch(1);

  Service service = new RetryOnStartFailureService(
    createServiceSupplier(1000, started, unobservedLatch, false),
    RetryStrategies.limit(10, RetryStrategies.fixDelay(10, TimeUnit.MILLISECONDS)));

  // Counted down by the listener when the service transitions to failed.
  final CountDownLatch listenerFailed = new CountDownLatch(1);
  ServiceListenerAdapter failureListener = new ServiceListenerAdapter() {
    @Override
    public void failed(Service.State from, Throwable failure) {
      listenerFailed.countDown();
    }
  };
  service.addListener(failureListener, Threads.SAME_THREAD_EXECUTOR);
  service.start();

  boolean failureReported = listenerFailed.await(1, TimeUnit.SECONDS);
  Assert.assertTrue(failureReported);

  boolean startedAnyway = started.await(100, TimeUnit.MILLISECONDS);
  Assert.assertFalse(startedAnyway);
}
@Test public void testNoRetry() throws InterruptedException, ExecutionException, TimeoutException { Service service = new AbstractRetryableScheduledService(RetryStrategies.fixDelay(10L, TimeUnit.MILLISECONDS)) { @Override protected long runTask() throws Exception { throw new Exception("Task failed"); } @Override protected boolean shouldRetry(Exception ex) { return false; } }; service.start(); // Wait for the service to fail Tasks.waitFor(Service.State.FAILED, service::state, 5, TimeUnit.SECONDS, 10, TimeUnit.MILLISECONDS); try { service.stopAndWait(); } catch (Exception e) { // The root cause should be the one throw from the runTask. Throwable rootCause = Throwables.getRootCause(e); Assert.assertEquals("Task failed", rootCause.getMessage()); } }