@Override public void shutdown() { ExecutorUtils.gracefulShutdown(restClusterClientConfiguration.getRetryDelay(), TimeUnit.MILLISECONDS, retryExecutorService); this.restClient.shutdown(Time.seconds(5)); ExecutorUtils.gracefulShutdown(5, TimeUnit.SECONDS, this.executorService); try { webMonitorRetrievalService.stop(); } catch (Exception e) { log.error("An error occurred during stopping the webMonitorRetrievalService", e); } try { dispatcherRetrievalService.stop(); } catch (Exception e) { log.error("An error occurred during stopping the dispatcherLeaderRetriever", e); } try { // we only call this for legacy reasons to shutdown components that are started in the ClusterClient constructor super.shutdown(); } catch (Exception e) { log.error("An error occurred during the client shutdown.", e); } }
@Override public void start() throws Exception { synchronized (startupShutdownLock) { leaderRetrievalService.start(retriever); long delay = backPressureStatsTrackerImpl.getCleanUpInterval(); // Scheduled back pressure stats tracker cache cleanup. We schedule // this here repeatedly, because cache clean up only happens on // interactions with the cache. We need it to make sure that we // don't leak memory after completed jobs or long ago accessed stats. netty.getBootstrap().childGroup().scheduleWithFixedDelay(new Runnable() { @Override public void run() { try { backPressureStatsTrackerImpl.cleanUpOperatorStatsCache(); } catch (Throwable t) { LOG.error("Error during back pressure stats cache cleanup.", t); } } }, delay, delay, TimeUnit.MILLISECONDS); } }
/**
 * Stops this component while holding {@code startupShutdownLock}.
 *
 * <p>The steps run strictly in the order written below. There is no per-step error
 * handling, so an exception from any call propagates to the caller and the calls
 * after it are not executed.
 *
 * @throws Exception if any of the shutdown steps fails
 */
@Override
public void stop() throws Exception {
    synchronized (startupShutdownLock) {
        // Cancel the clean-up task, passing false to cancel(...).
        executionGraphCleanupTask.cancel(false);

        executionGraphCache.close();

        leaderRetrievalService.stop();

        netty.shutdown();

        stackTraceSamples.shutDown();

        backPressureStatsTrackerImpl.shutDown();

        // NOTE(review): cleanup() is defined elsewhere; presumably it removes
        // remaining local resources — confirm against its implementation.
        cleanup();
    }
}
@Test public void testClosingReportsToLeader() throws Exception { final Configuration flinkConfig = new Configuration(); try (YarnHighAvailabilityServices services = new YarnIntraNonHaMasterServices(flinkConfig, hadoopConfig)) { final LeaderElectionService elector = services.getResourceManagerLeaderElectionService(); final LeaderRetrievalService retrieval = services.getResourceManagerLeaderRetriever(); final LeaderContender contender = mockContender(elector); final LeaderRetrievalListener listener = mock(LeaderRetrievalListener.class); elector.start(contender); retrieval.start(listener); // wait until the contender has become the leader verify(listener, timeout(1000L).times(1)).notifyLeaderAddress(anyString(), any(UUID.class)); // now we can close the election service services.close(); verify(contender, timeout(1000L).times(1)).handleError(any(Exception.class)); } }
/**
 * Stops the leader retrieval service as part of stopping this component.
 *
 * <p>Stopping is best-effort: a failure is logged as a warning, together with the
 * causing exception, and is not propagated to the caller.
 */
@Override
public void postStop() {
    try {
        leaderRetrievalService.stop();
    } catch (Exception e) {
        // Pass the exception to the logger so the cause of the failed stop is
        // not silently lost.
        LOG.warn("Could not properly stop the leader retrieval service.", e);
    }
}
/**
 * Finds the address to use for connecting to the current leader.
 *
 * <p>A fresh {@code LeaderConnectingAddressListener} is registered with the given
 * retrieval service and then asked for the connecting address. The retrieval
 * service is always stopped before this method returns or throws; a failure to
 * stop it is only logged.
 *
 * @param leaderRetrievalService service that is started with the listener and
 *                               stopped again afterwards
 * @param timeout duration handed to the listener's address lookup
 * @return the address determined by the listener
 * @throws LeaderRetrievalException wrapping any exception raised while starting
 *                                  the service or looking up the address
 */
public static InetAddress findConnectingAddress(
        LeaderRetrievalService leaderRetrievalService,
        FiniteDuration timeout) throws LeaderRetrievalException {

    final ConnectionUtils.LeaderConnectingAddressListener addressListener =
        new ConnectionUtils.LeaderConnectingAddressListener();

    try {
        leaderRetrievalService.start(addressListener);

        LOG.info("Trying to select the network interface and address to use " +
            "by connecting to the leading JobManager.");
        LOG.info("TaskManager will try to connect for " + timeout +
            " before falling back to heuristics");

        final InetAddress connectingAddress = addressListener.findConnectingAddress(timeout);
        return connectingAddress;
    } catch (Exception lookupFailure) {
        throw new LeaderRetrievalException("Could not find the connecting address by " +
            "connecting to the current leader.", lookupFailure);
    } finally {
        // Runs on success and on failure alike; a failing stop must not mask the
        // result or the original exception, so it is only logged.
        try {
            leaderRetrievalService.stop();
        } catch (Exception stopFailure) {
            LOG.warn("Could not stop the leader retrieval service.", stopFailure);
        }
    }
}
/**
 * Stops the leader retriever as part of stopping this component.
 *
 * <p>Any {@link Throwable} raised while stopping is logged at error level and is
 * not propagated.
 */
@Override
public void postStop() {
    try {
        leaderRetriever.stop();
    } catch (Throwable stopFailure) {
        // Deliberately broad: nothing thrown by the retriever may escape this hook.
        LOG.error("Could not cleanly shut down leader retrieval service", stopFailure);
    }
}
/**
 * Finds the address to use for connecting to the current leader.
 *
 * <p>A fresh {@code LeaderConnectingAddressListener} is registered with the given
 * retrieval service and then asked for the connecting address. The retrieval
 * service is always stopped before this method returns or throws; a failure to
 * stop it is only logged.
 *
 * @param leaderRetrievalService service that is started with the listener and
 *                               stopped again afterwards
 * @param timeout duration handed to the listener's address lookup
 * @return the address determined by the listener
 * @throws LeaderRetrievalException wrapping any exception raised while starting
 *                                  the service or looking up the address
 */
public static InetAddress findConnectingAddress(
        LeaderRetrievalService leaderRetrievalService,
        FiniteDuration timeout) throws LeaderRetrievalException {

    final ConnectionUtils.LeaderConnectingAddressListener addressListener =
        new ConnectionUtils.LeaderConnectingAddressListener();

    try {
        leaderRetrievalService.start(addressListener);

        LOG.info("Trying to select the network interface and address to use " +
            "by connecting to the leading JobManager.");
        LOG.info("TaskManager will try to connect for " + timeout +
            " before falling back to heuristics");

        final InetAddress connectingAddress = addressListener.findConnectingAddress(timeout);
        return connectingAddress;
    } catch (Exception lookupFailure) {
        throw new LeaderRetrievalException("Could not find the connecting address by " +
            "connecting to the current leader.", lookupFailure);
    } finally {
        // Runs on success and on failure alike; a failing stop must not mask the
        // result or the original exception, so it is only logged.
        try {
            leaderRetrievalService.stop();
        } catch (Exception stopFailure) {
            LOG.warn("Could not stop the leader retrieval service.", stopFailure);
        }
    }
}
/**
 * Stops the leader retrieval service as part of stopping this component.
 *
 * <p>Stopping is best-effort: a failure is logged as a warning, together with the
 * causing exception, and is not propagated to the caller.
 */
@Override
public void postStop() {
    try {
        leaderRetrievalService.stop();
    } catch (Exception e) {
        // Pass the exception to the logger so the cause of the failed stop is
        // not silently lost.
        LOG.warn("Could not properly stop the leader retrieval service.", e);
    }
}
/**
 * Finds the address to use for connecting to the current leader.
 *
 * <p>A fresh {@code LeaderConnectingAddressListener} is registered with the given
 * retrieval service and then asked for the connecting address. The retrieval
 * service is always stopped before this method returns or throws; a failure to
 * stop it is only logged.
 *
 * @param leaderRetrievalService service that is started with the listener and
 *                               stopped again afterwards
 * @param timeout duration handed to the listener's address lookup
 * @return the address determined by the listener
 * @throws LeaderRetrievalException wrapping any exception raised while starting
 *                                  the service or looking up the address
 */
public static InetAddress findConnectingAddress(
        LeaderRetrievalService leaderRetrievalService,
        FiniteDuration timeout) throws LeaderRetrievalException {

    final ConnectionUtils.LeaderConnectingAddressListener addressListener =
        new ConnectionUtils.LeaderConnectingAddressListener();

    try {
        leaderRetrievalService.start(addressListener);

        LOG.info("Trying to select the network interface and address to use " +
            "by connecting to the leading JobManager.");
        LOG.info("TaskManager will try to connect for " + timeout +
            " before falling back to heuristics");

        final InetAddress connectingAddress = addressListener.findConnectingAddress(timeout);
        return connectingAddress;
    } catch (Exception lookupFailure) {
        throw new LeaderRetrievalException("Could not find the connecting address by " +
            "connecting to the current leader.", lookupFailure);
    } finally {
        // Runs on success and on failure alike; a failing stop must not mask the
        // result or the original exception, so it is only logged.
        try {
            leaderRetrievalService.stop();
        } catch (Exception stopFailure) {
            LOG.warn("Could not stop the leader retrieval service.", stopFailure);
        }
    }
}
/**
 * Starts the leader retrieval service with this object as its listener.
 *
 * @throws RuntimeException wrapping the original exception if the service cannot
 *                          be started
 */
@Override
public void preStart() {
    try {
        leaderRetrievalService.start(this);
    } catch (Exception startFailure) {
        // The same text is used for the log entry and for the rethrown exception.
        final String failureMessage = "Could not start the leader retrieval service.";
        LOG.error(failureMessage);
        throw new RuntimeException(failureMessage, startFailure);
    }
}
/**
 * Stops the leader retriever as part of stopping this component.
 *
 * <p>Any {@link Throwable} raised while stopping is logged at error level and is
 * not propagated.
 */
@Override
public void postStop() {
    try {
        leaderRetriever.stop();
    } catch (Throwable stopFailure) {
        // Deliberately broad: nothing thrown by the retriever may escape this hook.
        LOG.error("Could not cleanly shut down leader retrieval service", stopFailure);
    }
}
/**
 * Finds the address to use for connecting to the current leader.
 *
 * <p>A fresh {@code LeaderConnectingAddressListener} is registered with the given
 * retrieval service and then asked for the connecting address. The retrieval
 * service is always stopped before this method returns or throws; a failure to
 * stop it is only logged.
 *
 * @param leaderRetrievalService service that is started with the listener and
 *                               stopped again afterwards
 * @param timeout duration handed to the listener's address lookup
 * @return the address determined by the listener
 * @throws LeaderRetrievalException wrapping any exception raised while starting
 *                                  the service or looking up the address
 */
public static InetAddress findConnectingAddress(
        LeaderRetrievalService leaderRetrievalService,
        FiniteDuration timeout) throws LeaderRetrievalException {

    final ConnectionUtils.LeaderConnectingAddressListener addressListener =
        new ConnectionUtils.LeaderConnectingAddressListener();

    try {
        leaderRetrievalService.start(addressListener);

        LOG.info("Trying to select the network interface and address to use " +
            "by connecting to the leading JobManager.");
        LOG.info("TaskManager will try to connect for " + timeout +
            " before falling back to heuristics");

        final InetAddress connectingAddress = addressListener.findConnectingAddress(timeout);
        return connectingAddress;
    } catch (Exception lookupFailure) {
        throw new LeaderRetrievalException("Could not find the connecting address by " +
            "connecting to the current leader.", lookupFailure);
    } finally {
        // Runs on success and on failure alike; a failing stop must not mask the
        // result or the original exception, so it is only logged.
        try {
            leaderRetrievalService.stop();
        } catch (Exception stopFailure) {
            LOG.warn("Could not stop the leader retrieval service.", stopFailure);
        }
    }
}
/**
 * Starts the leader retrieval service with this object as its listener.
 *
 * @throws RuntimeException wrapping the original exception if the service cannot
 *                          be started
 */
@Override
public void preStart() {
    try {
        leaderRetrievalService.start(this);
    } catch (Exception startFailure) {
        // The same text is used for the log entry and for the rethrown exception.
        final String failureMessage = "Could not start the leader retrieval service.";
        LOG.error(failureMessage);
        throw new RuntimeException(failureMessage, startFailure);
    }
}
/**
 * Stops the leader retrieval service as part of stopping this component.
 *
 * <p>Stopping is best-effort: a failure is logged as a warning, together with the
 * causing exception, and is not propagated to the caller.
 */
@Override
public void postStop() {
    try {
        leaderRetrievalService.stop();
    } catch (Exception e) {
        // Pass the exception to the logger so the cause of the failed stop is
        // not silently lost.
        LOG.warn("Could not properly stop the leader retrieval service.", e);
    }
}