@Override public void start() throws Exception { synchronized (startupShutdownLock) { leaderRetrievalService.start(retriever); long delay = backPressureStatsTrackerImpl.getCleanUpInterval(); // Scheduled back pressure stats tracker cache cleanup. We schedule // this here repeatedly, because cache clean up only happens on // interactions with the cache. We need it to make sure that we // don't leak memory after completed jobs or long ago accessed stats. netty.getBootstrap().childGroup().scheduleWithFixedDelay(new Runnable() { @Override public void run() { try { backPressureStatsTrackerImpl.cleanUpOperatorStatsCache(); } catch (Throwable t) { LOG.error("Error during back pressure stats cache cleanup.", t); } } }, delay, delay, TimeUnit.MILLISECONDS); } }
@Test public void testClosingReportsToLeader() throws Exception { final Configuration flinkConfig = new Configuration(); try (YarnHighAvailabilityServices services = new YarnIntraNonHaMasterServices(flinkConfig, hadoopConfig)) { final LeaderElectionService elector = services.getResourceManagerLeaderElectionService(); final LeaderRetrievalService retrieval = services.getResourceManagerLeaderRetriever(); final LeaderContender contender = mockContender(elector); final LeaderRetrievalListener listener = mock(LeaderRetrievalListener.class); elector.start(contender); retrieval.start(listener); // wait until the contender has become the leader verify(listener, timeout(1000L).times(1)).notifyLeaderAddress(anyString(), any(UUID.class)); // now we can close the election service services.close(); verify(contender, timeout(1000L).times(1)).handleError(any(Exception.class)); } }
// Start the leader retrieval service, handing it leaderListener as its listener argument.
// NOTE(review): the enclosing method is not visible here - confirm that exceptions from start() are handled by the caller.
leaderRetrievalService.start(leaderListener);
// Start the leader retrieval service, handing it leaderListener as its listener argument.
// NOTE(review): the enclosing method is not visible here - confirm that exceptions from start() are handled by the caller.
leaderRetrievalService.start(leaderListener);
/**
 * Starts the leader retrieval service with this instance as the listener.
 *
 * @throws RuntimeException wrapping the original exception if the leader
 *     retrieval service could not be started
 */
@Override
public void preStart() {
    try {
        leaderRetrievalService.start(this);
    } catch (Exception e) {
        // Hand the exception to the logger so the log entry carries the stack
        // trace; previously only the bare message was logged.
        LOG.error("Could not start the leader retrieval service.", e);
        throw new RuntimeException("Could not start the leader retrieval service.", e);
    }
}
/**
 * Starts the leader retrieval service with this instance as the listener.
 *
 * @throws RuntimeException wrapping the original exception if the leader
 *     retrieval service could not be started
 */
@Override
public void preStart() {
    try {
        leaderRetrievalService.start(this);
    } catch (Exception e) {
        // Hand the exception to the logger so the log entry carries the stack
        // trace; previously only the bare message was logged.
        LOG.error("Could not start the leader retrieval service.", e);
        throw new RuntimeException("Could not start the leader retrieval service.", e);
    }
}
/**
 * Starts the leader retrieval service, registering this instance as the listener.
 *
 * @throws RuntimeException wrapping any exception raised while starting the service
 */
public void start() {
    try {
        leaderRetrievalService.start(this);
    } catch (Exception startFailure) {
        // Log with the full cause, then surface the failure unchecked to the caller.
        LOG.error("Failed to start leader retrieval service", startFailure);
        throw new RuntimeException(startFailure);
    }
}
/**
 * Starts the leader retrieval service with this instance as the listener.
 *
 * @throws RuntimeException wrapping the original exception if the leader
 *     retrieval service could not be started
 */
@Override
public void preStart() {
    try {
        leaderRetrievalService.start(this);
    } catch (Exception e) {
        // Hand the exception to the logger so the log entry carries the stack
        // trace; previously only the bare message was logged.
        LOG.error("Could not start the leader retrieval service.", e);
        throw new RuntimeException("Could not start the leader retrieval service.", e);
    }
}
/**
 * Starts the leader retrieval service with this instance as the listener.
 *
 * @throws RuntimeException wrapping the original exception if the leader
 *     retrieval service could not be started
 */
@Override
public void preStart() {
    try {
        leaderRetrievalService.start(this);
    } catch (Exception e) {
        // Hand the exception to the logger so the log entry carries the stack
        // trace; previously only the bare message was logged.
        LOG.error("Could not start the leader retrieval service.", e);
        throw new RuntimeException("Could not start the leader retrieval service.", e);
    }
}
/**
 * Creates the listener, arms its timeout and starts the given leader retrieval
 * service with this instance as the listener.
 *
 * <p>Every argument is passed through {@code Preconditions.checkNotNull}.
 *
 * @param jobId id of the job this listener belongs to
 * @param listenerJobLeaderIdActions actions object stored for later use by this listener
 * @param leaderRetrievalService retrieval service that is started with this listener
 * @throws Exception declared by this constructor; presumably raised when the
 *     retrieval service fails to start (confirm against its signature)
 */
private JobLeaderIdListener(
        JobID jobId,
        JobLeaderIdActions listenerJobLeaderIdActions,
        LeaderRetrievalService leaderRetrievalService) throws Exception {

    this.jobId = Preconditions.checkNotNull(jobId);
    this.listenerJobLeaderIdActions = Preconditions.checkNotNull(listenerJobLeaderIdActions);
    this.leaderRetrievalService = Preconditions.checkNotNull(leaderRetrievalService);

    this.leaderIdFuture = new CompletableFuture<>();

    // The timeout is armed before the retrieval service is started.
    activateTimeout();

    // NOTE(review): `this` is handed out before the constructor has returned.
    this.leaderRetrievalService.start(this);
}
/**
 * Creates the listener, arms its timeout and starts the given leader retrieval
 * service with this instance as the listener.
 *
 * <p>Every argument is passed through {@code Preconditions.checkNotNull}.
 *
 * @param jobId id of the job this listener belongs to
 * @param listenerJobLeaderIdActions actions object stored for later use by this listener
 * @param leaderRetrievalService retrieval service that is started with this listener
 * @throws Exception declared by this constructor; presumably raised when the
 *     retrieval service fails to start (confirm against its signature)
 */
private JobLeaderIdListener(
        JobID jobId,
        JobLeaderIdActions listenerJobLeaderIdActions,
        LeaderRetrievalService leaderRetrievalService) throws Exception {

    this.jobId = Preconditions.checkNotNull(jobId);
    this.listenerJobLeaderIdActions = Preconditions.checkNotNull(listenerJobLeaderIdActions);
    this.leaderRetrievalService = Preconditions.checkNotNull(leaderRetrievalService);

    this.leaderIdFuture = new CompletableFuture<>();

    // The timeout is armed before the retrieval service is started.
    activateTimeout();

    // NOTE(review): `this` is handed out before the constructor has returned.
    this.leaderRetrievalService.start(this);
}
/**
 * Creates the listener, arms its timeout and starts the given leader retrieval
 * service with this instance as the listener.
 *
 * <p>Every argument is passed through {@code Preconditions.checkNotNull}.
 *
 * @param jobId id of the job this listener belongs to
 * @param listenerJobLeaderIdActions actions object stored for later use by this listener
 * @param leaderRetrievalService retrieval service that is started with this listener
 * @throws Exception declared by this constructor; presumably raised when the
 *     retrieval service fails to start (confirm against its signature)
 */
private JobLeaderIdListener(
        JobID jobId,
        JobLeaderIdActions listenerJobLeaderIdActions,
        LeaderRetrievalService leaderRetrievalService) throws Exception {

    this.jobId = Preconditions.checkNotNull(jobId);
    this.listenerJobLeaderIdActions = Preconditions.checkNotNull(listenerJobLeaderIdActions);
    this.leaderRetrievalService = Preconditions.checkNotNull(leaderRetrievalService);

    this.leaderIdFuture = new CompletableFuture<>();

    // The timeout is armed before the retrieval service is started.
    activateTimeout();

    // NOTE(review): `this` is handed out before the constructor has returned.
    this.leaderRetrievalService.start(this);
}
/**
 * Creates the listener, arms its timeout and starts the given leader retrieval
 * service with this instance as the listener.
 *
 * <p>Every argument is passed through {@code Preconditions.checkNotNull}.
 *
 * @param jobId id of the job this listener belongs to
 * @param listenerJobLeaderIdActions actions object stored for later use by this listener
 * @param leaderRetrievalService retrieval service that is started with this listener
 * @throws Exception declared by this constructor; presumably raised when the
 *     retrieval service fails to start (confirm against its signature)
 */
private JobLeaderIdListener(
        JobID jobId,
        JobLeaderIdActions listenerJobLeaderIdActions,
        LeaderRetrievalService leaderRetrievalService) throws Exception {

    this.jobId = Preconditions.checkNotNull(jobId);
    this.listenerJobLeaderIdActions = Preconditions.checkNotNull(listenerJobLeaderIdActions);
    this.leaderRetrievalService = Preconditions.checkNotNull(leaderRetrievalService);

    this.leaderIdFuture = new FlinkCompletableFuture<>();

    // The timeout is armed before the retrieval service is started.
    activateTimeout();

    // NOTE(review): `this` is handed out before the constructor has returned.
    this.leaderRetrievalService.start(this);
}
private void startJobMasterServices() throws Exception { // start the slot pool make sure the slot pool now accepts messages for this leader slotPool.start(getFencingToken(), getAddress()); //TODO: Remove once the ZooKeeperLeaderRetrieval returns the stored address upon start // try to reconnect to previously known leader reconnectToResourceManager(new FlinkException("Starting JobMaster component.")); // job is ready to go, try to establish connection with resource manager // - activate leader retrieval for the resource manager // - on notification of the leader, the connection will be established and // the slot pool will start requesting slots resourceManagerLeaderRetriever.start(new ResourceManagerLeaderListener()); }
private void startJobMasterServices() throws Exception { // start the slot pool make sure the slot pool now accepts messages for this leader slotPool.start(getFencingToken(), getAddress()); //TODO: Remove once the ZooKeeperLeaderRetrieval returns the stored address upon start // try to reconnect to previously known leader reconnectToResourceManager(new FlinkException("Starting JobMaster component.")); // job is ready to go, try to establish connection with resource manager // - activate leader retrieval for the resource manager // - on notification of the leader, the connection will be established and // the slot pool will start requesting slots resourceManagerLeaderRetriever.start(new ResourceManagerLeaderListener()); }
private void startJobMasterServices() throws Exception { // start the slot pool make sure the slot pool now accepts messages for this leader slotPool.start(getFencingToken(), getAddress()); //TODO: Remove once the ZooKeeperLeaderRetrieval returns the stored address upon start // try to reconnect to previously known leader reconnectToResourceManager(new FlinkException("Starting JobMaster component.")); // job is ready to go, try to establish connection with resource manager // - activate leader retrieval for the resource manager // - on notification of the leader, the connection will be established and // the slot pool will start requesting slots resourceManagerLeaderRetriever.start(new ResourceManagerLeaderListener()); }
@Override public void start() throws Exception { super.start(); // start by connecting to the ResourceManager try { haServices.getResourceManagerLeaderRetriever().start(new ResourceManagerLeaderListener()); } catch (Exception e) { onFatalErrorAsync(e); } // tell the task slot table who's responsible for the task slot actions taskSlotTable.start(new SlotActionsImpl()); // start the job leader service jobLeaderService.start(getAddress(), getRpcService(), haServices, new JobLeaderListenerImpl()); }