private String getAppAttemptFailedDiagnostics(RMAppEvent event) { String msg = null; RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event; if (this.submissionContext.getUnmanagedAM()) { // RM does not manage the AM. Do not retry msg = "Unmanaged application " + this.getApplicationId() + " failed due to " + failedEvent.getDiagnosticMsg() + ". Failing the application."; } else if (this.isNumAttemptsBeyondThreshold) { msg = "Application " + this.getApplicationId() + " failed " + this.maxAppAttempts + " times due to " + failedEvent.getDiagnosticMsg() + ". Failing the application."; } return msg; }
private String getAppAttemptFailedDiagnostics(RMAppEvent event) { String msg = null; RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event; if (this.submissionContext.getUnmanagedAM()) { // RM does not manage the AM. Do not retry msg = "Unmanaged application " + this.getApplicationId() + " failed due to " + failedEvent.getDiagnosticMsg() + ". Failing the application."; } else if (this.isNumAttemptsBeyondThreshold) { msg = "Application " + this.getApplicationId() + " failed " + this.maxAppAttempts + " times due to " + failedEvent.getDiagnosticMsg() + ". Failing the application."; } return msg; }
public void updateApplicationTimeout( Map<ApplicationTimeoutType, Long> updateTimeout) { this.writeLock.lock(); try { if (COMPLETED_APP_STATES.contains(getState())) { return; } // update monitoring service this.rmContext.getRMAppLifetimeMonitor() .updateApplicationTimeouts(getApplicationId(), updateTimeout); this.applicationTimeouts.putAll(updateTimeout); } finally { this.writeLock.unlock(); } }
private String getAppAttemptFailedDiagnostics(RMAppEvent event) { String msg = null; RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event; if (this.submissionContext.getUnmanagedAM()) { // RM does not manage the AM. Do not retry msg = "Unmanaged application " + this.getApplicationId() + " failed due to " + failedEvent.getDiagnosticMsg() + ". Failing the application."; } else if (this.isNumAttemptsBeyondThreshold) { int globalLimit = conf.getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS); msg = String.format( "Application %s failed %d times%s%s due to %s. Failing the application.", getApplicationId(), maxAppAttempts, (attemptFailuresValidityInterval <= 0 ? "" : (" in previous " + attemptFailuresValidityInterval + " milliseconds")), (globalLimit == maxAppAttempts) ? "" : (" (global limit =" + globalLimit + "; local limit is =" + maxAppAttempts + ")"), failedEvent.getDiagnosticMsg()); } return msg; }
/**
 * Records the application's launch time the first time it is observed.
 * <p>
 * When {@code app.launchTime} is still {@code 0}, the event's timestamp is
 * logged and stored; otherwise the call does nothing.
 *
 * @param app   the application whose launch time may be set
 * @param event the event supplying the timestamp
 */
@Override
public void transition(RMAppImpl app, RMAppEvent event) {
  if (app.launchTime == 0) {
    // The separator before "launchTime" keeps the attempt id and the
    // timestamp from running together in the log line.
    LOG.info("update the launch time for applicationId: "
        + app.getApplicationId() + ", attemptId: "
        + app.getCurrentAppAttempt().getAppAttemptId()
        + ", launchTime: " + event.getTimestamp());
    app.launchTime = event.getTimestamp();
  }
}
} // closes an enclosing scope opened before this excerpt
@Override public void recover(RMState state) { ApplicationStateData appState = state.getApplicationState().get(getApplicationId()); this.recoveredFinalState = appState.getState(); LOG.info("Recovering app: " + getApplicationId() + " with " + + appState.getAttemptCount() + " attempts and final state = " + this.recoveredFinalState ); this.diagnostics.append(appState.getDiagnostics()); this.storedFinishTime = appState.getFinishTime(); this.startTime = appState.getStartTime(); for(int i=0; i<appState.getAttemptCount(); ++i) { // create attempt createNewAttempt(); ((RMAppAttemptImpl)this.currentAttempt).recover(state); } }
@Override public void recover(RMState state) { ApplicationStateData appState = state.getApplicationState().get(getApplicationId()); this.recoveredFinalState = appState.getState(); LOG.info("Recovering app: " + getApplicationId() + " with " + + appState.getAttemptCount() + " attempts and final state = " + this.recoveredFinalState ); this.diagnostics.append(appState.getDiagnostics()); this.storedFinishTime = appState.getFinishTime(); this.startTime = appState.getStartTime(); for(int i=0; i<appState.getAttemptCount(); ++i) { // create attempt createNewAttempt(); ((RMAppAttemptImpl)this.currentAttempt).recover(state); } }
/**
 * Unregisters the application's timeouts from the lifetime monitor, runs
 * the transition stored in {@code app.transitionTodo} against
 * {@code app.eventCausingFinalSaving}, and reports the targeted final state.
 *
 * @param app   the application being transitioned
 * @param event the triggering event; not read by this method
 * @return {@code app.targetedFinalState}
 */
@Override
public RMAppState transition(RMAppImpl app, RMAppEvent event) {
  Map<ApplicationTimeoutType, Long> timeouts =
      app.submissionContext.getApplicationTimeouts();
  boolean hasTimeouts = timeouts != null && !timeouts.isEmpty();
  if (hasTimeouts) {
    app.rmContext.getRMAppLifetimeMonitor()
        .unregisterApp(app.getApplicationId(), timeouts.keySet());
  }

  // Replay the deferred transition with the event that caused the save.
  if (app.transitionTodo instanceof SingleArcTransition) {
    SingleArcTransition deferred = (SingleArcTransition) app.transitionTodo;
    deferred.transition(app, app.eventCausingFinalSaving);
  } else if (app.transitionTodo instanceof MultipleArcTransition) {
    MultipleArcTransition deferred =
        (MultipleArcTransition) app.transitionTodo;
    deferred.transition(app, app.eventCausingFinalSaving);
  }

  return app.targetedFinalState;
}
} // closes an enclosing scope opened before this excerpt
/**
 * Creates a mocked application with a mocked current attempt and registers
 * it in the context's application map if no entry exists for its id.
 *
 * @param attemptId id returned by the mocked attempt; its application id is
 *                  returned by the mocked application
 * @param context   context whose application map receives the mock
 * @return the mocked application
 */
private RMAppImpl createMockRMApp(ApplicationAttemptId attemptId,
    RMContext context) {
  // Attempt side: id and metrics.
  RMAppAttemptImpl mockAttempt = mock(RMAppAttemptImpl.class);
  when(mockAttempt.getAppAttemptId()).thenReturn(attemptId);
  RMAppAttemptMetrics mockMetrics = mock(RMAppAttemptMetrics.class);
  when(mockAttempt.getRMAppAttemptMetrics()).thenReturn(mockMetrics);

  // Application side: id and current attempt.
  RMAppImpl mockApp = mock(RMAppImpl.class);
  when(mockApp.getApplicationId()).thenReturn(attemptId.getApplicationId());
  when(mockApp.getCurrentAppAttempt()).thenReturn(mockAttempt);

  context.getRMApps().putIfAbsent(attemptId.getApplicationId(), mockApp);
  return mockApp;
}
/**
 * Creates a mocked application with a mocked current attempt and registers
 * it in the context's application map if no entry exists for its id.
 *
 * @param attemptId id returned by the mocked attempt; its application id is
 *                  returned by the mocked application
 * @param context   context whose application map receives the mock
 * @return the mocked application
 */
private RMAppImpl createMockRMApp(ApplicationAttemptId attemptId,
    RMContext context) {
  // Attempt side: id and metrics.
  RMAppAttemptImpl mockAttempt = mock(RMAppAttemptImpl.class);
  when(mockAttempt.getAppAttemptId()).thenReturn(attemptId);
  RMAppAttemptMetrics mockMetrics = mock(RMAppAttemptMetrics.class);
  when(mockAttempt.getRMAppAttemptMetrics()).thenReturn(mockMetrics);

  // Application side: id and current attempt.
  RMAppImpl mockApp = mock(RMAppImpl.class);
  when(mockApp.getApplicationId()).thenReturn(attemptId.getApplicationId());
  when(mockApp.getCurrentAppAttempt()).thenReturn(mockAttempt);

  context.getRMApps().putIfAbsent(attemptId.getApplicationId(), mockApp);
  return mockApp;
}
} // closes an enclosing scope opened before this excerpt
/**
 * Creates a mocked application whose mocked current attempt exposes an id,
 * metrics and a submission context reporting a managed AM, then registers
 * the application in the context's map if no entry exists for its id.
 *
 * @param attemptId id returned by the mocked attempt; its application id is
 *                  returned by the mocked application
 * @param context   context whose application map receives the mock
 * @return the mocked application
 */
private RMAppImpl createMockRMApp(ApplicationAttemptId attemptId,
    RMContext context) {
  // Submission context stub: getUnmanagedAM() answers false.
  ApplicationSubmissionContext mockSubmission =
      mock(ApplicationSubmissionContext.class);
  when(mockSubmission.getUnmanagedAM()).thenReturn(false);

  // Attempt side: id, metrics and submission context.
  RMAppAttemptImpl mockAttempt = mock(RMAppAttemptImpl.class);
  when(mockAttempt.getAppAttemptId()).thenReturn(attemptId);
  RMAppAttemptMetrics mockMetrics = mock(RMAppAttemptMetrics.class);
  when(mockAttempt.getRMAppAttemptMetrics()).thenReturn(mockMetrics);
  when(mockAttempt.getSubmissionContext()).thenReturn(mockSubmission);

  // Application side: id and current attempt.
  RMAppImpl mockApp = mock(RMAppImpl.class);
  when(mockApp.getApplicationId()).thenReturn(attemptId.getApplicationId());
  when(mockApp.getCurrentAppAttempt()).thenReturn(mockAttempt);

  context.getRMApps().putIfAbsent(attemptId.getApplicationId(), mockApp);
  return mockApp;
}
private void removeExcessAttempts(RMAppImpl app) { while (app.nextAttemptId - app.firstAttemptIdInStateStore > app.maxAppAttempts) { // attempts' first element is oldest attempt because it is a // LinkedHashMap ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance( app.getApplicationId(), app.firstAttemptIdInStateStore); RMAppAttempt rmAppAttempt = app.getRMAppAttempt(attemptId); long endTime = app.systemClock.getTime(); if (rmAppAttempt.getFinishTime() < (endTime - app.attemptFailuresValidityInterval)) { app.firstAttemptIdInStateStore++; LOG.info("Remove attempt from state store : " + attemptId); app.rmContext.getStateStore().removeApplicationAttempt(attemptId); } else { break; } } } }
/**
 * Forwards an application event to {@code application}, after asserting
 * that the event targets that application.
 * <p>
 * For failed-attempt events the transfer-state flag is captured first. Any
 * {@code Throwable} raised by the application's handler is logged rather
 * than propagated.
 *
 * @param event the event to deliver
 */
@Override
public void handle(RMAppEvent event) {
  assertEquals(application.getApplicationId(), event.getApplicationId());

  if (event instanceof RMAppFailedAttemptEvent) {
    RMAppFailedAttemptEvent failedAttempt = (RMAppFailedAttemptEvent) event;
    transferStateFromPreviousAttempt =
        failedAttempt.getTransferStateFromPreviousAttempt();
  }

  try {
    application.handle(event);
  } catch (Throwable t) {
    String failure = "Error in handling event type " + event.getType()
        + " for application " + application.getApplicationId();
    LOG.error(failure, t);
  }
}
} // closes an enclosing scope opened before this excerpt
/**
 * Forwards an application event to {@code application}, after asserting
 * that the event targets that application.
 * <p>
 * For failed-attempt events the transfer-state flag is captured first. Any
 * {@code Throwable} raised by the application's handler is logged rather
 * than propagated.
 *
 * @param event the event to deliver
 */
@Override
public void handle(RMAppEvent event) {
  assertEquals(application.getApplicationId(), event.getApplicationId());

  if (event instanceof RMAppFailedAttemptEvent) {
    RMAppFailedAttemptEvent failedAttempt = (RMAppFailedAttemptEvent) event;
    transferStateFromPreviousAttempt =
        failedAttempt.getTransferStateFromPreviousAttempt();
  }

  try {
    application.handle(event);
  } catch (Throwable t) {
    String failure = "Error in handling event type " + event.getType()
        + " for application " + application.getApplicationId();
    LOG.error(failure, t);
  }
}
} // closes an enclosing scope opened before this excerpt
/**
 * Registers a mocked application and attempt with the RM context, then
 * sends an app-added and an attempt-added event to the scheduler.
 *
 * @param rm        resource manager supplying the RM context
 * @param cs        scheduler that receives the two events
 * @param clusterTs cluster timestamp used to build the application id
 * @param appId     numeric id used for both the application id and the
 *                  attempt id
 * @param queue     queue named in the app-added event
 * @param user      user named in the app-added event
 * @return the id of the attempt that was added
 */
private ApplicationAttemptId appHelper(MockRM rm, CapacityScheduler cs,
    int clusterTs, int appId, String queue, String user) {
  ApplicationId applicationId =
      BuilderUtils.newApplicationId(clusterTs, appId);
  ApplicationAttemptId attemptId =
      BuilderUtils.newApplicationAttemptId(applicationId, appId);
  RMAppAttemptMetrics attemptMetrics =
      new RMAppAttemptMetrics(attemptId, rm.getRMContext());

  // Attempt mock: master container, submission context, id and metrics.
  RMAppAttemptImpl mockAttempt = mock(RMAppAttemptImpl.class);
  Container masterContainer = mock(Container.class);
  when(mockAttempt.getMasterContainer()).thenReturn(masterContainer);
  ApplicationSubmissionContext mockSubmission =
      mock(ApplicationSubmissionContext.class);
  when(mockAttempt.getSubmissionContext()).thenReturn(mockSubmission);
  when(mockAttempt.getAppAttemptId()).thenReturn(attemptId);
  when(mockAttempt.getRMAppAttemptMetrics()).thenReturn(attemptMetrics);

  // Application mock: id and current attempt, published in the RM context.
  RMAppImpl mockApp = mock(RMAppImpl.class);
  when(mockApp.getApplicationId()).thenReturn(applicationId);
  when(mockApp.getCurrentAppAttempt()).thenReturn(mockAttempt);
  rm.getRMContext().getRMApps().put(applicationId, mockApp);

  SchedulerEvent appAdded =
      new AppAddedSchedulerEvent(applicationId, queue, user);
  cs.handle(appAdded);
  SchedulerEvent attemptAdded =
      new AppAttemptAddedSchedulerEvent(attemptId, false);
  cs.handle(attemptAdded);

  return attemptId;
}
new RMAppAttemptMetrics(appAttemptId, rm.getRMContext()); RMAppImpl app = mock(RMAppImpl.class); when(app.getApplicationId()).thenReturn(appId); RMAppAttemptImpl attempt = mock(RMAppAttemptImpl.class); when(attempt.getAppAttemptId()).thenReturn(appAttemptId);
private void testUnmanagedAMSuccess(String url) { unmanagedAM = true; when(submissionContext.getUnmanagedAM()).thenReturn(true); // submit AM and check it goes to LAUNCHED state scheduleApplicationAttempt(); testAppAttemptLaunchedState(null); verify(amLivelinessMonitor, times(1)).register( applicationAttempt.getAppAttemptId()); // launch AM runApplicationAttempt(null, "host", 8042, url, true); // complete a container Container container = mock(Container.class); when(container.getNodeId()).thenReturn(NodeId.newInstance("host", 1234)); application.handle(new RMAppRunningOnNodeEvent(application.getApplicationId(), container.getNodeId())); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), mock(ContainerStatus.class), container.getNodeId())); // complete AM String diagnostics = "Successful"; FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; applicationAttempt.handle(new RMAppAttemptUnregistrationEvent( applicationAttempt.getAppAttemptId(), url, finalStatus, diagnostics)); testAppAttemptFinishedState(null, finalStatus, url, diagnostics, 1, true); assertFalse(transferStateFromPreviousAttempt); }
private void testUnmanagedAMSuccess(String url) { unmanagedAM = true; when(submissionContext.getUnmanagedAM()).thenReturn(true); // submit AM and check it goes to LAUNCHED state scheduleApplicationAttempt(); testAppAttemptLaunchedState(null, RMAppAttemptState.LAUNCHED); verify(amLivelinessMonitor, times(1)).register( applicationAttempt.getAppAttemptId()); // launch AM runApplicationAttempt(null, "host", 8042, url, true); // complete a container Container container = mock(Container.class); when(container.getNodeId()).thenReturn(NodeId.newInstance("host", 1234)); application.handle(new RMAppRunningOnNodeEvent(application.getApplicationId(), container.getNodeId())); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), mock(ContainerStatus.class), container.getNodeId())); // complete AM String diagnostics = "Successful"; FinalApplicationStatus finalStatus = FinalApplicationStatus.SUCCEEDED; applicationAttempt.handle(new RMAppAttemptUnregistrationEvent( applicationAttempt.getAppAttemptId(), url, finalStatus, diagnostics)); sendAttemptUpdateSavedEvent(applicationAttempt); testAppAttemptFinishedState(null, finalStatus, url, diagnostics, 1, true); assertFalse(transferStateFromPreviousAttempt); }
new RMAppRecoverEvent(application.getApplicationId(), rmState);
public void testRecoverApplication(ApplicationStateData appState, RMState rmState) throws Exception { ApplicationSubmissionContext submissionContext = appState.getApplicationSubmissionContext(); RMAppImpl application = new RMAppImpl( appState.getApplicationSubmissionContext().getApplicationId(), rmContext, conf, submissionContext.getApplicationName(), null, submissionContext.getQueue(), submissionContext, null, null, appState.getSubmitTime(), submissionContext.getApplicationType(), submissionContext.getApplicationTags(), BuilderUtils.newResourceRequest( RMAppAttemptImpl.AM_CONTAINER_PRIORITY, ResourceRequest.ANY, submissionContext.getResource(), 1)); Assert.assertEquals(RMAppState.NEW, application.getState()); RMAppEvent recoverEvent = new RMAppRecoverEvent(application.getApplicationId(), rmState); // Trigger RECOVER event. application.handle(recoverEvent); // Application final status looked from recoveredFinalStatus Assert.assertTrue("Application is not in recoveredFinalStatus.", RMAppImpl.isAppInFinalState(application)); rmDispatcher.await(); RMAppState finalState = appState.getState(); Assert.assertEquals("Application is not in finalState.", finalState, application.getState()); }