/**
 * Recovers a single container described by the given recovered state.
 *
 * <p>The container id and owning application id are derived from the
 * container token carried in the recovered start request. When the
 * application is present in the context, a {@link ContainerImpl} is built
 * from the recovered state, registered in the context's container map, and
 * announced to the application through an
 * {@link ApplicationContainerInitEvent}. When the application is absent, the
 * container id is handed to the node status updater as a completed
 * container; a warning is logged first unless the recovered status is
 * already {@code COMPLETED}.
 *
 * @param rcs the recovered container state to restore from
 * @throws IOException propagated from the helper calls made here
 *     (presumably token or credential parsing -- confirm against callees)
 */
@SuppressWarnings("unchecked")
private void recoverContainer(RecoveredContainerState rcs)
    throws IOException {
  final StartContainerRequest startRequest = rcs.getStartRequest();
  final ContainerLaunchContext clc = startRequest.getContainerLaunchContext();
  final ContainerTokenIdentifier tokenId =
      BuilderUtils.newContainerTokenIdentifier(
          startRequest.getContainerToken());
  final ContainerId recoveredId = tokenId.getContainerID();
  final ApplicationId owningAppId =
      recoveredId.getApplicationAttemptId().getApplicationId();

  LOG.info("Recovering " + recoveredId + " in state " + rcs.getStatus()
      + " with exit code " + rcs.getExitCode());

  final Application owner = context.getApplications().get(owningAppId);
  if (owner == null) {
    // No application to attach to: only worth a warning when the container
    // was not already finished.
    if (rcs.getStatus() != RecoveredContainerStatus.COMPLETED) {
      LOG.warn(recoveredId + " has no corresponding application!");
    }
    LOG.info("Adding " + recoveredId + " to recently stopped containers");
    nodeStatusUpdater.addCompletedContainer(recoveredId);
    return;
  }

  final Credentials creds = YarnServerSecurityUtils.parseCredentials(clc);
  final Container recovered = new ContainerImpl(getConfig(), dispatcher,
      startRequest.getContainerLaunchContext(), creds, metrics, tokenId,
      context, rcs);
  context.getContainers().put(recoveredId, recovered);
  owner.handle(new ApplicationContainerInitEvent(recovered));
}
/**
 * Recovers a single container described by the given recovered state.
 *
 * <p>The container id and owning application id are derived from the
 * container token carried in the recovered start request. If the context
 * knows the application, a {@link ContainerImpl} is rebuilt from the
 * recovered status, exit code, diagnostics and killed flag, stored in the
 * context's container map, and an {@link ApplicationContainerInitEvent} is
 * sent through the dispatcher's event handler. Otherwise the container id is
 * handed to the node status updater as a completed container, with a
 * warning logged first unless the recovered status is already
 * {@code COMPLETED}.
 *
 * @param rcs the recovered container state to restore from
 * @throws IOException propagated from the helper calls made here
 *     (presumably token or credential parsing -- confirm against callees)
 */
@SuppressWarnings("unchecked")
private void recoverContainer(RecoveredContainerState rcs)
    throws IOException {
  final StartContainerRequest startRequest = rcs.getStartRequest();
  final ContainerLaunchContext clc = startRequest.getContainerLaunchContext();
  final ContainerTokenIdentifier tokenId =
      BuilderUtils.newContainerTokenIdentifier(
          startRequest.getContainerToken());
  final ContainerId recoveredId = tokenId.getContainerID();
  final ApplicationId owningAppId =
      recoveredId.getApplicationAttemptId().getApplicationId();

  LOG.info("Recovering " + recoveredId + " in state " + rcs.getStatus()
      + " with exit code " + rcs.getExitCode());

  if (!context.getApplications().containsKey(owningAppId)) {
    // Orphaned container: only worth a warning when it was not already
    // finished.
    if (rcs.getStatus() != RecoveredContainerStatus.COMPLETED) {
      LOG.warn(recoveredId + " has no corresponding application!");
    }
    LOG.info("Adding " + recoveredId + " to recently stopped containers");
    nodeStatusUpdater.addCompletedContainer(recoveredId);
    return;
  }

  final Credentials creds = parseCredentials(clc);
  final Container recovered = new ContainerImpl(getConfig(), dispatcher,
      startRequest.getContainerLaunchContext(), creds, metrics, tokenId,
      rcs.getStatus(), rcs.getExitCode(), rcs.getDiagnostics(),
      rcs.getKilled(), context);
  context.getContainers().put(recoveredId, recovered);
  dispatcher.getEventHandler().handle(
      new ApplicationContainerInitEvent(recovered));
}
private void waitForContainerToFinishOnNM(ContainerId containerId) { Context nmContet = yarnCluster.getNodeManager(0).getNMContext(); int interval = 4 * 60; // Max time for container token to expire. Assert.assertNotNull(nmContet.getContainers().containsKey(containerId)); while ((interval-- > 0) && !nmContet.getContainers().get(containerId) .cloneAndGetContainerStatus().getState() .equals(ContainerState.COMPLETE)) { try { LOG.info("Waiting for " + containerId + " to complete."); Thread.sleep(1000); } catch (InterruptedException e) { } } // Normally, Containers will be removed from NM context after they are // explicitly acked by RM. Now, manually remove it for testing. yarnCluster.getNodeManager(0).getNodeStatusUpdater() .addCompletedContainer(containerId); nmContet.getContainers().remove(containerId); }
/**
 * Restores one container from its recovered state.
 *
 * <p>Reads the start request out of {@code rcs}, derives the container and
 * application ids from the request's container token, and then takes one of
 * two paths. With no matching application in the context, the container id
 * is passed to the node status updater as completed (logging a warning
 * first when the recovered status is not {@code COMPLETED}). With a matching
 * application, a {@link ContainerImpl} is created from the recovered status,
 * exit code, diagnostics and killed flag, placed in the context's container
 * map, and an {@link ApplicationContainerInitEvent} is dispatched for it.
 *
 * @param rcs the recovered container state to restore from
 * @throws IOException propagated from the helper calls made here
 *     (presumably token or credential parsing -- confirm against callees)
 */
@SuppressWarnings("unchecked")
private void recoverContainer(RecoveredContainerState rcs)
    throws IOException {
  StartContainerRequest request = rcs.getStartRequest();
  ContainerLaunchContext ctx = request.getContainerLaunchContext();
  ContainerTokenIdentifier identifier =
      BuilderUtils.newContainerTokenIdentifier(request.getContainerToken());
  ContainerId id = identifier.getContainerID();
  ApplicationId applicationId =
      id.getApplicationAttemptId().getApplicationId();

  LOG.info("Recovering " + id + " in state " + rcs.getStatus()
      + " with exit code " + rcs.getExitCode());

  boolean applicationKnown =
      context.getApplications().containsKey(applicationId);
  if (!applicationKnown) {
    boolean alreadyCompleted =
        rcs.getStatus() == RecoveredContainerStatus.COMPLETED;
    if (!alreadyCompleted) {
      LOG.warn(id + " has no corresponding application!");
    }
    LOG.info("Adding " + id + " to recently stopped containers");
    nodeStatusUpdater.addCompletedContainer(id);
  } else {
    Credentials parsed = parseCredentials(ctx);
    Container rebuilt = new ContainerImpl(getConfig(), dispatcher,
        request.getContainerLaunchContext(), parsed, metrics, identifier,
        rcs.getStatus(), rcs.getExitCode(), rcs.getDiagnostics(),
        rcs.getKilled(), context);
    context.getContainers().put(id, rebuilt);
    dispatcher.getEventHandler().handle(
        new ApplicationContainerInitEvent(rebuilt));
  }
}
private void waitForContainerToFinishOnNM(ContainerId containerId) throws InterruptedException { Context nmContext = yarnCluster.getNodeManager(0).getNMContext(); // Max time for container token to expire. final int timeout = 4 * 60 * 1000; // If the container is null, then it has already completed and been removed // from the Context by asynchronous calls. Container waitContainer = nmContext.getContainers().get(containerId); if (waitContainer != null) { try { LOG.info("Waiting for " + containerId + " to get to state " + ContainerState.COMPLETE); GenericTestUtils.waitFor(() -> ContainerState.COMPLETE.equals( waitContainer.cloneAndGetContainerStatus().getState()), 500, timeout); } catch (TimeoutException te) { LOG.error("TimeoutException", te); fail("Was waiting for " + containerId + " to get to state " + ContainerState.COMPLETE + " but was in state " + waitContainer.cloneAndGetContainerStatus().getState() + " after the timeout"); } } // Normally, Containers will be removed from NM context after they are // explicitly acked by RM. Now, manually remove it for testing. yarnCluster.getNodeManager(0).getNodeStatusUpdater() .addCompletedContainer(containerId); LOG.info("Removing container from NMContext, containerID = " + containerId); nmContext.getContainers().remove(containerId); }
// Hand containerId to the node status updater as a completed container.
// NOTE(review): this statement has no enclosing method in view -- confirm
// where it belongs.
nodeStatusUpdater.addCompletedContainer(containerId);