ConcurrentMap<ContainerId, Container> containers = nm.getNMContext().getContainers(); for (Map.Entry<ContainerId, Container> entry : containers.entrySet()) { String command = StringUtils.join(entry.getValue().getLaunchContext().getCommands(), " "); if (command.contains(YarnTaskExecutorRunner.class.getSimpleName())) { taskManagerContainer = entry.getKey();
/**
 * Polls the containers held by {@code context} until none of them is in the
 * {@code NEW} state, or until the polling budget is used up.
 *
 * <p>At most 100 checks are made, with a 100 ms sleep after every check that
 * still finds {@code NEW} containers. If the budget runs out a warning is
 * logged and the method returns normally.
 *
 * @throws InterruptedException if the thread is interrupted while sleeping
 */
private void waitForRecoveredContainers() throws InterruptedException {
  final int pollIntervalMs = 100;
  final int maxPolls = 100;
  final List<ContainerId> pending = new ArrayList<ContainerId>();

  for (int poll = 0; poll < maxPolls; poll++) {
    // Rebuild the list from scratch on every pass.
    pending.clear();
    for (Container candidate : context.getContainers().values()) {
      boolean stillNew = candidate.getContainerState()
          == org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.NEW;
      if (stillNew) {
        pending.add(candidate.getContainerId());
      }
    }

    if (pending.isEmpty()) {
      // Nothing is left in NEW: done, and no timeout warning is due.
      return;
    }

    LOG.info("Waiting for containers: " + pending);
    Thread.sleep(pollIntervalMs);
  }

  // Only reached when every poll still saw at least one NEW container.
  LOG.warn("Timeout waiting for recovered containers");
}
/**
 * Fetches the localized resources of {@code container}.
 *
 * @return the non-null map returned by
 *         {@code container.getLocalizedResources()}
 * @throws YarnException if the container returns {@code null}; the message
 *         includes the container and its current state
 */
protected Map<Path, List<String>> getLocalizedResources()
    throws YarnException {
  final Map<Path, List<String>> localized = container.getLocalizedResources();
  if (localized != null) {
    return localized;
  }
  throw RPCUtil.getRemoteException(
      "Unable to get local resources when Container " + container
          + " is at " + container.getContainerState());
}
private void startContainer(Container container) { LOG.info("Starting container [" + container.getContainerId()+ "]"); // Skip to put into runningContainers and addUtilization when recover if (!runningContainers.containsKey(container.getContainerId())) { runningContainers.put(container.getContainerId(), container); this.utilizationTracker.addContainerResources(container); } if (container.getContainerTokenIdentifier().getExecutionType() == ExecutionType.OPPORTUNISTIC) { this.metrics.startOpportunisticContainer(container.getResource()); } container.sendLaunchEvent(); }
private void shedQueuedOpportunisticContainers() { int numAllowed = this.queuingLimit.getMaxQueueLength(); Iterator<Container> containerIter = queuedOpportunisticContainers.values().iterator(); while (containerIter.hasNext()) { Container container = containerIter.next(); // Do not shed PAUSED containers if (container.getContainerState() != ContainerState.PAUSED) { if (numAllowed <= 0) { container.sendKillEvent( ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER, "Container De-queued to meet NM queuing limits."); containerIter.remove(); LOG.info( "Opportunistic container {} will be killed to meet NM queuing" + " limits.", container.getContainerId()); } numAllowed--; } } }
@Override @SuppressWarnings("unchecked") // dispatcher not typed public Integer call() { final ContainerLaunchContext launchContext = container.getLaunchContext(); Map<Path,List<String>> localResources = null; ContainerId containerID = container.getContainerId(); String containerIdStr = ConverterUtils.toString(containerID); final List<String> command = launchContext.getCommands(); if (container.getContainerState() == ContainerState.KILLING) { dispatcher.getEventHandler().handle( new ContainerExitEvent(containerID, localResources = container.getLocalizedResources(); if (localResources == null) { throw RPCUtil.getRemoteException( "Unable to get local resources when Container " + containerID + " is at " + container.getContainerState()); final String user = container.getUser(); Credentials creds = container.getCredentials(); creds.writeTokenStorageToStream(tokensOutStream);
public ContainerInfo(final Context nmContext, final Container container, String requestUri, String pathPrefix, String remoteUser) { this.id = container.getContainerId().toString(); this.nodeId = nmContext.getNodeId().toString(); ContainerStatus containerData = container.cloneAndGetContainerStatus(); this.exitCode = containerData.getExitStatus(); this.exitStatus = (this.exitCode == ContainerExitStatus.INVALID) ? "N/A" : String.valueOf(exitCode); this.state = container.getContainerState().toString(); this.diagnostics = containerData.getDiagnostics(); if (this.diagnostics == null || this.diagnostics.isEmpty()) { this.user = container.getUser(); Resource res = container.getResource(); if (res != null) { this.totalMemoryNeededMB = res.getMemorySize(); container.getUser()); this.containerLogsShortLink); this.containerLogFiles = getContainerLogFiles(container.getContainerId(), remoteUser, nmContext);
/**
 * Sleeps for {@code delay} milliseconds and then asks
 * {@code containerExecutor} to deliver {@code signal} to process
 * {@code pid} on behalf of {@code user}.
 *
 * <p>If the thread is interrupted first, no signal is sent; the interrupt
 * status is restored so that code further up the stack can still observe
 * the interruption. If signalling fails with an {@link IOException}, the
 * failure is logged as a warning and reported to the container as a
 * diagnostics update.
 */
@Override
public void run() {
  try {
    Thread.sleep(delay);
    containerExecutor.signalContainer(user, pid, signal);
  } catch (InterruptedException e) {
    // Catching InterruptedException clears the interrupt flag; set it again
    // instead of silently swallowing the interruption.
    Thread.currentThread().interrupt();
  } catch (IOException e) {
    String message = "Exception when user " + user + " killing task " + pid
        + " in DelayedProcessKiller: " + StringUtils.stringifyException(e);
    LOG.warn(message);
    container.handle(new ContainerDiagnosticsUpdateEvent(
        container.getContainerId(), message));
  }
}
// Closes the enclosing type, whose header lies outside this excerpt.
}
String runAsUser = getRunAsUser(user); ContainerId containerId = container.getContainerId(); String containerIdStr = ConverterUtils.toString(containerId); container.getResource()); String resourcesOptions = resourcesHandler.getResourcesOption( containerId); String[] commandArray = command.toArray(new String[command.size()]); shExec = new ShellCommandExecutor(commandArray, null, // NM's cwd container.getLaunchContext().getEnvironment()); // sanitized env if (LOG.isDebugEnabled()) { LOG.debug("launchContainer: " + Arrays.toString(commandArray)); container.handle(new ContainerDiagnosticsUpdateEvent(containerId, diagnostics)); } else { container.handle(new ContainerDiagnosticsUpdateEvent(containerId, "Container killed on request. Exit code is " + exitCode));
EnumSet.of(ContainerState.LOCALIZING, ContainerState.RUNNING, ContainerState.REINITIALIZING); if (!set.contains(c.getContainerState())) { LOG.warn(c.getContainerId() + " is at " + c.getContainerState() + " state, do not localize resources."); return; CacheBuilder.newBuilder().build(FSDownload.createStatusCacheLoader(getConfig())); LocalizerContext ctxt = new LocalizerContext( c.getUser(), c.getContainerId(), c.getCredentials(), statCache); Map<LocalResourceVisibility, Collection<LocalResourceRequest>> rsrcs = rsrcReqs.getRequestedResources(); rsrcs.entrySet()) { LocalResourcesTracker tracker = getLocalResourcesTracker(e.getKey(), c.getUser(), c.getContainerId().getApplicationAttemptId() .getApplicationId()); for (LocalResourceRequest req : e.getValue()) { if (LOG.isDebugEnabled()) { LOG.debug("Localizing " + req.getPath() + " for container " + c.getContainerId());
final ContainerLaunchContext launchContext = container.getLaunchContext(); ContainerId containerID = container.getContainerId(); String containerIdStr = containerID.toString(); final List<String> command = launchContext.getCommands(); Map<Path, List<String>> localResources = getLocalizedResources(); final String user = container.getUser(); Credentials creds = container.getCredentials(); creds.writeTokenStorageToStream(tokensOutStream);
private void onResourcesReclaimed(Container container) { oppContainersToKill.remove(container.getContainerId()); queuedOpportunisticContainers.remove(container.getContainerId()); if (queued == null) { queuedGuaranteedContainers.remove(container.getContainerId()); if (container.getContainerState() == ContainerState.PAUSED) { if (container.getContainerTokenIdentifier().getExecutionType() == ExecutionType.GUARANTEED) { queuedGuaranteedContainers.put(container.getContainerId(), container); } else { queuedOpportunisticContainers.put( container.getContainerId(), container); .getContainerId()); if (container.getContainerTokenIdentifier().getExecutionType() == ExecutionType.OPPORTUNISTIC) { this.metrics.completeOpportunisticContainer(container.getResource());
List<ContainerStatus> containerStatuses = new ArrayList<ContainerStatus>(); for (Container container : this.context.getContainers().values()) { ContainerId containerId = container.getContainerId(); ApplicationId applicationId = containerId.getApplicationAttemptId() .getApplicationId(); org.apache.hadoop.yarn.api.records.ContainerStatus containerStatus = container.cloneAndGetContainerStatus(); if (containerStatus.getState() == ContainerState.COMPLETE) { if (isApplicationStopped(applicationId)) {
Map<String, String> environment = container.getLaunchContext().getEnvironment(); EnvironmentUtils.putAll(environment); container.handle(new ContainerDiagnosticsUpdateEvent(container.getContainerId(), e.getMessage())); return -1;
/** * For each of the requested resources for a container, determines the * appropriate {@link LocalResourcesTracker} and forwards a * {@link LocalResourceRequest} to that tracker. */ private void handleInitContainerResources( ContainerLocalizationRequestEvent rsrcReqs) { Container c = rsrcReqs.getContainer(); // create a loading cache for the file statuses LoadingCache<Path,Future<FileStatus>> statCache = CacheBuilder.newBuilder().build(FSDownload.createStatusCacheLoader(getConfig())); LocalizerContext ctxt = new LocalizerContext( c.getUser(), c.getContainerId(), c.getCredentials(), statCache); Map<LocalResourceVisibility, Collection<LocalResourceRequest>> rsrcs = rsrcReqs.getRequestedResources(); for (Map.Entry<LocalResourceVisibility, Collection<LocalResourceRequest>> e : rsrcs.entrySet()) { LocalResourcesTracker tracker = getLocalResourcesTracker(e.getKey(), c.getUser(), c.getContainerId().getApplicationAttemptId() .getApplicationId()); for (LocalResourceRequest req : e.getValue()) { tracker.handle(new ResourceRequestEvent(req, e.getKey(), ctxt)); } } }
/**
 * Resume the container from pause state. This default implementation does
 * not support resuming: it logs a warning and throws
 * {@link UnsupportedOperationException}. Specific implementations can
 * override this behavior.
 *
 * @param container
 *          the Container
 * @throws UnsupportedOperationException always, in this default
 *           implementation
 */
public void resumeContainer(Container container) {
  // Reuse the warning text as the exception message so the failure is
  // self-explanatory wherever the exception surfaces.
  final String message =
      container.getContainerId() + " doesn't support resume.";
  LOG.warn(message);
  throw new UnsupportedOperationException(message);
}
private List<NMContainerStatus> getNMContainerStatuses() throws IOException { List<NMContainerStatus> containerStatuses = new ArrayList<NMContainerStatus>(); for (Container container : this.context.getContainers().values()) { ContainerId containerId = container.getContainerId(); ApplicationId applicationId = containerId.getApplicationAttemptId() .getApplicationId(); if (!this.context.getApplications().containsKey(applicationId)) { context.getContainers().remove(containerId); continue; } NMContainerStatus status = container.getNMContainerStatus(); containerStatuses.add(status); if (status.getContainerState() == ContainerState.COMPLETE) { // Adding to finished containers cache. Cache will keep it around at // least for #durationToTrackStoppedContainers duration. In the // subsequent call to stop container it will get removed from cache. addCompletedContainer(containerId); } } LOG.info("Sending out " + containerStatuses.size() + " NM container statuses: " + containerStatuses); return containerStatuses; }
/**
 * Perform any cleanup before the next launch of the container.
 *
 * <p>When the container has localized resources, their symlinks are
 * resolved for the container's user and each resolved link target is
 * deleted as that user. Nothing is done when the container reports no
 * localized resources.
 *
 * @param container container
 * @throws IOException declared by the signature; presumably raised by the
 *           deletion step (confirm against {@code deleteAsUser})
 * @throws InterruptedException declared by the signature; presumably raised
 *           by the deletion step (confirm against {@code deleteAsUser})
 */
public void cleanupBeforeRelaunch(Container container)
    throws IOException, InterruptedException {
  // Read the resources once: checking one call's result for null and then
  // using the result of a second call would let a change in between slip a
  // null into resolveSymLinks.
  final Map<Path, java.util.List<String>> localizedResources =
      container.getLocalizedResources();
  if (localizedResources == null) {
    return;
  }

  Map<Path, Path> symLinks =
      resolveSymLinks(localizedResources, container.getUser());
  for (Map.Entry<Path, Path> symLink : symLinks.entrySet()) {
    LOG.debug("{} deleting {}", container.getContainerId(),
        symLink.getValue());
    deleteAsUser(new DeletionAsUserContext.Builder()
        .setUser(container.getUser())
        .setSubDir(symLink.getValue())
        .build());
  }
}
/** * For each of the requested resources for a container, determines the * appropriate {@link LocalResourcesTracker} and forwards a * {@link LocalResourceRequest} to that tracker. */ private void handleInitContainerResources( ContainerLocalizationRequestEvent rsrcReqs) { Container c = rsrcReqs.getContainer(); // create a loading cache for the file statuses LoadingCache<Path,Future<FileStatus>> statCache = CacheBuilder.newBuilder().build(FSDownload.createStatusCacheLoader(getConfig())); LocalizerContext ctxt = new LocalizerContext( c.getUser(), c.getContainerId(), c.getCredentials(), statCache, c.getUserFolder()); Map<LocalResourceVisibility, Collection<LocalResourceRequest>> rsrcs = rsrcReqs.getRequestedResources(); for (Map.Entry<LocalResourceVisibility, Collection<LocalResourceRequest>> e : rsrcs.entrySet()) { LocalResourcesTracker tracker = getLocalResourcesTracker(e.getKey(), c.getUser(), c.getContainerId().getApplicationAttemptId() .getApplicationId()); for (LocalResourceRequest req : e.getValue()) { tracker.handle(new ResourceRequestEvent(req, e.getKey(), ctxt)); if (LOG.isDebugEnabled()) { LOG.debug("Localizing " + req.getPath() + " for container " + c.getContainerId()); } } } }
/** * Reap the docker container. * * @param ctx the {@link ContainerRuntimeContext}. * @throws ContainerExecutionException if the removal fails. */ @Override public void reapContainer(ContainerRuntimeContext ctx) throws ContainerExecutionException { // Clean up the Docker container handleContainerRemove(ctx.getContainer().getContainerId().toString(), ctx.getContainer().getLaunchContext().getEnvironment()); // Cleanup volumes when needed. if (nmContext != null && nmContext.getResourcePluginManager().getNameToPlugins() != null) { for (ResourcePlugin plugin : nmContext.getResourcePluginManager() .getNameToPlugins().values()) { DockerCommandPlugin dockerCommandPlugin = plugin.getDockerCommandPluginInstance(); if (dockerCommandPlugin != null) { DockerVolumeCommand dockerVolumeCommand = dockerCommandPlugin.getCleanupDockerVolumesCommand( ctx.getContainer()); if (dockerVolumeCommand != null) { runDockerVolumeCommand(dockerVolumeCommand, ctx.getContainer()); } } } } }