private void waitForRecoveredContainers() throws InterruptedException { final int sleepMsec = 100; int waitIterations = 100; List<ContainerId> newContainers = new ArrayList<ContainerId>(); while (--waitIterations >= 0) { newContainers.clear(); for (Container container : context.getContainers().values()) { if (container.getContainerState() == org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.NEW) { newContainers.add(container.getContainerId()); } } if (newContainers.isEmpty()) { break; } LOG.info("Waiting for containers: " + newContainers); Thread.sleep(sleepMsec); } if (waitIterations < 0) { LOG.warn("Timeout waiting for recovered containers"); } }
private void waitForRecoveredContainers() throws InterruptedException { final int sleepMsec = 100; int waitIterations = 100; List<ContainerId> newContainers = new ArrayList<ContainerId>(); while (--waitIterations >= 0) { newContainers.clear(); for (Container container : context.getContainers().values()) { if (container.getContainerState() == org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.NEW) { newContainers.add(container.getContainerId()); } } if (newContainers.isEmpty()) { break; } LOG.info("Waiting for containers: " + newContainers); Thread.sleep(sleepMsec); } if (waitIterations < 0) { LOG.warn("Timeout waiting for recovered containers"); } }
private void waitForRecoveredContainers() throws InterruptedException { final int sleepMsec = 100; int waitIterations = 100; List<ContainerId> newContainers = new ArrayList<ContainerId>(); while (--waitIterations >= 0) { newContainers.clear(); for (Container container : context.getContainers().values()) { if (container.getContainerState() == org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.NEW) { newContainers.add(container.getContainerId()); } } if (newContainers.isEmpty()) { break; } LOG.info("Waiting for containers: " + newContainers); Thread.sleep(sleepMsec); } if (waitIterations < 0) { LOG.warn("Timeout waiting for recovered containers"); } }
private void waitForRecoveredContainers() throws InterruptedException { final int sleepMsec = 100; int waitIterations = 100; List<ContainerId> newContainers = new ArrayList<ContainerId>(); while (--waitIterations >= 0) { newContainers.clear(); for (Container container : context.getContainers().values()) { if (container.getContainerState() == org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.NEW) { newContainers.add(container.getContainerId()); } } if (newContainers.isEmpty()) { break; } LOG.info("Waiting for containers: " + newContainers); Thread.sleep(sleepMsec); } if (waitIterations < 0) { LOG.warn("Timeout waiting for recovered containers"); } }
protected Map<Path, List<String>> getLocalizedResources() throws YarnException { Map<Path, List<String>> localResources = container.getLocalizedResources(); if (localResources == null) { throw RPCUtil.getRemoteException( "Unable to get local resources when Container " + container + " is at " + container.getContainerState()); } return localResources; }
private void shedQueuedOpportunisticContainers() { int numAllowed = this.queuingLimit.getMaxQueueLength(); Iterator<Container> containerIter = queuedOpportunisticContainers.values().iterator(); while (containerIter.hasNext()) { Container container = containerIter.next(); // Do not shed PAUSED containers if (container.getContainerState() != ContainerState.PAUSED) { if (numAllowed <= 0) { container.sendKillEvent( ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER, "Container De-queued to meet NM queuing limits."); containerIter.remove(); LOG.info( "Opportunistic container {} will be killed to meet NM queuing" + " limits.", container.getContainerId()); } numAllowed--; } } }
@VisibleForTesting @Private public void removeOrTrackCompletedContainersFromContext( List<ContainerId> containerIds) throws IOException { Set<ContainerId> removedContainers = new HashSet<ContainerId>(); pendingContainersToRemove.addAll(containerIds); Iterator<ContainerId> iter = pendingContainersToRemove.iterator(); while (iter.hasNext()) { ContainerId containerId = iter.next(); // remove the container only if the container is at DONE state Container nmContainer = context.getContainers().get(containerId); if (nmContainer == null) { iter.remove(); } else if (nmContainer.getContainerState().equals( org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.DONE)) { context.getContainers().remove(containerId); removedContainers.add(containerId); iter.remove(); } } if (!removedContainers.isEmpty()) { LOG.info("Removed completed containers from NM context: " + removedContainers); } pendingCompletedContainers.clear(); }
@VisibleForTesting @Private public void removeOrTrackCompletedContainersFromContext( List<ContainerId> containerIds) throws IOException { Set<ContainerId> removedContainers = new HashSet<ContainerId>(); pendingContainersToRemove.addAll(containerIds); Iterator<ContainerId> iter = pendingContainersToRemove.iterator(); while (iter.hasNext()) { ContainerId containerId = iter.next(); // remove the container only if the container is at DONE state Container nmContainer = context.getContainers().get(containerId); if (nmContainer == null) { iter.remove(); } else if (nmContainer.getContainerState().equals( org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.DONE)) { context.getContainers().remove(containerId); removedContainers.add(containerId); iter.remove(); } } if (!removedContainers.isEmpty()) { LOG.info("Removed completed containers from NM context: " + removedContainers); } pendingCompletedContainers.clear(); }
@VisibleForTesting @Private public void removeOrTrackCompletedContainersFromContext( List<ContainerId> containerIds) throws IOException { Set<ContainerId> removedContainers = new HashSet<ContainerId>(); pendingContainersToRemove.addAll(containerIds); Iterator<ContainerId> iter = pendingContainersToRemove.iterator(); while (iter.hasNext()) { ContainerId containerId = iter.next(); // remove the container only if the container is at DONE state Container nmContainer = context.getContainers().get(containerId); if (nmContainer == null) { iter.remove(); } else if (nmContainer.getContainerState().equals( org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.DONE)) { context.getContainers().remove(containerId); removedContainers.add(containerId); iter.remove(); } } if (!removedContainers.isEmpty()) { LOG.info("Removed completed containers from NM context: " + removedContainers); } pendingCompletedContainers.clear(); }
@VisibleForTesting @Private public void removeOrTrackCompletedContainersFromContext( List<ContainerId> containerIds) throws IOException { Set<ContainerId> removedContainers = new HashSet<ContainerId>(); pendingContainersToRemove.addAll(containerIds); Iterator<ContainerId> iter = pendingContainersToRemove.iterator(); while (iter.hasNext()) { ContainerId containerId = iter.next(); // remove the container only if the container is at DONE state Container nmContainer = context.getContainers().get(containerId); if (nmContainer == null) { iter.remove(); } else if (nmContainer.getContainerState().equals( org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState.DONE)) { context.getContainers().remove(containerId); removedContainers.add(containerId); iter.remove(); } } if (!removedContainers.isEmpty()) { LOG.info("Removed completed containers from NM context: " + removedContainers); } pendingCompletedContainers.clear(); }
/** * Finds the log file with the given filename for the given container. */ public static File getContainerLogFile(ContainerId containerId, String fileName, String remoteUser, Context context) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); if (container != null) { checkState(container.getContainerState()); } try { LocalDirsHandlerService dirsHandler = context.getLocalDirsHandler(); String relativeContainerLogDir = ContainerLaunch.getRelativeContainerLogDir( application.getAppId().toString(), containerId.toString()); Path logPath = dirsHandler.getLogPathToRead( relativeContainerLogDir + Path.SEPARATOR + fileName); URI logPathURI = new File(logPath.toString()).toURI(); File logFile = new File(logPathURI.getPath()); return logFile; } catch (IOException e) { LOG.warn("Failed to find log file", e); throw new NotFoundException("Cannot find this log on the local disk."); } }
/** * Finds the log file with the given filename for the given container. */ public static File getContainerLogFile(ContainerId containerId, String fileName, String remoteUser, Context context) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); if (container != null) { checkState(container.getContainerState()); } try { LocalDirsHandlerService dirsHandler = context.getLocalDirsHandler(); String relativeContainerLogDir = ContainerLaunch.getRelativeContainerLogDir( application.getAppId().toString(), containerId.toString()); Path logPath = dirsHandler.getLogPathToRead( relativeContainerLogDir + Path.SEPARATOR + fileName); URI logPathURI = new File(logPath.toString()).toURI(); File logFile = new File(logPathURI.getPath()); return logFile; } catch (IOException e) { LOG.warn("Failed to find log file", e); throw new NotFoundException("Cannot find this log on the local disk."); } }
/** * Finds the log file with the given filename for the given container. */ public static File getContainerLogFile(ContainerId containerId, String fileName, String remoteUser, Context context) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); if (container != null) { checkState(container.getContainerState()); } try { LocalDirsHandlerService dirsHandler = context.getLocalDirsHandler(); String relativeContainerLogDir = ContainerLaunch.getRelativeContainerLogDir( application.getAppId().toString(), containerId.toString()); Path logPath = dirsHandler.getLogPathToRead( relativeContainerLogDir + Path.SEPARATOR + fileName); URI logPathURI = new File(logPath.toString()).toURI(); File logFile = new File(logPathURI.getPath()); return logFile; } catch (IOException e) { LOG.warn("Failed to find log file", e); throw new NotFoundException("Cannot find this log on the local disk."); } }
/** * Finds the log file with the given filename for the given container. */ public static File getContainerLogFile(ContainerId containerId, String fileName, String remoteUser, Context context, String userFolder) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); if (container != null) { checkState(container.getContainerState()); } try { LocalDirsHandlerService dirsHandler = context.getLocalDirsHandler(); String relativeContainerLogDir = ContainerLaunch.getRelativeContainerLogDir( application.getAppId().toString(), containerId.toString(), userFolder); Path logPath = dirsHandler.getLogPathToRead( relativeContainerLogDir + Path.SEPARATOR + fileName); URI logPathURI = new File(logPath.toString()).toURI(); File logFile = new File(logPathURI.getPath()); return logFile; } catch (IOException e) { LOG.warn("Failed to find log file", e); throw new NotFoundException("Cannot find this log on the local disk."); } }
/** * Finds the local directories that logs for the given container are stored * on. */ public static List<File> getContainerLogDirs(ContainerId containerId, String remoteUser, Context context) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); // It is not required to have null check for container ( container == null ) // and throw back exception.Because when container is completed, NodeManager // remove container information from its NMContext.Configuring log // aggregation to false, container log view request is forwarded to NM. NM // does not have completed container information,but still NM serve request for // reading container logs. if (container != null) { checkState(container.getContainerState()); } return getContainerLogDirs(containerId, context.getLocalDirsHandler()); }
/** * Finds the local directories that logs for the given container are stored * on. */ public static List<File> getContainerLogDirs(ContainerId containerId, String remoteUser, Context context) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); // It is not required to have null check for container ( container == null ) // and throw back exception.Because when container is completed, NodeManager // remove container information from its NMContext.Configuring log // aggregation to false, container log view request is forwarded to NM. NM // does not have completed container information,but still NM serve request for // reading container logs. if (container != null) { checkState(container.getContainerState()); } return getContainerLogDirs(containerId, context.getLocalDirsHandler()); }
/** * Finds the local directories that logs for the given container are stored * on. */ public static List<File> getContainerLogDirs(ContainerId containerId, String remoteUser, Context context) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); // It is not required to have null check for container ( container == null ) // and throw back exception.Because when container is completed, NodeManager // remove container information from its NMContext.Configuring log // aggregation to false, container log view request is forwarded to NM. NM // does not have completed container information,but still NM serve request for // reading container logs. if (container != null) { checkState(container.getContainerState()); } return getContainerLogDirs(containerId, context.getLocalDirsHandler()); }
/** * Finds the local directories that logs for the given container are stored * on. */ public static List<File> getContainerLogDirs(ContainerId containerId, String remoteUser, Context context) throws YarnException { Container container = context.getContainers().get(containerId); Application application = getApplicationForContainer(containerId, context); checkAccess(remoteUser, application, context); // It is not required to have null check for container ( container == null ) // and throw back exception.Because when container is completed, NodeManager // remove container information from its NMContext.Configuring log // aggregation to false, container log view request is forwarded to NM. NM // does not have completed container information,but still NM serve request for // reading container logs. if (container != null) { checkState(container.getContainerState()); } return getContainerLogDirs(containerId, context.getLocalDirsHandler()); }
protected boolean validateContainerState() { // CONTAINER_KILLED_ON_REQUEST should not be missed if the container // is already at KILLING if (container.getContainerState() == ContainerState.KILLING) { dispatcher.getEventHandler().handle( new ContainerExitEvent(container.getContainerId(), ContainerEventType.CONTAINER_KILLED_ON_REQUEST, Shell.WINDOWS ? ExitCode.FORCE_KILLED.getExitCode() : ExitCode.TERMINATED.getExitCode(), "Container terminated before launch.")); return false; } return true; }
private Container preReInitializeOrLocalizeCheck(ContainerId containerId, ReInitOp op) throws YarnException { UserGroupInformation remoteUgi = getRemoteUgi(); NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi); authorizeUser(remoteUgi, nmTokenIdentifier); if (!nmTokenIdentifier.getApplicationAttemptId().getApplicationId() .equals(containerId.getApplicationAttemptId().getApplicationId())) { throw new YarnException("ApplicationMaster not authorized to perform " + "["+ op + "] on Container [" + containerId + "]!!"); } Container container = context.getContainers().get(containerId); if (container == null) { throw new YarnException("Specified " + containerId + " does not exist!"); } if (!container.isRunning() || container.isReInitializing() || container.getContainerTokenIdentifier().getExecutionType() == ExecutionType.OPPORTUNISTIC) { throw new YarnException("Cannot perform " + op + " on [" + containerId + "]. Current state is [" + container.getContainerState() + ", " + "isReInitializing=" + container.isReInitializing() + "]. Container" + " Execution Type is [" + container.getContainerTokenIdentifier() .getExecutionType() + "]."); } return container; }