/** Returns the current state of the wrapped node (pure delegation). */
@Override
public NodeState getState() {
  return node.getState();
}
/**
 * Records an updated node and logs the update event.
 *
 * @param type the type of node update event received
 * @param node the node that was updated
 */
private void processNodeUpdate(RMAppNodeUpdateType type, RMNode node) {
  NodeState nodeState = node.getState();
  updatedNodes.add(node);
  // Guard the concatenation so the message is only built when debug
  // logging is actually enabled.
  if (LOG.isDebugEnabled()) {
    LOG.debug("Received node update event:" + type + " for node:" + node
        + " with state:" + nodeState);
  }
}
/**
 * Records an updated node and logs the update event.
 *
 * @param type the type of node update event received
 * @param node the node that was updated
 */
private void processNodeUpdate(RMAppNodeUpdateType type, RMNode node) {
  NodeState nodeState = node.getState();
  updatedNodes.add(node);
  // Guard the concatenation so the message is only built when debug
  // logging is actually enabled.
  if (LOG.isDebugEnabled()) {
    LOG.debug("Received node update event:" + type + " for node:" + node
        + " with state:" + nodeState);
  }
}
/**
 * Records an updated node, mapping it to the converted update type, and
 * logs the update event.
 *
 * @param type the type of node update event received
 * @param node the node that was updated
 */
private void processNodeUpdate(RMAppNodeUpdateType type, RMNode node) {
  NodeState nodeState = node.getState();
  updatedNodes.put(node, RMAppNodeUpdateType.convertToNodeUpdateType(type));
  // Guard the concatenation so the message is only built when debug
  // logging is actually enabled.
  if (LOG.isDebugEnabled()) {
    LOG.debug("Received node update event:" + type + " for node:" + node
        + " with state:" + nodeState);
  }
}
/**
 * Collects the ids of every node currently in the DECOMMISSIONING state.
 *
 * @return decommissioning nodes
 */
public Set<NodeId> checkForDecommissioningNodes() {
  Set<NodeId> result = new HashSet<NodeId>();
  for (Entry<NodeId, RMNode> nodeEntry : rmContext.getRMNodes().entrySet()) {
    if (NodeState.DECOMMISSIONING == nodeEntry.getValue().getState()) {
      result.add(nodeEntry.getKey());
    }
  }
  return result;
}
/**
 * Check if node in decommissioning state.
 *
 * @param nodeId the node to look up in the RM context
 * @return true iff the node is known and currently DECOMMISSIONING
 */
private boolean isNodeInDecommissioning(NodeId nodeId) {
  RMNode rmNode = this.rmContext.getRMNodes().get(nodeId);
  return rmNode != null
      && NodeState.DECOMMISSIONING.equals(rmNode.getState());
}
/**
 * Blocks until every registered NM reports as RUNNING, polling the RM
 * context once per second and logging progress along the way.
 *
 * @throws InterruptedException if interrupted while sleeping between polls
 */
private void waitForNodesRunning() throws InterruptedException {
  long launchStartMS = System.currentTimeMillis();
  while (true) {
    int running = 0;
    for (RMNode rmNode : rm.getRMContext().getRMNodes().values()) {
      if (NodeState.RUNNING == rmNode.getState()) {
        running++;
      }
    }
    if (running == numNMs) {
      break;
    }
    LOG.info(MessageFormat.format("SLSRunner is waiting for all "
        + "nodes RUNNING. {0} of {1} NMs initialized.", running, numNMs));
    Thread.sleep(1000);
  }
  LOG.info(MessageFormat.format("SLSRunner takes {0} ms to launch all nodes.",
      (System.currentTimeMillis() - launchStartMS)));
}
/**
 * Remembers the most recent event and appends its node's state to the
 * recorded state sequence.
 */
@Override
public void handle(NodesListManagerEvent event) {
  nodesListManagerEvent = event;
  NodeState observedState = event.getNode().getState();
  nodesListManagerEventsNodeStateSequence.add(observedState);
}
/**
 * Forcefully decommission the nodes if they are in DECOMMISSIONING state
 */
public void refreshNodesForcefully() {
  for (Entry<NodeId, RMNode> nodeEntry : rmContext.getRMNodes().entrySet()) {
    if (nodeEntry.getValue().getState() != NodeState.DECOMMISSIONING) {
      continue;
    }
    // Untracked hosts are shut down rather than decommissioned.
    RMNodeEventType eventType;
    if (isUntrackedNode(nodeEntry.getKey().getHost())) {
      eventType = RMNodeEventType.SHUTDOWN;
    } else {
      eventType = RMNodeEventType.DECOMMISSION;
    }
    this.rmContext.getDispatcher().getEventHandler().handle(
        new RMNodeEvent(nodeEntry.getKey(), eventType));
  }
}
/**
 * Polls (up to 20 times, 500ms apart) for the given node to reach the
 * expected state, then asserts that it did.
 * NOTE: method name casing kept as-is for existing callers.
 */
public void NMwaitForState(NodeId nodeid, NodeState finalState)
    throws Exception {
  RMNode node = getRMContext().getRMNodes().get(nodeid);
  Assert.assertNotNull("node shouldn't be null", node);
  int waited = 0;
  while (!finalState.equals(node.getState()) && waited++ < 20) {
    System.out.println("Node State is : " + node.getState()
        + " Waiting for state : " + finalState);
    Thread.sleep(500);
  }
  System.out.println("Node State is : " + node.getState());
  Assert.assertEquals("Node state is not correct (timedout)", finalState,
      node.getState());
}
/**
 * Decrements the inactive-NM cluster metric corresponding to the node's
 * current state; any other state is only logged at debug level.
 */
private void decrInactiveNMMetrics(RMNode rmNode) {
  ClusterMetrics metrics = ClusterMetrics.getMetrics();
  NodeState state = rmNode.getState();
  if (state == NodeState.SHUTDOWN) {
    metrics.decrNumShutdownNMs();
  } else if (state == NodeState.DECOMMISSIONED) {
    metrics.decrDecommisionedNMs();
  } else if (state == NodeState.LOST) {
    metrics.decrNumLostNMs();
  } else if (state == NodeState.REBOOTED) {
    metrics.decrNumRebootedNMs();
  } else {
    LOG.debug("Unexpected node state");
  }
}
/**
 * Builds a FiCaSchedulerNode spy backed by a mocked RMNode with the given
 * host, rack, port and capacity; the mocked node always reports RUNNING.
 */
public static FiCaSchedulerNode getMockNode(String host, String rack,
    int port, int memory, int vcores) {
  NodeId nodeId = NodeId.newInstance(host, port);
  RMNode rmNode = mock(RMNode.class);
  when(rmNode.getNodeID()).thenReturn(nodeId);
  when(rmNode.getHostName()).thenReturn(host);
  when(rmNode.getRackName()).thenReturn(rack);
  when(rmNode.getNodeAddress()).thenReturn(host + ":" + port);
  when(rmNode.getState()).thenReturn(NodeState.RUNNING);
  when(rmNode.getTotalCapability())
      .thenReturn(Resources.createResource(memory, vcores));
  FiCaSchedulerNode schedulerNode = spy(new FiCaSchedulerNode(rmNode, false));
  LOG.info("node = " + host + " avail="
      + schedulerNode.getUnallocatedResource());
  // Stub the id on the spy too so callers see a stable NodeId.
  when(schedulerNode.getNodeID()).thenReturn(nodeId);
  return schedulerNode;
}
/**
 * Waits (up to 20 polls of 100ms) for the node's UNHEALTHY status to match
 * the expectation, then asserts the status and the unhealthy-NM metric.
 * NOTE: method name typo ("Unealthy") retained for existing callers.
 */
private void checkUnealthyNMCount(MockRM rm, MockNM nm1, boolean health,
    int count) throws Exception {
  for (int attempt = 0; attempt < 20; attempt++) {
    NodeState state =
        rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState();
    if ((state != NodeState.UNHEALTHY) != health) {
      break;
    }
    synchronized (this) {
      wait(100);
    }
  }
  NodeState finalNodeState =
      rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState();
  Assert.assertFalse((finalNodeState != NodeState.UNHEALTHY) == health);
  Assert.assertEquals("Unhealthy metrics not incremented", count,
      ClusterMetrics.getMetrics().getUnhealthyNMs());
}
/**
 * Waits (up to 20 polls of 100ms) for the node's UNHEALTHY status to match
 * the expectation, then asserts the status and the unhealthy-NM metric.
 */
private void checkUnhealthyNMCount(MockRM rm, MockNM nm1, boolean health,
    int count) throws Exception {
  for (int attempt = 0; attempt < 20; attempt++) {
    NodeState state =
        rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState();
    if ((state != NodeState.UNHEALTHY) != health) {
      break;
    }
    synchronized (this) {
      wait(100);
    }
  }
  NodeState finalNodeState =
      rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState();
  Assert.assertFalse((finalNodeState != NodeState.UNHEALTHY) == health);
  Assert.assertEquals("Unhealthy metrics not incremented", count,
      ClusterMetrics.getMetrics().getUnhealthyNMs());
}
/**
 * Builds a NodeReport for the given RM node, merging in the scheduler's
 * usage numbers when a scheduler-side report is available.
 */
private NodeReport createNodeReports(RMNode rmNode) {
  Resource used = BuilderUtils.newResource(0, 0);
  int numContainers = 0;
  SchedulerNodeReport schedulerReport =
      scheduler.getNodeReport(rmNode.getNodeID());
  if (schedulerReport != null) {
    used = schedulerReport.getUsedResource();
    numContainers = schedulerReport.getNumContainers();
  }
  return BuilderUtils.newNodeReport(rmNode.getNodeID(), rmNode.getState(),
      rmNode.getHttpAddress(), rmNode.getRackName(), used,
      rmNode.getTotalCapability(), numContainers, rmNode.getHealthReport(),
      rmNode.getLastHealthReportTime(), rmNode.getNodeLabels());
}
/**
 * Builds a NodeReport for the given RM node, merging in the scheduler's
 * usage numbers when a scheduler-side report is available.
 */
private NodeReport createNodeReports(RMNode rmNode) {
  Resource used = BuilderUtils.newResource(0, 0);
  int numContainers = 0;
  SchedulerNodeReport schedulerReport =
      scheduler.getNodeReport(rmNode.getNodeID());
  if (schedulerReport != null) {
    used = schedulerReport.getUsedResource();
    numContainers = schedulerReport.getNumContainers();
  }
  return BuilderUtils.newNodeReport(rmNode.getNodeID(), rmNode.getState(),
      rmNode.getHttpAddress(), rmNode.getRackName(), used,
      rmNode.getTotalCapability(), numContainers, rmNode.getHealthReport(),
      rmNode.getLastHealthReportTime(), rmNode.getNodeLabels());
}
@Test public void testRemovedNodeDecomissioningNode() throws Exception { // Register nodemanager NodeManager nm = registerNode("host_decom", 1234, 2345, NetworkTopology.DEFAULT_RACK, Resources.createResource(8 * GB, 4)); RMNode node = resourceManager.getRMContext().getRMNodes().get(nm.getNodeId()); // Send a heartbeat to kick the tires on the Scheduler NodeUpdateSchedulerEvent nodeUpdate = new NodeUpdateSchedulerEvent(node); resourceManager.getResourceScheduler().handle(nodeUpdate); // force remove the node to simulate race condition ((CapacityScheduler) resourceManager.getResourceScheduler()).getNodeTracker(). removeNode(nm.getNodeId()); // Kick off another heartbeat with the node state mocked to decommissioning RMNode spyNode = Mockito.spy(resourceManager.getRMContext().getRMNodes() .get(nm.getNodeId())); when(spyNode.getState()).thenReturn(NodeState.DECOMMISSIONING); resourceManager.getResourceScheduler().handle( new NodeUpdateSchedulerEvent(spyNode)); }
@Test public void testRemovedNodeDecomissioningNode() throws Exception { // Register nodemanager NodeManager nm = registerNode("host_decom", 1234, 2345, NetworkTopology.DEFAULT_RACK, Resources.createResource(8 * GB, 4)); RMNode node = resourceManager.getRMContext().getRMNodes().get(nm.getNodeId()); // Send a heartbeat to kick the tires on the Scheduler NodeUpdateSchedulerEvent nodeUpdate = new NodeUpdateSchedulerEvent(node); resourceManager.getResourceScheduler().handle(nodeUpdate); // Force remove the node to simulate race condition ((FairScheduler) resourceManager.getResourceScheduler()) .getNodeTracker().removeNode(nm.getNodeId()); // Kick off another heartbeat with the node state mocked to decommissioning RMNode spyNode = Mockito.spy(resourceManager.getRMContext().getRMNodes() .get(nm.getNodeId())); when(spyNode.getState()).thenReturn(NodeState.DECOMMISSIONING); resourceManager.getResourceScheduler().handle( new NodeUpdateSchedulerEvent(spyNode)); }
@Test public void testRemovedNodeDecomissioningNode() throws Exception { // Register nodemanager NodeManager nm = registerNode("host_decom", 1234, 2345, NetworkTopology.DEFAULT_RACK, Resources.createResource(8 * GB, 4)); RMNode node = resourceManager.getRMContext().getRMNodes().get(nm.getNodeId()); // Send a heartbeat to kick the tires on the Scheduler NodeUpdateSchedulerEvent nodeUpdate = new NodeUpdateSchedulerEvent(node); resourceManager.getResourceScheduler().handle(nodeUpdate); // Force remove the node to simulate race condition ((FifoScheduler) resourceManager.getResourceScheduler()) .getNodeTracker().removeNode(nm.getNodeId()); // Kick off another heartbeat with the node state mocked to decommissioning RMNode spyNode = Mockito.spy(resourceManager.getRMContext().getRMNodes() .get(nm.getNodeId())); when(spyNode.getState()).thenReturn(NodeState.DECOMMISSIONING); resourceManager.getResourceScheduler().handle( new NodeUpdateSchedulerEvent(spyNode)); }
/**
 * Builds a NodeReport for the given RM node, including labels, attributes,
 * utilization and decommissioning timeout, merging in the scheduler's usage
 * numbers when a scheduler-side report is available.
 */
private NodeReport createNodeReports(RMNode rmNode) {
  Resource used = BuilderUtils.newResource(0, 0);
  int numContainers = 0;
  SchedulerNodeReport schedulerReport =
      scheduler.getNodeReport(rmNode.getNodeID());
  if (schedulerReport != null) {
    used = schedulerReport.getUsedResource();
    numContainers = schedulerReport.getNumContainers();
  }
  Set<NodeAttribute> attrs = rmNode.getAllNodeAttributes();
  return BuilderUtils.newNodeReport(rmNode.getNodeID(), rmNode.getState(),
      rmNode.getHttpAddress(), rmNode.getRackName(), used,
      rmNode.getTotalCapability(), numContainers, rmNode.getHealthReport(),
      rmNode.getLastHealthReportTime(), rmNode.getNodeLabels(),
      rmNode.getAggregatedContainersUtilization(),
      rmNode.getNodeUtilization(), rmNode.getDecommissioningTimeout(),
      null, attrs);
}