/**
 * Logs, for each node manager in {@code nodes}, the allocation tags and their
 * counts as reported by {@code atm} for that node's id.
 *
 * <p>Each node produces one INFO line of the form
 * {@code nm_<nodeId>: tagA(1),tagB(2),}. Nodes for which the tags manager
 * returns {@code null} are skipped without logging.
 *
 * @param nodes node managers whose tags should be printed
 * @param atm tags manager queried via {@code getAllocationTagsWithCount}
 */
private static void printTags(Collection<MockNM> nodes,
    AllocationTagsManager atm) {
  for (MockNM nm : nodes) {
    Map<String, Long> nmTags =
        atm.getAllocationTagsWithCount(nm.getNodeId());
    if (nmTags == null) {
      continue;
    }
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is sufficient (StringBuffer's locking buys nothing here).
    StringBuilder sb = new StringBuilder();
    nmTags.forEach((tag, count) ->
        sb.append(tag).append('(').append(count).append("),"));
    LOG.info("nm_" + nm.getNodeId() + ": " + sb);
  }
}
private void waitSchedulerNodeHasUpdatedLabels(CapacityScheduler cs, MockNM nm, String partition) throws InterruptedException { FiCaSchedulerNode node = cs.getNode(nm.getNodeId()); int totalWaitTick = 20; // wait 2 sec at most. while (!node.getLabels().contains(partition) && totalWaitTick > 0) { Thread.sleep(100); totalWaitTick--; } }
/**
 * Sends a graceful-decommission request for {@code nm} to the RM and blocks
 * until the node is reported as {@code DECOMMISSIONING} and pending events
 * have been drained.
 *
 * @param nm node manager to decommission
 * @param timeout value forwarded unchanged to
 *     {@code rm.sendNodeGracefulDecommission}
 * @throws Exception propagated from any of the RM calls
 */
private void syncNodeGracefulDecommission(MockNM nm, int timeout)
    throws Exception {
  // Step 1: issue the request.
  rm.sendNodeGracefulDecommission(nm, timeout);

  // Step 2: wait for the node state transition to become visible.
  rm.waitForState(nm.getNodeId(), NodeState.DECOMMISSIONING);

  // Step 3: drain outstanding events before returning to the caller.
  rm.drainEvents();
}
/**
 * Counts the running containers on {@code nm}'s scheduler node that are not
 * application-master containers.
 *
 * @param cs scheduler used to look up the node
 * @param nm node manager whose containers are counted
 * @return number of running containers for which {@code isAMContainer()} is
 *     {@code false}
 */
private int checkNumNonAMContainersOnNode(CapacityScheduler cs, MockNM nm) {
  SchedulerNode schedulerNode = cs.getNode(nm.getNodeId());
  int nonAmCount = 0;
  for (RMContainer container
      : schedulerNode.getCopiedListOfRunningContainers()) {
    nonAmCount += container.isAMContainer() ? 0 : 1;
  }
  return nonAmCount;
}
/**
 * Brings {@code nm} to {@code RUNNING} and then reports it as lost, returning
 * only after the RM has drained its pending events.
 *
 * @param nm node manager to start and then mark as lost
 * @throws Exception propagated from any of the RM calls
 */
private void syncNodeLost(MockNM nm) throws Exception {
  // Start the node and wait for it to be running before losing it.
  rm.sendNodeStarted(nm);
  rm.waitForState(nm.getNodeId(), NodeState.RUNNING);

  // Report the node as lost and wait for event processing to finish.
  rm.sendNodeLost(nm);
  rm.drainEvents();
}
/**
 * Waits for {@code nm1} to reach the expected health state in {@code rm} and
 * then verifies the cluster-wide unhealthy node-manager metric.
 *
 * <p>The node state is polled up to 20 times, 100 ms apart. After polling, the
 * method asserts the expected state and that
 * {@code ClusterMetrics.getMetrics().getUnhealthyNMs()} equals {@code count}.
 *
 * <p>NOTE(review): the method name is missing an "h" ("Unealthy"); it is kept
 * as-is so existing callers keep compiling.
 *
 * @param rm resource manager whose node map is inspected
 * @param nm1 node manager whose state is checked
 * @param health {@code true} to require the node to be {@code UNHEALTHY},
 *     {@code false} to require any other state
 * @param count expected number of unhealthy node managers in the metrics
 * @throws Exception if interrupted while waiting or if an RM call fails
 */
private void checkUnealthyNMCount(MockRM rm, MockNM nm1, boolean health,
    int count) throws Exception {
  int waitCount = 0;
  while ((rm.getRMContext().getRMNodes().get(nm1.getNodeId())
      .getState() != NodeState.UNHEALTHY) == health && waitCount++ < 20) {
    // Plain sleep instead of wait() on the test instance: nothing ever
    // notifies this monitor, and a spurious wakeup would only shorten the
    // polling budget.
    Thread.sleep(100);
  }
  Assert.assertFalse((rm.getRMContext().getRMNodes().get(nm1.getNodeId())
      .getState() != NodeState.UNHEALTHY) == health);
  Assert.assertEquals("Unhealthy metrics not incremented", count,
      ClusterMetrics.getMetrics().getUnhealthyNMs());
}
/**
 * Waits for {@code nm1} to reach the expected health state in {@code rm} and
 * then verifies the cluster-wide unhealthy node-manager metric.
 *
 * <p>The node state is polled up to 20 times, 100 ms apart. After polling, the
 * method asserts the expected state and that
 * {@code ClusterMetrics.getMetrics().getUnhealthyNMs()} equals {@code count}.
 *
 * @param rm resource manager whose node map is inspected
 * @param nm1 node manager whose state is checked
 * @param health {@code true} to require the node to be {@code UNHEALTHY},
 *     {@code false} to require any other state
 * @param count expected number of unhealthy node managers in the metrics
 * @throws Exception if interrupted while waiting or if an RM call fails
 */
private void checkUnhealthyNMCount(MockRM rm, MockNM nm1, boolean health,
    int count) throws Exception {
  int waitCount = 0;
  while ((rm.getRMContext().getRMNodes().get(nm1.getNodeId())
      .getState() != NodeState.UNHEALTHY) == health && waitCount++ < 20) {
    // Plain sleep instead of wait() on the test instance: nothing ever
    // notifies this monitor, and a spurious wakeup would only shorten the
    // polling budget.
    Thread.sleep(100);
  }
  Assert.assertFalse((rm.getRMContext().getRMNodes().get(nm1.getNodeId())
      .getState() != NodeState.UNHEALTHY) == health);
  Assert.assertEquals("Unhealthy metrics not incremented", count,
      ClusterMetrics.getMetrics().getUnhealthyNMs());
}
/**
 * Brings {@code nm} to {@code RUNNING} and then reports it as lost, returning
 * only after {@code dispatcher.await()} completes.
 *
 * @param nm node manager to start and then mark as lost
 * @throws Exception propagated from the RM or dispatcher calls
 */
private void syncNodeLost(MockNM nm) throws Exception {
  // Start the node and wait for it to be running before losing it.
  rm.sendNodeStarted(nm);
  rm.NMwaitForState(nm.getNodeId(), NodeState.RUNNING);

  // Report the node as lost, then block on the dispatcher.
  rm.sendNodeLost(nm);
  dispatcher.await();
}
@SuppressWarnings("unchecked") @Test(timeout = 10000) public void testDecommissioningNodeReconnect() throws Exception { MockRM rm = new MockRM(); rm.start(); MockNM nm1 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService()); nm1.registerNode(); rm.waitForState(nm1.getNodeId(), NodeState.RUNNING); rm.getRMContext().getDispatcher().getEventHandler().handle( new RMNodeEvent(nm1.getNodeId(), RMNodeEventType.GRACEFUL_DECOMMISSION)); rm.waitForState(nm1.getNodeId(), NodeState.DECOMMISSIONING); MockNM nm2 = new MockNM("127.0.0.1:1234", 15120, rm.getResourceTrackerService()); RegisterNodeManagerResponse response = nm2.registerNode(); // not SHUTDOWN Assert.assertTrue(response.getNodeAction().equals(NodeAction.NORMAL)); rm.stop(); }
/**
 * Queries {@code ws/v1/cluster/nodes} with {@code states=UNHEALTHY} while one
 * node is {@code RUNNING} and the other is {@code NEW}, and expects a JSON
 * body whose single {@code nodes} entry is JSON null.
 */
@Test
public void testNodesQueryHealthyFalse() throws JSONException, Exception {
  WebResource webResource = resource();

  // Two registered nodes; only the first one is started.
  MockNM nm1 = rm.registerNode("h1:1234", 5120);
  MockNM nm2 = rm.registerNode("h2:1235", 5121);
  rm.sendNodeStarted(nm1);
  rm.NMwaitForState(nm1.getNodeId(), NodeState.RUNNING);
  rm.NMwaitForState(nm2.getNodeId(), NodeState.NEW);

  ClientResponse response = webResource
      .path("ws")
      .path("v1")
      .path("cluster")
      .path("nodes")
      .queryParam("states", "UNHEALTHY")
      .accept(MediaType.APPLICATION_JSON)
      .get(ClientResponse.class);
  assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());

  // No node matches the filter, so "nodes" must be present but null.
  JSONObject body = response.getEntity(JSONObject.class);
  assertEquals("incorrect number of elements", 1, body.length());
  assertEquals("nodes is not null", JSONObject.NULL, body.get("nodes"));
}
@Test public void testUnhealthyNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm .getResourceTrackerService(); MockNM nm1 = rm.registerNode("host1:1234", 5120); Assert.assertEquals(0, ClusterMetrics.getMetrics().getUnhealthyNMs()); // node healthy nm1.nodeHeartbeat(true); int shutdownNMsCount = ClusterMetrics.getMetrics().getNumShutdownNMs(); // node unhealthy nm1.nodeHeartbeat(false); checkUnhealthyNMCount(rm, nm1, true, 1); UnRegisterNodeManagerRequest request = Records .newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, ++shutdownNMsCount); }
@Test public void testResponseIdOverflow() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); // prepare the responseId that's about to overflow RMNode node = rm.getRMContext().getRMNodes().get(nm1.getNodeId()); node.getLastNodeHeartBeatResponse().setResponseId(Integer.MAX_VALUE); nm1.setResponseId(Integer.MAX_VALUE); // heartbeat twice and check responseId nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); Assert.assertEquals(0, nodeHeartbeat.getResponseId()); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertEquals(NodeAction.NORMAL, nodeHeartbeat.getNodeAction()); Assert.assertEquals(1, nodeHeartbeat.getResponseId()); } }
/**
 * Queries {@code ws/v1/cluster/nodes} with {@code states=running} while one
 * node is {@code RUNNING} and the other is {@code NEW}, and expects exactly
 * one entry in the returned {@code nodes.node} array.
 */
@Test
public void testNodesQueryRunning() throws JSONException, Exception {
  WebResource webResource = resource();

  // Two registered nodes; only the first one is started.
  MockNM nm1 = rm.registerNode("h1:1234", 5120);
  MockNM nm2 = rm.registerNode("h2:1235", 5121);
  rm.sendNodeStarted(nm1);
  rm.NMwaitForState(nm1.getNodeId(), NodeState.RUNNING);
  rm.NMwaitForState(nm2.getNodeId(), NodeState.NEW);

  ClientResponse response = webResource
      .path("ws")
      .path("v1")
      .path("cluster")
      .path("nodes")
      .queryParam("states", "running")
      .accept(MediaType.APPLICATION_JSON)
      .get(ClientResponse.class);
  assertEquals(MediaType.APPLICATION_JSON_TYPE, response.getType());

  // Walk body -> "nodes" -> "node", checking the size at each level.
  JSONObject body = response.getEntity(JSONObject.class);
  assertEquals("incorrect number of elements", 1, body.length());
  JSONObject nodesObject = body.getJSONObject("nodes");
  assertEquals("incorrect number of elements", 1, nodesObject.length());
  JSONArray runningNodes = nodesObject.getJSONArray("node");
  assertEquals("incorrect number of elements", 1, runningNodes.length());
}
@Test public void testNMUnregistration() throws Exception { Configuration conf = new Configuration(); rm = new MockRM(conf); rm.start(); ResourceTrackerService resourceTrackerService = rm .getResourceTrackerService(); MockNM nm1 = rm.registerNode("host1:1234", 5120); int shutdownNMsCount = ClusterMetrics.getMetrics() .getNumShutdownNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); UnRegisterNodeManagerRequest request = Records .newRecord(UnRegisterNodeManagerRequest.class); request.setNodeId(nm1.getNodeId()); resourceTrackerService.unRegisterNodeManager(request); checkShutdownNMCount(rm, ++shutdownNMsCount); // The RM should remove the node after unregistration, hence send a reboot // command. nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.RESYNC.equals(nodeHeartbeat.getNodeAction())); }