/** * Update the preemption fields for all QueueScheduables, i.e. the times since * each queue last was at its guaranteed share and over its fair share * threshold for each type of task. */ private void updateStarvationStats() { lastPreemptionUpdateTime = clock.getTime(); for (FSLeafQueue sched : queueMgr.getLeafQueues()) { sched.updateStarvationStats(); } }
/** * Update the preemption fields for all QueueScheduables, i.e. the times since * each queue last was at its guaranteed share and over its fair share * threshold for each type of task. */ private void updateStarvationStats() { lastPreemptionUpdateTime = clock.getTime(); for (FSLeafQueue sched : queueMgr.getLeafQueues()) { sched.updateStarvationStats(); } }
@Test public void testHierarchicalQueuesSimilarParents() throws IOException { scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); QueueManager queueManager = scheduler.getQueueManager(); FSLeafQueue leafQueue = queueManager.getLeafQueue("parent.child", true); Assert.assertEquals(2, queueManager.getLeafQueues().size()); Assert.assertNotNull(leafQueue); Assert.assertEquals("root.parent.child", leafQueue.getName()); FSLeafQueue leafQueue2 = queueManager.getLeafQueue("parent", true); Assert.assertNull(leafQueue2); Assert.assertEquals(2, queueManager.getLeafQueues().size()); FSLeafQueue leafQueue3 = queueManager.getLeafQueue("parent.child.grandchild", true); Assert.assertNull(leafQueue3); Assert.assertEquals(2, queueManager.getLeafQueues().size()); FSLeafQueue leafQueue4 = queueManager.getLeafQueue("parent.sister", true); Assert.assertNotNull(leafQueue4); Assert.assertEquals("root.parent.sister", leafQueue4.getName()); Assert.assertEquals(3, queueManager.getLeafQueues().size()); }
@Test public void testHierarchicalQueuesSimilarParents() throws IOException { scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); QueueManager queueManager = scheduler.getQueueManager(); FSLeafQueue leafQueue = queueManager.getLeafQueue("parent.child", true); Assert.assertEquals(2, queueManager.getLeafQueues().size()); Assert.assertNotNull(leafQueue); Assert.assertEquals("root.parent.child", leafQueue.getName()); FSLeafQueue leafQueue2 = queueManager.getLeafQueue("parent", true); Assert.assertNull(leafQueue2); Assert.assertEquals(2, queueManager.getLeafQueues().size()); FSLeafQueue leafQueue3 = queueManager.getLeafQueue("parent.child.grandchild", true); Assert.assertNull(leafQueue3); Assert.assertEquals(2, queueManager.getLeafQueues().size()); FSLeafQueue leafQueue4 = queueManager.getLeafQueue("parent.sister", true); Assert.assertNotNull(leafQueue4); Assert.assertEquals("root.parent.sister", leafQueue4.getName()); Assert.assertEquals(3, queueManager.getLeafQueues().size()); }
/** * Check for queues that need tasks preempted, either because they have been * below their guaranteed share for minSharePreemptionTimeout or they have * been below their fair share threshold for the fairSharePreemptionTimeout. If * such queues exist, compute how many tasks of each type need to be preempted * and then select the right ones using preemptTasks. */ protected synchronized void preemptTasksIfNecessary() { if (!shouldAttemptPreemption()) { return; } long curTime = getClock().getTime(); if (curTime - lastPreemptCheckTime < preemptionInterval) { return; } lastPreemptCheckTime = curTime; Resource resToPreempt = Resources.clone(Resources.none()); for (FSLeafQueue sched : queueMgr.getLeafQueues()) { Resources.addTo(resToPreempt, resToPreempt(sched, curTime)); } if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterResource, resToPreempt, Resources.none())) { preemptResources(resToPreempt); } }
@Test public void testEmptyQueueName() throws Exception { scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); // only default queue assertEquals(1, scheduler.getQueueManager().getLeafQueues().size()); // submit app with empty queue ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1); AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "", "user1"); scheduler.handle(appAddedEvent); // submission rejected assertEquals(1, scheduler.getQueueManager().getLeafQueues().size()); assertNull(scheduler.getSchedulerApp(appAttemptId)); assertEquals(0, resourceManager.getRMContext().getRMApps().size()); }
@Test public void testEmptyQueueName() throws Exception { scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); // only default queue assertEquals(1, scheduler.getQueueManager().getLeafQueues().size()); // submit app with empty queue ApplicationAttemptId appAttemptId = createAppAttemptId(1, 1); AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(appAttemptId.getApplicationId(), "", "user1"); scheduler.handle(appAddedEvent); // submission rejected assertEquals(1, scheduler.getQueueManager().getLeafQueues().size()); assertNull(scheduler.getSchedulerApp(appAttemptId)); assertEquals(0, resourceManager.getRMContext().getRMApps().size()); }
/** * Check for queues that need tasks preempted, either because they have been * below their guaranteed share for minSharePreemptionTimeout or they have * been below their fair share threshold for the fairSharePreemptionTimeout. If * such queues exist, compute how many tasks of each type need to be preempted * and then select the right ones using preemptTasks. */ protected synchronized void preemptTasksIfNecessary() { if (!shouldAttemptPreemption()) { return; } long curTime = getClock().getTime(); if (curTime - lastPreemptCheckTime < preemptionInterval) { return; } lastPreemptCheckTime = curTime; Resource resToPreempt = Resources.clone(Resources.none()); for (FSLeafQueue sched : queueMgr.getLeafQueues()) { Resources.addTo(resToPreempt, resToPreempt(sched, curTime)); } if (Resources.greaterThan(RESOURCE_CALCULATOR, clusterResource, resToPreempt, Resources.none())) { preemptResources(resToPreempt); } }
Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues(); assertEquals(3, queues.size());
@Test public void testFairShareNoAppsRunning() throws IOException { int nodeCapacity = 16 * 1024; createClusterWithQueuesAndOneNode(nodeCapacity, "fair"); scheduler.update(); // No apps are running in the cluster,verify if fair share is zero // for all queues under parentA and parentB. Collection<FSLeafQueue> leafQueues = scheduler.getQueueManager() .getLeafQueues(); for (FSLeafQueue leaf : leafQueues) { if (leaf.getName().startsWith("root.parentA")) { assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity, 0); } else if (leaf.getName().startsWith("root.parentB")) { assertEquals(0, (double) leaf.getFairShare().getMemory() / nodeCapacity, 0); } } verifySteadyFairShareMemory(leafQueues, nodeCapacity); }
assertEquals(1, scheduler.getQueueManager().getLeafQueues().size()); scheduler.handle(appAddedEvent1); assertEquals(1, scheduler.getQueueManager().getLeafQueues().size()); assertNull(scheduler.getSchedulerApp(appAttemptId1)); assertEquals(0, resourceManager.getRMContext().getRMApps().size()); scheduler.handle(appAddedEvent2); assertEquals(1, scheduler.getQueueManager().getLeafQueues().size()); assertNull(scheduler.getSchedulerApp(appAttemptId2)); assertEquals(0, resourceManager.getRMContext().getRMApps().size()); scheduler.handle(appAddedEvent3); assertEquals(2, scheduler.getQueueManager().getLeafQueues().size()); assertNull(scheduler.getSchedulerApp(appAttemptId3)); assertEquals(0, resourceManager.getRMContext().getRMApps().size());
@Test public void testFairShareNoAppsRunning() throws IOException { int nodeCapacity = 16 * 1024; createClusterWithQueuesAndOneNode(nodeCapacity, "fair"); scheduler.update(); // No apps are running in the cluster,verify if fair share is zero // for all queues under parentA and parentB. Collection<FSLeafQueue> leafQueues = scheduler.getQueueManager() .getLeafQueues(); for (FSLeafQueue leaf : leafQueues) { if (leaf.getName().startsWith("root.parentA")) { assertEquals(0, (double) leaf.getFairShare().getMemorySize() / nodeCapacity, 0); } else if (leaf.getName().startsWith("root.parentB")) { assertEquals(0, (double) leaf.getFairShare().getMemorySize() / nodeCapacity, 0); } } verifySteadyFairShareMemory(leafQueues, nodeCapacity); }
@Test public void testFairShareMultipleActiveQueuesUnderSameParent() throws IOException { int nodeCapacity = 16 * 1024; createClusterWithQueuesAndOneNode(nodeCapacity, "fair"); // Run apps in childA1,childA2,childA3 createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1"); createSchedulingRequest(2 * 1024, "root.parentA.childA2", "user2"); createSchedulingRequest(2 * 1024, "root.parentA.childA3", "user3"); scheduler.update(); // Verify if fair share is 100 / 3 = 33% for (int i = 1; i <= 3; i++) { assertEquals( 33, (double) scheduler.getQueueManager() .getLeafQueue("root.parentA.childA" + i, false).getFairShare() .getMemory() / nodeCapacity * 100, .9); } verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), nodeCapacity); }
@Test public void testFairShareMultipleActiveQueuesUnderSameParent() throws IOException { int nodeCapacity = 16 * 1024; createClusterWithQueuesAndOneNode(nodeCapacity, "fair"); // Run apps in childA1,childA2,childA3 createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1"); createSchedulingRequest(2 * 1024, "root.parentA.childA2", "user2"); createSchedulingRequest(2 * 1024, "root.parentA.childA3", "user3"); scheduler.update(); // Verify if fair share is 100 / 3 = 33% for (int i = 1; i <= 3; i++) { assertEquals( 33, (double) scheduler.getQueueManager() .getLeafQueue("root.parentA.childA" + i, false).getFairShare() .getMemorySize() / nodeCapacity * 100, .9); } verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), nodeCapacity); }
@Test public void testAppAdditionAndRemoval() throws Exception { ApplicationAttemptId attemptId = createAppAttemptId(1, 1); AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(attemptId.getApplicationId(), "default", "user1"); scheduler.handle(appAddedEvent); AppAttemptAddedSchedulerEvent attemptAddedEvent = new AppAttemptAddedSchedulerEvent(createAppAttemptId(1, 1), false); scheduler.handle(attemptAddedEvent); // Scheduler should have two queues (the default and the one created for // user1) assertEquals(2, scheduler.getQueueManager().getLeafQueues().size()); // That queue should have one app assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true) .getNumRunnableApps()); AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent(createAppAttemptId(1, 1), RMAppAttemptState.FINISHED, false); // Now remove app scheduler.handle(appRemovedEvent1); // Queue should have no apps assertEquals(0, scheduler.getQueueManager().getLeafQueue("user1", true) .getNumRunnableApps()); }
@Test public void testFairShareOneAppRunning() throws IOException { int nodeCapacity = 16 * 1024; createClusterWithQueuesAndOneNode(nodeCapacity, "fair"); // Run a app in a childA1. Verify whether fair share is 100% in childA1, // since it is the only active queue. // Also verify if fair share is 0 for childA2. since no app is // running in it. createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1"); scheduler.update(); assertEquals( 100, (double) scheduler.getQueueManager() .getLeafQueue("root.parentA.childA1", false).getFairShare() .getMemorySize() / nodeCapacity * 100, 0.1); assertEquals( 0, (double) scheduler.getQueueManager() .getLeafQueue("root.parentA.childA2", false).getFairShare() .getMemorySize() / nodeCapacity, 0.1); verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), nodeCapacity); }
@Test public void testFairShareOneAppRunning() throws IOException { int nodeCapacity = 16 * 1024; createClusterWithQueuesAndOneNode(nodeCapacity, "fair"); // Run a app in a childA1. Verify whether fair share is 100% in childA1, // since it is the only active queue. // Also verify if fair share is 0 for childA2. since no app is // running in it. createSchedulingRequest(2 * 1024, "root.parentA.childA1", "user1"); scheduler.update(); assertEquals( 100, (double) scheduler.getQueueManager() .getLeafQueue("root.parentA.childA1", false).getFairShare() .getMemory() / nodeCapacity * 100, 0.1); assertEquals( 0, (double) scheduler.getQueueManager() .getLeafQueue("root.parentA.childA2", false).getFairShare() .getMemory() / nodeCapacity, 0.1); verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), nodeCapacity); }
@Test public void testAppAdditionAndRemoval() throws Exception { scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); ApplicationAttemptId attemptId =createAppAttemptId(1, 1); AppAddedSchedulerEvent appAddedEvent = new AppAddedSchedulerEvent(attemptId.getApplicationId(), "default", "user1"); scheduler.handle(appAddedEvent); AppAttemptAddedSchedulerEvent attemptAddedEvent = new AppAttemptAddedSchedulerEvent(createAppAttemptId(1, 1), false); scheduler.handle(attemptAddedEvent); // Scheduler should have two queues (the default and the one created for user1) assertEquals(2, scheduler.getQueueManager().getLeafQueues().size()); // That queue should have one app assertEquals(1, scheduler.getQueueManager().getLeafQueue("user1", true) .getNumRunnableApps()); AppAttemptRemovedSchedulerEvent appRemovedEvent1 = new AppAttemptRemovedSchedulerEvent( createAppAttemptId(1, 1), RMAppAttemptState.FINISHED, false); // Now remove app scheduler.handle(appRemovedEvent1); // Queue should have no apps assertEquals(0, scheduler.getQueueManager().getLeafQueue("user1", true) .getNumRunnableApps()); }
/ nodeCapacity * 100, .9); verifySteadyFairShareMemory(scheduler.getQueueManager().getLeafQueues(), nodeCapacity);
@Test public void testSimpleFairShareCalculation() throws IOException { scheduler.init(conf); scheduler.start(); scheduler.reinitialize(conf, resourceManager.getRMContext()); // Add one big node (only care about aggregate capacity) RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(10 * 1024), 1, "127.0.0.1"); NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); scheduler.handle(nodeEvent1); // Have two queues which want entire cluster capacity createSchedulingRequest(10 * 1024, "queue1", "user1"); createSchedulingRequest(10 * 1024, "queue2", "user1"); createSchedulingRequest(10 * 1024, "root.default", "user1"); scheduler.update(); scheduler.getQueueManager().getRootQueue() .setSteadyFairShare(scheduler.getClusterResource()); scheduler.getQueueManager().getRootQueue().recomputeSteadyShares(); Collection<FSLeafQueue> queues = scheduler.getQueueManager().getLeafQueues(); assertEquals(3, queues.size()); // Divided three ways - between the two queues and the default queue for (FSLeafQueue p : queues) { assertEquals(3414, p.getFairShare().getMemorySize()); assertEquals(3414, p.getMetrics().getFairShareMB()); assertEquals(3414, p.getSteadyFairShare().getMemorySize()); assertEquals(3414, p.getMetrics().getSteadyFairShareMB()); } }