/**
 * Assign a single executor to a slot, even if other things are in the slot.
 * @param ws the slot to assign it to.
 * @param exec the executor to assign.
 * @param td the topology for the executor.
 */
public void assignSingleExecutor(WorkerSlot ws, ExecutorDetails exec, TopologyDetails td) {
    if (!isAlive) {
        throw new IllegalStateException("Trying to add to a dead node " + nodeId);
    }
    Collection<WorkerSlot> freeSlots = getFreeSlots();
    Set<ExecutorDetails> toAssign = new HashSet<>();
    toAssign.add(exec);
    if (!freeSlots.contains(ws)) {
        Map<String, Collection<ExecutorDetails>> usedSlots = topIdToUsedSlots.get(td.getId());
        if (usedSlots == null) {
            throw new IllegalArgumentException(
                "Slot " + ws + " is not available to schedule " + exec + " on");
        }
        Collection<ExecutorDetails> alreadyHere = usedSlots.get(ws.getId());
        if (alreadyHere == null) {
            throw new IllegalArgumentException(
                "Slot " + ws + " is not available to schedule " + exec + " on");
        }
        toAssign.addAll(alreadyHere);
        free(ws);
    }
    assign(ws, td, toAssign);
}
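// A minimal usage sketch, not from the Storm codebase: move an extra executor onto a slot
// that already hosts executors for the same topology. The method name, the `node` and `td`
// parameters, and the executor id range below are all hypothetical. assignSingleExecutor
// frees the occupied slot, merges its existing executors with the new one, and re-assigns
// the union in a single step.
void assignExtraExecutorSketch(RAS_Node node, TopologyDetails td) {
    WorkerSlot occupied = node.getUsedSlots().iterator().next();
    ExecutorDetails extra = new ExecutorDetails(7, 7); // illustrative task id range
    node.assignSingleExecutor(occupied, extra, td);
}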
/**
 * Log the cluster's network topography and per-node resource availability for debugging.
 */
private void logClusterInfo() {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Cluster:");
        for (Map.Entry<String, List<String>> clusterEntry : networkTopography.entrySet()) {
            String rackId = clusterEntry.getKey();
            LOG.debug("Rack: {}", rackId);
            for (String nodeHostname : clusterEntry.getValue()) {
                for (RAS_Node node : hostnameToNodes(nodeHostname)) {
                    LOG.debug("-> Node: {} {}", node.getHostname(), node.getId());
                    LOG.debug(
                        "--> Avail Resources: {Mem {}, CPU {} Slots: {}}",
                        node.getAvailableMemoryResources(),
                        node.getAvailableCpuResources(),
                        node.totalSlotsFree());
                    LOG.debug(
                        "--> Total Resources: {Mem {}, CPU {} Slots: {}}",
                        node.getTotalMemoryResources(),
                        node.getTotalCpuResources(),
                        node.totalSlots());
                }
            }
        }
    }
}
@Override public String toString() { return "{Node: " + ((sup == null) ? "null (possibly down)" : sup.getHost()) + ", Avail [ Mem: " + getAvailableMemoryResources() + ", CPU: " + getAvailableCpuResources() + ", Slots: " + this.getFreeSlots() + "] Total [ Mem: " + ((sup == null) ? "N/A" : this.getTotalMemoryResources()) + ", CPU: " + ((sup == null) ? "N/A" : this.getTotalCpuResources()) + ", Slots: " + this.slots.values() + " ]}"; }
/**
 * Free everything on the given slots.
 *
 * @param workerSlots the slots to free
 */
public void freeSlots(Collection<WorkerSlot> workerSlots) {
    for (RAS_Node node : nodeMap.values()) {
        for (WorkerSlot ws : node.getUsedSlots()) {
            if (workerSlots.contains(ws)) {
                LOG.debug("freeing ws {} on node {}", ws, node);
                node.free(ws);
            }
        }
    }
}
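// Hypothetical one-line usage: release every slot an evicted topology occupied. The
// `nodes` (RAS_Nodes) and `assignment` (SchedulerAssignment) objects are assumed to exist.
nodes.freeSlots(assignment.getSlots());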
assertEquals("r000s000", node.getId()); assertTrue(node.isAlive()); assertEquals(0, node.getRunningTopologies().size()); assertTrue(node.isTotallyFree()); assertEquals(4, node.totalSlotsFree()); assertEquals(0, node.totalSlotsUsed()); assertEquals(4, node.totalSlots()); node.assign(node.getFreeSlots().iterator().next(), topology1, executors11); assertEquals(1, node.getRunningTopologies().size()); assertFalse(node.isTotallyFree()); assertEquals(3, node.totalSlotsFree()); assertEquals(1, node.totalSlotsUsed()); assertEquals(4, node.totalSlots()); node.assign(node.getFreeSlots().iterator().next(), topology1, executors12); assertEquals(1, node.getRunningTopologies().size()); assertFalse(node.isTotallyFree()); assertEquals(2, node.totalSlotsFree()); assertEquals(2, node.totalSlotsUsed()); assertEquals(4, node.totalSlots()); node.assign(node.getFreeSlots().iterator().next(), topology2, executors21); assertEquals(2, node.getRunningTopologies().size()); assertFalse(node.isTotallyFree()); assertEquals(1, node.totalSlotsFree()); assertEquals(3, node.totalSlotsUsed()); assertEquals(4, node.totalSlots());
if (targetSlot != null) {
    RAS_Node targetNode = idToNode(targetSlot.getNodeId());
    targetNode.assignSingleExecutor(targetSlot, exec, td);
    scheduledTasks.add(exec);
    // The start of this format string was lost; reconstructed from the identical log call
    // in scheduleExecutor below.
    LOG.debug("TASK {} assigned to Node: {} avail [ mem: {} cpu: {} ] total [ mem: {} cpu: {} ] on "
            + "slot: {} on Rack: {}",
        exec,
        targetNode.getHostname(),
        targetNode.getAvailableMemoryResources(),
        targetNode.getAvailableCpuResources(),
        targetNode.getTotalMemoryResources(),
        targetNode.getTotalCpuResources(),
        targetSlot,
        nodeToRack(targetNode));
}
/**
 * Schedule executor exec from topology td.
 *
 * @param exec the executor to schedule
 * @param td the topology executor exec is a part of
 * @param schedulerAssignmentMap the assignments already calculated
 * @param scheduledTasks executors that have been scheduled
 */
private void scheduleExecutor(ExecutorDetails exec, TopologyDetails td,
        Map<WorkerSlot, Collection<ExecutorDetails>> schedulerAssignmentMap,
        Collection<ExecutorDetails> scheduledTasks) {
    WorkerSlot targetSlot = this.findWorkerForExec(exec, td, schedulerAssignmentMap);
    if (targetSlot != null) {
        RAS_Node targetNode = this.idToNode(targetSlot.getNodeId());
        if (!schedulerAssignmentMap.containsKey(targetSlot)) {
            schedulerAssignmentMap.put(targetSlot, new LinkedList<ExecutorDetails>());
        }
        schedulerAssignmentMap.get(targetSlot).add(exec);
        targetNode.consumeResourcesforTask(exec, td);
        scheduledTasks.add(exec);
        LOG.debug("TASK {} assigned to Node: {} avail [ mem: {} cpu: {} ] total [ mem: {} cpu: {} ] on "
                + "slot: {} on Rack: {}",
            exec,
            targetNode.getHostname(),
            targetNode.getAvailableMemoryResources(),
            targetNode.getAvailableCpuResources(),
            targetNode.getTotalMemoryResources(),
            targetNode.getTotalCpuResources(),
            targetSlot,
            nodeToRack(targetNode));
    } else {
        LOG.error("Not Enough Resources to schedule Task {}", exec);
    }
}
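// A hypothetical driver loop showing how scheduleExecutor fills schedulerAssignmentMap and
// scheduledTasks. The surrounding strategy class and the `td` variable are assumed; only
// methods shown in this section plus TopologyDetails.getExecutors() are used.
Map<WorkerSlot, Collection<ExecutorDetails>> schedulerAssignmentMap = new HashMap<>();
Collection<ExecutorDetails> scheduledTasks = new ArrayList<>();
for (ExecutorDetails exec : td.getExecutors()) {
    scheduleExecutor(exec, td, schedulerAssignmentMap, scheduledTasks);
}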
for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
    Assert.assertEquals("assert worker scheduled on rack-0", "rack-0",
        resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()));
}

String nodeHostname = nodeHostnames.get(i % nodeHostnames.size());
RAS_Node node = rs.hostnameToNodes(nodeHostname).get(0);
WorkerSlot targetSlot = node.getFreeSlots().iterator().next();
ExecutorDetails targetExec = executorIterator.next();
node.assign(targetSlot, topo2, Arrays.asList(targetExec));

for (WorkerSlot ws : assignment.getSlotToExecutors().keySet()) {
    Assert.assertEquals("assert worker scheduled on rack-1", "rack-1",
        resolvedSuperVisors.get(rs.idToNode(ws.getNodeId()).getHostname()));
}
targetNode.assign(targetSlot, td, execsNeedScheduling);
// The original format string was lost in extraction; a plausible reconstruction
// matching the surviving argument list:
LOG.debug("Topology: {} assigned executors: {} to node: {} on port: {}",
    td.getName(), execsNeedScheduling, targetNode.getHostname(), targetSlot.getPort());
targetNode.consumeResourcesforTask(exec, td);
if (!nodesUsed.contains(targetNode.getId())) {
    nodesUsed.add(targetNode.getId());
}
WorkerSlot ws = entry.getKey();
WorkerResources wr = entry.getValue();
double memoryBefore = nodes.get(ws.getNodeId()).getAvailableMemoryResources();
double cpuBefore = nodes.get(ws.getNodeId()).getAvailableCpuResources();
double memoryUsedByWorker = wr.get_mem_on_heap() + wr.get_mem_off_heap();
assertEquals("Check if memory used by worker is calculated correctly",
    1000.0, memoryUsedByWorker, 0.001);
double cpuUsedByWorker = wr.get_cpu();
assertEquals("Check if CPU used by worker is calculated correctly",
    100.0, cpuUsedByWorker, 0.001);

nodes.get(ws.getNodeId()).free(ws);
double memoryAfter = nodes.get(ws.getNodeId()).getAvailableMemoryResources();
double cpuAfter = nodes.get(ws.getNodeId()).getAvailableCpuResources();
assertEquals("Check if free correctly frees amount of memory",
    memoryBefore + memoryUsedByWorker, memoryAfter, 0.001);
assertEquals("Check if free correctly frees amount of CPU",
    cpuBefore + cpuUsedByWorker, cpuAfter, 0.001);
/**
 * Get the best worker to assign executor exec to on a given rack.
 *
 * @param exec the executor to schedule
 * @param td the topology that the executor is a part of
 * @param rackId the id of the rack to find a worker on
 * @param scheduleAssignmentMap already calculated assignments
 * @return a worker to assign executor exec to, or null if no suitable worker can be
 *     found on the rack with rackId
 */
private WorkerSlot getBestWorker(ExecutorDetails exec, TopologyDetails td, String rackId,
        Map<WorkerSlot, Collection<ExecutorDetails>> scheduleAssignmentMap) {
    if (!_rackIdToSortedNodes.containsKey(rackId)) {
        _rackIdToSortedNodes.put(rackId,
            sortNodes(this.getAvailableNodesFromRack(rackId), rackId, td.getId(), scheduleAssignmentMap));
    }
    TreeSet<ObjectResources> sortedNodes = _rackIdToSortedNodes.get(rackId);
    double taskMem = td.getTotalMemReqTask(exec);
    double taskCPU = td.getTotalCpuReqTask(exec);
    for (ObjectResources nodeResources : sortedNodes) {
        RAS_Node n = _nodes.getNodeById(nodeResources.id);
        if (n.getAvailableCpuResources() >= taskCPU
                && n.getAvailableMemoryResources() >= taskMem
                && !n.getFreeSlots().isEmpty()) {
            for (WorkerSlot ws : n.getFreeSlots()) {
                if (checkWorkerConstraints(exec, ws, td, scheduleAssignmentMap)) {
                    return ws;
                }
            }
        }
    }
    return null;
}
@VisibleForTesting
void prepare(Cluster cluster) {
    this.cluster = cluster;
    nodes = new RAS_Nodes(cluster);
    networkTopography = cluster.getNetworkTopography();

    // Invert the rackId -> hosts topography into a host -> rackId lookup.
    Map<String, String> hostToRack = new HashMap<>();
    for (Map.Entry<String, List<String>> entry : networkTopography.entrySet()) {
        String rackId = entry.getKey();
        for (String hostName : entry.getValue()) {
            hostToRack.put(hostName, rackId);
        }
    }
    // Build per-supervisor and per-host/rack indexes used throughout scheduling.
    for (RAS_Node node : nodes.getNodes()) {
        String superId = node.getId();
        String hostName = node.getHostname();
        String rackId = hostToRack.getOrDefault(hostName, DNSToSwitchMapping.DEFAULT_RACK);
        superIdToHostname.put(superId, hostName);
        superIdToRack.put(superId, rackId);
        hostnameToNodes.computeIfAbsent(hostName, (hn) -> new ArrayList<>()).add(node);
        rackIdToNodes.computeIfAbsent(rackId, (rid) -> new ArrayList<>()).add(node);
    }
    logClusterInfo();
}
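// A hypothetical snippet using only the fields built above: after prepare() runs, rack
// lookups are O(1) map reads rather than scans over the network topography. The `cluster`
// object is assumed to exist.
prepare(cluster);
RAS_Node someNode = nodes.getNodes().iterator().next();
String rack = superIdToRack.get(someNode.getId()); // same value nodeToRack() returns
List<RAS_Node> rackMates = rackIdToNodes.get(rack); // all nodes sharing that rack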
/**
 * Frees a single executor.
 *
 * @param exec is the executor to free
 * @param topo the topology the executor is a part of
 */
public void freeSingleExecutor(ExecutorDetails exec, TopologyDetails topo) {
    Map<String, Collection<ExecutorDetails>> usedSlots = topIdToUsedSlots.get(topo.getId());
    if (usedSlots == null) {
        throw new IllegalArgumentException("Topology " + topo + " is not assigned");
    }
    WorkerSlot ws = null;
    Set<ExecutorDetails> updatedAssignment = new HashSet<>();
    for (Entry<String, Collection<ExecutorDetails>> entry : usedSlots.entrySet()) {
        if (entry.getValue().contains(exec)) {
            ws = slots.get(entry.getKey());
            updatedAssignment.addAll(entry.getValue());
            updatedAssignment.remove(exec);
            break;
        }
    }
    if (ws == null) {
        throw new IllegalArgumentException(
            "Executor " + exec + " is not assigned on this node to " + topo);
    }
    free(ws);
    if (!updatedAssignment.isEmpty()) {
        assign(ws, topo, updatedAssignment);
    }
}
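// A minimal sketch of evicting one executor, e.g. during a rebalance. The method name,
// parameters, and the executor id range are hypothetical. Because freeSingleExecutor
// re-assigns the remaining executors on the slot, the worker keeps its other tasks.
void evictOneExecutorSketch(RAS_Node node, TopologyDetails topo) {
    ExecutorDetails victim = new ExecutorDetails(3, 3); // illustrative task id range
    node.freeSingleExecutor(victim, topo);
}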
@Override public String toString() { return "{Node: " + ((_sup == null) ? "null (possibly down)" : _sup.getHost()) + ", Avail [ Mem: " + ((_availMemory == null) ? "N/A" : _availMemory.toString()) + ", CPU: " + ((_availCPU == null) ? "N/A" : _availCPU.toString()) + ", Slots: " + this.getFreeSlots() + "] Total [ Mem: " + ((_sup == null) ? "N/A" : this.getTotalMemoryResources()) + ", CPU: " + ((_sup == null) ? "N/A" : this.getTotalCpuResources()) + ", Slots: " + this._slots.values() + " ]}"; }
String superId = rasNode.getId();
ObjectResources node = new ObjectResources(superId);
node.availableResources = rasNode.getTotalAvailableResources();
node.totalResources = rasNode.getTotalResources();
public int totalSlotsFree() {
    return getFreeSlots().size();
}
/**
 * Get the rack that a node is a part of.
 *
 * @param node the node whose rack to look up
 * @return the rack id
 */
protected String nodeToRack(RAS_Node node) {
    return superIdToRack.get(node.getId());
}
public static int countFreeSlotsAlive(Collection<RAS_Node> nodes) {
    int total = 0;
    for (RAS_Node n : nodes) {
        if (n.isAlive()) {
            total += n.totalSlotsFree();
        }
    }
    return total;
}
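// Hypothetical pre-check built on countFreeSlotsAlive: bail out early when the slots on
// live supervisors cannot satisfy a topology's requested worker count. The `nodes` and
// `td` variables are assumed to exist.
int aliveFree = RAS_Node.countFreeSlotsAlive(nodes.getNodes());
if (td.getNumWorkers() > aliveFree) {
    LOG.warn("Topology {} requests {} workers but only {} free slots are on live nodes",
        td.getName(), td.getNumWorkers(), aliveFree);
}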