/**
 * Reports whether this host shows up among the slaves that the Singularity client returns
 * for {@link MachineState#DECOMMISSIONED}.
 *
 * @return {@code true} if at least one returned slave's host equals {@code hostname},
 *         {@code false} otherwise (including when no slaves are returned)
 */
private boolean isDecommissioned() {
  Collection<SingularitySlave> decommissionedSlaves = singularityClient.getSlaves(Optional.of(MachineState.DECOMMISSIONED));
  for (SingularitySlave slave : decommissionedSlaves) {
    if (slave.getHost().equals(hostname)) {
      // A single match settles the answer, so stop instead of scanning the remaining slaves.
      return true;
    }
  }
  return false;
}
/**
 * Builds a new {@code SingularitySlave} that mirrors this one (id, first-seen time, current
 * state, host, rack id and attributes) but carries the given resources.
 *
 * @param resources the resources to attach; wrapped with {@code Optional.of}
 * @return a new slave instance; this instance is not modified
 */
@JsonIgnore
public SingularitySlave withResources(MesosResourcesObject resources) {
  return new SingularitySlave(
      getId(),
      getFirstSeenAt(),
      getCurrentState(),
      host,
      rackId,
      attributes,
      Optional.of(resources)
  );
}
/**
 * Saves one ACTIVE slave and one slave that has been DEAD for ten hours, then checks that the
 * reconciliation poller keeps both until the delete-after threshold is set to one hour, at
 * which point only the dead slave disappears.
 */
@Test
public void testDeadSlavesArePurged() {
  final long now = System.currentTimeMillis();
  final long deadSince = now - TimeUnit.HOURS.toMillis(10);

  final SingularitySlave liveSlave = new SingularitySlave("1", "h1", "r1", ImmutableMap.of("uniqueAttribute", "1"), Optional.absent())
      .changeState(new SingularityMachineStateHistoryUpdate("1", MachineState.ACTIVE, 100, Optional.absent(), Optional.absent()));
  final SingularitySlave deadSlave = new SingularitySlave("2", "h1", "r1", ImmutableMap.of("uniqueAttribute", "2"), Optional.absent())
      .changeState(new SingularityMachineStateHistoryUpdate("2", MachineState.DEAD, deadSince, Optional.absent(), Optional.absent()));

  slaveManager.saveObject(liveSlave);
  slaveManager.saveObject(deadSlave);

  // First pass runs with the configuration untouched: both slaves are expected to survive.
  slaveReconciliationPoller.runActionOnPoll();
  Assert.assertEquals(1, slaveManager.getObjectsFiltered(MachineState.ACTIVE).size());
  Assert.assertEquals(1, slaveManager.getObjectsFiltered(MachineState.DEAD).size());

  // Second pass runs with a one-hour threshold, which the ten-hour-dead slave exceeds.
  configuration.setDeleteDeadSlavesAfterHours(1);
  slaveReconciliationPoller.runActionOnPoll();
  Assert.assertEquals(1, slaveManager.getObjectsFiltered(MachineState.ACTIVE).size());
  Assert.assertEquals(0, slaveManager.getObjectsFiltered(MachineState.DEAD).size());
}
/**
 * Builds a new {@code SingularitySlave} that mirrors this one (id, first-seen time, host,
 * rack id, attributes and resources) but uses {@code newState} as its current state.
 *
 * @param newState the state update to install on the copy
 * @return a new slave instance; this instance is not modified
 */
@Override
public SingularitySlave changeState(SingularityMachineStateHistoryUpdate newState) {
  return new SingularitySlave(
      getId(),
      getFirstSeenAt(),
      newState,
      host,
      rackId,
      attributes,
      resources
  );
}
/**
 * Deletes slaves that have been in {@link MachineState#DEAD} for longer than the configured
 * number of hours ({@code configuration.getDeleteDeadSlavesAfterHours()}).
 *
 * <p>The time a slave has been dead is measured from the timestamp of its current state.
 * Each deletion is logged at INFO together with the delete result, and a DEBUG summary of
 * how many dead slaves were checked and deleted is logged at the end.
 */
private void checkDeadSlaves() {
  final long start = System.currentTimeMillis();

  final List<SingularitySlave> deadSlaves = slaveManager.getObjectsFiltered(MachineState.DEAD);

  if (deadSlaves.isEmpty()) {
    LOG.trace("No dead slaves");
    return;
  }

  int deleted = 0;

  final long maxDuration = TimeUnit.HOURS.toMillis(configuration.getDeleteDeadSlavesAfterHours());

  // Iterate the snapshot fetched above rather than querying the manager a second time, so the
  // "checked" count in the summary always matches the set of slaves that was examined.
  for (SingularitySlave deadSlave : deadSlaves) {
    final long duration = System.currentTimeMillis() - deadSlave.getCurrentState().getTimestamp();

    if (duration > maxDuration) {
      SingularityDeleteResult result = slaveManager.deleteObject(deadSlave.getId());

      deleted++;

      LOG.info("Removing dead slave {} ({}) after {} (max {})", deadSlave.getId(), result, JavaUtils.durationFromMillis(duration), JavaUtils.durationFromMillis(maxDuration));
    }
  }

  LOG.debug("Checked {} dead slaves, deleted {} in {}", deadSlaves.size(), deleted, JavaUtils.duration(start));
}
/**
 * Returns the slaves known to the slave manager whose usage should be tracked.
 *
 * <p>A slave is left out when its current machine state reports {@code isInactive()} or is
 * {@link MachineState#DECOMMISSIONED}.
 *
 * @return a new list holding the remaining slaves, in the order the manager returned them
 */
public List<SingularitySlave> getSlavesToTrackUsageFor() {
  final List<SingularitySlave> allSlaves = slaveManager.getObjects();
  final List<SingularitySlave> tracked = new ArrayList<>(allSlaves.size());

  for (SingularitySlave candidate : allSlaves) {
    final boolean eligible = !candidate.getCurrentState().getState().isInactive()
        && candidate.getCurrentState().getState() != MachineState.DECOMMISSIONED;

    if (eligible) {
      tracked.add(candidate);
    }
  }

  return tracked;
}
List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getSlaveResourceUsage(slave.getHost(), useShortTimeout); MesosSlaveMetricsSnapshotObject slaveMetricsSnapshot = mesosClient.getSlaveMetricsSnapshot(slave.getHost()); double systemMemTotalBytes = 0; double systemMemFreeBytes = 0; if (!slave.getResources().isPresent() || !slave.getResources().get().getMemoryMegaBytes().isPresent() || !slave.getResources().get().getNumCpus().isPresent()) { LOG.debug("Could not find slave or resources for slave {}", slave.getId()); } else { memoryMbTotal = Optional.of(slave.getResources().get().getMemoryMegaBytes().get().longValue()); cpusTotal = Optional.of(slave.getResources().get().getNumCpus().get().doubleValue()); diskMbTotal = Optional.of(slave.getResources().get().getDiskSpace().get()); List<Long> slaveTimestamps = usageManager.getSlaveUsageTimestamps(slave.getId()); if (slaveTimestamps.size() + 1 > configuration.getNumUsageToKeep()) { usageManager.deleteSpecificSlaveUsage(slave.getId(), slaveTimestamps.get(0)); LOG.debug("Saving slave {} usage {}", slave.getHost(), slaveUsage); usageManager.saveSpecificSlaveUsageAndSetCurrent(slave.getId(), slaveUsage); } catch (Throwable t) { String message = String.format("Could not get slave usage for host %s", slave.getHost()); LOG.error(message, t); exceptionNotifier.notify(message, t);
/**
 * Stores the given slave in the {@code slaves} map under its id.
 *
 * <p>If the {@code active} flag is not set, a warning is logged first; the slave is stored
 * either way.
 *
 * @param slave the slave to store, keyed by {@code slave.getId()}
 */
public void putSlave(SingularitySlave slave) {
  final boolean inactive = !active;

  if (inactive) {
    LOG.warn("putSlave {}, but not active", slave);
  }

  // NOTE(review): the write below happens even after the "not active" warning. Confirm
  // whether that is intentional or whether the inactive branch was meant to return early.
  final String slaveId = slave.getId();
  slaves.put(slaveId, slave);
}
/**
 * Builds a slave and a rack description from an offer and runs each through {@code check}
 * against its manager.
 *
 * <p>When the slave check yields {@link CheckResult#NEW} and the inactive-slave manager
 * reports the slave's host as inactive, the slave is moved to
 * {@link MachineState#STARTING_DECOMMISSION} with an explanatory message. Other new slaves,
 * and new racks, are only logged.
 *
 * @param offer the offer supplying the agent id, rack id, host and text attributes
 * @return the result of the slave check (the rack check result is not returned)
 */
public CheckResult checkOffer(Offer offer) {
  final String slaveId = offer.getAgentId().getValue();
  final String rackId = slaveAndRackHelper.getRackIdOrDefault(offer);
  final String host = slaveAndRackHelper.getMaybeTruncatedHost(offer);
  final Map<String, String> textAttributes = slaveAndRackHelper.getTextAttributes(offer);

  final SingularitySlave slave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.absent());

  final CheckResult slaveResult = check(slave, slaveManager);
  final boolean slaveIsNew = slaveResult == CheckResult.NEW;

  if (slaveIsNew && inactiveSlaveManager.isInactive(slave.getHost())) {
    LOG.info("Slave {} on inactive host {} attempted to rejoin. Marking as decommissioned.", slave, host);
    final String reason = String.format("Slave %s on inactive host %s attempted to rejoin cluster.", slaveId, host);
    slaveManager.changeState(slave, MachineState.STARTING_DECOMMISSION, Optional.of(reason), Optional.absent());
  } else if (slaveIsNew) {
    LOG.info("Offer revealed a new slave {}", slave);
  }

  final SingularityRack rack = new SingularityRack(rackId);
  final CheckResult rackResult = check(rack, rackManager);

  if (rackResult == CheckResult.NEW) {
    LOG.info("Offer revealed a new rack {}", rack);
  }

  return slaveResult;
}
if (slave != null && (!slave.getResources().isPresent() || !slave.getResources().get().equals(slaveJsonObject.getResources()))) { LOG.trace("Found updated resources ({}) for slave {}", slaveJsonObject.getResources(), slave); slaveManager.saveObject(slave.withResources(slaveJsonObject.getResources())); SingularitySlave newSlave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.of(slaveJsonObject.getResources()));
/**
 * After a task finishes, completes any pending decommission of the slave and rack it ran on.
 *
 * <p>A slave in {@link MachineState#DECOMMISSIONING} is moved to
 * {@link MachineState#DECOMMISSIONED} once {@code hasTaskLeftOnSlave} reports no remaining
 * tasks; the same is then done for the slave's rack using {@code hasTaskLeftOnRack}. The
 * message and user of the existing state are carried over to the new state.
 *
 * <p>If the slave cannot be found, or the rack cannot be found, a warning is logged, the
 * exception notifier is informed, and the method returns without further checks.
 *
 * @param taskId      the task that just finished
 * @param slaveId     id of the slave the task ran on
 * @param leaderCache cache handed to the "task left" checks
 */
void checkStateAfterFinishedTask(SingularityTaskId taskId, String slaveId, SingularityLeaderCache leaderCache) {
  final Optional<SingularitySlave> maybeSlave = slaveManager.getSlave(slaveId);

  if (!maybeSlave.isPresent()) {
    final String message = String.format("Couldn't find slave with id %s for task %s", slaveId, taskId);
    LOG.warn(message);
    exceptionNotifier.notify(message, ImmutableMap.of("slaveId", slaveId, "taskId", taskId.toString()));
    return;
  }

  final SingularitySlave slave = maybeSlave.get();
  final boolean slaveDecommissioning = slave.getCurrentState().getState() == MachineState.DECOMMISSIONING;

  if (slaveDecommissioning && !hasTaskLeftOnSlave(taskId, slaveId, leaderCache)) {
    slaveManager.changeState(slave, MachineState.DECOMMISSIONED, slave.getCurrentState().getMessage(), slave.getCurrentState().getUser());
  }

  final Optional<SingularityRack> maybeRack = rackManager.getObject(slave.getRackId());

  if (!maybeRack.isPresent()) {
    final String message = String.format("Couldn't find rack with id %s for task %s", slave.getRackId(), taskId);
    LOG.warn(message);
    exceptionNotifier.notify(message, ImmutableMap.of("rackId", slave.getRackId(), "taskId", taskId.toString()));
    return;
  }

  final SingularityRack rack = maybeRack.get();
  final boolean rackDecommissioning = rack.getCurrentState().getState() == MachineState.DECOMMISSIONING;

  if (rackDecommissioning && !hasTaskLeftOnRack(taskId, leaderCache)) {
    rackManager.changeState(rack, MachineState.DECOMMISSIONED, rack.getCurrentState().getMessage(), rack.getCurrentState().getUser());
  }
}
/**
 * Counts the tasks whose slave has attribute {@code attrKey} set to {@code attrValue}.
 *
 * <p>Each task id is resolved to its task through the task manager, and the task's slave id
 * is resolved to a slave through the leader cache. Slaves without the attribute are not
 * counted.
 *
 * @param taskIds   ids of the tasks to inspect
 * @param attrKey   attribute name to look up on each slave
 * @param attrValue value the attribute must equal to be counted
 * @return the number of matching tasks
 */
private long getNumInstancesWithAttribute(List<SingularityTaskId> taskIds, String attrKey, String attrValue) {
  // NOTE(review): both Optional lookups below are unwrapped with get() without a presence
  // check, so a task or slave that cannot be resolved makes this method throw - confirm that
  // callers can rely on both always being present.
  return taskIds.stream()
      .map(taskId -> taskManager.getTask(taskId).get())
      .map(task -> task.getMesosTask().getSlaveId().getValue())
      .map(slaveId -> leaderCache.getSlave(slaveId).get())
      .map(slave -> slave.getAttributes().get(attrKey))
      .filter(value -> value != null && value.equals(attrValue))
      .count();
}
/**
 * Freezes a slave using a change request that carries {@code Optional.of(1L)} and
 * {@code Optional.of(MachineState.ACTIVE)}, then checks that the slave is FROZEN right after
 * the call and ACTIVE again once the expiring-user-action poller has run.
 */
@Test
public void testExpiringMachineState() {
  final MesosMasterStateObject masterState = getMasterState(1);
  singularitySlaveAndRackManager.loadSlavesAndRacksFromMaster(masterState, true);

  final String slaveId = slaveManager.getObjects().get(0).getId();
  final SingularityMachineChangeRequest freezeRequest = new SingularityMachineChangeRequest(
      Optional.of(1L),
      Optional.absent(),
      Optional.absent(),
      Optional.of(MachineState.ACTIVE),
      Optional.absent()
  );

  slaveResource.freezeSlave(singularityUser, slaveId, freezeRequest);
  Assert.assertEquals(MachineState.FROZEN, slaveManager.getObjects().get(0).getCurrentState().getState());

  expiringUserActionPoller.runActionOnPoll();
  Assert.assertEquals(MachineState.ACTIVE, slaveManager.getObjects().get(0).getCurrentState().getState());
}
/**
 * Returns the number of slaves whose current state is {@link MachineState#ACTIVE}.
 *
 * <p>When the leader cache is active the count is taken from the cached slaves; otherwise the
 * superclass implementation is used.
 *
 * @return the number of active slaves
 * @throws ArithmeticException if the cached count does not fit in an {@code int}
 */
@Override
public int getNumActive() {
  if (!leaderCache.active()) {
    return super.getNumActive();
  }

  final long activeCount = leaderCache.getSlaves().stream()
      .map(slave -> slave.getCurrentState().getState())
      .filter(state -> state.equals(MachineState.ACTIVE))
      .count();

  return Math.toIntExact(activeCount);
}
/**
 * Collects the tasks, out of the given active task ids, that are running on the given slave.
 *
 * <p>Task ids are first narrowed by comparing their sanitized host with the slave's host
 * (hyphens replaced via {@code JavaUtils.getReplaceHyphensWithUnderscores}). Only the
 * remaining ids are looked up, and a task is kept when it is present and its agent id equals
 * the slave's id.
 *
 * @param activeTaskIds ids of the tasks to consider
 * @param slave         the slave whose tasks are wanted
 * @return a new list of matching tasks, in the iteration order of {@code activeTaskIds}
 */
public List<SingularityTask> getTasksOnSlave(Collection<SingularityTaskId> activeTaskIds, SingularitySlave slave) {
  final String slaveHost = JavaUtils.getReplaceHyphensWithUnderscores(slave.getHost());
  final List<SingularityTask> tasksOnSlave = Lists.newArrayList();

  for (SingularityTaskId candidateId : activeTaskIds) {
    if (!candidateId.getSanitizedHost().equals(slaveHost)) {
      continue;
    }

    final Optional<SingularityTask> candidate = getTask(candidateId);
    if (!candidate.isPresent()) {
      continue;
    }

    // Confirm by agent id as well; the host comparison above only narrows the candidates.
    if (slave.getId().equals(candidate.get().getAgentId().getValue())) {
      tasksOnSlave.add(candidate.get());
    }
  }

  return tasksOnSlave;
}
List<MesosTaskMonitorObject> allTaskUsage = mesosClient.getSlaveResourceUsage(slave.getHost(), useShortTimeout); MesosSlaveMetricsSnapshotObject slaveMetricsSnapshot = mesosClient.getSlaveMetricsSnapshot(slave.getHost()); double systemMemTotalBytes = 0; double systemMemFreeBytes = 0; if (!slave.getResources().isPresent() || !slave.getResources().get().getMemoryMegaBytes().isPresent() || !slave.getResources().get().getNumCpus().isPresent()) { LOG.debug("Could not find slave or resources for slave {}", slave.getId()); } else { memoryMbTotal = Optional.of(slave.getResources().get().getMemoryMegaBytes().get().longValue()); cpusTotal = Optional.of(slave.getResources().get().getNumCpus().get().doubleValue()); diskMbTotal = Optional.of(slave.getResources().get().getDiskSpace().get()); List<Long> slaveTimestamps = usageManager.getSlaveUsageTimestamps(slave.getId()); if (slaveTimestamps.size() + 1 > configuration.getNumUsageToKeep()) { usageManager.deleteSpecificSlaveUsage(slave.getId(), slaveTimestamps.get(0)); LOG.debug("Saving slave {} usage {}", slave.getHost(), slaveUsage); usageManager.saveSpecificSlaveUsageAndSetCurrent(slave.getId(), slaveUsage); } catch (Throwable t) { String message = String.format("Could not get slave usage for host %s", slave.getHost()); LOG.error(message, t); exceptionNotifier.notify(message, t);
/**
 * Returns the ids of the slaves reported by {@code getSlavesToTrackUsageFor()}.
 *
 * @return a new set holding one id per tracked slave
 */
public Set<String> getSlaveIdsToTrackUsageFor() {
  final List<SingularitySlave> trackedSlaves = getSlavesToTrackUsageFor();
  final Set<String> trackedIds = new HashSet<>(trackedSlaves.size());

  trackedSlaves.forEach(tracked -> trackedIds.add(tracked.getId()));

  return trackedIds;
}
/**
 * Builds a slave and a rack description from an offer and runs each through {@code check}
 * against its manager.
 *
 * <p>When the slave check yields {@link CheckResult#NEW} and the inactive-slave manager
 * reports the slave's host as inactive, the slave is moved to
 * {@link MachineState#STARTING_DECOMMISSION} with an explanatory message. Other new slaves,
 * and new racks, are only logged.
 *
 * @param offer the offer supplying the agent id, rack id, host and text attributes
 * @return the result of the slave check (the rack check result is not returned)
 */
public CheckResult checkOffer(Offer offer) {
  final String slaveId = offer.getAgentId().getValue();
  final String rackId = slaveAndRackHelper.getRackIdOrDefault(offer);
  final String host = slaveAndRackHelper.getMaybeTruncatedHost(offer);
  final Map<String, String> textAttributes = slaveAndRackHelper.getTextAttributes(offer);

  final SingularitySlave slave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.absent());

  final CheckResult slaveResult = check(slave, slaveManager);
  final boolean slaveIsNew = slaveResult == CheckResult.NEW;

  if (slaveIsNew && inactiveSlaveManager.isInactive(slave.getHost())) {
    LOG.info("Slave {} on inactive host {} attempted to rejoin. Marking as decommissioned.", slave, host);
    final String reason = String.format("Slave %s on inactive host %s attempted to rejoin cluster.", slaveId, host);
    slaveManager.changeState(slave, MachineState.STARTING_DECOMMISSION, Optional.of(reason), Optional.absent());
  } else if (slaveIsNew) {
    LOG.info("Offer revealed a new slave {}", slave);
  }

  final SingularityRack rack = new SingularityRack(rackId);
  final CheckResult rackResult = check(rack, rackManager);

  if (rackResult == CheckResult.NEW) {
    LOG.info("Offer revealed a new rack {}", rack);
  }

  return slaveResult;
}
if (slave != null && (!slave.getResources().isPresent() || !slave.getResources().get().equals(slaveJsonObject.getResources()))) { LOG.trace("Found updated resources ({}) for slave {}", slaveJsonObject.getResources(), slave); slaveManager.saveObject(slave.withResources(slaveJsonObject.getResources())); SingularitySlave newSlave = new SingularitySlave(slaveId, host, rackId, textAttributes, Optional.of(slaveJsonObject.getResources()));