/**
 * Checks whether one service relies on another for runtime actions, either directly or through a chain
 * of other services.
 *
 * @param service1 Service whose dependencies are examined.
 * @param service2 Service that may be depended upon.
 * @return True if service1 depends on service2 directly or indirectly at runtime, false otherwise.
 */
public boolean runtimeDependsOn(String service1, String service2) {
  // Same lookup as the generic dependency check, restricted to the runtime dependency table.
  boolean depends = doesDependOn(service1, service2, runtimeServiceDependencies);
  return depends;
}
/**
 * Expands the requested services according to the cluster action being planned.
 *
 * @param services Services that were explicitly requested.
 * @param action Cluster action being performed.
 * @return The start-expanded set for START_SERVICES, the stop-expanded set for STOP_SERVICES and
 *         RESTART_SERVICES, and the input set unchanged for every other action.
 */
private Set<String> expandServices(Set<String> services, ClusterAction action) {
  final Set<String> expanded;
  switch (action) {
    case START_SERVICES:
      expanded = expandStartServices(services);
      break;
    case STOP_SERVICES:
    case RESTART_SERVICES:
      // stop and restart share the same expansion
      expanded = expandStopServices(services);
      break;
    default:
      expanded = services;
      break;
  }
  return expanded;
}
/**
 * Sets the status of the given job to {@link ClusterJob.Status#FAILED} and the status of the cluster to the default
 * failure status as given in {@link co.cask.coopr.scheduler.ClusterAction#getFailureStatus()}.
 * Delegates to the four-argument overload, passing the action's failure status and {@code null} as the last argument.
 *
 * @param job Job to fail.
 * @param cluster Cluster to set the status for.
 * @throws IOException propagated from the four-argument overload; presumably raised when persisting the job or
 *                     cluster fails (TODO confirm against the overload).
 * @throws IllegalAccessException propagated from the four-argument overload; the cause is not visible here.
 */
public void failJobAndSetClusterStatus(ClusterJob job, Cluster cluster) throws IOException, IllegalAccessException {
  failJobAndSetClusterStatus(job, cluster, job.getClusterAction().getFailureStatus(), null);
}
/**
 * Builds the task that undoes the given task, for use when that task fails.
 *
 * @param task Task that needs to get rolled back.
 * @return A new cluster task carrying the rollback action registered for the task's name, or null when no
 *         rollback action is registered for it.
 */
private ClusterTask getRollbackTask(ClusterTask task) {
  ProvisionerAction rollbackAction = actions.getRollbackActions().get(task.getTaskName());
  if (rollbackAction != null) {
    // The rollback task gets a fresh id within the same job and otherwise mirrors the failed task.
    TaskId newTaskId = idService.getNewTaskId(JobId.fromString(task.getJobId()));
    return new ClusterTask(rollbackAction, newTaskId, task.getNodeId(), task.getService(),
                           task.getClusterAction(), task.getClusterTemplateName(), task.getAccount());
  }
  return null;
}
private void testCallbacks(boolean failJob) throws Exception { ClusterScheduler clusterScheduler = injector.getInstance(ClusterScheduler.class); String tenantId = cluster.getAccount().getTenantId(); clusterQueues.add(tenantId, new Element(cluster.getId(), ClusterAction.CLUSTER_CREATE.name())); clusterScheduler.run(); CallbackScheduler callbackScheduler = injector.getInstance(CallbackScheduler.class); // should be no job in the queue until the start callback runs Assert.assertEquals(0, jobQueues.size(tenantId)); waitForCallback(callbackScheduler); Assert.assertEquals(CallbackData.Type.START, mockClusterCallback.getReceivedCallbacks().get(0).getType()); JobScheduler jobScheduler = injector.getInstance(JobScheduler.class); jobScheduler.run(); // take tasks until there are no more TakeTaskRequest takeRequest = new TakeTaskRequest("consumer1", PROVISIONER_ID, tenantId); SchedulableTask task = TestHelper.takeTask(getInternalServerUrl(), takeRequest); while (task != null) { FinishTaskRequest finishRequest = new FinishTaskRequest("consumer1", PROVISIONER_ID, tenantId, task.getTaskId(), null, null, failJob ? 1 : 0, null, null, null); TestHelper.finishTask(getInternalServerUrl(), finishRequest); jobScheduler.run(); jobScheduler.run(); task = TestHelper.takeTask(getInternalServerUrl(), takeRequest); } jobScheduler.run(); waitForCallback(callbackScheduler); // at this point, the failure callback should have run Assert.assertEquals(failJob ? CallbackData.Type.FAILURE : CallbackData.Type.SUCCESS, mockClusterCallback.getReceivedCallbacks().get(1).getType()); }
public ServiceDependencyResolver(Actions actions, Map<String, Service> clusterServices) { this.clusterServices = ImmutableMap.copyOf(clusterServices); this.providesIndex = ImmutableSetMultimap.copyOf(getProvidesIndex()); this.installServiceDependencies = minimizeDependencies( new Function<Service, Set<String>>() { @Override this.runtimeServiceDependencies = minimizeDependencies( new Function<Service, Set<String>>() { @Override this.reversedInstallServiceDependencies = reverseDependencies(installServiceDependencies); this.reversedRuntimeServiceDependencies = reverseDependencies(runtimeServiceDependencies); this.clusterDependencies = HashMultimap.create(); Set<Actions.Dependency> serviceActionDependencies = actions.getActionDependencies(); clusterDependencies.putAll(new ActionOnService(actionDependency.getTo(), service.getName()), getDirectActionDependencies(service, actionDependency));
@Test(timeout = 20000) public void testFalseOnStartStopsJob() throws Exception { String tenantId = "q"; ClusterScheduler clusterScheduler = injector.getInstance(ClusterScheduler.class); clusterQueues.add(tenantId, new Element(cluster.getId(), ClusterAction.CLUSTER_CREATE.name())); clusterScheduler.run(); CallbackScheduler callbackScheduler = injector.getInstance(CallbackScheduler.class); // should be no job in the queue until the start callback runs Assert.assertEquals(0, jobQueues.size(tenantId)); // tell mock callback to return false for onStart callback mockClusterCallback.setReturnOnStart(false); // wait for start callback to finish waitForCallback(callbackScheduler); Assert.assertEquals(CallbackData.Type.START, mockClusterCallback.getReceivedCallbacks().get(0).getType()); // wait for fail callback to finish if (mockClusterCallback.getReceivedCallbacks().size() < 2) { waitForCallback(callbackScheduler); } Assert.assertEquals(CallbackData.Type.FAILURE, mockClusterCallback.getReceivedCallbacks().get(1).getType()); // there also should not be any jobs in the queue Assert.assertEquals(0, jobQueues.size(tenantId)); }
public JobPlanner(ClusterJob job, Set<Node> clusterNodes) { this.clusterAction = job.getClusterAction(); this.nodesToPlan = job.getPlannedNodes(); this.serviceNodeMap = ArrayListMultimap.create(); this.serviceMap = Maps.newHashMap(); this.nodeMap = Maps.newHashMap(); for (Node node : clusterNodes) { for (Service service : node.getServices()) { serviceNodeMap.put(service.getName(), node); serviceMap.put(service.getName(), service); } nodeMap.put(node.getId(), node); } this.dependencyResolver = new ServiceDependencyResolver(actions, serviceMap); if (job.getPlannedServices() != null) { this.servicesToPlan = ImmutableSet.copyOf(expandServices(job.getPlannedServices(), clusterAction)); } else { this.servicesToPlan = null; } }
/**
 * Builds the DAG (directed acyclic graph) of tasks needed to carry out the cluster job. Only nodes and
 * services accepted by {@code shouldPlanNode} and {@code shouldPlanService} contribute to the graph.
 *
 * @return Task dag for the cluster operation.
 */
TaskDag createTaskDag() {
  final long startMillis = System.currentTimeMillis();
  TaskDag dag = new TaskDag();
  List<ProvisionerAction> orderedActions = actions.getActionOrder().get(clusterAction);

  for (Node candidateNode : nodeMap.values()) {
    if (shouldPlanNode(candidateNode.getId())) {
      for (Service candidateService : candidateNode.getServices()) {
        if (shouldPlanService(candidateService.getName())) {
          addDependencies(dag, orderedActions, candidateService, candidateNode);
        }
      }
    }
  }

  long elapsedMillis = System.currentTimeMillis() - startMillis;
  LOG.debug("took {} ms to create action plan.", elapsedMillis);
  return dag;
}
@Test public void testOnlyCorrectClustersAreCleaned() throws Exception { long now = System.currentTimeMillis(); for (int i = 0; i < 20; i++) { createCluster(String.valueOf(i), now - 1000, now - 100, Cluster.Status.ACTIVE); } String queueName = account.getTenantId(); ClusterCleanup clusterCleanup = new ClusterCleanup(clusterStore, clusterService, nodeService, taskService, jobQueues, provisionerQueues, -10, 3, 7); Assert.assertEquals(0, Iterators.size(clusterQueues.getQueued(queueName))); clusterCleanup.run(); // clusters 3, 10, and 17 should have been scheduled for deletion Assert.assertEquals(3, Iterators.size(clusterQueues.getQueued(queueName))); Element e1 = clusterQueues.take(queueName, "consumer1"); Element e2 = clusterQueues.take(queueName, "consumer1"); Element e3 = clusterQueues.take(queueName, "consumer1"); Assert.assertEquals(ImmutableSet.of("3", "10", "17"), ImmutableSet.of(e1.getId(), e2.getId(), e3.getId())); Assert.assertEquals(ClusterAction.CLUSTER_DELETE.name(), e1.getValue()); Assert.assertEquals(ClusterAction.CLUSTER_DELETE.name(), e2.getValue()); Assert.assertEquals(ClusterAction.CLUSTER_DELETE.name(), e3.getValue()); }
/**
 * Produces the staged execution plan for the cluster operation. Each element of the returned list is one
 * stage: tasks within a stage may run in parallel, but every task in a stage must complete successfully
 * before the next stage begins.
 *
 * @return Plan of tasks to be executed in order to perform a cluster operation.
 */
public List<Set<TaskNode>> linearizeDependentTasks() {
  // DAG construction logs its own timing; only the linearization is timed here.
  TaskDag dag = createTaskDag();

  final long startMillis = System.currentTimeMillis();
  List<Set<TaskNode>> stages = dag.linearize();
  long elapsedMillis = System.currentTimeMillis() - startMillis;

  LOG.debug("took {} ms to linearize action plan.", elapsedMillis);
  return stages;
}
/**
 * Verifies that {@code JobPlanner.deDupNodePerStage} splits a stage holding more than one task for the same
 * host into consecutive stages, and leaves stages with one task per host untouched.
 */
@Test
public void testDedupNodesPerStage() throws Exception {
  // input: stages 1 and 3 each carry two tasks for host1, stage 2 has one task per host
  Set<ClusterTask> inputStage1 = createSortedSet(createClusterTask("INSTALL", "1-1-1", "host1"),
                                                 createClusterTask("CONFIGURE", "1-1-2", "host1"),
                                                 createClusterTask("INSTALL", "1-1-3", "host3"));
  Set<ClusterTask> inputStage2 = createSortedSet(createClusterTask("INSTALL", "1-1-12", "host1"),
                                                 createClusterTask("INSTALL", "1-1-22", "host2"),
                                                 createClusterTask("INSTALL", "1-1-32", "host3"));
  Set<ClusterTask> inputStage3 = createSortedSet(createClusterTask("INSTALL", "1-1-13", "host1"),
                                                 createClusterTask("CONFIGURE", "1-1-22", "host1"),
                                                 createClusterTask("INSTALL", "1-1-32", "host3"));
  List<Set<ClusterTask>> input = ImmutableList.of(inputStage1, inputStage2, inputStage3);

  List<Set<ClusterTask>> deduped = JobPlanner.deDupNodePerStage(input);

  // expected: the extra host1 task of stages 1 and 3 moves into its own following stage
  Set<ClusterTask> expectedStage1 = createSortedSet(createClusterTask("INSTALL", "1-1-1", "host1"),
                                                    createClusterTask("INSTALL", "1-1-3", "host3"));
  Set<ClusterTask> expectedStage2 = createSortedSet(createClusterTask("CONFIGURE", "1-1-2", "host1"));
  Set<ClusterTask> expectedStage3 = createSortedSet(createClusterTask("INSTALL", "1-1-12", "host1"),
                                                    createClusterTask("INSTALL", "1-1-22", "host2"),
                                                    createClusterTask("INSTALL", "1-1-32", "host3"));
  Set<ClusterTask> expectedStage4 = createSortedSet(createClusterTask("INSTALL", "1-1-13", "host1"),
                                                    createClusterTask("INSTALL", "1-1-32", "host3"));
  Set<ClusterTask> expectedStage5 = createSortedSet(createClusterTask("CONFIGURE", "1-1-22", "host1"));
  List<Set<ClusterTask>> expected =
    ImmutableList.of(expectedStage1, expectedStage2, expectedStage3, expectedStage4, expectedStage5);

  Assert.assertEquals(expected, deduped);
}
ClusterAction.SOLVE_LAYOUT, 0, 0); solverScheduler.run(); clusterScheduler.run(); jobScheduler.run(); // run scheduler put in queue jobScheduler.run(); // run scheduler take from queue assertResponseStatus(response, HttpResponseStatus.OK); clusterScheduler.run(); jobScheduler.run(); // run scheduler put in queue jobScheduler.run(); // run scheduler take from queue
/**
 * Looks up which actions on which services must be carried out before the given action can be performed
 * on the given service. The result is described as a set of {@link ActionOnService}.
 *
 * @param service Service to check.
 * @param action Action to check.
 * @return Set of actions on services that must be performed before the input can be performed.
 */
public Set<ActionOnService> getDirectDependentActions(String service, ProvisionerAction action) {
  ActionOnService lookupKey = new ActionOnService(action, service);
  return clusterDependencies.get(lookupKey);
}
/**
 * Performs one cleanup pass: times out tasks in every provisioner queue, then expires clusters, all measured
 * against a single timestamp taken at the start of the pass. Anything thrown is logged and not rethrown.
 */
@Override
public void run() {
  try {
    final long now = System.currentTimeMillis();
    for (String provisionerQueue : provisionerQueues.getQueueNames()) {
      timeoutTasks(provisionerQueue, now);
    }
    expireClusters(now);
  } catch (Throwable t) {
    // deliberately broad: a failed pass is only logged
    LOG.error("Got exception: ", t);
  }
}
/**
 * Runs the callback scheduler once, then polls every 20 ms until the mock callback has recorded a different
 * number of callbacks than before the run. There is no internal timeout; callers rely on their own test
 * timeout to bound the wait.
 *
 * @param callbackScheduler Scheduler to run before waiting.
 * @throws InterruptedException if interrupted while sleeping between polls.
 */
private void waitForCallback(CallbackScheduler callbackScheduler) throws InterruptedException {
  final int countBefore = mockClusterCallback.getReceivedCallbacks().size();
  callbackScheduler.run();

  int countNow;
  do {
    countNow = mockClusterCallback.getReceivedCallbacks().size();
    TimeUnit.MILLISECONDS.sleep(20);
  } while (countNow == countBefore);
}
/**
 * Runs the shared callback scenario without failing the job.
 */
@Test(timeout = 20000)
public void testSuccessCallbacks() throws Exception {
  final boolean failJob = false;
  testCallbacks(failJob);
}
private Set<String> expandStopServices(Set<String> services) { Set<String> expandedServices = Sets.newHashSet(services); // if svc A depends on svc B and we're asked to restart svc B, we actually need to restart both svc A and svc B. // similarly, if svc A depends on svc B and we're asked to stop svc B, we actually need to stop both svc A and B. Set<String> additionalServicesToStop = Sets.newHashSet(); do { additionalServicesToStop.clear(); for (String otherService : Sets.difference(serviceMap.keySet(), expandedServices)) { for (String expandedService : expandedServices) { // if the other service depends on the expanded service, we need to add it to the list of services to stop. // ex: otherService=A and expandedService=B, A depends on B, and B is being stopped/restarted if (dependencyResolver.runtimeDependsOn(otherService, expandedService)) { additionalServicesToStop.add(otherService); } } } expandedServices.addAll(additionalServicesToStop); } while (!additionalServicesToStop.isEmpty()); return expandedServices; }
/**
 * Runs the shared callback scenario with the job made to fail.
 */
@Test(timeout = 20000)
public void testFailureCallbacks() throws Exception {
  final boolean failJob = true;
  testCallbacks(failJob);
}
private Set<String> expandStartServices(Set<String> services) { Set<String> expandedServices = Sets.newHashSet(services); // if svc A depends on svc B and we're asked to start svc A, we need to start svc B first. Set<String> additionalServicesToStart = Sets.newHashSet(); do { additionalServicesToStart.clear(); for (String otherService : Sets.difference(serviceMap.keySet(), expandedServices)) { for (String expandedService : expandedServices) { // if the other service is one the expanded service depends on, // we need to add it to the list of services to start. // ex: other=A, expanded=B, A depends on B, and A is being started if (dependencyResolver.runtimeDependsOn(expandedService, otherService)) { additionalServicesToStart.add(otherService); } } } expandedServices.addAll(additionalServicesToStart); } while (!additionalServicesToStart.isEmpty()); return expandedServices; } }