/**
 * Builds the task that should be run to undo the given task after it has failed.
 *
 * @param task Task that needs to get rolled back.
 * @return A new cluster task that performs the rollback action registered for the given task's action, or
 *         {@code null} if no rollback action is registered for it.
 */
private ClusterTask getRollbackTask(ClusterTask task) {
  ProvisionerAction rollbackAction = actions.getRollbackActions().get(task.getTaskName());
  if (rollbackAction != null) {
    // The rollback task gets a new id requested for the failed task's job; every other
    // attribute is copied from the failed task.
    TaskId newTaskId = idService.getNewTaskId(JobId.fromString(task.getJobId()));
    return new ClusterTask(rollbackAction, newTaskId, task.getNodeId(), task.getService(),
                           task.getClusterAction(), task.getClusterTemplateName(), task.getAccount());
  }
  // Nothing is registered to undo this kind of task.
  return null;
}
/**
 * Creates the statement that updates the stored row of the cluster task in the {@code tasks} table.
 * The row is addressed by task number, job number and cluster id; the serialized task, its status, its
 * submit and status times, its action type, cluster template name and owning user/tenant are overwritten.
 *
 * If binding any parameter fails, the prepared statement is closed before the failure propagates, so that
 * the caller (which never receives the statement in that case) does not leak it.
 *
 * @param conn Connection to prepare the statement on.
 * @return Fully bound statement, ready to be executed. The caller is responsible for closing it.
 * @throws SQLException if the statement could not be prepared or a parameter could not be bound.
 */
@Override
public PreparedStatement createUpdateStatement(Connection conn) throws SQLException {
  PreparedStatement statement = conn.prepareStatement(
    "UPDATE tasks SET task=?, status=?, submit_time=?, status_time=?, type=?, " +
      "cluster_template_name=?, user_id=?, tenant_id=?" +
      " WHERE task_num=? AND job_num=? AND cluster_id=?");
  boolean bound = false;
  try {
    // SET clause
    statement.setBytes(1, dbQueryExecutor.toBytes(clusterTask, ClusterTask.class));
    statement.setString(2, clusterTask.getStatus().name());
    statement.setTimestamp(3, DBHelper.getTimestamp(clusterTask.getSubmitTime()));
    statement.setTimestamp(4, DBHelper.getTimestamp(clusterTask.getStatusTime()));
    statement.setString(5, clusterTask.getTaskName().name());
    statement.setString(6, clusterTask.getClusterTemplateName());
    statement.setString(7, clusterTask.getAccount().getUserId());
    statement.setString(8, clusterTask.getAccount().getTenantId());
    // WHERE clause
    statement.setLong(9, taskId.getTaskNum());
    statement.setLong(10, taskId.getJobNum());
    statement.setLong(11, clusterId);
    bound = true;
    return statement;
  } finally {
    if (!bound) {
      // Binding failed: release the statement here since nobody else holds a reference to it.
      try {
        statement.close();
      } catch (SQLException ignored) {
        // Intentionally ignored so the original binding failure is the one that propagates.
      }
    }
  }
}
void startNodeAction(ClusterTask clusterTask) throws IOException { // Update node properties if task is associated with a nodeId. // There are cases when we don't associate a nodeId with a task so that the node properties don't get overridden // by the task output. // Eg. deleting a box during a rollback operation since we reuse nodeIds. if (clusterTask.getNodeId() != null) { Node node = clusterStore.getNode(clusterTask.getNodeId()); if (node == null) { LOG.error("Cannot find node {} for task {} to update the properties", clusterTask.getNodeId(), clusterTask.getTaskId()); } else { nodeService.startAction(node, clusterTask.getTaskId(), clusterTask.getService(), clusterTask.getTaskName().name()); } } }
/**
 * Creates a schedulable task by copying the identifying fields of a cluster task and attaching the
 * configuration to go with it.
 *
 * @param clusterTask Cluster task to copy the task id, job id, cluster id, action name and node id from.
 * @param config Task configuration to carry along with the task.
 */
public SchedulableTask(ClusterTask clusterTask, TaskConfig config) {
  // The action is stored by its name rather than as the action object itself.
  String actionName = clusterTask.getTaskName().name();

  this.config = config;
  this.taskName = actionName;
  this.taskId = clusterTask.getTaskId();
  this.jobId = clusterTask.getJobId();
  this.clusterId = clusterTask.getClusterId();
  this.nodeId = clusterTask.getNodeId();
}
/**
 * Finds the status time of the earliest delete task that targets the same cluster and node as the given
 * create task.
 *
 * @param deleteTasks Delete tasks to search through.
 * @param createTask Create task whose cluster id and node id the delete task must match.
 * @return Status time of the matching delete task with the smallest status time (the first one seen wins
 *         on ties), or the current time in milliseconds if no delete task matches.
 */
private long getDeleteTaskTime(List<ClusterTask> deleteTasks, ClusterTask createTask) {
  ClusterTask earliest = null;
  for (ClusterTask candidate : deleteTasks) {
    boolean sameCluster = createTask.getClusterId().equals(candidate.getClusterId());
    if (!sameCluster || !createTask.getNodeId().equals(candidate.getNodeId())) {
      // Delete task belongs to a different cluster or node.
      continue;
    }
    if (earliest == null || candidate.getStatusTime() < earliest.getStatusTime()) {
      earliest = candidate;
    }
  }
  return earliest == null ? System.currentTimeMillis() : earliest.getStatusTime();
}
clusterStore.writeNode(node); ClusterTask clusterTask = new ClusterTask( ProvisionerAction.CREATE, TaskId.fromString("1-1-1"), node.getId(), "service", ClusterAction.CLUSTER_CREATE, "test", USER1_ACCOUNT); ); SchedulableTask schedulableTask= new SchedulableTask(clusterTask, taskConfig); provisionerQueues.add(tenantId, new Element(clusterTask.getTaskId(), gson.toJson(schedulableTask))); Assert.assertEquals(clusterTask.getTaskId(), task.getTaskId()); new FinishTaskRequest("worker1", PROVISIONER_ID, tenantId, clusterTask.getTaskId(), "some stdout", "some stderr", 0, null, null, provisionerResult); TestHelper.finishTask(getBaseUrlInternalAPI(), finishRequest); ClusterTask actualTask = clusterStore.getClusterTask(TaskId.fromString(clusterTask.getTaskId())); Assert.assertEquals(ClusterTask.Status.COMPLETE, actualTask.getStatus()); Node actualNode = clusterStore.getNode(clusterTask.getNodeId()); Assert.assertNotNull(actualNode); Node.Action lastAction = actualNode.getActions().get(actualNode.getActions().size() - 1);
/**
 * Stores CLUSTER_TASK6, marks it complete with fixed submit and status times, stores it again, and then
 * checks that requesting node usage metrics for tenant2 with USER1's headers is rejected.
 */
@Test
public void noPermissionsTest() throws Exception {
  final long submitTime = 1;
  final long statusTime = 2;

  // First write stores the task as-is, second write stores it in its completed state.
  clusterStore.writeClusterTask(CLUSTER_TASK6);
  CLUSTER_TASK6.setStatus(ClusterTask.Status.COMPLETE);
  CLUSTER_TASK6.setSubmitTime(submitTime);
  CLUSTER_TASK6.setStatusTime(statusTime);
  clusterStore.writeClusterTask(CLUSTER_TASK6);

  HttpResponse usageResponse =
    doGetExternalAPI("/metrics/nodes/usage?start=5&end=22&tenant=tenant2", USER1_HEADERS);
  assertResponseStatus(usageResponse, HttpResponseStatus.METHOD_NOT_ALLOWED);
}
/** * Complete a task by setting the status of the task to {@link ClusterTask.Status#COMPLETE} and the status time to * the current timestamp and the status code to the given code. * * @param clusterTask Task to complete. * @param status Status code of the completed task. * @throws IOException */ public void completeTask(ClusterTask clusterTask, int status) throws IOException { clusterTask.setStatus(ClusterTask.Status.COMPLETE); clusterTask.setStatusCode(status); clusterTask.setStatusTime(System.currentTimeMillis()); clusterStore.writeClusterTask(clusterTask); // update stats serverStats.getSuccessfulProvisionerStats().incrementStat(clusterTask.getTaskName()); }
ClusterTask task1 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("2-1-1"), "node1", "service", ClusterAction.CLUSTER_CREATE, "test", account); ClusterTask task2 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("2-2-2"), "node2", "service", ClusterAction.CLUSTER_CREATE, "test", account); ClusterTask task3 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("2-3-3"), "node3", "service", ClusterAction.CLUSTER_CREATE, "test", account); ClusterTask task4 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("2-4-4"), "node3", "service", ClusterAction.CLUSTER_CREATE, "test", account); task1.setStatus(ClusterTask.Status.IN_PROGRESS); task2.setStatus(ClusterTask.Status.IN_PROGRESS); task3.setStatus(ClusterTask.Status.IN_PROGRESS); task4.setStatus(ClusterTask.Status.IN_PROGRESS); nodeService.startAction(node1, task1.getTaskId(), "service", "action"); Assert.assertEquals(Node.Status.IN_PROGRESS, node1.getActions().get(0).getStatus()); nodeService.startAction(node2, task2.getTaskId(), "service", "action"); Assert.assertEquals(Node.Status.IN_PROGRESS, node2.getActions().get(0).getStatus()); nodeService.startAction(node3, task3.getTaskId(), "service", "action"); Assert.assertEquals(Node.Status.IN_PROGRESS, node3.getActions().get(0).getStatus()); nodeService.startAction(node4, task4.getTaskId(), "service", "action"); Assert.assertEquals(Node.Status.IN_PROGRESS, node4.getActions().get(0).getStatus()); provisionerQueues.add(queueName, new Element(task1.getTaskId(), "")); provisionerQueues.add(queueName, new Element(task2.getTaskId(), "")); provisionerQueues.add(queueName, new Element(task3.getTaskId(), ""));
/** * Starts a task by setting the status of the task to {@link ClusterTask.Status#IN_PROGRESS} and the submit time * to the current timestamp. * * @param clusterTask Task to start. * @throws IOException */ public void startTask(ClusterTask clusterTask) throws IOException { clusterTask.setStatus(ClusterTask.Status.IN_PROGRESS); clusterTask.setSubmitTime(System.currentTimeMillis()); clusterStore.writeClusterTask(clusterTask); // Update stats serverStats.getProvisionerStats().incrementStat(clusterTask.getTaskName()); }
/** * Drop a task by setting the status of the task to {@link ClusterTask.Status#DROPPED} and the status time to the * current timestamp. Tasks can be dropped if there is no longer any point in executing them. For example, if another * task in the same stage has failed, the entire job cannot complete so there is no point in executing any * unexecuted task in the job. * * @param clusterTask Task to drop. * @throws IOException */ public void dropTask(ClusterTask clusterTask) throws IOException { clusterTask.setStatus(ClusterTask.Status.DROPPED); clusterTask.setStatusTime(System.currentTimeMillis()); clusterStore.writeClusterTask(clusterTask); // Update stats serverStats.getDroppedProvisionerStats().incrementStat(clusterTask.getTaskName()); }
if (clusterTask.getNodeId() != null) { Node node = clusterStore.getNode(clusterTask.getNodeId()); if (node == null) { LOG.error("Cannot find node {} for task {} to update the properties", clusterTask.getNodeId(), clusterTask.getTaskId()); } else { if (clusterTask.getStatus() == ClusterTask.Status.COMPLETE) { Map<String, String> ipAddresses = finish.getIpaddresses(); if (ipAddresses != null) {
if (provisionerQueues.remove(queueName, task.getTaskId())) { LOG.debug("Timing out task {} whose queue time is {}", task.getTaskId(), queuedElement.getStatusTime()); task.setStatusMessage(statusMessage); taskService.failTask(task, -1); Node node = clusterStore.getNode(task.getNodeId()); nodeService.failAction(node, "", statusMessage); jobQueues.add(queueName, new Element(task.getJobId()));
@Test public void testGetRunningTasks() throws Exception { ClusterTask task1 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("1-1-1"), "node1", "service", ClusterAction.CLUSTER_CREATE, "test", new Account("testUser", "testTenant")); ClusterTask task2 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("1-1-2"), "node2", "service", ClusterAction.CLUSTER_CREATE, "test", new Account("testUser", "testTenant")); ClusterTask task3 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("1-1-3"), "node3", "service", ClusterAction.CLUSTER_CREATE, "test", new Account("testUser", "testTenant")); ClusterTask task4 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("1-1-4"), "node4", "service", ClusterAction.CLUSTER_CREATE, "test", new Account("testUser", "testTenant")); ClusterTask task5 = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("1-1-5"), "node5", "service", ClusterAction.CLUSTER_CREATE, "test", new Account("testUser", "testTenant")); task1.setSubmitTime(currentTime - 1000); task1.setStatus(ClusterTask.Status.IN_PROGRESS); task2.setSubmitTime(currentTime - 1000); task2.setStatus(ClusterTask.Status.IN_PROGRESS); task3.setSubmitTime(currentTime - 200); task3.setStatus(ClusterTask.Status.IN_PROGRESS); task4.setSubmitTime(currentTime - 200); task4.setStatus(ClusterTask.Status.IN_PROGRESS); task5.setSubmitTime(currentTime - 1000); task5.setStatus(ClusterTask.Status.NOT_SUBMITTED);
@Test public void testQueuedTaskMissingFromStoreIsRemovedFromQueue() { ClusterCleanup clusterCleanup = new ClusterCleanup(clusterStore, clusterService, nodeService, taskService, jobQueues, provisionerQueues, -10, 1, 1); String queueName = account.getTenantId(); ClusterTask task = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("3-1-1"), "node1", "service", ClusterAction.CLUSTER_CREATE, "test", account); task.setStatus(ClusterTask.Status.IN_PROGRESS); Cluster cluster = Entities.ClusterExample.createCluster(); TaskConfig taskConfig = TaskConfig.from(cluster, Entities.ClusterExample.NODE1, Entities.ServiceExample.NAMENODE, cluster.getConfig(), ProvisionerAction.START, null); SchedulableTask schedulableTask = new SchedulableTask(task, taskConfig); // add a task to the queue without storing it.x provisionerQueues.add(queueName, new Element(task.getTaskId(), gson.toJson(schedulableTask))); provisionerQueues.takeIterator("0").next(); clusterCleanup.run(); Assert.assertEquals(0, Iterators.size(provisionerQueues.getBeingConsumed(queueName))); }
/**
 * Stores a task and its job, queues the task for the tenant, then posts a take request to the internal
 * API and checks that the response is OK and carries the id of the queued task.
 */
@Test
public void testTakeTask() throws Exception {
  String tenantId = USER1_ACCOUNT.getTenantId();

  // Store the task and the job it belongs to.
  ClusterTask storedTask = new ClusterTask(ProvisionerAction.CREATE, TaskId.fromString("1-1-1"), "node_id",
                                           "service", ClusterAction.CLUSTER_CREATE, "test", USER1_ACCOUNT);
  clusterStore.writeClusterTask(storedTask);
  ClusterJob storedJob = new ClusterJob(JobId.fromString("1-1"), ClusterAction.CLUSTER_CREATE);
  clusterStore.writeClusterJob(storedJob);

  // Queue a schedulable version of the task for the tenant.
  TaskServiceAction serviceAction =
    new TaskServiceAction("svcA", new ServiceAction("shell", ImmutableMap.<String, String>of()));
  TaskConfig taskConfig = new TaskConfig(
    NodeProperties.builder().build(),
    Entities.ProviderExample.JOYENT,
    ImmutableMap.<String, NodeProperties>of(),
    serviceAction,
    new JsonObject(),
    new JsonObject()
  );
  String queuedJson = gson.toJson(new SchedulableTask(storedTask, taskConfig));
  provisionerQueues.add(tenantId, new Element(storedTask.getTaskId(), queuedJson));

  // Take the task through the internal API.
  String takeRequestJson = gson.toJson(new TakeTaskRequest("worker1", PROVISIONER_ID, TENANT_ID));
  HttpResponse response = doPostInternalAPI("/tasks/take", takeRequestJson);
  assertResponseStatus(response, HttpResponseStatus.OK);

  String takenTaskId = getResponseJson(response).get("taskId").getAsString();
  Assert.assertEquals(storedTask.getTaskId(), takenTaskId);
}
for (String taskId : currentStage) { ClusterTask task = clusterStore.getClusterTask(TaskId.fromString(taskId)); job.setTaskStatus(task.getTaskId(), task.getStatus()); LOG.debug("Status of task {} is {}", taskId, task.getStatus()); if (task.getStatus() == ClusterTask.Status.COMPLETE) { ++completedTasks; } else if (task.getStatus() == ClusterTask.Status.NOT_SUBMITTED) { notSubmittedTasks.add(task); } else if (task.getStatus() == ClusterTask.Status.FAILED) { if (task.getNumAttempts() < maxTaskRetries) { retryTasks.add(task); } else { jobFailed = true; } else if (task.getStatus() == ClusterTask.Status.IN_PROGRESS) { ++inProgressTasks;
for (String taskId : job.getCurrentStage()) { ClusterTask task = clusterStore.getClusterTask(TaskId.fromString(taskId)); actionServices.add(new ActionService(task.getTaskName().name(), task.getService()));
ClusterTask scheduleRetry(ClusterJob job, ClusterTask task) throws Exception { task.addAttempt(); List<ClusterTask> retryTasks = taskService.getRetryTask(task); if (retryTasks.size() == 1) { LOG.trace("Only one retry task for job {} for task {}", job, task); return retryTasks.get(0); } // store all retry tasks for (ClusterTask t : retryTasks) { clusterStore.writeClusterTask(t); } // Remove self from current stage job.getCurrentStage().remove(task.getTaskId()); // Add first retry task to current stage job.getCurrentStage().add(retryTasks.get(0).getTaskId()); // Add the rest of retry tasks after current stage. TODO: this needs to be revisited. job.insertTasksAfterCurrentStage(ImmutableList.copyOf(Iterables.transform(Iterables.skip(retryTasks, 1), CLUSTER_TASK_STRING_FUNCTION))); LOG.trace("Retry job {} for task {}", job, task); return retryTasks.get(0); }
@Override public String apply(ClusterTask clusterTask) { return clusterTask.getTaskId(); } };