solverJob.setJobStatus(ClusterJob.Status.COMPLETE); clusterStore.writeClusterJob(solverJob); ClusterJob createJob = new ClusterJob(clusterJobId, ClusterAction.CLUSTER_CREATE); cluster.setLatestJobId(createJob.getJobId()); clusterStore.writeClusterJob(createJob);
try { ClusterJob job = clusterStore.getClusterJob(jobId); Cluster cluster = clusterStore.getCluster(job.getClusterId()); if (job.getJobStatus() == ClusterJob.Status.PAUSED) { continue; Set<String> currentStage = job.getCurrentStage(); boolean jobFailed = job.getJobStatus() == ClusterJob.Status.FAILED; int completedTasks = 0; int inProgressTasks = 0; LOG.debug("Verifying task statuses for stage {} for job {}", job.getCurrentStageNumber(), jobIdStr); for (String taskId : currentStage) { ClusterTask task = clusterStore.getClusterTask(TaskId.fromString(taskId)); job.setTaskStatus(task.getTaskId(), task.getStatus()); LOG.debug("Status of task {} is {}", taskId, task.getStatus()); if (task.getStatus() == ClusterTask.Status.COMPLETE) { Set<Node> clusterNodes = clusterStore.getClusterNodes(job.getClusterId()); Map<String, Node> nodeMap = Maps.newHashMap(); for (Node node : clusterNodes) { if (job.hasNextStage()) { LOG.debug("Advancing to next stage {} for job {}", job.getCurrentStageNumber(), job.getJobId()); job.advanceStage(); jobQueues.add(queueName, new Element(jobIdStr)); } else {
/**
 * Builds a progress snapshot of a cluster job: the action being performed, the job status,
 * and how many of the job's tasks have completed out of the total.
 *
 * @param job job to summarize progress for.
 */
public ClusterJobProgress(ClusterJob job) {
  this.action = job.getClusterAction();
  this.actionstatus = job.getJobStatus();
  this.stepstotal = job.getTaskStatus().size();
  // Count the tasks that have finished successfully.
  int doneCount = 0;
  for (ClusterTask.Status status : job.getTaskStatus().values()) {
    if (status == ClusterTask.Status.COMPLETE) {
      doneCount++;
    }
  }
  this.stepscompleted = doneCount;
}
/**
 * Prepares planning state for a cluster job: indexes the given nodes by id and by the
 * services they run, builds a service dependency resolver, and expands the job's planned
 * services (a null planned-service set is kept as null and passed through unchanged).
 *
 * @param job job to plan for.
 * @param clusterNodes nodes of the cluster the job operates on.
 */
public JobPlanner(ClusterJob job, Set<Node> clusterNodes) {
  this.clusterAction = job.getClusterAction();
  this.nodesToPlan = job.getPlannedNodes();
  this.serviceNodeMap = ArrayListMultimap.create();
  this.serviceMap = Maps.newHashMap();
  this.nodeMap = Maps.newHashMap();
  // Index every node under each service it runs, and remember each service by name.
  for (Node clusterNode : clusterNodes) {
    for (Service nodeService : clusterNode.getServices()) {
      serviceNodeMap.put(nodeService.getName(), clusterNode);
      serviceMap.put(nodeService.getName(), nodeService);
    }
    nodeMap.put(clusterNode.getId(), clusterNode);
  }
  this.dependencyResolver = new ServiceDependencyResolver(actions, serviceMap);
  // Expand the planned services for this action; keep null when none were specified.
  this.servicesToPlan = (job.getPlannedServices() == null)
    ? null
    : ImmutableSet.copyOf(expandServices(job.getPlannedServices(), clusterAction));
}
/**
 * Renders a job's execution plan as JSON: the job id, cluster id, action, current stage
 * number, and a "stages" array where each stage is an array of task objects.
 *
 * @param job job whose plan should be formatted.
 * @return JSON object describing the job plan.
 * @throws IOException if a task could not be read from the cluster store.
 */
private JsonObject formatJobPlan(ClusterJob job) throws IOException {
  JsonObject planJson = new JsonObject();
  planJson.addProperty("id", job.getJobId());
  planJson.addProperty("clusterId", job.getClusterId());
  planJson.addProperty("action", job.getClusterAction().name());
  planJson.addProperty("currentStage", job.getCurrentStageNumber());

  JsonArray allStages = new JsonArray();
  for (Set<String> stageTaskIds : job.getStagedTasks()) {
    JsonArray stageTasks = new JsonArray();
    for (String stageTaskId : stageTaskIds) {
      // Look up the full task record so its details can be included in the plan.
      ClusterTask stageTask = clusterStore.getClusterTask(TaskId.fromString(stageTaskId));
      JsonObject taskJson = new JsonObject();
      taskJson.addProperty("id", stageTask.getTaskId());
      taskJson.addProperty("taskName", stageTask.getTaskName().name());
      taskJson.addProperty("nodeId", stageTask.getNodeId());
      taskJson.addProperty("service", stageTask.getService());
      stageTasks.add(taskJson);
    }
    allStages.add(stageTasks);
  }
  planJson.add("stages", allStages);
  return planJson;
}
if (clusterJob.getJobStatus() != ClusterJob.Status.PAUSED) { return; clusterJob.setJobStatus(ClusterJob.Status.RUNNING); clusterJob.setStatusMessage("Resumed by user."); clusterStore.writeClusterJob(clusterJob); jobQueues.add(account.getTenantId(), new Element(clusterJob.getJobId())); } finally { lock.unlock();
/**
 * Test helper: writes a CREATE task and its job to the cluster store, queues the task on
 * the tenant's provisioner queue, and returns a request a worker could use to take it.
 *
 * @return take-task request for "worker1" under the test provisioner and tenant.
 * @throws IOException if writing to the store or queue fails.
 */
private TakeTaskRequest getRequest() throws IOException {
  String tenantId = USER1_ACCOUNT.getTenantId();

  // Persist a CREATE task and the job it belongs to.
  ClusterTask clusterTask = new ClusterTask(
    ProvisionerAction.CREATE, TaskId.fromString("1-1-1"), "node_id", "service",
    ClusterAction.CLUSTER_CREATE, "test", USER1_ACCOUNT);
  clusterStore.writeClusterTask(clusterTask);
  ClusterJob clusterJob = new ClusterJob(JobId.fromString("1-1"), ClusterAction.CLUSTER_CREATE);
  clusterStore.writeClusterJob(clusterJob);

  // Minimal task config so the task can be serialized onto the queue.
  TaskConfig taskConfig = new TaskConfig(
    NodeProperties.builder().build(),
    Entities.ProviderExample.JOYENT,
    ImmutableMap.<String, NodeProperties>of(),
    new TaskServiceAction("svcA", new ServiceAction("shell", ImmutableMap.<String, String>of())),
    new JsonObject(),
    new JsonObject());
  SchedulableTask schedulableTask = new SchedulableTask(clusterTask, taskConfig);
  provisionerQueues.add(tenantId, new Element(clusterTask.getTaskId(), gson.toJson(schedulableTask)));

  return new TakeTaskRequest("worker1", PROVISIONER_ID, TENANT_ID);
}
/** * Sets the status of the given job to {@link ClusterJob.Status#COMPLETE} and the status of the given cluster to * {@link co.cask.coopr.cluster.Cluster.Status#ACTIVE}. * * @param job Job to complete. * @param cluster Cluster the job was for. * @throws IOException */ public void completeJob(ClusterJob job, Cluster cluster) throws IOException, IllegalAccessException { job.setJobStatus(ClusterJob.Status.COMPLETE); clusterStore.writeClusterJob(job); LOG.debug("Job {} is complete", job.getJobId()); // Update cluster status if (job.getClusterAction() == ClusterAction.CLUSTER_DELETE) { cluster.setStatus(Cluster.Status.TERMINATED); } else { cluster.setStatus(Cluster.Status.ACTIVE); } clusterStore.writeCluster(cluster); serverStats.getSuccessfulClusterStats().incrementStat(job.getClusterAction()); if (job.getClusterAction() == ClusterAction.CLUSTER_DELETE) { wipeSensitiveFields(cluster); } callbackQueues.add(cluster.getAccount().getTenantId(), new Element(gson.toJson(new CallbackData(CallbackData.Type.SUCCESS, cluster, job)))); }
prepareClusterForOperation(cluster, clusterCreateRequest); JobId clusterJobId = idService.getNewJobId(cluster.getId()); ClusterJob clusterJob = new ClusterJob(clusterJobId, ClusterAction.SOLVE_LAYOUT); cluster.setLatestJobId(clusterJob.getJobId());
while (true) { Multiset<ActionService> actionServices = HashMultiset.create(); for (String taskId : job.getCurrentStage()) { ClusterTask task = clusterStore.getClusterTask(TaskId.fromString(taskId)); actionServices.add(new ActionService(task.getTaskName().name(), task.getService())); if (!job.hasNextStage()) { break; job.advanceStage();
/**
 * Sets the status of the given job to {@link ClusterJob.Status#FAILED} and the status of the cluster to some given
 * status, then queues a failure callback for the cluster's tenant.
 *
 * @param job Job to fail.
 * @param cluster Cluster to set the status for.
 * @param status Status to set the cluster to.
 * @param message Error message; ignored when null.
 * @throws IOException if the job or cluster could not be persisted, or the failure callback could not be queued.
 * @throws IllegalAccessException declared for callers; see the two-argument overload.
 */
public void failJobAndSetClusterStatus(ClusterJob job, Cluster cluster, Cluster.Status status, String message)
  throws IOException, IllegalAccessException {
  // Persist the new cluster status first, then mark the job itself as failed.
  cluster.setStatus(status);
  clusterStore.writeCluster(cluster);

  job.setJobStatus(ClusterJob.Status.FAILED);
  if (message != null) {
    job.setStatusMessage(message);
  }
  clusterStore.writeClusterJob(job);

  serverStats.getFailedClusterStats().incrementStat(job.getClusterAction());

  CallbackData failureData = new CallbackData(CallbackData.Type.FAILURE, cluster, job);
  callbackQueues.add(cluster.getAccount().getTenantId(), new Element(gson.toJson(failureData)));
}
Cluster cluster = getCluster(clusterId, account); JobId deleteJobId = idService.getNewJobId(clusterId); ClusterJob deleteJob = new ClusterJob(deleteJobId, ClusterAction.CLUSTER_DELETE); deleteJob.setJobStatus(ClusterJob.Status.RUNNING); cluster.setLatestJobId(deleteJobId.getId()); cluster.setStatus(Cluster.Status.PENDING);
if (clusterJob.getJobStatus() == ClusterJob.Status.COMPLETE || clusterJob.getJobStatus() == ClusterJob.Status.FAILED) { return; clusterJob.setJobStatus(ClusterJob.Status.PAUSED); clusterJob.setStatusMessage("Paused by user."); clusterStore.writeClusterJob(clusterJob); } finally {
/**
 * Handles the start callback for a job. If the callback reports the job may proceed, the job id is
 * added to the job queue; otherwise the job is failed and the cluster status is updated according
 * to the action that was being performed.
 *
 * @param callbackData data identifying the job and cluster the callback is for.
 * @param callbackContext context the callback runs in.
 */
private void onStart(CallbackData callbackData, CallbackContext callbackContext) {
  ClusterJob job = callbackData.getJob();
  Cluster cluster = callbackData.getCluster();
  try {
    if (clusterCallback.onStart(callbackData, callbackContext)) {
      // Callback allowed the job: queue it for scheduling.
      String jobId = callbackData.getJob().getJobId();
      jobQueues.add(gElement.getQueueName(), new Element(jobId));
      LOG.debug("added job {} to job queue", jobId);
    } else {
      switch (job.getClusterAction()) {
        case CLUSTER_CREATE:
          // A create that never really started can simply be torn down.
          taskService.failJobAndTerminateCluster(job, cluster,
                                                 "Cluster creation stopped by failed start callback.");
          break;
        default:
          // failed to plan means the job should fail, but state has already been changed so the cluster
          // state in the db is inconsistent with reality.
          // TODO: Should revert it here but need versioning or cluster history or something to that effect.
          taskService.failJobAndSetClusterStatus(
            job, cluster, Cluster.Status.INCONSISTENT,
            "Failed to schedule the " + job.getClusterAction() + " operation.");
          break;
      }
    }
  } catch (Exception e) {
    // Best-effort: failing the job itself threw, so just log it.
    LOG.error("Exception failing job {} for cluster {}", job.getJobId(), cluster.getId(), e);
  }
}
}
/** * Sets the status of the given job to {@link ClusterJob.Status#RUNNING} and add it to the queue to be run. * * @param job Job to start. * @param cluster Cluster the job is for. * @throws IOException */ public void startJob(ClusterJob job, Cluster cluster) throws IOException { // TODO: wrap in a transaction LOG.debug("Starting job {} for cluster {}", job.getJobId(), cluster.getId()); job.setJobStatus(ClusterJob.Status.RUNNING); // Note: writing job status as RUNNING, will allow other operations on the job // (like cancel, etc.) to happen in parallel. clusterStore.writeClusterJob(job); callbackQueues.add(cluster.getAccount().getTenantId(), new Element(gson.toJson(new CallbackData(CallbackData.Type.START, cluster, job)))); }
LOG.error("Exception while expanding macros for task {}", task.getTaskId(), e); taskService.failTask(task, -1); job.setStatusMessage("Exception while expanding macros: " + e.getMessage()); jobQueues.add(queueName, new Element(job.getJobId())); break; queueName, new Element(task.getTaskId(), gson.toJson(schedulableTask))); job.setTaskStatus(task.getTaskId(), ClusterTask.Status.IN_PROGRESS); taskService.startTask(task);
/**
 * Sets the status of the given job to {@link ClusterJob.Status#FAILED} and the status of the cluster to the default
 * failure status as given in {@link co.cask.coopr.scheduler.ClusterAction#getFailureStatus()}.
 *
 * @param job Job to fail.
 * @param cluster Cluster to set the status for.
 * @throws IOException propagated from the four-argument overload when persisting the job or cluster fails.
 * @throws IllegalAccessException propagated from the four-argument overload.
 */
public void failJobAndSetClusterStatus(ClusterJob job, Cluster cluster) throws IOException, IllegalAccessException {
  // Delegate with the action's default failure status and no custom message.
  failJobAndSetClusterStatus(job, cluster, job.getClusterAction().getFailureStatus(), null);
}
Set currentStage = job.getCurrentStage(); job.setJobStatus(ClusterJob.Status.PAUSED); clusterStore.writeClusterJob(job); Set newStage = job.getCurrentStage(); job.setJobStatus(ClusterJob.Status.RUNNING); clusterStore.writeClusterJob(job); jobQueues.add(tenantId, new Element(jobId)); jobScheduler.run(); job = clusterStore.getClusterJob(JobId.fromString(jobId)); newStage = job.getCurrentStage();
ClusterTask scheduleRetry(ClusterJob job, ClusterTask task) throws Exception { task.addAttempt(); List<ClusterTask> retryTasks = taskService.getRetryTask(task); if (retryTasks.size() == 1) { LOG.trace("Only one retry task for job {} for task {}", job, task); return retryTasks.get(0); } // store all retry tasks for (ClusterTask t : retryTasks) { clusterStore.writeClusterTask(t); } // Remove self from current stage job.getCurrentStage().remove(task.getTaskId()); // Add first retry task to current stage job.getCurrentStage().add(retryTasks.get(0).getTaskId()); // Add the rest of retry tasks after current stage. TODO: this needs to be revisited. job.insertTasksAfterCurrentStage(ImmutableList.copyOf(Iterables.transform(Iterables.skip(retryTasks, 1), CLUSTER_TASK_STRING_FUNCTION))); LOG.trace("Retry job {} for task {}", job, task); return retryTasks.get(0); }
/**
 * Persists the given cluster job to the database, keyed by its job id and the numeric
 * cluster id parsed from that job id.
 *
 * @param clusterJob job to write.
 * @throws IOException if the job could not be serialized or written to the database.
 */
@Override
public void writeClusterJob(ClusterJob clusterJob) throws IOException {
  JobId jobId = JobId.fromString(clusterJob.getJobId());
  long clusterId = Long.parseLong(jobId.getClusterId());
  try {
    Connection connection = dbConnectionPool.getConnection();
    try {
      // Serialize the job and upsert it under (jobId, clusterId).
      byte[] serializedJob = dbQueryExecutor.toBytes(clusterJob, ClusterJob.class);
      DBPut jobPut = new ClusterJobDBPut(clusterJob, serializedJob, jobId, clusterId);
      jobPut.executePut(connection);
    } finally {
      // Always return the connection to the pool, even if the put failed.
      connection.close();
    }
  } catch (SQLException e) {
    // Surface database failures through the store's IOException contract.
    throw new IOException(e);
  }
}