/**
 * Sets the status of the given job to {@link ClusterJob.Status#FAILED} and the status of the given cluster to
 * {@link co.cask.coopr.cluster.Cluster.Status#TERMINATED}.
 *
 * <p>Thin convenience wrapper: delegates to {@code failJobAndSetClusterStatus} with the
 * {@code TERMINATED} status fixed.
 *
 * @param job Job to fail.
 * @param cluster Cluster to terminate.
 * @param message Error message recorded with the failed job.
 * @throws IOException propagated from the delegated {@code failJobAndSetClusterStatus} call.
 * @throws IllegalAccessException propagated from the delegated {@code failJobAndSetClusterStatus} call.
 */
public void failJobAndTerminateCluster(ClusterJob job, Cluster cluster, String message)
  throws IOException, IllegalAccessException {
  failJobAndSetClusterStatus(job, cluster, Cluster.Status.TERMINATED, message);
}
ClusterTask scheduleRetry(ClusterJob job, ClusterTask task) throws Exception { task.addAttempt(); List<ClusterTask> retryTasks = taskService.getRetryTask(task); if (retryTasks.size() == 1) { LOG.trace("Only one retry task for job {} for task {}", job, task); return retryTasks.get(0); } // store all retry tasks for (ClusterTask t : retryTasks) { clusterStore.writeClusterTask(t); } // Remove self from current stage job.getCurrentStage().remove(task.getTaskId()); // Add first retry task to current stage job.getCurrentStage().add(retryTasks.get(0).getTaskId()); // Add the rest of retry tasks after current stage. TODO: this needs to be revisited. job.insertTasksAfterCurrentStage(ImmutableList.copyOf(Iterables.transform(Iterables.skip(retryTasks, 1), CLUSTER_TASK_STRING_FUNCTION))); LOG.trace("Retry job {} for task {}", job, task); return retryTasks.get(0); }
jobQueues.add(queueName, new Element(jobIdStr)); } else { taskService.completeJob(job, cluster); message = "Unable to create nodes, please check your provider settings"; taskService.failJobAndTerminateCluster(job, cluster, message); } else { taskService.failJobAndSetClusterStatus(job, cluster); taskService.failJob(job);
job.addStage(Sets.newHashSet(Iterables.transform(stageTasks, CLUSTER_TASK_STRING_FUNCTION))); taskService.startJob(job, cluster); switch (clusterAction) { case CLUSTER_CREATE: taskService.failJobAndTerminateCluster(job, cluster, "Failed to schedule the action"); break; default: taskService.failJobAndSetClusterStatus(job, cluster, Cluster.Status.INCONSISTENT, "Failed to schedule the " + clusterAction + " operation."); break;
switch(solverRequest.getType()) { case CREATE_CLUSTER: taskService.failJobAndTerminateCluster(toFailJob, cluster, "Exception while solving layout."); break; case ADD_SERVICES: taskService.failJobAndSetClusterStatus(toFailJob, cluster, clusterStatus, "Exception while solving layout."); break;
if (status == 0) { LOG.debug("Successful finish of the task reported. Task {} by worker {}", taskId, workerId); taskService.completeTask(clusterTask, status); } else { LOG.debug("Failure to finish task reported. Task {} by worker {}", taskId, workerId); taskService.failTask(clusterTask, status);
ClusterJob job = new ClusterJob(jobId, action); cluster.setLatestJobId(jobId.getId()); taskService.completeJob(job, cluster); jobNum++; Assert.assertEquals(sensitiveFields, credentialStore.get(account.getTenantId(), clusterId)); ClusterJob job = new ClusterJob(jobId, ClusterAction.CLUSTER_DELETE); cluster.setLatestJobId(jobId.getId()); taskService.completeJob(job, cluster); Assert.assertTrue(credentialStore.get(account.getTenantId(), clusterId).isEmpty());
ClusterTask rollbackTask = getRollbackTask(task); if (rollbackTask != null) { retryTasks.add(rollbackTask);
taskService.failTask(task, -1);
taskService.failJobAndTerminateCluster(solverJob, cluster, errorMessage); return "Unable to solve layout";
TrackingQueue.ConsumingStatus.FINISHED_SUCCESSFULLY, "Skipped due to job failure."); taskService.dropTask(clusterTask); jobQueues.add(tenantId, new Element(clusterTask.getJobId())); clusterTask = null;
/**
 * Runs the cluster callback's start hook for the job described by the callback data.
 *
 * <p>If the callback returns {@code true}, the job is enqueued for execution. If it returns
 * {@code false}, the job is failed: a CLUSTER_CREATE job also terminates the cluster, while
 * any other action marks the cluster INCONSISTENT. Any exception raised while failing the
 * job is logged and swallowed.
 *
 * @param callbackData carries the job and cluster this start hook applies to.
 * @param callbackContext context handed through to the cluster callback.
 */
private void onStart(CallbackData callbackData, CallbackContext callbackContext) {
  ClusterJob job = callbackData.getJob();
  Cluster cluster = callbackData.getCluster();
  try {
    if (clusterCallback.onStart(callbackData, callbackContext)) {
      // Callback approved the start: enqueue the job so a worker can pick it up.
      String jobId = callbackData.getJob().getJobId();
      // NOTE(review): 'gElement' is not declared anywhere in this view — confirm this is the
      // intended source of the queue name (possibly a garbled identifier).
      jobQueues.add(gElement.getQueueName(), new Element(jobId));
      LOG.debug("added job {} to job queue", jobId);
    } else {
      // Callback vetoed the start: fail the job with a cluster status appropriate to the action.
      switch (job.getClusterAction()) {
        case CLUSTER_CREATE:
          taskService.failJobAndTerminateCluster(job, cluster,
            "Cluster creation stopped by failed start callback.");
          break;
        default:
          // failed to plan means the job should fail, but state has already been changed so the cluster
          // state in the db is inconsistent with reality.
          // TODO: Should revert it here but need versioning or cluster history or something to that effect.
          taskService.failJobAndSetClusterStatus(
            job, cluster, Cluster.Status.INCONSISTENT,
            "Failed to schedule the " + job.getClusterAction() + " operation.");
          break;
      }
    }
  } catch (Exception e) {
    LOG.error("Exception failing job {} for cluster {}", job.getJobId(), cluster.getId(), e);
  }
}
// Closing brace of the enclosing scope (outside this method's body).
}
/**
 * Sets the status of the given job to {@link ClusterJob.Status#FAILED} and the status of the cluster to the default
 * failure status as given in {@link co.cask.coopr.scheduler.ClusterAction#getFailureStatus()}.
 *
 * <p>Convenience overload: delegates to the four-argument variant with the action's failure
 * status and a {@code null} message.
 *
 * @param job Job to fail.
 * @param cluster Cluster to set the status for.
 * @throws IOException propagated from the delegated four-argument overload.
 * @throws IllegalAccessException propagated from the delegated four-argument overload.
 */
public void failJobAndSetClusterStatus(ClusterJob job, Cluster cluster) throws IOException, IllegalAccessException {
  failJobAndSetClusterStatus(job, cluster, job.getClusterAction().getFailureStatus(), null);
}