/**
 * Returns the snapshot data map name in the form
 * {@code "_jet.snapshot.<jobId>.<dataMapIndex>"}.
 */
public static String snapshotDataMapName(long jobId, int dataMapIndex) {
    return SNAPSHOT_DATA_MAP_PREFIX + idToString(jobId) + '.' + dataMapIndex;
}
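// A minimal usage sketch (not from the source), assuming Util.idToString renders the
// long as four hyphen-separated hex groups and SNAPSHOT_DATA_MAP_PREFIX is "_jet.snapshot.":
//
//   snapshotDataMapName(jobId, 0)  ->  "_jet.snapshot.0b01-2345-6789-abcd.0"
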
public static String jobNameAndExecutionId(String jobName, long executionId) {
    return "job '" + jobName + "', execution " + idToString(executionId);
}
public static String jobIdAndExecutionId(long jobId, long executionId) {
    return "job " + idToString(jobId) + ", execution " + idToString(executionId);
}
/**
 * Returns the string representation of this job's ID.
 */
@Nonnull
default String getIdString() {
    return Util.idToString(getId());
}
private void logIgnoredCompletion(@Nullable Throwable failure, JobStatus status) {
    if (failure != null) {
        logger.severe("Ignoring failure completion of " + idToString(jobId)
                + " because status is " + status, failure);
    } else {
        logger.severe("Ignoring completion of " + idToString(jobId)
                + " because status is " + status);
    }
}
@Override public String toString() { return "JobSummary{" + "jobId=" + idToString(jobId) + ", executionId=" + idToString(executionId) + ", name='" + name + '\'' + ", status=" + status + ", submissionTime=" + toLocalTime(submissionTime) + ", completionTime=" + toLocalTime(completionTime) + ", failureText=" + failureText + '}'; } }
public CompletableFuture<Void> exportSnapshot(long jobId, String name, boolean cancelJob) {
    assertIsMaster("Cannot export snapshot for job " + idToString(jobId) + " from non-master node");
    MasterContext masterContext = masterContexts.get(jobId);
    if (masterContext == null) {
        throw new JobNotFoundException("MasterContext not found to export snapshot of job "
                + idToString(jobId));
    }
    return masterContext.exportSnapshot(name, cancelJob);
}
public void resumeJob(long jobId) {
    assertIsMaster("Cannot resume job " + idToString(jobId) + " from non-master node");
    MasterContext masterContext = masterContexts.get(jobId);
    if (masterContext == null) {
        throw new JobNotFoundException("MasterContext not found to resume job " + idToString(jobId));
    }
    masterContext.resumeJob(jobRepository::newExecutionId);
}
/**
 * Restarts a job for a new execution if the cluster is stable.
 * Otherwise, it reschedules the restart task.
 */
void restartJob(long jobId) {
    MasterContext masterContext = masterContexts.get(jobId);
    if (masterContext == null) {
        logger.severe("Master context for job " + idToString(jobId) + " not found to restart");
        return;
    }
    tryStartJob(masterContext);
}
@Nonnull
public String getJobNameOrId() {
    return jobConfig.getName() != null ? jobConfig.getName() : idToString(jobId);
}
private void logMissingExeCtx(long executionId) {
    if (logger.isFinestEnabled()) {
        logger.finest("Ignoring flow control message applying to non-existent execution context "
                + idToString(executionId));
    }
}
/**
 * Puts the given job record into the jobRecords map.
 * If a record for the same job ID is already present, checks that it has the
 * same DAG; if the DAG differs, the call fails with {@link IllegalStateException}.
 */
void putNewJobRecord(JobRecord jobRecord) {
    long jobId = jobRecord.getJobId();
    JobRecord prev = jobRecords.putIfAbsent(jobId, jobRecord);
    if (prev != null && !prev.getDag().equals(jobRecord.getDag())) {
        throw new IllegalStateException("Cannot put job record for job " + idToString(jobId)
                + " because it already exists with a different DAG");
    }
}
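// Illustrative calls (hypothetical variables, not from the source) showing the
// contract implemented above:
//
//   putNewJobRecord(record);           // first put for this job ID succeeds
//   putNewJobRecord(record);           // same DAG again -> silently ignored
//   putNewJobRecord(recordOtherDag);   // same job ID, different DAG -> IllegalStateException
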
/**
 * Deletes all snapshot data of the given job by destroying both of its
 * snapshot data maps.
 */
private void destroySnapshotDataMaps(long jobId) {
    instance.getMap(snapshotDataMapName(jobId, 0)).destroy();
    instance.getMap(snapshotDataMapName(jobId, 1)).destroy();
    logFine(logger, "Destroyed both snapshot maps for job %s", idToString(jobId));
}
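// Why indices 0 and 1 suffice: Jet keeps two snapshot data maps per job and
// alternates between them for successive automatic snapshots (an assumption about
// the surrounding design, not stated in this snippet), so destroying both indices
// removes all of the job's snapshot data.
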
void clearSnapshotData(long jobId, int dataMapIndex) {
    String mapName = snapshotDataMapName(jobId, dataMapIndex);
    try {
        instance.getMap(mapName).clear();
        logFine(logger, "Cleared snapshot data map %s", mapName);
    } catch (Exception logged) {
        logger.warning("Cannot delete old snapshot data for job " + idToString(jobId), logged);
    }
}
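// Note the contrast with destroySnapshotDataMaps above: IMap.clear() only removes
// the entries, keeping the map itself alive for the next snapshot to reuse, while
// IMap.destroy() releases the whole distributed object.
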
@Override public String toString() { return "JobRecord{" + "jobId=" + idToString(jobId) + ", name=" + getConfig().getName() + ", creationTime=" + toLocalDateTime(creationTime) + ", dagJson=" + dagJson + ", config=" + config + '}'; } }
@Override public String toString() { return "JobResult{" + "coordinatorUUID='" + coordinatorUUID + '\'' + ", jobId=" + idToString(jobId) + ", name=" + jobConfig.getName() + ", creationTime=" + toLocalDateTime(creationTime) + ", completionTime=" + toLocalDateTime(completionTime) + ", failureText=" + failureText + '}'; }
private static String formatJob(Job job) {
    return "id=" + idToString(job.getId())
            + ", name=" + job.getName()
            + ", submissionTime=" + toLocalDateTime(job.getSubmissionTime());
}
@Override
public void run() {
    ILogger logger = getLogger();
    JetService service = getService();
    Address callerAddress = getCallerAddress();
    logger.fine("Completing execution " + idToString(executionId) + " from caller " + callerAddress
            + ", error=" + error);

    Address masterAddress = getNodeEngine().getMasterAddress();
    if (!callerAddress.equals(masterAddress)) {
        throw new IllegalStateException("Caller " + callerAddress + " cannot complete execution "
                + idToString(executionId) + " because it is not master. Master is: " + masterAddress);
    }

    service.getJobExecutionService().completeExecution(executionId, error);
}
/**
 * Schedules a restart task for the given job to run in the future.
 */
void scheduleRestart(long jobId) {
    MasterContext masterContext = masterContexts.get(jobId);
    if (masterContext == null) {
        logger.severe("Master context for job " + idToString(jobId) + " not found to schedule restart");
        return;
    }
    logger.fine("Scheduling restart on master for job " + masterContext.jobName());
    nodeEngine.getExecutionService().schedule(COORDINATOR_EXECUTOR_NAME, () -> restartJob(jobId),
            RETRY_DELAY_IN_MILLIS, MILLISECONDS);
}
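// Together with restartJob above this forms a retry loop: when the scheduled task
// fires and the cluster is still not stable, restartJob reschedules itself after
// another RETRY_DELAY_IN_MILLIS.
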
void updateQuorumSize(int newQuorumSize) {
    // This method can be called in parallel if multiple members are added. We don't synchronize
    // here, but the worst that can happen is that we write the JobRecord out unnecessarily.
    int currentQuorumSize = jobExecutionRecord.getQuorumSize();
    if (currentQuorumSize < newQuorumSize) {
        jobExecutionRecord.setLargerQuorumSize(newQuorumSize);
        writeJobExecutionRecord(false);
        // Log the value captured before the update; reading getQuorumSize() here would
        // already return newQuorumSize, making a "current ... updated to" message meaningless.
        logger.info("Quorum size of job " + idToString(jobRecord.getJobId())
                + " updated from " + currentQuorumSize + " to " + newQuorumSize);
    }
}
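// Behavior sketch (hypothetical values): starting from a quorum size of 2,
// updateQuorumSize(3) persists 3 and logs the change, while a later
// updateQuorumSize(2) is a no-op: the quorum size only ever grows, which is also
// why the setter is named setLargerQuorumSize.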