private void finalizeDatasetState(JobState.DatasetState datasetState, String datasetUrn) { for (TaskState taskState : datasetState.getTaskStates()) { // Backoff the actual high watermark to the low watermark for each task that has not been committed if (taskState.getWorkingState() != WorkUnitState.WorkingState.COMMITTED) { taskState.backoffActualHighWatermark(); if (this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { // Determine the final dataset state based on the task states (post commit) and the job commit policy. // 1. If COMMIT_ON_FULL_SUCCESS is used, the processing of the dataset is considered failed if any // task for the dataset failed to be committed. // 2. Otherwise, the processing of the dataset is considered successful even if some tasks for the // dataset failed to be committed. datasetState.setState(JobState.RunningState.FAILED); Optional<String> taskStateException = taskState.getTaskFailureException(); log.warn("At least one task did not committed successfully. Setting dataset state to FAILED.", taskStateException.isPresent() ? taskStateException.get() : "Exception not set."); } } } datasetState.setId(datasetUrn); }
// NOTE(review): fragment — `dataset2State` is declared outside this view; presumably this is
// test-fixture setup building a second dataset state and a task state for job "job1_id2".
// TODO confirm against the enclosing test method before relying on this description.
dataset2State.setId("dataset2"); TaskState taskState = new TaskState(); taskState.setJobId("job1_id2");
/**
 * Persists one dataset state per dataset into the FS-backed dataset state store, each carrying a
 * single COMMITTED task. Every dataset except the last also records an actual high watermark
 * taken from the parallel {@code watermarks} list.
 *
 * @param datasets   the datasets to persist state for; at least two are required
 * @param watermarks actual high watermarks, indexed in parallel with {@code datasets}
 * @param jobName    job name recorded on every persisted dataset state
 * @throws IOException if the state store fails to persist a dataset state
 */
public void persistDatasetState(List<Dataset> datasets, List<LongWatermark> watermarks, String jobName)
    throws IOException {
  Preconditions.checkArgument(datasets.size() >= 2);

  int datasetCount = datasets.size();
  for (int idx = 0; idx < datasetCount; idx++) {
    String urn = datasets.get(idx).getUrn();

    // Build the per-dataset state with fixed timing metadata.
    JobState.DatasetState state = new JobState.DatasetState(jobName, TEST_JOB_ID);
    state.setDatasetUrn(urn);
    state.setState(JobState.RunningState.COMMITTED);
    state.setId(urn);
    state.setStartTime(this.startTime);
    state.setEndTime(this.startTime + 1000);
    state.setDuration(1000);

    // One committed task per dataset; the id suffix matches the dataset index.
    TaskState committedTask = new TaskState();
    committedTask.setJobId(TEST_JOB_ID);
    committedTask.setTaskId(TEST_TASK_ID_PREFIX + idx);
    committedTask.setId(TEST_TASK_ID_PREFIX + idx);
    committedTask.setWorkingState(WorkUnitState.WorkingState.COMMITTED);

    // The last dataset intentionally gets no actual high watermark.
    boolean isLast = (idx == datasetCount - 1);
    if (!isLast) {
      committedTask.setActualHighWatermark(watermarks.get(idx));
    }

    state.addTaskState(committedTask);
    this.fsDatasetStateStore.persistDatasetState(urn, state);
  }
}
private void finalizeDatasetState(JobState.DatasetState datasetState, String datasetUrn) { for (TaskState taskState : datasetState.getTaskStates()) { // Backoff the actual high watermark to the low watermark for each task that has not been committed if (taskState.getWorkingState() != WorkUnitState.WorkingState.COMMITTED) { taskState.backoffActualHighWatermark(); if (this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { // Determine the final dataset state based on the task states (post commit) and the job commit policy. // 1. If COMMIT_ON_FULL_SUCCESS is used, the processing of the dataset is considered failed if any // task for the dataset failed to be committed. // 2. Otherwise, the processing of the dataset is considered successful even if some tasks for the // dataset failed to be committed. datasetState.setState(JobState.RunningState.FAILED); Optional<String> taskStateException = taskState.getTaskFailureException(); log.warn("At least one task did not committed successfully. Setting dataset state to FAILED.", taskStateException.isPresent() ? taskStateException.get() : "Exception not set."); } } } datasetState.setId(datasetUrn); }
@Test(dependsOnMethods = "testGetJobState") public void testPersistDatasetState() throws IOException { JobState.DatasetState datasetState = new JobState.DatasetState(TEST_JOB_NAME, TEST_JOB_ID); datasetState.setDatasetUrn(TEST_DATASET_URN); datasetState.setState(JobState.RunningState.COMMITTED); datasetState.setId(TEST_DATASET_URN); datasetState.setStartTime(this.startTime); datasetState.setEndTime(this.startTime + 1000); datasetState.setDuration(1000); for (int i = 0; i < 3; i++) { TaskState taskState = new TaskState(); taskState.setJobId(TEST_JOB_ID); taskState.setTaskId(TEST_TASK_ID_PREFIX + i); taskState.setId(TEST_TASK_ID_PREFIX + i); taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED); datasetState.addTaskState(taskState); } zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN, datasetState); // persist a second dataset state to test that retrieval of multiple dataset states works datasetState.setDatasetUrn(TEST_DATASET_URN2); datasetState.setId(TEST_DATASET_URN2); datasetState.setDuration(2000); zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN2, datasetState); // second job name for testing getting store names in a later test case datasetState.setJobName(TEST_JOB_NAME2); zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN2, datasetState); }
/**
 * Persists a single dataset state, carrying three committed tasks, to the FS-backed dataset
 * state store under {@code TEST_DATASET_URN}.
 *
 * @throws IOException if the state store fails to persist
 */
@Test(dependsOnMethods = "testGetJobState")
public void testPersistDatasetState() throws IOException {
  JobState.DatasetState datasetState = new JobState.DatasetState(TEST_JOB_NAME, TEST_JOB_ID);
  datasetState.setDatasetUrn(TEST_DATASET_URN);
  datasetState.setState(JobState.RunningState.COMMITTED);
  datasetState.setId(TEST_DATASET_URN);
  datasetState.setStartTime(this.startTime);
  datasetState.setEndTime(this.startTime + 1000);
  datasetState.setDuration(1000);

  // Attach three committed tasks, ids suffixed 0..2.
  int taskCount = 3;
  for (int taskIndex = 0; taskIndex < taskCount; taskIndex++) {
    TaskState committedTask = new TaskState();
    committedTask.setJobId(TEST_JOB_ID);
    committedTask.setTaskId(TEST_TASK_ID_PREFIX + taskIndex);
    committedTask.setId(TEST_TASK_ID_PREFIX + taskIndex);
    committedTask.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
    datasetState.addTaskState(committedTask);
  }

  this.fsDatasetStateStore.persistDatasetState(TEST_DATASET_URN, datasetState);
}