/**
 * Drops tasks from {@code tasks} whose recorded end time lies before
 * {@code now - infoCacheTime}.
 *
 * <p>Tasks whose {@code getTaskInfo()} returns {@code null}, and tasks with no
 * end time, are left untouched. A {@link RuntimeException} raised while
 * checking a single task is logged as a warning and the scan moves on to the
 * next task.
 */
public void removeOldTasks()
{
    DateTime expirationCutoff = DateTime.now().minus(infoCacheTime.toMillis());

    for (SqlTask sqlTask : tasks.asMap().values()) {
        // fetched outside the try block: a failure here is not swallowed
        TaskInfo taskInfo = sqlTask.getTaskInfo();
        if (taskInfo == null) {
            continue;
        }

        try {
            DateTime endTime = taskInfo.getStats().getEndTime();
            boolean expired = endTime != null && endTime.isBefore(expirationCutoff);
            if (expired) {
                tasks.asMap().remove(taskInfo.getTaskId());
            }
        }
        catch (RuntimeException e) {
            log.warn(e, "Error while inspecting age of complete task %s", taskInfo.getTaskId());
        }
    }
}
public synchronized void addExchangeLocations(PlanFragmentId fragmentId, Set<URI> exchangeLocations, boolean noMoreExchangeLocations) { requireNonNull(fragmentId, "fragmentId is null"); requireNonNull(exchangeLocations, "exchangeLocations is null"); RemoteSourceNode remoteSource = exchangeSources.get(fragmentId); checkArgument(remoteSource != null, "Unknown remote source %s. Known sources are %s", fragmentId, exchangeSources.keySet()); this.exchangeLocations.putAll(remoteSource.getId(), exchangeLocations); for (RemoteTask task : getAllTasks()) { ImmutableMultimap.Builder<PlanNodeId, Split> newSplits = ImmutableMultimap.builder(); for (URI exchangeLocation : exchangeLocations) { newSplits.put(remoteSource.getId(), createRemoteSplitFor(task.getTaskInfo().getTaskId(), exchangeLocation)); } task.addSplits(newSplits.build()); } if (noMoreExchangeLocations) { completeSourceFragments.add(fragmentId); // is the source now complete? if (completeSourceFragments.containsAll(remoteSource.getSourceFragmentIds())) { completeSources.add(remoteSource.getId()); for (RemoteTask task : getAllTasks()) { task.noMoreSplits(remoteSource.getId()); } } } }
/**
 * Fails every task that is not yet done and whose last heartbeat is older than
 * {@code now - clientTimeout}.
 *
 * <p>Tasks without a recorded heartbeat are skipped. A {@link RuntimeException}
 * raised while checking a single task is logged as a warning and does not stop
 * the scan.
 */
public void failAbandonedTasks()
{
    DateTime now = DateTime.now();
    DateTime heartbeatCutoff = now.minus(clientTimeout.toMillis());

    for (SqlTask sqlTask : tasks.asMap().values()) {
        try {
            TaskInfo info = sqlTask.getTaskInfo();
            if (info.getState().isDone()) {
                continue;
            }

            DateTime lastHeartbeat = info.getLastHeartbeat();
            boolean abandoned = lastHeartbeat != null && lastHeartbeat.isBefore(heartbeatCutoff);
            if (!abandoned) {
                continue;
            }

            log.info("Failing abandoned task %s", info.getTaskId());
            sqlTask.failed(new AbandonedException("Task " + info.getTaskId(), lastHeartbeat, now));
        }
        catch (RuntimeException e) {
            log.warn(e, "Error while inspecting age of task %s", sqlTask.getTaskId());
        }
    }
}
// Record this task's id in finishedTasks.
finishedTasks.add(taskInfo.getTaskId());
sqlTaskManager.addStateChangeListener(taskInfo.getTaskId(), newState -> { if (newState.isDone()) { countDownLatch.countDown();
// String form of the task's id; falls back to null when `task` holds no value.
String failureTask = task.map(x -> x.getTaskId().toString()).orElse(null);
private synchronized void updateExchangeClient(StageInfo outputStage) { // add any additional output locations if (!outputStage.getState().isDone()) { for (TaskInfo taskInfo : outputStage.getTasks()) { SharedBufferInfo outputBuffers = taskInfo.getOutputBuffers(); List<BufferInfo> buffers = outputBuffers.getBuffers(); if (buffers.isEmpty() || outputBuffers.getState().canAddBuffers()) { // output buffer has not been created yet continue; } Preconditions.checkState(buffers.size() == 1, "Expected a single output buffer for task %s, but found %s", taskInfo.getTaskId(), buffers); TaskId bufferId = Iterables.getOnlyElement(buffers).getBufferId(); URI uri = uriBuilderFrom(taskInfo.getSelf()).appendPath("results").appendPath(bufferId.toString()).build(); exchangeClient.addLocation(uri); } } if (allOutputBuffersCreated(outputStage)) { exchangeClient.noMoreLocations(); } }
/**
 * Creates a task on a manager configured with a 5 ms info max age, cancels it,
 * waits 100 ms, and verifies that {@code removeOldTasks()} leaves no task info
 * with that task id.
 */
@Test
public void testRemoveOldTasks()
        throws Exception
{
    try (SqlTaskManager sqlTaskManager = createSqlTaskManager(new TaskManagerConfig().setInfoMaxAge(new Duration(5, TimeUnit.MILLISECONDS)))) {
        TaskId id = TASK_ID;

        TaskInfo created = sqlTaskManager.updateTask(
                TEST_SESSION,
                id,
                Optional.of(PLAN_FRAGMENT),
                ImmutableList.<TaskSource>of(),
                INITIAL_EMPTY_OUTPUT_BUFFERS);
        assertEquals(created.getState(), TaskState.RUNNING);

        TaskInfo canceled = sqlTaskManager.cancelTask(id);
        assertEquals(canceled.getState(), TaskState.CANCELED);

        TaskInfo refetched = sqlTaskManager.getTaskInfo(id);
        assertEquals(refetched.getState(), TaskState.CANCELED);

        // let the canceled task age past the 5 ms info max age
        Thread.sleep(100);
        sqlTaskManager.removeOldTasks();

        for (TaskInfo remaining : sqlTaskManager.getAllTaskInfo()) {
            assertNotEquals(remaining.getTaskId(), id);
        }
    }
}
/**
 * Move the task directly to the failed state.
 *
 * <p>Builds a copy of the current task info with state {@code FAILED}, version
 * {@code TaskInfo.MAX_VERSION} and the given cause as its only failure, then
 * submits it through {@code updateTaskInfo}. The failure is logged at debug
 * level only when the current state is not already done; the update is
 * submitted either way.
 *
 * @param cause the reason the task is being failed
 */
private void failTask(Throwable cause)
{
    TaskInfo current = getTaskInfo();

    boolean alreadyDone = current.getState().isDone();
    if (!alreadyDone) {
        log.debug(cause, "Remote task failed: %s", current.getSelf());
    }

    TaskInfo failed = new TaskInfo(
            current.getTaskId(),
            current.getTaskInstanceId(),
            TaskInfo.MAX_VERSION,
            TaskState.FAILED,
            current.getSelf(),
            current.getLastHeartbeat(),
            current.getOutputBuffers(),
            current.getNoMoreSplits(),
            current.getStats(),
            ImmutableList.of(toFailure(cause)),
            current.isNeedsPlan());
    updateTaskInfo(failed);
}
URI uri = taskInfo.getSelf(); updateTaskInfo(new TaskInfo(taskInfo.getTaskId(), taskInfo.getTaskInstanceId(), TaskInfo.MAX_VERSION,