/**
 * Emits one task-attempt record onto the configured Storm stream.
 *
 * <p>The emitted payload is the entity's tag map augmented with the timing and
 * status fields. When the upstream classifier did not attach an error category
 * tag, an empty "errorCategory" value is filled in so downstream consumers see
 * a consistent schema. The tuple is keyed by the task attempt id tag.
 */
@Override
public void flush(TaskAttemptExecutionAPIEntity entity) {
    Map<String, Object> payload = new HashMap<>(entity.getTags());
    payload.put("startTime", entity.getStartTime());
    payload.put("endTime", entity.getEndTime());
    payload.put("taskStatus", entity.getTaskStatus());
    boolean hasErrorCategory = payload.containsKey(MRJobTagName.ERROR_CATEGORY.toString());
    if (!hasErrorCategory) {
        payload.put("errorCategory", "");
    }
    Object attemptId = payload.get(MRJobTagName.TASK_ATTEMPT_ID.toString());
    collector.collect(stormStreamId, new ValuesArray(attemptId, payload));
}
}
/**
 * Computes the minimal in-memory sort buffer a map attempt would need to spill
 * only once: all map output bytes plus a fixed 16-byte index entry per record
 * (the default record-index size).
 *
 * @param attempt the map task attempt whose counters are inspected
 * @return minimal sort memory in bytes
 */
private long getMinimumIOSortMemory(TaskAttemptExecutionAPIEntity attempt) {
    // 16 bytes of index overhead per emitted map output record.
    long indexOverhead = attempt.getJobCounters().getCounterValue(MAP_OUTPUT_RECORDS) * 16;
    return attempt.getJobCounters().getCounterValue(MAP_OUTPUT_BYTES) + indexOverhead;
}
// NOTE(review): this excerpt nests each guard inside the previous one, so e.g.
// lastMap would only ever be updated right after firstMap was replaced, and the
// shuffle/reduce bookkeeping sits inside the MAP branch. That strongly suggests
// closing braces were lost when this chunk was flattened and the guards were
// originally sequential — confirm against the full file before relying on it.
String taskType = getTaskType(attempt);
if (Constants.TaskType.MAP.toString().equalsIgnoreCase(taskType)) {
    // Accumulate total map wall-clock time (name says "avg"; presumably divided
    // by the map count elsewhere — TODO confirm).
    long mapTime = attempt.getEndTime() - attempt.getStartTime();
    avgMapTimeInSec += mapTime;
    // Earliest-starting map attempt seen so far.
    if (firstMap == null || firstMap.getStartTime() > attempt.getStartTime()) {
        firstMap = attempt;
        // Latest-finishing map attempt seen so far.
        if (lastMap == null || lastMap.getEndTime() < attempt.getEndTime()) {
            lastMap = attempt;
            // Longest-running map attempt seen so far.
            if (worstMap == null || (worstMap.getEndTime() - worstMap.getStartTime()) < mapTime) {
                worstMap = attempt;
                // Shuffle phase: attempt start through shuffle finish.
                long shuffleTime = attempt.getShuffleFinishTime() - attempt.getStartTime();
                avgShuffleTimeInSec += shuffleTime;
                // Earliest-starting shuffle.
                if (firstShuffle == null || firstShuffle.getStartTime() > attempt.getStartTime()) {
                    firstShuffle = attempt;
                    // Latest-finishing shuffle.
                    if (lastShuffle == null || lastShuffle.getShuffleFinishTime() < attempt.getShuffleFinishTime()) {
                        lastShuffle = attempt;
                        // Slowest shuffle.
                        if (worstShuffle == null || (worstShuffle.getShuffleFinishTime() - worstShuffle.getStartTime()) < shuffleTime) {
                            worstShuffle = attempt;
                            // Reduce phase: shuffle finish through attempt end.
                            long reduceTime = attempt.getEndTime() - attempt.getShuffleFinishTime();
                            avgReduceTimeInSec += reduceTime;
                            // Earliest-starting reduce (excerpt ends mid-block).
                            if (firstReduce == null || firstReduce.getStartTime() > attempt.getStartTime()) {
                                firstReduce = attempt;
// Record the attempt's start time so it can be joined with the finish record below.
taskAttemptStartTime.put(taskAttemptID, Long.valueOf(startTime));
} else if ((recType == RecordTypes.MapAttempt || recType == RecordTypes.ReduceAttempt) && finishTime != null) {
    // task attempt finish: build the attempt entity from the parsed history values.
    TaskAttemptExecutionAPIEntity entity = new TaskAttemptExecutionAPIEntity();
    Map<String, String> taskAttemptExecutionTags = new HashMap<>(taskBaseTags);
    entity.setTags(taskAttemptExecutionTags);
    // NOTE(review): hostname/rack are read but not used within this excerpt —
    // presumably consumed further down; confirm against the full method.
    String hostname = values.get(Keys.HOSTNAME);
    String rack = values.get(Keys.RACK);
    // Start time was stashed when the attempt-start record was seen above.
    entity.setStartTime(taskAttemptStartTime.get(taskAttemptID));
    entity.setEndTime(Long.valueOf(finishTime));
    entity.setTimestamp(jobLaunchTime);
    entity.setDuration(entity.getEndTime() - entity.getStartTime());
    entity.setTaskStatus(values.get(Keys.TASK_STATUS));
    entity.setError(values.get(Keys.ERROR));
    // Shuffle/sort/map-finish timestamps are only present for records that carry
    // SHUFFLE_FINISHED (this branch assumes SORT_FINISHED and MAP_FINISH_TIME
    // are present whenever SHUFFLE_FINISHED is — TODO confirm).
    if (values.containsKey(Keys.SHUFFLE_FINISHED)) {
        entity.setShuffleFinishTime(Long.valueOf(values.get(Keys.SHUFFLE_FINISHED)));
        entity.setSortFinishTime(Long.valueOf(values.get(Keys.SORT_FINISHED)));
        entity.setMapFinishTime(Long.valueOf(values.get(Keys.MAP_FINISH_TIME)));
        entity.setJobCounters(parseCounters(counters));
        // Count unsuccessful attempts against the job-level failure tally.
        // NOTE(review): the same status condition appears twice in a row here —
        // likely a duplicated line from the flattening; excerpt ends mid-block.
        if (entity.getTaskStatus().equals(EagleTaskStatus.FAILED.name()) || entity.getTaskStatus().equals(EagleTaskStatus.KILLED.name())) {
            jobExecutionEntity.setFailedMapAttempts(1 + jobExecutionEntity.getFailedMapAttempts());
            if (entity.getTaskStatus().equals(EagleTaskStatus.FAILED.name()) || entity.getTaskStatus().equals(EagleTaskStatus.KILLED.name())) {
/**
 * Accumulates per-minute failure/kill/total counts for task attempt entities.
 *
 * <p>Non-attempt entities are ignored. The counter series is keyed by the
 * identifying dimension tags plus the attempt's end time rounded to the minute.
 */
@Override
public void jobEntityCreated(JobBaseAPIEntity entity) throws Exception {
    if (!(entity instanceof TaskAttemptExecutionAPIEntity)) {
        return;
    }
    TaskAttemptExecutionAPIEntity attempt = (TaskAttemptExecutionAPIEntity) entity;

    // Copy over the dimension tags that identify this counter series.
    Map<String, String> dims = new HashMap<>();
    MRJobTagName[] copied = {
        MRJobTagName.SITE, MRJobTagName.JOD_DEF_ID, MRJobTagName.RACK,
        MRJobTagName.HOSTNAME, MRJobTagName.JOB_ID, MRJobTagName.TASK_TYPE
    };
    for (MRJobTagName tag : copied) {
        dims.put(tag.toString(), attempt.getTags().get(tag.toString()));
    }

    CounterKey key = new CounterKey();
    key.tags = dims;
    key.timestamp = roundToMinute(attempt.getEndTime());

    // Lazily create the bucket for this (tags, minute) key.
    CounterValue value = counters.get(key);
    if (value == null) {
        value = new CounterValue();
        counters.put(key, value);
    }

    String status = attempt.getTaskStatus();
    if (status.equals(EagleTaskStatus.FAILED.name())) {
        value.failedCount++;
    } else if (status.equals(EagleTaskStatus.KILLED.name())) {
        value.killedCount++;
    }
    value.totalCount++;
}
// Only FAILED or KILLED attempts produce a failure-task record.
// NOTE(review): the closing brace of this guard is missing from this excerpt
// (flattening artifact) — the statements below are almost certainly meant to run
// only for failed/killed attempts; confirm against the full file.
if (!e.getTaskStatus().equals(EagleTaskStatus.FAILED.name()) && !e.getTaskStatus().equals(EagleTaskStatus.KILLED.name())) {
    return;
// Attach the tag map first; it is populated in place below (same reference).
Map<String, String> tags = new HashMap<>();
failureTask.setTags(tags);
tags.put(MRJobTagName.SITE.toString(), e.getTags().get(MRJobTagName.SITE.toString()));
tags.put(MRJobTagName.JOD_DEF_ID.toString(), e.getTags().get(MRJobTagName.JOD_DEF_ID.toString()));
tags.put(MRJobTagName.RACK.toString(), e.getTags().get(MRJobTagName.RACK.toString()));
tags.put(MRJobTagName.HOSTNAME.toString(), e.getTags().get(MRJobTagName.HOSTNAME.toString()));
tags.put(MRJobTagName.JOB_ID.toString(), e.getTags().get(MRJobTagName.JOB_ID.toString()));
tags.put(MRJobTagName.TASK_ATTEMPT_ID.toString(), e.getTags().get(MRJobTagName.TASK_ATTEMPT_ID.toString()));
tags.put(MRJobTagName.TASK_TYPE.toString(), e.getTags().get(MRJobTagName.TASK_TYPE.toString()));
// Classify the raw error text and stamp the category on both the failure record
// and the source entity, so downstream consumers of either see it.
final String errCategory = classifier.classifyError(e.getError());
tags.put(MRJobTagName.ERROR_CATEGORY.toString(), errCategory);
entity.getTags().put(MRJobTagName.ERROR_CATEGORY.toString(), errCategory);
failureTask.setError(e.getError());
failureTask.setFailureCount(1); // hard coded to 1 unless we do pre-aggregation in the future
failureTask.setTimestamp(e.getTimestamp());
failureTask.setTaskStatus(e.getTaskStatus());
failureTasks.add(failureTask);
/**
 * Accumulates per-attempt durations and counter groups into the map-side or
 * reduce-side aggregators, based on the attempt's TASK_TYPE tag.
 *
 * <p>Attempts with an unrecognized task type — or with missing counters — are
 * logged (as JSON) for diagnosis instead of being aggregated.
 *
 * @param entity the finished task attempt to aggregate
 */
private void taskAttemptEntityCreated(TaskAttemptExecutionAPIEntity entity) {
    JobCounters jobCounters = entity.getJobCounters();
    String taskType = entity.getTags().get(TASK_TYPE.toString());
    if (taskType != null && jobCounters != null && jobCounters.getCounters() != null) {
        // equalsIgnoreCase replaces the previous toUpperCase()+equals, which was
        // locale-sensitive (no-arg toUpperCase uses the default locale) and
        // inconsistent with the comparison style used elsewhere in this file.
        if (Constants.TaskType.MAP.toString().equalsIgnoreCase(taskType)) {
            mapAttemptDuration += entity.getDuration();
            this.mapTaskAttemptCounterAgg.accumulate(jobCounters.getCounters().get(Constants.TASK_COUNTER));
            this.mapFileSystemCounterAgg.accumulate(jobCounters.getCounters().get(Constants.FILE_SYSTEM_COUNTER));
            return;
        } else if (Constants.TaskType.REDUCE.toString().equalsIgnoreCase(taskType)) {
            reduceAttemptDuration += entity.getDuration();
            this.reduceTaskAttemptCounterAgg.accumulate(jobCounters.getCounters().get(Constants.TASK_COUNTER));
            this.reduceFileSystemTaskCounterAgg.accumulate(jobCounters.getCounters().get(Constants.FILE_SYSTEM_COUNTER));
            return;
        }
    }
    // Reached when the task type is unrecognized OR the counters are absent;
    // dump the whole entity so the bad record can be inspected.
    ObjectMapper objectMapper = new ObjectMapper();
    try {
        LOG.warn("Unknown task type of task attempt execution entity: " + objectMapper.writeValueAsString(entity));
    } catch (Exception e) {
        LOG.error(e.getMessage(), e);
    }
}
@Override public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) { TaskAttemptExecutionAPIEntity worstReduce = context.getWorstReduce(); if (context.getNumReduces() == 0 || worstReduce == null) { return null; } StringBuilder sb = new StringBuilder(); try { long worstTimeInSec = (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) / DateTimeUtil.ONESECOND; if (worstTimeInSec - context.getAvgReduceTimeInSec() > 30 * 60 ) { long avgInputs = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS) / context.getNumReduces(); long worstInputs = worstReduce.getJobCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS); if (worstInputs > avgInputs * 5) { sb.append("Data skew detected in reducers. The average reduce time is ").append(context.getAvgReduceTimeInSec()); sb.append(" seconds, the worst reduce time is ").append(worstTimeInSec); sb.append(" seconds. Please investigate this problem to improve your job performance.\n"); } } if (sb.length() > 0) { return new Result.ProcessorResult(Result.RuleType.DATA_SKEW, Result.ResultLevel.INFO, sb.toString()); } } catch (NullPointerException e) { // When job failed there may not have counters, so just ignore it } return null; } }
// Wall-clock span of the whole map phase: first map start -> last map end, in seconds.
long mapPhaseTimeInSec = (lastMap.getEndTime() - firstMap.getStartTime()) / DateTimeUtil.ONESECOND;
// ((numMaps + tasksPerTime - 1) / tasksPerTime) is the ceiling of the number of
// "waves" of concurrent map tasks; flag when the phase took more than 20x the
// expected wave-serialized time. (Excerpt ends mid-block; the branch body is not visible.)
if (mapPhaseTimeInSec > context.getAvgMapTimeInSec() * ((context.getNumMaps() + tasksPerTime - 1) / tasksPerTime) * 20) {
// Reduce phase measured from the first *shuffle* start (reducers begin with the
// shuffle) to the last reduce end. NOTE(review): confirm firstShuffle rather than
// firstReduce is intended here — the asymmetry with the map check looks deliberate
// but cannot be verified from this excerpt.
long reducePhaseTimeInSec = (lastReduce.getEndTime() - firstShuffle.getStartTime()) / DateTimeUtil.ONESECOND;
// Same 20x-of-expected-wave-time threshold as the map-phase check above.
if (reducePhaseTimeInSec > context.getAvgReduceTimeInSec() * ((context.getNumReduces() + tasksPerTime - 1) / tasksPerTime) * 20) {
/**
 * Reads the task type (e.g. MAP/REDUCE) from the attempt's TASK_TYPE tag.
 *
 * @param taskAttemptInfo attempt whose tags are inspected
 * @return the TASK_TYPE tag value, or {@code null} when the tag is absent
 */
private String getTaskType(TaskAttemptExecutionAPIEntity taskAttemptInfo) {
    final String taskTypeKey = MRJobTagName.TASK_TYPE.toString();
    return taskAttemptInfo.getTags().get(taskTypeKey);
}