/**
 * Emits one task attempt to the collector as a flat field map.
 *
 * <p>The map starts as a copy of the entity's tags and is then extended with the
 * attempt's start time, end time and task status. When no error-category tag is
 * present an empty-string placeholder is added. The tuple sent to the collector
 * consists of the task-attempt-id value followed by the whole field map.
 *
 * @param entity the task attempt whose tags and timing fields are emitted
 */
@Override
public void flush(TaskAttemptExecutionAPIEntity entity) {
    final Map<String, Object> payload = new HashMap<>(entity.getTags());
    payload.put("startTime", entity.getStartTime());
    payload.put("endTime", entity.getEndTime());
    payload.put("taskStatus", entity.getTaskStatus());

    // NOTE(review): the presence check uses the enum's string form while the default
    // is stored under the literal "errorCategory" - confirm both are the same key.
    final boolean hasErrorCategory = payload.containsKey(MRJobTagName.ERROR_CATEGORY.toString());
    if (!hasErrorCategory) {
        payload.put("errorCategory", "");
    }

    final Object attemptId = payload.get(MRJobTagName.TASK_ATTEMPT_ID.toString());
    collector.collect(stormStreamId, new ValuesArray(attemptId, payload));
}
}
String taskType = getTaskType(attempt); if (Constants.TaskType.MAP.toString().equalsIgnoreCase(taskType)) { long mapTime = attempt.getEndTime() - attempt.getStartTime(); avgMapTimeInSec += mapTime; if (firstMap == null || firstMap.getStartTime() > attempt.getStartTime()) { firstMap = attempt; if (lastMap == null || lastMap.getEndTime() < attempt.getEndTime()) { lastMap = attempt; if (worstMap == null || (worstMap.getEndTime() - worstMap.getStartTime()) < mapTime) { worstMap = attempt; long reduceTime = attempt.getEndTime() - attempt.getShuffleFinishTime(); avgReduceTimeInSec += reduceTime; if (firstReduce == null || firstReduce.getStartTime() > attempt.getStartTime()) { firstReduce = attempt; if (lastReduce == null || lastReduce.getEndTime() < attempt.getEndTime()) { lastReduce = attempt; if (worstReduce == null || (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) < reduceTime) { worstReduce = attempt;
@Override public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) { TaskAttemptExecutionAPIEntity worstReduce = context.getWorstReduce(); if (context.getNumReduces() == 0 || worstReduce == null) { return null; } StringBuilder sb = new StringBuilder(); try { long worstTimeInSec = (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) / DateTimeUtil.ONESECOND; if (worstTimeInSec - context.getAvgReduceTimeInSec() > 30 * 60 ) { long avgInputs = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS) / context.getNumReduces(); long worstInputs = worstReduce.getJobCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS); if (worstInputs > avgInputs * 5) { sb.append("Data skew detected in reducers. The average reduce time is ").append(context.getAvgReduceTimeInSec()); sb.append(" seconds, the worst reduce time is ").append(worstTimeInSec); sb.append(" seconds. Please investigate this problem to improve your job performance.\n"); } } if (sb.length() > 0) { return new Result.ProcessorResult(Result.RuleType.DATA_SKEW, Result.ResultLevel.INFO, sb.toString()); } } catch (NullPointerException e) { // When job failed there may not have counters, so just ignore it } return null; } }
long mapPhaseTimeInSec = (lastMap.getEndTime() - firstMap.getStartTime()) / DateTimeUtil.ONESECOND; if (mapPhaseTimeInSec > context.getAvgMapTimeInSec() * ((context.getNumMaps() + tasksPerTime - 1) / tasksPerTime) * 20) { long reducePhaseTimeInSec = (lastReduce.getEndTime() - firstShuffle.getStartTime()) / DateTimeUtil.ONESECOND; if (reducePhaseTimeInSec > context.getAvgReduceTimeInSec() * ((context.getNumReduces() + tasksPerTime - 1) / tasksPerTime) * 20) {
/**
 * Updates the per-minute task-attempt counters for a newly created entity.
 *
 * <p>Entities that are not {@link TaskAttemptExecutionAPIEntity} instances are ignored.
 * For a task attempt, a counter key is built from a fixed subset of its tags (site,
 * job definition id, rack, hostname, job id, task type) plus its end time passed
 * through {@code roundToMinute}. The matching counter value is created on first use;
 * its total count is always incremented, and its failed or killed count is incremented
 * when the attempt's status equals the corresponding {@code EagleTaskStatus} name.
 *
 * <p>The status comparison is written constant-first, so an attempt with a
 * {@code null} task status is counted in the total only instead of raising a
 * {@link NullPointerException}.
 *
 * @param entity the entity that was just created
 * @throws Exception declared by the overridden method
 */
@Override
public void jobEntityCreated(JobBaseAPIEntity entity) throws Exception {
    if (!(entity instanceof TaskAttemptExecutionAPIEntity)) {
        return;
    }
    TaskAttemptExecutionAPIEntity e = (TaskAttemptExecutionAPIEntity) entity;

    // Only these tags take part in the aggregation key; tags missing on the entity
    // are stored with a null value, exactly as a direct get/put would do.
    String[] groupingTagNames = {
        MRJobTagName.SITE.toString(),
        MRJobTagName.JOD_DEF_ID.toString(),
        MRJobTagName.RACK.toString(),
        MRJobTagName.HOSTNAME.toString(),
        MRJobTagName.JOB_ID.toString(),
        MRJobTagName.TASK_TYPE.toString(),
    };
    Map<String, String> tags = new HashMap<>();
    for (String tagName : groupingTagNames) {
        tags.put(tagName, e.getTags().get(tagName));
    }

    CounterKey key = new CounterKey();
    key.tags = tags;
    key.timestamp = roundToMinute(e.getEndTime());

    CounterValue value = counters.get(key);
    if (value == null) {
        value = new CounterValue();
        counters.put(key, value);
    }

    // Constant-first equals: safe when the attempt has no status set.
    if (EagleTaskStatus.FAILED.name().equals(e.getTaskStatus())) {
        value.failedCount++;
    } else if (EagleTaskStatus.KILLED.name().equals(e.getTaskStatus())) {
        value.killedCount++;
    }
    value.totalCount++;
}
entity.setEndTime(Long.valueOf(finishTime)); entity.setTimestamp(jobLaunchTime); entity.setDuration(entity.getEndTime() - entity.getStartTime()); entity.setTaskStatus(values.get(Keys.TASK_STATUS)); entity.setError(values.get(Keys.ERROR)); taskAttemptErrorCategoryEntity.setEndTime(entity.getEndTime()); taskAttemptErrorCategoryEntity.setTimestamp(entity.getTimestamp()); entityCreated(taskAttemptErrorCategoryEntity);