@Override public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) { TaskAttemptExecutionAPIEntity worstReduce = context.getWorstReduce(); if (context.getNumReduces() == 0 || worstReduce == null) { return null; } StringBuilder sb = new StringBuilder(); try { long worstTimeInSec = (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) / DateTimeUtil.ONESECOND; if (worstTimeInSec - context.getAvgReduceTimeInSec() > 30 * 60 ) { long avgInputs = context.getJob().getReduceCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS) / context.getNumReduces(); long worstInputs = worstReduce.getJobCounters().getCounterValue(JobCounters.CounterName.REDUCE_INPUT_RECORDS); if (worstInputs > avgInputs * 5) { sb.append("Data skew detected in reducers. The average reduce time is ").append(context.getAvgReduceTimeInSec()); sb.append(" seconds, the worst reduce time is ").append(worstTimeInSec); sb.append(" seconds. Please investigate this problem to improve your job performance.\n"); } } if (sb.length() > 0) { return new Result.ProcessorResult(Result.RuleType.DATA_SKEW, Result.ResultLevel.INFO, sb.toString()); } } catch (NullPointerException e) { // When job failed there may not have counters, so just ignore it } return null; } }
long shuffleTime = attempt.getShuffleFinishTime() - attempt.getStartTime(); avgShuffleTimeInSec += shuffleTime; if (firstShuffle == null || firstShuffle.getStartTime() > attempt.getStartTime()) { firstShuffle = attempt; if (lastShuffle == null || lastShuffle.getShuffleFinishTime() < attempt.getShuffleFinishTime()) { lastShuffle = attempt; if (worstShuffle == null || (worstShuffle.getShuffleFinishTime() - worstShuffle.getStartTime()) < shuffleTime) { worstShuffle = attempt; long reduceTime = attempt.getEndTime() - attempt.getShuffleFinishTime(); avgReduceTimeInSec += reduceTime; if (firstReduce == null || firstReduce.getStartTime() > attempt.getStartTime()) { lastReduce = attempt; if (worstReduce == null || (worstReduce.getEndTime() - worstReduce.getShuffleFinishTime()) < reduceTime) { worstReduce = attempt;