public TaskLogProcessor(JobConf conf) {
  query = HiveConf.getQueryString(conf);

  // Register each known heuristic along with an empty stats holder.
  heuristics.put(new ScriptErrorHeuristic(), new HeuristicStats());
  heuristics.put(new MapAggrMemErrorHeuristic(), new HeuristicStats());
  heuristics.put(new DataCorruptErrorHeuristic(), new HeuristicStats());

  for (ErrorHeuristic e : heuristics.keySet()) {
    e.init(query, conf);
  }
}
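// For context, a minimal sketch of how the processor is driven, based on the
// tests further below (the query string and log path are illustrative only):
JobConf jobConf = new JobConf();
HiveConf.setQueryString(jobConf, "select * from foo group by moo;");
TaskLogProcessor processor = new TaskLogProcessor(jobConf);

// Every registered heuristic scans each added task-attempt log for its regexes.
processor.addTaskAttemptLogUrl(
    new File("/tmp/attempt_0_log.txt").toURI().toURL().toString());

for (ErrorAndSolution eas : processor.getErrors()) {
  System.out.println(eas.getError() + " -> " + eas.getSolution());
}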
public ScriptErrorHeuristic() {
  setQueryRegex(".*");
  getLogRegexes().add(FAILED_REGEX);
}
public DataCorruptErrorHeuristic() {
  setQueryRegex(".*");
  getLogRegexes().add(SPLIT_REGEX);
  getLogRegexes().add(EXCEPTION_REGEX);
}
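// These constructors show the pattern a RegexErrorHeuristic subclass follows:
// a query regex gating when the heuristic applies, plus one or more log
// regexes whose matching lines are collected. As a hypothetical illustration
// (class name, regex, and messages are invented, not part of Hive):
public class DiskFullErrorHeuristic extends RegexErrorHeuristic {
  private static final String DISK_FULL_REGEX = "No space left on device";

  public DiskFullErrorHeuristic() {
    setQueryRegex(".*");                  // applies to every query
    getLogRegexes().add(DISK_FULL_REGEX); // lines to collect from task logs
  }

  @Override
  public ErrorAndSolution getErrorAndSolution() {
    ErrorAndSolution es = null;
    if (getQueryMatches()
        && getRegexToLogLines().get(DISK_FULL_REGEX).size() > 0) {
      es = new ErrorAndSolution(
          "A task ran out of local disk space.",
          "Free up or add local disk capacity on the worker nodes.");
    }
    reset();
    return es;
  }
}
// Registering it would be one more heuristics.put(...) line in the
// TaskLogProcessor constructor above.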
@Test
public void testScriptErrorHeuristic() throws Exception {
  JobConf jobConf = new JobConf();
  HiveConf.setQueryString(jobConf, "select * from foo group by moo;");
  final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);

  String errorCode = "7874"; // example exit code
  String content = "line a\nline b\n"
      + "Script failed with code " + errorCode + " line c\nline d\n";
  File logFile = writeTestLog("1", content);
  taskLogProcessor.addTaskAttemptLogUrl(logFile.toURI().toURL().toString());

  List<ErrorAndSolution> errList = taskLogProcessor.getErrors();
  assertEquals(1, errList.size());

  final ErrorAndSolution eas = errList.get(0);
  String error = eas.getError();
  assertNotNull(error);
  // check that the exit code is echoed in the error description:
  assertTrue(error.contains(errorCode));

  String solution = eas.getSolution();
  assertNotNull(solution);
  assertTrue(solution.length() > 0);
}
@Override
public ErrorAndSolution getErrorAndSolution() {
  ErrorAndSolution es = null;
  if (getQueryMatches() && configMatches) {
    List<String> matchingLines = getRegexToLogLines().get(OUT_OF_MEMORY_REGEX);
    if (matchingLines.size() > 0) {
      String confName = HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY.toString();
      float confValue = HiveConf.getFloatVar(getConf(),
          HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
      es = new ErrorAndSolution(
          "Out of memory due to hash maps used in map-side aggregation.",
          "Currently " + confName + " is set to " + confValue + ". "
          + "Try setting it to a lower value, e.g. "
          + "'set " + confName + " = " + confValue / 2 + ";'");
    }
  }
  reset();
  return es;
}
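// Assuming the configuration key resolves to hive.map.aggr.hash.percentmemory
// with its commonly documented default of 0.5 (both assumptions; the real
// values come from the JobConf), the emitted advice would read:
//
//   Currently hive.map.aggr.hash.percentmemory is set to 0.5.
//   Try setting it to a lower value, e.g.
//   'set hive.map.aggr.hash.percentmemory = 0.25;'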
@Override
public ErrorAndSolution getErrorAndSolution() {
  ErrorAndSolution es = null;
  if (getQueryMatches()) {
    Map<String, List<String>> rll = getRegexToLogLines();
    if (rll.get(EXCEPTION_REGEX).size() > 0 && rll.get(SPLIT_REGEX).size() > 0) {
      // There should only be a single split line...
      String splitLogLine = rll.get(SPLIT_REGEX).get(0);
      // Extract only 'split: hdfs://...'
      Pattern p = Pattern.compile(SPLIT_REGEX, Pattern.CASE_INSENSITIVE);
      Matcher m = p.matcher(splitLogLine);
      m.find();
      String splitStr = m.group(1);
      es = new ErrorAndSolution(
          "Data file " + splitStr + " is corrupted.",
          "Replace the file, e.g. by re-running the query that produced the "
          + "source table / partition.");
    }
  }
  reset();
  return es;
}
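// If the captured split group were, say, hdfs://nn/warehouse/foo/part-00000
// (a hypothetical path), the resulting pair would read:
//
//   Error:    Data file hdfs://nn/warehouse/foo/part-00000 is corrupted.
//   Solution: Replace the file, e.g. by re-running the query that produced
//             the source table / partition.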
@Override
public ErrorAndSolution getErrorAndSolution() {
  ErrorAndSolution es = null;
  if (getQueryMatches()) {
    for (List<String> matchingLines : getRegexToLogLines().values()) {
      if (matchingLines.size() > 0) {
        // Pull the "Script failed with code <n>" fragment out of the first
        // matching line, then extract just the numeric exit code from it.
        Matcher m1 = Pattern.compile(FAILED_REGEX, Pattern.CASE_INSENSITIVE)
            .matcher(matchingLines.get(0));
        m1.find();
        Matcher m2 = Pattern.compile("[0-9]+").matcher(m1.group());
        m2.find();
        String errorCode = m2.group();
        es = new ErrorAndSolution(
            "A user-supplied transform script has exited with error code "
            + errorCode + " instead of 0.",
            "Verify that the script can properly run on its own "
            + "(i.e. outside of Hive) on the row(s) that caused it to fail.");
      }
    }
  }
  reset();
  return es;
}
public MapAggrMemErrorHeuristic() {
  setQueryRegex("group by");
  getLogRegexes().add(OUT_OF_MEMORY_REGEX);
}
private String writeThrowableAsFile(String before, Throwable t, String after,
    String fileSuffix, TaskLogProcessor taskLogProcessor) throws IOException {
  // compose the file text:
  StringBuilder sb = new StringBuilder();
  if (before != null) {
    sb.append(before);
  }
  final String stackTraceStr = toString(t);
  sb.append(stackTraceStr);
  if (after != null) {
    sb.append(after);
  }

  // write it to a file:
  File file = writeTestLog(fileSuffix, sb.toString());

  // add it to the log processor:
  taskLogProcessor.addTaskAttemptLogUrl(file.toURI().toURL().toString());

  return stackTraceStr;
}
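// The toString(Throwable) helper used above is not shown in this excerpt;
// a minimal sketch, assuming it simply renders the full stack trace as text
// (uses java.io.StringWriter and java.io.PrintWriter):
private static String toString(Throwable t) {
  // Render the throwable's stack trace into a single string.
  StringWriter sw = new StringWriter();
  PrintWriter pw = new PrintWriter(sw);
  t.printStackTrace(pw);
  pw.close();
  return sw.toString();
}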
@Override
public void init(String query, JobConf conf) {
  super.init(query, conf);
  configMatches = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE);
}
private void checkException(String writtenText, List<String> actualTrace)
    throws IOException {
  List<String> expectedLines = getLines(writtenText);
  // Fail fast if the traces differ in length before comparing line by line.
  assertEquals(expectedLines.size(), actualTrace.size());
  for (int i = 0; i < expectedLines.size(); i++) {
    assertEquals(expectedLines.get(i), actualTrace.get(i));
  }
}
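// getLines is likewise not shown; a sketch, assuming it splits the written
// text the same way the log reader does (one entry per line, terminators
// dropped; uses java.io.BufferedReader and java.io.StringReader):
private static List<String> getLines(String text) throws IOException {
  List<String> lines = new ArrayList<String>();
  BufferedReader reader = new BufferedReader(new StringReader(text));
  String line;
  while ((line = reader.readLine()) != null) {
    lines.add(line);
  }
  return lines;
}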
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
  Object a = arguments[0].get();
  boolean result = boi.get(a);

  if (!result) {
    throw new DataConstraintViolationError(
        "Either CHECK or NOT NULL constraint violated!");
  }
  resultBool.set(true);
  return resultBool;
}
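// The boi and resultBool fields are set up elsewhere in the UDF; a minimal
// sketch of the usual GenericUDF initialize pattern, assuming a single
// boolean argument (the argument-count check is my assumption, not
// necessarily the original code):
private BooleanObjectInspector boi;
private final BooleanWritable resultBool = new BooleanWritable();

@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
    throws UDFArgumentException {
  if (arguments.length != 1) {
    throw new UDFArgumentLengthException("Exactly one argument is expected.");
  }
  boi = (BooleanObjectInspector) arguments[0];
  // The UDF always returns a writable boolean result.
  return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector;
}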
@Test
public void testMapAggrMemErrorHeuristic() throws Exception {
  JobConf jobConf = new JobConf();
  HiveConf.setQueryString(jobConf, "select * from foo group by moo;");
  final TaskLogProcessor taskLogProcessor = new TaskLogProcessor(jobConf);

  Throwable oome = new OutOfMemoryError("java heap space");
  File log1File = writeTestLog("1", toString(oome));
  taskLogProcessor.addTaskAttemptLogUrl(log1File.toURI().toURL().toString());

  List<ErrorAndSolution> errList = taskLogProcessor.getErrors();
  assertEquals(1, errList.size());

  final ErrorAndSolution eas = errList.get(0);
  String error = eas.getError();
  assertNotNull(error);
  // check that the description identifies a memory problem:
  assertTrue(error.contains("memory"));

  String solution = eas.getSolution();
  assertNotNull(solution);
  assertTrue(solution.length() > 0);
  String confName = HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY.toString();
  assertTrue(solution.contains(confName));
}