public void process(ChukwaArchiveKey archiveKey, Chunk chunk,
    OutputCollector<ChukwaRecordKey, ChukwaRecord> output, Reporter reporter) {
  chunkInErrorSaved = false;

  this.archiveKey = archiveKey;
  this.output = output;
  this.reporter = reporter;

  // Iterate over every line in the chunk; a failure on any line saves the
  // whole chunk as an error record instead of aborting processing.
  reset(chunk);
  while (hasNext()) {
    try {
      parse(nextLine(), output, reporter);
    } catch (Throwable e) {
      saveChunkInError(e);
    }
  }
}
@Override
protected void parse(String recordEntry,
    OutputCollector<ChukwaRecordKey, ChukwaRecord> output, Reporter reporter)
    throws Throwable {
  Log4JMetricsContextChukwaRecord record =
      new Log4JMetricsContextChukwaRecord(recordEntry);
  ChukwaRecord chukwaRecord = record.getChukwaRecord();
  this.buildGenericRecord(chukwaRecord, null, record.getTimestamp(),
      record.getRecordType());
  output.collect(key, chukwaRecord);
}
protected void saveChunkInError(Throwable throwable) {
  if (!chunkInErrorSaved) {
    try {
      ChunkSaver.saveChunk(chunk, throwable, output, reporter);
      chunkInErrorSaved = true;
    } catch (Exception e) {
      e.printStackTrace();
    }
  }
}
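// The three methods above form the template-method core of the processor:
// process() drives line iteration and error capture, and subclasses override
// only parse(). A minimal sketch of a concrete subclass, assuming the base
// class is AbstractProcessor and the key field and buildGenericRecord()
// shown above; the class name, record type, and field name below are
// hypothetical.
public class SimpleLineProcessor extends AbstractProcessor {
  @Override
  protected void parse(String recordEntry,
      OutputCollector<ChukwaRecordKey, ChukwaRecord> output, Reporter reporter)
      throws Throwable {
    ChukwaRecord record = new ChukwaRecord();
    record.add("body", recordEntry); // hypothetical field name
    // buildGenericRecord fills in the shared key/metadata for this chunk
    this.buildGenericRecord(record, recordEntry, System.currentTimeMillis(),
        "SimpleLine"); // hypothetical record type
    output.collect(key, record);
  }
}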
public void testLog4JMetricsContextChukwaRecord() throws Throwable {
  {
    Log4JMetricsContextChukwaRecord rec =
        new Log4JMetricsContextChukwaRecord(chukwaQueueLog[0]);
    ChukwaRecord chukwaRecord = rec.getChukwaRecord();
    assertEquals("chunkQueue", rec.getRecordType());
    assertEquals("1241568021982", chukwaRecord.getValue("timestamp"));
    // the record timestamp is rounded down to the minute boundary
    assertEquals((1241568021982L / 60000) * 60000, rec.getTimestamp());
    assertEquals("94", chukwaRecord.getValue("queueSize"));
  }
  {
    Log4JMetricsContextChukwaRecord rec =
        new Log4JMetricsContextChukwaRecord(chukwaAgentLog[3]);
    assertEquals("CA_chukwaAgent", rec.getRecordType());
    assertEquals(1241653175214L / 60000 * 60000, rec.getTimestamp());
  }
}
public void testParseIllegalRegex() {
  jobConf.set(TsProcessor.DEFAULT_TIME_REGEX, "(");

  ChunkBuilder cb = new ChunkBuilder();
  cb.addRecord("2012-10-25 00:18:44,818 some sample record data".getBytes());
  Chunk chunk = cb.getChunk();

  TsProcessor p = new TsProcessor();
  p.reset(chunk);

  ChukwaTestOutputCollector<ChukwaRecordKey, ChukwaRecord> output =
      new ChukwaTestOutputCollector<ChukwaRecordKey, ChukwaRecord>();
  p.process(null, chunk, output, Reporter.NULL);

  assertEquals("Output data size not correct.", 1, output.data.size());
  ChukwaRecordKey key = output.data.keySet().iterator().next();
  ChukwaRecord record = output.data.get(key);
  // cchunkData is only populated when the chunk was saved as an error record
  assertNull("Output should not be error.", record.getValue("cchunkData"));
}
@SuppressWarnings("unchecked") public void testHBaseRegionServerProcessor() { // test metric for each record type JSONObject json = getJSONObject(); json.put("blockCacheSize", "2681872"); byte[] data = json.toString().getBytes(); HBaseMasterProcessor p = new HBaseMasterProcessor(); ChunkImpl ch = new ChunkImpl("TestType", "Test", data.length, data, null); String failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); // no gauge metrics yet }
public void testDefaultFormat() {
  String record = buildSampleSimpleRecord(date, "yyyy-MM-dd HH:mm:ss,SSS");
  doTest(date, record);
}
@SuppressWarnings("unchecked") public void testZookeeperProcessor() { // test metric for each record type JSONObject json = getJSONObject(); json.put("packetsSent", "2049"); json.put("NodeCount", "40"); byte[] data = json.toString().getBytes(); ZookeeperProcessor p = new ZookeeperProcessor(); ChunkImpl ch = new ChunkImpl("TestType", "Test", data.length, data, null); String failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); // test gauge metric json.put("packetsSent", "2122"); data = json.toString().getBytes(); ch = new ChunkImpl("TestType", "Test", data.length, data, null); json.put("packetsSent", "73"); failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); }
@SuppressWarnings("unchecked") public void testJobTrackerProcessor() { // test metric for each record type JSONObject json = getJSONObject(); json.put("memHeapUsedM", "286"); json.put("maps_killed", "3"); json.put("waiting_maps", "1"); json.put("RpcProcessingTime_avg_time", "0.003"); byte[] data = json.toString().getBytes(); JobTrackerProcessor p = new JobTrackerProcessor(); ChunkImpl ch = new ChunkImpl("TestType", "Test", data.length, data, null); String failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); // test gauge metric json.put("maps_killed", "5"); data = json.toString().getBytes(); ch = new ChunkImpl("TestType", "Test", data.length, data, null); json.put("maps_killed", "2"); failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); }
@SuppressWarnings("unchecked") public void testDatanodeProcessor() { // test metric for each record type JSONObject json = getJSONObject(); json.put("heartBeats_num_ops", "10875"); json.put("FilesCreated", "33"); json.put("RpcQueueTime_avg_time", "0.001"); json.put("gcCount", "112"); json.put("Capacity", "22926269645"); byte[] data = json.toString().getBytes(); DatanodeProcessor p = new DatanodeProcessor(); ChunkImpl ch = new ChunkImpl("TestType", "Test", data.length, data, null); String failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); // test gauge metric json.put("heartBeats_num_ops", "10980"); json.put("gcCount", "115"); data = json.toString().getBytes(); ch = new ChunkImpl("TestType", "Test", data.length, data, null); json.put("heartBeats_num_ops", "105"); json.put("gcCount", "3"); failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); }
public void testHLPParseTimes() {
  HadoopLogProcessor hlp = new HadoopLogProcessor();
  int LINES = 50000;
  long bytes = 0;
  long ts_start = System.currentTimeMillis();
  for (int i = 0; i < LINES; ++i) {
    Chunk c = getNewChunk();
    bytes += c.getData().length;
    hlp.process(null, c, nullcollector, Reporter.NULL);
    // hlp.parse(line, nullcollector, Reporter.NULL);
  }
  long time = (System.currentTimeMillis() - ts_start);
  System.out.println("parse took " + time + " milliseconds");
  System.out.println("aka " + time * 1.0 / LINES + " ms per line or " + time
      * 1000.0 / bytes + " ms per kilobyte of log data");
  System.out.println("output records had total length of " + serializedSize);
}
public void testParse() {
  ChunkBuilder cb = new ChunkBuilder();
  cb.addRecord(RecordConstants.escapeAllButLastRecordSeparator("\n", data[0])
      .getBytes());
  cb.addRecord(RecordConstants.escapeAllButLastRecordSeparator("\n", data[1])
      .getBytes());
  cb.addRecord(RecordConstants.escapeAllButLastRecordSeparator("\n", data[2])
      .getBytes());
  Chunk chunk = cb.getChunk();

  OutputCollector<ChukwaRecordKey, ChukwaRecord> output =
      new ChukwaTestOutputCollector<ChukwaRecordKey, ChukwaRecord>();
  TProcessor p = new TProcessor();
  p.data = data;
  p.process(null, chunk, output, null);
}
public void testCustomApacheDefaultFormat() {
  jobConf.set("TsProcessor.default.time.format", "dd/MMM/yyyy:HH:mm:ss Z");
  jobConf.set("TsProcessor.default.time.regex",
      "^(?:[\\d.]+) \\[(\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [-+]\\d{4})\\] .*");
  String record = buildSampleApacheRecord(dateWithoutMillis,
      "dd/MMM/yyyy:HH:mm:ss Z");
  doTest(dateWithoutMillis, record);
}
public static ChunkProcessor getProcessor(String recordType)
    throws UnknownRecordTypeException {
  // note the trailing dot: recordType is the simple class name within the
  // mapper package
  String path = "org.apache.hadoop.chukwa.extraction.demux.processor.mapper."
      + recordType;
  if (processors.containsKey(recordType)) {
    return processors.get(recordType);
  } else {
    ChunkProcessor processor = null;
    try {
      processor = (ChunkProcessor) Class.forName(path).getConstructor()
          .newInstance();
    } catch (ClassNotFoundException e) {
      throw new UnknownRecordTypeException("Unknown recordType:" + recordType,
          e);
    } catch (Exception e) {
      throw new UnknownRecordTypeException("error constructing processor", e);
    }

    // TODO using a ThreadSafe/reuse flag to actually decide if we want
    // to reuse the same processor again and again
    register(recordType, processor);
    return processor;
  }
}
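// Hedged usage sketch of the factory above, assuming the enclosing class is
// named ProcessorFactory; the chunk, archiveKey, output, reporter, and log
// variables are stand-ins from the surrounding demux context.
try {
  ChunkProcessor processor = ProcessorFactory.getProcessor(chunk.getDataType());
  processor.process(archiveKey, chunk, output, reporter);
} catch (UnknownRecordTypeException e) {
  // no ChunkProcessor class matches this record type; skip or log the chunk
  log.warn("unknown record type: " + chunk.getDataType(), e);
}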
@SuppressWarnings("unchecked") public void testHBaseMasterProcessor() { // test metric for each record type JSONObject json = getJSONObject(); json.put("splitSizeNumOps", "108"); json.put("AverageLoad", "3.33"); byte[] data = json.toString().getBytes(); HBaseMasterProcessor p = new HBaseMasterProcessor(); ChunkImpl ch = new ChunkImpl("TestType", "Test", data.length, data, null); String failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); // test gauge metric json.put("splitSizeNumOps", "109"); data = json.toString().getBytes(); ch = new ChunkImpl("TestType", "Test", data.length, data, null); json.put("splitSizeNumOps", "1"); failMsg = testProcessor(p, json, ch); assertNull(failMsg, failMsg); }
public void testCustomDefaultFormat() {
  jobConf.set("TsProcessor.default.time.format", "yyyy--MM--dd HH::mm::ss SSS");
  String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
  doTest(date, record);
}
public void testCustomApacheDataTypeFormat() {
  jobConf.set("TsProcessor.time.format." + DATA_TYPE, "dd/MMM/yyyy:HH:mm:ss Z");
  jobConf.set("TsProcessor.time.regex." + DATA_TYPE,
      "^(?:[\\d.]+) \\[(\\d{2}/\\w{3}/\\d{4}:\\d{2}:\\d{2}:\\d{2} [-+]\\d{4})\\] .*");
  String record = buildSampleApacheRecord(dateWithoutMillis,
      "dd/MMM/yyyy:HH:mm:ss Z");
  doTest(dateWithoutMillis, record);
}
public void testCustomDefaultFormat2() {
  // this date format produces a date that is longer than the format string,
  // since z expands to something like PDT
  jobConf.set("TsProcessor.default.time.format",
      "yyyy--MM--dd HH::mm::ss SSS,z");
  String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS,z");
  doTest(date, record);
}
public void testCustomDataTypeFormat() {
  jobConf.set("TsProcessor.time.format." + DATA_TYPE,
      "yyyy--MM--dd HH::mm::ss SSS");
  String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
  doTest(date, record);
}
public void testCustomDefaultFormatWithCustomDataTypeFormat() {
  jobConf.set("TsProcessor.default.time.format", "yyyy/MM/dd HH:mm:ss SSS");
  jobConf.set("TsProcessor.time.format." + DATA_TYPE,
      "yyyy--MM--dd HH::mm::ss SSS");
  String record = buildSampleSimpleRecord(date, "yyyy--MM--dd HH::mm::ss SSS");
  doTest(date, record);
}
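// The TsProcessor tests above exercise a two-level lookup: a per-data-type
// format ("TsProcessor.time.format.<dataType>") overrides the default
// ("TsProcessor.default.time.format"). A minimal sketch of that resolution,
// assuming a Hadoop Configuration; the helper name is hypothetical, and the
// fallback value is inferred from testDefaultFormat(), which passes with no
// configuration set.
String resolveTimeFormat(org.apache.hadoop.conf.Configuration conf,
    String dataType) {
  String specific = conf.get("TsProcessor.time.format." + dataType);
  if (specific != null) {
    return specific;
  }
  return conf.get("TsProcessor.default.time.format", "yyyy-MM-dd HH:mm:ss,SSS");
}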