public void reset() {
  if (isAlwaysDump && !hasDumped) {
    dumpLog(LOG);
  }
  offset = 0;
  hasDumped = false;
}
@Override
public DiskRangeList readFileData(DiskRangeList range, long baseOffset, boolean doForceDirect)
    throws IOException {
  long startTime = counters.startTimeCounter();
  DiskRangeList result = orcDataReaderRef.readFileData(range, baseOffset, doForceDirect);
  counters.recordHdfsTime(startTime);
  if (LlapIoImpl.ORC_LOGGER.isTraceEnabled()) {
    LlapIoImpl.ORC_LOGGER.trace("Disk ranges after disk read (file {}, base offset {}): {}",
        fileKey, baseOffset, RecordReaderUtils.stringifyDiskRanges(result));
  }
  trace.logRanges(fileKey, baseOffset, result, IoTrace.RangesSrc.DISK);
  return result;
}
public void dumpLog(Logger logger) {
  hasDumped = true;
  int ix = 0;
  logger.info("Dumping LLAP IO trace; " + (offset << 3) + " bytes");
  while (ix < offset) {
    ix = dumpOneLine(ix, logger, log);
  }
}
private static int dumpOneLine(int ix, Logger logger, long[] log) {
  int event = getFirstInt(log[ix]);
  // Assumed event names below; only TREE_READER_NEXT_VECTOR appears verbatim in this excerpt.
  switch (event) {
  case TREE_READER_NEXT_VECTOR: {
    logger.info(ix + ": TreeReader next vector " + getSecondInt(log[ix]));
    return ix + 1;
  }
  case READING_STRIPE: { // assumed label
    logger.info(ix + ": Reading stripe " + getSecondInt(log[ix]) + " at " + log[ix + 1]
        + " length " + log[ix + 2]);
    return ix + 3;
  }
  case SARG_RESULT: { // assumed label: RG count selected by the SARG for a stripe
    logger.info(ix + ": Reading " + log[ix + 1] + " rgs for stripe " + getSecondInt(log[ix]));
    return ix + 2;
  }
  case SARG_RESULT_RGS: { // assumed label: per-RG filter bitmask for a stripe
    int elements = (int) log[ix + 1]; // assumed layout: number of packed longs that follow
    boolean[] rgs = unpackBools(log, ix + 2, elements); // hypothetical helper; decoding elided in the excerpt
    logger.info(ix + ": Reading filtered rgs for stripe " + getSecondInt(log[ix]) + ": "
        + DebugUtils.toString(rgs));
    return ix + (elements + 2);
  }
  case RANGES: { // assumed label
    int val = getSecondInt(log[ix]);
    RangesSrc src = RangesSrc.values()[val >>> MAX_ELEMENT_BITS];
    int rangeCount = val & ((1 << MAX_ELEMENT_BITS) - 1);
    StringBuilder sb = new StringBuilder();
    int currentOffset = ix + 1; // assumed layout: ranges follow the header word, 3 longs each
    for (int i = 0; i < rangeCount; ++i, currentOffset += 3) {
      sb.append(printRange(currentOffset, log)).append(", ");
    }
    logger.info(ix + ": Ranges from " + src + ": " + sb); // message format assumed
    return currentOffset;
  }
  case COLUMN_READ: { // assumed label
    logger.info(ix + ": Reading column " + getSecondInt(log[ix]) + " (included index "
        + getFirstInt(log[ix + 1]) + "; type "
        + ColumnEncoding.Kind.values()[getSecondInt(log[ix + 1])] + ")");
    return ix + 2;
  }
  default:
    logger.info(ix + ": Unknown event " + event);
    return ix + 1;
  }
}
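The packing helpers used above (getFirstInt, getSecondInt) are not part of this excerpt. A minimal sketch, assuming each trace record is a single long with the event code in the high 32 bits and the payload in the low 32 bits:

// Minimal sketch (assumption): trace records are packed longs; the exact bit layout
// is not shown in the excerpt and may differ in the real IoTrace.
private static long makeIntPair(int first, int second) {
  return ((long) first << 32) | (second & 0xFFFFFFFFL);
}

private static int getFirstInt(long record) {
  return (int) (record >>> 32);
}

private static int getSecondInt(long record) {
  return (int) (record & 0xFFFFFFFFL);
}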
// Trace call sites in the encoded-column read path (surrounding reader code elided).
trace.logColumnRead(i, colRgIx, enc.getKind());
// ...
trace.logSkipStream(colIx, streamKind, offset, length);
offset += length;
continue;
// ...
trace.logAddStream(colIx, streamKind, offset, length, indexIx, true);
RecordReaderUtils.addEntireStreamToRanges(offset, length, listToRead, true);
// ...
if (isTracingEnabled) {
  trace.logAddStream(colIx, streamKind, offset, length, indexIx, false);
}
RecordReaderUtils.addRgFilteredStreamToRanges(stream, rgs, isCompressed, indexes[colIx],
    encodings.get(colIx), types.get(colIx), /* remaining arguments elided */);
// ...
trace.logStartRg(rgIx);
// ...
boolean hasErrorForEcb = true;
try {
  trace.logStartCol(ctx.colIx);
  for (int streamIx = 0; streamIx < ctx.streamCount; ++streamIx) {
    StreamContext sctx = ctx.streams[streamIx];
    // ...
    trace.logStartStripeStream(sctx.kind);
    sctx.stripeLevelStream = POOLS.csdPool.take();
    // ...
  }
  // ...
  trace.logRanges(fileKey, stripeOffset, toRead.next, RangesSrc.PREREAD);
  hasError = false;
} finally {
  // ...
}
@Override
public ReadPipeline createReadPipeline(Consumer<ColumnVectorBatch> consumer, FileSplit split,
    Includes includes, SearchArgument sarg, QueryFragmentCounters counters,
    SchemaEvolutionFactory sef, InputFormat<?, ?> sourceInputFormat, Deserializer sourceSerDe,
    Reporter reporter, JobConf job, Map<Path, PartitionDesc> parts) throws IOException {
  cacheMetrics.incrCacheReadRequests();
  OrcEncodedDataConsumer edc = new OrcEncodedDataConsumer(
      consumer, includes, false, counters, ioMetrics);
  SerDeFileMetadata fm;
  try {
    fm = new SerDeFileMetadata(sourceSerDe);
  } catch (SerDeException e) {
    throw new IOException(e);
  }
  edc.setFileMetadata(fm);
  // Note that we pass job config to the record reader, but use global config for LLAP IO.
  // TODO: add tracing to serde reader
  SerDeEncodedDataReader reader = new SerDeEncodedDataReader(cache, bufferManager, conf,
      split, includes.getPhysicalColumnIds(), edc, job, reporter, sourceInputFormat,
      sourceSerDe, counters, fm.getSchema(), parts);
  edc.init(reader, reader, new IoTrace(0, false));
  return edc;
}
LOG.trace("Creating context: " + colCtxs[i].toString()); trace.logColumnRead(i, colRgIx, ColumnEncoding.Kind.DIRECT); // Bogus encoding. && ((sargColumns != null && sargColumns[colIx]) || (physicalFileIncludes[colIx] && streamKind == Kind.ROW_INDEX))) { trace.logAddStream(colIx, streamKind, offset, length, -1, true); colCtxs[colIx].addStream(offset, stream, -1); if (isTracingEnabled) {
// ... (earlier constructor arguments of the IO elevator thread pool elided)
    new LinkedBlockingQueue<Runnable>(),
    new ThreadFactoryBuilder().setNameFormat("IO-Elevator-Thread-%d").setDaemon(true).build());
FixedSizedObjectPool<IoTrace> tracePool = IoTrace.createTracePool(conf);
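IoTrace.createTracePool itself is not shown in this excerpt. A minimal sketch of a pool that hands out reusable IoTrace instances, assuming a FixedSizedObjectPool/PoolObjectHelper-style API and using fixed literals in place of the real configuration lookups:

// Minimal sketch (assumption): the real method reads the trace buffer size and
// always-dump flag from the LLAP configuration; fixed values are used here only
// for illustration, and the pool API shown is an assumption.
public static FixedSizedObjectPool<IoTrace> createTracePool(Configuration conf) {
  final int traceSize = 4096;         // assumed stand-in for the configured trace size
  final boolean isAlwaysDump = false; // assumed stand-in for the configured dump flag
  return new FixedSizedObjectPool<>(32, new Pool.PoolObjectHelper<IoTrace>() {
    @Override
    public IoTrace create() {
      return new IoTrace(traceSize, isAlwaysDump);
    }
    @Override
    public void resetBeforeOffer(IoTrace t) {
      t.reset(); // reuse the buffer; dumps any pending log first (see reset() above)
    }
  });
}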
private void processCacheCollisions(long[] collisionMask, List<? extends CacheChunk> toDecompress,
    MemoryBuffer[] targetBuffers, List<MemoryBuffer> cacheBuffers) {
  if (collisionMask == null) return;
  assert collisionMask.length >= (toDecompress.size() >>> 6);
  // There are some elements that were cached in parallel, take care of them.
  long maskVal = -1;
  for (int i = 0; i < toDecompress.size(); ++i) {
    if ((i & 63) == 0) {
      maskVal = collisionMask[i >>> 6];
    }
    if ((maskVal & 1) == 1) {
      // Cache has found an old buffer for the key and put it into array instead of our new one.
      CacheChunk replacedChunk = toDecompress.get(i);
      MemoryBuffer replacementBuffer = targetBuffers[i];
      if (isTracingEnabled) {
        LOG.trace("Discarding data due to cache collision: " + replacedChunk.getBuffer()
            + " replaced with " + replacementBuffer);
      }
      trace.logCacheCollision(replacedChunk, replacementBuffer);
      assert replacedChunk.getBuffer() != replacementBuffer : i + " was not replaced in the results "
          + "even though mask is [" + Long.toBinaryString(maskVal) + "]";
      replacedChunk.handleCacheCollision(cacheWrapper, replacementBuffer, cacheBuffers);
    }
    maskVal >>= 1;
  }
}
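For reference, a minimal sketch of how a mask compatible with this loop could be built; buildCollisionMask and its boolean-array input are hypothetical stand-ins for the cache's actual put results:

// Minimal sketch (assumption): one bit per decompressed chunk, 64 chunks per long;
// bit i set means the cache kept an existing buffer for chunk i, so the newly
// decompressed buffer must be discarded by processCacheCollisions.
static long[] buildCollisionMask(boolean[] cacheKeptExistingBuffer) { // hypothetical helper
  long[] mask = new long[(cacheKeptExistingBuffer.length + 63) >>> 6];
  for (int i = 0; i < cacheKeptExistingBuffer.length; ++i) {
    if (cacheKeptExistingBuffer[i]) {
      mask[i >>> 6] |= 1L << (i & 63);
    }
  }
  return mask;
}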
int[] result = new int[3];
List<IncompleteCb> l = new ArrayList<>();
IoTrace trace = new IoTrace(0, false);
BufferChunk rv = EncodedReaderImpl.readLengthBytesFromSmallBuffers(
    bc, 0L, result, l, true, trace);
private void handleReaderError(long startTime, Throwable t) throws InterruptedException {
  recordReaderTime(startTime);
  consumer.setError(t);
  trace.dumpLog(LOG);
  cleanupReaders();
  tracePool.offer(trace);
}
// ... (beginning of the diagnostic log statement elided in the excerpt)
    + RecordReaderUtils.stringifyDiskRanges(toRead.next));
trace.logRanges(fileKey, stripeOffset, toRead.next, RangesSrc.CACHE);