public Map<StreamId, OrcInputStream> readDiskRanges(long stripeOffset, Map<StreamId, DiskRange> diskRanges, AggregatedMemoryContext systemMemoryUsage) throws IOException { // // Note: this code does not use the Java 8 stream APIs to avoid any extra object allocation // // transform ranges to have an absolute offset in file ImmutableMap.Builder<StreamId, DiskRange> diskRangesBuilder = ImmutableMap.builder(); for (Entry<StreamId, DiskRange> entry : diskRanges.entrySet()) { DiskRange diskRange = entry.getValue(); diskRangesBuilder.put(entry.getKey(), new DiskRange(stripeOffset + diskRange.getOffset(), diskRange.getLength())); } diskRanges = diskRangesBuilder.build(); // read ranges Map<StreamId, OrcDataSourceInput> streamsData = orcDataSource.readFully(diskRanges); // transform streams to OrcInputStream ImmutableMap.Builder<StreamId, OrcInputStream> streamsBuilder = ImmutableMap.builder(); for (Entry<StreamId, OrcDataSourceInput> entry : streamsData.entrySet()) { OrcDataSourceInput sourceInput = entry.getValue(); streamsBuilder.put(entry.getKey(), new OrcInputStream(orcDataSource.getId(), sourceInput.getInput(), decompressor, systemMemoryUsage, sourceInput.getRetainedSizeInBytes())); } return streamsBuilder.build(); }
private void readValues() throws IOException { lastReadInputCheckpoint = input.getCheckpoint(); int control = input.read(); if (control == -1) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of RLE integer"); } if (control < 0x80) { numLiterals = control + MIN_REPEAT_SIZE; used = 0; repeat = true; delta = input.read(); if (delta == -1) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "End of stream in RLE Integer"); } // convert from 0 to 255 to -128 to 127 by converting to a signed byte // noinspection SillyAssignment delta = (byte) delta; literals[0] = LongDecode.readVInt(signed, input); } else { numLiterals = 0x100 - control; used = 0; repeat = false; for (int i = 0; i < numLiterals; ++i) { literals[i] = LongDecode.readVInt(signed, input); } } }
/**
 * Skips {@code items} encoded values by consuming bytes from {@code input}.
 *
 * <p>Each value is consumed up to and including its first byte below {@code 0x80}; bytes of
 * {@code 0x80} or above are treated as continuation bytes.
 *
 * @param items number of values to skip; nothing is read when this is not positive
 * @throws IOException if the stream ends in the middle of a value or the underlying read fails
 */
@Override
public void skip(long items) throws IOException {
    for (long remaining = items; remaining > 0; remaining--) {
        while (true) {
            int next = input.read();
            if (next == -1) {
                throw new OrcCorruptionException(input.getOrcDataSourceId(), "Reading BigInteger past EOF");
            }
            if (next < 0x80) {
                // terminating byte of the current value
                break;
            }
        }
    }
}
}
private void readNextBlock() throws IOException { lastReadInputCheckpoint = input.getCheckpoint(); int control = input.read(); if (control == -1) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of buffer RLE byte"); } offset = 0; // if byte high bit is not set, this is a repetition; otherwise it is a literal sequence if ((control & 0x80) == 0) { length = control + MIN_REPEAT_SIZE; // read the repeated value int value = input.read(); if (value == -1) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "Reading RLE byte got EOF"); } // fill buffer with the value Arrays.fill(buffer, 0, length, (byte) value); } else { // length is 2's complement of byte length = 0x100 - control; // read the literals into the buffer input.readFully(buffer, 0, length); } }
/**
 * Repositions the underlying {@code inputStream} to the input-stream checkpoint carried by
 * {@code checkpoint}.
 *
 * @param checkpoint checkpoint whose input-stream position is restored
 * @throws IOException if the underlying stream fails to seek
 */
@Override
public void seekToCheckpoint(ByteArrayStreamCheckpoint checkpoint) throws IOException {
    inputStream.seekToCheckpoint(checkpoint.getInputStreamCheckpoint());
}
private void readNextBlock() throws IOException { lastReadInputCheckpoint = input.getCheckpoint(); int control = input.read(); if (control == -1) { throw new OrcCorruptionException("Read past end of buffer RLE byte from %s", input); } offset = 0; // if byte high bit is not set, this is a repetition; otherwise it is a literal sequence if ((control & 0x80) == 0) { length = control + MIN_REPEAT_SIZE; // read the repeated value int value = input.read(); if (value == -1) { throw new OrcCorruptionException("Reading RLE byte got EOF"); } // fill buffer with the value Arrays.fill(buffer, 0, length, (byte) value); } else { // length is 2's complement of byte length = 0x100 - control; // read the literals into the buffer readFully(input, buffer, 0, length); } }
/**
 * Reads one integer value from {@code input}, either as a variable-length integer or as a
 * fixed-width little-endian value whose width depends on {@code type}.
 *
 * <p>Fixed widths: {@code SHORT} reads 2 bytes, {@code INT} reads 4 bytes, {@code LONG} reads
 * 8 bytes, least significant byte first.
 *
 * <p>NOTE(review): the fixed-width paths do not check {@code input.read()} for an
 * end-of-stream result, and the {@code SHORT} path combines its two bytes without narrowing to
 * {@code short}. Confirm both are intended.
 *
 * @param input stream to read from
 * @param type kind of the column; only consulted when {@code usesVInt} is false
 * @param signed passed through to the variable-length decoder
 * @param usesVInt when true the value is decoded with {@code readVInt} and {@code type} is ignored
 * @return the decoded value
 * @throws IOException if the underlying read fails
 * @throws IllegalArgumentException if {@code usesVInt} is false and {@code type} is not
 *         {@code SHORT}, {@code INT} or {@code LONG}
 */
public static long readDwrfLong(OrcInputStream input, OrcTypeKind type, boolean signed, boolean usesVInt) throws IOException {
    if (usesVInt) {
        return readVInt(signed, input);
    }

    if (type == SHORT) {
        int low = input.read();
        int high = input.read();
        return low | (high << 8);
    }

    if (type == INT) {
        // accumulate as an int so the result widens to long exactly like the int expression did
        int value = 0;
        for (int shift = 0; shift < 32; shift += 8) {
            value |= input.read() << shift;
        }
        return value;
    }

    if (type == LONG) {
        long value = 0;
        for (int shift = 0; shift < 64; shift += 8) {
            value |= ((long) input.read()) << shift;
        }
        return value;
    }

    throw new IllegalArgumentException(type + " type is not supported");
}
/**
 * Creates a byte stream on top of {@code input} and records the input's current checkpoint as
 * the last-read checkpoint.
 *
 * @param input stream the bytes are decoded from
 */
public ByteInputStream(OrcInputStream input) {
    this.input = input;
    this.lastReadInputCheckpoint = input.getCheckpoint();
}
/**
 * Reads bytes from the current chunk into {@code b}, moving to the next chunk first when the
 * current one has nothing remaining.
 *
 * @param b destination array
 * @param off start position in {@code b}
 * @param length maximum number of bytes to read
 * @return the result of reading from the current chunk, or -1 when there is no current chunk
 * @throws IOException if advancing to the next chunk or reading fails
 */
@Override
public int read(byte[] b, int off, int length) throws IOException {
    // an exhausted chunk is replaced by the next one; advance() clears 'current' at end of data
    if (current != null && current.remaining() == 0) {
        advance();
    }
    if (current == null) {
        return -1;
    }
    return current.read(b, off, length);
}
private void advance() throws IOException { if (compressedSliceInput == null || compressedSliceInput.remaining() == 0) { current = null; return; } // 3 byte header // NOTE: this must match BLOCK_HEADER_SIZE currentCompressedBlockOffset = Ints.checkedCast(compressedSliceInput.position()); int b0 = compressedSliceInput.readUnsignedByte(); int b1 = compressedSliceInput.readUnsignedByte(); int b2 = compressedSliceInput.readUnsignedByte(); boolean isUncompressed = (b0 & 0x01) == 1; int chunkLength = (b2 << 15) | (b1 << 7) | (b0 >>> 1); Slice chunk = compressedSliceInput.readSlice(chunkLength); if (isUncompressed) { current = chunk.getInput(); } else { int uncompressedSize; if (compressionKind == ZLIB) { uncompressedSize = decompressZip(chunk); } else { uncompressedSize = decompressSnappy(chunk); } current = Slices.wrappedBuffer(buffer, 0, uncompressedSize).getInput(); } }
try { inflater.setInput((byte[]) in.getBase(), (int) (in.getAddress() - ARRAY_BYTE_BASE_OFFSET), in.length()); allocateOrGrowBuffer(in.length() * EXPECTED_COMPRESSION_RATIO, false); int uncompressedLength = 0; while (true) { allocateOrGrowBuffer(buffer.length * 2, true); if (buffer.length <= oldBufferSize) { throw new IllegalStateException(String.format("Buffer failed to grow. Old size %d, current size %d", oldBufferSize, buffer.length));
/**
 * Repositions the underlying {@code input} to the input-stream checkpoint carried by
 * {@code checkpoint}.
 *
 * @param checkpoint checkpoint whose input-stream position is restored
 * @throws IOException if the underlying stream fails to seek
 */
@Override
public void seekToCheckpoint(DoubleStreamCheckpoint checkpoint) throws IOException {
    input.seekToCheckpoint(checkpoint.getInputStreamCheckpoint());
}
private void readNextBlock() throws IOException { lastReadInputCheckpoint = input.getCheckpoint(); int control = input.read(); if (control == -1) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "Read past end of buffer RLE byte"); } offset = 0; // if byte high bit is not set, this is a repetition; otherwise it is a literal sequence if ((control & 0x80) == 0) { length = control + MIN_REPEAT_SIZE; // read the repeated value int value = input.read(); if (value == -1) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "Reading RLE byte got EOF"); } // fill buffer with the value Arrays.fill(buffer, 0, length, (byte) value); } else { // length is 2's complement of byte length = 0x100 - control; // read the literals into the buffer input.readFully(buffer, 0, length); } }
long b; do { b = input.read(); if (b == -1) { throw new OrcCorruptionException(input.getOrcDataSourceId(), "Reading BigInteger past EOF"); throw new OrcCorruptionException(input.getOrcDataSourceId(), "Decimal does not fit long (invalid table schema?)");
private void readValues() throws IOException { lastReadInputCheckpoint = input.getCheckpoint(); int control = input.read(); if (control == -1) { throw new OrcCorruptionException("Read past end of RLE integer from %s", input); } if (control < 0x80) { numLiterals = control + MIN_REPEAT_SIZE; used = 0; repeat = true; delta = input.read(); if (delta == -1) { throw new OrcCorruptionException("End of stream in RLE Integer from %s", input); } // convert from 0 to 255 to -128 to 127 by converting to a signed byte // noinspection SillyAssignment delta = (byte) delta; literals[0] = LongDecode.readVInt(signed, input); } else { numLiterals = 0x100 - control; used = 0; repeat = false; for (int i = 0; i < numLiterals; ++i) { literals[i] = LongDecode.readVInt(signed, input); } } }
/**
 * Reads exactly {@code length} bytes into {@code buffer}, starting at index {@code offset},
 * by calling {@code read} repeatedly until the request is satisfied.
 *
 * <p>The previous loop compared {@code offset} against {@code length} directly, which treated
 * {@code length} as an end index: with a non-zero {@code offset} it read too few bytes (or none
 * at all). The remaining byte count is now tracked separately, so {@code length} is always a
 * count. Behaviour for {@code offset == 0} is unchanged.
 *
 * @param buffer destination array
 * @param offset index in {@code buffer} at which the first byte is stored
 * @param length number of bytes to read
 * @throws OrcCorruptionException if the stream ends before {@code length} bytes were read
 * @throws IOException if the underlying read fails
 */
public void readFully(byte[] buffer, int offset, int length) throws IOException {
    int position = offset;
    int remaining = length;
    while (remaining > 0) {
        int result = read(buffer, position, remaining);
        if (result < 0) {
            throw new OrcCorruptionException(orcDataSourceId, "Unexpected end of stream");
        }
        position += result;
        remaining -= result;
    }
}
/**
 * Creates a long stream on top of {@code input} and records the input's current checkpoint as
 * the last-read checkpoint.
 *
 * @param input stream the values are decoded from
 * @param signed stored and later passed to the variable-length integer decoder
 */
public LongInputStreamV1(OrcInputStream input, boolean signed) {
    this.signed = signed;
    this.input = input;
    this.lastReadInputCheckpoint = input.getCheckpoint();
}
/**
 * Moves this stream to the position encoded in {@code checkpoint}.
 *
 * <p>The checkpoint is split into a compressed block offset and an offset within the
 * decompressed data. If the block offset differs from the current block, the compressed input
 * is repositioned and the current decompressed data is replaced by an empty input. The
 * decompressed offset is then applied, advancing to the next chunk when the current data is
 * shorter than the requested offset.
 *
 * @param checkpoint encoded position to seek to
 * @return true if the current decompressed data was discarded because the checkpoint refers to
 *         a different compressed block
 * @throws OrcCorruptionException if the checkpoint refers to a different compressed block but
 *         no decompressor is present
 * @throws IOException if advancing to the next chunk fails
 */
public boolean seekToCheckpoint(long checkpoint) throws IOException {
    int compressedBlockOffset = decodeCompressedBlockOffset(checkpoint);
    int decompressedOffset = decodeDecompressedOffset(checkpoint);

    boolean discardedBuffer = compressedBlockOffset != currentCompressedBlockOffset;
    if (discardedBuffer) {
        if (!decompressor.isPresent()) {
            throw new OrcCorruptionException(orcDataSourceId, "Reset stream has a compressed block offset but stream is not compressed");
        }
        compressedSliceInput.setPosition(compressedBlockOffset);
        current = EMPTY_SLICE.getInput();
    }

    if (decompressedOffset == current.position()) {
        // already at the requested position
        return discardedBuffer;
    }

    current.setPosition(0);
    if (current.remaining() < decompressedOffset) {
        // the requested offset lies beyond the current data: load the next chunk
        decompressedOffset -= current.remaining();
        advance();
    }
    current.setPosition(decompressedOffset);
    return discardedBuffer;
}
/**
 * Decompresses a Snappy-compressed slice into {@code buffer}.
 *
 * <p>The uncompressed length is read from the compressed data first, checked against
 * {@code maxBufferSize}, and used to size {@code buffer} before decompressing.
 *
 * @param in slice holding the compressed bytes; its base is cast to {@code byte[]}
 * @return the value returned by {@code Snappy.uncompress}
 * @throws IllegalArgumentException if the uncompressed length exceeds {@code maxBufferSize}
 * @throws IOException declared by the signature
 */
private int decompressSnappy(Slice in) throws IOException {
    byte[] compressed = (byte[]) in.getBase();
    int compressedOffset = (int) (in.getAddress() - ARRAY_BYTE_BASE_OFFSET);
    int compressedLength = in.length();

    int requiredSize = Snappy.getUncompressedLength(compressed, compressedOffset);
    checkArgument(requiredSize <= maxBufferSize, "Snappy requires buffer (%s) larger than max size (%s)", requiredSize, maxBufferSize);

    allocateOrGrowBuffer(requiredSize, false);
    return Snappy.uncompress(compressed, compressedOffset, compressedLength, buffer, 0);
}