boolean[] includedRgs = null; // Will always be the same for all cols at the moment. for (OrcProto.Stream stream : streamList) { long length = stream.getLength(); int colIx = stream.getColumn(); OrcProto.Stream.Kind streamKind = stream.getKind(); if (!included[colIx] || StreamName.getArea(streamKind) != StreamName.Area.DATA) {
@Override
public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
  // Build a synthetic stripe footer: one LENGTH stream (20 bytes) and one
  // DATA stream (40 bytes) for column 0, plus a DIRECT_V2 encoding entry.
  // When isEmpty is set, the footer carries no streams or encodings at all.
  OrcProto.StripeFooter.Builder builder = OrcProto.StripeFooter.newBuilder();
  if (!isEmpty) {
    builder.addStreams(OrcProto.Stream.newBuilder()
            .setColumn(0).setLength(20).setKind(OrcProto.Stream.Kind.LENGTH))
        .addStreams(OrcProto.Stream.newBuilder()
            .setColumn(0).setLength(40).setKind(OrcProto.Stream.Kind.DATA))
        .addColumns(OrcProto.ColumnEncoding.newBuilder()
            .setDictionarySize(10).setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2));
  }
  OrcProto.StripeFooter result = builder.build();
  if (doStreamStep) {
    // Round-trip the message through serialization so the returned footer
    // has gone through the same encode/decode path a real reader would use.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    CodedOutputStream coded = CodedOutputStream.newInstance(bytes);
    result.writeTo(coded);
    coded.flush();
    result = OrcProto.StripeFooter.newBuilder().mergeFrom(bytes.toByteArray()).build();
  }
  return result;
}
StreamName name = pair.getKey(); long streamSize = pair.getValue().getOutputSize(); builder.addStreams(OrcProto.Stream.newBuilder() .setColumn(name.getColumn()) .setKind(name.getKind())
StreamName name = pair.getKey(); long streamSize = pair.getValue().getOutputSize(); builder.addStreams(OrcProto.Stream.newBuilder() .setColumn(name.getColumn()) .setKind(name.getKind())
break; int column = stream.getColumn(); if (stream.hasKind() && range.getOffset() <= offset) { switch (stream.getKind()) { case ROW_INDEX: if (included == null || included[column]) { ByteBuffer bb = range.getData().duplicate(); bb.position((int) (offset - range.getOffset())); bb.limit((int) (bb.position() + stream.getLength())); indexes[column] = OrcProto.RowIndex.parseFrom( InStream.createCodedInputStream("index", ReaderImpl.singleton(new BufferChunk(bb, 0)), stream.getLength(), codec, bufferSize)); ByteBuffer bb = range.getData().duplicate(); bb.position((int) (offset - range.getOffset())); bb.limit((int) (bb.position() + stream.getLength())); bloomFilterIndices[column] = OrcProto.BloomFilterIndex.parseFrom (InStream.createCodedInputStream("bloom_filter", ReaderImpl.singleton(new BufferChunk(bb, 0)), stream.getLength(), codec, bufferSize)); offset += stream.getLength();
if (stream.hasKind() && stream.hasColumn()) { int column = stream.getColumn(); if (sargColumns[column]) { switch (stream.getKind()) { case BLOOM_FILTER: if (bloomFilterKinds[column] == null && if (stream.hasKind() && stream.hasColumn()) { int column = stream.getColumn(); if (fileIncluded == null || fileIncluded[column]) { boolean needStream = false; switch (stream.getKind()) { case ROW_INDEX: needStream = true; result.addOrMerge(offset, offset + stream.getLength(), true, false); offset += stream.getLength();
CreateHelper list = new CreateHelper(); for (OrcProto.Stream stream : streamList) { long length = stream.getLength(); int column = stream.getColumn(); OrcProto.Stream.Kind streamKind = stream.getKind(); if (stream.hasKind() && (StreamName.getArea(streamKind) == StreamName.Area.DATA) && (column < includedColumns.length && includedColumns[column])) {
long streamSize = receiver.getOutputSize(); StreamName name = pair.getKey(); footerBuilder.addStreams(OrcProto.Stream.newBuilder().setColumn(name.getColumn()) .setKind(name.getKind()).setLength(streamSize)); if (StreamName.Area.INDEX == name.getArea()) {
/** * Plan the ranges of the file that we need to read given the list of * columns and row groups. * * @param streamList the list of streams available * @param includedColumns which columns are needed * @param doMergeBuffers * @return the list of disk ranges that will be loaded */ static DiskRangeList planReadPartialDataStreams (List<OrcProto.Stream> streamList, boolean[] includedColumns, boolean doMergeBuffers) { long offset = 0; // figure out which columns have a present stream DiskRangeList.CreateHelper list = new DiskRangeList.CreateHelper(); for (OrcProto.Stream stream : streamList) { long length = stream.getLength(); int column = stream.getColumn(); OrcProto.Stream.Kind streamKind = stream.getKind(); // since stream kind is optional, first check if it exists if (stream.hasKind() && (org.apache.orc.impl.StreamName.getArea(streamKind) == org.apache.orc.impl.StreamName.Area.DATA) && includedColumns[column]) { RecordReaderUtils.addEntireStreamToRanges(offset, length, list, doMergeBuffers); } offset += length; } return list.extract(); }
/** * Plan the ranges of the file that we need to read given the list of * columns and row groups. * * @param streamList the list of streams available * @param includedColumns which columns are needed * @param doMergeBuffers * @return the list of disk ranges that will be loaded */ static DiskRangeList planReadPartialDataStreams (List<OrcProto.Stream> streamList, boolean[] includedColumns, boolean doMergeBuffers) { long offset = 0; // figure out which columns have a present stream DiskRangeList.CreateHelper list = new DiskRangeList.CreateHelper(); for (OrcProto.Stream stream : streamList) { long length = stream.getLength(); int column = stream.getColumn(); OrcProto.Stream.Kind streamKind = stream.getKind(); // since stream kind is optional, first check if it exists if (stream.hasKind() && (org.apache.orc.impl.StreamName.getArea(streamKind) == org.apache.orc.impl.StreamName.Area.DATA) && includedColumns[column]) { RecordReaderUtils.addEntireStreamToRanges(offset, length, list, doMergeBuffers); } offset += length; } return list.extract(); }
/**
 * Create the InStreams for the included DATA-area streams of a stripe and
 * record them in {@code streams}, keyed by stream name.
 *
 * @param streamDescriptions the stream metadata from the stripe footer
 * @param ranges the disk ranges that were read for this stripe
 * @param includeColumn which columns to open streams for; null means all
 * @param codec the compression codec, or null if uncompressed
 * @param bufferSize the compression buffer size
 * @param streams output map receiving the created streams
 * @throws IOException if a stream cannot be created
 */
void createStreams(List<OrcProto.Stream> streamDescriptions, DiskRangeList ranges,
    boolean[] includeColumn, CompressionCodec codec, int bufferSize,
    Map<org.apache.orc.impl.StreamName, InStream> streams) throws IOException {
  long streamOffset = 0;
  for (OrcProto.Stream streamDesc : streamDescriptions) {
    int column = streamDesc.getColumn();
    // Skip excluded columns and non-DATA streams. Guard the column index
    // before dereferencing includeColumn: a stream can reference a column
    // past the array end (reader schema narrower than file schema), which
    // previously threw ArrayIndexOutOfBoundsException.
    if ((includeColumn != null
            && (column < includeColumn.length && !includeColumn[column]))
        || streamDesc.hasKind()
        && (org.apache.orc.impl.StreamName.getArea(streamDesc.getKind())
            != org.apache.orc.impl.StreamName.Area.DATA)) {
      streamOffset += streamDesc.getLength();
      continue;
    }
    List<DiskRange> buffers = RecordReaderUtils.getStreamBuffers(
        ranges, streamOffset, streamDesc.getLength());
    org.apache.orc.impl.StreamName name = new StreamName(column, streamDesc.getKind());
    streams.put(name, InStream.create(name.toString(), buffers,
        streamDesc.getLength(), codec, bufferSize));
    streamOffset += streamDesc.getLength();
  }
}
/**
 * Create the InStreams for the included DATA-area streams of a stripe and
 * record them in {@code streams}, keyed by stream name.
 *
 * @param streamDescriptions the stream metadata from the stripe footer
 * @param ranges the disk ranges that were read for this stripe
 * @param includeColumn which columns to open streams for; null means all
 * @param codec the compression codec, or null if uncompressed
 * @param bufferSize the compression buffer size
 * @param streams output map receiving the created streams
 * @throws IOException if a stream cannot be created
 */
void createStreams(List<OrcProto.Stream> streamDescriptions, DiskRangeList ranges,
    boolean[] includeColumn, CompressionCodec codec, int bufferSize,
    Map<org.apache.orc.impl.StreamName, InStream> streams) throws IOException {
  long offset = 0;
  for (OrcProto.Stream desc : streamDescriptions) {
    int col = desc.getColumn();
    long len = desc.getLength();
    boolean excluded = includeColumn != null && !includeColumn[col];
    // Stream kind is optional; an absent kind is never treated as non-DATA.
    boolean nonData = desc.hasKind()
        && org.apache.orc.impl.StreamName.getArea(desc.getKind())
            != org.apache.orc.impl.StreamName.Area.DATA;
    if (!excluded && !nonData) {
      List<DiskRange> buffers = RecordReaderUtils.getStreamBuffers(ranges, offset, len);
      org.apache.orc.impl.StreamName name = new StreamName(col, desc.getKind());
      streams.put(name, InStream.create(name.toString(), buffers, len, codec, bufferSize));
    }
    offset += len;
  }
}
/**
 * Create the InStreams for the included DATA-area streams of a stripe and
 * record them in {@code streams}, keyed by stream name.
 *
 * @param streamDescriptions the stream metadata from the stripe footer
 * @param ranges the disk ranges that were read for this stripe
 * @param includeColumn which columns to open streams for; null means all
 * @param codec the compression codec, or null if uncompressed
 * @param bufferSize the compression buffer size
 * @param streams output map receiving the created streams
 * @throws IOException if a stream cannot be created
 */
void createStreams(List<OrcProto.Stream> streamDescriptions, DiskRangeList ranges,
    boolean[] includeColumn, CompressionCodec codec, int bufferSize,
    Map<StreamName, InStream> streams) throws IOException {
  long offset = 0;
  for (OrcProto.Stream desc : streamDescriptions) {
    int col = desc.getColumn();
    long len = desc.getLength();
    // Columns past the end of includeColumn are treated as included, so the
    // bounds check is part of the exclusion test rather than a hard skip.
    boolean excluded = includeColumn != null
        && col < includeColumn.length && !includeColumn[col];
    // Stream kind is optional; an absent kind is never treated as non-DATA.
    boolean nonData = desc.hasKind()
        && StreamName.getArea(desc.getKind()) != StreamName.Area.DATA;
    if (!excluded && !nonData) {
      List<DiskRange> buffers = RecordReaderUtils.getStreamBuffers(ranges, offset, len);
      StreamName name = new StreamName(col, desc.getKind());
      streams.put(name, InStream.create(name.toString(), buffers, len, codec, bufferSize));
    }
    offset += len;
  }
}
/**
 * Add the byte ranges of each included row group of a stream to the range
 * list, using the stream's row index to find per-group start positions.
 *
 * @param stream the stream being planned
 * @param includedRowGroups which row groups to read
 * @param isCompressed whether the file is compressed
 * @param index the row index for the stream's column
 * @param encoding the column's encoding
 * @param type the column's type
 * @param compressionSize the compression block size
 * @param hasNull whether the column has a PRESENT stream
 * @param offset the stream's offset within the stripe
 * @param length the stream's length
 * @param list accumulator for the resulting ranges
 * @param doMergeBuffers whether adjacent ranges should be merged
 */
public static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
    boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
    OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize,
    boolean hasNull, long offset, long length, DiskRangeList.CreateHelper list,
    boolean doMergeBuffers) {
  int lastGroup = includedRowGroups.length - 1;
  for (int rg = 0; rg <= lastGroup; ++rg) {
    if (!includedRowGroups[rg]) {
      continue;
    }
    // Which slot in the index entry holds this stream's position depends on
    // the encoding/type/kind combination and compression.
    int posn = getIndexPosition(encoding.getKind(), type.getKind(), stream.getKind(),
        isCompressed, hasNull);
    boolean isLast = rg == lastGroup;
    // The group's data ends where the next group begins (or at the stream end).
    long nextGroupOffset = isLast ? length : index.getEntry(rg + 1).getPositions(posn);
    long start = offset + index.getEntry(rg).getPositions(posn);
    long end = offset + estimateRgEndOffset(
        isCompressed, isLast, nextGroupOffset, length, compressionSize);
    list.addOrMerge(start, end, doMergeBuffers, true);
  }
}
/**
 * Add the byte ranges of each included row group of a stream to the range
 * list, using the stream's row index to find per-group start positions.
 *
 * @param stream the stream being planned
 * @param includedRowGroups which row groups to read
 * @param isCompressed whether the file is compressed
 * @param index the row index for the stream's column
 * @param encoding the column's encoding
 * @param type the column's type
 * @param compressionSize the compression block size
 * @param hasNull whether the column has a PRESENT stream
 * @param offset the stream's offset within the stripe
 * @param length the stream's length
 * @param list accumulator for the resulting ranges
 * @param doMergeBuffers whether adjacent ranges should be merged
 */
public static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
    boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
    OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize,
    boolean hasNull, long offset, long length, DiskRangeList.CreateHelper list,
    boolean doMergeBuffers) {
  for (int rg = 0; rg < includedRowGroups.length; ++rg) {
    if (includedRowGroups[rg]) {
      // Which slot in the index entry holds this stream's position depends
      // on the encoding/type/kind combination and compression.
      int slot = getIndexPosition(encoding.getKind(), type.getKind(), stream.getKind(),
          isCompressed, hasNull);
      boolean isLast = rg == includedRowGroups.length - 1;
      // The group's data ends where the next group begins (or at stream end).
      long nextStart = isLast ? length : index.getEntry(rg + 1).getPositions(slot);
      long rangeStart = index.getEntry(rg).getPositions(slot) + offset;
      long rangeEnd = offset + estimateRgEndOffset(
          isCompressed, isLast, nextStart, length, compressionSize);
      list.addOrMerge(rangeStart, rangeEnd, doMergeBuffers, true);
    }
  }
}
/**
 * Add the byte ranges of each included row group of a stream to the range
 * list, using the stream's row index to find per-group start positions.
 *
 * @param stream the stream being planned
 * @param includedRowGroups which row groups to read
 * @param isCompressed whether the file is compressed
 * @param index the row index for the stream's column
 * @param encoding the column's encoding
 * @param type the column's type
 * @param compressionSize the compression block size
 * @param hasNull whether the column has a PRESENT stream
 * @param offset the stream's offset within the stripe
 * @param length the stream's length
 * @param list accumulator for the resulting ranges
 * @param doMergeBuffers whether adjacent ranges should be merged
 */
public static void addRgFilteredStreamToRanges(OrcProto.Stream stream,
    boolean[] includedRowGroups, boolean isCompressed, OrcProto.RowIndex index,
    OrcProto.ColumnEncoding encoding, OrcProto.Type type, int compressionSize,
    boolean hasNull, long offset, long length, DiskRangeList.CreateHelper list,
    boolean doMergeBuffers) {
  final int groupCount = includedRowGroups.length;
  for (int group = 0; group < groupCount; ++group) {
    if (!includedRowGroups[group]) {
      continue;
    }
    // The index-entry slot for this stream's position is determined by the
    // encoding/type/kind combination and whether the data is compressed.
    final int posn = getIndexPosition(
        encoding.getKind(), type.getKind(), stream.getKind(), isCompressed, hasNull);
    final boolean isLast = group == groupCount - 1;
    final long nextGroupOffset =
        isLast ? length : index.getEntry(group + 1).getPositions(posn);
    final long start = offset + index.getEntry(group).getPositions(posn);
    final long end = offset + estimateRgEndOffset(
        isCompressed, isLast, nextGroupOffset, length, compressionSize);
    list.addOrMerge(start, end, doMergeBuffers, true);
  }
}
public Builder newBuilderForType() { return newBuilder(); } public static Builder newBuilder(org.apache.orc.OrcProto.Stream prototype) {
/** Returns a builder pre-populated with this message's fields. */
public Builder toBuilder() {
  Builder builder = newBuilder(this);
  return builder;
}
/** Creates a builder initialized from {@code prototype}. */
public static Builder newBuilder(org.apache.orc.OrcProto.Stream prototype) {
  Builder builder = newBuilder();
  return builder.mergeFrom(prototype);
}

/** Returns a builder pre-populated with this message's fields. */
public Builder toBuilder() {
  return newBuilder(this);
}