assert ctx != null; includedRgs = colRgs[ctx.includedIx]; int indexIx = RecordReaderUtils.getIndexPosition(ctx.encoding.getKind(), types.get(colIx).getKind(), streamKind, isCompressed, hasNull[colIx]); ctx.addStream(offset, stream, indexIx);
/**
 * Produces a synthetic stripe footer; the {@code stripe} argument is not consulted.
 *
 * <p>Unless {@code isEmpty} is set, the footer carries two streams for column 0
 * (a LENGTH stream of 20 bytes and a DATA stream of 40 bytes) and one DIRECT_V2
 * column encoding with a dictionary size of 10. When {@code doStreamStep} is set,
 * the footer is additionally serialized to bytes and parsed back before being returned.
 *
 * @param stripe unused by this implementation
 * @return the built (and optionally round-tripped) footer
 * @throws IOException if serializing or re-parsing the footer fails
 */
@Override
public OrcProto.StripeFooter readStripeFooter(StripeInformation stripe) throws IOException {
  OrcProto.StripeFooter.Builder footerBuilder = OrcProto.StripeFooter.newBuilder();
  if (!isEmpty) {
    footerBuilder.addStreams(
        OrcProto.Stream.newBuilder()
            .setColumn(0)
            .setLength(20)
            .setKind(OrcProto.Stream.Kind.LENGTH));
    footerBuilder.addStreams(
        OrcProto.Stream.newBuilder()
            .setColumn(0)
            .setLength(40)
            .setKind(OrcProto.Stream.Kind.DATA));
    footerBuilder.addColumns(
        OrcProto.ColumnEncoding.newBuilder()
            .setDictionarySize(10)
            .setKind(OrcProto.ColumnEncoding.Kind.DIRECT_V2));
  }

  OrcProto.StripeFooter built = footerBuilder.build();
  if (!doStreamStep) {
    return built;
  }

  // Round-trip through the wire format so the returned message is a freshly parsed one.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  CodedOutputStream coded = CodedOutputStream.newInstance(buffer);
  built.writeTo(coded);
  coded.flush();
  byte[] serialized = buffer.toByteArray();
  return OrcProto.StripeFooter.newBuilder().mergeFrom(serialized).build();
}
private void mergeStripeInfos(StripeData to, StripeData from) { if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) { LlapIoImpl.CACHE_LOGGER.trace("Merging slices data: old " + to + " and new " + from); } to.knownTornStart = Math.min(to.knownTornStart, from.knownTornStart); if (from.encodings.length != to.encodings.length) { throw new RuntimeException("Different encodings " + from + "; " + to); } for (int colIx = 0; colIx < from.encodings.length; ++colIx) { if (to.encodings[colIx] == null) { to.encodings[colIx] = from.encodings[colIx]; } else if (from.encodings[colIx] != null && !to.encodings[colIx].equals(from.encodings[colIx])) { throw new RuntimeException("Different encodings at " + colIx + ": " + from + "; " + to); } LlapSerDeDataBuffer[][] fromColData = from.data[colIx]; if (fromColData != null) { if (to.data[colIx] != null) { // Note: we assume here that the data that was returned to the caller from cache will not // be passed back in via put. Right now it's safe since we don't do anything. But if we // evict proactively, we will have to compare objects all the way down. handleRemovedColumnData(to.data[colIx]); } to.data[colIx] = fromColData; } } }
private void mergeStripeInfos(StripeData to, StripeData from) { if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) { LlapIoImpl.CACHE_LOGGER.trace("Merging slices data: old " + to + " and new " + from); } to.knownTornStart = Math.min(to.knownTornStart, from.knownTornStart); if (from.encodings.length != to.encodings.length) { throw new RuntimeException("Different encodings " + from + "; " + to); } for (int colIx = 0; colIx < from.encodings.length; ++colIx) { if (to.encodings[colIx] == null) { to.encodings[colIx] = from.encodings[colIx]; } else if (from.encodings[colIx] != null && !to.encodings[colIx].equals(from.encodings[colIx])) { throw new RuntimeException("Different encodings at " + colIx + ": " + from + "; " + to); } LlapSerDeDataBuffer[][] fromColData = from.data[colIx]; if (fromColData != null) { if (to.data[colIx] != null) { // Note: we assume here that the data that was returned to the caller from cache will not // be passed back in via put. Right now it's safe since we don't do anything. But if we // evict proactively, we will have to compare objects all the way down. handleRemovedColumnData(to.data[colIx]); } to.data[colIx] = fromColData; } } }
private static List<ColumnEncoding> combineCacheAndWriterEncodings( ColumnEncoding[] cacheEncodings, List<ColumnEncoding> writerEncodings) throws IOException { // TODO: refactor with cache impl? it has the same merge logic if (cacheEncodings == null) { return new ArrayList<>(writerEncodings); } if (cacheEncodings.length != writerEncodings.size()) { throw new IOException("Incompatible encoding lengths: " + Arrays.toString(cacheEncodings) + " vs " + writerEncodings); } ColumnEncoding[] combinedEncodings = Arrays.copyOf(cacheEncodings, cacheEncodings.length); for (int colIx = 0; colIx < cacheEncodings.length; ++colIx) { ColumnEncoding newEncoding = writerEncodings.get(colIx); if (newEncoding == null) continue; if (combinedEncodings[colIx] != null && !newEncoding.equals(combinedEncodings[colIx])) { throw new IOException("Incompatible encodings at " + colIx + ": " + Arrays.toString(cacheEncodings) + " vs " + writerEncodings); } combinedEncodings[colIx] = newEncoding; } return Lists.newArrayList(combinedEncodings); }
@Override OrcProto.ColumnEncoding getEncoding() { // Returns the encoding used for the last call to writeStripe if (useDictionaryEncoding) { if(isDirectV2) { return OrcProto.ColumnEncoding.newBuilder().setKind( OrcProto.ColumnEncoding.Kind.DICTIONARY_V2). setDictionarySize(dictionary.size()).build(); } return OrcProto.ColumnEncoding.newBuilder().setKind( OrcProto.ColumnEncoding.Kind.DICTIONARY). setDictionarySize(dictionary.size()).build(); } else { if(isDirectV2) { return OrcProto.ColumnEncoding.newBuilder().setKind( OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); } return OrcProto.ColumnEncoding.newBuilder().setKind( OrcProto.ColumnEncoding.Kind.DIRECT).build(); } }
@Override OrcProto.ColumnEncoding getEncoding() { // Returns the encoding used for the last call to writeStripe if (useDictionaryEncoding) { if(isDirectV2) { return OrcProto.ColumnEncoding.newBuilder().setKind( OrcProto.ColumnEncoding.Kind.DICTIONARY_V2). setDictionarySize(dictionary.size()).build(); } return OrcProto.ColumnEncoding.newBuilder().setKind( OrcProto.ColumnEncoding.Kind.DICTIONARY). setDictionarySize(dictionary.size()).build(); } else { if(isDirectV2) { return OrcProto.ColumnEncoding.newBuilder().setKind( OrcProto.ColumnEncoding.Kind.DIRECT_V2).build(); } return OrcProto.ColumnEncoding.newBuilder().setKind( OrcProto.ColumnEncoding.Kind.DIRECT).build(); } }
/**
 * Copies this builder's state into a new {@code ColumnEncoding} message.
 *
 * <p>The three field values are always copied; each of the low three bits of
 * {@code bitField0_} is carried over to the message only when set on the builder.
 * {@code onBuilt()} is invoked once the message has been populated.
 *
 * @return the newly created message
 */
public org.apache.orc.OrcProto.ColumnEncoding buildPartial() {
  final org.apache.orc.OrcProto.ColumnEncoding message =
      new org.apache.orc.OrcProto.ColumnEncoding(this);
  final int builderBits = bitField0_;
  int messageBits = 0;

  // Bit checked alongside kind_ (presumably its has-bit).
  if ((builderBits & 0x00000001) != 0) {
    messageBits |= 0x00000001;
  }
  message.kind_ = kind_;

  // Bit checked alongside dictionarySize_ (presumably its has-bit).
  if ((builderBits & 0x00000002) != 0) {
    messageBits |= 0x00000002;
  }
  message.dictionarySize_ = dictionarySize_;

  // Bit checked alongside bloomEncoding_ (presumably its has-bit).
  if ((builderBits & 0x00000004) != 0) {
    messageBits |= 0x00000004;
  }
  message.bloomEncoding_ = bloomEncoding_;

  message.bitField0_ = messageBits;
  onBuilt();
  return message;
}
private void readDictionaryLengthStream(InStream in, OrcProto.ColumnEncoding encoding) throws IOException { int dictionarySize = encoding.getDictionarySize(); if (in != null) { // Guard against empty LENGTH stream. IntegerReader lenReader = createIntegerReader(encoding.getKind(), in, false, false); int offset = 0; if (dictionaryOffsets == null || dictionaryOffsets.length < dictionarySize + 1) { dictionaryOffsets = new int[dictionarySize + 1]; } for (int i = 0; i < dictionarySize; ++i) { dictionaryOffsets[i] = offset; offset += (int) lenReader.next(); } dictionaryOffsets[dictionarySize] = offset; in.close(); } }
private void readDictionaryLengthStream(InStream in, OrcProto.ColumnEncoding encoding) throws IOException { int dictionarySize = encoding.getDictionarySize(); if (in != null) { // Guard against empty LENGTH stream. IntegerReader lenReader = createIntegerReader(encoding.getKind(), in, false, context); int offset = 0; if (dictionaryOffsets == null || dictionaryOffsets.length < dictionarySize + 1) { dictionaryOffsets = new int[dictionarySize + 1]; } for (int i = 0; i < dictionarySize; ++i) { dictionaryOffsets[i] = offset; offset += (int) lenReader.next(); } dictionaryOffsets[dictionarySize] = offset; in.close(); } }
private void readDictionaryLengthStream(InStream in, OrcProto.ColumnEncoding encoding) throws IOException { int dictionarySize = encoding.getDictionarySize(); if (in != null) { // Guard against empty LENGTH stream. IntegerReader lenReader = createIntegerReader(encoding.getKind(), in, false, false); int offset = 0; if (dictionaryOffsets == null || dictionaryOffsets.length < dictionarySize + 1) { dictionaryOffsets = new int[dictionarySize + 1]; } for (int i = 0; i < dictionarySize; ++i) { dictionaryOffsets[i] = offset; offset += (int) lenReader.next(); } dictionaryOffsets[dictionarySize] = offset; in.close(); } }
/**
 * Returns the builder obtained by passing this instance to {@code newBuilder}.
 *
 * @return the builder produced by {@code newBuilder(this)}
 */
public Builder toBuilder() {
  final Builder fromThis = newBuilder(this);
  return fromThis;
}
public Builder newBuilderForType() { return newBuilder(); } public static Builder newBuilder(org.apache.orc.OrcProto.ColumnEncoding prototype) {
/**
 * Maps the bloom encoding recorded in a column encoding to an {@code Encoding} constant.
 *
 * @param encoding the column encoding to inspect
 * @return {@code ORIGINAL} when no bloom encoding is set or its value is 0,
 *         {@code UTF8_UTC} when it is 1, and {@code FUTURE} for any other value
 */
public static Encoding from(OrcProto.ColumnEncoding encoding) {
  if (!encoding.hasBloomEncoding()) {
    return ORIGINAL;
  }
  final int bloomId = encoding.getBloomEncoding();
  if (bloomId == 0) {
    return ORIGINAL;
  }
  if (bloomId == 1) {
    return UTF8_UTC;
  }
  return FUTURE;
}
}
/**
 * Returns the value of {@code ColumnEncoding.getDefaultInstance()}.
 *
 * @return the default {@code ColumnEncoding} instance
 */
public org.apache.orc.OrcProto.ColumnEncoding getDefaultInstanceForType() {
  final org.apache.orc.OrcProto.ColumnEncoding defaultInstance =
      org.apache.orc.OrcProto.ColumnEncoding.getDefaultInstance();
  return defaultInstance;
}
/**
 * Copies every field that is set on {@code other} into this builder, then merges
 * {@code other}'s unknown fields.
 *
 * <p>Nothing is done when {@code other} is the shared default instance.
 *
 * @param other the message to merge from
 * @return this builder
 */
public Builder mergeFrom(org.apache.orc.OrcProto.ColumnEncoding other) {
  final boolean isDefaultInstance =
      other == org.apache.orc.OrcProto.ColumnEncoding.getDefaultInstance();
  if (!isDefaultInstance) {
    if (other.hasKind()) {
      setKind(other.getKind());
    }
    if (other.hasDictionarySize()) {
      setDictionarySize(other.getDictionarySize());
    }
    if (other.hasBloomEncoding()) {
      setBloomEncoding(other.getBloomEncoding());
    }
    this.mergeUnknownFields(other.getUnknownFields());
  }
  return this;
}
/**
 * Creates a builder and merges {@code prototype} into it.
 *
 * @param prototype the message whose contents are merged into the new builder
 * @return the result of {@code mergeFrom(prototype)} on a fresh builder
 */
public static Builder newBuilder(org.apache.orc.OrcProto.ColumnEncoding prototype) {
  final Builder fresh = newBuilder();
  return fresh.mergeFrom(prototype);
}

/**
 * Returns a builder created from this instance via {@code newBuilder(this)}.
 *
 * @return the builder produced by {@code newBuilder(this)}
 */
public Builder toBuilder() {
  final Builder fromThis = newBuilder(this);
  return fromThis;
}
/**
 * Get the encoding for this column.
 *
 * <p>The kind is always DIRECT; the bloom encoding id is set to the current one
 * only when {@code createBloomFilter} is enabled.
 *
 * @return the information about the encoding of this column
 */
OrcProto.ColumnEncoding.Builder getEncoding() {
  OrcProto.ColumnEncoding.Builder encoding =
      OrcProto.ColumnEncoding.newBuilder().setKind(OrcProto.ColumnEncoding.Kind.DIRECT);
  if (!createBloomFilter) {
    return encoding;
  }
  encoding.setBloomEncoding(BloomFilterIO.Encoding.CURRENT.getId());
  return encoding;
}
/**
 * Is this stream part of a dictionary?
 *
 * <p>A DICTIONARY_DATA stream always is; a LENGTH stream is only when the column uses
 * the DICTIONARY or DICTIONARY_V2 encoding. Callers must not pass DICTIONARY_COUNT
 * (checked by an assertion).
 *
 * @param kind     the stream kind being classified
 * @param encoding the encoding of the column the stream belongs to
 * @return is this part of a dictionary?
 */
public static boolean isDictionary(OrcProto.Stream.Kind kind,
                                   OrcProto.ColumnEncoding encoding) {
  assert kind != OrcProto.Stream.Kind.DICTIONARY_COUNT;
  final OrcProto.ColumnEncoding.Kind encodingKind = encoding.getKind();
  if (kind == OrcProto.Stream.Kind.DICTIONARY_DATA) {
    return true;
  }
  if (kind != OrcProto.Stream.Kind.LENGTH) {
    return false;
  }
  return encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY
      || encodingKind == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2;
}
/**
 * Verifies that the column uses one of the two dictionary encodings.
 *
 * @param encoding the encoding to validate
 * @throws IOException if the kind is neither DICTIONARY nor DICTIONARY_V2
 */
@Override
void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
  final OrcProto.ColumnEncoding.Kind kind = encoding.getKind();
  final boolean dictionaryEncoded =
      kind == OrcProto.ColumnEncoding.Kind.DICTIONARY
          || kind == OrcProto.ColumnEncoding.Kind.DICTIONARY_V2;
  if (!dictionaryEncoded) {
    throw new IOException("Unknown encoding " + encoding + " in column " + columnId);
  }
}