List<OrcProto.ColumnStatistics> fileColStats = orcTail.getFooter().getStatisticsList(); boolean[] fileIncluded; if (readerTypes == null) {
@Override public SplitInfos applySargToMetadata( SearchArgument sarg, ByteBuffer fileMetadata) throws IOException { // TODO: ideally we should store shortened representation of only the necessary fields // in HBase; it will probably require custom SARG application code. OrcTail orcTail = ReaderImpl.extractFileTail(fileMetadata); OrcProto.Footer footer = orcTail.getFooter(); int stripeCount = footer.getStripesCount(); boolean[] result = OrcInputFormat.pickStripesViaTranslatedSarg( sarg, orcTail.getWriterVersion(), footer.getTypesList(), orcTail.getStripeStatistics(), stripeCount); // For ORC case, send the boundaries of the stripes so we don't have to send the footer. SplitInfos.Builder sb = SplitInfos.newBuilder(); List<StripeInformation> stripes = orcTail.getStripes(); boolean isEliminated = true; for (int i = 0; i < result.length; ++i) { if (result != null && !result[i]) continue; isEliminated = false; StripeInformation si = stripes.get(i); if (LOG.isDebugEnabled()) { LOG.debug("PPD is adding a split " + i + ": " + si.getOffset() + ", " + si.getLength()); } sb.addInfos(SplitInfo.newBuilder().setIndex(i) .setOffset(si.getOffset()).setLength(si.getLength())); } return isEliminated ? null : sb.build(); }
/**
 * Scans the footer's user metadata for an entry named
 * {@code OrcRecordUpdater.ACID_KEY_INDEX_NAME}.
 *
 * <p>NOTE(review): as written this returns {@code true} when the ACID key index
 * entry is present. Given the method name, the opposite polarity may have been
 * intended. Confirm against the callers before relying on the result.
 *
 * @param footer the ORC footer whose user metadata is inspected
 * @return {@code true} if a metadata item with that name exists, {@code false} otherwise
 */
public static boolean isOriginal(Footer footer) {
  for (OrcProto.UserMetadataItem entry : footer.getMetadataList()) {
    if (!entry.hasName()) {
      continue;
    }
    if (entry.getName().equals(OrcRecordUpdater.ACID_KEY_INDEX_NAME)) {
      return true;
    }
  }
  return false;
}
public Builder mergeFrom(org.apache.orc.OrcProto.Footer other) { if (other == org.apache.orc.OrcProto.Footer.getDefaultInstance()) return this; if (other.hasHeaderLength()) { setHeaderLength(other.getHeaderLength()); if (other.hasContentLength()) { setContentLength(other.getContentLength()); if (other.hasNumberOfRows()) { setNumberOfRows(other.getNumberOfRows()); if (other.hasRowIndexStride()) { setRowIndexStride(other.getRowIndexStride()); if (other.hasWriter()) { setWriter(other.getWriter()); this.mergeUnknownFields(other.getUnknownFields()); return this;
FileTail tail = orcTail.getFileTail(); stats = orcTail.getStripeStatisticsProto(); stripes = new ArrayList<>(tail.getFooter().getStripesCount()); for (OrcProto.StripeInformation stripeProto : tail.getFooter().getStripesList()) { stripes.add(new ReaderImpl.StripeInformationImpl(stripeProto));
// Copies values out of `tail` into this object's fields:
//  - metadataSize, writerVersion and stripes are read from the tail directly;
//  - versionList is read from the tail's postscript;
//  - types, rowIndexStride, contentLength, numberOfRows, userMetadata and
//    fileStats are read from the tail's footer.
this.metadataSize = tail.getMetadataSize(); this.versionList = tail.getPostScript().getVersionList(); this.types = tail.getFooter().getTypesList(); this.rowIndexStride = tail.getFooter().getRowIndexStride(); this.contentLength = tail.getFooter().getContentLength(); this.numberOfRows = tail.getFooter().getNumberOfRows(); this.userMetadata = tail.getFooter().getMetadataList(); this.fileStats = tail.getFooter().getStatisticsList(); this.writerVersion = tail.getWriterVersion(); this.stripes = tail.getStripes();
private int writeFooter(long bodyLength) throws IOException { getStream(); OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder(); builder.setContentLength(bodyLength); builder.setHeaderLength(headerLength); builder.setNumberOfRows(rowCount); builder.setRowIndexStride(rowIndexStride); // populate raw data size rawDataSize = computeRawDataSize(); // serialize the types writeTypes(builder, schema); // add the stripe information for(OrcProto.StripeInformation stripe: stripes) { builder.addStripes(stripe); } // add the column statistics writeFileStatistics(builder, treeWriter); // add all of the user metadata for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) { builder.addMetadata(OrcProto.UserMetadataItem.newBuilder() .setName(entry.getKey()).setValue(entry.getValue())); } long startPosn = rawWriter.getPos(); OrcProto.Footer footer = builder.build(); footer.writeTo(protobufWriter); protobufWriter.flush(); writer.flush(); return (int) (rawWriter.getPos() - startPosn); }
private int writeFooter(long bodyLength) throws IOException { getStream(); OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder(); builder.setContentLength(bodyLength); builder.setHeaderLength(headerLength); builder.setNumberOfRows(rowCount); builder.setRowIndexStride(rowIndexStride); // populate raw data size rawDataSize = computeRawDataSize(); // serialize the types writeTypes(builder, schema); // add the stripe information for(OrcProto.StripeInformation stripe: stripes) { builder.addStripes(stripe); } // add the column statistics writeFileStatistics(builder, treeWriter); // add all of the user metadata for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) { builder.addMetadata(OrcProto.UserMetadataItem.newBuilder() .setName(entry.getKey()).setValue(entry.getValue())); } long startPosn = rawWriter.getPos(); OrcProto.Footer footer = builder.build(); footer.writeTo(protobufWriter); protobufWriter.flush(); writer.flush(); return (int) (rawWriter.getPos() - startPosn); }
/** * Build a virtual OrcTail for empty files. * @return a new OrcTail */ OrcTail buildEmptyTail() { OrcProto.PostScript.Builder postscript = OrcProto.PostScript.newBuilder(); OrcFile.Version version = OrcFile.Version.CURRENT; postscript.setMagic(OrcFile.MAGIC) .setCompression(OrcProto.CompressionKind.NONE) .setFooterLength(0) .addVersion(version.getMajor()) .addVersion(version.getMinor()) .setMetadataLength(0) .setWriterVersion(OrcFile.CURRENT_WRITER.getId()); // Use a struct with no fields OrcProto.Type.Builder struct = OrcProto.Type.newBuilder(); struct.setKind(OrcProto.Type.Kind.STRUCT); OrcProto.Footer.Builder footer = OrcProto.Footer.newBuilder(); footer.setHeaderLength(0) .setContentLength(0) .addTypes(struct) .setNumberOfRows(0) .setRowIndexStride(0); OrcProto.FileTail.Builder result = OrcProto.FileTail.newBuilder(); result.setFooter(footer); result.setPostscript(postscript); result.setFileLength(0); result.setPostscriptLength(0); return new OrcTail(result.build(), null); }
/**
 * Reads the ORC footer information for {@code path}, copies it into this
 * scanner's fields, resolves the time zone from the table meta, creates the
 * record reader and finally delegates to {@code super.init()}.
 *
 * @throws IOException propagated from the footer extraction, record reader
 *         creation or the superclass initialisation
 */
@Override
public void init() throws IOException {
  FileMetaInfo metaInfo = extractMetaInfoFromFooter(fileSystem, path, maxLength);
  this.footerMetaAndPsBuffer = metaInfo.footerMetaAndPsBuffer;

  MetaInfoObjExtractor extractor = new MetaInfoObjExtractor(
      metaInfo.compressionType,
      metaInfo.bufferSize,
      metaInfo.metadataSize,
      metaInfo.footerBuffer);
  this.footerByteBuffer = metaInfo.footerBuffer;

  // Values decoded by the extractor.
  this.compressionKind = extractor.compressionKind;
  this.codec = extractor.codec;
  this.bufferSize = extractor.bufferSize;
  this.metadataSize = extractor.metadataSize;
  this.stripeStats = extractor.metadata.getStripeStatsList();

  // Values taken from the decoded footer.
  this.types = extractor.footer.getTypesList();
  this.rowIndexStride = extractor.footer.getRowIndexStride();
  this.contentLength = extractor.footer.getContentLength();
  this.numberOfRows = extractor.footer.getNumberOfRows();
  this.userMetadata = extractor.footer.getMetadataList();
  this.fileStats = extractor.footer.getStatisticsList();
  this.versionList = metaInfo.versionList;
  this.stripes = convertProtoStripesToStripes(extractor.footer.getStripesList());

  // The table's TIMEZONE setting wins; otherwise use the system time zone id.
  this.timeZone = TimeZone.getTimeZone(
      meta.getProperty(
          StorageConstants.TIMEZONE,
          StorageUtil.TAJO_CONF.getSystemTimezone().getID()));

  // The record reader is created only after all fields above are populated.
  this.recordReader = createRecordReader();

  super.init();
}
private long writeFooter() throws IOException { writeMetadata(); OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder(); builder.setNumberOfRows(rowCount); builder.setRowIndexStride(rowIndexStride); rawDataSize = computeRawDataSize(); // serialize the types writeTypes(builder, schema); // add the stripe information for(OrcProto.StripeInformation stripe: stripes) { builder.addStripes(stripe); } // add the column statistics writeFileStatistics(builder, treeWriter); // add all of the user metadata for(Map.Entry<String, ByteString> entry: userMetadata.entrySet()) { builder.addMetadata(OrcProto.UserMetadataItem.newBuilder() .setName(entry.getKey()).setValue(entry.getValue())); } builder.setWriter(OrcFile.WriterImplementation.ORC_JAVA.getId()); physicalWriter.writeFileFooter(builder); return writePostScript(); }
public void validateIncludes(OrcProto.Footer footer) throws IOException { if (doesSourceHaveIncludes) return; // Irrelevant. boolean[] translatedIncludes = columnIds == null ? null : OrcInputFormat.genIncludedColumns( OrcUtils.convertTypeFromProtobuf(footer.getTypesList(), 0), columnIds); if (translatedIncludes == null) { throwIncludesMismatchError(translatedIncludes); } int len = Math.min(translatedIncludes.length, writerIncludes.length); for (int i = 0; i < len; ++i) { // Translated includes may be a superset of writer includes due to cache. if (!translatedIncludes[i] && writerIncludes[i]) { throwIncludesMismatchError(translatedIncludes); } } if (translatedIncludes.length < writerIncludes.length) { for (int i = len; i < writerIncludes.length; ++i) { if (writerIncludes[i]) { throwIncludesMismatchError(translatedIncludes); } } } }
/**
 * Reads the ORC footer information for {@code path}, copies it into this
 * scanner's fields, resolves the time zone from the table meta options,
 * creates the record reader and finally delegates to {@code super.init()}.
 *
 * @throws IOException propagated from the footer extraction, record reader
 *         creation or the superclass initialisation
 */
@Override
public void init() throws IOException {
  FileMetaInfo metaInfo = extractMetaInfoFromFooter(fileSystem, path, maxLength);
  this.footerMetaAndPsBuffer = metaInfo.footerMetaAndPsBuffer;

  MetaInfoObjExtractor extractor = new MetaInfoObjExtractor(
      metaInfo.compressionType,
      metaInfo.bufferSize,
      metaInfo.metadataSize,
      metaInfo.footerBuffer);
  this.footerByteBuffer = metaInfo.footerBuffer;

  // Values decoded by the extractor.
  this.compressionKind = extractor.compressionKind;
  this.codec = extractor.codec;
  this.bufferSize = extractor.bufferSize;
  this.metadataSize = extractor.metadataSize;
  this.stripeStats = extractor.metadata.getStripeStatsList();

  // Values taken from the decoded footer.
  this.types = extractor.footer.getTypesList();
  this.rowIndexStride = extractor.footer.getRowIndexStride();
  this.contentLength = extractor.footer.getContentLength();
  this.numberOfRows = extractor.footer.getNumberOfRows();
  this.userMetadata = extractor.footer.getMetadataList();
  this.fileStats = extractor.footer.getStatisticsList();
  this.versionList = metaInfo.versionList;
  this.stripes = convertProtoStripesToStripes(extractor.footer.getStripesList());

  // The table's TIMEZONE option wins; otherwise use the system time zone id.
  this.timeZone = TimeZone.getTimeZone(
      meta.getOption(
          StorageConstants.TIMEZONE,
          StorageUtil.TAJO_CONF.getSystemTimezone().getID()));

  // The record reader is created only after all fields above are populated.
  this.recordReader = createRecordReader();

  super.init();
}
// Merges `value` into this builder's footer field and returns this builder.
//  - With no nested footerBuilder_: if bit 0x2 of bitField0_ is already set and
//    footer_ is not the default Footer instance, footer_ is replaced by a
//    partial build of (footer_ merged with value); otherwise footer_ is set to
//    value as-is. onChanged() is called in either case.
//  - With a nested footerBuilder_: the merge is delegated to it.
// Bit 0x2 of bitField0_ is set before returning.
// NOTE(review): bit 0x2 presumably marks the footer field as present — confirm
// against the rest of the generated message class.
/** * <code>optional .orc.proto.Footer footer = 2;</code> */ public Builder mergeFooter(org.apache.orc.OrcProto.Footer value) { if (footerBuilder_ == null) { if (((bitField0_ & 0x00000002) == 0x00000002) && footer_ != org.apache.orc.OrcProto.Footer.getDefaultInstance()) { footer_ = org.apache.orc.OrcProto.Footer.newBuilder(footer_).mergeFrom(value).buildPartial(); } else { footer_ = value; } onChanged(); } else { footerBuilder_.mergeFrom(value); } bitField0_ |= 0x00000002; return this; } /**
public OrcFileMetadata(Object fileKey, OrcProto.Footer footer, OrcProto.PostScript ps, List<StripeStatistics> stats, List<StripeInformation> stripes, final OrcFile.Version fileVersion) { this.stripeStats = stats; this.compressionKind = CompressionKind.valueOf(ps.getCompression().name()); this.compressionBufferSize = (int)ps.getCompressionBlockSize(); this.stripes = stripes; this.isOriginalFormat = OrcInputFormat.isOriginal(footer); this.writerVersionNum = ps.getWriterVersion(); this.versionList = ps.getVersionList(); this.metadataSize = (int) ps.getMetadataLength(); this.types = footer.getTypesList(); this.rowIndexStride = footer.getRowIndexStride(); this.contentLength = footer.getContentLength(); this.numberOfRows = footer.getNumberOfRows(); this.fileStats = footer.getStatisticsList(); this.fileKey = fileKey; this.fileVersion = fileVersion; }
public Builder newBuilderForType() { return newBuilder(); } public static Builder newBuilder(org.apache.orc.OrcProto.Footer prototype) {
/**
 * Returns the builder produced by {@code newBuilder(this)}.
 *
 * @return a builder obtained from this instance
 */
public Builder toBuilder() {
  Builder seeded = newBuilder(this);
  return seeded;
}
/**
 * Returns the type list held by this object's footer.
 *
 * @return the result of {@code getFooter().getTypesList()}
 */
public List<OrcProto.Type> getTypes() {
  final List<OrcProto.Type> footerTypes = getFooter().getTypesList();
  return footerTypes;
}
/**
 * Creates a fresh builder and merges {@code prototype} into it.
 *
 * @param prototype the footer whose contents are merged into the new builder
 * @return the builder returned by {@code mergeFrom(prototype)}
 */
public static Builder newBuilder(org.apache.orc.OrcProto.Footer prototype) {
  Builder fresh = newBuilder();
  return fresh.mergeFrom(prototype);
}

/**
 * Returns the builder produced by {@code newBuilder(this)}.
 *
 * @return a builder obtained from this instance
 */
public Builder toBuilder() {
  Builder seeded = newBuilder(this);
  return seeded;
}
@Override public ColumnStatistics[] getStatistics() throws IOException { // Generate the stats OrcProto.Footer.Builder builder = OrcProto.Footer.newBuilder(); // add the column statistics writeFileStatistics(builder, treeWriter); return ReaderImpl.deserializeStats(schema, builder.getStatisticsList()); }