public BytesInput toBytesInput() {
  return BytesInput.concat(
      BytesInput.fromUnsignedVarInt(blockSizeInValues),
      BytesInput.fromUnsignedVarInt(miniBlockNumInABlock));
}
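The two config values above round-trip as unsigned varints. A minimal read-side sketch, assuming parquet's BytesUtils varint helpers (the class name and plumbing here are illustrative, not the library's actual reader):

import java.io.IOException;
import java.io.InputStream;
import org.apache.parquet.bytes.BytesUtils;

// Hypothetical helper: decodes the two varints written by toBytesInput(), in order.
final class DeltaConfigSketch {
  final int blockSizeInValues;
  final int miniBlockNumInABlock;

  DeltaConfigSketch(InputStream in) throws IOException {
    this.blockSizeInValues = BytesUtils.readUnsignedVarInt(in);
    this.miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(in);
  }
}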
/**
 * @return the bytes representing the packed values
 * @throws IOException if the packed bytes cannot be assembled
 */
public BytesInput toBytes() throws IOException {
  int packedByteLength = packedPosition + BytesUtils.paddedByteCountFromBits(inputSize * bitWidth);
  if (DEBUG) LOG.debug("writing " + (slabs.size() * slabSize + packedByteLength) + " bytes");
  if (inputSize > 0) {
    // zero out the tail of the input buffer so the last, partially filled group packs cleanly
    for (int i = inputSize; i < input.length; i++) {
      input[i] = 0;
    }
    pack();
  }
  return concat(concat(slabs), BytesInput.from(packed, 0, packedByteLength));
}
@Override
public BytesInput getBytes() {
  return BytesInput.concat(prefixLengthWriter.getBytes(), suffixWriter.getBytes());
}
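BytesInput.concat builds a lazy view over its parts; nothing is copied until the result is consumed. A minimal standalone sketch of that behavior (the class and method names are illustrative):

import java.io.IOException;
import org.apache.parquet.bytes.BytesInput;

class ConcatSketch {
  static byte[] demo() throws IOException {
    BytesInput prefix = BytesInput.from(new byte[] { 1, 2 });
    BytesInput suffix = BytesInput.from(new byte[] { 3, 4 });
    // concat holds references to both parts and writes them in order
    // only when toByteArray() materializes the result
    return BytesInput.concat(prefix, suffix).toByteArray(); // {1, 2, 3, 4}
  }
}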
@Override
public BytesInput getBytes() {
  try {
    // prepend the length of the column
    BytesInput rle = encoder.toBytes();
    return BytesInput.concat(BytesInput.fromInt(Ints.checkedCast(rle.size())), rle);
  } catch (IOException e) {
    throw new ParquetEncodingException(e);
  }
}
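The 4-byte prefix written by BytesInput.fromInt is little-endian, so a reader can recover the RLE section's length before decoding it. A hedged read-side sketch, assuming BytesUtils.readIntLittleEndian from parquet's bytes package (names are illustrative):

import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.parquet.bytes.BytesUtils;

class LengthPrefixedSketch {
  static byte[] readRleSection(InputStream in) throws IOException {
    int length = BytesUtils.readIntLittleEndian(in); // the prepended length
    byte[] rleSection = new byte[length];
    new DataInputStream(in).readFully(rleSection);   // exactly `length` bytes of RLE data
    return rleSection;
  }
}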
public BytesInput readAsBytesInput(int size) throws IOException {
  if (pos + size > count) {
    // This is to work around a bug where the compressedLength of the chunk
    // is missing the size of the header of the dictionary. We need this to
    // allow reading older files (using a dictionary); usually 13 to 19 bytes
    // are missing.
    int l1 = count - pos;
    int l2 = size - l1;
    LOG.info("completed the column chunk with " + l2 + " bytes");
    return BytesInput.concat(super.readAsBytesInput(l1), BytesInput.copy(BytesInput.from(f, l2)));
  }
  return super.readAsBytesInput(size);
}
@Override
public BytesInput getBytes() {
  serializeCurrentValue();
  BytesInput buf = bitWriter.finish();
  if (Log.DEBUG) LOG.debug("writing a buffer of size " + buf.size() + " + 4 bytes");
  // We serialize the length so that on deserialization we can
  // deserialize as we go, instead of having to load everything
  // into memory
  return concat(BytesInput.fromInt((int) buf.size()), buf);
}
@Override
public BytesInput getBytes() {
  try {
    out.flush();
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page", e);
  }
  if (Log.DEBUG) LOG.debug("writing a buffer of size " + arrayOut.size());
  return BytesInput.concat(lengthWriter.getBytes(), BytesInput.from(arrayOut));
}
/**
 * getBytes will trigger flushing the block buffer.
 * DO NOT write after getBytes() is called without calling reset() first.
 *
 * @return the serialized page, including its header
 */
@Override
public BytesInput getBytes() {
  // The page header should include: blockSizeInValues, numberOfMiniBlocks, totalValueCount
  if (deltaValuesToFlush != 0) {
    flushBlockBuffer();
  }
  return BytesInput.concat(
      config.toBytesInput(),
      BytesInput.fromUnsignedVarInt(totalValueCount),
      BytesInput.fromZigZagVarInt(firstValue),
      BytesInput.from(baos));
}
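Everything before the flushed block data is varint-encoded: the two config values, the total value count, and the zigzag-encoded first value. A hedged sketch of reading that header back in write order, assuming parquet's BytesUtils varint helpers (the wrapper class is illustrative):

import java.io.IOException;
import java.io.InputStream;
import org.apache.parquet.bytes.BytesUtils;

class DeltaPageHeaderSketch {
  // Mirrors the write order used by getBytes() above; the remaining
  // bytes of the stream are the miniblock data from `baos`.
  static void readHeader(InputStream in) throws IOException {
    int blockSizeInValues = BytesUtils.readUnsignedVarInt(in);
    int miniBlockNumInABlock = BytesUtils.readUnsignedVarInt(in);
    int totalValueCount = BytesUtils.readUnsignedVarInt(in);
    int firstValue = BytesUtils.readZigZagVarInt(in);
  }
}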
buf.collect(BytesInput.concat(BytesInput.from(tempOutputStream), compressedBytes));
encodings.add(rlEncoding);
encodings.add(dlEncoding);
BytesInput.concat(
    BytesInput.from(tempOutputStream),
    repetitionLevels,
private void writePage() {
  if (DEBUG) LOG.debug("write page");
  try {
    pageWriter.writePage(
        concat(repetitionLevelColumn.getBytes(), definitionLevelColumn.getBytes(), dataColumn.getBytes()),
        valueCount,
        statistics,
        repetitionLevelColumn.getEncoding(),
        definitionLevelColumn.getEncoding(),
        dataColumn.getEncoding());
  } catch (IOException e) {
    throw new ParquetEncodingException("could not write page for " + path, e);
  }
  repetitionLevelColumn.reset();
  definitionLevelColumn.reset();
  dataColumn.reset();
  valueCount = 0;
  resetStatistics();
}
@Override
public BytesInput getBytes() {
  int maxDicId = getDictionarySize() - 1;
  if (DEBUG) LOG.debug("max dic id " + maxDicId);
  int bitWidth = BytesUtils.getWidthFromMaxInt(maxDicId);
  int initialSlabSize =
      CapacityByteArrayOutputStream.initialSlabSizeHeuristic(MIN_INITIAL_SLAB_SIZE, maxDictionaryByteSize, 10);
  RunLengthBitPackingHybridEncoder encoder =
      new RunLengthBitPackingHybridEncoder(bitWidth, initialSlabSize, maxDictionaryByteSize);
  IntIterator iterator = encodedValues.iterator();
  try {
    while (iterator.hasNext()) {
      encoder.writeInt(iterator.next());
    }
    // encodes the bit width in a one-byte header
    byte[] bytesHeader = new byte[] { (byte) bitWidth };
    BytesInput rleEncodedBytes = encoder.toBytes();
    if (DEBUG) LOG.debug("rle encoded bytes " + rleEncodedBytes.size());
    BytesInput bytes = concat(BytesInput.from(bytesHeader), rleEncodedBytes);
    // remember size of dictionary when we last wrote a page
    lastUsedDictionarySize = getDictionarySize();
    lastUsedDictionaryByteSize = dictionaryByteSize;
    return bytes;
  } catch (IOException e) {
    throw new ParquetEncodingException("could not encode the values", e);
  }
}
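The resulting page is a one-byte bit width followed by the RLE/bit-packed hybrid stream of dictionary ids. A hedged decode sketch using parquet's RunLengthBitPackingHybridDecoder (the package path assumes a recent parquet-mr; the surrounding plumbing is illustrative):

import java.io.IOException;
import java.io.InputStream;
import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder;

class DictionaryIdsSketch {
  static int[] readIds(InputStream in, int valueCount) throws IOException {
    int bitWidth = in.read(); // the one-byte header written above
    RunLengthBitPackingHybridDecoder decoder = new RunLengthBitPackingHybridDecoder(bitWidth, in);
    int[] ids = new int[valueCount];
    for (int i = 0; i < valueCount; i++) {
      ids[i] = decoder.readInt(); // one dictionary id per encoded value
    }
    return ids;
  }
}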