/**
 * Converts the given input to a byte array, appends that array to
 * {@code slabs}, and adds its length to {@code size}.
 *
 * @param bytesInput the input to convert and store
 * @throws IOException if the input cannot be converted to a byte array
 */
public void collect(BytesInput bytesInput) throws IOException {
  final byte[] slab = bytesInput.toByteArray();
  slabs.add(slab);
  size += slab.length;
}
/**
 * Converts the given input to a byte array, appends that array to
 * {@code slabs}, and adds its length to {@code size}.
 *
 * @param bytesInput the input to convert and store
 * @throws IOException if the input cannot be converted to a byte array
 */
public void collect(BytesInput bytesInput) throws IOException {
  final byte[] slab = bytesInput.toByteArray();
  slabs.add(slab);
  size += slab.length;
}
/**
 * Copies the input into a byte array and wraps that array.
 *
 * @param bytesInput the input whose bytes are read via {@code toByteArray()}
 * @return the result of {@code from(byte[])} applied to those bytes
 * @throws IOException if the input cannot be converted to a byte array
 */
public static BytesInput copy(BytesInput bytesInput) throws IOException {
  final byte[] contents = bytesInput.toByteArray();
  return from(contents);
}
/**
 * Copies the input into a byte array and wraps that array.
 *
 * @param bytesInput the input whose bytes are read via {@code toByteArray()}
 * @return the result of {@code from(byte[])} applied to those bytes
 * @throws IOException if the input cannot be converted to a byte array
 */
public static BytesInput copy(BytesInput bytesInput) throws IOException {
  final byte[] contents = bytesInput.toByteArray();
  return from(contents);
}
/**
 * Copies the input into a byte array and wraps that array.
 *
 * @param bytesInput the input whose bytes are read via {@code toByteArray()}
 * @return the result of {@code from(byte[])} applied to those bytes
 * @throws IOException if the input cannot be converted to a byte array
 */
public static BytesInput copy(BytesInput bytesInput) throws IOException {
  final byte[] contents = bytesInput.toByteArray();
  return from(contents);
}
/**
 * Decompresses the given bytes with the configured codec.
 *
 * <p>When {@code codec} is {@code null} the input is returned unchanged.
 * Otherwise the decompressor is reset, the input bytes are wrapped in a
 * decompressing stream, and a BytesInput reading {@code uncompressedSize}
 * bytes from that stream is returned.
 *
 * @param bytes the (possibly compressed) input
 * @param uncompressedSize the size passed to {@code BytesInput.from} for the decompressed data
 * @return the decompressed bytes, or {@code bytes} itself when no codec is set
 * @throws IOException if the input cannot be read or the stream cannot be created
 */
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  if (codec == null) {
    return bytes;
  }
  decompressor.reset();
  final ByteArrayInputStream compressedStream = new ByteArrayInputStream(bytes.toByteArray());
  // NOTE(review): the stream is handed to BytesInput.from and not closed here;
  // presumably it is consumed later by the returned object -- confirm.
  final InputStream decompressingStream = codec.createInputStream(compressedStream, decompressor);
  return BytesInput.from(decompressingStream, uncompressedSize);
}
/**
 * Decompresses the given bytes with the configured codec.
 *
 * <p>When {@code codec} is {@code null} the input is returned unchanged.
 * Otherwise the decompressor is reset, the input bytes are wrapped in a
 * decompressing stream, and a BytesInput reading {@code uncompressedSize}
 * bytes from that stream is returned.
 *
 * @param bytes the (possibly compressed) input
 * @param uncompressedSize the size passed to {@code BytesInput.from} for the decompressed data
 * @return the decompressed bytes, or {@code bytes} itself when no codec is set
 * @throws IOException if the input cannot be read or the stream cannot be created
 */
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  if (codec == null) {
    return bytes;
  }
  decompressor.reset();
  final ByteArrayInputStream compressedStream = new ByteArrayInputStream(bytes.toByteArray());
  // NOTE(review): the stream is handed to BytesInput.from and not closed here;
  // presumably it is consumed later by the returned object -- confirm.
  final InputStream decompressingStream = codec.createInputStream(compressedStream, decompressor);
  return BytesInput.from(decompressingStream, uncompressedSize);
}
/**
 * Decompresses the given bytes with the configured codec.
 *
 * <p>When {@code codec} is {@code null} the input is returned unchanged.
 * Otherwise the decompressor is reset, the input bytes are wrapped in a
 * decompressing stream, and a BytesInput reading {@code uncompressedSize}
 * bytes from that stream is returned.
 *
 * @param bytes the (possibly compressed) input
 * @param uncompressedSize the size passed to {@code BytesInput.from} for the decompressed data
 * @return the decompressed bytes, or {@code bytes} itself when no codec is set
 * @throws IOException if the input cannot be read or the stream cannot be created
 */
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  if (codec == null) {
    return bytes;
  }
  decompressor.reset();
  final ByteArrayInputStream rawStream = new ByteArrayInputStream(bytes.toByteArray());
  // NOTE(review): the stream is handed to BytesInput.from and not closed here;
  // presumably it is consumed later by the returned object -- confirm.
  final InputStream codecStream = codec.createInputStream(rawStream, decompressor);
  return BytesInput.from(codecStream, uncompressedSize);
}
/**
 * Creates a level reader over the given bytes.
 *
 * <p>A {@code maxLevel} of 0 yields a {@code ParquetLevelNullReader}. Any
 * other value yields a {@code ParquetLevelRLEReader} backed by a
 * {@code RunLengthBitPackingHybridDecoder} whose bit width is
 * {@code BytesUtils.getWidthFromMaxInt(maxLevel)}.
 *
 * @param maxLevel the maximum level value for the column
 * @param bytes the encoded level data
 * @return a reader for the levels
 * @throws ParquetDecodingException wrapping any IOException raised while building the reader
 */
private ParquetLevelReader buildLevelRLEReader(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel == 0) {
      return new ParquetLevelNullReader();
    }
    final int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    final ByteArrayInputStream levelStream = new ByteArrayInputStream(bytes.toByteArray());
    final RunLengthBitPackingHybridDecoder decoder =
        new RunLengthBitPackingHybridDecoder(bitWidth, levelStream);
    return new ParquetLevelRLEReader(decoder);
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read levels in page for col " + columnDescriptor, ioe);
  }
}
/**
 * Creates an iterator over the levels encoded in the given bytes.
 *
 * <p>A {@code maxLevel} of 0 yields a {@code NullIntIterator}. Any other
 * value yields an {@code RLEIntIterator} backed by a
 * {@code RunLengthBitPackingHybridDecoder} whose bit width is
 * {@code BytesUtils.getWidthFromMaxInt(maxLevel)}.
 *
 * @param maxLevel the maximum level value for the column
 * @param bytes the encoded level data
 * @return an iterator over the levels
 * @throws ParquetDecodingException wrapping any IOException raised while building the iterator
 */
private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
  try {
    if (maxLevel == 0) {
      return new NullIntIterator();
    }
    final int bitWidth = BytesUtils.getWidthFromMaxInt(maxLevel);
    final ByteArrayInputStream levelStream = new ByteArrayInputStream(bytes.toByteArray());
    final RunLengthBitPackingHybridDecoder decoder =
        new RunLengthBitPackingHybridDecoder(bitWidth, levelStream);
    return new RLEIntIterator(decoder);
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read levels in page for col " + path, ioe);
  }
}
/**
 * Builds the dictionary by reading every entry of the page as a long.
 *
 * <p>The page bytes are given to a {@code LongPlainValuesReader} starting at
 * offset 0, and {@code dictionaryPage.getDictionarySize()} values are read
 * into {@code longDictionaryContent}.
 *
 * @param dictionaryPage the page holding the dictionary entries
 * @throws IOException if the page bytes cannot be obtained or read
 */
public PlainLongDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  final byte[] pageBytes = dictionaryPage.getBytes().toByteArray();
  longDictionaryContent = new long[dictionaryPage.getDictionarySize()];
  final LongPlainValuesReader reader = new LongPlainValuesReader();
  reader.initFromPage(dictionaryPage.getDictionarySize(), pageBytes, 0);
  for (int slot = 0, total = longDictionaryContent.length; slot < total; slot++) {
    longDictionaryContent[slot] = reader.readLong();
  }
}
/**
 * Builds the dictionary by reading every entry of the page as an int.
 *
 * <p>The page bytes are given to an {@code IntegerPlainValuesReader} starting
 * at offset 0, and {@code dictionaryPage.getDictionarySize()} values are read
 * into {@code intDictionaryContent}.
 *
 * @param dictionaryPage the page holding the dictionary entries
 * @throws IOException if the page bytes cannot be obtained or read
 */
public PlainIntegerDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  final byte[] pageBytes = dictionaryPage.getBytes().toByteArray();
  intDictionaryContent = new int[dictionaryPage.getDictionarySize()];
  final IntegerPlainValuesReader reader = new IntegerPlainValuesReader();
  reader.initFromPage(dictionaryPage.getDictionarySize(), pageBytes, 0);
  for (int slot = 0, total = intDictionaryContent.length; slot < total; slot++) {
    intDictionaryContent[slot] = reader.readInteger();
  }
}
/**
 * Builds the dictionary by reading every entry of the page as a double.
 *
 * <p>The page bytes are given to a {@code DoublePlainValuesReader} starting
 * at offset 0, and {@code dictionaryPage.getDictionarySize()} values are read
 * into {@code doubleDictionaryContent}.
 *
 * @param dictionaryPage the page holding the dictionary entries
 * @throws IOException if the page bytes cannot be obtained or read
 */
public PlainDoubleDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  final byte[] pageBytes = dictionaryPage.getBytes().toByteArray();
  doubleDictionaryContent = new double[dictionaryPage.getDictionarySize()];
  final DoublePlainValuesReader reader = new DoublePlainValuesReader();
  reader.initFromPage(dictionaryPage.getDictionarySize(), pageBytes, 0);
  for (int slot = 0, total = doubleDictionaryContent.length; slot < total; slot++) {
    doubleDictionaryContent[slot] = reader.readDouble();
  }
}
/**
 * Builds the dictionary by reading every entry of the page as a float.
 *
 * <p>The page bytes are given to a {@code FloatPlainValuesReader} starting at
 * offset 0, and {@code dictionaryPage.getDictionarySize()} values are read
 * into {@code floatDictionaryContent}.
 *
 * @param dictionaryPage the page holding the dictionary entries
 * @throws IOException if the page bytes cannot be obtained or read
 */
public PlainFloatDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  final byte[] pageBytes = dictionaryPage.getBytes().toByteArray();
  floatDictionaryContent = new float[dictionaryPage.getDictionarySize()];
  final FloatPlainValuesReader reader = new FloatPlainValuesReader();
  reader.initFromPage(dictionaryPage.getDictionarySize(), pageBytes, 0);
  for (int slot = 0, total = floatDictionaryContent.length; slot < total; slot++) {
    floatDictionaryContent[slot] = reader.readFloat();
  }
}
/**
 * Sets up the level readers and the data reader for a V2 data page.
 *
 * <p>The repetition and definition level readers are built from the page's
 * level bytes and stored in {@code repetitionReader} and
 * {@code definitionReader}; the page data is then passed to
 * {@code initDataReader} starting at offset 0.
 *
 * @param page the V2 data page to read
 * @return the values reader produced by {@code initDataReader}
 * @throws ParquetDecodingException wrapping any IOException raised while reading the page data
 */
private ValuesReader readPageV2(DataPageV2 page) {
  final int maxRepetitionLevel = columnDescriptor.getMaxRepetitionLevel();
  final BytesInput repetitionLevels = page.getRepetitionLevels();
  this.repetitionReader = buildLevelRLEReader(maxRepetitionLevel, repetitionLevels);

  final int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
  final BytesInput definitionLevels = page.getDefinitionLevels();
  this.definitionReader = buildLevelRLEReader(maxDefinitionLevel, definitionLevels);

  try {
    return initDataReader(
        page.getDataEncoding(),
        page.getData().toByteArray(),
        0,
        page.getValueCount());
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + columnDescriptor, ioe);
  }
}
/**
 * Writes the given ints with a {@code BoundedIntColumnWriter}, checks the
 * produced bytes against the expected binary strings, then reads the values
 * back with a {@code BoundedIntColumnReader} and checks they match the input.
 *
 * @param bound the bound passed to both the writer and the reader
 * @param ints the values to write and expect back
 * @param result the expected binary-string chunks, joined with {@code concat} before comparison
 * @throws IOException if the written bytes cannot be obtained or read back
 */
private void compareOutput(int bound, int[] ints, String[] result) throws IOException {
  BoundedIntColumnWriter bicw = new BoundedIntColumnWriter(bound);
  for (int value : ints) {
    bicw.writeInteger(value);
  }
  System.out.println(Arrays.toString(ints));
  System.out.println(Arrays.toString(result));

  byte[] byteArray = bicw.getBytes().toByteArray();
  assertEquals(concat(result), toBinaryString(byteArray, 4));

  BoundedIntColumnReader bicr = new BoundedIntColumnReader(bound);
  bicr.initFromPage(1, byteArray, 0);

  // Accumulate with builders: repeated String += in a loop re-copies the
  // whole accumulated text on every iteration.
  StringBuilder expected = new StringBuilder();
  StringBuilder got = new StringBuilder();
  for (int value : ints) {
    expected.append(" ").append(value);
    got.append(" ").append(bicr.readInteger());
  }
  assertEquals(expected.toString(), got.toString());
}
/**
 * Sets up the level iterators and the data reader for a V2 data page.
 *
 * <p>The repetition and definition level iterators are built from the page's
 * level bytes; the page data is then passed to {@code initDataReader}
 * starting at offset 0.
 *
 * @param page the V2 data page to read
 * @throws ParquetDecodingException wrapping any IOException raised while reading the page data
 */
private void readPageV2(DataPageV2 page) {
  final int maxRepetitionLevel = path.getMaxRepetitionLevel();
  final BytesInput repetitionLevels = page.getRepetitionLevels();
  this.repetitionLevelColumn = newRLEIterator(maxRepetitionLevel, repetitionLevels);

  final int maxDefinitionLevel = path.getMaxDefinitionLevel();
  final BytesInput definitionLevels = page.getDefinitionLevels();
  this.definitionLevelColumn = newRLEIterator(maxDefinitionLevel, definitionLevels);

  try {
    if (DEBUG) {
      LOG.debug("page data size " + page.getData().size() + " bytes and " + pageValueCount + " records");
    }
    initDataReader(
        page.getDataEncoding(),
        page.getData().toByteArray(),
        0,
        page.getValueCount());
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, ioe);
  }
}
/**
 * Round-trips the given values through {@code BitPackingColumnWriter} and
 * {@code BitPackingColumnReader}, checking both the encoded bytes and the
 * decoded values.
 *
 * @param bitLength the bit length used to derive the writer/reader bound
 * @param vals the values to encode and expect back
 * @param expected the expected {@code TestBitPacking.toString} rendering of the encoded bytes
 * @throws IOException if the encoded bytes cannot be obtained or read back
 */
private void validateEncodeDecode(int bitLength, int[] vals, String expected) throws IOException {
  final int bound = (int) Math.pow(2, bitLength) - 1;

  // Encode.
  BitPackingColumnWriter writer = new BitPackingColumnWriter(bound);
  for (int idx = 0; idx < vals.length; idx++) {
    writer.writeInteger(vals[idx]);
  }
  byte[] encoded = writer.getBytes().toByteArray();
  System.out.println("vals (" + bitLength + "): " + TestBitPacking.toString(vals));
  System.out.println("bytes: " + TestBitPacking.toString(encoded));
  assertEquals(expected, TestBitPacking.toString(encoded));

  // Decode and compare with the input.
  BitPackingColumnReader reader = new BitPackingColumnReader(bound);
  reader.initFromPage(vals.length, encoded, 0);
  int[] decoded = new int[vals.length];
  for (int idx = 0; idx < decoded.length; idx++) {
    decoded[idx] = reader.readInteger();
  }
  System.out.println("result: " + TestBitPacking.toString(decoded));
  assertArrayEquals("result: " + TestBitPacking.toString(decoded), vals, decoded);
}
/**
 * Sets up the level readers and the data reader for a V1 data page.
 *
 * <p>All three sections are read from one byte array: repetition levels
 * start at offset 0, definition levels start where the repetition reader
 * reports its next offset, and the values start where the definition reader
 * reports its next offset.
 *
 * @param page the V1 data page to read
 * @return the values reader produced by {@code initDataReader}
 * @throws ParquetDecodingException wrapping any IOException raised while reading the page
 */
private ValuesReader readPageV1(DataPageV1 page) {
  ValuesReader repetitionValues =
      page.getRlEncoding().getValuesReader(columnDescriptor, ValuesType.REPETITION_LEVEL);
  ValuesReader definitionValues =
      page.getDlEncoding().getValuesReader(columnDescriptor, ValuesType.DEFINITION_LEVEL);
  this.repetitionReader = new ParquetLevelValuesReader(repetitionValues);
  this.definitionReader = new ParquetLevelValuesReader(definitionValues);
  try {
    final byte[] pageBytes = page.getBytes().toByteArray();

    repetitionValues.initFromPage(pageValueCount, pageBytes, 0);
    final int definitionOffset = repetitionValues.getNextOffset();

    definitionValues.initFromPage(pageValueCount, pageBytes, definitionOffset);
    final int dataOffset = definitionValues.getNextOffset();

    return initDataReader(page.getValueEncoding(), pageBytes, dataOffset, page.getValueCount());
  } catch (IOException ioe) {
    throw new ParquetDecodingException("Error reading parquet page " + page + " in column " + columnDescriptor, ioe);
  }
}
/**
 * Sets up the level iterators and the data reader for a V1 data page.
 *
 * <p>All three sections are read from one byte array: repetition levels
 * start at offset 0, definition levels start where the repetition reader
 * reports its next offset, and the values start where the definition reader
 * reports its next offset.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException wrapping any IOException raised while reading the page
 */
private void readPageV1(DataPageV1 page) {
  ValuesReader repetitionValues = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
  ValuesReader definitionValues = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(repetitionValues);
  this.definitionLevelColumn = new ValuesReaderIntIterator(definitionValues);
  try {
    final byte[] pageBytes = page.getBytes().toByteArray();
    if (DEBUG) {
      LOG.debug("page size " + pageBytes.length + " bytes and " + pageValueCount + " records");
    }

    if (DEBUG) {
      LOG.debug("reading repetition levels at 0");
    }
    repetitionValues.initFromPage(pageValueCount, pageBytes, 0);
    final int definitionOffset = repetitionValues.getNextOffset();

    if (DEBUG) {
      LOG.debug("reading definition levels at " + definitionOffset);
    }
    definitionValues.initFromPage(pageValueCount, pageBytes, definitionOffset);
    final int dataOffset = definitionValues.getNextOffset();

    if (DEBUG) {
      LOG.debug("reading data at " + dataOffset);
    }
    initDataReader(page.getValueEncoding(), pageBytes, dataOffset, page.getValueCount());
  } catch (IOException ioe) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, ioe);
  }
}