@Override
public void close() throws IOException {
  if (columnarBatch != null) {
    columnarBatch.close();
    columnarBatch = null;
  }
  super.close();
}
/**
 * Implementation of RecordReader API.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
    throws IOException, InterruptedException, UnsupportedOperationException {
  super.initialize(inputSplit, taskAttemptContext);
  initializeInternal();
}
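For context, a minimal sketch of how a reader implementing this API is typically driven. It relies only on the standard org.apache.hadoop.mapreduce.RecordReader contract (initialize, nextKeyValue, getCurrentValue, close); the helper class and method names below are illustrative, not taken from the code in this section.

import java.io.IOException;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class RecordReaderDriver {
  // Drains every record from one split using the RecordReader contract.
  public static <K, V> long readAll(RecordReader<K, V> reader, InputSplit split,
      TaskAttemptContext context) throws IOException, InterruptedException {
    long count = 0;
    try {
      reader.initialize(split, context);    // the overload documented above
      while (reader.nextKeyValue()) {       // advance to the next record
        V value = reader.getCurrentValue(); // a row or a columnar batch, depending on the reader
        count++;
      }
    } finally {
      reader.close();                       // frees the columnar batch (see close() above)
    }
    return count;
  }
}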
/**
 * Returns the list of files at 'path' recursively. This skips files that are ignored normally
 * by MapReduce.
 */
public static List<String> listDirectory(File path) throws IOException {
  List<String> result = new ArrayList<>();
  if (path.isDirectory()) {
    for (File f: path.listFiles()) {
      result.addAll(listDirectory(f));
    }
  } else {
    char c = path.getName().charAt(0);
    if (c != '.' && c != '_') {
      result.add(path.getAbsolutePath());
    }
  }
  return result;
}
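A brief usage sketch for listDirectory under a hypothetical directory layout: it recurses into subdirectories and keeps only leaf files whose names do not start with '.' or '_', the prefixes MapReduce normally treats as hidden or metadata files (for example _SUCCESS or .crc files). The enclosing class is not shown above, so ReaderUtils below is a placeholder name.

import java.io.File;
import java.util.List;

public class ListDirectoryExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical layout under /tmp/data:
    //   part-00000.parquet        -> returned
    //   _SUCCESS                  -> skipped (leading '_')
    //   .part-00000.parquet.crc   -> skipped (leading '.')
    //   sub/part-00001.parquet    -> returned (found by recursion)
    List<String> files = ReaderUtils.listDirectory(new File("/tmp/data"));
    for (String f : files) {
      System.out.println(f);  // absolute paths of the files that were not ignored
    }
  }
}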
ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration));
ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
    taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema));
this.requestedSchema = readContext.getRequestedSchema();
String sparkRequestedSchemaString =
private void readPageV2(DataPageV2 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(),
      page.getRepetitionLevels(), descriptor);
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(bitWidth);
  this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn);
  this.defColumn.initFromBuffer(
      this.pageValueCount, page.getDefinitionLevels().toByteArray());
  try {
    initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}
private void readPageV2(DataPageV2 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  this.repetitionLevelColumn = createRLEIterator(descriptor.getMaxRepetitionLevel(),
      page.getRepetitionLevels(), descriptor);
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  // do not read the length from the stream. v2 pages handle dividing the page bytes.
  this.defColumn = new VectorizedRleValuesReader(bitWidth, false);
  this.definitionLevelColumn = new ValuesReaderIntIterator(this.defColumn);
  this.defColumn.initFromPage(
      this.pageValueCount, page.getDefinitionLevels().toInputStream());
  try {
    initDataReader(page.getDataEncoding(), page.getData().toInputStream());
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}
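Both readPageV2 variants size the definition-level reader with BytesUtils.getWidthFromMaxInt(maxDefinitionLevel). As a standalone illustration (assuming the usual org.apache.parquet.bytes.BytesUtils from parquet-mr), the returned width is simply the number of bits needed to represent the largest possible level:

import org.apache.parquet.bytes.BytesUtils;

public class BitWidthExample {
  public static void main(String[] args) {
    // Bits needed to encode definition levels in the range [0, maxLevel].
    for (int maxLevel : new int[] {0, 1, 2, 3, 7, 8}) {
      System.out.println(maxLevel + " -> " + BytesUtils.getWidthFromMaxInt(maxLevel));
    }
    // Prints: 0 -> 0, 1 -> 1, 2 -> 2, 3 -> 2, 7 -> 3, 8 -> 4.
    // A required-only column (max definition level 0) therefore needs no definition-level bits.
  }
}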
/**
 * Utility API that will read all the data in path. This circumvents the need to create Hadoop
 * objects to use this class. `columns` can contain the list of columns to project.
 */
@Override
public void initialize(String path, List<String> columns)
    throws IOException, UnsupportedOperationException {
  super.initialize(path, columns);
  initializeInternal();
}
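A hedged usage sketch of this utility overload: initialize the reader with a file path and a column projection, then iterate with the RecordReader methods shown elsewhere in this section. VectorizedReader is a placeholder for the concrete class (not named here), and the path and column names are made up.

import java.util.Arrays;

public class DirectReadExample {
  public static void main(String[] args) throws Exception {
    VectorizedReader reader = new VectorizedReader();  // placeholder for the actual reader class
    try {
      // No Hadoop InputSplit or TaskAttemptContext required; project just two columns.
      reader.initialize("/tmp/data/part-00000.parquet", Arrays.asList("id", "name"));
      while (reader.nextKeyValue()) {          // standard RecordReader iteration
        Object record = reader.getCurrentValue();
        // consume `record` (a row or a columnar batch, depending on the reader's mode)
      }
    } finally {
      reader.close();                          // releases the columnar batch
    }
  }
}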
/**
 * Returns the list of files at 'path' recursively. This skips files that are ignored normally
 * by MapReduce.
 */
public static List<String> listDirectory(File path) {
  List<String> result = new ArrayList<>();
  if (path.isDirectory()) {
    for (File f: path.listFiles()) {
      result.addAll(listDirectory(f));
    }
  } else {
    char c = path.getName().charAt(0);
    if (c != '.' && c != '_') {
      result.add(path.getAbsolutePath());
    }
  }
  return result;
}