@Override
public void visit(MessageType messageType) {
  // Builds the MessageColumnIO tree for the requested schema, then
  // assigns repetition/definition levels and the list of leaf columns.
  columnIO = new MessageColumnIO(requestedSchema, validating, createdBy);
  visitChildren(columnIO, messageType, requestedSchema);
  columnIO.setLevels();
  columnIO.setLeaves(leaves);
}
public ParquetReader(
    MessageColumnIO messageColumnIO,
    List<BlockMetaData> blocks,
    ParquetDataSource dataSource,
    AggregatedMemoryContext systemMemoryContext)
{
  this.blocks = blocks;
  this.dataSource = requireNonNull(dataSource, "dataSource is null");
  this.systemMemoryContext = requireNonNull(systemMemoryContext, "systemMemoryContext is null");
  this.currentRowGroupMemoryContext = systemMemoryContext.newAggregatedMemoryContext();
  columns = messageColumnIO.getLeaves();
  columnReaders = new PrimitiveColumnReader[columns.size()];
}
private void newSchema() throws IOException {
  // Reset it to half of current number and bound it within the limits
  recordCountForNextMemCheck = min(
      max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCountForNextMemCheck / 2),
      MAXIMUM_RECORD_COUNT_FOR_CHECK);

  String json = new Schema(batchSchema).toJson();
  extraMetaData.put(DREMIO_ARROW_SCHEMA_2_1, json);
  List<Type> types = Lists.newArrayList();
  for (Field field : batchSchema) {
    if (field.getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
      continue;
    }
    Type childType = getType(field);
    if (childType != null) {
      types.add(childType);
    }
  }
  Preconditions.checkState(types.size() > 0, "No types for parquet schema");
  schema = new MessageType("root", types);

  int dictionarySize = (int) context.getOptions().getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR);
  final ParquetProperties parquetProperties = new ParquetProperties(
      dictionarySize, writerVersion, enableDictionary,
      new ParquetDirectByteBufferAllocator(columnEncoderAllocator),
      pageSize, true, enableDictionaryForBinary);
  pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(
      codecFactory.getCompressor(codec), schema, parquetProperties);
  store = new ColumnWriteStoreV1(pageStore, pageSize, parquetProperties);
  MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = columnIO.getRecordWriter(store);
  setUp(schema, consumer);
}
public RecordConsumer getRecordWriter(ColumnWriteStore columns) {
  RecordConsumer recordWriter = new MessageColumnIORecordConsumer(columns);
  if (DEBUG) recordWriter = new RecordConsumerLoggingWrapper(recordWriter);
  return validating ? new ValidatingRecordConsumer(recordWriter, getType()) : recordWriter;
}
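The RecordConsumer returned by getRecordWriter is driven through an explicit start/end event protocol. A minimal sketch of feeding it one record, assuming a consumer built from a schema with a single required int32 field named id (the field name, its index, and the value are illustrative assumptions):

// Hedged sketch: writes one record through the RecordConsumer event
// protocol. The field "id" at index 0 is an assumption; it must match
// a field in the schema the consumer was built from.
static void writeOneRecord(org.apache.parquet.io.api.RecordConsumer consumer) {
  consumer.startMessage();
  consumer.startField("id", 0);  // field name and its index in the schema
  consumer.addInteger(42);
  consumer.endField("id", 0);
  consumer.endMessage();
}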
void setLevels() {
  setLevels(0, 0, new String[0], new int[0], Arrays.<ColumnIO>asList(this), Arrays.<ColumnIO>asList(this));
}
public ParquetReader(
    MessageColumnIO messageColumnIO,
    List<BlockMetaData> blocks,
    ParquetDataSource dataSource,
    AggregatedMemoryContext systemMemoryContext,
    DataSize maxReadBlockSize)
{
  this.blocks = blocks;
  this.dataSource = requireNonNull(dataSource, "dataSource is null");
  this.systemMemoryContext = requireNonNull(systemMemoryContext, "systemMemoryContext is null");
  this.currentRowGroupMemoryContext = systemMemoryContext.newAggregatedMemoryContext();
  this.maxReadBlockBytes = requireNonNull(maxReadBlockSize, "maxReadBlockSize is null").toBytes();
  columns = messageColumnIO.getLeaves();
  columnReaders = new PrimitiveColumnReader[columns.size()];
  maxBytesPerCell = new long[columns.size()];
}
private void initStore() {
  // Wires the write path: page store -> column write store -> record consumer.
  pageStore = new ColumnChunkPageWriteStore(compressor, schema, props.getAllocator());
  columnStore = props.newColumnWriteStore(schema, pageStore);
  MessageColumnIO columnIO = new ColumnIOFactory(validating).getColumnIO(schema);
  this.recordConsumer = columnIO.getRecordWriter(columnStore);
  writeSupport.prepareForWrite(recordConsumer);
}
/**
 * Load Decision Tree model.
 *
 * @param pathToMdl Path to model.
 */
private static Model loadDecisionTreeModel(String pathToMdl) {
  try (ParquetFileReader r = ParquetFileReader.open(
      HadoopInputFile.fromPath(new Path(pathToMdl), new Configuration()))) {
    PageReadStore pages;
    final MessageType schema = r.getFooter().getFileMetaData().getSchema();
    final MessageColumnIO colIO = new ColumnIOFactory().getColumnIO(schema);
    final Map<Integer, NodeData> nodes = new TreeMap<>();

    while (null != (pages = r.readNextRowGroup())) {
      final long rows = pages.getRowCount();
      final RecordReader recordReader = colIO.getRecordReader(pages, new GroupRecordConverter(schema));

      for (int i = 0; i < rows; i++) {
        final SimpleGroup g = (SimpleGroup)recordReader.read();
        NodeData nodeData = extractNodeDataFromParquetRow(g);
        nodes.put(nodeData.id, nodeData);
      }
    }

    return buildDecisionTreeModel(nodes);
  }
  catch (IOException e) {
    System.out.println("Error reading parquet file.");
    e.printStackTrace();
  }

  return null;
}
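Each materialized SimpleGroup in the loop above exposes typed accessors keyed by field name and occurrence index. A minimal sketch of unpacking a row, assuming the file carries an int32 id and a double threshold column (both field names are assumptions, since extractNodeDataFromParquetRow is not shown here):

// Hedged sketch: typed reads from one materialized row. The field
// names "id" and "threshold" are illustrative assumptions.
static int readNodeId(SimpleGroup g) {
  return g.getInteger("id", 0);        // first (index 0) occurrence of the field
}

static double readNodeThreshold(SimpleGroup g) {
  return g.getDouble("threshold", 0);
}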
public static List<PrimitiveColumnIO> getColumns(MessageType fileSchema, MessageType requestedSchema) {
  // Resolves the requested (projected) schema against the file schema,
  // with strict type checking, and returns its primitive leaf columns.
  return (new ColumnIOFactory()).getColumnIO(requestedSchema, fileSchema, true).getLeaves();
}
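A minimal sketch of resolving a projection with the helper above, assuming both schemas are parsed from message-type strings (the schema strings themselves are illustrative):

// Hedged sketch: the requested schema projects a subset of the file
// schema; getColumns returns one PrimitiveColumnIO per projected leaf.
MessageType fileSchema = MessageTypeParser.parseMessageType(
    "message doc { required int32 id; required binary name (UTF8); }");
MessageType requestedSchema = MessageTypeParser.parseMessageType(
    "message doc { required int32 id; }");
for (PrimitiveColumnIO leaf : getColumns(fileSchema, requestedSchema)) {
  System.out.println(Arrays.toString(leaf.getFieldPath()));  // e.g. [id]
}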
private void initStore() {
  // Same wiring as above, with a column-index truncate length passed to the page store.
  pageStore = new ColumnChunkPageWriteStore(compressor, schema, props.getAllocator(),
      props.getColumnIndexTruncateLength());
  columnStore = props.newColumnWriteStore(schema, pageStore);
  MessageColumnIO columnIO = new ColumnIOFactory(validating).getColumnIO(schema);
  this.recordConsumer = columnIO.getRecordWriter(columnStore);
  writeSupport.prepareForWrite(recordConsumer);
}
@Override
public RecordReader<T> visit(NoOpFilter noOpFilter) {
  // No filter: materialize every record.
  return new RecordReaderImplementation<T>(
      MessageColumnIO.this,
      recordMaterializer,
      validating,
      new ColumnReadStoreImpl(columns, recordMaterializer.getRootConverter(), getType(), createdBy));
}
store = new ColumnWriteStoreV1(pageStore, parquetProperties);
MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
consumer = columnIO.getRecordWriter(store);
setUp(schema, consumer);
/**
 * Load SVM model.
 *
 * @param pathToMdl Path to model.
 */
private static Model loadLinearSVMModel(String pathToMdl) {
  Vector coefficients = null;
  double interceptor = 0;

  try (ParquetFileReader r = ParquetFileReader.open(
      HadoopInputFile.fromPath(new Path(pathToMdl), new Configuration()))) {
    PageReadStore pages;
    final MessageType schema = r.getFooter().getFileMetaData().getSchema();
    final MessageColumnIO colIO = new ColumnIOFactory().getColumnIO(schema);

    while (null != (pages = r.readNextRowGroup())) {
      final long rows = pages.getRowCount();
      final RecordReader recordReader = colIO.getRecordReader(pages, new GroupRecordConverter(schema));

      for (int i = 0; i < rows; i++) {
        final SimpleGroup g = (SimpleGroup)recordReader.read();
        interceptor = readSVMInterceptor(g);
        coefficients = readSVMCoefficients(g);
      }
    }
  }
  catch (IOException e) {
    System.out.println("Error reading parquet file.");
    e.printStackTrace();
  }

  return new SVMLinearClassificationModel(coefficients, interceptor);
}
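Both loaders share the same footer-then-row-group read pattern and are invoked the same way; a minimal usage sketch with illustrative local paths:

// Hedged sketch: the paths are assumptions; the loaders are the
// private helpers shown above.
Model svmMdl = loadLinearSVMModel("/tmp/models/svm.parquet");
Model treeMdl = loadDecisionTreeModel("/tmp/models/dtree.parquet");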