reader.setup(mutator); Map<String, ValueVector> fieldVectorMap = new HashMap<>(); for (VectorWrapper<?> vw : mutator.getContainer()) { fieldVectorMap.put(vw.getField().getName(), vw.getValueVector()); mutator.getContainer().buildSchema(BatchSchema.SelectionVectorMode.NONE); return mutator.getContainer().getSchema();
// Excerpt (enclosing method not visible here): sets the reader up against the mutator,
// calls next() on it once, then builds the mutator container's schema with
// SelectionVectorMode.NONE and returns that schema.
reader.setup(mutator); reader.next(); mutator.getContainer().buildSchema(SelectionVectorMode.NONE); return mutator.getContainer().getSchema();
/**
 * Pulls the next batch from the wrapped reader, applies the record count to the incoming
 * container and, if a projector is present, runs it over the batch.
 *
 * @return the record count returned by the wrapped reader's {@code next()}
 */
@Override
public int next() {
  final int count = inner.next();

  // Give newSchema() a chance to run before the batch is used when the mutator
  // reports that the schema has changed.
  if (mutator.isSchemaChanged()) {
    newSchema();
  }
  incoming.setAllCount(count);

  if (DEBUG_PRINT) {
    final FragmentHandle handle = context.getFragmentHandle();
    // The outgoing schema is built here so it can be included in the debug line below.
    outgoing.buildSchema();
    final String summary = String.format(
        "CoercionReader:%d:%d:%d --> (%d), %s",
        handle.getMajorFragmentId(),
        handle.getMinorFragmentId(),
        context.getStats().getOperatorId(),
        count,
        outgoing.getSchema());
    System.out.println(summary);
    BatchPrinter.printBatch(mutator.getContainer());
  }

  // Without a projector there is nothing further to do for this batch.
  if (projector == null) {
    return count;
  }

  projector.projectRecords(count);
  for (final ValueVector vector : allocationVectors) {
    vector.setValueCount(count);
  }
  return count;
}
public CoercionReader(OperatorContext context, List<SchemaPath> columns, RecordReader inner, BatchSchema targetSchema) { super(context, columns); this.mutator = new SampleMutator(context.getAllocator()); this.incoming = mutator.getContainer(); this.inner = inner; this.outgoing = new VectorContainer(context.getAllocator()); this.targetSchema = targetSchema; this.exprs = new ArrayList<>(targetSchema.getFieldCount()); for (Field field : targetSchema.getFields()) { final FieldReference inputRef = FieldReference.getWithQuotedRef(field.getName()); final CompleteType targetType = CompleteType.fromField(field); if (targetType.isUnion() || targetType.isComplex()) { // we are assuming that map and list fields won't need coercion but inner reader may rely on sampling // a handful of rows to figure out the schema and if the list/map is empty in those rows, the schema will be // incomplete exprs.add(new NamedExpression(inputRef, inputRef)); // one way to fix this issue is to add the target field in the incoming container and rely on // schema learning to handle any changes we hit when reading from the underlying reader mutator.addField(field, TypeHelper.getValueVectorClass(field)); } else { final MajorType majorType = MajorTypeHelper.getMajorTypeForField(field); LogicalExpression cast = FunctionCallFactory.createCast(majorType, inputRef); exprs.add(new NamedExpression(cast, inputRef)); } //TODO check that the expression type is a subset of the targetSchema type } }
// Excerpt (enclosing method not visible here): builds the mutator container's schema with
// BatchSchema.SelectionVectorMode.NONE and returns that schema.
mutator.getContainer().buildSchema(BatchSchema.SelectionVectorMode.NONE); return mutator.getContainer().getSchema();
// Excerpt (enclosing method and the declaration of `h` are not visible here): formats a
// one-line summary containing the major/minor fragment ids, the operator id and the
// incoming and outgoing schemas, prints it to stdout, then prints the mutator's container.
// NOTE(review): setAllCount(2) is called with a hard-coded 2 before printing; the reason is
// not visible in this excerpt — confirm against the enclosing method.
String op = String.format("CoercionReader:%d:%d:%d, %s --> %s", h.getMajorFragmentId(), h.getMinorFragmentId(), context.getStats().getOperatorId(), incoming.getSchema(), outgoing.getSchema()); System.out.println(op); mutator.getContainer().setAllCount(2); BatchPrinter.printBatch(mutator.getContainer());
private BatchSchema getSampledSchema(HTableDescriptor descriptor, DatasetConfig oldConfig) throws Exception { BatchSchema oldSchema = null; ByteString bytes = oldConfig != null ? DatasetHelper.getSchemaBytes(oldConfig) : null; if(bytes != null) { oldSchema = BatchSchema.deserialize(bytes); } final HBaseSubScanSpec spec = new HBaseSubScanSpec(getNamespace(), getTableName(), null, null, null); try ( BufferAllocator allocator = context.getAllocator().newChildAllocator("hbase-sample", 0, Long.MAX_VALUE); SampleMutator mutator = new SampleMutator(allocator); HBaseRecordReader reader = new HBaseRecordReader(connect.getConnection(), spec, GroupScan.ALL_COLUMNS, null, true); ) { reader.setNumRowsPerBatch(100); if(oldSchema != null) { oldSchema.materializeVectors(GroupScan.ALL_COLUMNS, mutator); } // add row key. mutator.addField(CompleteType.VARBINARY.toField(HBaseRecordReader.ROW_KEY), ValueVector.class); // add all column families. for (HColumnDescriptor col : descriptor.getFamilies()) { mutator.addField(CompleteType.struct().toField(col.getNameAsString()), ValueVector.class); } reader.setup(mutator); reader.next(); mutator.getContainer().buildSchema(SelectionVectorMode.NONE); return mutator.getContainer().getSchema(); } catch (ExecutionSetupException e) { throw UserException.dataReadError(e).message("Unable to sample schema for table %s.", key).build(logger); } }