public ExcelRecordReader(final OperatorContext executionContext, final FileSystemWrapper dfs, final Path path,
                         final ExcelFormatPluginConfig pluginConfig, final List<SchemaPath> columns) {
  super(executionContext, columns);
  this.executionContext = executionContext;
  this.dfs = dfs;
  this.path = path;
  this.pluginConfig = pluginConfig;

  /* Get the list of columns to project, build a lookup table and pass it
   * to the respective parsers for filtering the columns from Excel sheets. */
  if (!isStarQuery() && !isSkipQuery()) {
    final List<SchemaPath> columnInfo = new ArrayList<>(getColumns());
    this.columnsToProject = new HashSet<>();
    for (SchemaPath column : columnInfo) {
      this.columnsToProject.add(column.getAsNamePart().getName());
    }
    logger.debug("number of projected columns: {}", columnsToProject.size());
  } else {
    logger.debug("projected columns is null");
    this.columnsToProject = null;
  }
}
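// A minimal sketch of how a downstream parser might consult this lookup
// table. The shouldEmit helper below is hypothetical, not part of the
// plugin: a null set means a star query (project everything); otherwise a
// cell is emitted only when its column header is in the set.
static boolean shouldEmit(String headerName, Set<String> columnsToProject) {
  return columnsToProject == null || columnsToProject.contains(headerName);
}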
private static void validate(WritableBatch b) {
  for (SerializedField f : b.getDef().getFieldList()) {
    if (!f.hasNamePart()) {
      throw new RuntimeException("Field is missing name");
    }
    if (f.getNamePart().getName().isEmpty()) {
      throw new RuntimeException("Field name is empty");
    }
  }
}
public static Field create(SerializedField serField) {
  if (!serField.hasNamePart() || serField.getNamePart().getName().isEmpty()) {
    throw new RuntimeException("Serialized field is missing a name: " + serField);
  }
  return TypeHelper.getFieldForSerializedField(serField);
}
for (TypedFieldId id : valueFieldIds) {
  // Both branches currently construct the same Field.
  if (id.getIntermediateType() == CompleteType.OBJECT) {
    materializedValueFields[i++] =
        new Field(ref.getAsNamePart().getName(), true, id.getIntermediateType().getType(), null);
  } else {
    materializedValueFields[i++] =
        new Field(ref.getAsNamePart().getName(), true, id.getIntermediateType().getType(), null);
  }
}
DataMode mode = field.getMajorType().getMode();
MinorType minor = field.getMajorType().getMinorType();
String name = field.getNamePart().getName();
boolean changed = false;
if (logger.isDebugEnabled()) {
final String fieldName = field.getNamePart().getName();
public static Field getFieldForSerializedField(SerializedField serializedField) {
  String name = serializedField.getNamePart().getName();
  org.apache.arrow.vector.types.Types.MinorType arrowMinorType =
      getArrowMinorType(serializedField.getMajorType().getMinorType());
  switch (serializedField.getMajorType().getMinorType()) {
      // Complex types carry their children in order; the first child must
      // be the validity ("$bits$") vector.
      Preconditions.checkState(childList.size() > 0, "children should start with validity vector buffer");
      SerializedField bits = childList.get(0);
      Preconditions.checkState(bits.getNamePart().getName().equals("$bits$"),
          "children should start with validity vector buffer: %s", childList);
      for (int i = 1; i < childList.size(); i++) {
        SerializedField child = childList.get(i);
if (column.equals(ROW_KEY_PATH)) {
  if (sample) {
    Field field = CompleteType.VARBINARY.toField(column.getAsNamePart().getName());
    rowKeyVector = outputMutator.addField(field, VarBinaryVector.class);
  } else {
/**
 * Computes the number of buffers occupied by a given serialized field.
 *
 * @param field serialized field
 * @param buffers total buffers in the batch
 * @param buffersStart starting buffer index for the passed field
 * @return number of buffers for the field
 */
private static int fieldBuffersCount(SerializedField field, ByteBuf[] buffers, final int buffersStart) {
  int totalBufferWidth = 0;
  int lastIndex = buffersStart;
  // Accumulate readable bytes until the field's declared buffer length is covered.
  while (totalBufferWidth < field.getBufferLength() && lastIndex < buffers.length) {
    ByteBuf buf = buffers[lastIndex];
    totalBufferWidth += buf.readableBytes();
    ++lastIndex;
  }
  if (totalBufferWidth != field.getBufferLength()) {
    throw new IllegalStateException("not enough buffers for field "
        + field.getNamePart().getName() + " of type " + field.getMajorType());
  }
  return lastIndex - buffersStart;
}
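// A minimal sketch (illustrative, not the actual loader) of how
// fieldBuffersCount could be used to hand each serialized field its
// contiguous run of buffers, assuming batchDef and buffers are in scope.
int bufferStart = 0;
for (SerializedField serField : batchDef.getFieldList()) {
  int count = fieldBuffersCount(serField, buffers, bufferStart);
  ByteBuf[] fieldBuffers = java.util.Arrays.copyOfRange(buffers, bufferStart, bufferStart + count);
  // ... load fieldBuffers into the value vector for serField ...
  bufferStart += count;
}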
private StructVector getOrCreateFamilyVector(OutputMutator output, String familyName, boolean allocateOnCreate) {
  StructVector v = familyVectorMap.get(familyName);
  if (v == null) {
    SchemaPath column = SchemaPath.getSimplePath(familyName);
    Field field = getFieldForNameAndMajorType(column.getAsNamePart().getName(), COLUMN_FAMILY_TYPE);
    if (sample) {
      v = outputMutator.addField(field, StructVector.class);
      if (allocateOnCreate) {
        v.allocateNew();
      }
    } else {
      v = (StructVector) output.getVector(column.getAsNamePart().getName());
    }
    getColumns().add(column);
    familyVectorMap.put(familyName, v);
  }
  return v;
}
/**
 * The data layout of the values inside a repeated field is the same as in a
 * scalar vector of the same SQL type. For example, a repeated int vector is
 * a vector of offsets into a regular int vector that represents the lists.
 * Because the layouts match, we can avoid copying the flattened column value
 * by value, and instead use a vector transfer from the inner vector of the
 * repeated field to the scalar vector produced by the flatten operation.
 * The transfer happens after we determine how many records will fit, since
 * we will hit either the end of the batch or the end of one of the other
 * vectors being copied alongside each new flattened value coming out of the
 * repeated field.
 */
private TransferPair getFlattenFieldTransferPair(FieldReference outputName) {
  final TypedFieldId fieldId = incoming.getSchema().getFieldId(config.getColumn());
  final Class<? extends ValueVector> vectorClass =
      TypeHelper.getValueVectorClass(incoming.getSchema().getColumn(fieldId.getFieldIds()[0]));
  final ValueVector flattenField =
      incoming.getValueAccessorById(vectorClass, fieldId.getFieldIds()).getValueVector();
  final ValueVector vvIn = RepeatedValueVector.class.cast(flattenField).getDataVector();
  return vvIn.getTransferPair(outputName.getAsNamePart().getName(), context.getAllocator());
}
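// A minimal, self-contained sketch of the same zero-copy idea using stock
// Arrow Java rather than Dremio's operator internals. The vector names and
// the FlattenTransferSketch class are illustrative only.
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;
import org.apache.arrow.vector.util.TransferPair;

public class FlattenTransferSketch {
  public static void main(String[] args) throws Exception {
    try (BufferAllocator allocator = new RootAllocator();
         ListVector lists = ListVector.empty("lists", allocator)) {
      // Write three rows of two ints each: [0, 0], [1, 10], [2, 20].
      UnionListWriter writer = lists.getWriter();
      for (int row = 0; row < 3; row++) {
        writer.setPosition(row);
        writer.startList();
        writer.writeInt(row);
        writer.writeInt(row * 10);
        writer.endList();
      }
      lists.setValueCount(3);

      // The inner data vector already holds all six values contiguously, so
      // "flattening" is a buffer transfer, not an element-wise copy.
      ValueVector inner = lists.getDataVector();
      TransferPair tp = inner.getTransferPair("flattened", allocator);
      tp.transfer();
      try (ValueVector flattened = tp.getTo()) {
        System.out.println(flattened.getValueCount()); // prints 6
      }
    }
  }
}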
@Test
public void testColumnOrderingWithUnionVector() throws Exception {
  List<QueryDataBatch> results = null;
  try {
    results = testRunAndReturn(QueryType.SQL, "SELECT * FROM cp.\"type_changes.json\"");
    final RecordBatchDef def = results.get(0).getHeader().getDef();
    assertEquals(2, def.getFieldCount());
    assertEquals("a", def.getField(0).getNamePart().getName());
    assertEquals("b", def.getField(1).getNamePart().getName());
  } finally {
    if (results != null) {
      for (QueryDataBatch r : results) {
        r.release();
      }
    }
  }
}
@Override
public Field materialize(final NamedExpression ne, final BatchSchema schema, ErrorCollector collector,
                         FunctionLookupContext context) {
  return MajorTypeHelper.getFieldForNameAndMajorType(ne.getRef().getAsNamePart().getName(), getMajorType());
}
private static Set<String> extractSchema(QueryWritableBatch batch) {
  List<SerializedField> fields = batch.getHeader().getDef().getFieldList();
  Set<String> schema = Sets.newHashSet();
  for (SerializedField field : fields) {
    schema.add(field.getNamePart().getName());
  }
  return schema;
}
public void writeTo(io.protostuff.Output output, com.dremio.exec.proto.UserBitShared.NamePart message)
    throws java.io.IOException {
  if(message.hasType())
    output.writeEnum(1, message.getType().getNumber(), false);
  if(message.hasName())
    output.writeString(2, message.getName(), false);
  if(message.hasChild())
    output.writeObject(3, message.getChild(), com.dremio.exec.proto.SchemaUserBitShared.NamePart.WRITE, false);
}

public boolean isInitialized(com.dremio.exec.proto.UserBitShared.NamePart message)
private static PathSegment getPathSegment(NamePart n) {
  PathSegment child = n.hasChild() ? getPathSegment(n.getChild()) : null;
  if (n.getType() == Type.ARRAY) {
    return new ArraySegment(child);
  } else {
    return new NameSegment(n.getName(), child);
  }
}
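// A minimal sketch of feeding this converter, assuming the generated
// protobuf builders in com.dremio.exec.proto.UserBitShared. The chain
// below encodes the path a.b[] (an ARRAY step carries no name).
NamePart path = NamePart.newBuilder()
    .setType(Type.NAME)
    .setName("a")
    .setChild(NamePart.newBuilder()
        .setType(Type.NAME)
        .setName("b")
        .setChild(NamePart.newBuilder()
            .setType(Type.ARRAY)))
    .build();
PathSegment segment = getPathSegment(path); // NameSegment "a" -> NameSegment "b" -> ArraySegment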
/**
 * <code>optional string name = 2;</code>
 */
public Builder clearName() {
  bitField0_ = (bitField0_ & ~0x00000002);
  name_ = getDefaultInstance().getName();
  onChanged();
  return this;
}
private static Map<String, MajorType> extractSchema(final RecordBatchDef batchDef) {
  final List<SerializedField> serializedFields = batchDef.getFieldList();
  final Map<String, MajorType> fieldsMap = Maps.newHashMap();
  for (SerializedField field : serializedFields) {
    fieldsMap.put(field.getNamePart().getName(), field.getMajorType());
  }
  return fieldsMap;
}
@Override
boolean materialize(final NamedExpression ne, final VectorContainer batch, ClassProducer producer)
    throws SchemaChangeException {
  final Field outputField =
      MajorTypeHelper.getFieldForNameAndMajorType(ne.getRef().getAsNamePart().getName(), getMajorType());
  batch.addOrGet(outputField).allocateNew();
  fieldId = batch.getValueVectorId(ne.getRef());
  return true;
}
public void load(SerializedField metadata, ArrowBuf buffer) {
  Preconditions.checkArgument(vector.name.equals(metadata.getNamePart().getName()),
      "The field %s doesn't match the provided metadata %s.", vector.name, metadata);
  final int valueCount = metadata.getValueCount();
  final int expectedLength = vector.getValidityBufferSizeFromCount(valueCount);
  final int actualLength = metadata.getBufferLength();
  assert expectedLength == actualLength : "expected and actual buffer sizes do not match";
  vector.clear();
  vector.valueBuffer = buffer.slice(0, actualLength);
  vector.valueBuffer.writerIndex(actualLength);
  vector.valueBuffer.retain();
  vector.valueCount = valueCount;
}