/**
 * Creates a record reader over the given file, initializing an empty row batch
 * for the reader's schema.
 *
 * @param fileReader the ORC file reader to pull stripes from
 * @param options read options forwarded to the parent reader
 * @throws IOException if the parent reader fails to initialize
 */
protected RecordReaderImpl(ReaderImpl fileReader, Reader.Options options) throws IOException {
    super(fileReader, options);
    // Start with no rows consumed from the (freshly created) batch.
    this.rowInBatch = 0;
    this.batch = this.schema.createRowBatch();
}
// Maps an ORC TypeDescription category to a Flink TypeInformation.
// NOTE(review): this fragment is a garbled/truncated extraction -- case labels
// and closing braces are missing; suspected damage is flagged inline.
switch (schema.getCategory()) {
    case BOOLEAN:
        return BasicTypeInfo.BOOLEAN_TYPE_INFO;
    // NOTE(review): a case label (presumably BINARY) is missing before this
    // return -- as written it is unreachable dead code.
    return PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO;
    case STRUCT:
        // Recursively convert each struct field, then wrap in a RowTypeInfo.
        List<TypeDescription> fieldSchemas = schema.getChildren();
        TypeInformation[] fieldTypes = new TypeInformation[fieldSchemas.size()];
        for (int i = 0; i < fieldSchemas.size(); i++) {
            fieldTypes[i] = schemaToTypeInfo(fieldSchemas.get(i));
        // NOTE(review): the for-loop's closing brace appears to be missing here.
        String[] fieldNames = schema.getFieldNames().toArray(new String[]{});
        return new RowTypeInfo(fieldTypes, fieldNames);
    case LIST:
        TypeDescription elementSchema = schema.getChildren().get(0);
        TypeInformation<?> elementType = schemaToTypeInfo(elementSchema);
        // NOTE(review): the LIST return and a MAP case label appear to be
        // missing -- the key/value conversion below presumably belongs to MAP.
        TypeDescription keySchema = schema.getChildren().get(0);
        TypeDescription valSchema = schema.getChildren().get(1);
        TypeInformation<?> keyType = schemaToTypeInfo(keySchema);
        TypeInformation<?> valType = schemaToTypeInfo(valSchema);
// Maps a Hive primitive type category to the corresponding ORC TypeDescription.
// NOTE(review): truncated fragment -- the DECIMAL case's closing braces (and
// anything after them) are cut off at the end.
switch (pinfo.getPrimitiveCategory()) {
    case BOOLEAN: return TypeDescription.createBoolean();
    case BYTE: return TypeDescription.createByte();
    case SHORT: return TypeDescription.createShort();
    case INT: return TypeDescription.createInt();
    case LONG: return TypeDescription.createLong();
    case FLOAT: return TypeDescription.createFloat();
    case DOUBLE: return TypeDescription.createDouble();
    case STRING: return TypeDescription.createString();
    case DATE: return TypeDescription.createDate();
    case TIMESTAMP: return TypeDescription.createTimestamp();
    case BINARY: return TypeDescription.createBinary();
    case DECIMAL: {
        // Carry over the Hive decimal's scale and precision onto the ORC type.
        DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo;
        return TypeDescription.createDecimal()
            .withScale(dinfo.getScale())
            .withPrecision(dinfo.getPrecision());
/**
 * Creates a row batch for this reader's schema.
 *
 * @param useDecimal64 when true, build the batch via {@code createRowBatchV2()}
 *                     (presumably the Decimal64-capable layout -- confirm
 *                     against the TypeDescription API)
 * @return a freshly allocated VectorizedRowBatch
 */
public VectorizedRowBatch createRowBatch(boolean useDecimal64) {
    if (useDecimal64) {
        return this.schema.createRowBatchV2();
    }
    return this.schema.createRowBatch();
}
/** * Computes the ORC projection mask of the fields to include from the selected fields.rowOrcInputFormat.nextRecord(null). * * @return The ORC projection mask. */ private boolean[] computeProjectionMask() { // mask with all fields of the schema boolean[] projectionMask = new boolean[schema.getMaximumId() + 1]; // for each selected field for (int inIdx : selectedFields) { // set all nested fields of a selected field to true TypeDescription fieldSchema = schema.getChildren().get(inIdx); for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) { projectionMask[i] = true; } } return projectionMask; }
public StructColumnConverter(TypeDescription schema) { List<TypeDescription> kids = schema.getChildren(); childrenConverters = new JsonConverter[kids.size()]; for (int c = 0; c < childrenConverters.length; ++c) { childrenConverters[c] = createConverter(kids.get(c)); } fieldNames = schema.getFieldNames(); }
/**
 * Maps a TypeDesc's data type to the corresponding ORC TypeDescription.
 * NOTE(review): truncated fragment -- the RECORD case body (and any remaining
 * cases plus the method closers) are cut off at the end.
 */
public static TypeDescription convertTypeInfo(TypeDesc desc) {
    switch (desc.getDataType().getType()) {
        case BOOLEAN: return TypeDescription.createBoolean();
        case BIT: return TypeDescription.createByte();
        case INT2: return TypeDescription.createShort();
        // INET4 (IPv4 address) shares the 32-bit int representation.
        case INT4:
        case INET4:
            return TypeDescription.createInt();
        case INT8: return TypeDescription.createLong();
        case FLOAT4: return TypeDescription.createFloat();
        case FLOAT8: return TypeDescription.createDouble();
        case TEXT: return TypeDescription.createString();
        case DATE: return TypeDescription.createDate();
        case TIMESTAMP: return TypeDescription.createTimestamp();
        case BLOB: return TypeDescription.createBinary();
        case CHAR:
            // Preserve the declared maximum length of the CHAR column.
            return TypeDescription.createChar()
                .withMaxLength(desc.getDataType().getLength());
        case RECORD: {
            TypeDescription result = TypeDescription.createStruct();
// Maps a type id to the corresponding ORC TypeDescription, accumulating into
// `result`.
// NOTE(review): truncated fragment -- the UUID case body and everything after
// it are cut off.
switch (type.typeId()) {
    case BOOLEAN: result = TypeDescription.createBoolean(); break;
    case INTEGER: result = TypeDescription.createInt(); break;
    case LONG: result = TypeDescription.createLong(); break;
    case FLOAT: result = TypeDescription.createFloat(); break;
    case DOUBLE: result = TypeDescription.createDouble(); break;
    case DATE: result = TypeDescription.createDate(); break;
    // TIME is stored as an int (ORC has no dedicated time type) -- presumably
    // a time-of-day encoding; confirm against the matching writer/reader.
    case TIME: result = TypeDescription.createInt(); break;
    case TIMESTAMP: result = TypeDescription.createTimestamp(); break;
    case STRING: result = TypeDescription.createString(); break;
    case UUID:
/**
 * Maps an OrcSpec.DataType to the corresponding ORC TypeDescription.
 * NOTE(review): truncated fragment -- the switch/method closers and any
 * default handling are cut off after the TIMESTAMP case.
 */
private TypeDescription determineOrcType( OrcSpec.DataType dataType ) {
    switch ( dataType ) {
        case BOOLEAN: return TypeDescription.createBoolean();
        case TINYINT: return TypeDescription.createByte();
        case SMALLINT: return TypeDescription.createShort();
        case INTEGER: return TypeDescription.createInt();
        case BIGINT: return TypeDescription.createLong();
        case DATE: return TypeDescription.createDate();
        case BINARY: return TypeDescription.createBinary();
        // CHAR/VARCHAR are created without an explicit max length, so they use
        // the ORC defaults -- confirm that is intended.
        case CHAR: return TypeDescription.createChar();
        case VARCHAR: return TypeDescription.createVarchar();
        case STRING: return TypeDescription.createString();
        case FLOAT: return TypeDescription.createFloat();
        case DOUBLE: return TypeDescription.createDouble();
        // DECIMAL uses the default precision/scale -- confirm that is intended.
        case DECIMAL: return TypeDescription.createDecimal();
        case TIMESTAMP: return TypeDescription.createTimestamp();
/**
 * Converts the input stream to an ORC file at {@code outputOrc}.
 * The schema models map-style entities (id/type/tags/lat/lon/nds/members/...)
 * -- presumably OpenStreetMap data; confirm against the caller.
 * NOTE(review): truncated fragment -- the method body continues past the batch
 * creation below.
 */
public static void convert(InputStream input, String outputOrc) throws IOException {
    TypeDescription schema = createStruct()
        .addField("id", createLong())
        .addField("type", createString())
        .addField("tags", createMap(
            createString(),
            createString()
        ))
        // lat/lon as fixed-point decimals with 7 fractional digits.
        .addField("lat", createDecimal().withScale(7).withPrecision(9))
        .addField("lon", createDecimal().withScale(7).withPrecision(10))
        .addField("nds", createList(
            createStruct()
                .addField("ref", createLong())
        ))
        .addField("members", createList(
            createStruct()
                .addField("type", createString())
                .addField("ref", createLong())
                .addField("role", createString())
        ))
        .addField("changeset", createLong())
        .addField("timestamp", createTimestamp())
        .addField("uid", createLong())
        .addField("user", createString())
        .addField("version", createLong())
        .addField("visible", createBoolean());
    VectorizedRowBatch batch = schema.createRowBatch();
/**
 * Maps a DataType to the corresponding ORC TypeDescription.
 *
 * @param fieldType the source field type to convert
 * @return the equivalent ORC type description
 * @throws UnsupportedOperationException if the type has no ORC equivalent
 */
private static TypeDescription convertType(final DataType fieldType) {
    if (fieldType instanceof BooleanType) {
        return TypeDescription.createBoolean();
    }
    if (fieldType instanceof ByteType) {
        return TypeDescription.createByte();
    }
    if (fieldType instanceof ShortType) {
        return TypeDescription.createShort();
    }
    if (fieldType instanceof IntType) {
        return TypeDescription.createInt();
    }
    if (fieldType instanceof LongType) {
        return TypeDescription.createLong();
    }
    if (fieldType instanceof FloatType) {
        return TypeDescription.createFloat();
    }
    if (fieldType instanceof DoubleType) {
        return TypeDescription.createDouble();
    }
    // Byte arrays share the STRING representation.
    if (fieldType instanceof StringType || fieldType instanceof ByteArrayType) {
        return TypeDescription.createString();
    }
    if (fieldType instanceof DateType) {
        return TypeDescription.createDate();
    }
    if (fieldType instanceof TimestampType) {
        return TypeDescription.createTimestamp();
    }
    if (fieldType instanceof DecimalType) {
        // Carry the declared precision and scale over onto the ORC decimal.
        final DecimalType decimal = (DecimalType) fieldType;
        return TypeDescription.createDecimal()
            .withPrecision(decimal.precision())
            .withScale(decimal.scale());
    }
    throw new UnsupportedOperationException("Unsupported category: " + fieldType);
}
}
/**
 * Populates an OrcProto.Type.Builder from the given TypeDescription.
 * NOTE(review): truncated fragment -- the MAP case is cut off mid-loop;
 * suspected bugs are flagged inline.
 */
public static List<TypeDescription> setTypeBuilderFromSchema(
    OrcProto.Type.Builder type, TypeDescription schema) {
    List<TypeDescription> children = schema.getChildren();
    switch (schema.getCategory()) {
        case BOOLEAN:
            type.setKind(OrcProto.Type.Kind.BOOLEAN);
        // NOTE(review): missing break above -- BOOLEAN falls through into CHAR
        // and would also call setKind(CHAR)/setMaximumLength. Likely a bug or
        // lines lost in extraction.
        case CHAR:
            type.setKind(OrcProto.Type.Kind.CHAR);
            type.setMaximumLength(schema.getMaxLength());
            break;
        case VARCHAR:
            type.setKind(OrcProto.Type.Kind.VARCHAR);
            type.setMaximumLength(schema.getMaxLength());
            break;
        // NOTE(review): BINARY falls through into DECIMAL -- a
        // setKind(BINARY) + break is presumably missing here.
        case BINARY:
        case DECIMAL:
            type.setKind(OrcProto.Type.Kind.DECIMAL);
            type.setPrecision(schema.getPrecision());
            type.setScale(schema.getScale());
            break;
        case LIST:
            type.setKind(OrcProto.Type.Kind.LIST);
            // A LIST has exactly one child: the element type.
            type.addSubtypes(children.get(0).getId());
            break;
        case MAP:
            type.setKind(OrcProto.Type.Kind.MAP);
            // Register both the key and the value child types as subtypes.
            for(TypeDescription t: children) {
                type.addSubtypes(t.getId());
/**
 * Creates a ColumnVector of the appropriate concrete type for the given schema
 * node.
 * NOTE(review): this fragment is badly garbled -- case labels are missing, a
 * struct-building section is duplicated verbatim, and braces are unbalanced;
 * suspected damage is flagged inline.
 */
private ColumnVector createColumn(TypeDescription type, int batchSize,
    final boolean useDecimal64ColumnVectors) {
    switch (type.getCategory()) {
        case BOOLEAN:
        case BYTE:
            // NOTE(review): returning a TimestampColumnVector for BOOLEAN/BYTE
            // looks wrong (a LongColumnVector would be expected) -- presumably
            // case labels were lost in extraction.
            return new TimestampColumnVector(batchSize);
        case DECIMAL:
            // Decimal64 vectors only hold precisions up to MAX_DECIMAL64_PRECISION.
            if (useDecimal64ColumnVectors
                && type.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) {
                return new Decimal64ColumnVector(batchSize, type.getPrecision(), type.getScale());
            } else {
                return new DecimalColumnVector(batchSize, type.getPrecision(), type.getScale());
        // NOTE(review): a STRUCT case label appears to be missing, and the
        // block below is duplicated -- extraction damage.
        List<TypeDescription> subtypeIdxs = type.getChildren();
        ColumnVector[] fieldVector = new ColumnVector[subtypeIdxs.size()];
        for (int i = 0; i < fieldVector.length; ++i) {
        List<TypeDescription> subtypeIdxs = type.getChildren();
        ColumnVector[] fieldVector = new ColumnVector[subtypeIdxs.size()];
        for (int i = 0; i < fieldVector.length; ++i) {
        // NOTE(review): a LIST case label appears to be missing before this
        // return.
        return new ListColumnVector(batchSize,
            createColumn(type.getChildren().get(0), batchSize, useDecimal64ColumnVectors));
        case MAP:
            // children.get(0) is the key type, children.get(1) the value type.
            List<TypeDescription> subtypeIdxs = type.getChildren();
            return new MapColumnVector(batchSize,
                createColumn(subtypeIdxs.get(0), batchSize, useDecimal64ColumnVectors),
                createColumn(subtypeIdxs.get(1), batchSize, useDecimal64ColumnVectors));
        default:
            throw new IllegalArgumentException("LLAP does not support " + type.getCategory());
/**
 * Adds a field named colName of the given column type to typeDescription,
 * recursing through subColDesc for composite (ARRAY/MAP) columns.
 * NOTE(review): truncated fragment -- the MAP case is cut off mid-loop.
 */
private void addField(TypeDescription typeDescription, String colName, ColumnType colType,
    ColumnDescription subColDesc) {
    switch (colType) {
        case STRING:
            typeDescription.addField(colName, TypeDescription.createString());
            break;
        case LONG:
            typeDescription.addField(colName, TypeDescription.createLong());
            break;
        case DOUBLE:
            typeDescription.addField(colName, TypeDescription.createDouble());
            break;
        case BOOLEAN:
            typeDescription.addField(colName, TypeDescription.createBoolean());
            break;
        case TIMESTAMP:
            typeDescription.addField(colName, TypeDescription.createTimestamp());
            break;
        case ARRAY:
            // This ARRAY differs from ORC's LIST; it is implemented as a STRUCT.
            TypeDescription structType = new TypeDescription(TypeDescription.Category.STRUCT);
            for (int i = 0; i < subColDesc.getArrColDesc().size(); i++) {
                ColumnDescription childColDesc = subColDesc.getArrColDesc().get(i);
                // Array elements become synthetic struct fields named c0, c1, ...
                addField(structType, "c" + i, childColDesc.getType(), childColDesc);
            // NOTE(review): the closing brace of this for-loop appears to be
            // missing in the fragment.
            typeDescription.addField(colName, structType);
            break;
        case MAP:
            // This MAP also differs from ORC's MAP; it too is implemented as a
            // STRUCT keyed by the map's field names.
            structType = new TypeDescription(TypeDescription.Category.STRUCT);
            for (String key : subColDesc.getMapColDesc().keySet()) {
                ColumnDescription childColDesc = subColDesc.getMapColDesc().get(key);
                addField(structType, key, childColDesc.getType(), childColDesc);
// Builds an include mask over the flattened column ids for the selected columns.
// NOTE(review): garbled fragment -- the enclosing method header, the "*" branch
// body, and part of an if-condition are missing; flagged inline.
int numFlattenedCols = schema.getMaximumId();
boolean[] results = new boolean[numFlattenedCols + 1];
if ("*".equals(selectedColumns)) {
    // NOTE(review): the select-all branch body is missing, and the next line is
    // a bare expression -- presumably the tail of a lost if-condition checking
    // that the schema is a STRUCT.
    schema.getCategory() == TypeDescription.Category.STRUCT) {
    List<String> fieldNames = schema.getFieldNames();
    List<TypeDescription> fields = schema.getChildren();
    for (String column: selectedColumns.split((","))) {
        TypeDescription col = findColumn(column, fieldNames, fields);
        if (col != null) {
            // Mark the column and all of its nested columns -- their flattened
            // ids form the contiguous range [getId(), getMaximumId()].
            for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
                results[i] = true;
/**
 * Builds an ORC struct TypeDescription from the columns of the given Schema.
 *
 * @param schema the source schema whose columns are converted
 * @return a struct with one ORC field per source column
 * @throws RuntimeException if a column has kind None or an unknown kind
 */
private static TypeDescription getSchema(Schema schema) {
    TypeDescription struct = TypeDescription.createStruct();
    for (String columnName : schema.getColumnNames()) {
        ColumnDescription description = schema.getDescription(columnName);
        TypeDescription fieldType;
        switch (description.kind) {
            case String:
            case Json:
                // JSON columns are stored as their string representation.
                fieldType = TypeDescription.createString();
                break;
            case Date:
                // Dates are stored with timestamp precision.
                fieldType = TypeDescription.createTimestamp();
                break;
            case Integer:
                fieldType = TypeDescription.createInt();
                break;
            case Duration:
            case Double:
                // Durations share the double representation.
                fieldType = TypeDescription.createDouble();
                break;
            case None:
            default:
                throw new RuntimeException("Unexpected data type " + description.kind);
        }
        struct.addField(columnName, fieldType);
    }
    return struct;
}
/**
 * Returns the Java class used to represent values of the given ORC schema node.
 * NOTE(review): garbled/truncated fragment -- a case label is missing and the
 * MAP case is cut off; flagged inline.
 */
private static Class<?> getClassForType(TypeDescription schema) {
    switch (schema.getCategory()) {
        case BOOLEAN:
            return Boolean.class;
        // NOTE(review): a case label (presumably STRUCT) is missing before this
        // return -- as written it is unreachable.
        return Row.class;
        case LIST:
            // Lists are represented as arrays of the element type's class.
            Class<?> childClass = getClassForType(schema.getChildren().get(0));
            return Array.newInstance(childClass, 0).getClass();
        case MAP:
// Builds encoded tree readers for each struct child that actually has data,
// recording each created reader's schema-child index in columnMapping.
// NOTE(review): garbled fragment -- the enclosing method signature, the loop
// header defining schemaChildIx, and several braces are missing; flagged inline.
CompressionCodec codec, TreeReaderFactory.Context context,
    int[] columnMapping) throws IOException {
    if (schema.getCategory() != Category.STRUCT) {
        throw new AssertionError("Schema is not a struct: " + schema);
    List<TypeDescription> children = schema.getChildren();
    int childCount = children.size(), includedCount = 0;
    for (int childIx = 0; childIx < childCount; ++childIx) {
        // A child is skipped when the batch carries neither data nor vectors
        // for its flattened column id.
        int batchColIx = children.get(childIx).getId();
        if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Column at " + childIx + " " + children.get(childIx).getId() + ":" +
                    children.get(childIx).toString() + " has no data");
    // NOTE(review): schemaChildIx is used below but its defining loop header is
    // missing from this fragment.
    int batchColIx = children.get(schemaChildIx).getId();
    if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) continue;
    // Pre-increment fills reader slots -- presumably inclChildIx starts at -1
    // in the missing surrounding code; confirm.
    childReaders[++inclChildIx] = createEncodedTreeReader(
        schema.getChildren().get(schemaChildIx), encodings, batch, codec, context);
    columnMapping[inclChildIx] = schemaChildIx;
/**
 * Derives an ORC struct schema from the Hive table properties, if possible.
 *
 * <p>Returns {@code null} when {@code tableProperties} is null or does not
 * carry both the column-names and column-types properties.
 *
 * @param tableProperties the table properties; may be null
 * @return the struct schema described by the properties, or {@code null}
 */
private static TypeDescription getTypeDescriptionFromTableProperties(Properties tableProperties) {
    TypeDescription schema = null;
    if (tableProperties != null) {
        final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
        final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
        if (!Strings.isNullOrEmpty(columnNameProperty)
            && !Strings.isNullOrEmpty(columnTypeProperty)) {
            // Both properties are guaranteed non-empty inside this guard, so
            // the previous `length() == 0` empty-list fallbacks were dead code
            // and have been removed.
            List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
            List<TypeInfo> columnTypes =
                TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
            schema = TypeDescription.createStruct();
            // Pair names and types positionally into struct fields.
            for (int i = 0; i < columnNames.size(); i++) {
                schema.addField(columnNames.get(i),
                    OrcInputFormat.convertTypeInfo(columnTypes.get(i)));
            }
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("ORC schema = " + schema);
    }
    return schema;
}
public static TypeDescription[] genIncludedTypes(TypeDescription fileSchema, List<Integer> included, Integer recursiveStruct) { TypeDescription[] result = new TypeDescription[included.size()]; List<TypeDescription> children = fileSchema.getChildren(); for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) { int indexInBatchCols = included.indexOf(columnNumber); if (indexInBatchCols >= 0) { result[indexInBatchCols] = children.get(columnNumber); } else if (recursiveStruct != null && recursiveStruct == columnNumber) { // This assumes all struct cols immediately follow struct List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren(); for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) { int columnNumberNested = columnNumber + 1 + columnNumberDelta; int nestedIxInBatchCols = included.indexOf(columnNumberNested); if (nestedIxInBatchCols >= 0) { result[nestedIxInBatchCols] = nestedChildren.get(columnNumberDelta); } } } } return result; }