/**
 * Creates a record reader over the given file, initializing an empty row batch
 * for the reader's schema.
 *
 * @param fileReader the ORC file reader to pull stripes from
 * @param options read options forwarded to the parent reader
 * @throws IOException if the parent reader fails to initialize
 */
protected RecordReaderImpl(ReaderImpl fileReader, Reader.Options options) throws IOException {
    super(fileReader, options);
    // Start with no rows consumed from the (freshly created) batch.
    this.rowInBatch = 0;
    this.batch = this.schema.createRowBatch();
}
// Maps an ORC TypeDescription category to a Flink TypeInformation.
// NOTE(review): this fragment is a garbled/truncated extraction -- case labels
// and closing braces are missing; suspected damage is flagged inline.
switch (schema.getCategory()) {
    case BOOLEAN:
        return BasicTypeInfo.BOOLEAN_TYPE_INFO;
    // NOTE(review): a case label (presumably BINARY) is missing before this
    // return -- as written it is unreachable dead code.
    return PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO;
    case STRUCT:
        // Recursively convert each struct field, then wrap in a RowTypeInfo.
        List<TypeDescription> fieldSchemas = schema.getChildren();
        TypeInformation[] fieldTypes = new TypeInformation[fieldSchemas.size()];
        for (int i = 0; i < fieldSchemas.size(); i++) {
            fieldTypes[i] = schemaToTypeInfo(fieldSchemas.get(i));
        // NOTE(review): the for-loop's closing brace appears to be missing here.
        String[] fieldNames = schema.getFieldNames().toArray(new String[]{});
        return new RowTypeInfo(fieldTypes, fieldNames);
    case LIST:
        TypeDescription elementSchema = schema.getChildren().get(0);
        TypeInformation<?> elementType = schemaToTypeInfo(elementSchema);
        // NOTE(review): the LIST return and a MAP case label appear to be
        // missing -- the key/value conversion below presumably belongs to MAP.
        TypeDescription keySchema = schema.getChildren().get(0);
        TypeDescription valSchema = schema.getChildren().get(1);
        TypeInformation<?> keyType = schemaToTypeInfo(keySchema);
        TypeInformation<?> valType = schemaToTypeInfo(valSchema);
// Maps a Hive primitive type category to the corresponding ORC TypeDescription.
// NOTE(review): truncated fragment -- the DECIMAL case's closing braces (and
// anything after them) are cut off at the end.
switch (pinfo.getPrimitiveCategory()) {
    case BOOLEAN: return TypeDescription.createBoolean();
    case BYTE: return TypeDescription.createByte();
    case SHORT: return TypeDescription.createShort();
    case INT: return TypeDescription.createInt();
    case LONG: return TypeDescription.createLong();
    case FLOAT: return TypeDescription.createFloat();
    case DOUBLE: return TypeDescription.createDouble();
    case STRING: return TypeDescription.createString();
    case DATE: return TypeDescription.createDate();
    case TIMESTAMP: return TypeDescription.createTimestamp();
    case BINARY: return TypeDescription.createBinary();
    case DECIMAL: {
        // Carry over the Hive decimal's scale and precision onto the ORC type.
        DecimalTypeInfo dinfo = (DecimalTypeInfo) pinfo;
        return TypeDescription.createDecimal()
            .withScale(dinfo.getScale())
            .withPrecision(dinfo.getPrecision());
/**
 * Creates a row batch for this reader's schema.
 *
 * @param useDecimal64 when true, build the batch via {@code createRowBatchV2()}
 *                     (presumably the Decimal64-capable layout -- confirm
 *                     against the TypeDescription API)
 * @return a freshly allocated VectorizedRowBatch
 */
public VectorizedRowBatch createRowBatch(boolean useDecimal64) {
    if (useDecimal64) {
        return this.schema.createRowBatchV2();
    }
    return this.schema.createRowBatch();
}
/** * Computes the ORC projection mask of the fields to include from the selected fields.rowOrcInputFormat.nextRecord(null). * * @return The ORC projection mask. */ private boolean[] computeProjectionMask() { // mask with all fields of the schema boolean[] projectionMask = new boolean[schema.getMaximumId() + 1]; // for each selected field for (int inIdx : selectedFields) { // set all nested fields of a selected field to true TypeDescription fieldSchema = schema.getChildren().get(inIdx); for (int i = fieldSchema.getId(); i <= fieldSchema.getMaximumId(); i++) { projectionMask[i] = true; } } return projectionMask; }
public StructColumnConverter(TypeDescription schema) { List<TypeDescription> kids = schema.getChildren(); childrenConverters = new JsonConverter[kids.size()]; for (int c = 0; c < childrenConverters.length; ++c) { childrenConverters[c] = createConverter(kids.get(c)); } fieldNames = schema.getFieldNames(); }
/**
 * Maps a TypeDesc's data type to the corresponding ORC TypeDescription.
 * NOTE(review): truncated fragment -- the RECORD case body (and any remaining
 * cases plus the method closers) are cut off at the end.
 */
public static TypeDescription convertTypeInfo(TypeDesc desc) {
    switch (desc.getDataType().getType()) {
        case BOOLEAN: return TypeDescription.createBoolean();
        case BIT: return TypeDescription.createByte();
        case INT2: return TypeDescription.createShort();
        // INET4 (IPv4 address) shares the 32-bit int representation.
        case INT4:
        case INET4:
            return TypeDescription.createInt();
        case INT8: return TypeDescription.createLong();
        case FLOAT4: return TypeDescription.createFloat();
        case FLOAT8: return TypeDescription.createDouble();
        case TEXT: return TypeDescription.createString();
        case DATE: return TypeDescription.createDate();
        case TIMESTAMP: return TypeDescription.createTimestamp();
        case BLOB: return TypeDescription.createBinary();
        case CHAR:
            // Preserve the declared maximum length of the CHAR column.
            return TypeDescription.createChar()
                .withMaxLength(desc.getDataType().getLength());
        case RECORD: {
            TypeDescription result = TypeDescription.createStruct();
// Maps a type id to the corresponding ORC TypeDescription, accumulating into
// `result`.
// NOTE(review): truncated fragment -- the UUID case body and everything after
// it are cut off.
switch (type.typeId()) {
    case BOOLEAN: result = TypeDescription.createBoolean(); break;
    case INTEGER: result = TypeDescription.createInt(); break;
    case LONG: result = TypeDescription.createLong(); break;
    case FLOAT: result = TypeDescription.createFloat(); break;
    case DOUBLE: result = TypeDescription.createDouble(); break;
    case DATE: result = TypeDescription.createDate(); break;
    // TIME is stored as an int (ORC has no dedicated time type) -- presumably
    // a time-of-day encoding; confirm against the matching writer/reader.
    case TIME: result = TypeDescription.createInt(); break;
    case TIMESTAMP: result = TypeDescription.createTimestamp(); break;
    case STRING: result = TypeDescription.createString(); break;
    case UUID:
/**
 * Maps an OrcSpec.DataType to the corresponding ORC TypeDescription.
 * NOTE(review): truncated fragment -- the switch/method closers and any
 * default handling are cut off after the TIMESTAMP case.
 */
private TypeDescription determineOrcType( OrcSpec.DataType dataType ) {
    switch ( dataType ) {
        case BOOLEAN: return TypeDescription.createBoolean();
        case TINYINT: return TypeDescription.createByte();
        case SMALLINT: return TypeDescription.createShort();
        case INTEGER: return TypeDescription.createInt();
        case BIGINT: return TypeDescription.createLong();
        case DATE: return TypeDescription.createDate();
        case BINARY: return TypeDescription.createBinary();
        // CHAR/VARCHAR are created without an explicit max length, so they use
        // the ORC defaults -- confirm that is intended.
        case CHAR: return TypeDescription.createChar();
        case VARCHAR: return TypeDescription.createVarchar();
        case STRING: return TypeDescription.createString();
        case FLOAT: return TypeDescription.createFloat();
        case DOUBLE: return TypeDescription.createDouble();
        // DECIMAL uses the default precision/scale -- confirm that is intended.
        case DECIMAL: return TypeDescription.createDecimal();
        case TIMESTAMP: return TypeDescription.createTimestamp();
/**
 * Converts the input stream to an ORC file at {@code outputOrc}.
 * The schema models map-style entities (id/type/tags/lat/lon/nds/members/...)
 * -- presumably OpenStreetMap data; confirm against the caller.
 * NOTE(review): truncated fragment -- the method body continues past the batch
 * creation below.
 */
public static void convert(InputStream input, String outputOrc) throws IOException {
    TypeDescription schema = createStruct()
        .addField("id", createLong())
        .addField("type", createString())
        .addField("tags", createMap(
            createString(),
            createString()
        ))
        // lat/lon as fixed-point decimals with 7 fractional digits.
        .addField("lat", createDecimal().withScale(7).withPrecision(9))
        .addField("lon", createDecimal().withScale(7).withPrecision(10))
        .addField("nds", createList(
            createStruct()
                .addField("ref", createLong())
        ))
        .addField("members", createList(
            createStruct()
                .addField("type", createString())
                .addField("ref", createLong())
                .addField("role", createString())
        ))
        .addField("changeset", createLong())
        .addField("timestamp", createTimestamp())
        .addField("uid", createLong())
        .addField("user", createString())
        .addField("version", createLong())
        .addField("visible", createBoolean());
    VectorizedRowBatch batch = schema.createRowBatch();
/**
 * Maps a DataType to the corresponding ORC TypeDescription.
 *
 * @param fieldType the source field type to convert
 * @return the equivalent ORC type description
 * @throws UnsupportedOperationException if the type has no ORC equivalent
 */
private static TypeDescription convertType(final DataType fieldType) {
    if (fieldType instanceof BooleanType) {
        return TypeDescription.createBoolean();
    }
    if (fieldType instanceof ByteType) {
        return TypeDescription.createByte();
    }
    if (fieldType instanceof ShortType) {
        return TypeDescription.createShort();
    }
    if (fieldType instanceof IntType) {
        return TypeDescription.createInt();
    }
    if (fieldType instanceof LongType) {
        return TypeDescription.createLong();
    }
    if (fieldType instanceof FloatType) {
        return TypeDescription.createFloat();
    }
    if (fieldType instanceof DoubleType) {
        return TypeDescription.createDouble();
    }
    // Byte arrays share the STRING representation.
    if (fieldType instanceof StringType || fieldType instanceof ByteArrayType) {
        return TypeDescription.createString();
    }
    if (fieldType instanceof DateType) {
        return TypeDescription.createDate();
    }
    if (fieldType instanceof TimestampType) {
        return TypeDescription.createTimestamp();
    }
    if (fieldType instanceof DecimalType) {
        // Carry the declared precision and scale over onto the ORC decimal.
        final DecimalType decimal = (DecimalType) fieldType;
        return TypeDescription.createDecimal()
            .withPrecision(decimal.precision())
            .withScale(decimal.scale());
    }
    throw new UnsupportedOperationException("Unsupported category: " + fieldType);
}
}
/**
 * Populates an OrcProto.Type.Builder from the given TypeDescription.
 * NOTE(review): truncated fragment -- the MAP case is cut off mid-loop;
 * suspected bugs are flagged inline.
 */
public static List<TypeDescription> setTypeBuilderFromSchema(
    OrcProto.Type.Builder type, TypeDescription schema) {
    List<TypeDescription> children = schema.getChildren();
    switch (schema.getCategory()) {
        case BOOLEAN:
            type.setKind(OrcProto.Type.Kind.BOOLEAN);
        // NOTE(review): missing break above -- BOOLEAN falls through into CHAR
        // and would also call setKind(CHAR)/setMaximumLength. Likely a bug or
        // lines lost in extraction.
        case CHAR:
            type.setKind(OrcProto.Type.Kind.CHAR);
            type.setMaximumLength(schema.getMaxLength());
            break;
        case VARCHAR:
            type.setKind(OrcProto.Type.Kind.VARCHAR);
            type.setMaximumLength(schema.getMaxLength());
            break;
        // NOTE(review): BINARY falls through into DECIMAL -- a
        // setKind(BINARY) + break is presumably missing here.
        case BINARY:
        case DECIMAL:
            type.setKind(OrcProto.Type.Kind.DECIMAL);
            type.setPrecision(schema.getPrecision());
            type.setScale(schema.getScale());
            break;
        case LIST:
            type.setKind(OrcProto.Type.Kind.LIST);
            // A LIST has exactly one child: the element type.
            type.addSubtypes(children.get(0).getId());
            break;
        case MAP:
            type.setKind(OrcProto.Type.Kind.MAP);
            // Register both the key and the value child types as subtypes.
            for(TypeDescription t: children) {
                type.addSubtypes(t.getId());
/**
 * Creates a ColumnVector of the appropriate concrete type for the given schema
 * node.
 * NOTE(review): this fragment is badly garbled -- case labels are missing, a
 * struct-building section is duplicated verbatim, and braces are unbalanced;
 * suspected damage is flagged inline.
 */
private ColumnVector createColumn(TypeDescription type, int batchSize,
    final boolean useDecimal64ColumnVectors) {
    switch (type.getCategory()) {
        case BOOLEAN:
        case BYTE:
            // NOTE(review): returning a TimestampColumnVector for BOOLEAN/BYTE
            // looks wrong (a LongColumnVector would be expected) -- presumably
            // case labels were lost in extraction.
            return new TimestampColumnVector(batchSize);
        case DECIMAL:
            // Decimal64 vectors only hold precisions up to MAX_DECIMAL64_PRECISION.
            if (useDecimal64ColumnVectors
                && type.getPrecision() <= TypeDescription.MAX_DECIMAL64_PRECISION) {
                return new Decimal64ColumnVector(batchSize, type.getPrecision(), type.getScale());
            } else {
                return new DecimalColumnVector(batchSize, type.getPrecision(), type.getScale());
        // NOTE(review): a STRUCT case label appears to be missing, and the
        // block below is duplicated -- extraction damage.
        List<TypeDescription> subtypeIdxs = type.getChildren();
        ColumnVector[] fieldVector = new ColumnVector[subtypeIdxs.size()];
        for (int i = 0; i < fieldVector.length; ++i) {
        List<TypeDescription> subtypeIdxs = type.getChildren();
        ColumnVector[] fieldVector = new ColumnVector[subtypeIdxs.size()];
        for (int i = 0; i < fieldVector.length; ++i) {
        // NOTE(review): a LIST case label appears to be missing before this
        // return.
        return new ListColumnVector(batchSize,
            createColumn(type.getChildren().get(0), batchSize, useDecimal64ColumnVectors));
        case MAP:
            // children.get(0) is the key type, children.get(1) the value type.
            List<TypeDescription> subtypeIdxs = type.getChildren();
            return new MapColumnVector(batchSize,
                createColumn(subtypeIdxs.get(0), batchSize, useDecimal64ColumnVectors),
                createColumn(subtypeIdxs.get(1), batchSize, useDecimal64ColumnVectors));
        default:
            throw new IllegalArgumentException("LLAP does not support " + type.getCategory());
/**
 * Adds a field named colName of the given column type to typeDescription,
 * recursing through subColDesc for composite (ARRAY/MAP) columns.
 * NOTE(review): truncated fragment -- the MAP case is cut off mid-loop.
 */
private void addField(TypeDescription typeDescription, String colName, ColumnType colType,
    ColumnDescription subColDesc) {
    switch (colType) {
        case STRING:
            typeDescription.addField(colName, TypeDescription.createString());
            break;
        case LONG:
            typeDescription.addField(colName, TypeDescription.createLong());
            break;
        case DOUBLE:
            typeDescription.addField(colName, TypeDescription.createDouble());
            break;
        case BOOLEAN:
            typeDescription.addField(colName, TypeDescription.createBoolean());
            break;
        case TIMESTAMP:
            typeDescription.addField(colName, TypeDescription.createTimestamp());
            break;
        case ARRAY:
            // This ARRAY differs from ORC's LIST; it is implemented as a STRUCT.
            TypeDescription structType = new TypeDescription(TypeDescription.Category.STRUCT);
            for (int i = 0; i < subColDesc.getArrColDesc().size(); i++) {
                ColumnDescription childColDesc = subColDesc.getArrColDesc().get(i);
                // Array elements become synthetic struct fields named c0, c1, ...
                addField(structType, "c" + i, childColDesc.getType(), childColDesc);
            // NOTE(review): the closing brace of this for-loop appears to be
            // missing in the fragment.
            typeDescription.addField(colName, structType);
            break;
        case MAP:
            // This MAP also differs from ORC's MAP; it too is implemented as a
            // STRUCT keyed by the map's field names.
            structType = new TypeDescription(TypeDescription.Category.STRUCT);
            for (String key : subColDesc.getMapColDesc().keySet()) {
                ColumnDescription childColDesc = subColDesc.getMapColDesc().get(key);
                addField(structType, key, childColDesc.getType(), childColDesc);
// Builds an include mask over the flattened column ids for the selected columns.
// NOTE(review): garbled fragment -- the enclosing method header, the "*" branch
// body, and part of an if-condition are missing; flagged inline.
int numFlattenedCols = schema.getMaximumId();
boolean[] results = new boolean[numFlattenedCols + 1];
if ("*".equals(selectedColumns)) {
    // NOTE(review): the select-all branch body is missing, and the next line is
    // a bare expression -- presumably the tail of a lost if-condition checking
    // that the schema is a STRUCT.
    schema.getCategory() == TypeDescription.Category.STRUCT) {
    List<String> fieldNames = schema.getFieldNames();
    List<TypeDescription> fields = schema.getChildren();
    for (String column: selectedColumns.split((","))) {
        TypeDescription col = findColumn(column, fieldNames, fields);
        if (col != null) {
            // Mark the column and all of its nested columns -- their flattened
            // ids form the contiguous range [getId(), getMaximumId()].
            for(int i=col.getId(); i <= col.getMaximumId(); ++i) {
                results[i] = true;
/**
 * Builds an ORC struct TypeDescription from the columns of the given Schema.
 *
 * @param schema the source schema whose columns are converted
 * @return a struct with one ORC field per source column
 * @throws RuntimeException if a column has kind None or an unknown kind
 */
private static TypeDescription getSchema(Schema schema) {
    TypeDescription struct = TypeDescription.createStruct();
    for (String columnName : schema.getColumnNames()) {
        ColumnDescription description = schema.getDescription(columnName);
        TypeDescription fieldType;
        switch (description.kind) {
            case String:
            case Json:
                // JSON columns are stored as their string representation.
                fieldType = TypeDescription.createString();
                break;
            case Date:
                // Dates are stored with timestamp precision.
                fieldType = TypeDescription.createTimestamp();
                break;
            case Integer:
                fieldType = TypeDescription.createInt();
                break;
            case Duration:
            case Double:
                // Durations share the double representation.
                fieldType = TypeDescription.createDouble();
                break;
            case None:
            default:
                throw new RuntimeException("Unexpected data type " + description.kind);
        }
        struct.addField(columnName, fieldType);
    }
    return struct;
}
/**
 * Returns the Java class used to represent values of the given ORC schema node.
 * NOTE(review): garbled/truncated fragment -- a case label is missing and the
 * MAP case is cut off; flagged inline.
 */
private static Class<?> getClassForType(TypeDescription schema) {
    switch (schema.getCategory()) {
        case BOOLEAN:
            return Boolean.class;
        // NOTE(review): a case label (presumably STRUCT) is missing before this
        // return -- as written it is unreachable.
        return Row.class;
        case LIST:
            // Lists are represented as arrays of the element type's class.
            Class<?> childClass = getClassForType(schema.getChildren().get(0));
            return Array.newInstance(childClass, 0).getClass();
        case MAP:
// Builds encoded tree readers for each struct child that actually has data,
// recording each created reader's schema-child index in columnMapping.
// NOTE(review): garbled fragment -- the enclosing method signature, the loop
// header defining schemaChildIx, and several braces are missing; flagged inline.
CompressionCodec codec, TreeReaderFactory.Context context,
    int[] columnMapping) throws IOException {
    if (schema.getCategory() != Category.STRUCT) {
        throw new AssertionError("Schema is not a struct: " + schema);
    List<TypeDescription> children = schema.getChildren();
    int childCount = children.size(), includedCount = 0;
    for (int childIx = 0; childIx < childCount; ++childIx) {
        // A child is skipped when the batch carries neither data nor vectors
        // for its flattened column id.
        int batchColIx = children.get(childIx).getId();
        if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Column at " + childIx + " " + children.get(childIx).getId() + ":" +
                    children.get(childIx).toString() + " has no data");
    // NOTE(review): schemaChildIx is used below but its defining loop header is
    // missing from this fragment.
    int batchColIx = children.get(schemaChildIx).getId();
    if (!batch.hasData(batchColIx) && !batch.hasVectors(batchColIx)) continue;
    // Pre-increment fills reader slots -- presumably inclChildIx starts at -1
    // in the missing surrounding code; confirm.
    childReaders[++inclChildIx] = createEncodedTreeReader(
        schema.getChildren().get(schemaChildIx), encodings, batch, codec, context);
    columnMapping[inclChildIx] = schemaChildIx;
/**
 * Derives an ORC struct schema from the Hive table properties, if possible.
 *
 * <p>Returns {@code null} when {@code tableProperties} is null or does not
 * carry both the column-names and column-types properties.
 *
 * @param tableProperties the table properties; may be null
 * @return the struct schema described by the properties, or {@code null}
 */
private static TypeDescription getTypeDescriptionFromTableProperties(Properties tableProperties) {
    TypeDescription schema = null;
    if (tableProperties != null) {
        final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
        final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
        if (!Strings.isNullOrEmpty(columnNameProperty)
            && !Strings.isNullOrEmpty(columnTypeProperty)) {
            // Both properties are guaranteed non-empty inside this guard, so
            // the previous `length() == 0` empty-list fallbacks were dead code
            // and have been removed.
            List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
            List<TypeInfo> columnTypes =
                TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
            schema = TypeDescription.createStruct();
            // Pair names and types positionally into struct fields.
            for (int i = 0; i < columnNames.size(); i++) {
                schema.addField(columnNames.get(i),
                    OrcInputFormat.convertTypeInfo(columnTypes.get(i)));
            }
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("ORC schema = " + schema);
    }
    return schema;
}
public static TypeDescription[] genIncludedTypes(TypeDescription fileSchema, List<Integer> included, Integer recursiveStruct) { TypeDescription[] result = new TypeDescription[included.size()]; List<TypeDescription> children = fileSchema.getChildren(); for (int columnNumber = 0; columnNumber < children.size(); ++columnNumber) { int indexInBatchCols = included.indexOf(columnNumber); if (indexInBatchCols >= 0) { result[indexInBatchCols] = children.get(columnNumber); } else if (recursiveStruct != null && recursiveStruct == columnNumber) { // This assumes all struct cols immediately follow struct List<TypeDescription> nestedChildren = children.get(columnNumber).getChildren(); for (int columnNumberDelta = 0; columnNumberDelta < nestedChildren.size(); ++columnNumberDelta) { int columnNumberNested = columnNumber + 1 + columnNumberDelta; int nestedIxInBatchCols = included.indexOf(columnNumberNested); if (nestedIxInBatchCols >= 0) { result[nestedIxInBatchCols] = nestedChildren.get(columnNumberDelta); } } } } return result; }