/**
 * Resolves the primitive type that carries the values of {@code type}.
 *
 * <p>A primitive type is returned as-is. For a group, the first field of the
 * group's first field is taken and returned as a primitive type.
 *
 * @param type the Parquet type to inspect
 * @return the primitive type found
 * @throws RuntimeException if the group has more than one field
 */
private PrimitiveType getElementType(Type type) {
  if (!type.isPrimitive()) {
    if (type.asGroupType().getFields().size() > 1) {
      throw new RuntimeException(
          "Current Parquet Vectorization reader doesn't support nested type");
    }
    // Walk two levels down: group -> first child group -> first grandchild.
    Type firstChild = type.asGroupType().getFields().get(0);
    Type firstGrandChild = firstChild.asGroupType().getFields().get(0);
    return firstGrandChild.asPrimitiveType();
  }
  return type.asPrimitiveType();
}
/**
 * Tells whether a Parquet type is a valid 'list' type: a non-primitive type
 * annotated with {@code OriginalType.LIST} that has exactly one field, and
 * that field is repeated.
 *
 * @param listType the type to check
 * @return {@code true} if all of the conditions above hold
 */
private static boolean isLogicalListType(Type listType) {
  if (listType.isPrimitive()) {
    return false;
  }
  // A missing annotation (null) fails this check as well.
  if (!OriginalType.LIST.equals(listType.getOriginalType())) {
    return false;
  }
  if (listType.asGroupType().getFieldCount() != 1) {
    return false;
  }
  return listType.asGroupType().getFields().get(0).isRepetition(Type.Repetition.REPEATED);
}
private static boolean isElementType(Type repeatedType, String parentName) { if (repeatedType.isPrimitive() || (repeatedType.asGroupType().getFieldCount() != 1)) { return true; } else if (repeatedType.getName().equals("array")) { return true; // existing avro data } else if (repeatedType.getName().equals(parentName + "_tuple")) { return true; // existing thrift data } // false for the following cases: // * name is "list", which matches the spec // * name is "bag", which indicates existing hive or pig data // * ambiguous case, which should be assumed is 3-level according to spec return false; } }
} else { if (type instanceof GroupType) { GroupType groupType = type.asGroupType(); List<Type> ts = projectLeafTypes(groupType.getFields(), f.getNodes()); GroupType g = buildProjectedGroupType(groupType, ts);
/**
 * Dispatches to the primitive or group overload of
 * {@code getConverterFromDescription} depending on the kind of {@code type}.
 *
 * @param type the Parquet type to build a converter for; may be {@code null}
 * @param index passed through to the selected overload
 * @param parent passed through to the selected overload
 * @param hiveTypeInfo passed through to the selected overload
 * @return the converter from the selected overload, or {@code null} when
 *         {@code type} is {@code null}
 */
protected static Converter getConverterFromDescription(Type type, int index,
    ConverterParent parent, TypeInfo hiveTypeInfo) {
  if (type == null) {
    return null;
  }
  if (!type.isPrimitive()) {
    return getConverterFromDescription(type.asGroupType(), index, parent, hiveTypeInfo);
  }
  return getConverterFromDescription(type.asPrimitiveType(), index, parent, hiveTypeInfo);
}
/**
 * Tells whether a Parquet type is a valid 'map' type.
 *
 * <p>The type must be a non-repeated group annotated with {@code MAP} or
 * {@code MAP_KEY_VALUE}, holding exactly one non-primitive field. That field
 * must be a repeated group of two fields: a primitive first field named "key"
 * and a second field named "value" (names compared ignoring case).
 *
 * @param groupType the type to check
 * @return {@code true} if the type has the map shape described above
 */
private static boolean isLogicalMapType(Type groupType) {
  OriginalType annotation = groupType.getOriginalType();
  if (groupType.isPrimitive() || annotation == null
      || groupType.isRepetition(Type.Repetition.REPEATED)) {
    return false;
  }

  boolean annotatedAsMap = annotation.equals(OriginalType.MAP)
      || annotation.equals(OriginalType.MAP_KEY_VALUE);
  if (!annotatedAsMap) {
    return false;
  }

  GroupType mapGroup = groupType.asGroupType();
  if (mapGroup.getFieldCount() != 1) {
    return false;
  }
  Type onlyChild = mapGroup.getFields().get(0);
  if (onlyChild.isPrimitive()) {
    return false;
  }

  GroupType entryGroup = onlyChild.asGroupType();
  if (!entryGroup.isRepetition(Type.Repetition.REPEATED) || entryGroup.getFieldCount() != 2) {
    return false;
  }

  Type keyField = entryGroup.getFields().get(0);
  Type valueField = entryGroup.getFields().get(1);
  return keyField.getName().equalsIgnoreCase("key")
      && keyField.isPrimitive()
      && valueField.getName().equalsIgnoreCase("value");
}
public ListDataWriter(ListObjectInspector inspector, GroupType groupType) { this.inspector = inspector; // Get the internal array structure GroupType repeatedType = groupType.getType(0).asGroupType(); this.repeatedGroupName = repeatedType.getName(); Type elementType = repeatedType.getType(0); this.elementName = elementType.getName(); ObjectInspector elementInspector = this.inspector.getListElementObjectInspector(); this.elementWriter = createWriter(elementInspector, elementType); }
private boolean isSubType( final GroupType groupType, final Type subtype) { if (subtype.isPrimitive() || subtype.isRepetition(Type.Repetition.REPEATED)) { return groupType.getFields().contains(subtype); } else { for (Type g : groupType.getFields()) { if (!g.isPrimitive() && g.getName().equals(subtype.getName())) { // check all elements are contained in g boolean containsAll = false; for (Type subSubType : subtype.asGroupType().getFields()) { containsAll = isSubType(g.asGroupType(), subSubType); if (!containsAll) { break; } } if (containsAll) { return containsAll; } } } return false; } }
List<VectorizedColumnReader> fieldReaders = new ArrayList<>(); List<TypeInfo> fieldTypes = structTypeInfo.getAllStructFieldTypeInfos(); List<Type> types = type.asGroupType().getFields(); for (int i = 0; i < fieldTypes.size(); i++) { VectorizedColumnReader r = GroupType groupType = type.asGroupType(); "Failed to get the field types for Map with type " + type); groupType = groupType.getFields().get(0).asGroupType(); nestGroup++;
/**
 * Creates the converter for one field.
 *
 * <p>A non-repeated field is delegated to {@code getConverterFromDescription}.
 * A repeated field gets a {@code Repeated} primitive or group converter, which
 * is also added to {@code repeatedConverters}.
 *
 * @param type the Parquet type of the field
 * @param fieldIndex the index handed to the created converter
 * @param hiveTypeInfo the Hive type of the field; may be {@code null}
 * @return the converter for the field
 */
private Converter getFieldConverter(Type type, int fieldIndex, TypeInfo hiveTypeInfo) {
  if (!type.isRepetition(Type.Repetition.REPEATED)) {
    return getConverterFromDescription(type, fieldIndex, this, hiveTypeInfo);
  }

  Converter repeated;
  if (type.isPrimitive()) {
    repeated = new Repeated.RepeatedPrimitiveConverter(
        type.asPrimitiveType(), this, fieldIndex, hiveTypeInfo);
  } else {
    // For a repeated group the Hive type is treated as a list; hand its element type down.
    TypeInfo elementTypeInfo = hiveTypeInfo == null
        ? null
        : ((ListTypeInfo) hiveTypeInfo).getListElementTypeInfo();
    repeated = new Repeated.RepeatedGroupConverter(
        type.asGroupType(), this, fieldIndex, elementTypeInfo);
  }
  repeatedConverters.add((Repeated) repeated);
  return repeated;
}
/**
 * Builds a converter for a collection (map or list) group.
 *
 * <p>The inner converter is chosen from field 0 of {@code collectionType}:
 * a {@code KeyValueConverter} for maps; for lists, either a converter built
 * directly from that field (when {@code isElementType} says it is the
 * element) or an {@code ElementConverter} wrapping it.
 *
 * @param collectionType the Parquet group describing the collection
 * @param parent the parent converter, also the source of the metadata
 * @param index the index of this converter
 * @param isMap whether the collection is a map rather than a list
 * @param hiveTypeInfo the Hive type of the collection
 */
private HiveCollectionConverter(GroupType collectionType, ConverterParent parent,
    int index, boolean isMap, TypeInfo hiveTypeInfo) {
  setMetadata(parent.getMetadata());
  this.collectionType = collectionType;
  this.parent = parent;
  this.index = index;

  Type firstField = collectionType.getType(0);
  if (isMap) {
    this.innerConverter = new KeyValueConverter(firstField.asGroupType(), this, hiveTypeInfo);
    return;
  }

  if (isElementType(firstField, collectionType.getName())) {
    this.innerConverter = getConverterFromDescription(
        firstField, 0, this, extractListCompatibleType(hiveTypeInfo));
  } else {
    this.innerConverter = new ElementConverter(
        firstField.asGroupType(), this, extractListCompatibleType(hiveTypeInfo));
  }
}
case STRUCT: List<Type> groupFields = getProjectedGroupFields( fieldType.asGroupType(), ((StructTypeInfo) colType).getAllStructFieldNames(), ((StructTypeInfo) colType).getAllStructFieldTypeInfos() TypeInfo elemType = ((ListTypeInfo) colType).getListElementTypeInfo(); if (elemType.getCategory() == ObjectInspector.Category.STRUCT) { Type subFieldType = fieldType.asGroupType().getType(0); if (!subFieldType.isPrimitive()) { String subFieldName = subFieldType.getName(); if (name.equals(ParquetHiveSerDe.ARRAY) || name.equals(ParquetHiveSerDe.LIST)) { subFieldType = new GroupType(Repetition.REPEATED, subFieldName, getProjectedType(elemType, subFieldType.asGroupType().getType(0))); } else { subFieldType = getProjectedType(elemType, subFieldType);
public MapDataWriter(MapObjectInspector inspector, GroupType groupType) { this.inspector = inspector; // Get the internal map structure (MAP_KEY_VALUE) GroupType repeatedType = groupType.getType(0).asGroupType(); this.repeatedGroupName = repeatedType.getName(); // Get key element information Type keyType = repeatedType.getType(0); ObjectInspector keyInspector = this.inspector.getMapKeyObjectInspector(); this.keyName = keyType.getName(); this.keyWriter = createWriter(keyInspector, keyType); // Get value element information Type valuetype = repeatedType.getType(1); ObjectInspector valueInspector = this.inspector.getMapValueObjectInspector(); this.valueName = valuetype.getName(); this.valueWriter = createWriter(valueInspector, valuetype); }
@Test public void testMapOriginalType() throws Exception { final String hiveColumnTypes = "map<string,string>"; final String hiveColumnNames = "mapCol"; final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames); final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes); final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes); // this messageType only has one optional field, whose name is mapCol, original Type is MAP assertEquals(1, messageTypeFound.getFieldCount()); org.apache.parquet.schema.Type topLevel = messageTypeFound.getFields().get(0); assertEquals("mapCol",topLevel.getName()); assertEquals(OriginalType.MAP, topLevel.getOriginalType()); assertEquals(Repetition.OPTIONAL, topLevel.getRepetition()); assertEquals(1, topLevel.asGroupType().getFieldCount()); org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0); //there is one repeated field for mapCol, the field name is "map" and its original Type is MAP_KEY_VALUE; assertEquals("map", secondLevel.getName()); assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType()); assertEquals(Repetition.REPEATED, secondLevel.getRepetition()); } }
GroupType groupType = type.asGroupType(); OriginalType originalType = type.getOriginalType();
/**
 * {@inheritDoc}
 */
public void startGroup() {
  previousField.push(-1);
  // The type on top of the stack is the group being entered from; step into
  // the child selected by the current top of the fields stack.
  Type enclosing = types.peek();
  types.push(enclosing.asGroupType().getType(fields.peek()));
  delegate.startGroup();
}
/**
 * Converts a Parquet {@link Type} to a Flink {@link InternalType}, delegating
 * to the primitive or the group conversion depending on the kind of type.
 */
private InternalType convertType(Type parquetType) {
  if (!parquetType.isPrimitive()) {
    return convertGroupType(parquetType.asGroupType());
  }
  return convertPrimitiveType(parquetType.asPrimitiveType());
}
/**
 * Visits a group type: reports an incompatible schema when the currently
 * requested type is primitive, then creates a {@code GroupColumnIO} for the
 * group, adds it to {@code current} and visits the group's children against
 * the requested group type.
 *
 * @param groupType the group type being visited
 */
@Override
public void visit(GroupType groupType) {
  final boolean requestedIsPrimitive = currentRequestedType.isPrimitive();
  if (requestedIsPrimitive) {
    incompatibleSchema(groupType, currentRequestedType);
  }

  GroupColumnIO groupIO = new GroupColumnIO(groupType, current, currentRequestedIndex);
  current.add(groupIO);
  visitChildren(groupIO, groupType, currentRequestedType.asGroupType());
}
/**
 * Builds an {@code ArrayReader} for a list.
 *
 * <p>The definition and repetition levels of the repeated group are the
 * maximum levels at the current path minus one. The element's definition
 * level is the maximum level at the element's path minus one, and is passed
 * with the element reader to {@code option}.
 *
 * @param expectedList the expected list type (not read by this method)
 * @param array the Parquet group describing the list
 * @param elementReader the reader for the list elements
 * @return a reader for the list
 */
@Override
public ParquetValueReader<?> list(Types.ListType expectedList, GroupType array,
                                  ParquetValueReader<?> elementReader) {
  GroupType repeatedGroup = array.getFields().get(0).asGroupType();

  String[] pathToRepeated = currentPath();
  int repeatedDefinition = type.getMaxDefinitionLevel(pathToRepeated) - 1;
  int repeatedRepetition = type.getMaxRepetitionLevel(pathToRepeated) - 1;

  Type element = repeatedGroup.getType(0);
  int elementDefinition = type.getMaxDefinitionLevel(path(element.getName())) - 1;

  return new ArrayReader<>(
      repeatedDefinition,
      repeatedRepetition,
      option(element, elementDefinition, elementReader));
}
/**
 * Merges another group type into this one.
 *
 * <p>The result takes its repetition and original type from {@code toMerge},
 * its name and id from this type, and its fields from {@code mergeFields}.
 *
 * @param toMerge the type to merge into this group
 * @param strict not read by this method
 * @return the merged group type
 * @throws IncompatibleSchemaModificationException if {@code toMerge} is primitive
 */
@Override
protected Type union(Type toMerge, boolean strict) {
  if (!toMerge.isPrimitive()) {
    return new GroupType(
        toMerge.getRepetition(),
        getName(),
        toMerge.getOriginalType(),
        mergeFields(toMerge.asGroupType()),
        getId());
  }
  throw new IncompatibleSchemaModificationException(
      "can not merge primitive type " + toMerge + " into group type " + this);
}