/**
 * Checks that the underlying Parquet type carries decimal metadata and can
 * therefore be read as a Hive Decimal type.
 *
 * @param type the Parquet type to validate
 * @throws UnsupportedOperationException if the type has no decimal metadata
 */
protected void decimalTypeCheck(Type type) {
  DecimalMetadata decimalMetadata = type.asPrimitiveType().getDecimalMetadata();
  if (decimalMetadata == null) {
    // Message fixed for grammar and made consistent with the equivalent check
    // in fillDecimalPrecisionScale ("cannot be converted to").
    throw new UnsupportedOperationException("The underlying Parquet type cannot be " +
        "converted to Hive Decimal type: " + type);
  }
}
/**
 * Resolves the primitive element type of a Parquet type. A primitive type is
 * returned as-is; a group type is assumed to be the standard 3-level list
 * layout (group -> repeated group -> element) and the inner primitive is
 * extracted.
 *
 * @param type the Parquet type to unwrap
 * @return the underlying primitive element type
 * @throws RuntimeException if the group has more than one field (nested types
 *         are not supported by the vectorized reader)
 */
private PrimitiveType getElementType(Type type) {
  if (type.isPrimitive()) {
    return type.asPrimitiveType();
  }
  if (type.asGroupType().getFields().size() > 1) {
    throw new RuntimeException(
        "Current Parquet Vectorization reader doesn't support nested type");
  }
  // Unwrap the 3-level list: outer group -> repeated wrapper -> element.
  Type repeatedWrapper = type.asGroupType().getFields().get(0);
  Type element = repeatedWrapper.asGroupType().getFields().get(0);
  return element.asPrimitiveType();
}
/**
 * Dispatches converter creation based on whether the Parquet type is primitive
 * or a group, delegating to the corresponding overload.
 *
 * @param type         the Parquet type (may be null)
 * @param index        the field index within the parent
 * @param parent       the parent converter
 * @param hiveTypeInfo the corresponding Hive type info
 * @return the converter for this type, or null when {@code type} is null
 */
protected static Converter getConverterFromDescription(Type type, int index,
    ConverterParent parent, TypeInfo hiveTypeInfo) {
  if (type == null) {
    return null;
  }
  return type.isPrimitive()
      ? getConverterFromDescription(type.asPrimitiveType(), index, parent, hiveTypeInfo)
      : getConverterFromDescription(type.asGroupType(), index, parent, hiveTypeInfo);
}
/**
 * Initializes the value reader ({@code dataColumn}) for a freshly loaded data page.
 * Chooses a dictionary-based reader when the page's encoding uses a dictionary,
 * otherwise a plain values reader, then positions the reader at the start of the page.
 *
 * @param dataEncoding the encoding of the page's values
 * @param in           the page bytes to read from
 * @param valueCount   number of values in this page
 * @throws IOException if the dictionary is missing for a dictionary-encoded page,
 *                     or the page cannot be read
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount)
    throws IOException {
  // Track page boundaries so callers can tell when the current page is exhausted.
  this.pageValueCount = valueCount;
  this.endOfPageValueCount = valuesRead + pageValueCount;
  if (dataEncoding.usesDictionary()) {
    // Reset before reassigning so a failure below leaves no stale reader behind.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor +
              " as the dictionary was missing for encoding " + dataEncoding);
    }
    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(),
        hiveType, dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary
            .getDictionary()), skipTimestampConversion);
    this.isCurrentPageDictionaryEncoded = true;
  } else {
    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(),
        hiveType, dataEncoding.getValuesReader(descriptor, VALUES), skipTimestampConversion);
    this.isCurrentPageDictionaryEncoded = false;
  }
  try {
    // Position the reader at the beginning of the page data.
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    // Wrap to add the column descriptor for easier debugging; original cause preserved.
    throw new IOException("could not read page in col " + descriptor, e);
  }
}
/** * The decimal precision and scale is filled into decimalColumnVector. If the data in * Parquet is in decimal, the precision and scale will come in from decimalMetadata. If parquet * is not in decimal, then this call is made because HMS shows the type as decimal. So, the * precision and scale are picked from hiveType. * * @param decimalMetadata * @param decimalColumnVector */ private void fillDecimalPrecisionScale(DecimalMetadata decimalMetadata, DecimalColumnVector decimalColumnVector) { if (decimalMetadata != null) { decimalColumnVector.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision(); decimalColumnVector.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale(); } else if (TypeInfoUtils.getBaseName(hiveType.getTypeName()) .equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) { decimalColumnVector.precision = (short) ((DecimalTypeInfo) hiveType).getPrecision(); decimalColumnVector.scale = (short) ((DecimalTypeInfo) hiveType).getScale(); } else { throw new UnsupportedOperationException( "The underlying Parquet type cannot be converted to Hive Decimal type: " + type); } } }
// Fragment of a switch over Hive primitive categories; the loop body continues
// past this view.
case DECIMAL:
  // Fail fast if the Parquet schema carries no decimal metadata for this column.
  decimalTypeCheck(type);
  int precision = type.asPrimitiveType().getDecimalMetadata().getPrecision();
  int scale = type.asPrimitiveType().getDecimalMetadata().getScale();
  // Allocate the child vector with the schema-declared precision/scale.
  lcv.child = new DecimalColumnVector(total, precision, scale);
  // Per-element copy loop; body not visible in this chunk.
  for (int i = 0; i < valueList.size(); i++) {
/**
 * Creates a vectorized column reader for a single Parquet column. If the column
 * has a dictionary page, the dictionary is decoded eagerly; otherwise the reader
 * starts in non-dictionary mode.
 *
 * @param descriptor             the Parquet column descriptor
 * @param pageReader             source of dictionary/data pages for this column
 * @param skipTimestampConversion whether to skip timezone conversion for timestamps
 * @param parquetType            the Parquet schema type of this column
 * @param hiveType               the Hive type this column maps to
 * @throws IOException if the dictionary page cannot be decoded
 */
public BaseVectorizedColumnReader(
    ColumnDescriptor descriptor,
    PageReader pageReader,
    boolean skipTimestampConversion,
    Type parquetType, TypeInfo hiveType) throws IOException {
  this.descriptor = descriptor;
  this.type = parquetType;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();
  this.skipTimestampConversion = skipTimestampConversion;
  this.hiveType = hiveType;

  // Decode the dictionary up front if one exists for this column chunk.
  DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
  if (dictionaryPage != null) {
    try {
      this.dictionary = ParquetDataColumnReaderFactory
          .getDataColumnReaderByTypeOnDictionary(parquetType.asPrimitiveType(), hiveType,
              dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage),
              skipTimestampConversion);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      // Wrap with the column descriptor for context; original cause preserved.
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  } else {
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  }
}
/**
 * Builds the converter for a single field. Repeated fields get a dedicated
 * repeated-primitive or repeated-group converter (also registered in
 * {@code repeatedConverters}); all other fields go through the generic
 * description-based dispatch.
 *
 * @param type         the Parquet type of the field
 * @param fieldIndex   the field's index within its parent
 * @param hiveTypeInfo the Hive type info for the field (may be null)
 * @return the converter for this field
 */
private Converter getFieldConverter(Type type, int fieldIndex, TypeInfo hiveTypeInfo) {
  // Non-repeated fields take the generic dispatch path.
  if (!type.isRepetition(Type.Repetition.REPEATED)) {
    return getConverterFromDescription(type, fieldIndex, this, hiveTypeInfo);
  }
  Converter repeated;
  if (type.isPrimitive()) {
    repeated = new Repeated.RepeatedPrimitiveConverter(
        type.asPrimitiveType(), this, fieldIndex, hiveTypeInfo);
  } else {
    // A repeated group maps to a Hive list; pass the element type info down.
    TypeInfo elementTypeInfo = hiveTypeInfo == null
        ? null
        : ((ListTypeInfo) hiveTypeInfo).getListElementTypeInfo();
    repeated = new Repeated.RepeatedGroupConverter(
        type.asGroupType(), this, fieldIndex, elementTypeInfo);
  }
  repeatedConverters.add((Repeated) repeated);
  return repeated;
}
// NOTE(review): this fragment appears truncated/garbled — the LONG case falls
// straight into a FLOAT check with no closing brace and no `case FLOAT:` label.
// Verify against the complete file before relying on this view.
switch (type){
case LONG:
  // Parquet stores small integral Hive types as INT32.
  if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
      PrimitiveType.PrimitiveTypeName.INT32) {
    return new IntFilterPredicateLeafBuilder();
  if (parquetType.asPrimitiveType().getPrimitiveTypeName() ==
      PrimitiveType.PrimitiveTypeName.FLOAT) {
    return new FloatFilterPredicateLeafBuilder();
/**
 * Reads up to {@code total} decimal values into the column vector starting at
 * {@code rowId}. Rows whose definition level indicates null, or whose decoded
 * bytes are invalid, are written as nulls.
 *
 * @param total number of values to read
 * @param c     destination decimal column vector
 * @param rowId starting row index in {@code c}
 * @throws IOException if the underlying page data cannot be read
 */
private void readDecimal(
    int total,
    DecimalColumnVector c,
    int rowId) throws IOException {
  DecimalMetadata decimalMetadata = type.asPrimitiveType().getDecimalMetadata();
  byte[] decimalData = null;
  // Populate c.precision / c.scale from Parquet metadata or the Hive type.
  fillDecimalPrecisionScale(decimalMetadata, c);
  int left = total;
  while (left > 0) {
    readRepetitionAndDefinitionLevels();
    if (definitionLevel >= maxDefLevel) {
      decimalData = dataColumn.readDecimal();
      if (dataColumn.isValid()) {
        c.vector[rowId].set(decimalData, c.scale);
        c.isNull[rowId] = false;
        // NOTE(review): `==` compares object references, not decimal values,
        // so isRepeating stays true only while the same writable instance is
        // reused — confirm this is the intended semantics upstream.
        c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
      } else {
        // Decoded bytes were not a valid decimal for this column.
        setNullValue(c, rowId);
      }
    } else {
      // Definition level below max means the value is null.
      setNullValue(c, rowId);
    }
    rowId++;
    left--;
  }
}
// Fragment of a larger method (enclosing signature not visible in this chunk).
// Read the decimal metadata declared by the Parquet schema for this column.
DecimalMetadata decimalMetadata = type.asPrimitiveType().getDecimalMetadata();
DecimalColumnVector decimalColumnVector = ((DecimalColumnVector) column);
// Holds the raw unscaled decimal bytes for each value as it is decoded.
byte[] decimalData = null;
/**
 * Looks up the column descriptor for the given nested path.
 *
 * @param columns all primitive leaf columns of the file
 * @param path    the nested field path; must contain at least one component
 * @return the enriched descriptor, or empty when the path matches no column
 */
public static Optional<RichColumnDescriptor> getDescriptor(List<PrimitiveColumnIO> columns,
    List<String> path) {
  checkArgument(path.size() >= 1, "Parquet nested path should have at least one component");
  int index = getPathIndex(columns, path);
  if (index == -1) {
    // No leaf column matches this path.
    return Optional.empty();
  }
  PrimitiveColumnIO column = columns.get(index);
  RichColumnDescriptor descriptor = new RichColumnDescriptor(
      column.getColumnDescriptor(), column.getType().asPrimitiveType());
  return Optional.of(descriptor);
}
/**
 * Resolves a nested field path to its column descriptor, if present.
 *
 * @param columns the file's primitive leaf columns
 * @param path    path components; at least one is required
 * @return the matching descriptor wrapped in Optional, or empty if not found
 */
public static Optional<RichColumnDescriptor> getDescriptor(List<PrimitiveColumnIO> columns,
    List<String> path) {
  checkArgument(path.size() >= 1, "Parquet nested path should have at least one component");
  int matchIndex = getPathIndex(columns, path);
  if (matchIndex == -1) {
    return Optional.empty();
  }
  // Found a leaf column for this path; enrich its descriptor with the type.
  PrimitiveColumnIO leaf = columns.get(matchIndex);
  return Optional.of(
      new RichColumnDescriptor(leaf.getColumnDescriptor(), leaf.getType().asPrimitiveType()));
}
/**
 * Validates that the given Parquet type is primitive, then delegates to the
 * primitive-specific validation.
 *
 * @param type the type to validate
 * @throws IllegalArgumentException if the type is not primitive
 */
@Override
protected void validateType(Type type) {
  // Include the offending type in the failure message (the original threw a
  // bare IllegalArgumentException with no context).
  Preconditions.checkArgument(type.isPrimitive(), "Expected a primitive type but got: %s", type);
  validatePrimitiveType(type.asPrimitiveType());
}
/**
 * Visits a primitive type in the file schema, checking it against the
 * currently requested type and registering a new leaf column IO.
 */
@Override
public void visit(PrimitiveType primitiveType) {
  // The requested type must also be primitive; with strict checking enabled,
  // the primitive type names must match exactly as well. The isPrimitive()
  // check short-circuits before asPrimitiveType() is called on a group.
  boolean mismatch = !currentRequestedType.isPrimitive()
      || (this.strictTypeChecking
          && primitiveType.getPrimitiveTypeName()
              != currentRequestedType.asPrimitiveType().getPrimitiveTypeName());
  if (mismatch) {
    incompatibleSchema(primitiveType, currentRequestedType);
  }
  PrimitiveColumnIO leafIO =
      new PrimitiveColumnIO(primitiveType, current, currentRequestedIndex, leaves.size());
  current.add(leafIO);
  leaves.add(leafIO);
}
/**
 * Handles a primitive leaf of the file schema: verifies compatibility with the
 * requested schema and appends a PrimitiveColumnIO to the current group and
 * the global leaf list.
 */
@Override
public void visit(PrimitiveType primitiveType) {
  if (!currentRequestedType.isPrimitive()
      || (this.strictTypeChecking
          && primitiveType.getPrimitiveTypeName()
              != currentRequestedType.asPrimitiveType().getPrimitiveTypeName())) {
    // Requested schema disagrees with the file schema at this leaf.
    incompatibleSchema(primitiveType, currentRequestedType);
  }
  // leaves.size() is the leaf's ordinal position among all primitive columns.
  PrimitiveColumnIO columnIO =
      new PrimitiveColumnIO(primitiveType, current, currentRequestedIndex, leaves.size());
  current.add(columnIO);
  leaves.add(columnIO);
}
/**
 * Converts a Parquet {@link Type} to a Flink {@link InternalType}, dispatching
 * on whether the type is primitive or a group.
 *
 * @param parquetType the Parquet type to convert
 * @return the corresponding Flink internal type
 */
private InternalType convertType(Type parquetType) {
  return parquetType.isPrimitive()
      ? convertPrimitiveType(parquetType.asPrimitiveType())
      : convertGroupType(parquetType.asGroupType());
}
/**
 * Builds the column descriptor for the leaf at the given schema path, combining
 * the leaf's primitive type with its repetition and definition levels.
 *
 * @param path the schema path to the leaf column
 * @return the descriptor for that column
 */
public ColumnDescriptor getColumnDescription(String[] path) {
  // Resolve levels first, then the leaf type, matching the original call order.
  int repetitionLevel = getMaxRepetitionLevel(path);
  int definitionLevel = getMaxDefinitionLevel(path);
  PrimitiveType leafType = getType(path).asPrimitiveType();
  return new ColumnDescriptor(path, leafType, repetitionLevel, definitionLevel);
}
/**
 * Checks that {@code subType} is contained in this type: it must be a
 * primitive with the same primitive type name.
 *
 * @param subType the candidate sub-type
 * @throws InvalidRecordException if subType is not primitive or its primitive
 *         kind differs from this type's
 */
@Override
void checkContains(Type subType) {
  super.checkContains(subType);
  // Both failure modes throw the identical message, so they fold into one check.
  if (!subType.isPrimitive() || subType.asPrimitiveType().primitive != this.primitive) {
    throw new InvalidRecordException(subType + " found: expected " + this);
  }
}
/**
 * Returns the full column descriptor for the leaf reached by {@code path}.
 *
 * @param path schema path identifying the leaf column
 * @return descriptor carrying the leaf type and its max repetition/definition levels
 */
public ColumnDescriptor getColumnDescription(String[] path) {
  final int maxRepetition = getMaxRepetitionLevel(path);
  final int maxDefinition = getMaxDefinitionLevel(path);
  return new ColumnDescriptor(path, getType(path).asPrimitiveType(), maxRepetition, maxDefinition);
}