/**
 * Appends a single byte-array element to this vector.
 *
 * <p>The bytes are first handed to {@code arrayData().appendBytes(...)}; the offset returned by
 * that call is then recorded together with {@code length} through {@code putArray} at the next
 * free row of this vector.
 *
 * @param value  buffer holding the bytes to append
 * @param offset start position inside {@code value}, forwarded to {@code appendBytes}
 * @param length number of bytes, forwarded to {@code appendBytes} and stored with the element
 * @return the rowId under which the new element was stored
 */
public final int appendByteArray(byte[] value, int offset, int length) {
  final int dataOffset = arrayData().appendBytes(length, value, offset);
  reserve(elementsAppended + 1);
  putArray(elementsAppended, dataOffset, length);
  // Hand back the row that was just written, then advance the append cursor.
  final int rowId = elementsAppended;
  elementsAppended++;
  return rowId;
}
/**
 * Closes every column of this batch by calling {@code close()} on each one in turn.
 * The data must not be accessed once this has been called. Invoke it when the batch is
 * finished with so that memory allocations are cleaned up.
 */
public void close() {
  for (ColumnVector column : columns) {
    column.close();
  }
}
@Override public Object[] array() { DataType dt = data.dataType(); Object[] list = new Object[length]; if (!data.isNullAt(offset + i)) { list[i] = data.getBoolean(offset + i); if (!data.isNullAt(offset + i)) { list[i] = data.getByte(offset + i); if (!data.isNullAt(offset + i)) { list[i] = data.getShort(offset + i); if (!data.isNullAt(offset + i)) { list[i] = data.getInt(offset + i); if (!data.isNullAt(offset + i)) { list[i] = data.getFloat(offset + i); if (!data.isNullAt(offset + i)) { list[i] = data.getDouble(offset + i); if (!data.isNullAt(offset + i)) { list[i] = data.getLong(offset + i); DecimalType decType = (DecimalType)dt; for (int i = 0; i < length; i++) { if (!data.isNullAt(offset + i)) { list[i] = getDecimal(i, decType.precision(), decType.scale());
/**
 * Returns the array stored at {@code rowId}.
 *
 * <p>No new object is created: the {@code resultArray} field is updated with the length and
 * offset looked up for {@code rowId} and that same instance is returned, so a later call
 * overwrites what an earlier caller is holding.
 *
 * @param rowId row whose array is requested
 * @return the {@code resultArray} instance, positioned on the requested row
 */
public final Array getArray(int rowId) {
  resultArray.length = getArrayLength(rowId);
  resultArray.offset = getArrayOffset(rowId);
  return resultArray;
}
DataType t = col.dataType(); col.putNulls(0, capacity); } else { if (t == DataTypes.BooleanType) { col.putBooleans(0, capacity, row.getBoolean(fieldIdx)); } else if (t == DataTypes.ByteType) { col.putBytes(0, capacity, row.getByte(fieldIdx)); } else if (t == DataTypes.ShortType) { col.putShorts(0, capacity, row.getShort(fieldIdx)); } else if (t == DataTypes.IntegerType) { col.putInts(0, capacity, row.getInt(fieldIdx)); } else if (t == DataTypes.LongType) { col.putLongs(0, capacity, row.getLong(fieldIdx)); } else if (t == DataTypes.FloatType) { col.putFloats(0, capacity, row.getFloat(fieldIdx)); } else if (t == DataTypes.DoubleType) { col.putDoubles(0, capacity, row.getDouble(fieldIdx)); } else if (t == DataTypes.StringType) { UTF8String v = row.getUTF8String(fieldIdx); byte[] bytes = v.getBytes(); for (int i = 0; i < capacity; i++) { col.putByteArray(i, bytes); Decimal d = row.getDecimal(fieldIdx, dt.precision(), dt.scale()); if (dt.precision() <= Decimal.MAX_INT_DIGITS()) { col.putInts(0, capacity, (int)d.toUnscaledLong()); } else if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) { col.putLongs(0, capacity, d.toUnscaledLong());
switch (descriptor.getType()) { case INT32: if (column.dataType() == DataTypes.IntegerType || DecimalType.is32BitDecimalType(column.dataType())) { for (int i = rowId; i < rowId + num; ++i) { if (!column.isNullAt(i)) { column.putInt(i, dictionary.decodeToInt(dictionaryIds.getDictId(i))); } else if (column.dataType() == DataTypes.ByteType) { for (int i = rowId; i < rowId + num; ++i) { if (!column.isNullAt(i)) { column.putByte(i, (byte) dictionary.decodeToInt(dictionaryIds.getDictId(i))); } else if (column.dataType() == DataTypes.ShortType) { for (int i = rowId; i < rowId + num; ++i) { if (!column.isNullAt(i)) { column.putShort(i, (short) dictionary.decodeToInt(dictionaryIds.getDictId(i))); throw new UnsupportedOperationException("Unimplemented type: " + column.dataType()); if (column.dataType() == DataTypes.LongType || DecimalType.is64BitDecimalType(column.dataType())) { for (int i = rowId; i < rowId + num; ++i) { if (!column.isNullAt(i)) { column.putLong(i, dictionary.decodeToLong(dictionaryIds.getDictId(i))); } else if (column.dataType() == DataTypes.TimestampType) { for (int i = rowId; i < rowId + num; ++i) { if (!column.isNullAt(i)) {
private static void appendValue(ColumnVector dst, DataType t, Object o) { if (o == null) { if (t instanceof CalendarIntervalType) { dst.appendStruct(true); } else { dst.appendNull(); dst.appendBoolean(((Boolean)o).booleanValue()); } else if (t == DataTypes.ByteType) { dst.appendByte(((Byte) o).byteValue()); } else if (t == DataTypes.ShortType) { dst.appendShort(((Short)o).shortValue()); } else if (t == DataTypes.IntegerType) { dst.appendInt(((Integer)o).intValue()); } else if (t == DataTypes.LongType) { dst.appendLong(((Long)o).longValue()); } else if (t == DataTypes.FloatType) { dst.appendFloat(((Float)o).floatValue()); } else if (t == DataTypes.DoubleType) { dst.appendDouble(((Double)o).doubleValue()); } else if (t == DataTypes.StringType) { byte[] b =((String)o).getBytes(StandardCharsets.UTF_8); dst.appendByteArray(b, 0, b.length); } else if (t instanceof DecimalType) { DecimalType dt = (DecimalType) t; Decimal d = Decimal.apply((BigDecimal) o, dt.precision(), dt.scale()); if (dt.precision() <= Decimal.MAX_INT_DIGITS()) { dst.appendInt((int) d.toUnscaledLong()); } else if (dt.precision() <= Decimal.MAX_LONG_DIGITS()) { dst.appendLong(d.toUnscaledLong());
if (DecimalType.is32BitDecimalType(column.dataType())) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putInt(rowId + i, (int) ParquetRowConverter.binaryToUnscaledLong(data.readBinary(arrayLen))); } else { column.putNull(rowId + i); } else if (DecimalType.is64BitDecimalType(column.dataType())) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putLong(rowId + i, ParquetRowConverter.binaryToUnscaledLong(data.readBinary(arrayLen))); } else { column.putNull(rowId + i); } else if (DecimalType.isByteArrayDecimalType(column.dataType())) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putByteArray(rowId + i, data.readBinary(arrayLen).getBytes()); } else { column.putNull(rowId + i); throw new UnsupportedOperationException("Unimplemented type: " + column.dataType());
/**
 * Appends field {@code fieldIdx} of {@code src} to {@code dst}, dispatching on the type {@code t}.
 *
 * <ul>
 *   <li>{@code ArrayType}: a null field appends a null; otherwise the list size is appended with
 *       {@code appendArray} and each element goes into {@code dst.arrayData()}.</li>
 *   <li>{@code StructType}: the null flag is appended with {@code appendStruct}; for a non-null
 *       struct every field is appended recursively into the matching child column.</li>
 *   <li>Anything else is delegated to the {@code Object}-based overload.</li>
 * </ul>
 *
 * @param dst      vector receiving the value
 * @param t        data type of the field being appended
 * @param src      row to read the field from
 * @param fieldIdx index of the field inside {@code src}
 */
private static void appendValue(ColumnVector dst, DataType t, Row src, int fieldIdx) {
  if (t instanceof ArrayType) {
    if (src.isNullAt(fieldIdx)) {
      dst.appendNull();
      return;
    }
    final List<Object> elements = src.getList(fieldIdx);
    final DataType elementType = ((ArrayType) t).elementType();
    dst.appendArray(elements.size());
    for (Object element : elements) {
      appendValue(dst.arrayData(), elementType, element);
    }
    return;
  }

  if (t instanceof StructType) {
    final boolean structIsNull = src.isNullAt(fieldIdx);
    dst.appendStruct(structIsNull);
    if (structIsNull) {
      return;
    }
    final StructType structType = (StructType) t;
    final Row nested = src.getStruct(fieldIdx);
    final int fieldCount = structType.fields().length;
    for (int child = 0; child < fieldCount; child++) {
      appendValue(dst.getChildColumn(child), structType.fields()[child].dataType(), nested, child);
    }
    return;
  }

  // Neither array nor struct: let the Object overload handle the value.
  appendValue(dst, t, src.get(fieldIdx));
}
/**
 * Returns the calendar interval held in column {@code ordinal} for the current {@code rowId},
 * or {@code null} when that slot is null.
 *
 * <p>The interval is assembled from two child columns: child 0 is read as the {@code int} months
 * part and child 1 as the {@code long} microseconds part.
 */
@Override
public CalendarInterval getInterval(int ordinal) {
  final ColumnVector intervalColumn = columns[ordinal];
  if (intervalColumn.isNullAt(rowId)) {
    return null;
  }
  final int monthPart = intervalColumn.getChildColumn(0).getInt(rowId);
  final long microsPart = intervalColumn.getChildColumn(1).getLong(rowId);
  return new CalendarInterval(monthPart, microsPart);
}
dictionaryIds = column.reserveDictionaryIds(total); if (column.hasDictionary() || (rowId == 0 && (descriptor.getType() == PrimitiveType.PrimitiveTypeName.INT32 || (descriptor.getType() == PrimitiveType.PrimitiveTypeName.INT64 && column.dataType() != DataTypes.TimestampType) || descriptor.getType() == PrimitiveType.PrimitiveTypeName.FLOAT || descriptor.getType() == PrimitiveType.PrimitiveTypeName.DOUBLE || column.setDictionary(dictionary); } else { decodeDictionaryIds(rowId, num, column, dictionaryIds); if (column.hasDictionary() && rowId != 0) { decodeDictionaryIds(0, rowId, column, column.getDictionaryIds()); column.setDictionary(null); switch (descriptor.getType()) { case BOOLEAN:
/**
 * Returns the calendar interval stored at position {@code ordinal} of this array, or
 * {@code null} when that element is null.
 *
 * <p>The element lives at row {@code offset + ordinal} of the backing {@code data} vector.
 * Child column 0 is read as the {@code int} months part and child column 1 as the
 * {@code long} microseconds part.
 *
 * @param ordinal zero-based position inside this array
 * @return the interval, or {@code null} if the slot is null
 */
@Override
public CalendarInterval getInterval(int ordinal) {
  final int rowId = offset + ordinal;
  // A null slot has no meaningful child values; report it as null instead of
  // building an interval out of whatever the child columns hold at that row.
  if (data.isNullAt(rowId)) {
    return null;
  }
  final int months = data.getChildColumn(0).getInt(rowId);
  final long microseconds = data.getChildColumn(1).getLong(rowId);
  return new CalendarInterval(months, microseconds);
}
private void readBinaryBatch(int rowId, int num, ColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. // TODO: implement remaining type conversions VectorizedValuesReader data = (VectorizedValuesReader) dataColumn; if (column.isArray()) { defColumn.readBinarys(num, column, rowId, maxDefLevel, data); } else if (column.dataType() == DataTypes.TimestampType) { for (int i = 0; i < num; i++) { if (defColumn.readInteger() == maxDefLevel) { column.putLong(rowId + i, // Read 12 bytes for INT96 ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12))); } else { column.putNull(rowId + i); } } } else { throw new UnsupportedOperationException("Unimplemented type: " + column.dataType()); } }
/**
 * Returns the int at {@code rowId}.
 *
 * <p>When no dictionary is set the value comes straight from {@code intData}; otherwise the
 * dictionary id stored for the row is decoded through the dictionary.
 */
@Override
public int getInt(int rowId) {
  return dictionary == null
      ? intData[rowId]
      : dictionary.decodeToInt(dictionaryIds.getDictId(rowId));
}
/**
 * Appends {@code count} not-null markers to this vector.
 *
 * <p>Must not be used on struct-typed vectors (enforced by an assertion).
 *
 * @param count number of rows to mark as not null
 * @return the rowId of the first appended element
 */
public final int appendNotNulls(int count) {
  assert (!(dataType() instanceof StructType));
  reserve(elementsAppended + count);
  final int firstRowId = elementsAppended;
  putNotNulls(elementsAppended, count);
  elementsAppended += count;
  return firstRowId;
}
/** * Append APIs. These APIs all behave similarly and will append data to the current vector. It * is not valid to mix the put and append APIs. The append APIs are slower and should only be * used if the sizes are not known up front. * In all these cases, the return value is the rowId for the first appended element. */ public final int appendNull() { assert (!(dataType() instanceof StructType)); // Use appendStruct() reserve(elementsAppended + 1); putNull(elementsAppended); return elementsAppended++; }
/**
 * Converts the contents of {@code array} into a Java primitive array.
 * An array whose element type is {@code IntegerType} is returned as an {@code int[]}; this is
 * the only element type handled here.
 *
 * @param array the array to convert
 * @return an {@code int[]} holding the array's elements
 * @throws UnsupportedOperationException if the element type is not {@code IntegerType}
 * @throws RuntimeException if any element is null
 */
public static Object toPrimitiveJavaArray(ColumnVector.Array array) {
  if (!(array.data.dataType() instanceof IntegerType)) {
    throw new UnsupportedOperationException();
  }

  final int[] ints = new int[array.length];
  final ColumnVector elements = array.data;
  for (int i = 0; i < ints.length; i++) {
    final int row = array.offset + i;
    if (elements.isNullAt(row)) {
      throw new RuntimeException("Cannot handle NULL values.");
    }
    ints[i] = elements.getInt(row);
  }
  return ints;
}
private void readFloatBatch(int rowId, int num, ColumnVector column) throws IOException { // This is where we implement support for the valid type conversions. // TODO: support implicit cast to double? if (column.dataType() == DataTypes.FloatType) { defColumn.readFloats( num, column, rowId, maxDefLevel, (VectorizedValuesReader) dataColumn); } else { throw new UnsupportedOperationException("Unsupported conversion to: " + column.dataType()); } }