/**
 * Unwraps single-child wrapper groups of a Parquet MAP column until the
 * group holding the key/value pair (more than one child) is reached.
 *
 * @param groupColumnIO the map column group to unwrap
 * @return the innermost group with more than one child
 */
public static GroupColumnIO getMapKeyValueColumn(GroupColumnIO groupColumnIO)
{
    GroupColumnIO current = groupColumnIO;
    while (current.getChildrenCount() == 1) {
        // a single child means this is still a wrapper level; descend
        current = (GroupColumnIO) current.getChild(0);
    }
    return current;
}
/**
 * Builds the write protocol for a Thrift list backed by a Parquet LIST column.
 * NOTE(review): depends on outer-scope members not visible in this chunk
 * (listContent, contentProtocol, size, currentProtocol, getProtocol) — the
 * comments below describe only what this constructor itself shows.
 */
public ListWriteProtocol(GroupColumnIO columnIO, ThriftField values, Events returnClause) {
    super(returnClause);
    // the LIST group's single child carries the list elements
    this.listContent = columnIO.getChild(0);
    this.contentProtocol = getProtocol(values, listContent, new Events() {
        int consumedRecords = 0; // elements written so far for the current list
        @Override
        public void start() {
        }
        @Override
        public void end() {
            ++ consumedRecords;
            // once `size` elements have been consumed, hand control back to
            // this list protocol and reset the counter for the next list
            if (consumedRecords == size) {
                currentProtocol = ListWriteProtocol.this;
                consumedRecords = 0;
            }
        }
    });
}
/**
 * Drills down through wrapper groups with exactly one child to find the
 * key/value group of a Parquet map column.
 *
 * @param groupColumnIO the map column group
 * @return the first descendant group whose child count is not 1
 */
public static GroupColumnIO getMapKeyValueColumn(GroupColumnIO groupColumnIO)
{
    for (GroupColumnIO candidate = groupColumnIO; ; candidate = (GroupColumnIO) candidate.getChild(0)) {
        if (candidate.getChildrenCount() != 1) {
            // found the group carrying both key and value children
            return candidate;
        }
    }
}
/**
 * Builds the write protocol for a Thrift map backed by a Parquet MAP column.
 * NOTE(review): this snippet is truncated in the visible chunk — the anonymous
 * Events body and the remainder of the constructor continue past it.
 */
public MapWriteProtocol(GroupColumnIO columnIO, MapType type, Events returnClause) {
    super(returnClause);
    // the MAP group wraps a repeated key_value group;
    // its two children are the key column and the value column
    this.mapContent = (GroupColumnIO)columnIO.getChild(0);
    this.key = mapContent.getChild(0);
    this.value = mapContent.getChild(1);
    this.keyProtocol = getProtocol(type.getKey(), this.key, new Events() {
        @Override
/**
 * Parquet column names are case-sensitive unlike Hive, which converts all column names to lowercase.
 * Therefore, when we look up columns we first check for exact match, and if that fails we look for a
 * case-insensitive match.
 *
 * @return the matching child column, or null if none matches
 */
public static ColumnIO lookupColumnByName(GroupColumnIO groupColumnIO, String columnName)
{
    // fast path: exact, case-sensitive lookup
    ColumnIO exactMatch = groupColumnIO.getChild(columnName);
    if (exactMatch != null) {
        return exactMatch;
    }
    // slow path: scan children for a case-insensitive name match
    int childCount = groupColumnIO.getChildrenCount();
    for (int childIndex = 0; childIndex < childCount; childIndex++) {
        ColumnIO child = groupColumnIO.getChild(childIndex);
        if (child.getName().equalsIgnoreCase(columnName)) {
            return child;
        }
    }
    return null;
}
/**
 * Enters the child column at the given index of the current group and marks
 * the field as empty until a value is written.
 *
 * @throws ParquetEncodingException wrapping any runtime failure, with the
 *         field name and index for context
 */
@Override
public void startField(String field, int index)
{
    try {
        if (DEBUG) log("startField({}, {})", field, index);
        GroupColumnIO parent = (GroupColumnIO) currentColumnIO;
        currentColumnIO = parent.getChild(index);
        emptyField = true;
        if (DEBUG) printState();
    }
    catch (RuntimeException e) {
        throw new ParquetEncodingException("error starting field " + field + " at " + index, e);
    }
}
/**
 * Parquet column names are case-sensitive unlike Hive, which converts all column names to lowercase.
 * Therefore, when we look up columns we first check for exact match, and if that fails we look for a
 * case-insensitive match.
 *
 * @return the matching child column, or null when no child matches
 */
public static ColumnIO lookupColumnByName(GroupColumnIO groupColumnIO, String columnName)
{
    // try the exact (case-sensitive) name first
    ColumnIO found = groupColumnIO.getChild(columnName);
    if (found == null) {
        // fall back to a linear case-insensitive scan of the children
        for (int i = 0, n = groupColumnIO.getChildrenCount(); i < n; i++) {
            ColumnIO child = groupColumnIO.getChild(i);
            if (child.getName().equalsIgnoreCase(columnName)) {
                found = child;
                break;
            }
        }
    }
    return found;
}
/**
 * Descends into the indexed child of the current group column; the field is
 * considered empty until a value arrives.
 *
 * @throws ParquetEncodingException if entering the field fails, carrying the
 *         field name and index
 */
@Override
public void startField(String field, int index)
{
    try {
        if (DEBUG) {
            log("startField({}, {})", field, index);
        }
        currentColumnIO = ((GroupColumnIO) currentColumnIO).getChild(index);
        emptyField = true;
        if (DEBUG) {
            printState();
        }
    }
    catch (RuntimeException e) {
        throw new ParquetEncodingException("error starting field " + field + " at " + index, e);
    }
}
private void flushCachedNulls(GroupColumnIO group) { //flush children first for (int i = 0; i < group.getChildrenCount(); i++) { ColumnIO child = group.getChild(i); if (child instanceof GroupColumnIO) { flushCachedNulls((GroupColumnIO) child); } } //then flush itself writeNullToLeaves(group); }
// NOTE(review): truncated mid-method chunk — braces are unbalanced here.
// Records the Parquet column for a Thrift field id, then resolves each child
// field's column by name, failing fast when the schema lacks it.
thriftFieldIdToParquetField[thriftType.getChildren().get(i).getFieldId()] = schema.getChild(i);
for (int i = 0; i < children.length; i++) {
    final ThriftField field = thriftType.getChildren().get(i);
    final ColumnIO columnIO = schema.getChild(field.getName());
    if (columnIO == null) {
        // the Thrift field has no matching Parquet column — abort loudly
        throw new RuntimeException("Could not find " + field.getName() + " in " + schema);
private void flushCachedNulls(GroupColumnIO group) { //flush children first for (int i = 0; i < group.getChildrenCount(); i++) { ColumnIO child = group.getChild(i); if (child instanceof GroupColumnIO) { flushCachedNulls((GroupColumnIO) child); } } //then flush itself writeNullToLeaves(group); }
/**
 * Writes a null for every field of the current group level that has not been
 * written yet, at the current repetition level and the group's definition
 * level.
 *
 * @throws ParquetEncodingException if writing a null fails for some field
 */
private void writeNullForMissingFieldsAtCurrentLevel()
{
    // hoist the cast once instead of repeating it per field
    GroupColumnIO current = (GroupColumnIO) currentColumnIO;
    int currentFieldsCount = current.getChildrenCount();
    for (int i = 0; i < currentFieldsCount; i++) {
        if (fieldsWritten[currentLevel].isWritten(i)) {
            continue; // this field already has a value
        }
        try {
            ColumnIO undefinedField = current.getChild(i);
            int d = current.getDefinitionLevel();
            if (DEBUG) log(Arrays.toString(undefinedField.getFieldPath()) + ".writeNull(" + r[currentLevel] + "," + d + ")");
            writeNull(undefinedField, r[currentLevel], d);
        }
        catch (RuntimeException e) {
            throw new ParquetEncodingException("error while writing nulls for fields of indexes " + i + " . current index: " + fieldsWritten[currentLevel], e);
        }
    }
}
/**
 * Backfills nulls for every unwritten field of the current group level, using
 * the current repetition level and this group's definition level.
 *
 * @throws ParquetEncodingException when a null cannot be written
 */
private void writeNullForMissingFieldsAtCurrentLevel()
{
    int fieldCount = ((GroupColumnIO) currentColumnIO).getChildrenCount();
    for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++) {
        if (!fieldsWritten[currentLevel].isWritten(fieldIndex)) {
            try {
                ColumnIO undefinedField = ((GroupColumnIO) currentColumnIO).getChild(fieldIndex);
                int definitionLevel = currentColumnIO.getDefinitionLevel();
                if (DEBUG) {
                    log(Arrays.toString(undefinedField.getFieldPath()) + ".writeNull(" + r[currentLevel] + "," + definitionLevel + ")");
                }
                writeNull(undefinedField, r[currentLevel], definitionLevel);
            }
            catch (RuntimeException e) {
                throw new ParquetEncodingException("error while writing nulls for fields of indexes " + fieldIndex + " . current index: " + fieldsWritten[currentLevel], e);
            }
        }
    }
}
/**
 * Resolves the column that holds the elements of a Parquet array, handling
 * both the standard 3-level LIST encoding and legacy 2-level layouts.
 *
 * @param columnIO the array column
 * @return the element column
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO)
{
    // descend through non-repeated wrapper groups to reach the repeated level
    ColumnIO current = columnIO;
    while (current instanceof GroupColumnIO && !current.getType().isRepetition(REPEATED)) {
        current = ((GroupColumnIO) current).getChild(0);
    }

    /* If array has a standard 3-level structure with middle level repeated group with a single field:
     *   optional group my_list (LIST) {
     *     repeated group element {
     *       required binary str (UTF8);
     *     };
     *   }
     */
    if (current instanceof GroupColumnIO
            && current.getType().getOriginalType() == null
            && ((GroupColumnIO) current).getChildrenCount() == 1
            && !current.getName().equals("array")
            && !current.getName().equals(current.getParent().getName() + "_tuple")) {
        return ((GroupColumnIO) current).getChild(0);
    }

    /* Backward-compatibility support for 2-level arrays where a repeated field is not a group:
     *   optional group my_list (LIST) {
     *     repeated int32 element;
     *   }
     */
    return current;
}
/**
 * Finds the element column of a Parquet array, supporting the standard
 * 3-level LIST structure as well as legacy 2-level encodings.
 *
 * @param columnIO the array column
 * @return the column holding the array elements
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO)
{
    // walk down single-child wrapper groups until the repeated level
    while (columnIO instanceof GroupColumnIO && !columnIO.getType().isRepetition(REPEATED)) {
        columnIO = ((GroupColumnIO) columnIO).getChild(0);
    }

    /* If array has a standard 3-level structure with middle level repeated group with a single field:
     *   optional group my_list (LIST) {
     *     repeated group element {
     *       required binary str (UTF8);
     *     };
     *   }
     */
    boolean standardThreeLevel = columnIO instanceof GroupColumnIO
            && columnIO.getType().getOriginalType() == null
            && ((GroupColumnIO) columnIO).getChildrenCount() == 1
            && !columnIO.getName().equals("array")
            && !columnIO.getName().equals(columnIO.getParent().getName() + "_tuple");
    if (standardThreeLevel) {
        return ((GroupColumnIO) columnIO).getChild(0);
    }

    /* Backward-compatibility support for 2-level arrays where a repeated field is not a group:
     *   optional group my_list (LIST) {
     *     repeated int32 element;
     *   }
     */
    return columnIO;
}
// NOTE(review): truncated mid-method chunk — the conditionals guarding these
// returns are outside this view, so the consecutive `return` statements are
// not actually sequential. First branch builds a MAP group field from the
// key/value children; second builds a LIST group field from the single
// element column.
return Optional.empty();
Optional<Field> keyField = constructField(mapType.getKeyType(), keyValueColumnIO.getChild(0));
Optional<Field> valueField = constructField(mapType.getValueType(), keyValueColumnIO.getChild(1));
return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(keyField, valueField)));
return Optional.empty();
Optional<Field> field = constructField(types.get(0), getArrayElementColumn(groupColumnIO.getChild(0)));
return Optional.of(new GroupField(type, repetitionLevel, definitionLevel, required, ImmutableList.of(field)));