/**
 * Finds the position in {@code columns} of the column whose path matches {@code path}.
 *
 * <p>Names are compared case-insensitively. Entry {@code i} of {@code path} is compared with
 * element {@code i + 1} of the column's path array, so element 0 of that array is never
 * inspected (presumably it is the schema root; confirm against the caller). A column whose path
 * array is not longer than {@code path} is skipped. When several columns match, the index of
 * the last one is returned.
 *
 * @param columns the candidate primitive columns
 * @param path the field names to match, outermost first
 * @return the index of the last matching column, or {@code -1} when none matches
 */
private static int getPathIndex(List<PrimitiveColumnIO> columns, List<String> path) {
    final int depth = path.size();
    int lastMatch = -1;
    for (int position = 0; position < columns.size(); position++) {
        ColumnIO[] fields = columns.get(position).getPath();
        // The column must be deep enough to hold every requested level.
        if (fields.length <= depth) {
            continue;
        }
        // Compare the deepest requested name first.
        if (!fields[depth].getName().equalsIgnoreCase(path.get(depth - 1))) {
            continue;
        }
        // Then verify the enclosing levels, stopping at the first mismatch.
        boolean ancestorsMatch = true;
        for (int level = 1; level < depth && ancestorsMatch; level++) {
            ancestorsMatch = fields[level].getName().equalsIgnoreCase(path.get(level - 1));
        }
        if (ancestorsMatch) {
            lastMatch = position;
        }
    }
    return lastMatch;
}
/**
 * Finds the position in {@code columns} of the column whose path matches {@code path}.
 *
 * <p>Names are compared case-insensitively. Entry {@code i} of {@code path} is compared with
 * element {@code i + 1} of the column's path array, so element 0 of that array is never
 * inspected (presumably it is the schema root; confirm against the caller). A column whose path
 * array is not longer than {@code path} is skipped. When several columns match, the index of
 * the last one is returned.
 *
 * @param columns the candidate primitive columns
 * @param path the field names to match, outermost first
 * @return the index of the last matching column, or {@code -1} when none matches
 */
private static int getPathIndex(List<PrimitiveColumnIO> columns, List<String> path) {
    final int depth = path.size();
    int lastMatch = -1;
    for (int position = 0; position < columns.size(); position++) {
        ColumnIO[] fields = columns.get(position).getPath();
        // The column must be deep enough to hold every requested level.
        if (fields.length <= depth) {
            continue;
        }
        // Compare the deepest requested name first.
        if (!fields[depth].getName().equalsIgnoreCase(path.get(depth - 1))) {
            continue;
        }
        // Then verify the enclosing levels, stopping at the first mismatch.
        boolean ancestorsMatch = true;
        for (int level = 1; level < depth && ancestorsMatch; level++) {
            ancestorsMatch = fields[level].getName().equalsIgnoreCase(path.get(level - 1));
        }
        if (ancestorsMatch) {
            lastMatch = position;
        }
    }
    return lastMatch;
}
/** Opens the field described by {@code value} (its name and index) on the record consumer. */
@Override public void start() { recordConsumer.startField(value.getName(), value.getIndex()); } @Override
/**
 * Closes the field described by {@code key} on the record consumer, then makes
 * {@code valueProtocol} the current protocol.
 */
@Override public void end() { recordConsumer.endField(key.getName(), key.getIndex()); currentProtocol = valueProtocol; } });
/**
 * Closes, on the record consumer, the field identified by the name and index of
 * {@code currentType}.
 */
@Override public void writeFieldEnd() throws TException { recordConsumer.endField(currentType.getName(), currentType.getIndex()); } }
/**
 * Starts a group on the record consumer and then opens the field described by {@code key}
 * (its name and index).
 */
@Override public void start() { recordConsumer.startGroup(); recordConsumer.startField(key.getName(), key.getIndex()); } @Override
/**
 * Counts one more consumed item, closes the field described by {@code value} and then the
 * current group on the record consumer, and selects the next protocol.
 *
 * <p>When {@code consumed} reaches {@code countToConsume}, control returns to the enclosing
 * {@code MapWriteProtocol} instance and {@code consumed} is reset to 0; otherwise
 * {@code keyProtocol} becomes the current protocol.
 * NOTE(review): presumably {@code countToConsume} is the number of map entries announced
 * earlier; confirm where it is assigned.
 */
@Override public void end() { consumed ++; recordConsumer.endField(value.getName(), value.getIndex()); recordConsumer.endGroup(); if (consumed == countToConsume) { currentProtocol = MapWriteProtocol.this; consumed = 0; } else { currentProtocol = keyProtocol; } } });
/**
 * Looks up a direct child of {@code groupColumnIO} by name.
 *
 * <p>Parquet column names are case-sensitive, whereas Hive converts all column names to
 * lowercase. An exact-name lookup is therefore tried first; only if it finds nothing are the
 * children scanned in order for a case-insensitive match.
 *
 * @param groupColumnIO the group whose children are searched
 * @param columnName the name to look for
 * @return the exactly matching child, else the first child matching ignoring case, else
 *     {@code null}
 */
public static ColumnIO lookupColumnByName(GroupColumnIO groupColumnIO, String columnName) {
    ColumnIO exactMatch = groupColumnIO.getChild(columnName);
    if (exactMatch != null) {
        return exactMatch;
    }

    // No exact match: fall back to the first child whose name differs only by case.
    int childCount = groupColumnIO.getChildrenCount();
    for (int position = 0; position < childCount; position++) {
        ColumnIO candidate = groupColumnIO.getChild(position);
        if (candidate.getName().equalsIgnoreCase(columnName)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Looks up a direct child of {@code groupColumnIO} by name.
 *
 * <p>Parquet column names are case-sensitive, whereas Hive converts all column names to
 * lowercase. An exact-name lookup is therefore tried first; only if it finds nothing are the
 * children scanned in order for a case-insensitive match.
 *
 * @param groupColumnIO the group whose children are searched
 * @param columnName the name to look for
 * @return the exactly matching child, else the first child matching ignoring case, else
 *     {@code null}
 */
public static ColumnIO lookupColumnByName(GroupColumnIO groupColumnIO, String columnName) {
    ColumnIO exactMatch = groupColumnIO.getChild(columnName);
    if (exactMatch != null) {
        return exactMatch;
    }

    // No exact match: fall back to the first child whose name differs only by case.
    int childCount = groupColumnIO.getChildrenCount();
    for (int position = 0; position < childCount; position++) {
        ColumnIO candidate = groupColumnIO.getChild(position);
        if (candidate.getName().equalsIgnoreCase(columnName)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Resolves the column that carries the elements of an array column.
 *
 * <p>Starting at {@code columnIO}, the first child is followed repeatedly until the column is
 * either not a group or is a REPEATED group. If the result is a group with no original type
 * and exactly one child, and it is named neither {@code "array"} nor
 * {@code <parent name>_tuple}, that single child is returned. This is the standard 3-level
 * layout:
 *
 * <pre>
 * optional group my_list (LIST) {
 *   repeated group element {
 *     required binary str (UTF8);
 *   };
 * }
 * </pre>
 *
 * <p>In every other case the column reached by the descent is returned unchanged. That
 * includes the backward-compatible 2-level layout, where the repeated field is not a group:
 *
 * <pre>
 * optional group my_list (LIST) {
 *   repeated int32 element;
 * }
 * </pre>
 *
 * @param columnIO the array column, or a column inside it, to start from
 * @return the column holding the array elements
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO) {
    // Descend through first children until the repeated level (or a non-group) is reached.
    ColumnIO current = columnIO;
    while (current instanceof GroupColumnIO && !current.getType().isRepetition(REPEATED)) {
        current = ((GroupColumnIO) current).getChild(0);
    }

    // A non-group at the repeated level is itself the element (2-level layout).
    if (!(current instanceof GroupColumnIO)) {
        return current;
    }
    // An annotated group is the element itself, not a wrapper.
    if (current.getType().getOriginalType() != null) {
        return current;
    }
    GroupColumnIO repeatedGroup = (GroupColumnIO) current;
    // A wrapper group has exactly one field.
    if (repeatedGroup.getChildrenCount() != 1) {
        return current;
    }
    // Groups with these names are treated as the element even with a single field.
    String groupName = current.getName();
    if (groupName.equals("array")) {
        return current;
    }
    if (groupName.equals(current.getParent().getName() + "_tuple")) {
        return current;
    }
    // Standard 3-level layout: unwrap the single field of the repeated group.
    return repeatedGroup.getChild(0);
}
/**
 * Resolves the column that carries the elements of an array column.
 *
 * <p>Starting at {@code columnIO}, the first child is followed repeatedly until the column is
 * either not a group or is a REPEATED group. If the result is a group with no original type
 * and exactly one child, and it is named neither {@code "array"} nor
 * {@code <parent name>_tuple}, that single child is returned. This is the standard 3-level
 * layout:
 *
 * <pre>
 * optional group my_list (LIST) {
 *   repeated group element {
 *     required binary str (UTF8);
 *   };
 * }
 * </pre>
 *
 * <p>In every other case the column reached by the descent is returned unchanged. That
 * includes the backward-compatible 2-level layout, where the repeated field is not a group:
 *
 * <pre>
 * optional group my_list (LIST) {
 *   repeated int32 element;
 * }
 * </pre>
 *
 * @param columnIO the array column, or a column inside it, to start from
 * @return the column holding the array elements
 */
public static ColumnIO getArrayElementColumn(ColumnIO columnIO) {
    // Descend through first children until the repeated level (or a non-group) is reached.
    ColumnIO current = columnIO;
    while (current instanceof GroupColumnIO && !current.getType().isRepetition(REPEATED)) {
        current = ((GroupColumnIO) current).getChild(0);
    }

    // A non-group at the repeated level is itself the element (2-level layout).
    if (!(current instanceof GroupColumnIO)) {
        return current;
    }
    // An annotated group is the element itself, not a wrapper.
    if (current.getType().getOriginalType() != null) {
        return current;
    }
    GroupColumnIO repeatedGroup = (GroupColumnIO) current;
    // A wrapper group has exactly one field.
    if (repeatedGroup.getChildrenCount() != 1) {
        return current;
    }
    // Groups with these names are treated as the element even with a single field.
    String groupName = current.getName();
    if (groupName.equals("array")) {
        return current;
    }
    if (groupName.equals(current.getParent().getName() + "_tuple")) {
        return current;
    }
    // Standard 3-level layout: unwrap the single field of the repeated group.
    return repeatedGroup.getChild(0);
}
/**
 * Begins writing a Thrift field by opening the matching Parquet field on the record consumer.
 *
 * <p>A field of type {@code TType.STOP} is ignored. Otherwise the Parquet field registered for
 * {@code field.id} is stored in {@code currentType}, the field is opened on the record
 * consumer, and the child protocol at that field's index becomes the current protocol.
 *
 * @param field the Thrift field header being written
 * @throws TException declared by the overridden method; not thrown directly by this body
 * @throws ParquetEncodingException if no Parquet field is registered for {@code field.id}, or
 *     if an array index is out of range while resolving it (the original
 *     {@code ArrayIndexOutOfBoundsException} is attached as the cause)
 */
@Override
public void writeFieldBegin(TField field) throws TException {
    if (field.type == TType.STOP) {
        return;
    }
    try {
        currentType = thriftFieldIdToParquetField[field.id];
        if (currentType == null) {
            throw new ParquetEncodingException(fieldNotFoundMessage(field));
        }
        final int index = currentType.getIndex();
        recordConsumer.startField(currentType.getName(), index);
        currentProtocol = children[index];
    } catch (ArrayIndexOutOfBoundsException e) {
        // Keep the original exception as the cause so the failing index stays diagnosable.
        throw new ParquetEncodingException(fieldNotFoundMessage(field), e);
    }
}

/** Builds the error message used when a Thrift field id cannot be resolved to a Parquet field. */
private String fieldNotFoundMessage(TField field) {
    return "field " + field.id + " was not found in " + thriftType + " and " + schema.getType();
}