ColumnIO getParent(int r) { if (getRepetitionLevel() == r && getType().isRepetition(Repetition.REPEATED)) { return this; } else if (getParent()!=null && getParent().getDefinitionLevel()>=r) { return getParent().getParent(r); } else { throw new InvalidRecordException("no parent("+r+") for "+Arrays.toString(this.getFieldPath())); } }
@Override void setLevels(int r, int d, String[] fieldPath, int[] indexFieldPath, List<ColumnIO> repetition, List<ColumnIO> path) { super.setLevels(r, d, fieldPath, indexFieldPath, repetition, path); for (ColumnIO child : this.children) { String[] newFieldPath = Arrays.copyOf(fieldPath, fieldPath.length + 1); int[] newIndexFieldPath = Arrays.copyOf(indexFieldPath, indexFieldPath.length + 1); newFieldPath[fieldPath.length] = child.getType().getName(); newIndexFieldPath[indexFieldPath.length] = child.getIndex(); List<ColumnIO> newRepetition; if (child.getType().isRepetition(REPEATED)) { newRepetition = new ArrayList<ColumnIO>(repetition); newRepetition.add(child); } else { newRepetition = repetition; } List<ColumnIO> newPath = new ArrayList<ColumnIO>(path); newPath.add(child); child.setLevels( // the type repetition level increases whenever there's a possible repetition child.getType().isRepetition(REPEATED) ? r + 1 : r, // the type definition level increases whenever a field can be missing (not required) !child.getType().isRepetition(REQUIRED) ? d + 1 : d, newFieldPath, newIndexFieldPath, newRepetition, newPath ); } }
@Override List<String[]> getColumnNames() { ArrayList<String[]> result = new ArrayList<String[]>(); for (ColumnIO c : children) { result.addAll(c.getColumnNames()); } return result; }
private void printState() { if (DEBUG) { log(currentLevel + ", " + fieldsWritten[currentLevel] + ": " + Arrays.toString(currentColumnIO.getFieldPath()) + " r:" + r[currentLevel]); if (r[currentLevel] > currentColumnIO.getRepetitionLevel()) { // sanity check throw new InvalidRecordException(r[currentLevel] + "(r) > " + currentColumnIO.getRepetitionLevel() + " ( schema r)"); } } }
public static ColumnIO getArrayElementColumn(ColumnIO columnIO) { while (columnIO instanceof GroupColumnIO && !columnIO.getType().isRepetition(REPEATED)) { columnIO = ((GroupColumnIO) columnIO).getChild(0); } /* If array has a standard 3-level structure with middle level repeated group with a single field: * optional group my_list (LIST) { * repeated group element { * required binary str (UTF8); * }; * } */ if (columnIO instanceof GroupColumnIO && columnIO.getType().getOriginalType() == null && ((GroupColumnIO) columnIO).getChildrenCount() == 1 && !columnIO.getName().equals("array") && !columnIO.getName().equals(columnIO.getParent().getName() + "_tuple")) { return ((GroupColumnIO) columnIO).getChild(0); } /* Backward-compatibility support for 2-level arrays where a repeated field is not a group: * optional group my_list (LIST) { * repeated int32 element; * } */ return columnIO; }
void add(ColumnIO child) { children.add(child); childrenByName.put(child.getType().getName(), child); ++ childrenSize; }
private void writeNullForMissingFieldsAtCurrentLevel() { int currentFieldsCount = ((GroupColumnIO) currentColumnIO).getChildrenCount(); for (int i = 0; i < currentFieldsCount; i++) { if (!fieldsWritten[currentLevel].isWritten(i)) { try { ColumnIO undefinedField = ((GroupColumnIO) currentColumnIO).getChild(i); int d = currentColumnIO.getDefinitionLevel(); if (DEBUG) log(Arrays.toString(undefinedField.getFieldPath()) + ".writeNull(" + r[currentLevel] + "," + d + ")"); writeNull(undefinedField, r[currentLevel], d); } catch (RuntimeException e) { throw new ParquetEncodingException("error while writing nulls for fields of indexes " + i + " . current index: " + fieldsWritten[currentLevel], e); } } } }
@Override public void addInteger(int value) { if (DEBUG) log("addInt({})", value); emptyField = false; getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel()); setRepetitionLevel(); if (DEBUG) printState(); }
@Override public void endField(String field, int index) { if (DEBUG) log("endField({}, {})",field ,index); currentColumnIO = currentColumnIO.getParent(); if (emptyField) { throw new ParquetEncodingException("empty fields are illegal, the field should be ommited completely instead"); } fieldsWritten[currentLevel].markWritten(index); r[currentLevel] = currentLevel == 0 ? 0 : r[currentLevel - 1]; if (DEBUG) printState(); }
PrimitiveColumnIO getLast() { return children.get(children.size()-1).getLast(); }
PrimitiveColumnIO getFirst() { return children.get(0).getFirst(); }
private static int getPathIndex(List<PrimitiveColumnIO> columns, List<String> path) { int maxLevel = path.size(); int index = -1; for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) { ColumnIO[] fields = columns.get(columnIndex).getPath(); if (fields.length <= maxLevel) { continue; } if (fields[maxLevel].getName().equalsIgnoreCase(path.get(maxLevel - 1))) { boolean match = true; for (int level = 0; level < maxLevel - 1; level++) { if (!fields[level + 1].getName().equalsIgnoreCase(path.get(level))) { match = false; } } if (match) { index = columnIndex; } } } return index; }
private void setRepetitionLevel() { r[currentLevel] = currentColumnIO.getRepetitionLevel(); if (DEBUG) log("r: {}", r[currentLevel]); }
public static ColumnIO getArrayElementColumn(ColumnIO columnIO) { while (columnIO instanceof GroupColumnIO && !columnIO.getType().isRepetition(REPEATED)) { columnIO = ((GroupColumnIO) columnIO).getChild(0); } /* If array has a standard 3-level structure with middle level repeated group with a single field: * optional group my_list (LIST) { * repeated group element { * required binary str (UTF8); * }; * } */ if (columnIO instanceof GroupColumnIO && columnIO.getType().getOriginalType() == null && ((GroupColumnIO) columnIO).getChildrenCount() == 1 && !columnIO.getName().equals("array") && !columnIO.getName().equals(columnIO.getParent().getName() + "_tuple")) { return ((GroupColumnIO) columnIO).getChild(0); } /* Backward-compatibility support for 2-level arrays where a repeated field is not a group: * optional group my_list (LIST) { * repeated int32 element; * } */ return columnIO; }
void add(ColumnIO child) { children.add(child); childrenByName.put(child.getType().getName(), child); ++ childrenSize; }
private void writeNullForMissingFieldsAtCurrentLevel() { int currentFieldsCount = ((GroupColumnIO) currentColumnIO).getChildrenCount(); for (int i = 0; i < currentFieldsCount; i++) { if (!fieldsWritten[currentLevel].isWritten(i)) { try { ColumnIO undefinedField = ((GroupColumnIO) currentColumnIO).getChild(i); int d = currentColumnIO.getDefinitionLevel(); if (DEBUG) log(Arrays.toString(undefinedField.getFieldPath()) + ".writeNull(" + r[currentLevel] + "," + d + ")"); writeNull(undefinedField, r[currentLevel], d); } catch (RuntimeException e) { throw new ParquetEncodingException("error while writing nulls for fields of indexes " + i + " . current index: " + fieldsWritten[currentLevel], e); } } } }
@Override public void addBoolean(boolean value) { if (DEBUG) log("addBoolean({})", value); emptyField = false; getColumnWriter().write(value, r[currentLevel], currentColumnIO.getDefinitionLevel()); setRepetitionLevel(); if (DEBUG) printState(); }
private void printState() { if (DEBUG) { log(currentLevel + ", " + fieldsWritten[currentLevel] + ": " + Arrays.toString(currentColumnIO.getFieldPath()) + " r:" + r[currentLevel]); if (r[currentLevel] > currentColumnIO.getRepetitionLevel()) { // sanity check throw new InvalidRecordException(r[currentLevel] + "(r) > " + currentColumnIO.getRepetitionLevel() + " ( schema r)"); } } }