/**
 * Builds a reader for every entry of {@code columns} and stores it in
 * {@code columnReaders} at the index given by that column's id.
 */
private void initializeColumnReaders() {
    for (PrimitiveColumnIO leaf : columns) {
        RichColumnDescriptor descriptor = new RichColumnDescriptor(
                leaf.getColumnDescriptor(),
                leaf.getType().asPrimitiveType());
        // The column id doubles as the slot in the reader array.
        int slot = leaf.getId();
        columnReaders[slot] = PrimitiveColumnReader.createReader(descriptor);
    }
}
private State(int id, PrimitiveColumnIO primitiveColumnIO, ColumnReader column, int[] nextLevel, GroupConverter[] groupConverterPath, PrimitiveConverter primitiveConverter) { this.id = id; this.primitiveColumnIO = primitiveColumnIO; this.maxDefinitionLevel = primitiveColumnIO.getDefinitionLevel(); this.maxRepetitionLevel = primitiveColumnIO.getRepetitionLevel(); this.column = column; this.nextLevel = nextLevel; this.groupConverterPath = groupConverterPath; this.primitiveConverter = primitiveConverter; this.primitive = primitiveColumnIO.getType().asPrimitiveType().getPrimitiveTypeName(); this.fieldPath = primitiveColumnIO.getFieldPath(); this.primitiveField = fieldPath[fieldPath.length - 1]; this.indexFieldPath = primitiveColumnIO.getIndexFieldPath(); this.primitiveFieldIndex = indexFieldPath[indexFieldPath.length - 1]; }
/**
 * Reports whether this column is the very instance that {@code getFirst(r)}
 * returns (identity comparison, not {@code equals}).
 *
 * @param r level forwarded to {@code getFirst}
 * @return {@code true} when {@code getFirst(r)} is this object
 */
public boolean isFirst(int r) {
    return this == getFirst(r);
}
/**
 * Delegates to the superclass, then rebuilds this column's descriptor from
 * the given field path and the levels reported after the super call, and
 * snapshots {@code path} into an array.
 */
@Override
void setLevels(int r, int d, String[] fieldPath, int[] fieldIndexPath, List<ColumnIO> repetition, List<ColumnIO> path) {
    super.setLevels(r, d, fieldPath, fieldIndexPath, repetition, path);

    PrimitiveType primitive = getType().asPrimitiveType();
    int maxRepetition = getRepetitionLevel();
    int maxDefinition = getDefinitionLevel();
    this.columnDescriptor = new ColumnDescriptor(fieldPath, primitive, maxRepetition, maxDefinition);

    // Copy the list so later changes to it do not affect the stored path.
    ColumnIO[] snapshot = new ColumnIO[path.size()];
    this.path = path.toArray(snapshot);
}
final int[] indexFieldPath = leafColumnIO.getIndexFieldPath(); groupConverterPaths[i] = new GroupConverter[indexFieldPath.length - 1]; GroupConverter current = this.recordRootConverter; columnReaders[i] = columnStore.getColumnReader(leafColumnIO.getColumnDescriptor()); int maxRepetitionLevel = leafColumnIO.getRepetitionLevel(); nextColumnIdxForRepLevel[i] = new int[maxRepetitionLevel+1]; if (leafColumnIO.isFirst(nextRepLevel)) { firstIndexForLevel[nextRepLevel] = i; } else if (leafColumnIO.isLast(nextRepLevel)) { // when we are at the last of the next repetition level we jump back to the first nextColIdx = firstIndexForLevel[nextRepLevel]; } else { // otherwise we just go back to the next. } else if (leafColumnIO.isLast(nextRepLevel)) { // reached the end of this level => close the repetition level ColumnIO parent = leafColumnIO.getParent(nextRepLevel); levelToClose[i][nextRepLevel] = parent.getFieldPath().length - 1; } else { // otherwise close until the next common parent levelToClose[i][nextRepLevel] = getCommonParentLevel( leafColumnIO.getFieldPath(), leaves[nextColIdx].getFieldPath()); if (levelToClose[i][nextRepLevel] > leaves[i].getFieldPath().length-1) { throw new ParquetEncodingException(Arrays.toString(leaves[i].getFieldPath())+" -("+nextRepLevel+")-> "+levelToClose[i][nextRepLevel]); int[] definitionLevelToDepth = new int[states[i].primitiveColumnIO.getDefinitionLevel() + 1]; final ColumnIO[] path = states[i].primitiveColumnIO.getPath();
/**
 * Resolves a writer for every leaf of the enclosing {@code MessageColumnIO},
 * indexes the writers by leaf id, and sizes the per-depth bookkeeping arrays
 * by the longest field path found among the leaves.
 *
 * @param columns store the per-leaf writers are obtained from
 */
public MessageColumnIORecordConsumer(ColumnWriteStore columns) {
    this.columns = columns;
    int deepest = 0;
    this.columnWriter = new ColumnWriter[MessageColumnIO.this.getLeaves().size()];

    for (PrimitiveColumnIO leaf : MessageColumnIO.this.getLeaves()) {
        ColumnWriter writer = columns.getColumnWriter(leaf.getColumnDescriptor());
        int depth = leaf.getFieldPath().length;
        if (depth > deepest) {
            deepest = depth;
        }
        columnWriter[leaf.getId()] = writer;
        buildGroupToLeafWriterMap(leaf, writer);
    }

    // One marker per nesting depth.
    fieldsWritten = new FieldsMarker[deepest];
    for (int level = 0; level < fieldsWritten.length; level++) {
        fieldsWritten[level] = new FieldsMarker();
    }
    r = new int[deepest];
}
RichColumnDescriptor column = new RichColumnDescriptor(primitiveColumnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType()); return Optional.of(new PrimitiveField(type, repetitionLevel, definitionLevel, required, column, primitiveColumnIO.getId()));
/**
 * Looks up the column addressed by {@code path} and wraps its descriptor.
 *
 * @param columns candidate leaf columns
 * @param path    path components; must contain at least one element
 * @return the descriptor of the column found by {@code getPathIndex}, or an
 *         empty {@code Optional} when that lookup yields {@code -1}
 */
public static Optional<RichColumnDescriptor> getDescriptor(List<PrimitiveColumnIO> columns, List<String> path) {
    checkArgument(path.size() >= 1, "Parquet nested path should have at least one component");

    int position = getPathIndex(columns, path);
    if (position != -1) {
        PrimitiveColumnIO found = columns.get(position);
        RichColumnDescriptor descriptor = new RichColumnDescriptor(
                found.getColumnDescriptor(),
                found.getType().asPrimitiveType());
        return Optional.of(descriptor);
    }
    return Optional.empty();
}
/**
 * Returns the writer registered for the current column, which is cast to
 * {@code PrimitiveColumnIO} to obtain the id used as the array index.
 */
private ColumnWriter getColumnWriter() {
    PrimitiveColumnIO leaf = (PrimitiveColumnIO) currentColumnIO;
    return columnWriter[leaf.getId()];
}
/**
 * Registers, for each given leaf, the comparator of its primitive type in
 * {@code comparatorsByColumn}, keyed by the leaf's column path.
 *
 * @param leaves leaf columns whose comparators should be registered
 */
public IncrementallyUpdatedFilterPredicateBuilderBase(List<PrimitiveColumnIO> leaves) {
    for (PrimitiveColumnIO leaf : leaves) {
        ColumnDescriptor leafDescriptor = leaf.getColumnDescriptor();
        ColumnPath key = ColumnPath.get(leafDescriptor.getPath());
        PrimitiveComparator<?> ordering = leafDescriptor.getPrimitiveType().comparator();
        comparatorsByColumn.put(key, ordering);
    }
}
/**
 * Returns the primitive type name of this column's type.
 *
 * @return the result of {@code getPrimitiveTypeName()} on this column's
 *         type viewed as a primitive type
 */
public PrimitiveTypeName getPrimitive() {
    return getType()
            .asPrimitiveType()
            .getPrimitiveTypeName();
}
/**
 * Finds the index of the column whose ancestry matches {@code path},
 * comparing names case-insensitively.
 *
 * <p>Element {@code fields[level + 1]} of a column's path is compared with
 * {@code path.get(level)}; element {@code fields[0]} is never compared
 * (presumably it is the schema root -- confirm against the producer of
 * {@code getPath()}). Columns whose path has no more elements than
 * {@code path} has components are ignored. When several columns match, the
 * index of the last one is returned.
 *
 * @param columns candidate leaf columns
 * @param path    path components; callers are expected to pass at least one
 * @return index into {@code columns} of the last matching column, or
 *         {@code -1} when none matches
 */
private static int getPathIndex(List<PrimitiveColumnIO> columns, List<String> path) {
    int maxLevel = path.size();
    int index = -1;
    for (int columnIndex = 0; columnIndex < columns.size(); columnIndex++) {
        ColumnIO[] fields = columns.get(columnIndex).getPath();
        if (fields.length <= maxLevel) {
            continue;
        }
        // Check the final component first; skip the column when it differs.
        if (!fields[maxLevel].getName().equalsIgnoreCase(path.get(maxLevel - 1))) {
            continue;
        }
        boolean match = true;
        for (int level = 0; level < maxLevel - 1; level++) {
            if (!fields[level + 1].getName().equalsIgnoreCase(path.get(level))) {
                match = false;
                // One mismatch settles it; no need to compare the remaining levels.
                break;
            }
        }
        if (match) {
            // Deliberately no break here: a later matching column overrides an earlier one.
            index = columnIndex;
        }
    }
    return index;
}
/**
 * Visits a primitive type: reports an incompatible schema when the currently
 * requested type is not primitive or, under strict type checking, when its
 * primitive type name differs; then registers a new leaf column both under
 * the current parent and in the list of leaves.
 *
 * @param primitiveType the primitive type being visited
 */
@Override
public void visit(PrimitiveType primitiveType) {
    boolean compatible = currentRequestedType.isPrimitive();
    if (compatible && this.strictTypeChecking) {
        // Only reached for primitive requested types, so the cast-like call is safe.
        compatible = currentRequestedType.asPrimitiveType().getPrimitiveTypeName()
                == primitiveType.getPrimitiveTypeName();
    }
    if (!compatible) {
        incompatibleSchema(primitiveType, currentRequestedType);
    }

    // The new leaf's id is its position in the list of leaves.
    PrimitiveColumnIO leaf = new PrimitiveColumnIO(primitiveType, current, currentRequestedIndex, leaves.size());
    current.add(leaf);
    leaves.add(leaf);
}
/**
 * Reports whether this column is the very instance that {@code getLast(r)}
 * returns (identity comparison, not {@code equals}).
 *
 * @param r level forwarded to {@code getLast}
 * @return {@code true} when {@code getLast(r)} is this object
 */
public boolean isLast(int r) {
    return this == getLast(r);
}
/**
 * Returns a one-element list holding this column's field path.
 *
 * @return a fixed-size list backed by a single-element array
 */
@Override
List<String[]> getColumnNames() {
    String[][] onlyPath = { getFieldPath() };
    return Arrays.asList(onlyPath);
}
/**
 * Converts the index field path of the given column into a list.
 *
 * @param c column whose index field path is read
 * @return the result of {@code intArrayToList} applied to that path
 */
public static List<Integer> getIndexFieldPathList(PrimitiveColumnIO c) {
    int[] indexPath = c.getIndexFieldPath();
    return intArrayToList(indexPath);
}
final int[] indexFieldPath = leafColumnIO.getIndexFieldPath(); groupConverterPaths[i] = new GroupConverter[indexFieldPath.length - 1]; GroupConverter current = this.recordRootConverter; columnReaders[i] = columnStore.getColumnReader(leafColumnIO.getColumnDescriptor()); int maxRepetitionLevel = leafColumnIO.getRepetitionLevel(); nextColumnIdxForRepLevel[i] = new int[maxRepetitionLevel+1]; if (leafColumnIO.isFirst(nextRepLevel)) { firstIndexForLevel[nextRepLevel] = i; } else if (leafColumnIO.isLast(nextRepLevel)) { // when we are at the last of the next repetition level we jump back to the first nextColIdx = firstIndexForLevel[nextRepLevel]; } else { // otherwise we just go back to the next. } else if (leafColumnIO.isLast(nextRepLevel)) { // reached the end of this level => close the repetition level ColumnIO parent = leafColumnIO.getParent(nextRepLevel); levelToClose[i][nextRepLevel] = parent.getFieldPath().length - 1; } else { // otherwise close until the next common parent levelToClose[i][nextRepLevel] = getCommonParentLevel( leafColumnIO.getFieldPath(), leaves[nextColIdx].getFieldPath()); if (levelToClose[i][nextRepLevel] > leaves[i].getFieldPath().length-1) { throw new ParquetEncodingException(Arrays.toString(leaves[i].getFieldPath())+" -("+nextRepLevel+")-> "+levelToClose[i][nextRepLevel]); int[] definitionLevelToDepth = new int[states[i].primitiveColumnIO.getDefinitionLevel() + 1]; final ColumnIO[] path = states[i].primitiveColumnIO.getPath();
/**
 * Obtains a writer for each leaf of the enclosing {@code MessageColumnIO},
 * stores it at the leaf's id, and allocates the per-depth arrays using the
 * greatest field-path length seen across the leaves.
 *
 * @param columns store that supplies a writer per column descriptor
 */
public MessageColumnIORecordConsumer(ColumnWriteStore columns) {
    this.columns = columns;
    int longestPath = 0;
    this.columnWriter = new ColumnWriter[MessageColumnIO.this.getLeaves().size()];

    for (PrimitiveColumnIO leafIO : MessageColumnIO.this.getLeaves()) {
        ColumnWriter leafWriter = columns.getColumnWriter(leafIO.getColumnDescriptor());
        int pathLength = leafIO.getFieldPath().length;
        if (pathLength > longestPath) {
            longestPath = pathLength;
        }
        columnWriter[leafIO.getId()] = leafWriter;
        buildGroupToLeafWriterMap(leafIO, leafWriter);
    }

    // A fresh marker for each depth up to the longest path.
    fieldsWritten = new FieldsMarker[longestPath];
    for (int depth = 0; depth < fieldsWritten.length; depth++) {
        fieldsWritten[depth] = new FieldsMarker();
    }
    r = new int[longestPath];
}
/**
 * Resolves {@code path} against {@code columns} and returns the matching
 * column's descriptor.
 *
 * @param columns leaf columns to search
 * @param path    path components; an empty list is rejected
 * @return the wrapped descriptor, or an empty {@code Optional} when
 *         {@code getPathIndex} reports {@code -1}
 */
public static Optional<RichColumnDescriptor> getDescriptor(List<PrimitiveColumnIO> columns, List<String> path) {
    checkArgument(path.size() >= 1, "Parquet nested path should have at least one component");

    final int hit = getPathIndex(columns, path);
    if (hit != -1) {
        PrimitiveColumnIO leaf = columns.get(hit);
        RichColumnDescriptor rich = new RichColumnDescriptor(
                leaf.getColumnDescriptor(),
                leaf.getType().asPrimitiveType());
        return Optional.of(rich);
    }
    return Optional.empty();
}
/**
 * Forwards all arguments to the superclass, then recreates the column
 * descriptor from {@code fieldPath}, this column's primitive type and its
 * current repetition/definition levels, and stores {@code path} as an array.
 */
@Override
void setLevels(int r, int d, String[] fieldPath, int[] fieldIndexPath, List<ColumnIO> repetition, List<ColumnIO> path) {
    super.setLevels(r, d, fieldPath, fieldIndexPath, repetition, path);

    PrimitiveType leafType = getType().asPrimitiveType();
    int repetitionLevel = getRepetitionLevel();
    int definitionLevel = getDefinitionLevel();
    this.columnDescriptor = new ColumnDescriptor(fieldPath, leafType, repetitionLevel, definitionLevel);

    // Keep an array copy of the list rather than the list itself.
    ColumnIO[] asArray = new ColumnIO[path.size()];
    this.path = path.toArray(asArray);
}