/**
 * Builds a lookup from each physical column name reported by the ORC reader
 * to that name's zero-based position in {@code reader.getColumnNames()}.
 *
 * @param reader the ORC reader whose column names are indexed
 * @return an immutable map of physical column name to ordinal
 */
private static Map<String, Integer> buildPhysicalNameOrdinalMap(OrcReader reader) {
    List<String> physicalNames = reader.getColumnNames();
    ImmutableMap.Builder<String, Integer> ordinalsByName = ImmutableMap.builder();
    for (int position = 0; position < physicalNames.size(); position++) {
        ordinalsByName.put(physicalNames.get(position), position);
    }
    return ordinalsByName.build();
}
}
private List<ColumnInfo> getColumnInfo(OrcReader reader) { Optional<OrcFileMetadata> metadata = getOrcFileMetadata(reader); if (metadata.isPresent()) { return getColumnInfoFromOrcUserMetadata(metadata.get()); } // support for legacy files without metadata return getColumnInfoFromOrcColumnTypes(reader.getColumnNames(), reader.getFooter().getTypes()); }
/**
 * Computes statistics for a single column by opening a record reader that
 * includes only that column and delegating to the indexer matching the type.
 *
 * <p>Boolean, long-backed (BIGINT, DATE, TIMESTAMP), double and varchar types
 * are handled; any other type yields {@code null}.
 *
 * <p>NOTE(review): the record reader is created before the type is examined and
 * is not closed in this method - confirm who owns its lifecycle.
 *
 * @param orcReader the reader for the ORC file
 * @param columnId the column id to locate among the file's column names
 * @param type the type of the column
 * @return the computed stats, or {@code null} when the type is not supported
 * @throws IOException declared by the original signature
 */
private static ColumnStats doComputeColumnStats(OrcReader orcReader, long columnId, Type type) throws IOException {
    int ordinal = columnIndex(orcReader.getColumnNames(), columnId);
    OrcRecordReader recordReader = orcReader.createRecordReader(
            ImmutableMap.of(ordinal, type),
            OrcPredicate.TRUE,
            UTC,
            newSimpleAggregatedMemoryContext(),
            INITIAL_BATCH_SIZE);

    ColumnStats stats;
    if (type.equals(BooleanType.BOOLEAN)) {
        stats = indexBoolean(type, recordReader, ordinal, columnId);
    } else if (type.equals(BigintType.BIGINT)
            || type.equals(DateType.DATE)
            || type.equals(TimestampType.TIMESTAMP)) {
        stats = indexLong(type, recordReader, ordinal, columnId);
    } else if (type.equals(DoubleType.DOUBLE)) {
        stats = indexDouble(type, recordReader, ordinal, columnId);
    } else if (type instanceof VarcharType) {
        stats = indexString(type, recordReader, ordinal, columnId);
    } else {
        // no indexer for this type
        stats = null;
    }
    return stats;
}
private static List<HiveColumnHandle> getPhysicalHiveColumnHandles(List<HiveColumnHandle> columns, boolean useOrcColumnNames, OrcReader reader, Path path) { if (!useOrcColumnNames) { return columns; } verifyFileHasColumnNames(reader.getColumnNames(), path); Map<String, Integer> physicalNameOrdinalMap = buildPhysicalNameOrdinalMap(reader); int nextMissingColumnIndex = physicalNameOrdinalMap.size(); ImmutableList.Builder<HiveColumnHandle> physicalColumns = ImmutableList.builder(); for (HiveColumnHandle column : columns) { Integer physicalOrdinal = physicalNameOrdinalMap.get(column.getName()); if (physicalOrdinal == null) { // if the column is missing from the file, assign it a column number larger // than the number of columns in the file so the reader will fill it with nulls physicalOrdinal = nextMissingColumnIndex; nextMissingColumnIndex++; } physicalColumns.add(new HiveColumnHandle(column.getName(), column.getHiveType(), column.getTypeSignature(), physicalOrdinal, column.getColumnType(), column.getComment())); } return physicalColumns.build(); }
// Open the ORC data source using the merge-distance, read-size and tiny-stripe limits
// taken from readerAttributes, then derive the column-id -> index mapping from the
// file's column names via columnIdIndex. The two builders start out empty here;
// presumably they are filled in by code that follows this excerpt (not visible).
OrcReader reader = new OrcReader(dataSource, ORC, readerAttributes.getMaxMergeDistance(), readerAttributes.getMaxReadSize(), readerAttributes.getTinyStripeThreshold(), HUGE_MAX_READ_BLOCK_SIZE); Map<Long, Integer> indexMap = columnIdIndex(reader.getColumnNames()); ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder(); ImmutableList.Builder<Integer> columnIndexes = ImmutableList.builder();
/**
 * Opens {@code dataSource} as an ORC file and returns a record reader over all
 * of its columns.
 *
 * <p>Asserts that the file has as many columns as {@code columnIds}, and that
 * the name of the column at each position equals the string form of the column
 * id at that position. The column at position {@code i} is read with
 * {@code types.get(i)}.
 *
 * @param dataSource the ORC data source to read
 * @param columnIds the expected column ids, in file order
 * @param types the types to read each column as, in file order
 * @return a record reader including every column
 * @throws IOException declared by the original signature
 */
public static OrcRecordReader createReader(OrcDataSource dataSource, List<Long> columnIds, List<Type> types) throws IOException {
    DataSize oneMegabyte = new DataSize(1, MEGABYTE);
    OrcReader orcReader = new OrcReader(dataSource, ORC, oneMegabyte, oneMegabyte, oneMegabyte, oneMegabyte);

    List<String> physicalNames = orcReader.getColumnNames();
    assertEquals(physicalNames.size(), columnIds.size());

    Map<Integer, Type> projection = new HashMap<>();
    for (int position = 0; position < columnIds.size(); position++) {
        long columnId = columnIds.get(position);
        assertEquals(physicalNames.get(position), String.valueOf(columnId));
        projection.put(position, types.get(position));
    }
    return createRecordReader(orcReader, projection);
}
/**
 * Opens {@code dataSource} as an ORC file, asserts that it declares no columns,
 * and returns a record reader with no included columns.
 *
 * @param dataSource the ORC data source to read
 * @return a record reader created with an empty included-column map
 * @throws IOException declared by the original signature
 */
public static OrcRecordReader createReaderNoRows(OrcDataSource dataSource) throws IOException {
    DataSize oneMegabyte = new DataSize(1, MEGABYTE);
    OrcReader orcReader = new OrcReader(dataSource, ORC, oneMegabyte, oneMegabyte, oneMegabyte, oneMegabyte);

    int columnCount = orcReader.getColumnNames().size();
    assertEquals(columnCount, 0);

    return createRecordReader(orcReader, ImmutableMap.of());
}
validateWrite(validation -> validation.getColumnNames().equals(getColumnNames()), "Unexpected column names"); validateWrite(validation -> validation.getRowGroupMaxRowCount() == footer.getRowsInRowGroup(), "Unexpected rows in group"); if (writeValidation.isPresent()) {
validateWrite(validation -> validation.getColumnNames().equals(getColumnNames()), "Unexpected column names"); validateWrite(validation -> validation.getRowGroupMaxRowCount() == footer.getRowsInRowGroup(), "Unexpected rows in group"); if (writeValidation.isPresent()) {