/**
 * Projects a schema onto every field id it contains except the given ones.
 *
 * @param schema the schema to project
 * @param fieldIds ids to leave out of the projection
 * @return the result of {@code select} for the ids that remain
 */
public static Schema selectNot(Schema schema, Set<Integer> fieldIds) {
  Set<Integer> remainingIds = getProjectedIds(schema);
  // NOTE: this removes from the set returned by getProjectedIds in place, so that set must be
  // mutable (and should not be shared) -- TODO confirm against getProjectedIds
  remainingIds.removeAll(fieldIds);
  return select(schema, remainingIds);
}
public static MessageType pruneColumns(MessageType fileSchema, Schema expectedSchema) { // column order must match the incoming type, so it doesn't matter that the ids are unordered Set<Integer> selectedIds = TypeUtil.getProjectedIds(expectedSchema); return (MessageType) ParquetTypeVisitor.visit(fileSchema, new PruneColumns(selectedIds)); }
/**
 * Stores the file schema, then rebuilds the read schema and the wrapped datum reader from it.
 *
 * @param fileSchema the schema to record and to prune against the expected schema
 */
@Override
public void setSchema(Schema fileSchema) {
  this.fileSchema = fileSchema;

  // prune the file schema down to the ids projected by the expected schema
  Set<Integer> expectedIds = getProjectedIds(expectedSchema);
  Schema pruned = AvroSchemaUtil.pruneColumns(fileSchema, expectedIds);

  this.readSchema = AvroSchemaUtil.buildAvroProjection(pruned, expectedSchema, renames);

  // created after readSchema is assigned -- presumably the new reader depends on it; keep order
  this.wrapped = newDatumReader();
}
@Override public TableScan select(Collection<String> columns) { Set<Integer> requiredFieldIds = Sets.newHashSet(); // all of the filter columns are required requiredFieldIds.addAll( Binder.boundReferences(table.schema().asStruct(), Collections.singletonList(rowFilter))); // all of the projection columns are required requiredFieldIds.addAll(TypeUtil.getProjectedIds(table.schema().select(columns))); Schema projection = TypeUtil.select(table.schema(), requiredFieldIds); return new BaseTableScan(ops, table, snapshotId, projection, rowFilter); }
@Test public void testDeleteFields() { // use schema projection to test column deletes Set<Integer> ALL_IDS = ImmutableSet.copyOf(TypeUtil.getProjectedIds(SCHEMA)); List<String> columns = Lists.newArrayList("id", "data", "preferences", "preferences.feature1", "preferences.feature2", "locations", "locations.lat", "locations.long", "points", "points.x", "points.y", "doubles", "properties"); for (String name : columns) { Set<Integer> selected = Sets.newHashSet(ALL_IDS); // remove the id and any nested fields from the projection Types.NestedField nested = SCHEMA.findField(name); selected.remove(nested.fieldId()); selected.removeAll(TypeUtil.getProjectedIds(nested.type())); Schema del = new SchemaUpdate(SCHEMA, 19).deleteColumn(name).apply(); Assert.assertEquals("Should match projection with '" + name + "' removed", TypeUtil.select(SCHEMA, selected).asStruct(), del.asStruct()); } }