// Build immutable lookup maps so fields can be resolved by name or by field id.
private void indexFields() {
  ImmutableMap.Builder<String, NestedField> byNameBuilder = ImmutableMap.builder();
  ImmutableMap.Builder<Integer, NestedField> byIdBuilder = ImmutableMap.builder();
  for (NestedField field : fields) {
    byNameBuilder.put(field.name(), field);
    byIdBuilder.put(field.fieldId(), field);
  }
  this.fieldsByName = byNameBuilder.build();
  this.fieldsById = byIdBuilder.build();
}
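// A minimal usage sketch of the indexes built above. These accessors mirror how
// calling code elsewhere in this section (e.g. sourceStruct.field(name)) looks
// fields up, but the exact signatures here are assumptions, not the original class:
public NestedField field(String name) {
  return fieldsByName.get(name);
}

public NestedField field(int id) {
  return fieldsById.get(id);
}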
@Override
public String toString() {
  StringBuilder sb = new StringBuilder();
  sb.append("PartitionData{");
  for (int i = 0; i < data.length; i += 1) {
    if (i > 0) {
      sb.append(", ");
    }
    sb.append(partitionType.fields().get(i).name())
        .append("=")
        .append(data[i]);
  }
  sb.append("}");
  return sb.toString();
}
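// Worked example (illustrative values, not from the source): for a partition
// type with fields (date, hour) and data {"2018-12-01", 10}, toString() yields:
//   PartitionData{date=2018-12-01, hour=10}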
/**
 * Constructor.
 *
 * @param fields partition fields
 */
public IcebergFilterGenerator(final List<Types.NestedField> fields) {
  fieldMap = Maps.newHashMap();
  for (final Types.NestedField field : fields) {
    fieldMap.put(field.name(), field);
  }
}
public Type field(NestedField field) {
  Type.Repetition repetition = field.isOptional() ?
      Type.Repetition.OPTIONAL : Type.Repetition.REQUIRED;
  int id = field.fieldId();
  String name = field.name();

  if (field.type().isPrimitiveType()) {
    return primitive(field.type().asPrimitiveType(), repetition, id, name);

  } else {
    NestedType nested = field.type().asNestedType();
    if (nested.isStructType()) {
      return struct(nested.asStructType(), repetition, id, name);
    } else if (nested.isMapType()) {
      return map(nested.asMapType(), repetition, id, name);
    } else if (nested.isListType()) {
      return list(nested.asListType(), repetition, id, name);
    }
    throw new UnsupportedOperationException("Can't convert unknown type: " + nested);
  }
}
@Override
public Type struct(Types.StructType struct, Iterable<Type> fieldTypes) {
  Preconditions.checkNotNull(sourceType, "Evaluation must start with a schema.");
  Preconditions.checkArgument(sourceType.isStructType(), "Not a struct: " + sourceType);

  Types.StructType sourceStruct = sourceType.asStructType();
  List<Types.NestedField> fields = struct.fields();
  int length = fields.size();

  List<Type> types = Lists.newArrayList(fieldTypes);
  List<Types.NestedField> newFields = Lists.newArrayListWithExpectedSize(length);
  for (int i = 0; i < length; i += 1) {
    Types.NestedField field = fields.get(i);
    int sourceFieldId = sourceStruct.field(field.name()).fieldId();
    if (field.isRequired()) {
      newFields.add(Types.NestedField.required(sourceFieldId, field.name(), types.get(i)));
    } else {
      newFields.add(Types.NestedField.optional(sourceFieldId, field.name(), types.get(i)));
    }
  }

  return Types.StructType.of(newFields);
}
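// Hedged usage sketch: visitors like this one run bottom-up, so fieldTypes already
// holds the converted child types when struct() is called. The driver method and
// visitor class name below are assumptions about the surrounding code:
// Type reassigned = TypeUtil.visit(schema.asStruct(), new ReassignIds(sourceSchema.asStruct()));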
@Override
public List<String> getPredicateFields(String location, Job job) throws IOException {
  LOG.info(format("[%s]: getPredicateFields() -> %s", signature, location));
  Schema schema = load(location, job).schema();
  List<String> result = Lists.newArrayList();
  for (Types.NestedField nf : schema.columns()) {
    switch (nf.type().typeId()) {
      case MAP:
      case LIST:
      case STRUCT:
        // skip nested column types; only primitive top-level columns are
        // reported as usable predicate fields
        continue;
      default:
        result.add(nf.name());
    }
  }
  return result;
}
private static UnsafeProjection projection(Schema finalSchema, Schema readSchema) {
  StructType struct = convert(readSchema);

  List<AttributeReference> refs = seqAsJavaListConverter(struct.toAttributes()).asJava();
  List<Attribute> attrs = Lists.newArrayListWithExpectedSize(struct.fields().length);
  List<org.apache.spark.sql.catalyst.expressions.Expression> exprs =
      Lists.newArrayListWithExpectedSize(struct.fields().length);

  for (AttributeReference ref : refs) {
    attrs.add(ref.toAttribute());
  }

  for (Types.NestedField field : finalSchema.columns()) {
    int indexInReadSchema = struct.fieldIndex(field.name());
    exprs.add(refs.get(indexInReadSchema));
  }

  return UnsafeProjection.create(
      asScalaBufferConverter(exprs).asScala().toSeq(),
      asScalaBufferConverter(attrs).asScala().toSeq());
}
static void toJson(Types.StructType struct, JsonGenerator generator) throws IOException {
  generator.writeStartObject();
  generator.writeStringField(TYPE, STRUCT);
  generator.writeArrayFieldStart(FIELDS);
  for (Types.NestedField field : struct.fields()) {
    generator.writeStartObject();
    generator.writeNumberField(ID, field.fieldId());
    generator.writeStringField(NAME, field.name());
    generator.writeBooleanField(REQUIRED, field.isRequired());
    generator.writeFieldName(TYPE);
    toJson(field.type(), generator);
    generator.writeEndObject();
  }
  generator.writeEndArray();
  generator.writeEndObject();
}
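// Example output for a struct with a single required long field "id" (fieldId 1),
// assuming the usual values of the TYPE/STRUCT/FIELDS/ID/NAME/REQUIRED constants:
//   {"type":"struct","fields":[{"id":1,"name":"id","required":true,"type":"long"}]}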
@Override
public UpdateSchema renameColumn(String name, String newName) {
  Types.NestedField field = schema.findField(name);
  Preconditions.checkArgument(field != null, "Cannot rename missing column: %s", name);
  Preconditions.checkArgument(!deletes.contains(field.fieldId()),
      "Cannot rename a column that will be deleted: %s", field.name());

  // merge with an update, if present
  int fieldId = field.fieldId();
  Types.NestedField update = updates.get(fieldId);
  // preserve the column's optionality: renaming must not silently force a column
  // to required (the original always built a required field here)
  Types.NestedField base = (update != null) ? update : field;
  if (base.isRequired()) {
    updates.put(fieldId, Types.NestedField.required(fieldId, newName, base.type()));
  } else {
    updates.put(fieldId, Types.NestedField.optional(fieldId, newName, base.type()));
  }

  return this;
}
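// Usage sketch, mirroring the updateSchema().commit() pattern in the test below;
// the column names are illustrative:
// table.updateSchema().renameColumn("data", "payload").commit();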
@Test
public void testExistingTableUpdate() throws TException {
  com.netflix.iceberg.Table icebergTable = new HiveTables(hiveConf).load(DB_NAME, TABLE_NAME);
  // add a column
  icebergTable.updateSchema().addColumn("data", Types.LongType.get()).commit();

  icebergTable = new HiveTables(hiveConf).load(DB_NAME, TABLE_NAME);

  // only two metadata version files should exist, and no manifests
  Assert.assertEquals(2, metadataVersionFiles(TABLE_NAME).size());
  Assert.assertEquals(0, manifestFiles(TABLE_NAME).size());
  Assert.assertEquals(altered.asStruct(), icebergTable.schema().asStruct());

  final Table table = metastoreClient.getTable(DB_NAME, TABLE_NAME);
  final List<String> hiveColumns = table.getSd().getCols().stream()
      .map(FieldSchema::getName)
      .collect(Collectors.toList());
  final List<String> icebergColumns = altered.columns().stream()
      .map(Types.NestedField::name)
      .collect(Collectors.toList());
  Assert.assertEquals(icebergColumns, hiveColumns);
}
private List<FieldSchema> columns(Schema schema) {
  return schema.columns().stream()
      .map(col -> new FieldSchema(col.name(), HiveTypeConverter.convert(col.type()), ""))
      .collect(Collectors.toList());
}
@Override
public Type field(Types.NestedField field, Supplier<Type> future) {
  Preconditions.checkArgument(sourceType.isStructType(), "Not a struct: " + sourceType);

  Types.StructType sourceStruct = sourceType.asStructType();
  Types.NestedField sourceField = sourceStruct.field(field.name());
  // descend into the matching source field for the duration of the child visit,
  // then restore the parent struct as the current source type
  this.sourceType = sourceField.type();
  try {
    return future.get();
  } finally {
    sourceType = sourceStruct;
  }
}
@Override
public DataType struct(Types.StructType struct, List<DataType> fieldResults) {
  List<Types.NestedField> fields = struct.fields();
  List<StructField> sparkFields = Lists.newArrayListWithExpectedSize(fieldResults.size());
  for (int i = 0; i < fields.size(); i += 1) {
    Types.NestedField field = fields.get(i);
    DataType type = fieldResults.get(i);
    // Iceberg's optional maps to Spark's nullable
    sparkFields.add(StructField.apply(field.name(), type, field.isOptional(), Metadata.empty()));
  }
  return StructType$.MODULE$.apply(sparkFields);
}
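// Example (illustrative): an Iceberg struct {1: id required long, 2: data optional string}
// converts to:
//   StructType(StructField("id", LongType, nullable = false),
//              StructField("data", StringType, nullable = true))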
private static Record projectFlat(Schema projection, Record record) {
  org.apache.avro.Schema avroSchema = AvroSchemaUtil.convert(projection, "test");
  Record result = new Record(avroSchema);
  List<Types.NestedField> fields = projection.asStruct().fields();
  for (int i = 0; i < fields.size(); i += 1) {
    Types.NestedField field = fields.get(i);
    result.put(i, record.get(field.name()));
  }
  return result;
}
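// Usage sketch (assumes Schema.select for building the projection; column names
// are illustrative):
// Record projected = projectFlat(schema.select("id", "data"), record);
// projected then holds only the selected top-level fields, in projection order.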
@Override
public Map<String, Integer> load(StructType struct) {
  // maps each top-level field name to its position in the struct
  Map<String, Integer> idToPos = Maps.newHashMap();
  List<Types.NestedField> fields = struct.fields();
  for (int i = 0; i < fields.size(); i += 1) {
    idToPos.put(fields.get(i).name(), i);
  }
  return idToPos;
}
});
private static ResourceFieldSchema convert(Types.NestedField field) throws IOException {
  ResourceFieldSchema result = convert(field.type());
  result.setName(field.name());
  result.setDescription(format("FieldId: %s", field.fieldId()));
  return result;
}
@Override
public Map<String, Integer> list(Types.ListType list, Map<String, Integer> elementResult) {
  for (Types.NestedField field : list.fields()) {
    addField(field.name(), field.fieldId());
  }
  return null;
}

@Override
public Map<String, Integer> map(Types.MapType map, Map<String, Integer> keyResult,
                                Map<String, Integer> valueResult) {
  for (Types.NestedField field : map.fields()) {
    addField(field.name(), field.fieldId());
  }
  return null;
}

@Override
public Map<String, Integer> field(Types.NestedField field, Map<String, Integer> fieldResult) {
  addField(field.name(), field.fieldId());
  return null;
}
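// Taken together, field()/list()/map() let a schema visitor accumulate a flat
// name -> fieldId index over every nested field via addField. A hedged driver
// sketch; the visitor class and accessor names here are assumptions:
// IndexByName indexer = new IndexByName();
// TypeUtil.visit(schema.asStruct(), indexer);
// Map<String, Integer> byName = indexer.byName();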