@Override
public Iterator<D> iterator() {
  // Open the underlying Avro file reader and load its key/value metadata.
  FileReader<D> fileReader = initMetadata(newFileReader());

  // When a split range was configured, restrict iteration to that range.
  if (start != null) {
    fileReader = new AvroRangeIterator<>(fileReader, start, end);
  }

  // NOTE(review): on the reuse path the reader is NOT registered via
  // addCloseable — presumably AvroReuseIterator owns its lifecycle; confirm.
  if (reuseContainers) {
    return new AvroReuseIterator<>(fileReader);
  }

  addCloseable(fileReader);
  return fileReader;
}
@Override
@SuppressWarnings("unchecked")
public void setSchema(Schema schema) {
  // Build the writer tree for this Avro schema. The visitor produces an
  // untyped ValueWriter, so the narrowing cast to ValueWriter<T> is unchecked.
  ValueWriter<?> built = visit(schema, new WriteBuilder());
  this.writer = (ValueWriter<T>) built;
}
/**
 * Builds an Avro iterable over the task's byte range, projected to the given
 * read schema and decoded into Spark {@link InternalRow}s.
 */
private CloseableIterable<InternalRow> newAvroIterable(
    InputFile location, FileScanTask task, Schema readSchema) {
  // Container reuse is safe here because rows are consumed before the next read.
  // NOTE(review): assumes the downstream consumer copies rows it retains — confirm.
  return Avro.read(location)
      .reuseContainers()
      .project(readSchema)
      .split(task.start(), task.length())
      .createReaderFunc(SparkAvroReader::new)
      .build();
}
public <D> FileAppender<D> build() throws IOException { Preconditions.checkNotNull(schema, "Schema is required"); Preconditions.checkNotNull(name, "Table name is required and cannot be null"); // add the Iceberg schema to keyValueMetadata meta("iceberg.schema", SchemaParser.toJson(schema)); return new AvroFileAppender<>( AvroSchemaUtil.convert(schema, name), file, createWriterFunc, codec(), metadata); } }
@Override
public ValueReader<?> array(Schema array, ValueReader<?> elementReader) {
  // A LogicalMap marks an array of key/value records used to represent a map;
  // unwrap the element's struct reader into separate key and value readers.
  if (array.getLogicalType() instanceof LogicalMap) {
    // Fixed: use the wildcard cast instead of the raw type, matching the
    // writer-side code which casts to ValueWriters.StructWriter<?>.
    ValueReaders.StructReader<?> keyValueReader = (ValueReaders.StructReader<?>) elementReader;
    ValueReader<?> keyReader = keyValueReader.reader(0);
    ValueReader<?> valueReader = keyValueReader.reader(1);
    return ValueReaders.arrayMap(keyReader, valueReader);
  }

  // Plain Avro array: read elements directly.
  return ValueReaders.array(elementReader);
}
@Override
public ValueWriter<?> array(Schema array, ValueWriter<?> elementWriter) {
  // Arrays tagged with LogicalMap encode a map as key/value records; split the
  // struct writer into its key (field 0) and value (field 1) writers.
  if (array.getLogicalType() instanceof LogicalMap) {
    ValueWriters.StructWriter<?> keyValueWriter = (ValueWriters.StructWriter<?>) elementWriter;
    ValueWriter<?> keyWriter = keyValueWriter.writer(0);
    ValueWriter<?> valueWriter = keyValueWriter.writer(1);
    return ValueWriters.arrayMap(keyWriter, valueWriter);
  }

  // Ordinary array: write elements as-is.
  return ValueWriters.array(elementWriter);
}
@SuppressWarnings("unchecked")
private void initReader() {
  // Build the reader tree for the resolved read schema. The visitor returns an
  // untyped reader, hence the unchecked cast to the expected element type.
  ValueReader<?> built = AvroSchemaVisitor.visit(readSchema, new ReadBuilder(loader));
  this.reader = (ValueReader<T>) built;
}
/**
 * Converts an Avro schema to the corresponding Iceberg {@link Type} by walking
 * it with a {@code SchemaToType} visitor rooted at the schema itself.
 */
public static Type convert(Schema schema) {
  SchemaToType converter = new SchemaToType(schema);
  return AvroSchemaVisitor.visit(schema, converter);
}
@Override
public void setSchema(Schema fileSchema) {
  this.fileSchema = fileSchema;

  // Drop file columns that the expected schema does not project, then rebuild
  // the Avro schema so field names/order line up with the expected schema.
  Set<Integer> ids = getProjectedIds(expectedSchema);
  Schema pruned = AvroSchemaUtil.pruneColumns(fileSchema, ids);
  this.readSchema = AvroSchemaUtil.buildAvroProjection(pruned, expectedSchema, renames);

  // Recreate the delegate reader against the new read schema.
  this.wrapped = newDatumReader();
}
@Override
public ValueReader<?> union(Schema union, List<ValueReader<?>> options) {
  // Delegate union handling to the shared reader factory; the schema itself
  // carries no extra information the options don't already encode here.
  return ValueReaders.union(options);
}
public <D> AvroIterable<D> build() { Preconditions.checkNotNull(schema, "Schema is required"); return new AvroIterable<>(file, new ProjectionDatumReader<>(createReaderFunc, schema, renames), start, length, reuseContainers); } }
// Returns the field's explicit ID when the field-id property is present;
// otherwise allocates a fresh one.
private int getId(Schema.Field field) {
  Object idProp = field.getObjectProp(AvroSchemaUtil.FIELD_ID_PROP);
  return idProp != null ? AvroSchemaUtil.getFieldId(field) : allocateId();
}
/**
 * Projects an Avro schema onto an expected Iceberg schema, applying the given
 * field renames, by walking the Avro schema with a custom-order visitor.
 */
public static Schema buildAvroProjection(
    Schema schema, com.netflix.iceberg.Schema expected, Map<String, String> renames) {
  BuildAvroProjection projector = new BuildAvroProjection(expected, renames);
  return AvroCustomOrderSchemaVisitor.visit(schema, projector);
}
@Override public F get() { return visitor.field(field, new VisitFuture<>(field.schema(), visitor)); } }
// Opens an Avro writer for the given schema/output file using the supplied
// datum-writer factory, codec, and file metadata.
//
// @throws IOException if the underlying writer cannot be created
AvroFileAppender(
    Schema schema,
    OutputFile file,
    Function<Schema, DatumWriter<?>> createWriterFunc,
    CodecFactory codec,
    Map<String, String> metadata) throws IOException {
  this.writer = newAvroWriter(schema, file, createWriterFunc, codec, metadata);
}
@Override public T get() { return visit(schema, visitor); } }
/**
 * Static factory for a writer that serializes a {@code Map<K, V>} using the
 * given key and value writers.
 */
public static <K, V> ValueWriter<Map<K, V>> map(
    ValueWriter<K> keyWriter, ValueWriter<V> valueWriter) {
  return new MapWriter<>(keyWriter, valueWriter);
}