/**
 * Returns a supplier of processors for {@link AvroSources#filesBuilder}.
 * <p>
 * The work is delegated to {@code ReadFilesP.metaSupplier}. For every path
 * handed to the read function, a new {@link DataFileReader} is opened over
 * that file using a datum reader obtained from {@code datumReaderSupplier},
 * and the reader's records are exposed as a sequential stream. Closing that
 * stream closes the underlying reader.
 *
 * @param directory           forwarded unchanged to {@code ReadFilesP.metaSupplier}
 * @param glob                forwarded unchanged to {@code ReadFilesP.metaSupplier}
 * @param sharedFileSystem    forwarded unchanged to {@code ReadFilesP.metaSupplier}
 * @param datumReaderSupplier invoked once for each file that is opened, to
 *                            create the datum reader for that file
 * @param mapOutputFn         forwarded unchanged to {@code ReadFilesP.metaSupplier}
 *                            (presumably maps a file name and a record to the
 *                            emitted item; confirm against {@code ReadFilesP})
 * @param <D>                 type of the records read from the Avro files
 * @param <T>                 result type of {@code mapOutputFn}
 * @return the meta-supplier created by {@code ReadFilesP.metaSupplier}
 */
@Nonnull
public static <D, T> ProcessorMetaSupplier readFilesP(
        @Nonnull String directory,
        @Nonnull String glob,
        boolean sharedFileSystem,
        @Nonnull DistributedSupplier<? extends DatumReader<D>> datumReaderSupplier,
        @Nonnull DistributedBiFunction<String, ? super D, T> mapOutputFn
) {
    return ReadFilesP.metaSupplier(
            directory,
            glob,
            sharedFileSystem,
            avroPath -> {
                DataFileReader<D> fileReader =
                        new DataFileReader<>(avroPath.toFile(), datumReaderSupplier.get());
                // Bind the reader's lifetime to the stream: whoever closes the
                // stream also releases the open file.
                return StreamSupport
                        .stream(fileReader.spliterator(), false)
                        .onClose(() -> uncheckRun(fileReader::close));
            },
            mapOutputFn
    );
}
/** * Returns a stream which parses avro records into dimension rows. * * @param dataFileReader An open file reader for avro records * @param dimension The dimension object used to configure the dimension * * @return A stream over the open file which produces dimension rows * * @throws IllegalArgumentException thrown if JSON object `fields` is not present */ private Stream<DimensionRow> streamDimensionRows( DataFileReader<GenericRecord> dataFileReader, Dimension dimension ) throws IllegalArgumentException { // Validate Schema if (!doesSchemaContainAllDimensionFields(dimension, dataFileReader.getSchema())) { String msg = "The AVRO schema file does not contain all the configured dimension fields"; LOG.error(msg); throw new IllegalArgumentException(msg); } // Generates a set of dimension Rows after retrieving the appropriate fields return StreamSupport.stream(dataFileReader.spliterator(), false) .map(record -> recordToMap(record, dimension)) .map(dimension::parseDimensionRow); }