private static ParquetFileReader newReader(InputFile file, ParquetReadOptions options) {
  try {
    return ParquetFileReader.open(ParquetIO.file(file), options);
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to open Parquet file: %s", file.location());
  }
}
@Override
public SeekableInputStream newStream() throws IOException {
  return stream(file.newStream());
}
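// Hedged sketch, not in the original excerpt: org.apache.parquet.io.InputFile also
// requires getLength(). Delegating to the wrapped Iceberg InputFile (assumed to be
// the `file` field used above) is the natural implementation.
@Override
public long getLength() {
  return file.getLength();
}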
public static Metrics fromInputFile(InputFile file) {
  try (ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(file))) {
    return fromMetadata(reader.getFooter());
  } catch (IOException e) {
    throw new RuntimeIOException(e, "Failed to read footer of file: %s", file);
  }
}
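// Hedged usage sketch, not part of the original source: how fromInputFile might be
// called. Files.localInput appears in the test setup below; the file path, the
// printRecordCount helper, and Metrics.recordCount() are assumptions for illustration.
static void printRecordCount(File parquetFile) {
  InputFile in = Files.localInput(parquetFile);
  Metrics metrics = fromInputFile(in);
  System.out.println("records: " + metrics.recordCount());
}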
@Override
public PositionOutputStream create(long ignored) throws IOException {
  return stream(file.create());
}
@SuppressWarnings("unchecked") ParquetWriter(Configuration conf, OutputFile output, Schema schema, long rowGroupSize, Map<String, String> metadata, Function<MessageType, ParquetValueWriter<?>> createWriterFunc, CompressionCodecName codec) { this.output = output; this.targetRowGroupSize = rowGroupSize; this.metadata = ImmutableMap.copyOf(metadata); this.compressor = new CodecFactory(conf, props.getPageSizeThreshold()).getCompressor(codec); this.parquetSchema = convert(schema, "table"); this.model = (ParquetValueWriter<T>) createWriterFunc.apply(parquetSchema); try { this.writer = new ParquetFileWriter(ParquetIO.file(output, conf), parquetSchema, ParquetFileWriter.Mode.OVERWRITE, rowGroupSize, 0); } catch (IOException e) { throw new RuntimeIOException(e, "Failed to create Parquet file"); } try { writer.start(); } catch (IOException e) { throw new RuntimeIOException(e, "Failed to start Parquet file writer"); } startRowGroup(); }
@Override
public PositionOutputStream createOrOverwrite(long ignored) throws IOException {
  return stream(file.createOrOverwrite());
}
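// Hedged sketch, not in the original excerpt: the remaining methods that
// org.apache.parquet.io.OutputFile requires from this adapter. Returning false/0
// here is an assumption; it tells Parquet that block-size alignment is unsupported.
@Override
public boolean supportsBlockSize() {
  return false;
}

@Override
public long defaultBlockSize() {
  return 0;
}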
          conf, file, schema, rowGroupSize, metadata, createWriterFunc, codec());
    } else {
      return new ParquetWriteAdapter<>(new ParquetWriteBuilder<D>(ParquetIO.file(file))
          .setType(type)
          .setConfig(config)
// Note: unlike the try-with-resources pattern used elsewhere, this reader is left
// open; the caller is responsible for closing it.
ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(inFile));
@BeforeClass
public static void createInputFile() throws IOException {
  if (PARQUET_FILE.exists()) {
    Assert.assertTrue(PARQUET_FILE.delete());
  }

  OutputFile outFile = Files.localOutput(PARQUET_FILE);
  try (FileAppender<Record> appender = Parquet.write(outFile)
      .schema(FILE_SCHEMA)
      .build()) {
    GenericRecordBuilder builder = new GenericRecordBuilder(convert(FILE_SCHEMA, "table"));
    // create 50 records
    for (int i = 0; i < 50; i += 1) {
      builder.set("_id", 30 + i); // min=30, max=79, num-nulls=0
      builder.set("_no_stats", TOO_LONG_FOR_STATS); // value longer than 4k will produce no stats
      builder.set("_required", "req"); // required, always non-null
      builder.set("_all_nulls", null); // never non-null
      builder.set("_some_nulls", (i % 10 == 0) ? null : "some"); // includes some null values
      builder.set("_no_nulls", ""); // optional, but always non-null
      appender.add(builder.build());
    }
  }

  InputFile inFile = Files.localInput(PARQUET_FILE);
  try (ParquetFileReader reader = ParquetFileReader.open(ParquetIO.file(inFile))) {
    Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size());
    ROW_GROUP_METADATA = reader.getRowGroups().get(0);
    PARQUET_SCHEMA = reader.getFileMetaData().getSchema();
  }

  PARQUET_FILE.deleteOnExit();
}
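// Hedged sketch, not from the original tests: one way the captured ROW_GROUP_METADATA
// could be inspected using parquet-mr's metadata APIs (BlockMetaData,
// ColumnChunkMetaData, Statistics). The printColumnStats helper is hypothetical.
static void printColumnStats(BlockMetaData rowGroup) {
  for (ColumnChunkMetaData column : rowGroup.getColumns()) {
    Statistics<?> stats = column.getStatistics();
    System.out.println(column.getPath() + " nulls=" +
        (stats == null ? "unknown" : stats.getNumNulls()));
  }
}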