/**
 * Creates a {@link ParquetWriter} for {@link Group} records over the pre-configured
 * static {@code file}, first registering the static {@code schema} on {@code conf}
 * so that {@link GroupWriteSupport} can find it.
 *
 * @return a GZIP-compressed, PARQUET_1_0 writer
 * @throws IOException if the writer cannot be created
 */
protected static ParquetWriter<Group> initWriterFromFile() throws IOException {
    GroupWriteSupport.setSchema(schema, conf);
    // Named values for the deprecated positional ParquetWriter constructor;
    // the trailing booleans are dictionary-encoding on / validation off
    // (per the ParquetWriter(Path, WriteSupport, ...) signature — see refs).
    final int blockSize = 1024 * 1024;
    final int pageSize = 1024;
    final int dictionaryPageSize = 1024 * 1024;
    return new ParquetWriter<>(
            file,
            new GroupWriteSupport(),
            GZIP,
            blockSize,
            pageSize,
            dictionaryPageSize,
            true,
            false,
            PARQUET_1_0,
            conf);
}
/**
 * Creates a {@link Builder} for configuring a {@link ParquetWriter} with the
 * example object model. THIS IS AN EXAMPLE ONLY AND NOT INTENDED FOR USE.
 *
 * @param file the output file to create
 * @return a {@link Builder} to create a {@link ParquetWriter}
 */
public static Builder builder(Path file) {
    Builder exampleBuilder = new Builder(file);
    return exampleBuilder;
}
/**
 * Constructs the output format, delegating record serialization to a fresh
 * {@link GroupWriteSupport} instance ({@code super(...)} must be the first
 * statement, so no further setup happens here).
 */
public ExampleOutputFormat() { super(new GroupWriteSupport()); } }
/**
 * Opens a Parquet writer on {@code outputPath} for the given {@code schema}.
 *
 * @param outputPath    filesystem path the Parquet file is written to
 * @param schema        message schema of the rows to be written
 * @param writerVersion Parquet format version to emit
 * @throws IOException if the underlying writer cannot be created
 */
private ApacheParquet(String outputPath, MessageType schema, WriterVersion writerVersion) throws IOException {
    this.schema = schema;
    this.outputPath = outputPath;

    // Register the schema on a fresh Hadoop configuration so GroupWriteSupport
    // can resolve it when the writer is built.
    Configuration hadoopConf = new Configuration();
    GroupWriteSupport.setSchema(schema, hadoopConf);

    this.writer = ExampleParquetWriter.builder(new Path(outputPath))
            .withType(schema)
            .withConf(hadoopConf)
            // Page / dictionary-page sizing and the dictionary/validation
            // switches come from the class defaults.
            .withPageSize(DEFAULT_PAGE_SIZE)
            .withDictionaryPageSize(DEFAULT_PAGE_SIZE)
            .withDictionaryEncoding(DEFAULT_IS_DICTIONARY_ENABLED)
            .withValidation(DEFAULT_IS_VALIDATING_ENABLED)
            .withWriterVersion(writerVersion)
            .withRowGroupSize(DEFAULT_BLOCK_SIZE) // Parquet row-group (block) size
            .withCompressionCodec(CompressionCodecName.UNCOMPRESSED) // compression type
            .build();

    this.groupFactory = new SimpleGroupFactory(this.schema);
}
/**
 * Stores the schema being written in the job's configuration so that
 * {@link GroupWriteSupport} can retrieve it at write time.
 *
 * @param job a job
 * @param schema the schema of the data
 */
public static void setSchema(Job job, MessageType schema) { GroupWriteSupport.setSchema(schema, ContextUtil.getConfiguration(job)); }
/**
 * Opens {@code path} through the parent implementation and additionally wires up
 * a {@link ParquetReader} (backed by {@link GroupReadSupport}) over the same path,
 * stored in the {@code reader} field.
 *
 * @param path file to open
 * @return the raw input stream from the parent implementation
 * @throws IOException if either open fails
 */
@Override
protected InputStream openFile(Path path) throws IOException {
    InputStream stream = super.openFile(path);
    GroupReadSupport support = new GroupReadSupport();
    support.init(configuration, null, schema);
    reader = new ParquetReader<>(path, support);
    return stream;
}
/**
 * Constructs the read support, delegating record materialization to a fresh
 * {@link GroupReadSupport} ({@code super(...)} must be the first statement).
 */
public ParquetReadSupport() { super(new GroupReadSupport()); }
/**
 * Retrieves the write schema previously stored in the job's configuration.
 *
 * @param job a job
 * @return the schema
 */
public static MessageType getSchema(Job job) {
    MessageType storedSchema = GroupWriteSupport.getSchema(ContextUtil.getConfiguration(job));
    return storedSchema;
}
/**
 * Builds the read context: reads an optional partial schema string from
 * {@link ReadSupport#PARQUET_READ_SCHEMA} and delegates to
 * {@code getSchemaForRead} to derive the projection over {@code fileSchema}.
 *
 * @param configuration    job configuration, possibly carrying a partial read schema
 * @param keyValueMetaData file metadata (unused here)
 * @param fileSchema       full schema of the file being read
 * @return a context carrying the requested projection
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(
        Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema) {
    String requestedSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
    return new ReadContext(getSchemaForRead(fileSchema, requestedSchemaString));
}
/**
 * Supplies the write support used to serialize {@link Group} rows, built from
 * the instance's {@code type} and {@code extraMetaData}.
 *
 * @param conf Hadoop configuration (unused; schema comes from the fields)
 * @return a {@link GroupWriteSupport} for this writer's schema
 */
@Override
protected WriteSupport<Group> getWriteSupport(Configuration conf) {
    GroupWriteSupport groupSupport = new GroupWriteSupport(type, extraMetaData);
    return groupSupport;
}
/**
 * Stores the schema being written in the job's configuration so that
 * {@link GroupWriteSupport} can retrieve it at write time.
 *
 * @param job a job
 * @param schema the schema of the data
 */
public static void setSchema(Job job, MessageType schema) { GroupWriteSupport.setSchema(schema, ContextUtil.getConfiguration(job)); }
/**
 * Opens {@code path} through the parent implementation and additionally wires up
 * a {@link ParquetReader} (backed by {@link GroupReadSupport}) over the same path,
 * stored in the {@code reader} field.
 *
 * @param path file to open
 * @return the raw input stream from the parent implementation
 * @throws IOException if either open fails
 */
@Override
protected InputStream openFile(Path path) throws IOException {
    InputStream stream = super.openFile(path);
    GroupReadSupport support = new GroupReadSupport();
    support.init(configuration, null, schema);
    reader = new ParquetReader<>(path, support);
    return stream;
}
/**
 * Reads every {@link Group} record from the Parquet file at {@code inPath} and
 * prints each one, then prints a completion marker.
 *
 * <p>Fix: the {@link ParquetReader} is {@code Closeable} but was never closed,
 * leaking the underlying file handle; it is now managed by try-with-resources.
 *
 * @param inPath path of the Parquet file to read
 * @throws Exception if the file cannot be opened or read
 */
static void parquetReader(String inPath) throws Exception {
    GroupReadSupport readSupport = new GroupReadSupport();
    try (ParquetReader<Group> reader = new ParquetReader<Group>(new Path(inPath), readSupport)) {
        Group line = null;
        while ((line = reader.read()) != null) {
            System.out.println(line.toString());
        }
    }
    System.out.println("读取结束"); // runtime string preserved ("finished reading")
}
@Override public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) { // if present, prefer the schema passed to the constructor if (schema == null) { schema = getSchema(configuration); } return new WriteContext(schema, this.extraMetaData); }
/**
 * Creates a {@link Builder} for configuring a {@link ParquetWriter} with the
 * example object model. THIS IS AN EXAMPLE ONLY AND NOT INTENDED FOR USE.
 *
 * @param file the output file to create
 * @return a {@link Builder} to create a {@link ParquetWriter}
 */
public static Builder builder(Path file) {
    Builder exampleBuilder = new Builder(file);
    return exampleBuilder;
}
/**
 * Builds the read context: reads an optional partial schema string from
 * {@link ReadSupport#PARQUET_READ_SCHEMA} and delegates to
 * {@code getSchemaForRead} to derive the projection over {@code fileSchema}.
 *
 * @param configuration    job configuration, possibly carrying a partial read schema
 * @param keyValueMetaData file metadata (unused here)
 * @param fileSchema       full schema of the file being read
 * @return a context carrying the requested projection
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(
        Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema) {
    String requestedSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
    return new ReadContext(getSchemaForRead(fileSchema, requestedSchemaString));
}
/**
 * Supplies the write support used to serialize {@link Group} rows, built from
 * the instance's {@code type} and {@code extraMetaData}.
 *
 * @param conf Hadoop configuration (unused; schema comes from the fields)
 * @return a {@link GroupWriteSupport} for this writer's schema
 */
@Override
protected WriteSupport<Group> getWriteSupport(Configuration conf) {
    GroupWriteSupport groupSupport = new GroupWriteSupport(type, extraMetaData);
    return groupSupport;
}
/**
 * Retrieves the write schema previously stored in the job's configuration.
 *
 * @param job a job
 * @return the schema
 */
public static MessageType getSchema(Job job) {
    MessageType storedSchema = GroupWriteSupport.getSchema(ContextUtil.getConfiguration(job));
    return storedSchema;
}
/**
 * Constructs the output format, delegating record serialization to a fresh
 * {@link GroupWriteSupport} instance ({@code super(...)} must be the first
 * statement, so no further setup happens here).
 */
public ExampleOutputFormat() { super(new GroupWriteSupport()); } }
@Override public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) { // if present, prefer the schema passed to the constructor if (schema == null) { schema = getSchema(configuration); } return new WriteContext(schema, this.extraMetaData); }