/**
 * Creates a {@link ParquetWriter} for {@link Group} records over the pre-configured
 * static {@code file}, first registering the static {@code schema} on {@code conf}
 * so that {@link GroupWriteSupport} can find it.
 *
 * @return a GZIP-compressed, PARQUET_1_0 writer
 * @throws IOException if the writer cannot be created
 */
protected static ParquetWriter<Group> initWriterFromFile() throws IOException {
    GroupWriteSupport.setSchema(schema, conf);
    // Named values for the deprecated positional ParquetWriter constructor;
    // the trailing booleans are dictionary-encoding on / validation off
    // (per the ParquetWriter(Path, WriteSupport, ...) signature — see refs).
    final int blockSize = 1024 * 1024;
    final int pageSize = 1024;
    final int dictionaryPageSize = 1024 * 1024;
    return new ParquetWriter<>(
            file,
            new GroupWriteSupport(),
            GZIP,
            blockSize,
            pageSize,
            dictionaryPageSize,
            true,
            false,
            PARQUET_1_0,
            conf);
}
/**
 * Creates a {@link Builder} for configuring a {@link ParquetWriter} with the
 * example object model. THIS IS AN EXAMPLE ONLY AND NOT INTENDED FOR USE.
 *
 * @param file the output file to create
 * @return a {@link Builder} to create a {@link ParquetWriter}
 */
public static Builder builder(Path file) {
    Builder exampleBuilder = new Builder(file);
    return exampleBuilder;
}
/**
 * Constructs the output format, delegating record serialization to a fresh
 * {@link GroupWriteSupport} instance ({@code super(...)} must be the first
 * statement, so no further setup happens here).
 */
public ExampleOutputFormat() { super(new GroupWriteSupport()); } }
/**
 * Opens a Parquet writer on {@code outputPath} for the given {@code schema}.
 *
 * @param outputPath    filesystem path the Parquet file is written to
 * @param schema        message schema of the rows to be written
 * @param writerVersion Parquet format version to emit
 * @throws IOException if the underlying writer cannot be created
 */
private ApacheParquet(String outputPath, MessageType schema, WriterVersion writerVersion) throws IOException {
    this.schema = schema;
    this.outputPath = outputPath;

    // Register the schema on a fresh Hadoop configuration so GroupWriteSupport
    // can resolve it when the writer is built.
    Configuration hadoopConf = new Configuration();
    GroupWriteSupport.setSchema(schema, hadoopConf);

    this.writer = ExampleParquetWriter.builder(new Path(outputPath))
            .withType(schema)
            .withConf(hadoopConf)
            // Page / dictionary-page sizing and the dictionary/validation
            // switches come from the class defaults.
            .withPageSize(DEFAULT_PAGE_SIZE)
            .withDictionaryPageSize(DEFAULT_PAGE_SIZE)
            .withDictionaryEncoding(DEFAULT_IS_DICTIONARY_ENABLED)
            .withValidation(DEFAULT_IS_VALIDATING_ENABLED)
            .withWriterVersion(writerVersion)
            .withRowGroupSize(DEFAULT_BLOCK_SIZE) // Parquet row-group (block) size
            .withCompressionCodec(CompressionCodecName.UNCOMPRESSED) // compression type
            .build();

    this.groupFactory = new SimpleGroupFactory(this.schema);
}
/**
 * Stores the schema being written in the job's configuration so that
 * {@link GroupWriteSupport} can retrieve it at write time.
 *
 * @param job a job
 * @param schema the schema of the data
 */
public static void setSchema(Job job, MessageType schema) { GroupWriteSupport.setSchema(schema, ContextUtil.getConfiguration(job)); }
/**
 * Opens {@code path} through the parent implementation and additionally wires up
 * a {@link ParquetReader} (backed by {@link GroupReadSupport}) over the same path,
 * stored in the {@code reader} field.
 *
 * @param path file to open
 * @return the raw input stream from the parent implementation
 * @throws IOException if either open fails
 */
@Override
protected InputStream openFile(Path path) throws IOException {
    InputStream stream = super.openFile(path);
    GroupReadSupport support = new GroupReadSupport();
    support.init(configuration, null, schema);
    reader = new ParquetReader<>(path, support);
    return stream;
}
/**
 * Constructs the read support, delegating record materialization to a fresh
 * {@link GroupReadSupport} ({@code super(...)} must be the first statement).
 */
public ParquetReadSupport() { super(new GroupReadSupport()); }
/**
 * Retrieves the write schema previously stored in the job's configuration.
 *
 * @param job a job
 * @return the schema
 */
public static MessageType getSchema(Job job) {
    MessageType storedSchema = GroupWriteSupport.getSchema(ContextUtil.getConfiguration(job));
    return storedSchema;
}
/**
 * Builds the read context: reads an optional partial schema string from
 * {@link ReadSupport#PARQUET_READ_SCHEMA} and delegates to
 * {@code getSchemaForRead} to derive the projection over {@code fileSchema}.
 *
 * @param configuration    job configuration, possibly carrying a partial read schema
 * @param keyValueMetaData file metadata (unused here)
 * @param fileSchema       full schema of the file being read
 * @return a context carrying the requested projection
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(
        Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema) {
    String requestedSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
    return new ReadContext(getSchemaForRead(fileSchema, requestedSchemaString));
}
/**
 * Supplies the write support used to serialize {@link Group} rows, built from
 * the instance's {@code type} and {@code extraMetaData}.
 *
 * @param conf Hadoop configuration (unused; schema comes from the fields)
 * @return a {@link GroupWriteSupport} for this writer's schema
 */
@Override
protected WriteSupport<Group> getWriteSupport(Configuration conf) {
    GroupWriteSupport groupSupport = new GroupWriteSupport(type, extraMetaData);
    return groupSupport;
}
/**
 * Stores the schema being written in the job's configuration so that
 * {@link GroupWriteSupport} can retrieve it at write time.
 *
 * @param job a job
 * @param schema the schema of the data
 */
public static void setSchema(Job job, MessageType schema) { GroupWriteSupport.setSchema(schema, ContextUtil.getConfiguration(job)); }
/**
 * Opens {@code path} through the parent implementation and additionally wires up
 * a {@link ParquetReader} (backed by {@link GroupReadSupport}) over the same path,
 * stored in the {@code reader} field.
 *
 * @param path file to open
 * @return the raw input stream from the parent implementation
 * @throws IOException if either open fails
 */
@Override
protected InputStream openFile(Path path) throws IOException {
    InputStream stream = super.openFile(path);
    GroupReadSupport support = new GroupReadSupport();
    support.init(configuration, null, schema);
    reader = new ParquetReader<>(path, support);
    return stream;
}
/**
 * Reads every {@link Group} record from the Parquet file at {@code inPath} and
 * prints each one, then prints a completion marker.
 *
 * <p>Fix: the {@link ParquetReader} is {@code Closeable} but was never closed,
 * leaking the underlying file handle; it is now managed by try-with-resources.
 *
 * @param inPath path of the Parquet file to read
 * @throws Exception if the file cannot be opened or read
 */
static void parquetReader(String inPath) throws Exception {
    GroupReadSupport readSupport = new GroupReadSupport();
    try (ParquetReader<Group> reader = new ParquetReader<Group>(new Path(inPath), readSupport)) {
        Group line = null;
        while ((line = reader.read()) != null) {
            System.out.println(line.toString());
        }
    }
    System.out.println("读取结束"); // runtime string preserved ("finished reading")
}
@Override public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) { // if present, prefer the schema passed to the constructor if (schema == null) { schema = getSchema(configuration); } return new WriteContext(schema, this.extraMetaData); }
/**
 * Creates a {@link Builder} for configuring a {@link ParquetWriter} with the
 * example object model. THIS IS AN EXAMPLE ONLY AND NOT INTENDED FOR USE.
 *
 * @param file the output file to create
 * @return a {@link Builder} to create a {@link ParquetWriter}
 */
public static Builder builder(Path file) {
    Builder exampleBuilder = new Builder(file);
    return exampleBuilder;
}
/**
 * Builds the read context: reads an optional partial schema string from
 * {@link ReadSupport#PARQUET_READ_SCHEMA} and delegates to
 * {@code getSchemaForRead} to derive the projection over {@code fileSchema}.
 *
 * @param configuration    job configuration, possibly carrying a partial read schema
 * @param keyValueMetaData file metadata (unused here)
 * @param fileSchema       full schema of the file being read
 * @return a context carrying the requested projection
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(
        Configuration configuration, Map<String, String> keyValueMetaData, MessageType fileSchema) {
    String requestedSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
    return new ReadContext(getSchemaForRead(fileSchema, requestedSchemaString));
}
/**
 * Supplies the write support used to serialize {@link Group} rows, built from
 * the instance's {@code type} and {@code extraMetaData}.
 *
 * @param conf Hadoop configuration (unused; schema comes from the fields)
 * @return a {@link GroupWriteSupport} for this writer's schema
 */
@Override
protected WriteSupport<Group> getWriteSupport(Configuration conf) {
    GroupWriteSupport groupSupport = new GroupWriteSupport(type, extraMetaData);
    return groupSupport;
}
/**
 * Retrieves the write schema previously stored in the job's configuration.
 *
 * @param job a job
 * @return the schema
 */
public static MessageType getSchema(Job job) {
    MessageType storedSchema = GroupWriteSupport.getSchema(ContextUtil.getConfiguration(job));
    return storedSchema;
}
/**
 * Constructs the output format, delegating record serialization to a fresh
 * {@link GroupWriteSupport} instance ({@code super(...)} must be the first
 * statement, so no further setup happens here).
 */
public ExampleOutputFormat() { super(new GroupWriteSupport()); } }
@Override public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) { // if present, prefer the schema passed to the constructor if (schema == null) { schema = getSchema(configuration); } return new WriteContext(schema, this.extraMetaData); }