public WriteAvroResultWithSchema(final Schema schema, final OutputStream out, final CodecFactory codec) throws IOException {
    super(out);
    this.schema = schema;

    final GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    dataFileWriter = new DataFileWriter<>(datumWriter);
    dataFileWriter.setCodec(codec);
    dataFileWriter.create(schema, out);
}
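The codec argument above is a plain Avro CodecFactory. As a point of reference (not specific to this snippet), these are the common factory methods for building one; deflateCodec takes a compression level from 1 to 9:

import org.apache.avro.file.CodecFactory;

CodecFactory none = CodecFactory.nullCodec();            // no compression
CodecFactory deflate = CodecFactory.deflateCodec(6);     // deflate, level 1-9
CodecFactory snappy = CodecFactory.snappyCodec();        // needs snappy on the classpath
CodecFactory byName = CodecFactory.fromString("bzip2");  // look a codec up by name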
/**
 * Create a new {@link DataFileWriter} for writing Avro records.
 *
 * @param codecFactory a {@link CodecFactory} object for building the compression codec
 * @throws IOException if the new {@link DataFileWriter} cannot be created
 */
private DataFileWriter<GenericRecord> createDataFileWriter(CodecFactory codecFactory) throws IOException {
    @SuppressWarnings("resource")
    DataFileWriter<GenericRecord> writer = new DataFileWriter<>(this.datumWriter);
    writer.setCodec(codecFactory);

    // Open the file and return the DataFileWriter
    return writer.create(this.schema, this.stagingFileOutputStream);
}
static <T> void configureDataFileWriter(DataFileWriter<T> writer, JobConf job) throws UnsupportedEncodingException {
    CodecFactory factory = getCodecFactory(job);
    if (factory != null) {
        writer.setCodec(factory);
    }

    writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));

    // Copy metadata from the job configuration into the container file.
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) {
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        }
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) {
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                URLDecoder.decode(e.getValue(), StandardCharsets.ISO_8859_1.name())
                    .getBytes(StandardCharsets.ISO_8859_1));
        }
    }
}
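For context, the metadata loop above picks up job properties written under the AvroJob.TEXT_PREFIX and AvroJob.BINARY_PREFIX prefixes. A minimal sketch of how a job might set such a property, assuming the prefix constant is accessible to the caller (it is referenced the same way in the snippet above); the key name "user.comment" is just an example:

JobConf job = new JobConf();
// Ends up in the container file's metadata under the key "user.comment".
job.set(AvroJob.TEXT_PREFIX + "user.comment", "generated by the nightly export");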
AvroKeyValueWriter(Schema keySchema, Schema valueSchema, CodecFactory compressionCodec,
        OutputStream outputStream, int syncInterval) throws IOException {
    // Create the generic record schema for the key/value pair.
    mKeyValuePairSchema = AvroKeyValue.getSchema(keySchema, valueSchema);

    // Create an Avro container file and a writer to it.
    DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<>(mKeyValuePairSchema);
    mAvroFileWriter = new DataFileWriter<>(genericDatumWriter);
    mAvroFileWriter.setCodec(compressionCodec);
    mAvroFileWriter.setSyncInterval(syncInterval);
    mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

    // Create a reusable output record.
    mOutputRecord = new AvroKeyValue<>(new GenericData.Record(mKeyValuePairSchema));
}
public static void main(String[] args) throws Exception {
    if (args.length < 3 || args.length > 4) {
        System.out.println("Usage: RandomData <schemafile> <outputfile> <count> [codec]");
        System.exit(-1);
    }
    Schema sch = new Schema.Parser().parse(new File(args[0]));
    DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
    writer.setCodec(CodecFactory.fromString(args.length >= 4 ? args[3] : "null"));
    writer.create(sch, new File(args[1]));
    try {
        for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) {
            writer.append(datum);
        }
    } finally {
        writer.close();
    }
}
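For completeness, a file written by this tool can be read back with Avro's DataFileReader, which also exposes the codec recorded in the file header. A minimal sketch, with "out.avro" standing in for the output file argument:

try (DataFileReader<Object> reader = new DataFileReader<>(new File("out.avro"), new GenericDatumReader<>())) {
    // "avro.codec" is the standard header key naming the compression codec.
    System.out.println("codec: " + reader.getMetaString("avro.codec"));
    for (Object datum : reader) {
        System.out.println(datum);
    }
}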
// The codec object wraps an Avro CodecFactory; install it on the writer before create().
dataFileWriter.setCodec(codec.getCodecFactory());
public AvroFileWriter(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    file = new File(logFilePath.getLogFilePath());
    file.getParentFile().mkdirs();
    LOG.debug("Creating brand new writer for path {}", logFilePath.getLogFilePath());

    topic = logFilePath.getTopic();
    Schema schema = schemaRegistryClient.getSchema(topic);
    SpecificDatumWriter<GenericRecord> specificDatumWriter = new SpecificDatumWriter<>(schema);
    writer = new DataFileWriter<>(specificDatumWriter);
    writer.setCodec(getCodecFactory(codec));
    writer.create(schema, file);
}
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(
        JobConf jobConf, Path path, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties properties, Progressable progressable) throws IOException {
    Schema schema;
    try {
        schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
    } catch (AvroSerdeException e) {
        throw new IOException(e);
    }

    GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);

    if (isCompressed) {
        int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
        CodecFactory factory = codecName.equals(DEFLATE_CODEC)
            ? CodecFactory.deflateCodec(level)
            : CodecFactory.fromString(codecName);
        dfw.setCodec(factory);
    }

    dfw.create(schema, path.getFileSystem(jobConf).create(path));
    return new AvroGenericRecordWriter(dfw);
}
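The compressed branch above is driven entirely by job configuration. A sketch of the relevant settings, assuming the usual Avro key names behind OUTPUT_CODEC and DEFLATE_LEVEL_KEY ("avro.output.codec" and "avro.mapred.deflate.level"); the isCompressed flag itself is passed in by Hive:

JobConf jobConf = new JobConf();
jobConf.set("avro.output.codec", "snappy");      // any name CodecFactory.fromString understands
jobConf.setInt("avro.mapred.deflate.level", 6);  // consulted only when the codec is "deflate"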
private byte[] initAvroWriter(ProcessSession session, String codec, DataFileStream<GenericRecord> reader,
        DataFileWriter<GenericRecord> writer, AtomicReference<FlowFile> flowFileRef) {
    writer.setCodec(CodecFactory.fromString(codec));

    // Transfer metadata (this is a subset of the incoming file).
    for (String metaKey : reader.getMetaKeys()) {
        if (!RESERVED_METADATA.contains(metaKey)) {
            writer.setMeta(metaKey, reader.getMeta(metaKey));
        }
    }

    final ByteArrayOutputStream avroHeader = new ByteArrayOutputStream();
    flowFileRef.set(session.append(flowFileRef.get(), (out) -> {
        // Create the writer so that records can be appended later.
        writer.create(reader.getSchema(), avroHeader);
        writer.close();

        final byte[] header = avroHeader.toByteArray();
        out.write(header);
    }));

    // Capture the Avro header byte array that was just written to the FlowFile.
    // This is needed when Avro records are appended to the same FlowFile.
    return avroHeader.toByteArray();
}
@Override
public void init(final DataFileStream<GenericRecord> reader, final String codec, final OutputStream out)
        throws IOException {
    writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());

    if (transferMetadata) {
        for (String metaKey : reader.getMetaKeys()) {
            if (!RESERVED_METADATA.contains(metaKey)) {
                writer.setMeta(metaKey, reader.getMeta(metaKey));
            }
        }
    }

    writer.setCodec(CodecFactory.fromString(codec));
    writer.create(reader.getSchema(), out);
}
@SuppressWarnings("unchecked") public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { Schema schema = AvroJob.getOutputSchema(job); final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter()); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } Path path = FileOutputFormat.getTaskOutputPath(job, name+AvroOutputFormat.EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new RecordWriter<TetherData, NullWritable>() { public void write(TetherData datum, NullWritable ignore) throws IOException { writer.appendEncoded(datum.buffer()); } public void close(Reporter reporter) throws IOException { writer.close(); } }; }
/**
 * Constructor.
 *
 * @param writerSchema The writer schema for the records in the Avro container file.
 * @param dataModel The data model used to create the datum writer.
 * @param compressionCodec A compression codec factory for the Avro container file.
 * @param outputStream The output stream to write the Avro container file to.
 * @param syncInterval The sync interval for the Avro container file.
 * @throws IOException If the record writer cannot be opened.
 */
public AvroKeyRecordWriter(Schema writerSchema, GenericData dataModel, CodecFactory compressionCodec,
        OutputStream outputStream, int syncInterval) throws IOException {
    // Create an Avro container file and a writer to it.
    mAvroFileWriter = new DataFileWriter<T>(dataModel.createDatumWriter(writerSchema));
    mAvroFileWriter.setCodec(compressionCodec);
    mAvroFileWriter.setSyncInterval(syncInterval);
    mAvroFileWriter.create(writerSchema, outputStream);
}
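For orientation, a hypothetical call to this constructor; the schema and out variables are assumed to exist, and DataFileConstants.DEFAULT_SYNC_INTERVAL supplies the stock sync interval:

AvroKeyRecordWriter<GenericRecord> writer = new AvroKeyRecordWriter<>(
    schema, GenericData.get(), CodecFactory.snappyCodec(), out,
    DataFileConstants.DEFAULT_SYNC_INTERVAL);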
// Install the codec on the output writer, then delegate to the block-level recovery routine.
fileWriter.setCodec(codecFactory);
int result = innerRecover(fileReader, fileWriter, out, err, recoverPrior, recoverAfter, schema, outfile);
@Override
public void configure(Context context) {
    int syncIntervalBytes = context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
    String compressionCodec = context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);

    writer = new ReflectDatumWriter<T>(getSchema());
    dataFileWriter = new DataFileWriter<T>(writer);
    dataFileWriter.setSyncInterval(syncIntervalBytes);

    try {
        CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
        dataFileWriter.setCodec(codecFactory);
    } catch (AvroRuntimeException e) {
        logger.warn("Unable to instantiate avro codec with name (" + compressionCodec
            + "). Compression disabled. Exception follows.", e);
    }
}
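A sketch of exercising configure() directly. The key strings "syncIntervalBytes" and "compressionCodec" are assumptions about what the SYNC_INTERVAL_BYTES and COMPRESSION_CODEC constants hold, and serializer is a hypothetical instance of the class above:

Context context = new Context();
context.put("syncIntervalBytes", "2048000");  // assumed key behind SYNC_INTERVAL_BYTES
context.put("compressionCodec", "snappy");    // assumed key behind COMPRESSION_CODEC
serializer.configure(context);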
private File generateData(String file, Type type, Map<String, String> metadata, CodecFactory codec) throws Exception {
    File inputFile = new File(INPUT_DIR.getRoot(), file);
    Schema schema = Schema.create(type);

    try (DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        for (Entry<String, String> metadatum : metadata.entrySet()) {
            writer.setMeta(metadatum.getKey(), metadatum.getValue());
        }
        writer.setCodec(codec);
        writer.create(schema, inputFile);
        for (int i = 0; i < ROWS_IN_INPUT_FILES; i++) {
            writer.append(aDatum(type, i));
        }
    }
    return inputFile;
}
/**
 * Constructor.
 *
 * @param keyConverter A key to Avro datum converter.
 * @param valueConverter A value to Avro datum converter.
 * @param dataModel The data model for key and value.
 * @param compressionCodec A compression codec factory for the Avro container file.
 * @param outputStream The output stream to write the Avro container file to.
 * @param syncInterval The sync interval for the Avro container file.
 * @throws IOException If the record writer cannot be opened.
 */
public AvroKeyValueRecordWriter(AvroDatumConverter<K, ?> keyConverter, AvroDatumConverter<V, ?> valueConverter,
        GenericData dataModel, CodecFactory compressionCodec, OutputStream outputStream,
        int syncInterval) throws IOException {
    // Create the generic record schema for the key/value pair.
    mKeyValuePairSchema = AvroKeyValue.getSchema(
        keyConverter.getWriterSchema(), valueConverter.getWriterSchema());

    // Create an Avro container file and a writer to it.
    mAvroFileWriter = new DataFileWriter<GenericRecord>(
        dataModel.createDatumWriter(mKeyValuePairSchema));
    mAvroFileWriter.setCodec(compressionCodec);
    mAvroFileWriter.setSyncInterval(syncInterval);
    mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

    // Keep a reference to the converters.
    mKeyConverter = keyConverter;
    mValueConverter = valueConverter;

    // Create a reusable output record.
    mOutputRecord = new AvroKeyValue<>(new GenericData.Record(mKeyValuePairSchema));
}
private File generateData(String file, Type type, Map<String, String> metadata, CodecFactory codec) throws Exception {
    File inputFile = new File(DIR.getRoot(), file);
    inputFile.deleteOnExit();

    Schema schema = null;
    if (type.equals(Schema.Type.INT)) {
        schema = INTSCHEMA;
    }
    if (type.equals(Schema.Type.STRING)) {
        schema = STRINGSCHEMA;
    }

    // try-with-resources ensures the writer is closed even if appending fails.
    try (DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
        for (Entry<String, String> metadatum : metadata.entrySet()) {
            writer.setMeta(metadatum.getKey(), metadatum.getValue());
        }
        writer.setCodec(codec);
        writer.create(schema, inputFile);
        for (int i = 0; i < ROWS_IN_INPUT_FILES; i++) {
            writer.append(aDatum(type, i));
        }
    }
    return inputFile;
}
// Fragment: configure the sync interval, then install the codec only when one was supplied.
writer.setSyncInterval(100);
if (codec != null) {
    writer.setCodec(codec);
}
@Test
public void testWriteAndRead() throws IOException {
    Schema schema = Schema.create(Type.STRING);

    // Write it
    DataFileWriter<Utf8> w = new DataFileWriter<>(new GenericDatumWriter<>(schema));
    w.setCodec(CodecFactory.deflateCodec(6));
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    w.create(schema, baos);
    w.append(new Utf8("hello world"));
    w.append(new Utf8("hello moon"));
    w.sync();
    w.append(new Utf8("bye bye world"));
    w.append(new Utf8("bye bye moon"));
    w.close();

    // Read it
    DataFileStream<Utf8> r = new DataFileStream<>(
        new ByteArrayInputStream(baos.toByteArray()), new GenericDatumReader<>(schema));
    assertEquals("hello world", r.next().toString());
    assertEquals("hello moon", r.next().toString());
    assertEquals("bye bye world", r.next().toString());
    assertEquals("bye bye moon", r.next().toString());
    assertFalse(r.hasNext());
}