static <T> void configureDataFileWriter(DataFileWriter<T> writer, JobConf job) throws UnsupportedEncodingException { CodecFactory factory = getCodecFactory(job); if (factory != null) { writer.setCodec(factory); } writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL)); // copy metadata from job for (Map.Entry<String,String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), StandardCharsets.ISO_8859_1.name()) .getBytes(StandardCharsets.ISO_8859_1)); } }
AvroKeyValueWriter(Schema keySchema, Schema valueSchema, CodecFactory compressionCodec, OutputStream outputStream, int syncInterval) throws IOException { // Create the generic record schema for the key/value pair. mKeyValuePairSchema = AvroKeyValue .getSchema(keySchema, valueSchema); // Create an Avro container file and a writer to it. DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>( mKeyValuePairSchema); mAvroFileWriter = new DataFileWriter<GenericRecord>( genericDatumWriter); mAvroFileWriter.setCodec(compressionCodec); mAvroFileWriter.setSyncInterval(syncInterval); mAvroFileWriter.create(mKeyValuePairSchema, outputStream); // Create a reusable output record. mOutputRecord = new AvroKeyValue<Object, Object>( new GenericData.Record(mKeyValuePairSchema)); }
dataFileWriter = new DataFileWriter<Object>(writer); dataFileWriter.setSyncInterval(syncIntervalBytes);
@Test public void testBlockSizeSetInvalid() { int exceptions = 0; for (int i = -1; i < 33; i++) { // 33 invalid, one valid try { new DataFileWriter<>(new GenericDatumWriter<>()).setSyncInterval(i); } catch (IllegalArgumentException iae) { exceptions++; } } Assert.assertEquals(33, exceptions); } }
dataFileWriter = new DataFileWriter<>(writer); dataFileWriter.setSyncInterval(syncIntervalBytes);
/**
 * Constructor.
 *
 * @param writerSchema The writer schema for the records in the Avro container file.
 * @param dataModel The data model whose {@code createDatumWriter} supplies the datum writer.
 * @param compressionCodec A compression codec factory for the Avro container file.
 * @param outputStream The output stream to write the Avro container file to.
 * @param syncInterval The sync interval for the Avro container file.
 * @throws IOException If the record writer cannot be opened.
 */
public AvroKeyRecordWriter(Schema writerSchema, GenericData dataModel, CodecFactory compressionCodec, OutputStream outputStream, int syncInterval) throws IOException { // Create an Avro container file and a writer to it; codec and sync interval are set before create(). mAvroFileWriter = new DataFileWriter<T>(dataModel.createDatumWriter(writerSchema)); mAvroFileWriter.setCodec(compressionCodec); mAvroFileWriter.setSyncInterval(syncInterval); mAvroFileWriter.create(writerSchema, outputStream); } /**
/**
 * Reads the sync interval and compression codec from the Flume context and
 * configures the Avro container-file writer accordingly.
 */
@Override
public void configure(Context context) {
  int syncIntervalBytes = context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  String compressionCodec = context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);
  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);
  dataFileWriter.setSyncInterval(syncIntervalBytes);
  try {
    dataFileWriter.setCodec(CodecFactory.fromString(compressionCodec));
  } catch (AvroRuntimeException e) {
    // Unknown codec name: continue uncompressed rather than failing the sink.
    logger.warn("Unable to instantiate avro codec with name (" + compressionCodec
        + "). Compression disabled. Exception follows.", e);
  }
}
/**
 * Constructor.
 *
 * @param keyConverter A key to Avro datum converter.
 * @param valueConverter A value to Avro datum converter.
 * @param dataModel The data model for key and value.
 * @param compressionCodec A compression codec factory for the Avro container file.
 * @param outputStream The output stream to write the Avro container file to.
 * @param syncInterval The sync interval for the Avro container file.
 * @throws IOException If the record writer cannot be opened.
 */
public AvroKeyValueRecordWriter(AvroDatumConverter<K, ?> keyConverter,
    AvroDatumConverter<V, ?> valueConverter, GenericData dataModel,
    CodecFactory compressionCodec, OutputStream outputStream, int syncInterval)
    throws IOException {
  // Combine the converters' writer schemas into a single pair-record schema.
  mKeyValuePairSchema =
      AvroKeyValue.getSchema(keyConverter.getWriterSchema(), valueConverter.getWriterSchema());
  // Open the container file; codec and sync interval are configured before create().
  mAvroFileWriter =
      new DataFileWriter<GenericRecord>(dataModel.createDatumWriter(mKeyValuePairSchema));
  mAvroFileWriter.setCodec(compressionCodec);
  mAvroFileWriter.setSyncInterval(syncInterval);
  mAvroFileWriter.create(mKeyValuePairSchema, outputStream);
  // Keep the converters for use when records are written.
  mKeyConverter = keyConverter;
  mValueConverter = valueConverter;
  // Reusable output record, repopulated on each write.
  mOutputRecord = new AvroKeyValue<>(new GenericData.Record(mKeyValuePairSchema));
}
OutputStream dataOutputStream = fileSystem.create(dataFilePath); mDataFileWriter = new DataFileWriter<>(datumWriter) .setSyncInterval(1 << 20) // Set the auto-sync interval sufficiently large, since
public void testGenericWrite() throws IOException { DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>()) .setSyncInterval(100); if (codec != null) { writer.setCodec(codec);
DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>()) .setSyncInterval(syncInterval); if (codec != null) { writer.setCodec(codec); .setSyncInterval(syncInterval); if (codec2 != null) { writer2.setCodec(codec2); .setSyncInterval(syncInterval); concatinto.appendTo(file1); DataFileReader<Object> concatfrom =
static <T> void configureDataFileWriter(DataFileWriter<T> writer, JobConf job) throws UnsupportedEncodingException { CodecFactory factory = getCodecFactory(job); if (factory != null) { writer.setCodec(factory); } writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL)); // copy metadata from job for (Map.Entry<String,String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1") .getBytes("ISO-8859-1")); } }
static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job) throws UnsupportedEncodingException { Configuration conf = job.getConfiguration(); if (FileOutputFormat.getCompressOutput(job)) { int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); String codecName = conf.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC); CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName); writer.setCodec(factory); } writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL)); // copy metadata from job for (Map.Entry<String,String> e : conf) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1") .getBytes("ISO-8859-1")); } }
static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job,String codecName,int deflateLevel) throws UnsupportedEncodingException { Configuration conf = job.getConfiguration(); if (FileOutputFormat.getCompressOutput(job)) { CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(deflateLevel) : CodecFactory.fromString(codecName); writer.setCodec(factory); } writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL)); // copy metadata from job for (Map.Entry<String,String> e : conf) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1") .getBytes("ISO-8859-1")); } }
/**
 * Opens an Avro container file at {@code location} for appending serialized
 * logging events.
 *
 * @param location          target location for the log file
 * @param filePermissions   permission string for the new file; empty means defaults
 * @param syncIntervalBytes approximate bytes between Avro sync markers
 * @param createTime        creation timestamp recorded for this file
 * @param closeable         resource to be closed with this output stream
 * @throws IOException if the stream or container file cannot be created
 */
LogFileOutputStream(Location location, String filePermissions, int syncIntervalBytes,
    long createTime, Closeable closeable) throws IOException {
  this.location = location;
  this.closeable = closeable;
  this.serializer = new LoggingEventSerializer();
  Schema schema = serializer.getAvroSchema();
  try {
    this.outputStream = filePermissions.isEmpty()
        ? location.getOutputStream()
        : location.getOutputStream(filePermissions);
    this.dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
    // BUGFIX: setSyncInterval must run BEFORE create(). Avro's DataFileWriter
    // rejects configuration changes once the file is open, so the original
    // order threw an unchecked AvroRuntimeException (which also bypassed the
    // IOException cleanup below) instead of applying the interval.
    this.dataFileWriter.setSyncInterval(syncIntervalBytes);
    this.dataFileWriter.create(schema, outputStream);
    this.createTime = createTime;
    this.fileSize = 0;
  } catch (IOException e) {
    // Close the writer first so any buffered header bytes are released
    // before the underlying stream is closed.
    Closeables.closeQuietly(dataFileWriter);
    Closeables.closeQuietly(outputStream);
    throw e;
  }
}
@SuppressWarnings("deprecation") // uses internal test functionality. @Override protected void prepareWrite(WritableByteChannel channel) throws Exception { logger.info("jdbcavroio : Preparing write..."); connection = jdbcAvroArgs.jdbcConnectionConfiguration().createConnection(); Void destination = getDestination(); CodecFactory codec = dynamicDestinations.getCodec(destination); Schema schema = dynamicDestinations.getSchema(destination); dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema)) .setCodec(codec) .setSyncInterval(syncInterval); dataFileWriter.setMeta("created_by", this.getClass().getCanonicalName()); dataFileWriter.create(schema, Channels.newOutputStream(channel)); this.metering = JdbcAvroMetering.create(); logger.info("jdbcavroio : Write prepared"); }
/**
 * Serializes a single event as a self-contained Avro container stream on
 * {@code out}: derives a schema from the event's ordered fields, writes the
 * container header plus the one record, and flushes. Any failure is wrapped
 * in an IOException.
 *
 * NOTE(review): a fresh DataFileWriter is built per event and is flushed but
 * never closed — presumably because close() would also close the shared
 * {@code out} stream; confirm the abandoned writer holds nothing beyond its
 * in-memory buffer. Also note each event re-derives its schema, so the codec
 * and sync interval are re-applied every call.
 *
 * @param event the event whose fields are written as one Avro record
 * @throws IOException if schema creation, encoding, or the underlying write fails
 */
@Override public void write(Event event) throws IOException { try { Map<String, Object> orderedData = getOrderedData(event); List<Object> orderedList = new ArrayList<>(orderedData.values()); Schema schema = schemaCreator.createSchema(orderedData); GenericRecord record = createGenericRecord(orderedData, orderedList, schema); GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema); DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(writer); dataFileWriter.setSyncInterval(syncIntervalBytes); try { CodecFactory codecFactory = CodecFactory.fromString(compressionCodec); dataFileWriter.setCodec(codecFactory); } catch (AvroRuntimeException e) { LOGGER.warn("Unable to instantiate avro codec with name (" + compressionCodec + "). Compression disabled. Exception follows.", e); } dataFileWriter.create(schema, out); dataFileWriter.append(record); dataFileWriter.flush(); } catch (Exception e) { throw new IOException(e); } }
/**
 * Constructor.
 *
 * @param writerSchema The writer schema for the records in the Avro container file.
 * @param dataModel The data model whose {@code createDatumWriter} supplies the datum writer.
 * @param compressionCodec A compression codec factory for the Avro container file.
 * @param outputStream The output stream to write the Avro container file to.
 * @param syncInterval The sync interval for the Avro container file.
 * @throws IOException If the record writer cannot be opened.
 */
public AvroKeyRecordWriter(Schema writerSchema, GenericData dataModel, CodecFactory compressionCodec, OutputStream outputStream, int syncInterval) throws IOException { // Create an Avro container file and a writer to it; codec and sync interval are set before create(). mAvroFileWriter = new DataFileWriter<T>(dataModel.createDatumWriter(writerSchema)); mAvroFileWriter.setCodec(compressionCodec); mAvroFileWriter.setSyncInterval(syncInterval); mAvroFileWriter.create(writerSchema, outputStream); } /**
/**
 * Pulls writer settings (sync interval, compression codec) from the agent
 * configuration and applies them to a fresh Avro container-file writer.
 */
@Override
public void configure(Context context) {
  int syncIntervalBytes = context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  String compressionCodec = context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);
  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);
  dataFileWriter.setSyncInterval(syncIntervalBytes);
  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    // An unrecognized codec name disables compression instead of failing.
    logger.warn("Unable to instantiate avro codec with name (" + compressionCodec
        + "). Compression disabled. Exception follows.", e);
  }
}
public static void writeToAvro(File inputFile, OutputStream outputStream) throws IOException { DataFileWriter<Stock> writer = //<co id="ch03_avrospecific_comment1"/> new DataFileWriter<Stock>( new SpecificDatumWriter<Stock>()) .setSyncInterval(100); //<co id="ch03_avrospecific_comment2"/> writer.setCodec(CodecFactory.snappyCodec()); //<co id="ch03_avrospecific_comment3"/> writer.create(Stock.SCHEMA$, outputStream); //<co id="ch03_avrospecific_comment4"/> for(String line: FileUtils.readLines(inputFile)) { writer.append(createStock(line)); //<co id="ch03_avrospecific_comment5"/> } IOUtils.closeStream(writer); IOUtils.closeStream(outputStream); }