/**
 * Builds a {@link RawMessageEncoder} for datum instances described by the
 * given {@link Schema schema}, deconstructing them through the supplied
 * {@link GenericData data model}.
 * <p>
 * The {@code shouldCopy} flag controls who owns the returned buffers:
 * <ul>
 * <li>{@code true}: each buffer returned by {@code encode} is a copy and is
 * not modified by later calls to {@code encode}.</li>
 * <li>{@code false}: each buffer returned by {@code encode} wraps a
 * thread-local buffer that later calls to {@code encode} may, but need not,
 * reuse. Pass {@code false} only when the result is copied before the
 * current thread calls {@code encode} again.</li>
 * </ul>
 *
 * @param model the {@link GenericData data model} for datum instances
 * @param schema the {@link Schema} for datum instances
 * @param shouldCopy whether to copy buffers before returning encoded results
 */
public RawMessageEncoder(GenericData model, Schema schema, boolean shouldCopy) {
  this.copyOutputBytes = shouldCopy;
  this.writeSchema = schema;
  // The writer is created for the same schema that is stored in writeSchema.
  this.writer = model.createDatumWriter(schema);
}
/**
 * Builds a {@link RawMessageEncoder} for datum instances described by the
 * given {@link Schema schema}, deconstructing them through the supplied
 * {@link GenericData data model}.
 * <p>
 * The {@code shouldCopy} flag controls who owns the returned buffers:
 * <ul>
 * <li>{@code true}: each buffer returned by {@code encode} is a copy and is
 * not modified by later calls to {@code encode}.</li>
 * <li>{@code false}: each buffer returned by {@code encode} wraps a
 * thread-local buffer that later calls to {@code encode} may, but need not,
 * reuse. Pass {@code false} only when the result is copied before the
 * current thread calls {@code encode} again.</li>
 * </ul>
 *
 * @param model the {@link GenericData data model} for datum instances
 * @param schema the {@link Schema} for datum instances
 * @param shouldCopy whether to copy buffers before returning encoded results
 */
public RawMessageEncoder(GenericData model, Schema schema, boolean shouldCopy) {
  this.copyOutputBytes = shouldCopy;
  this.writeSchema = schema;
  // The writer is created for the same schema that is stored in writeSchema.
  this.writer = model.createDatumWriter(schema);
}
/**
 * Creates a {@link DatumWriter} for {@code schema} using the
 * {@link GenericData} instance returned by {@code GenericData.get()}.
 *
 * @param <D> the datum type the returned writer is typed to
 * @param schema the schema handed to {@code createDatumWriter}
 * @param dClass not read by this method; it appears only in the signature,
 *               where it fixes the type argument {@code D}
 * @return the writer produced by the data model, cast to {@code DatumWriter<D>}
 */
public static <D> DatumWriter<D> newDatumWriter(Schema schema, Class<D> dClass) {
  GenericData model = GenericData.get();
  // The cast is unchecked: nothing here verifies that the writer handles D.
  @SuppressWarnings("unchecked")
  DatumWriter<D> typedWriter = (DatumWriter<D>) model.createDatumWriter(schema);
  return typedWriter;
}
/**
 * Gets an object capable of serializing output from a Mapper.
 * <p>
 * The writer schema is chosen by wrapper type: the key writer schema for
 * {@link AvroKey} classes and the value writer schema for {@link AvroValue}
 * classes.
 *
 * @param c The class to get a serializer for.
 * @return A serializer for objects of class <code>c</code>.
 * @throws IllegalStateException If <code>c</code> is neither an AvroKey nor an AvroValue class.
 */
@Override
public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) {
  final Configuration conf = getConf();

  // Reject unsupported wrapper classes before looking up any schema.
  final boolean wrapsKey = AvroKey.class.isAssignableFrom(c);
  if (!wrapsKey && !AvroValue.class.isAssignableFrom(c)) {
    throw new IllegalStateException("Only AvroKey and AvroValue are supported.");
  }

  final Schema writerSchema = wrapsKey ? getKeyWriterSchema(conf) : getValueWriterSchema(conf);
  final DatumWriter<T> datumWriter = createDataModel(conf).createDatumWriter(writerSchema);
  return new AvroSerializer<>(writerSchema, datumWriter);
}
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); GenericData dataModel = AvroJob.createDataModel(conf); return new AvroWrapperSerializer(dataModel.createDatumWriter(schema)); }
/** * Constructor. * * @param writerSchema The writer schema for the records in the Avro container file. * @param compressionCodec A compression codec factory for the Avro container file. * @param outputStream The output stream to write the Avro container file to. * @param syncInterval The sync interval for the Avro container file. * @throws IOException If the record writer cannot be opened. */ public AvroKeyRecordWriter(Schema writerSchema, GenericData dataModel, CodecFactory compressionCodec, OutputStream outputStream, int syncInterval) throws IOException { // Create an Avro container file and a writer to it. mAvroFileWriter = new DataFileWriter<T>(dataModel.createDatumWriter(writerSchema)); mAvroFileWriter.setCodec(compressionCodec); mAvroFileWriter.setSyncInterval(syncInterval); mAvroFileWriter.create(writerSchema, outputStream); } /**
/**
 * Creates an Avro data file at this task's output path and returns a record
 * writer that appends each wrapped datum to it.
 * <p>
 * The file schema is the map-output schema when the job has no reduce tasks,
 * and the job output schema otherwise.
 */
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
    Progressable prog) throws IOException {
  final Schema schema;
  if (job.getNumReduceTasks() == 0) { // map-only job
    schema = AvroJob.getMapOutputSchema(job);
  } else {
    schema = AvroJob.getOutputSchema(job);
  }

  final GenericData dataModel = AvroJob.createDataModel(job);
  // No schema is given to the datum writer here; the schema is passed to writer.create(...) below.
  final DataFileWriter<T> writer = new DataFileWriter<T>(dataModel.createDatumWriter(null));
  configureDataFileWriter(writer, job);

  final Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
  // The file system is resolved from the path; the FileSystem parameter is not used.
  final FileSystem fs = path.getFileSystem(job);
  writer.create(schema, fs.create(path));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.append(wrapper.datum());
    }

    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}
// Sets up two container-file writers from the same data model.
// Data file: records use the key/value schema built by AvroKeyValue.getSchema(mKeySchema, mValueSchema).
// Index file: records pair the key schema with a LONG value
// (presumably a position in the data file; verify against the caller).
// NOTE(review): both "new DataFileWriter<>(...)" expressions end without a terminator in this excerpt
// and neither output stream is used afterwards, so the statements look truncated. Confirm against the
// full source before editing.
mRecordSchema = AvroKeyValue.getSchema(mKeySchema, mValueSchema); DatumWriter<GenericRecord> datumWriter = model.createDatumWriter(mRecordSchema); OutputStream dataOutputStream = fileSystem.create(dataFilePath); mDataFileWriter = new DataFileWriter<>(datumWriter) mIndexSchema = AvroKeyValue.getSchema(mKeySchema, Schema.create(Schema.Type.LONG)); DatumWriter<GenericRecord> indexWriter = model.createDatumWriter(mIndexSchema); OutputStream indexOutputStream = fileSystem.create(indexFilePath); mIndexFileWriter = new DataFileWriter<>(indexWriter)
/** * Constructor. * * @param keyConverter A key to Avro datum converter. * @param valueConverter A value to Avro datum converter. * @param dataModel The data model for key and value. * @param compressionCodec A compression codec factory for the Avro container file. * @param outputStream The output stream to write the Avro container file to. * @param syncInterval The sync interval for the Avro container file. * @throws IOException If the record writer cannot be opened. */ public AvroKeyValueRecordWriter(AvroDatumConverter<K, ?> keyConverter, AvroDatumConverter<V, ?> valueConverter, GenericData dataModel, CodecFactory compressionCodec, OutputStream outputStream, int syncInterval) throws IOException { // Create the generic record schema for the key/value pair. mKeyValuePairSchema = AvroKeyValue.getSchema( keyConverter.getWriterSchema(), valueConverter.getWriterSchema()); // Create an Avro container file and a writer to it. mAvroFileWriter = new DataFileWriter<GenericRecord>( dataModel.createDatumWriter(mKeyValuePairSchema)); mAvroFileWriter.setCodec(compressionCodec); mAvroFileWriter.setSyncInterval(syncInterval); mAvroFileWriter.create(mKeyValuePairSchema, outputStream); // Keep a reference to the converters. mKeyConverter = keyConverter; mValueConverter = valueConverter; // Create a reusable output record. mOutputRecord = new AvroKeyValue<>(new GenericData.Record(mKeyValuePairSchema)); }
/**
 * Writes the given datums, in order, to a new file obtained from
 * {@code temp.newFile()}.
 * <p>
 * The file writer is managed by try-with-resources, so it is closed on every
 * path, and a failure while closing cannot replace an exception thrown while
 * creating the file or appending a datum.
 *
 * @param <D> the datum type
 * @param model the data model that supplies the datum writer
 * @param schema the schema used for both the datum writer and the file
 * @param data the datums to append
 * @return the file that was written
 * @throws IOException if the file cannot be created, written, or closed
 */
@SuppressWarnings("unchecked")
private <D> File write(GenericData model, Schema schema, D... data) throws IOException {
  File file = temp.newFile();
  DatumWriter<D> writer = model.createDatumWriter(schema);
  try (DataFileWriter<D> fileWriter = new DataFileWriter<>(writer)) {
    fileWriter.create(schema, file);
    for (D datum : data) {
      fileWriter.append(datum);
    }
  }
  return file;
}
}
/**
 * Writes the given datums, in order, to a new file obtained from
 * {@code temp.newFile()}.
 * <p>
 * The file writer is managed by try-with-resources, so it is closed on every
 * path, and a failure while closing cannot replace an exception thrown while
 * creating the file or appending a datum.
 *
 * @param <D> the datum type
 * @param model the data model that supplies the datum writer
 * @param schema the schema used for both the datum writer and the file
 * @param data the datums to append
 * @return the file that was written
 * @throws IOException if the file cannot be created, written, or closed
 */
@SuppressWarnings("unchecked")
private <D> File write(GenericData model, Schema schema, D... data) throws IOException {
  File file = temp.newFile();
  DatumWriter<D> writer = model.createDatumWriter(schema);
  try (DataFileWriter<D> fileWriter = new DataFileWriter<>(writer)) {
    fileWriter.create(schema, file);
    for (D datum : data) {
      fileWriter.append(datum);
    }
  }
  return file;
}
/**
 * Writes the given datums, in order, to a new file obtained from
 * {@code temp.newFile()}.
 * <p>
 * The file writer is managed by try-with-resources, so it is closed on every
 * path, and a failure while closing cannot replace an exception thrown while
 * creating the file or appending a datum.
 *
 * @param <D> the datum type
 * @param model the data model that supplies the datum writer
 * @param schema the schema used for both the datum writer and the file
 * @param data the datums to append
 * @return the file that was written
 * @throws IOException if the file cannot be created, written, or closed
 */
@SuppressWarnings("unchecked")
private <D> File write(GenericData model, Schema schema, D... data) throws IOException {
  File file = temp.newFile();
  DatumWriter<D> writer = model.createDatumWriter(schema);
  try (DataFileWriter<D> fileWriter = new DataFileWriter<>(writer)) {
    fileWriter.create(schema, file);
    for (D datum : data) {
      fileWriter.append(datum);
    }
  }
  return file;
}
}
/**
 * Builds a {@link RawMessageEncoder} for datum instances described by the
 * given {@link Schema schema}, deconstructing them through the supplied
 * {@link GenericData data model}.
 * <p>
 * The {@code shouldCopy} flag controls who owns the returned buffers:
 * <ul>
 * <li>{@code true}: each buffer returned by {@code encode} is a copy and is
 * not modified by later calls to {@code encode}.</li>
 * <li>{@code false}: each buffer returned by {@code encode} wraps a
 * thread-local buffer that later calls to {@code encode} may, but need not,
 * reuse. Pass {@code false} only when the result is copied before the
 * current thread calls {@code encode} again.</li>
 * </ul>
 *
 * @param model the {@link GenericData data model} for datum instances
 * @param schema the {@link Schema} for datum instances
 * @param shouldCopy whether to copy buffers before returning encoded results
 */
public RawMessageEncoder(GenericData model, Schema schema, boolean shouldCopy) {
  this.copyOutputBytes = shouldCopy;
  this.writeSchema = schema;
  // The writer is created for the same schema that is stored in writeSchema.
  this.writer = model.createDatumWriter(schema);
}
/**
 * Creates a {@link DatumWriter} for {@code schema} using the
 * {@link GenericData} instance returned by {@code GenericData.get()}.
 *
 * @param <D> the datum type the returned writer is typed to
 * @param schema the schema handed to {@code createDatumWriter}
 * @param dClass not read by this method; it appears only in the signature,
 *               where it fixes the type argument {@code D}
 * @return the writer produced by the data model, cast to {@code DatumWriter<D>}
 */
public static <D> DatumWriter<D> newDatumWriter(Schema schema, Class<D> dClass) {
  GenericData model = GenericData.get();
  // The cast is unchecked: nothing here verifies that the writer handles D.
  @SuppressWarnings("unchecked")
  DatumWriter<D> typedWriter = (DatumWriter<D>) model.createDatumWriter(schema);
  return typedWriter;
}
/**
 * Gets an object capable of serializing output from a Mapper.
 * <p>
 * The writer schema is chosen by wrapper type: the key writer schema for
 * {@link AvroKey} classes and the value writer schema for {@link AvroValue}
 * classes.
 *
 * @param c The class to get a serializer for.
 * @return A serializer for objects of class <code>c</code>.
 * @throws IllegalStateException If <code>c</code> is neither an AvroKey nor an AvroValue class.
 */
@Override
public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) {
  final Configuration conf = getConf();

  // Reject unsupported wrapper classes before looking up any schema.
  final boolean wrapsKey = AvroKey.class.isAssignableFrom(c);
  if (!wrapsKey && !AvroValue.class.isAssignableFrom(c)) {
    throw new IllegalStateException("Only AvroKey and AvroValue are supported.");
  }

  final Schema writerSchema = wrapsKey ? getKeyWriterSchema(conf) : getValueWriterSchema(conf);
  final DatumWriter<T> datumWriter = createDataModel(conf).createDatumWriter(writerSchema);
  return new AvroSerializer<T>(writerSchema, datumWriter);
}
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); GenericData dataModel = AvroJob.createDataModel(conf); return new AvroWrapperSerializer(dataModel.createDatumWriter(schema)); }
/** * Constructor. * * @param writerSchema The writer schema for the records in the Avro container file. * @param compressionCodec A compression codec factory for the Avro container file. * @param outputStream The output stream to write the Avro container file to. * @param syncInterval The sync interval for the Avro container file. * @throws IOException If the record writer cannot be opened. */ public AvroKeyRecordWriter(Schema writerSchema, GenericData dataModel, CodecFactory compressionCodec, OutputStream outputStream, int syncInterval) throws IOException { // Create an Avro container file and a writer to it. mAvroFileWriter = new DataFileWriter<T>(dataModel.createDatumWriter(writerSchema)); mAvroFileWriter.setCodec(compressionCodec); mAvroFileWriter.setSyncInterval(syncInterval); mAvroFileWriter.create(writerSchema, outputStream); } /**
// Sets up two container-file writers from the same data model.
// Data file: records use the key/value schema built by AvroKeyValue.getSchema(mKeySchema, mValueSchema).
// Index file: records pair the key schema with a LONG value
// (presumably a position in the data file; verify against the caller).
// NOTE(review): both "new DataFileWriter<GenericRecord>(...)" expressions end without a terminator in
// this excerpt and neither output stream is used afterwards, so the statements look truncated. Confirm
// against the full source before editing.
mRecordSchema = AvroKeyValue.getSchema(mKeySchema, mValueSchema); DatumWriter<GenericRecord> datumWriter = model.createDatumWriter(mRecordSchema); OutputStream dataOutputStream = fileSystem.create(dataFilePath); mDataFileWriter = new DataFileWriter<GenericRecord>(datumWriter) mIndexSchema = AvroKeyValue.getSchema(mKeySchema, Schema.create(Schema.Type.LONG)); DatumWriter<GenericRecord> indexWriter = model.createDatumWriter(mIndexSchema); OutputStream indexOutputStream = fileSystem.create(indexFilePath); mIndexFileWriter = new DataFileWriter<GenericRecord>(indexWriter)
/**
 * Creates an Avro data file at this task's output path and returns a record
 * writer that appends each wrapped datum to it.
 * <p>
 * The file schema is the map-output schema when the job has no reduce tasks,
 * and the job output schema otherwise.
 */
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name,
    Progressable prog) throws IOException {
  final Schema schema;
  if (job.getNumReduceTasks() == 0) { // map-only job
    schema = AvroJob.getMapOutputSchema(job);
  } else {
    schema = AvroJob.getOutputSchema(job);
  }

  final GenericData dataModel = AvroJob.createDataModel(job);
  // No schema is given to the datum writer here; the schema is passed to writer.create(...) below.
  final DataFileWriter<T> writer = new DataFileWriter<T>(dataModel.createDatumWriter(null));
  configureDataFileWriter(writer, job);

  final Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
  // The file system is resolved from the path; the FileSystem parameter is not used.
  final FileSystem fs = path.getFileSystem(job);
  writer.create(schema, fs.create(path));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      writer.append(wrapper.datum());
    }

    public void close(Reporter reporter) throws IOException {
      writer.close();
    }
  };
}
/** * Constructor. * * @param keyConverter A key to Avro datum converter. * @param valueConverter A value to Avro datum converter. * @param dataModel The data model for key and value. * @param compressionCodec A compression codec factory for the Avro container file. * @param outputStream The output stream to write the Avro container file to. * @param syncInterval The sync interval for the Avro container file. * @throws IOException If the record writer cannot be opened. */ public AvroKeyValueRecordWriter(AvroDatumConverter<K, ?> keyConverter, AvroDatumConverter<V, ?> valueConverter, GenericData dataModel, CodecFactory compressionCodec, OutputStream outputStream, int syncInterval) throws IOException { // Create the generic record schema for the key/value pair. mKeyValuePairSchema = AvroKeyValue.getSchema( keyConverter.getWriterSchema(), valueConverter.getWriterSchema()); // Create an Avro container file and a writer to it. mAvroFileWriter = new DataFileWriter<GenericRecord>( dataModel.createDatumWriter(mKeyValuePairSchema)); mAvroFileWriter.setCodec(compressionCodec); mAvroFileWriter.setSyncInterval(syncInterval); mAvroFileWriter.create(mKeyValuePairSchema, outputStream); // Keep a reference to the converters. mKeyConverter = keyConverter; mValueConverter = valueConverter; // Create a reusable output record. mOutputRecord = new AvroKeyValue<Object, Object>(new GenericData.Record(mKeyValuePairSchema)); }