/** Set the compression codec. */
public Options withCodec(String codec) {
  this.codec = CodecFactory.fromString(codec);
  return this;
}
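Usage is a one-liner; a minimal sketch, assuming the surrounding Options class is instantiable with a no-arg constructor:

// Hypothetical usage of the fluent setter above. "snappy" can be any
// name CodecFactory.fromString understands: "null", "deflate",
// "snappy", "bzip2", or "xz".
Options options = new Options().withCodec("snappy");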
private CodecFactory getCodecFactory(CompressionCodec codec) {
  CompressionCodecName codecName = CompressionCodecName
      .fromCompressionCodec(codec != null ? codec.getClass() : null);
  try {
    return CodecFactory.fromString(codecName.name().toLowerCase());
  } catch (AvroRuntimeException e) {
    LOG.error("Error creating codec factory", e);
  }
  // Fall back to the uncompressed codec when the name cannot be mapped.
  return CodecFactory.fromString("null");
}
private CodecFactory getCompressionCodec(Map<String, String> conf) {
  if (getBoolean(conf, CONF_COMPRESS, false)) {
    int deflateLevel = getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL);
    int xzLevel = getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL);
    String outputCodec = conf.get(CONF_COMPRESS_CODEC);
    if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
      return CodecFactory.deflateCodec(deflateLevel);
    } else if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
      return CodecFactory.xzCodec(xzLevel);
    } else {
      return CodecFactory.fromString(outputCodec);
    }
  }
  return CodecFactory.nullCodec();
}
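A short sketch of a configuration map that would exercise the deflate branch above, assuming it is called from inside the owning class; the literal keys stand in for the class's CONF_* constants, whose real values are not shown in this snippet:

// Hypothetical keys; substitute the actual CONF_* constant values.
Map<String, String> conf = new HashMap<>();
conf.put("compress", "true");          // CONF_COMPRESS
conf.put("compress.codec", "deflate"); // CONF_COMPRESS_CODEC
conf.put("deflate.level", "6");        // CONF_DEFLATE_LEVEL
CodecFactory factory = getCompressionCodec(conf); // CodecFactory.deflateCodec(6)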
@Override
public void init(final DataFileStream<GenericRecord> reader, final String codec,
    final OutputStream out) throws IOException {
  writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
  if (transferMetadata) {
    // Copy user metadata from the source file, skipping Avro's reserved keys.
    for (String metaKey : reader.getMetaKeys()) {
      if (!RESERVED_METADATA.contains(metaKey)) {
        writer.setMeta(metaKey, reader.getMeta(metaKey));
      }
    }
  }
  writer.setCodec(CodecFactory.fromString(codec));
  writer.create(reader.getSchema(), out);
}
Codec resolveCodec() {
  String codecStr = getMetaString(DataFileConstants.CODEC);
  if (codecStr != null) {
    return CodecFactory.fromString(codecStr).createInstance();
  } else {
    return CodecFactory.nullCodec().createInstance();
  }
}
static CodecFactory codecFactory(OptionSet opts, OptionSpec<String> codec,
    OptionSpec<Integer> level, String defaultCodec) {
  String codecName = opts.hasArgument(codec) ? codec.value(opts) : defaultCodec;
  if (codecName.equals(DataFileConstants.DEFLATE_CODEC)) {
    return CodecFactory.deflateCodec(level.value(opts));
  } else if (codecName.equals(DataFileConstants.XZ_CODEC)) {
    return CodecFactory.xzCodec(level.value(opts));
  } else {
    // Use codecName, not codec.value(opts): when the option was not
    // supplied, codec.value(opts) would bypass the defaultCodec fallback.
    return CodecFactory.fromString(codecName);
  }
}
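A hedged sketch of wiring the helper above to jopt-simple; the option names, defaults, and sample arguments are illustrative, not Avro's own CLI:

// imports: joptsimple.{OptionParser, OptionSet, OptionSpec}
OptionParser parser = new OptionParser();
OptionSpec<String> codecOpt = parser.accepts("codec").withOptionalArg().ofType(String.class);
OptionSpec<Integer> levelOpt = parser.accepts("level").withOptionalArg()
    .ofType(Integer.class).defaultsTo(CodecFactory.DEFAULT_DEFLATE_LEVEL);
OptionSet opts = parser.parse("--codec", "deflate", "--level", "9");
CodecFactory factory = codecFactory(opts, codecOpt, levelOpt, "null"); // deflateCodec(9)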
private CodecFactory getCodec(File output) throws Exception {
  try (DataFileStream<GenericRecord> reader = new DataFileStream<>(
      new FileInputStream(output), new GenericDatumReader<>())) {
    String codec = reader.getMetaString(DataFileConstants.CODEC);
    return codec == null ? CodecFactory.nullCodec() : CodecFactory.fromString(codec);
  }
}
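A minimal round trip for the helper above: write an empty snappy-compressed Avro file, then read the codec name back out of its metadata. The record schema is a throwaway literal:

// imports: org.apache.avro.Schema, org.apache.avro.file.{CodecFactory, DataFileWriter},
// org.apache.avro.generic.{GenericDatumWriter, GenericRecord}
Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"R\",\"fields\":[]}");
File output = File.createTempFile("codec-test", ".avro");
try (DataFileWriter<GenericRecord> writer =
         new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
  writer.setCodec(CodecFactory.snappyCodec());
  writer.create(schema, output);
}
CodecFactory factory = getCodec(output); // snappy codec factory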
@Test
public void testHadoopCodecFactoryDeflate() {
  CodecFactory hadoopDeflateCodec =
      HadoopCodecFactory.fromHadoopString("org.apache.hadoop.io.compress.DeflateCodec");
  CodecFactory avroDeflateCodec = CodecFactory.fromString("deflate");
  assertEquals(avroDeflateCodec.getClass(), hadoopDeflateCodec.getClass());
}
@Test
public void testBZip2CodecUsingAvroCodec() {
  CodecFactory avroBZip2Codec = CodecFactory.fromString("bzip2");
  JobConf job = new JobConf();
  job.set("mapred.output.compress", "true");
  job.set(AvroJob.OUTPUT_CODEC, "bzip2");
  CodecFactory factory = AvroOutputFormat.getCodecFactory(job);
  assertNotNull(factory);
  assertEquals(avroBZip2Codec.getClass(), factory.getClass());
}
@Test
public void testDeflateCodecUsingAvroCodec() {
  CodecFactory avroDeflateCodec = CodecFactory.fromString("deflate");
  JobConf job = new JobConf();
  job.set("mapred.output.compress", "true");
  job.set(AvroJob.OUTPUT_CODEC, "deflate");
  CodecFactory factory = AvroOutputFormat.getCodecFactory(job);
  assertNotNull(factory);
  assertEquals(avroDeflateCodec.getClass(), factory.getClass());
}
@Test
public void testHadoopCodecFactorySnappy() {
  CodecFactory hadoopSnappyCodec =
      HadoopCodecFactory.fromHadoopString("org.apache.hadoop.io.compress.SnappyCodec");
  CodecFactory avroSnappyCodec = CodecFactory.fromString("snappy");
  assertEquals(avroSnappyCodec.getClass(), hadoopSnappyCodec.getClass());
}
@Test
public void testHadoopCodecFactoryBZip2() {
  CodecFactory hadoopBZip2Codec =
      HadoopCodecFactory.fromHadoopString("org.apache.hadoop.io.compress.BZip2Codec");
  CodecFactory avroBZip2Codec = CodecFactory.fromString("bzip2");
  assertEquals(avroBZip2Codec.getClass(), hadoopBZip2Codec.getClass());
}
@Test
public void testHadoopCodecFactoryGZip() {
  // Hadoop's gzip codec maps to Avro's deflate codec.
  CodecFactory hadoopGZipCodec =
      HadoopCodecFactory.fromHadoopString("org.apache.hadoop.io.compress.GZipCodec");
  CodecFactory avroDeflateCodec = CodecFactory.fromString("deflate");
  assertEquals(avroDeflateCodec.getClass(), hadoopGZipCodec.getClass());
}
@Test
public void testBZip2CodecUsingHadoopClass() {
  CodecFactory avroBZip2Codec = CodecFactory.fromString("bzip2");
  JobConf job = new JobConf();
  job.set("mapred.output.compress", "true");
  job.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.BZip2Codec");
  CodecFactory factory = AvroOutputFormat.getCodecFactory(job);
  assertNotNull(factory);
  assertEquals(avroBZip2Codec.getClass(), factory.getClass());
}
@Test
public void testSnappyCodecUsingHadoopClass() {
  CodecFactory avroSnappyCodec = CodecFactory.fromString("snappy");
  JobConf job = new JobConf();
  job.set("mapred.output.compress", "true");
  job.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
  CodecFactory factory = AvroOutputFormat.getCodecFactory(job);
  assertNotNull(factory);
  assertEquals(avroSnappyCodec.getClass(), factory.getClass());
}
@Test
public void testDeflateCodecUsingHadoopClass() {
  CodecFactory avroDeflateCodec = CodecFactory.fromString("deflate");
  JobConf job = new JobConf();
  job.set("mapred.output.compress", "true");
  job.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.DeflateCodec");
  CodecFactory factory = AvroOutputFormat.getCodecFactory(job);
  assertNotNull(factory);
  assertEquals(avroDeflateCodec.getClass(), factory.getClass());
}
@Test
public void testGZipCodecUsingHadoopClass() {
  // Hadoop's gzip codec is expected to map to Avro's deflate codec.
  CodecFactory avroDeflateCodec = CodecFactory.fromString("deflate");
  JobConf job = new JobConf();
  job.set("mapred.output.compress", "true");
  job.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GZipCodec");
  CodecFactory factory = AvroOutputFormat.getCodecFactory(job);
  assertNotNull(factory);
  assertEquals(avroDeflateCodec.getClass(), factory.getClass());
}
@Test
public void testSnappyCodecUsingAvroCodec() {
  CodecFactory avroSnappyCodec = CodecFactory.fromString("snappy");
  JobConf job = new JobConf();
  job.set("mapred.output.compress", "true");
  job.set(AvroJob.OUTPUT_CODEC, "snappy");
  CodecFactory factory = AvroOutputFormat.getCodecFactory(job);
  assertNotNull(factory);
  assertEquals(avroSnappyCodec.getClass(), factory.getClass());
}
@Override
public void configure(Context context) {
  int syncIntervalBytes =
      context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  String compressionCodec =
      context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);
  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);
  dataFileWriter.setSyncInterval(syncIntervalBytes);
  try {
    CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
    dataFileWriter.setCodec(codecFactory);
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name ("
        + compressionCodec + "). Compression disabled. Exception follows.", e);
  }
}
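A sketch of driving the configure(...) method above through a Flume Context; the literal keys are assumptions standing in for the SYNC_INTERVAL_BYTES and COMPRESSION_CODEC constants, and serializer is a hypothetical instance of the surrounding class:

// Hypothetical property names; check the class's actual constants.
Context context = new Context();
context.put("syncIntervalBytes", "2048000"); // SYNC_INTERVAL_BYTES
context.put("compressionCodec", "snappy");   // COMPRESSION_CODEC
serializer.configure(context);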