@Test
public void testWithSnappyCodec() throws IOException {
  Configuration conf = new Configuration();
  conf.setBoolean("mapred.output.compress", true);
  conf.set(AvroJob.CONF_OUTPUT_CODEC, DataFileConstants.SNAPPY_CODEC);
  conf.setInt(SYNC_INTERVAL_KEY, TEST_SYNC_INTERVAL);
  testGetRecordWriter(conf, CodecFactory.snappyCodec(), TEST_SYNC_INTERVAL);
}
@Test
public void testWithSnappyCodecWithHadoopConfig() throws IOException {
  Configuration conf = new Configuration();
  conf.setBoolean("mapred.output.compress", true);
  conf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");
  testGetRecordWriter(conf, CodecFactory.snappyCodec(), DataFileConstants.DEFAULT_SYNC_INTERVAL);
}
public static CodecFactory getCodecFactory(String property) {
  CodecType type = CodecType.valueOf(property);
  switch (type) {
    case BZIP2:
      return CodecFactory.bzip2Codec();
    case DEFLATE:
      return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
    case LZO:
      // note: LZO is mapped to the XZ codec here, not a true LZO implementation
      return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
    case SNAPPY:
      return CodecFactory.snappyCodec();
    case NONE:
    default:
      return CodecFactory.nullCodec();
  }
}
private CodecFactory getCodecFactory(String property) {
  CodecType type = CodecType.valueOf(property);
  switch (type) {
    case BZIP2:
      return CodecFactory.bzip2Codec();
    case DEFLATE:
      return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
    case LZO:
      return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
    case SNAPPY:
      return CodecFactory.snappyCodec();
    case NONE:
    default:
      return CodecFactory.nullCodec();
  }
}
protected CodecFactory getCodecFactory(String property) {
  CodecType type = CodecType.valueOf(property);
  switch (type) {
    case BZIP2:
      return CodecFactory.bzip2Codec();
    case DEFLATE:
      return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
    case NONE:
      return CodecFactory.nullCodec();
    case LZO:
      return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
    case SNAPPY:
    default:
      return CodecFactory.snappyCodec();
  }
}
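// A minimal sketch of the CodecType enum that the getCodecFactory variants above
// assume. The enum is not shown in this section, so the constant names are
// inferred from the switch cases (hypothetical reconstruction).
public enum CodecType {
  BZIP2, DEFLATE, LZO, SNAPPY, NONE
}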
@Parameters
public static List<Object[]> codecs() {
  List<Object[]> r = new ArrayList<>();
  r.add(new Object[] { null });
  r.add(new Object[] { CodecFactory.deflateCodec(0) });
  r.add(new Object[] { CodecFactory.deflateCodec(1) });
  r.add(new Object[] { CodecFactory.deflateCodec(9) });
  r.add(new Object[] { CodecFactory.nullCodec() });
  r.add(new Object[] { CodecFactory.snappyCodec() });
  r.add(new Object[] { CodecFactory.xzCodec(0) });
  r.add(new Object[] { CodecFactory.xzCodec(1) });
  r.add(new Object[] { CodecFactory.xzCodec(6) });
  r.add(new Object[] { CodecFactory.zstandardCodec() });
  return r;
}
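// A minimal sketch of how the codecs() parameter source above is typically
// consumed, assuming JUnit 4's Parameterized runner. The class name, field name,
// and test body here are hypothetical; only the injection mechanism is the point.
import static org.junit.Assert.assertNotNull;

import org.apache.avro.file.CodecFactory;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;

@RunWith(Parameterized.class)
public class CodecRoundTripTest {
  // the codecs() @Parameters method above would live in this class;
  // each Object[] row is injected into this field (index 0 by default)
  @Parameter
  public CodecFactory codec; // a null row means "no codec configured"

  @Test
  public void codecRowIsUsable() {
    // trivial use of the parameter; a real test would write and re-read a data file
    if (codec != null) {
      assertNotNull(codec.toString());
    }
  }
}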
public static void runCsvToAvro(SampleOptions options) throws IOException, IllegalArgumentException {
  FileSystems.setDefaultPipelineOptions(options);

  // Get the Avro schema
  String schemaJson = getSchema(options.getAvroSchema());
  Schema schema = new Schema.Parser().parse(schemaJson);

  // Check schema field types before starting the Dataflow job
  checkFieldTypes(schema);

  // Create the Pipeline object with the options defined above
  Pipeline pipeline = Pipeline.create(options);

  // Convert CSV to Avro
  pipeline.apply("Read CSV files", TextIO.read().from(options.getInputFile()))
      .apply("Convert CSV to Avro formatted data",
          ParDo.of(new ConvertCsvToAvro(schemaJson, options.getCsvDelimiter())))
      .setCoder(AvroCoder.of(GenericRecord.class, schema))
      .apply("Write Avro formatted data",
          AvroIO.writeGenericRecords(schemaJson)
              .to(options.getOutput())
              .withCodec(CodecFactory.snappyCodec())
              .withSuffix(".avro"));

  // Run the pipeline
  pipeline.run().waitUntilFinish();
}
public CodecFactory getCodecFactory() {
  if (avroCodec().equals("snappy")) {
    return CodecFactory.snappyCodec();
  } else if (avroCodec().startsWith("deflate")) {
    // expects names of the form "deflate<level>", e.g. "deflate5"
    return CodecFactory.deflateCodec(Integer.valueOf(avroCodec().replace("deflate", "")));
  }
  throw new IllegalArgumentException("Invalid avroCodec " + avroCodec());
}
private synchronized DataFileWriter getDataWriterCreateIfNull(
    final String typeName, final GeoWaveAvroFormatPlugin plugin) {
  if (!cachedWriters.containsKey(typeName)) {
    FSDataOutputStream out = null;
    final DataFileWriter dfw = new DataFileWriter(new GenericDatumWriter());
    cachedWriters.put(typeName, dfw);
    dfw.setCodec(CodecFactory.snappyCodec());
    try {
      // TODO: we should probably clean up the type name to make it
      // HDFS path safe in case there are invalid characters;
      // also, if a file already exists, do we want to delete it or
      // append to it?
      out = fs.create(new Path(hdfsBaseDirectory, typeName));
      dfw.create(plugin.getAvroSchema(), out);
    } catch (final IOException e) {
      LOGGER.error("Unable to create output stream", e);
      // cache a null value so we don't continually try to recreate
      cachedWriters.put(typeName, null);
      return null;
    }
  }
  return cachedWriters.get(typeName);
}
private CodecFactory getCodecFactory() {
  switch (compressionType) {
    case Snappy:
      return CodecFactory.snappyCodec();
    case Deflate:
      return CodecFactory.deflateCodec(9);
    case Bzip2:
      return CodecFactory.bzip2Codec();
    default:
      throw new IllegalArgumentException(String.format(
          "Unsupported compression format %s. Supported formats: %s",
          compressionType.getName(),
          Arrays.toString(Formats.AVRO.getSupportedCompressionTypes().toArray())));
  }
}
@Override
public void setCompression(COMPRESSION compression) {
  switch (compression) {
    case SNAPPY:
      codecFactory = CodecFactory.snappyCodec();
      break;
    case DEFLATE:
      codecFactory = CodecFactory.deflateCodec(Deflater.DEFAULT_COMPRESSION);
      break;
    default:
      codecFactory = CodecFactory.nullCodec();
      break;
  }
}
@Override
public void open() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);

  logger.debug("Opening data file with pathTmp:{} (final path will be path:{})",
      pathTmp, path);

  writer = new ReflectDatumWriter<E>();
  dataFileWriter = new DataFileWriter<E>(writer);

  /*
   * We may want to expose the codec in the writer and simply rely on the
   * builder and proper instantiation from dataset-level configuration.
   * Hard-coding snappy seems a little too draconian.
   */
  if (enableCompression) {
    dataFileWriter.setCodec(CodecFactory.snappyCodec());
  }

  try {
    out = fileSystem.create(pathTmp, true);
    dataFileWriter.create(schema, out);
  } catch (IOException e) {
    throw new DatasetWriterException("Unable to create writer to path:" + pathTmp, e);
  }

  state = ReaderWriterState.OPEN;
}
public static CodecFactory getCodec(String name) {
  if (name == null || name.equalsIgnoreCase("null")) {
    return CodecFactory.nullCodec();
  }
  CodecFactory codecFactory;
  switch (name) {
    case "snappy":
      codecFactory = CodecFactory.snappyCodec();
      break;
    case "gzip": // gzip is treated as deflate
    case "deflate":
      codecFactory = CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
      break;
    case "bzip2":
      codecFactory = CodecFactory.bzip2Codec();
      break;
    case "xz":
      codecFactory = CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
      break;
    default:
      // unrecognized names fall back to deflate at the default level
      codecFactory = CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
      break;
  }
  return codecFactory;
}
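// A minimal usage sketch for getCodec above, writing an Avro container file with
// the codec resolved by name. Assumptions: this lives in (or statically imports)
// the class defining getCodec, and the schema and output path are hypothetical.
import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class GetCodecUsage {
  public static void main(String[] args) throws IOException {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Example\",\"fields\":"
            + "[{\"name\":\"id\",\"type\":\"long\"}]}");
    try (DataFileWriter<GenericRecord> writer =
        new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
      writer.setCodec(getCodec("snappy")); // resolve the codec by name before create()
      writer.create(schema, new File("/tmp/example.avro"));
      GenericRecord record = new GenericData.Record(schema);
      record.put("id", 1L);
      writer.append(record);
    }
  }
}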
protected void dump(LogManager manager, String name, int partition, String group,
    int limit, String codec, Path output) throws InterruptedException {
  log.info("Dump record to file: " + output);
  Schema schema = ReflectData.get().getSchema(Record.class);
  DatumWriter<Record> datumWriter = new ReflectDatumWriter<>(schema);
  int count = 0;
  try (DataFileWriter<Record> dataFileWriter = new DataFileWriter<>(datumWriter)) {
    dataFileWriter.setCodec(CodecFactory.snappyCodec());
    dataFileWriter.create(schema, output.toFile());
    try (LogTailer<Record> tailer = getTailer(manager, name, partition, group, codec)) {
      do {
        LogRecord<Record> record = tailer.read(Duration.ofMillis(1000));
        if (record == null) {
          break;
        }
        dataFileWriter.append(record.message());
        count++;
      } while (limit < 0 || count < limit);
    }
  } catch (IOException e) {
    throw new StreamRuntimeException(e);
  }
  log.info(String.format("%d record(s) dumped", count));
}
public static <T> CodecFactory getCodecFactory(ConfigurableFactory<TileSerializer<T>> subFactory)
    throws ConfigurationException {
  CodecType codecType = subFactory.getPropertyValue(CODEC_TYPE);
  switch (codecType) {
    case Snappy:
      return CodecFactory.snappyCodec();
    case Deflate:
      int deflateLevel = subFactory.getPropertyValue(DEFLATE_LEVEL);
      return CodecFactory.deflateCodec(deflateLevel);
    case None:
      return CodecFactory.nullCodec();
    case BZip2:
    default:
      return CodecFactory.bzip2Codec();
  }
}
public static void writeToAvro(File inputFile, OutputStream outputStream) throws IOException {
  // create the writer with a small sync interval
  DataFileWriter<Stock> writer =
      new DataFileWriter<Stock>(new SpecificDatumWriter<Stock>())
          .setSyncInterval(100);
  // compress blocks with Snappy
  writer.setCodec(CodecFactory.snappyCodec());
  // write the container-file header for the Stock schema
  writer.create(Stock.SCHEMA$, outputStream);
  for (String line : FileUtils.readLines(inputFile)) {
    // convert each input line to a Stock record and append it
    writer.append(createStock(line));
  }
  IOUtils.closeStream(writer);
  IOUtils.closeStream(outputStream);
}
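// A brief usage sketch for writeToAvro above. Assumptions: it is called from the
// same class (or statically imported), and the input/output paths are hypothetical.
// Note that writeToAvro closes the stream itself; the extra close from
// try-with-resources is a harmless no-op.
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

public class WriteToAvroUsage {
  public static void main(String[] args) throws IOException {
    File input = new File("/tmp/stocks.txt");
    try (OutputStream out = new FileOutputStream("/tmp/stocks.avro")) {
      writeToAvro(input, out);
    }
  }
}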
@Test
public void testWriteWithDefaultCodec() throws Exception {
  AvroIO.Write<String> write = AvroIO.write(String.class).to("/tmp/foo/baz");
  assertEquals(CodecFactory.snappyCodec().toString(), write.inner.getCodec().toString());
}
@Test
public void testWriteWithCustomCodec() throws Exception {
  AvroIO.Write<String> write =
      AvroIO.write(String.class).to("/tmp/foo/baz").withCodec(CodecFactory.snappyCodec());
  assertEquals(SNAPPY_CODEC, write.inner.getCodec().toString());
}