Refine search
/**
 * Builds a Hive record writer that emits rows as an Avro container file at {@code path}.
 *
 * <p>The schema is resolved from the job configuration / table properties; a failure to
 * determine it is rethrown as {@link IOException} so Hive's writer contract is preserved.
 * When compression is requested, the codec comes from {@code OUTPUT_CODEC} (deflate honors
 * the configured {@code DEFLATE_LEVEL_KEY}).
 *
 * @param jobConf      job configuration (codec/level settings, filesystem access)
 * @param path         destination file path
 * @param valueClass   unused here; required by the interface
 * @param isCompressed whether to apply an Avro compression codec
 * @param properties   table properties used for schema resolution
 * @param progressable unused here; required by the interface
 * @throws IOException if the schema cannot be determined or the file cannot be created
 */
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed,
    Properties properties, Progressable progressable) throws IOException {
  Schema schema;
  try {
    schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
  } catch (AvroSerdeException e) {
    throw new IOException(e);
  }
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  if (isCompressed) {
    int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    dfw.setCodec(factory);
  }
  // BUG FIX: previously the stream opened by fs.create(path) leaked if
  // dfw.create() failed (e.g. bad header write); close it on failure.
  org.apache.hadoop.fs.FSDataOutputStream out = path.getFileSystem(jobConf).create(path);
  try {
    dfw.create(schema, out);
  } catch (IOException e) {
    try {
      out.close();
    } catch (IOException ignored) {
      // best-effort cleanup; propagate the original failure
    }
    throw e;
  }
  return new AvroGenericRecordWriter(dfw);
}
/**
 * Translates a {@code CodecType} property value into the matching Avro
 * {@link CodecFactory}, using default levels for the tunable codecs.
 *
 * <p>NOTE(review): the LZO case is mapped to the XZ codec — Avro's
 * {@code CodecFactory} has no LZO factory — confirm this substitution is intentional.
 *
 * @param property the codec type name (must match a {@code CodecType} constant)
 * @return the corresponding codec factory; {@code NONE}/unknown types yield the null codec
 */
private CodecFactory getCodecFactory(String property) {
  final CodecType type = CodecType.valueOf(property);
  if (type == CodecType.BZIP2) {
    return CodecFactory.bzip2Codec();
  }
  if (type == CodecType.DEFLATE) {
    return CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL);
  }
  if (type == CodecType.LZO) {
    return CodecFactory.xzCodec(CodecFactory.DEFAULT_XZ_LEVEL);
  }
  if (type == CodecType.SNAPPY) {
    return CodecFactory.snappyCodec();
  }
  // NONE and any future types fall through to no compression.
  return CodecFactory.nullCodec();
}
/**
 * Resolves the codec recorded in the container-file metadata.
 *
 * @return a codec instance for the metadata entry, or the null codec when absent
 */
Codec resolveCodec() {
  final String name = getMetaString(DataFileConstants.CODEC);
  final CodecFactory factory =
      (name == null) ? CodecFactory.nullCodec() : CodecFactory.fromString(name);
  return factory.createInstance();
}
/**
 * Chooses the output compression codec from the configuration map.
 *
 * <p>Compression is applied only when {@code CONF_COMPRESS} is true; deflate and xz
 * honor their respective level settings, any other name is resolved by
 * {@link CodecFactory#fromString(String)}.
 *
 * @param conf configuration key/value map
 * @return the configured codec factory, or the null codec when compression is off
 */
private CodecFactory getCompressionCodec(Map<String, String> conf) {
  if (!getBoolean(conf, CONF_COMPRESS, false)) {
    return CodecFactory.nullCodec();
  }
  final String outputCodec = conf.get(CONF_COMPRESS_CODEC);
  if (DataFileConstants.DEFLATE_CODEC.equals(outputCodec)) {
    return CodecFactory.deflateCodec(
        getInt(conf, CONF_DEFLATE_LEVEL, CodecFactory.DEFAULT_DEFLATE_LEVEL));
  }
  if (DataFileConstants.XZ_CODEC.equals(outputCodec)) {
    return CodecFactory.xzCodec(
        getInt(conf, CONF_XZ_LEVEL, CodecFactory.DEFAULT_XZ_LEVEL));
  }
  return CodecFactory.fromString(outputCodec);
}
private synchronized DataFileWriter getDataWriterCreateIfNull( final String typeName, final GeoWaveAvroFormatPlugin plugin) { if (!cachedWriters.containsKey(typeName)) { FSDataOutputStream out = null; final DataFileWriter dfw = new DataFileWriter(new GenericDatumWriter()); cachedWriters.put(typeName, dfw); dfw.setCodec(CodecFactory.snappyCodec()); try { // TODO: we should probably clean up the type name to make it // HDFS path safe in case there are invalid characters // also, if a file already exists do we want to delete it or // append to it? out = fs.create(new Path(hdfsBaseDirectory, typeName)); dfw.create(plugin.getAvroSchema(), out); } catch (final IOException e) { LOGGER.error("Unable to create output stream", e); // cache a null value so we don't continually try to recreate cachedWriters.put(typeName, null); return null; } } return cachedWriters.get(typeName); }
/**
 * Command-line entry point: writes {@code count} random records conforming to the given
 * schema into an Avro container file.
 *
 * <p>Usage: {@code RandomData <schemafile> <outputfile> <count> [codec]} — the optional
 * codec name defaults to "null" (no compression).
 *
 * @throws Exception on schema-parse or I/O failure
 */
public static void main(String[] args) throws Exception {
  if (args.length < 3 || args.length > 4) {
    System.out.println("Usage: RandomData <schemafile> <outputfile> <count> [codec]");
    System.exit(-1);
  }
  Schema sch = new Schema.Parser().parse(new File(args[0]));
  // BUG FIX: try-with-resources guarantees the writer (and its file handle) is
  // closed even when setCodec/create/parseInt throws; previously the writer
  // leaked if a failure happened before the try/finally was entered.
  try (DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>())) {
    writer.setCodec(CodecFactory.fromString(args.length >= 4 ? args[3] : "null"));
    writer.create(sch, new File(args[1]));
    for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) {
      writer.append(datum);
    }
  }
}
}
/**
 * Configures this serializer from the Flume context: sync interval and (optionally)
 * the Avro compression codec.
 *
 * <p>An unrecognized codec name is logged and ignored rather than failing
 * configuration, leaving the output uncompressed.
 *
 * @param context Flume context holding {@code SYNC_INTERVAL_BYTES} and
 *                {@code COMPRESSION_CODEC} settings
 */
@Override
public void configure(Context context) {
  final int syncIntervalBytes =
      context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
  final String compressionCodec =
      context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);
  writer = new ReflectDatumWriter<T>(getSchema());
  dataFileWriter = new DataFileWriter<T>(writer);
  dataFileWriter.setSyncInterval(syncIntervalBytes);
  try {
    dataFileWriter.setCodec(CodecFactory.fromString(compressionCodec));
  } catch (AvroRuntimeException e) {
    logger.warn("Unable to instantiate avro codec with name (" + compressionCodec
        + "). Compression disabled. Exception follows.", e);
  }
}
@Test public void testWriteAndRead() throws IOException { Schema schema = Schema.create(Type.STRING); // Write it DataFileWriter<Utf8> w = new DataFileWriter<>(new GenericDatumWriter<>(schema)); w.setCodec(CodecFactory.deflateCodec(6)); ByteArrayOutputStream baos = new ByteArrayOutputStream(); w.create(schema, baos); w.append(new Utf8("hello world")); w.append(new Utf8("hello moon")); w.sync(); w.append(new Utf8("bye bye world")); w.append(new Utf8("bye bye moon")); w.close(); // Read it DataFileStream<Utf8> r = new DataFileStream<>( new ByteArrayInputStream(baos.toByteArray()), new GenericDatumReader<>(schema)); assertEquals("hello world", r.next().toString()); assertEquals("hello moon", r.next().toString()); assertEquals("bye bye world", r.next().toString()); assertEquals("bye bye moon", r.next().toString()); assertFalse(r.hasNext()); } }
output = Util.createFromFS(lastArg); writer = new DataFileWriter<>( new GenericDatumWriter<>()); ? CodecFactory.fromString(DataFileConstants.NULL_CODEC) : CodecFactory.fromString(codecName); writer.setCodec(codec); for (String key : reader.getMetaKeys()) { if (!DataFileWriter.isReservedMeta(key)) { writer.setMeta(key, reader.getMeta(key));
DataFileWriter<GenericRecord> writer = new DataFileWriter<>( new GenericDatumWriter<>()); Schema schema = null; if (!DataFileWriter.isReservedMeta(key)) { byte[] metadatum = reader.getMeta(key); metadata.put(key, metadatum); writer.setMeta(key, metadatum); inputCodec = DataFileConstants.NULL_CODEC; writer.setCodec(CodecFactory.fromString(inputCodec)); writer.create(schema, output); } else {
/**
 * Initializes the output container writer: optionally copies non-reserved metadata
 * from the input file, applies the requested codec, and writes the file header with
 * the input's schema.
 *
 * @param reader source container file (schema and metadata provider)
 * @param codec  codec name, resolved via {@link CodecFactory#fromString(String)}
 * @param out    destination stream for the new container file
 * @throws IOException if the header cannot be written
 */
@Override
public void init(final DataFileStream<GenericRecord> reader, final String codec,
    final OutputStream out) throws IOException {
  writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
  if (transferMetadata) {
    for (final String key : reader.getMetaKeys()) {
      if (RESERVED_METADATA.contains(key)) {
        continue;  // reserved entries are managed by the writer itself
      }
      writer.setMeta(key, reader.getMeta(key));
    }
  }
  writer.setCodec(CodecFactory.fromString(codec));
  writer.create(reader.getSchema(), out);
}
/**
 * Prepares {@code writer} for appending Avro records to the FlowFile and returns the
 * serialized container-file header bytes.
 *
 * <p>The header (schema + metadata + codec marker) is rendered into an in-memory
 * buffer by create()/close(), appended to the FlowFile, and also returned so that
 * later appends to the same FlowFile can reuse it.
 *
 * <p>NOTE(review): closing {@code writer} inside the append callback appears
 * deliberate — only the header is needed, not an open writer; confirm callers
 * create a fresh writer for the actual records.
 *
 * @param session     NiFi session used to append content to the FlowFile
 * @param codec       codec name resolved via CodecFactory.fromString
 * @param reader      input container file supplying schema and metadata
 * @param writer      writer being initialized (codec + metadata are set on it)
 * @param flowFileRef holds the FlowFile; updated with the appended-to FlowFile
 * @return the Avro container header bytes that were written to the FlowFile
 */
private byte[] initAvroWriter(ProcessSession session, String codec,
    DataFileStream<GenericRecord> reader, DataFileWriter<GenericRecord> writer,
    AtomicReference<FlowFile> flowFileRef) {
  writer.setCodec(CodecFactory.fromString(codec));
  // Transfer metadata (this is a subset of the incoming file)
  for (String metaKey : reader.getMetaKeys()) {
    if (!RESERVED_METADATA.contains(metaKey)) {
      writer.setMeta(metaKey, reader.getMeta(metaKey));
    }
  }
  final ByteArrayOutputStream avroHeader = new ByteArrayOutputStream();
  flowFileRef.set(session.append(flowFileRef.get(), (out) -> {
    // Create writer so that records can be appended later.
    writer.create(reader.getSchema(), avroHeader);
    writer.close();
    final byte[] header = avroHeader.toByteArray();
    out.write(header);
  }));
  // Capture the Avro header byte array that is just written to the FlowFile.
  // This is needed when Avro records are appended to the same FlowFile.
  return avroHeader.toByteArray();
}
/**
 * Resolves the codec chosen on the command line, falling back to {@code defaultCodec}
 * when the {@code codec} option was not supplied.
 *
 * @param opts         parsed command-line options
 * @param codec        the codec-name option spec
 * @param level        the compression-level option spec (deflate / xz only)
 * @param defaultCodec codec name to use when the option is absent
 * @return the corresponding {@link CodecFactory}
 */
static CodecFactory codecFactory(OptionSet opts, OptionSpec<String> codec,
    OptionSpec<Integer> level, String defaultCodec) {
  String codecName = opts.hasArgument(codec) ? codec.value(opts) : defaultCodec;
  if (codecName.equals(DEFLATE_CODEC)) {
    return CodecFactory.deflateCodec(level.value(opts));
  } else if (codecName.equals(DataFileConstants.XZ_CODEC)) {
    return CodecFactory.xzCodec(level.value(opts));
  } else {
    // BUG FIX: previously passed codec.value(opts), which is null (NPE in
    // fromString) when the option was absent — the resolved codecName already
    // incorporates the default and must be used here.
    return CodecFactory.fromString(codecName);
  }
}
}
public VariantAvroWriter(Schema schema, String codecName, OutputStream outputStream) { this.schema = schema; this.outputStream = outputStream; if (codecName == null || codecName.isEmpty()) { codecName = "null"; } else { codecName = codecName.replace("gzip", "deflate"); } this.codecName = codecName; datumWriter = new SpecificDatumWriter<>(); writer = new DataFileWriter<>(datumWriter); writer.setCodec(CodecFactory.fromString(this.codecName)); // writer.setCodec(CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL)); }
static <T> void configureDataFileWriter(DataFileWriter<T> writer, TaskAttemptContext job,String codecName,int deflateLevel) throws UnsupportedEncodingException { Configuration conf = job.getConfiguration(); if (FileOutputFormat.getCompressOutput(job)) { CodecFactory factory = codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(deflateLevel) : CodecFactory.fromString(codecName); writer.setCodec(factory); } writer.setSyncInterval(conf.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL)); // copy metadata from job for (Map.Entry<String,String> e : conf) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1") .getBytes("ISO-8859-1")); } }
/**
 * Set the compression codec by name.
 *
 * @param codec codec name understood by {@link CodecFactory#fromString(String)}
 * @return this options object, for chaining
 */
public Options withCodec(String codec) {
  final CodecFactory factory = CodecFactory.fromString(codec);
  this.codec = factory;
  return this;
}
/**
 * Parameter sets for the test: a {@code null} factory (writer default) plus the
 * null, snappy, zstandard codecs and deflate/xz at several levels.
 */
@Parameters
public static List<Object[]> codecs() {
  final CodecFactory[] factories = {
      null,
      CodecFactory.deflateCodec(0),
      CodecFactory.deflateCodec(1),
      CodecFactory.deflateCodec(9),
      CodecFactory.nullCodec(),
      CodecFactory.snappyCodec(),
      CodecFactory.xzCodec(0),
      CodecFactory.xzCodec(1),
      CodecFactory.xzCodec(6),
      CodecFactory.zstandardCodec()
  };
  final List<Object[]> result = new ArrayList<>(factories.length);
  for (CodecFactory factory : factories) {
    result.add(new Object[] { factory });
  }
  return result;
}
/**
 * Creates a {@link CodecFactory} from an optional codec name and deflate level.
 *
 * <p>An absent codec name yields a deflate codec at the configured default level
 * (any supplied deflate level is ignored in that case). An explicit "deflate"
 * honors the supplied level; any other name is resolved (lower-cased) via
 * {@link CodecFactory#fromString(String)}.
 *
 * @param codecName    the name of the codec to use (e.g. deflate, snappy, xz, etc.).
 * @param deflateLevel must be an integer from [0-9], only applicable when the
 *                     codecName is "deflate".
 * @return a {@link CodecFactory}.
 */
public static CodecFactory getCodecFactory(Optional<String> codecName, Optional<String> deflateLevel) {
  if (!codecName.isPresent()) {
    // No codec requested: deflate at the default level, regardless of deflateLevel.
    return CodecFactory.deflateCodec(ConfigurationKeys.DEFAULT_DEFLATE_LEVEL);
  }
  if (codecName.get().equalsIgnoreCase(DataFileConstants.DEFLATE_CODEC)) {
    final int level = deflateLevel.isPresent()
        ? Integer.parseInt(deflateLevel.get())
        : ConfigurationKeys.DEFAULT_DEFLATE_LEVEL;
    return CodecFactory.deflateCodec(level);
  }
  return CodecFactory.fromString(codecName.get().toLowerCase());
}
/** Open a writer appending to an existing file.
 * <strong>Since 1.9.0 this method does not close in.</strong>
 * @param in reading the existing file.
 * @param out positioned at the end of the existing file.
 */
public DataFileWriter<D> appendTo(SeekableInput in, OutputStream out) throws IOException {
  assertNotOpen();
  // Adopt the existing file's schema, sync marker, and metadata so appended
  // blocks remain compatible with the blocks already on disk.
  final DataFileReader<D> reader = new DataFileReader<>(in, new GenericDatumReader<>());
  this.schema = reader.getSchema();
  this.sync = reader.getHeader().sync;
  this.meta.putAll(reader.getHeader().meta);
  final byte[] codecBytes = this.meta.get(DataFileConstants.CODEC);
  final CodecFactory factory = (codecBytes == null)
      ? CodecFactory.nullCodec()
      : CodecFactory.fromString(new String(codecBytes, "UTF-8"));
  this.codec = factory.createInstance();
  init(out);
  return this;
}
/**
 * Parameter tuples for the test: two codec factories and a boolean flag, covering
 * null/deflate/xz combinations at differing levels.
 */
@Parameters
public static List<Object[]> codecs() {
  final Object[][] cases = {
      { null, null, false },
      { null, null, true },
      { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), false },
      { CodecFactory.deflateCodec(1), CodecFactory.deflateCodec(6), true },
      { CodecFactory.deflateCodec(3), CodecFactory.nullCodec(), false },
      { CodecFactory.nullCodec(), CodecFactory.deflateCodec(6), false },
      { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), false },
      { CodecFactory.xzCodec(1), CodecFactory.xzCodec(2), true },
      { CodecFactory.xzCodec(2), CodecFactory.nullCodec(), false },
      { CodecFactory.nullCodec(), CodecFactory.xzCodec(2), false }
  };
  final List<Object[]> params = new ArrayList<>(cases.length);
  for (Object[] tuple : cases) {
    params.add(tuple);
  }
  return params;
}