public FsAuditSink(Config config, ValueAuditRuntimeMetadata auditMetadata) throws IOException {
  this.auditDirPath = new Path(ConfigUtils.getString(config, FS_SINK_AUDIT_OUTPUT_PATH_KEY, FS_SINK_AUDIT_OUTPUT_DEFAULT_PATH));
  this.fs = this.auditDirPath.getFileSystem(new Configuration());
  this.auditMetadata = auditMetadata;
  this.auditFileOutputStream = closer.register(fs.create(getAuditFilePath()));

  DataFileWriter<GenericRecord> dataFileWriter =
      this.closer.register(new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>()));
  this.writer = this.closer.register(
      dataFileWriter.create(this.auditMetadata.getTableMetadata().getTableSchema(), this.auditFileOutputStream));
}
private static byte[] convertRecordToAvro(Schema schema, Map<String, Object> values) {
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  GenericData.Record record = new GenericData.Record(schema);
  values.forEach(record::put);
  // try-with-resources closes (and flushes) the writer, so no explicit close() is needed.
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    dataFileWriter.create(schema, outputStream);
    dataFileWriter.append(record);
  } catch (IOException e) {
    throw new UncheckedIOException("Failed to convert to Avro.", e);
  }
  return outputStream.toByteArray();
}
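For reference, a minimal counterpart sketch that deserializes the bytes produced by convertRecordToAvro back into field values; the class and method names here are illustrative assumptions, not from the source:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class AvroBytesRoundTrip {
  /** Hypothetical helper: reads the first record out of a container-format byte array. */
  static Map<String, Object> readFirstRecord(Schema schema, byte[] avroBytes) throws IOException {
    try (DataFileStream<GenericRecord> stream = new DataFileStream<>(
        new ByteArrayInputStream(avroBytes), new GenericDatumReader<>(schema))) {
      GenericRecord record = stream.next();
      Map<String, Object> values = new HashMap<>();
      for (Schema.Field field : schema.getFields()) {
        values.put(field.name(), record.get(field.name()));
      }
      return values;
    }
  }
}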
@Override
public org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter getHiveRecordWriter(
    JobConf jobConf, Path path, Class<? extends Writable> valueClass, boolean isCompressed,
    Properties properties, Progressable progressable) throws IOException {
  Schema schema;
  try {
    schema = AvroSerdeUtils.determineSchemaOrThrowException(jobConf, properties);
  } catch (AvroSerdeException e) {
    throw new IOException(e);
  }
  GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> dfw = new DataFileWriter<GenericRecord>(gdw);
  if (isCompressed) {
    int level = jobConf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
    String codecName = jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC);
    CodecFactory factory = codecName.equals(DEFLATE_CODEC)
        ? CodecFactory.deflateCodec(level)
        : CodecFactory.fromString(codecName);
    dfw.setCodec(factory);
  }
  dfw.create(schema, path.getFileSystem(jobConf).create(path));
  return new AvroGenericRecordWriter(dfw);
}
public AvroGenericRecordHDFSWriter(FileRotationPolicy policy, Path path, FSDataOutputStream stream, Schema schema)
    throws IOException {
  super(policy, path);
  this.out = stream;
  this.schema = schema;
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  avroWriter = new DataFileWriter<>(datumWriter);
  avroWriter.create(this.schema, this.out);
}
/**
 * Create a new {@link DataFileWriter} for writing Avro records.
 *
 * @param codecFactory a {@link CodecFactory} object for building the compression codec
 * @throws IOException if there is something wrong creating a new {@link DataFileWriter}
 */
private DataFileWriter<GenericRecord> createDataFileWriter(CodecFactory codecFactory) throws IOException {
  @SuppressWarnings("resource")
  DataFileWriter<GenericRecord> writer = new DataFileWriter<>(this.datumWriter);
  writer.setCodec(codecFactory);

  // Open the file and return the DataFileWriter
  return writer.create(this.schema, this.stagingFileOutputStream);
}
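As context for the @SuppressWarnings("resource") above: create() returns the same writer instance, so nothing leaks as long as the caller closes the returned writer. A standalone sketch of the same shape, with assumed names (open, schema, out):

import java.io.IOException;
import java.io.OutputStream;

import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class DataFileWriterFactory {
  /** Opens a container-file writer on the given stream; the caller must close it. */
  static DataFileWriter<GenericRecord> open(Schema schema, OutputStream out, CodecFactory codec)
      throws IOException {
    @SuppressWarnings("resource")
    DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
    writer.setCodec(codec);  // must be set before create()
    return writer.create(schema, out);  // returns the same instance, now open
  }
}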
private void createAvroFile() throws IOException {
  Path inputPath = new Path(INPUT_PATH);
  FileSystem fs = FileSystem.get(new Configuration());
  fs.delete(inputPath, true);

  DataFileWriter<User> fileWriter = new DataFileWriter<>(new GenericDatumWriter<User>(User.SCHEMA));
  fileWriter.create(User.SCHEMA, fs.create(new Path(inputPath, "file.avro")));
  IntStream.range(0, 100)
           .mapToObj(i -> new User("name" + i, "pass" + i, i, i % 2 == 0))
           .forEach(user -> Util.uncheckRun(() -> fileWriter.append(user)));
  fileWriter.close();
  fs.close();
}
FileSystem fileSystem = options.getPath().getFileSystem(options.getConfiguration());
if (!fileSystem.mkdirs(options.getPath())) {
  throw new IOException("Unable to create directory for SortedKeyValueFile: " + options.getPath());
}

// Open a writer for the data file.
Path dataFilePath = new Path(options.getPath(), DATA_FILENAME);
LOG.debug("Creating writer for avro data file: " + dataFilePath);
mRecordSchema = AvroKeyValue.getSchema(mKeySchema, mValueSchema);
DatumWriter<GenericRecord> datumWriter = model.createDatumWriter(mRecordSchema);
OutputStream dataOutputStream = fileSystem.create(dataFilePath);
mDataFileWriter = new DataFileWriter<>(datumWriter)
    .setSyncInterval(1 << 20)  // Set the auto-sync interval sufficiently large,
                               // since syncs are issued manually at each index interval.
    .setCodec(options.getCodec())
    .create(mRecordSchema, dataOutputStream);

// Open a writer for the index file.
Path indexFilePath = new Path(options.getPath(), INDEX_FILENAME);
LOG.debug("Creating writer for avro index file: " + indexFilePath);
mIndexSchema = AvroKeyValue.getSchema(mKeySchema, Schema.create(Schema.Type.LONG));
DatumWriter<GenericRecord> indexWriter = model.createDatumWriter(mIndexSchema);
OutputStream indexOutputStream = fileSystem.create(indexFilePath);
mIndexFileWriter = new DataFileWriter<>(indexWriter)
    .create(mIndexSchema, indexOutputStream);
public void setUpThread(Path intermDir, JobMetadata metadata) {
  if (intermDir == null) {
    return;
  }
  inProgressHistFile = new Path(intermDir, HistoryFileUtils.generateFileName(metadata));
  try {
    out = myFs.create(inProgressHistFile);
    dataFileWriter.create(Event.SCHEMA$, out);
  } catch (IOException e) {
    LOG.error("Failed to set up writer", e);
  }
}
AvroKeyValueWriter(Schema keySchema, Schema valueSchema, CodecFactory compressionCodec,
    OutputStream outputStream, int syncInterval) throws IOException {
  // Create the generic record schema for the key/value pair.
  mKeyValuePairSchema = AvroKeyValue.getSchema(keySchema, valueSchema);

  // Create an Avro container file and a writer to it.
  DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(mKeyValuePairSchema);
  mAvroFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
  mAvroFileWriter.setCodec(compressionCodec);
  mAvroFileWriter.setSyncInterval(syncInterval);
  mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

  // Create a reusable output record.
  mOutputRecord = new AvroKeyValue<Object, Object>(new GenericData.Record(mKeyValuePairSchema));
}
@Override
public void open() throws IOException {
  writer = new ReflectDatumWriter<E>();
  dataFileWriter = new DataFileWriter<E>(writer);
  if (enableCompression) {
    dataFileWriter.setCodec(getCodecFactory());
  }
  out = fileSystem.create(path, true);
  dataFileWriter.create(schema, out);
}
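The open() above pairs ReflectDatumWriter with DataFileWriter to persist plain Java objects. A self-contained round-trip sketch of that reflect-based pattern (the Point class and file name are illustrative):

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;
import org.apache.avro.reflect.ReflectDatumWriter;

public class ReflectRoundTrip {
  public static class Point {
    public int x;
    public int y;
  }

  public static void main(String[] args) throws IOException {
    Schema schema = ReflectData.get().getSchema(Point.class);
    File file = new File("points.avro");

    // Write with a reflect-based datum writer, as in the open() snippet above.
    try (DataFileWriter<Point> writer = new DataFileWriter<>(new ReflectDatumWriter<>(Point.class))) {
      writer.create(schema, file);
      Point p = new Point();
      p.x = 1;
      p.y = 2;
      writer.append(p);
    }

    // Read it back; the reflect reader maps records onto Point instances.
    try (DataFileReader<Point> reader = new DataFileReader<>(file, new ReflectDatumReader<>(Point.class))) {
      for (Point p : reader) {
        System.out.println(p.x + "," + p.y);
      }
    }
  }
}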
@BeforeClass
public static void writeSampleFile() throws IOException {
  sampleFile = new File(DIR.getRoot(), TestDataFileTools.class.getName() + ".avro");
  schema = Schema.create(Type.INT);
  schemaFile = new File(DIR.getRoot(), "schema-temp.schema");
  try (FileWriter fw = new FileWriter(schemaFile)) {
    fw.append(schema.toString());
  }
  StringBuilder builder = new StringBuilder();
  try (DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    writer.setMeta(KEY_NEEDING_ESCAPES, "");
    writer.create(schema, sampleFile);
    for (int i = 0; i < COUNT; ++i) {
      builder.append(Integer.toString(i));
      builder.append("\n");
      writer.append(i);
    }
  }
  jsonData = builder.toString();
}
@Test
public void testWriteAndRead() throws IOException {
  Schema schema = Schema.create(Type.STRING);

  // Write it
  DataFileWriter<Utf8> w = new DataFileWriter<>(new GenericDatumWriter<>(schema));
  w.setCodec(CodecFactory.deflateCodec(6));
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  w.create(schema, baos);
  w.append(new Utf8("hello world"));
  w.append(new Utf8("hello moon"));
  w.sync();
  w.append(new Utf8("bye bye world"));
  w.append(new Utf8("bye bye moon"));
  w.close();

  // Read it
  DataFileStream<Utf8> r = new DataFileStream<>(
      new ByteArrayInputStream(baos.toByteArray()), new GenericDatumReader<>(schema));
  assertEquals("hello world", r.next().toString());
  assertEquals("hello moon", r.next().toString());
  assertEquals("bye bye world", r.next().toString());
  assertEquals("bye bye moon", r.next().toString());
  assertFalse(r.hasNext());
}
private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException {
  Schema s = new Schema.Parser().parse(schemaToUse);  // Schema.parse(String) is deprecated
  GenericData.Record record = new GenericData.Record(s);
  GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
  innerRecord.put("int1", 42);
  innerRecord.put("boolean1", true);
  innerRecord.put("long1", 42432234234L);
  if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) {
    innerRecord.put("string1", "new value");
  }
  record.put("aRecord", innerRecord);

  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(s);
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
  dataFileWriter.create(s, out);
  dataFileWriter.append(record);
  dataFileWriter.close();
  byte[] data = out.toByteArray();
  out.close();
  return data;
}
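Since this helper exists to test schema evolution, the read side is worth sketching too: a hedged example of resolving the embedded writer schema against an evolved reader schema (class and method names are illustrative):

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class EvolvedRead {
  /** Reads the first record, resolving the file's writer schema to readerSchema. */
  static GenericRecord readFirst(byte[] avroBytes, Schema readerSchema) throws IOException {
    // The writer schema is embedded in the container header, so only the reader
    // (expected) schema is supplied; Avro resolves between the two on read.
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>(null, readerSchema);
    try (DataFileStream<GenericRecord> stream =
        new DataFileStream<>(new ByteArrayInputStream(avroBytes), datumReader)) {
      return stream.next();
    }
  }
}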
@Test(expected = AvroRuntimeException.class)
public void testUseMetaAfterCreate() throws IOException {
  DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>());
  w.create(Schema.create(Type.NULL), new ByteArrayOutputStream());
  w.setMeta("foo", "bar");
}
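The test above pins down an ordering rule: metadata can only be set before create(). A minimal sketch of the correct order (the metadata key and value are illustrative):

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;

public class MetaBeforeCreate {
  public static void main(String[] args) throws IOException {
    try (DataFileWriter<Object> w = new DataFileWriter<>(new GenericDatumWriter<>())) {
      w.setMeta("created-by", "example");  // OK: before create()
      w.create(Schema.create(Schema.Type.NULL), new ByteArrayOutputStream());
      // Calling w.setMeta(...) here would throw AvroRuntimeException, as tested above.
    }
  }
}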
public static void main(String[] args) throws Exception {
  if (args.length < 3 || args.length > 4) {
    System.out.println("Usage: RandomData <schemafile> <outputfile> <count> [codec]");
    System.exit(-1);
  }
  Schema sch = new Schema.Parser().parse(new File(args[0]));
  DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
  writer.setCodec(CodecFactory.fromString(args.length >= 4 ? args[3] : "null"));
  writer.create(sch, new File(args[1]));
  try {
    for (Object datum : new RandomData(sch, Integer.parseInt(args[2]))) {
      writer.append(datum);
    }
  } finally {
    writer.close();
  }
}
private File generateData(String file, Type type, Map<String, String> metadata, CodecFactory codec) throws Exception {
  File inputFile = new File(INPUT_DIR.getRoot(), file);
  Schema schema = Schema.create(type);
  try (DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    for (Entry<String, String> metadatum : metadata.entrySet()) {
      writer.setMeta(metadatum.getKey(), metadatum.getValue());
    }
    writer.setCodec(codec);
    writer.create(schema, inputFile);
    for (int i = 0; i < ROWS_IN_INPUT_FILES; i++) {
      writer.append(aDatum(type, i));
    }
  }
  return inputFile;
}
private File generateData(String file, Type type, Map<String, String> metadata, CodecFactory codec) throws Exception {
  File inputFile = new File(DIR.getRoot(), file);
  inputFile.deleteOnExit();

  Schema schema = null;
  if (type.equals(Schema.Type.INT)) {
    schema = INTSCHEMA;
  }
  if (type.equals(Schema.Type.STRING)) {
    schema = STRINGSCHEMA;
  }

  DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema));
  for (Entry<String, String> metadatum : metadata.entrySet()) {
    writer.setMeta(metadatum.getKey(), metadatum.getValue());
  }
  writer.setCodec(codec);
  writer.create(schema, inputFile);
  for (int i = 0; i < ROWS_IN_INPUT_FILES; i++) {
    writer.append(aDatum(type, i));
  }
  writer.close();
  return inputFile;
}
DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>());
Schema schema = null;
String inputCodec = null;
Map<String, byte[]> metadata = new TreeMap<>();
for (String inFile : inFiles) {
  DataFileStream<GenericRecord> reader =
      new DataFileStream<>(Util.fileOrStdin(inFile, stdin), new GenericDatumReader<>());
  if (schema == null) {
    // This is the first file: adopt its schema, non-reserved metadata, and codec.
    schema = reader.getSchema();
    for (String key : reader.getMetaKeys()) {
      if (!DataFileWriter.isReservedMeta(key)) {
        byte[] metadatum = reader.getMeta(key);
        metadata.put(key, metadatum);
        writer.setMeta(key, metadatum);
      }
    }
    inputCodec = reader.getMetaString(DataFileConstants.CODEC);
    if (inputCodec == null) {
      inputCodec = DataFileConstants.NULL_CODEC;
    }
    writer.setCodec(CodecFactory.fromString(inputCodec));
    writer.create(schema, output);
  } else {
    // Subsequent files must match the first file's schema and metadata.
    if (!schema.equals(reader.getSchema())) {
      err.println("input files have different schemas");
      reader.close();
      return 1;
    }
    for (String key : reader.getMetaKeys()) {
      if (!DataFileWriter.isReservedMeta(key)) {
        byte[] metadatum = reader.getMeta(key);
        byte[] writersMetadatum = metadata.get(key);
        if (!Arrays.equals(metadatum, writersMetadatum)) {
          err.println("input files have different non-reserved metadata");
          reader.close();
          return 2;
        }
      }
    }
  }
  writer.appendAllFrom(reader, /*recompress*/ false);
  reader.close();
}
writer.close();
return 0;
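Distilled from the tool-style fragment above, a self-contained sketch of block-level concatenation with appendAllFrom; it assumes every input shares the first file's schema and codec, and all names are illustrative:

import java.io.File;
import java.io.IOException;

import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class ConcatAvroFiles {
  /** Appends each input's blocks to the output without deserializing records. */
  public static void concat(File output, File... inputs) throws IOException {
    try (DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<>())) {
      boolean created = false;
      for (File input : inputs) {
        try (DataFileReader<GenericRecord> reader =
            new DataFileReader<>(input, new GenericDatumReader<>())) {
          if (!created) {
            // Adopt the first file's schema and codec for the output.
            String codecName = reader.getMetaString(DataFileConstants.CODEC);
            writer.setCodec(CodecFactory.fromString(
                codecName == null ? DataFileConstants.NULL_CODEC : codecName));
            writer.create(reader.getSchema(), output);
            created = true;
          }
          // recompress=false copies compressed blocks verbatim (codecs must match).
          writer.appendAllFrom(reader, false);
        }
      }
    }
  }
}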
RandomData data1 = new RandomData(SCHEMA, COUNT, SEED);
RandomData data2 = new RandomData(SCHEMA, COUNT, SEED + 1);
File file1 = makeFile((codec == null ? "null" : codec.toString()) + "-A");
File file2 = makeFile((codec2 == null ? "null" : codec2.toString()) + "-B");

DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>())
    .setSyncInterval(syncInterval);
if (codec != null) {
  writer.setCodec(codec);
}
writer.create(SCHEMA, file1);
try {
  for (Object datum : data1) {
    writer.append(datum);
  }
} finally {
  writer.close();
}

DataFileWriter<Object> writer2 = new DataFileWriter<>(new GenericDatumWriter<>())
    .setSyncInterval(syncInterval);
if (codec2 != null) {
  writer2.setCodec(codec2);
}
writer2.create(SCHEMA, file2);
try {
  for (Object datum : data2) {
    writer2.append(datum);
  }
} finally {
  writer2.close();
}
output = Util.createFromFS(lastArg);
writer = new DataFileWriter<>(new GenericDatumWriter<>());
String codecName = reader.getMetaString(DataFileConstants.CODEC);
CodecFactory codec = (codecName == null)
    ? CodecFactory.fromString(DataFileConstants.NULL_CODEC)
    : CodecFactory.fromString(codecName);
writer.setCodec(codec);
for (String key : reader.getMetaKeys()) {
  if (!DataFileWriter.isReservedMeta(key)) {
    writer.setMeta(key, reader.getMeta(key));
  }
}
writer.create(schema, output);

// ... copy the selected records from reader to writer, counting totalCopied ...

System.out.println(totalCopied + " records written.");
writer.flush();
writer.close();
Util.close(out);
return 0;