/** Set a metadata property, storing the string value as UTF-8 bytes. */
public DataFileWriter<D> setMeta(String key, String value) {
  final byte[] utf8Bytes = value.getBytes(StandardCharsets.UTF_8);
  return setMeta(key, utf8Bytes);
}

/** Set a metadata property. */
/** Set a metadata property, storing the long as its decimal string form. */
public DataFileWriter<D> setMeta(String key, long value) {
  return setMeta(key, String.valueOf(value));
}
/** Set a metadata property, storing the long as its decimal string form. */
public DataFileWriter<D> setMeta(String key, long value) {
  return setMeta(key, String.valueOf(value));
}
/** Set a metadata property, storing the string value as UTF-8 bytes. */
public DataFileWriter<D> setMeta(String key, String value) {
  // StandardCharsets.UTF_8 is guaranteed present on every JVM, so the legacy
  // getBytes("UTF-8") call — and the impossible UnsupportedEncodingException
  // it forced us to wrap in a RuntimeException — is no longer needed.
  return setMeta(key, value.getBytes(StandardCharsets.UTF_8));
}

/** Set a metadata property. */
static <T> void configureDataFileWriter(DataFileWriter<T> writer, JobConf job) throws UnsupportedEncodingException { CodecFactory factory = getCodecFactory(job); if (factory != null) { writer.setCodec(factory); } writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL)); // copy metadata from job for (Map.Entry<String,String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), StandardCharsets.ISO_8859_1.name()) .getBytes(StandardCharsets.ISO_8859_1)); } }
// Prepares an Avro writer for appending to a FlowFile: configures the codec,
// copies non-reserved metadata from the incoming file, writes the Avro file
// header into the FlowFile, and returns that header as a byte array so later
// appends to the same FlowFile can reuse it.
// NOTE(review): the writer is created against a ByteArrayOutputStream and then
// closed inside the session.append lambda — only the serialized header bytes
// are transferred; the writer itself is not used for record appends afterward.
// The lambda is presumably executed synchronously by session.append, so
// avroHeader is fully populated by the time it is read at the end — TODO confirm
// against the ProcessSession contract.
private byte[] initAvroWriter(ProcessSession session, String codec, DataFileStream<GenericRecord> reader, DataFileWriter<GenericRecord> writer, AtomicReference<FlowFile> flowFileRef) { writer.setCodec(CodecFactory.fromString(codec)); // Transfer metadata (this is a subset of the incoming file) for (String metaKey : reader.getMetaKeys()) { if (!RESERVED_METADATA.contains(metaKey)) { writer.setMeta(metaKey, reader.getMeta(metaKey)); } } final ByteArrayOutputStream avroHeader = new ByteArrayOutputStream(); flowFileRef.set(session.append(flowFileRef.get(), (out) -> { // Create writer so that records can be appended later. writer.create(reader.getSchema(), avroHeader); writer.close(); final byte[] header = avroHeader.toByteArray(); out.write(header); })); // Capture the Avro header byte array that is just written to the FlowFile. // This is needed when Avro records are appended to the same FlowFile. return avroHeader.toByteArray(); }
/**
 * Initializes the merged-output writer: optionally copies non-reserved
 * metadata from the source file, sets the codec, and writes the file header
 * for the source schema to the output stream.
 */
@Override
public void init(final DataFileStream<GenericRecord> reader, final String codec,
    final OutputStream out) throws IOException {
  writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
  if (transferMetadata) {
    // Carry over user metadata, skipping Avro-reserved keys.
    for (final String metaKey : reader.getMetaKeys()) {
      if (RESERVED_METADATA.contains(metaKey)) {
        continue;
      }
      writer.setMeta(metaKey, reader.getMeta(metaKey));
    }
  }
  writer.setCodec(CodecFactory.fromString(codec));
  writer.create(reader.getSchema(), out);
}
/** Setting a key in the reserved "avro." namespace must be rejected. */
@Test(expected = AvroRuntimeException.class)
public void testUseReservedMeta() {
  final DataFileWriter<?> writer = new DataFileWriter<>(new GenericDatumWriter<>());
  writer.setMeta("avro.foo", "bar");
}
// Copies a single metadata entry from reader to writer, skipping the
// reserved "avro." namespace. NOTE(review): fragment — the enclosing loop
// and the closing brace of this if-block are outside the visible source.
if (!key.startsWith("avro.")) { byte[] val = fileReader.getMeta(key); fileWriter.setMeta(key, val);
/** Metadata may only be set before create(); afterwards it must throw. */
@Test(expected = AvroRuntimeException.class)
public void testUseMetaAfterCreate() throws IOException {
  final DataFileWriter<?> writer = new DataFileWriter<>(new GenericDatumWriter<>());
  writer.create(Schema.create(Type.NULL), new ByteArrayOutputStream());
  writer.setMeta("foo", "bar");
}
// Copies all non-reserved metadata entries from reader to writer, using the
// public DataFileWriter.isReservedMeta check. NOTE(review): fragment — the
// loop body's closing braces are outside the visible source.
for (String key : reader.getMetaKeys()) { if (!DataFileWriter.isReservedMeta(key)) { writer.setMeta(key, reader.getMeta(key));
/**
 * Writes the shared sample Avro file (COUNT int records) plus its schema
 * file, and records the expected newline-separated JSON output in jsonData.
 */
@BeforeClass
public static void writeSampleFile() throws IOException {
  sampleFile = new File(DIR.getRoot(), TestDataFileTools.class.getName() + ".avro");
  schema = Schema.create(Type.INT);
  schemaFile = new File(DIR.getRoot(), "schema-temp.schema");
  try (FileWriter schemaOut = new FileWriter(schemaFile)) {
    schemaOut.append(schema.toString());
  }
  final StringBuilder expected = new StringBuilder();
  try (DataFileWriter<Object> writer =
      new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    writer.setMeta(KEY_NEEDING_ESCAPES, "");
    writer.create(schema, sampleFile);
    for (int i = 0; i < COUNT; ++i) {
      expected.append(Integer.toString(i));
      expected.append("\n");
      writer.append(i);
    }
  }
  jsonData = expected.toString();
}
/** Metadata set before create() must round-trip through the written file. */
@Test
public void testUseMeta() throws IOException {
  File f = new File(DIR.getRoot().getPath(), "testDataFileMeta.avro");
  // try-with-resources makes the writer close exception-safe (the bare
  // w.close() was skipped if create/setMeta threw).
  try (DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>())) {
    w.setMeta("hello", "bar");
    w.create(Schema.create(Type.NULL), f);
  }
  // Fix: the reader (and its underlying FileInputStream) was never closed,
  // leaking a file handle for the rest of the test run.
  try (DataFileStream<Void> r =
      new DataFileStream<>(new FileInputStream(f), new GenericDatumReader<>())) {
    assertTrue(r.getMetaKeys().contains("hello"));
    assertEquals("bar", r.getMetaString("hello"));
  }
}
/**
 * Writes a test Avro file of ROWS_IN_INPUT_FILES records of the given type,
 * with the given user metadata and codec, and returns the file.
 */
private File generateData(String file, Type type, Map<String, String> metadata,
    CodecFactory codec) throws Exception {
  final File inputFile = new File(INPUT_DIR.getRoot(), file);
  final Schema schema = Schema.create(type);
  try (DataFileWriter<Object> writer =
      new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    // Metadata and codec must be configured before create().
    metadata.forEach(writer::setMeta);
    writer.setCodec(codec);
    writer.create(schema, inputFile);
    for (int row = 0; row < ROWS_IN_INPUT_FILES; row++) {
      writer.append(aDatum(type, row));
    }
  }
  return inputFile;
}
// Copies one metadata value from reader to writer while also recording it in
// a local map. NOTE(review): fragment — the enclosing loop/guard is outside
// the visible source; presumably key iterates reader.getMetaKeys().
byte[] metadatum = reader.getMeta(key); metadata.put(key, metadatum); writer.setMeta(key, metadatum);
// Stamps the transfer target into the tracker file's metadata, writes the
// tracker file header, then reopens it for reading. NOTE(review): fragment —
// surrounding method, and the lifecycle of writer/reader, are outside the
// visible source.
writer.setMeta("file", target); writer.create(TransferStateFileMeta.SCHEMA$, trackerFile); reader = new DataFileReader<TransferStateFileMeta>(trackerFile, din);
/**
 * Writes a test Avro file of ROWS_IN_INPUT_FILES records (INT or STRING
 * schema), with the given user metadata and codec, and returns the file.
 *
 * @throws IllegalArgumentException if type is neither INT nor STRING
 */
private File generateData(String file, Type type, Map<String, String> metadata,
    CodecFactory codec) throws Exception {
  File inputFile = new File(DIR.getRoot(), file);
  inputFile.deleteOnExit();

  Schema schema = null;
  if (type.equals(Schema.Type.INT)) {
    schema = INTSCHEMA;
  }
  if (type.equals(Schema.Type.STRING)) {
    schema = STRINGSCHEMA;
  }
  // Fail fast instead of a confusing NPE deep inside the writer.
  if (schema == null) {
    throw new IllegalArgumentException("Unsupported type: " + type);
  }

  // Fix: try-with-resources closes the writer even when create/append throws
  // (the original leaked the writer on any exception); also consistent with
  // the sibling generateData implementation.
  try (DataFileWriter<Object> writer =
      new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    for (Entry<String, String> metadatum : metadata.entrySet()) {
      writer.setMeta(metadatum.getKey(), metadatum.getValue());
    }
    writer.setCodec(codec);
    writer.create(schema, inputFile);
    for (int i = 0; i < ROWS_IN_INPUT_FILES; i++) {
      writer.append(aDatum(type, i));
    }
  }
  return inputFile;
}
// Copies all non-reserved metadata entries from reader to writer, using the
// public DataFileWriter.isReservedMeta check. NOTE(review): fragment — the
// loop body's closing braces are outside the visible source.
for (String key : reader.getMetaKeys()) { if (!DataFileWriter.isReservedMeta(key)) { writer.setMeta(key, reader.getMeta(key));
// Builds a writer fluently: setMeta returns the writer, allowing metadata to
// be chained before create(). NOTE(review): fragment — the writer's close()
// (or try-with-resources) is outside the visible source; confirm it is
// closed by the caller.
DataFileWriter<String> writer = new DataFileWriter<>( new GenericDatumWriter<String>(schema)) .setMeta(metaKey, metaValue) .create(schema, inputFile);
/** Set a metadata property, storing the string value as UTF-8 bytes. */
public DataFileWriter<D> setMeta(String key, String value) {
  // StandardCharsets.UTF_8 is guaranteed present on every JVM, so the legacy
  // getBytes("UTF-8") call — and the impossible UnsupportedEncodingException
  // it forced us to wrap in a RuntimeException — is no longer needed.
  return setMeta(key, value.getBytes(StandardCharsets.UTF_8));
}

/** Set a metadata property. */