/** Reads column-file metadata from the given buffer into a fresh instance. */
static ColumnFileMetaData read(InputBuffer in) throws IOException {
  ColumnFileMetaData meta = new ColumnFileMetaData();
  MetaData.read(in, meta);
  return meta;
}
/** Builds the file-level metadata, recording the configured codec and checksum. */
private ColumnFileMetaData createFileMeta() {
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  return meta.setCodec(codec).setChecksum(checksum);
}
/**
 * Construct a reader for a file.
 * Opens the underlying column file, recovers the schema the file was written
 * with, and uses the caller-supplied read schema when one is given.
 *
 * @param params input, data model, and optional read schema
 * @throws IOException if the file header or metadata cannot be read
 */
public AvroColumnReader(Params params) throws IOException {
  this.reader = new ColumnFileReader(params.input);
  this.model = params.model;
  // Schema.parse(String) is deprecated; use Schema.Parser instead
  // (consistent with the rest of this codebase).
  this.fileSchema = new Schema.Parser().parse(
      reader.getMetaData().getString(AvroColumnWriter.SCHEMA_KEY));
  // Fall back to the writer's schema when no read schema was requested.
  this.readSchema = params.schema == null ? fileSchema : params.schema;
  initialize();
}
/**
 * Extracts Trevni metadata from the job configuration: every entry whose key
 * starts with META_PREFIX is copied, with the prefix stripped.
 */
static ColumnFileMetaData filterMetadata(final JobConf job) {
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  for (Map.Entry<String,String> entry : job) {
    String key = entry.getKey();
    if (key.startsWith(META_PREFIX)) {
      meta.put(key.substring(META_PREFIX.length()),
               entry.getValue().getBytes(MetaData.UTF8));
    }
  }
  return meta;
}
// Copy every record from the reader into a new Trevni column file,
// carrying over the reader's schema and the codec chosen on the command line.
AvroColumnWriter<Object> writer =
    new AvroColumnWriter<>(reader.getSchema(),
                           new ColumnFileMetaData().setCodec(codec.value(opts)));
for (Object datum : reader)
  writer.write(datum);
// Reads the column-file header: magic bytes, row count, column count, file
// metadata, then per-column metadata and column start offsets.  The read
// order must mirror the writer's on-disk layout exactly.
private void readHeader() throws IOException {
  InputBuffer in = new InputBuffer(file, 0);   // start at file offset 0
  readMagic(in);                               // validates the file magic
  this.rowCount = in.readFixed64();
  this.columnCount = in.readFixed32();
  this.metaData = ColumnFileMetaData.read(in);
  // Pre-size lookup structures to the known column count.
  this.columnsByName = new HashMap<>(columnCount);
  columns = new ColumnDescriptor[columnCount];
  readColumnMetaData(in);
  readColumnStarts(in);
}
/**
 * Collects configuration entries prefixed with META_PREFIX into the file
 * metadata, stripping the prefix from each key.
 */
static ColumnFileMetaData filterMetadata(final Configuration configuration) {
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  for (Entry<String, String> confEntry : configuration) {
    if (confEntry.getKey().startsWith(META_PREFIX)) {
      meta.put(confEntry.getKey().substring(META_PREFIX.length()),
               confEntry.getValue().getBytes(MetaData.UTF8));
    }
  }
  return meta;
} }
// Stream all input records into a Trevni writer configured with the
// requested compression codec and the reader's schema.
AvroColumnWriter<Object> writer =
    new AvroColumnWriter<>(reader.getSchema(),
                           new ColumnFileMetaData().setCodec(codec.value(opts)));
for (Object datum : reader) {
  writer.write(datum);
}
// Reads the column-file header in the exact order the writer laid it out:
// magic bytes, row count, column count, file metadata, per-column metadata,
// and finally the column start offsets.
private void readHeader() throws IOException {
  InputBuffer in = new InputBuffer(file, 0);   // begin at offset 0
  readMagic(in);                               // validates the file magic
  this.rowCount = in.readFixed64();
  this.columnCount = in.readFixed32();
  this.metaData = ColumnFileMetaData.read(in);
  // Pre-size the name lookup map for the known number of columns.
  this.columnsByName = new HashMap<String,ColumnDescriptor>(columnCount);
  columns = new ColumnDescriptor[columnCount];
  readColumnMetaData(in);
  readColumnStarts(in);
}
/**
 * Command-line entry point: generates {@code count} random records of the
 * given schema and writes them to a Trevni file.
 * Usage: schemaFile count outputFile
 *
 * @return 0 on success, 1 on bad usage
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err,
               List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaFile = new File(args.get(0));
  int count = Integer.parseInt(args.get(1));
  File outputFile = new File(args.get(2));
  Schema schema = new Schema.Parser().parse(schemaFile);
  AvroColumnWriter<Object> writer =
      new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, count)) {
    writer.write(datum);
  }
  writer.writeTo(outputFile);
  return 0;
} }
/** File metadata configured for no compression ("null" codec) and no checksums. */
private ColumnFileMetaData createFileMeta() {
  final ColumnFileMetaData meta = new ColumnFileMetaData();
  return meta.setCodec("null").setChecksum("null");
}
: AvroJob.getOutputSchema(job); final ColumnFileMetaData meta = new ColumnFileMetaData(); for (Map.Entry<String,String> e : job) if (e.getKey().startsWith(META_PREFIX)) meta.put(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue().getBytes(MetaData.UTF8));
/**
 * Construct a reader for a file.
 * Opens the column file, recovers the schema stored by the writer, and reads
 * with the caller's schema when one was supplied.
 *
 * @param params input, data model, and optional read schema
 * @throws IOException if the file header or metadata cannot be read
 */
public AvroColumnReader(Params params) throws IOException {
  this.reader = new ColumnFileReader(params.input);
  this.model = params.model;
  // Schema.parse(String) is deprecated; use Schema.Parser instead.
  this.fileSchema = new Schema.Parser().parse(
      reader.getMetaData().getString(AvroColumnWriter.SCHEMA_KEY));
  // Default to the writer's schema when no read schema was requested.
  this.readSchema = params.schema == null ? fileSchema : params.schema;
  initialize();
}
private void checkWrite(Schema schema) throws IOException { AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData()); int count = 0; for (Object datum : new RandomData(schema, COUNT, SEED)) { //System.out.println("datum="+datum); writer.write(datum); } writer.writeTo(FILE); }
/** Creates the file-level metadata carrying the configured codec and checksum. */
private ColumnFileMetaData createFileMeta() {
  ColumnFileMetaData fileMeta = new ColumnFileMetaData().setCodec(codec);
  return fileMeta.setChecksum(checksum);
}
private void runCase(File dir) throws Exception { Schema schema = Schema.parse(new File(dir, "input.avsc")); List<Object> data = fromJson(schema, new File(dir, "input.json")); // write full data AvroColumnWriter<Object> writer = new AvroColumnWriter<>(schema, new ColumnFileMetaData()); for (Object datum : data) writer.write(datum); writer.writeTo(FILE); // test that the full schema reads correctly checkRead(schema, data); // test that sub-schemas read correctly for (File f : dir.listFiles()) if (f.isDirectory() && !f.getName().startsWith(".")) { Schema s = Schema.parse(new File(f, "sub.avsc")); checkRead(s, fromJson(s, new File(f, "sub.json"))); } }
/**
 * Round-trips a record through a Trevni file and reads it back with an
 * evolved schema, verifying the evolved record is produced and no extra
 * records remain.
 */
@Test
public void testTrevniEvolvedRead() throws IOException {
  AvroColumnWriter<GenericRecord> columnWriter =
      new AvroColumnWriter<>(writer, new ColumnFileMetaData());
  columnWriter.write(writtenRecord);

  File serializedTrevni = File.createTempFile("trevni", null);
  columnWriter.writeTo(serializedTrevni);

  AvroColumnReader.Params params = new Params(serializedTrevni);
  params.setSchema(evolved);
  AvroColumnReader<GenericRecord> columnReader = new AvroColumnReader<>(params);

  GenericRecord readRecord = columnReader.next();
  Assert.assertEquals(evolvedRecord, readRecord);
  Assert.assertFalse(columnReader.hasNext());
}
/** Deserializes column-file metadata from the buffer and returns it. */
static ColumnFileMetaData read(InputBuffer in) throws IOException {
  final ColumnFileMetaData fileMeta = new ColumnFileMetaData();
  MetaData.read(in, fileMeta);
  return fileMeta;
}
/**
 * Command-line entry point: writes {@code count} random records of the given
 * schema to a Trevni file.
 * Usage: schemaFile count outputFile
 *
 * @return 0 on success, 1 on bad usage
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err,
               List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaFile = new File(args.get(0));
  int count = Integer.parseInt(args.get(1));
  File outputFile = new File(args.get(2));
  // Schema.parse(File) is deprecated; use Schema.Parser instead.
  Schema schema = new Schema.Parser().parse(schemaFile);
  AvroColumnWriter<Object> writer =
      new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, count))
    writer.write(datum);
  writer.writeTo(outputFile);
  return 0;
} }
/**
 * Command-line entry point: generates {@code count} random records for the
 * given schema and writes them to a Trevni output file.
 * Usage: schemaFile count outputFile
 *
 * @return 0 on success, 1 on bad usage
 */
@Override
public int run(InputStream stdin, PrintStream out, PrintStream err,
               List<String> args) throws Exception {
  if (args.size() != 3) {
    err.println("Usage: schemaFile count outputFile");
    return 1;
  }
  File schemaFile = new File(args.get(0));
  int count = Integer.parseInt(args.get(1));
  File outputFile = new File(args.get(2));
  // Schema.parse(File) is deprecated; use Schema.Parser instead.
  Schema schema = new Schema.Parser().parse(schemaFile);
  AvroColumnWriter<Object> writer =
      new AvroColumnWriter<>(schema, new ColumnFileMetaData());
  for (Object datum : new RandomData(schema, count))
    writer.write(datum);
  writer.writeTo(outputFile);
  return 0;
} }