/**
 * Open a reader for a file.
 *
 * <p>Fix: the original leaked the {@link SeekableFileInput} when the delegated
 * {@code openReader(SeekableInput, DatumReader)} threw (e.g. on a corrupt or
 * non-Avro file). On failure we now close the input and record any secondary
 * close failure as a suppressed exception; on success the returned reader owns
 * and eventually closes the input.
 *
 * @param file the Avro data file to read
 * @param reader the datum reader used to decode records
 * @return an open {@code FileReader} positioned at the start of the file
 * @throws IOException if the file cannot be opened or is not a valid Avro file
 */
public static <D> FileReader<D> openReader(File file, DatumReader<D> reader) throws IOException {
  SeekableFileInput input = new SeekableFileInput(file);
  try {
    return openReader(input, reader);
  } catch (final Throwable t) {
    try {
      input.close();
    } catch (IOException onClose) {
      t.addSuppressed(onClose);
    }
    throw t;
  }
}
/**
 * Constructs a record reader for the given split by opening an Avro file reader
 * over the split's path with a generic (schema-from-file) datum reader, then
 * delegating to the sibling constructor.
 *
 * <p>The {@code this(...)} call must be the first statement, so the reader is
 * built inline. Ownership of the opened reader passes to the delegate constructor.
 *
 * @param job the job configuration used to resolve the filesystem
 * @param split the file split to read
 * @throws IOException if the underlying Avro file cannot be opened
 */
public AvroAsTextRecordReader(JobConf job, FileSplit split) throws IOException { this(DataFileReader.openReader (new FsInput(split.getPath(), job), new GenericDatumReader<>()), split); }
/**
 * Creates a stream that re-encodes the Avro records of {@code status}'s file as JSON.
 *
 * <p>Opens the file through a {@link FileContext}-backed {@link AvroFSInput}, reads
 * its embedded schema, and wires a generic writer plus a JSON encoder over an
 * in-memory buffer so records can be rendered as JSON text on demand.
 *
 * @param status the file to open
 * @throws IOException if the file cannot be opened or its schema read
 */
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  // Resolve the file through the default FileContext for the current configuration.
  FileContext fc = FileContext.getFileContext(new Configuration());
  GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>();
  fileReader = DataFileReader.openReader(new AvroFSInput(fc, status.getPath()), datumReader);
  // The writer/encoder pair reuses the schema embedded in the data file.
  Schema schema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(schema);
  output = new ByteArrayOutputStream();
  encoder = EncoderFactory.get().jsonEncoder(schema, output);
}
/**
 * Opens an Avro file reader over the shared test data file.
 *
 * @param state unused here; kept for the caller's signature contract
 * @return an open reader over {@link TestConstants#TEST_FILE_NAME}; caller must close it
 * @throws Exception if the file cannot be opened
 */
private FileReader<GenericRecord> openFile(State state) throws Exception {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
  return DataFileReader.openReader(new File(TestConstants.TEST_FILE_NAME), datumReader);
}
}
/**
 * Open a reader for a file.
 *
 * <p>If the delegated open fails for any reason, the freshly created input is
 * closed quietly before the failure is rethrown, so no file handle leaks. On
 * success the returned reader takes ownership of the input. (Deliberately not
 * try-with-resources: the input must stay open on the success path.)
 *
 * @param file the Avro data file to read
 * @param reader the datum reader used to decode records
 * @return an open {@code FileReader} over {@code file}
 * @throws IOException if the file cannot be opened as an Avro data file
 */
public static <D> FileReader<D> openReader(File file, DatumReader<D> reader) throws IOException {
  final SeekableFileInput in = new SeekableFileInput(file);
  try {
    return openReader(in, reader);
  } catch (final Throwable failure) {
    IOUtils.closeQuietly(in);
    throw failure;
  }
}
/**
 * Opens an Avro {@link DataFileReader} for the given split.
 *
 * <p>Fix: the downcast used the raw type {@code (DataFileReader)}; it is now the
 * parameterized {@code (DataFileReader<E>)}, eliminating the raw-type warning
 * without changing behavior. The ternary was also expanded to an if/else chain
 * for readability.
 *
 * @param split the input split whose path and bounds define what to read
 * @return an open reader over the split's file
 * @throws IOException if the file's length cannot be determined or the file
 *     cannot be opened as an Avro data file
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
  // Choose the datum reader that matches the configured value type:
  // generic records, generated specific records, or reflection-based POJOs.
  DatumReader<E> datumReader;
  if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
    datumReader = new GenericDatumReader<E>();
  } else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
    datumReader = new SpecificDatumReader<E>(avroValueType);
  } else {
    datumReader = new ReflectDatumReader<E>(avroValueType);
  }
  if (LOG.isInfoEnabled()) {
    LOG.info("Opening split {}", split);
  }
  // Wrap the already-open stream; openReader needs the total file length for seeking.
  SeekableInput in = new FSDataInputStreamWrapper(
      stream, split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
  DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
  }
  // Remember where this split ends and reset the per-sync record counter.
  end = split.getStart() + split.getLength();
  recordsReadSinceLastSync = 0;
  return dataFileReader;
}
/**
 * Reads every datum from {@code f} with the given reader, asserting each is non-null.
 *
 * <p>Fix: the reader was never closed, leaking a file handle per call; it is now
 * managed with try-with-resources (matching the sibling {@code readFiles} helper).
 *
 * @param f the Avro data file to read
 * @param datumReader the datum reader used to decode records
 * @throws IOException if the file cannot be opened or read
 */
static void readFile(File f, DatumReader<? extends Object> datumReader) throws IOException {
  try (FileReader<? extends Object> reader = DataFileReader.openReader(f, datumReader)) {
    for (Object datum : reader) {
      assertNotNull(datum);
    }
  }
}
/**
 * Reads every Avro data file in {@code DATAFILE_DIR}, asserting each decoded
 * datum is non-null. A fresh datum reader is obtained from {@code provider}
 * per file, and each file reader is closed via try-with-resources.
 *
 * @param provider supplies a datum reader per file
 * @throws IOException if any file cannot be opened or read
 */
private <T extends Object> void readFiles(DatumReaderProvider<T> provider) throws IOException {
  File[] dataFiles = Objects.requireNonNull(DATAFILE_DIR.listFiles());
  for (File dataFile : dataFiles) {
    System.out.println("Reading: " + dataFile.getName());
    try (FileReader<? extends Object> fileReader = DataFileReader.openReader(dataFile, provider.get())) {
      for (Object decoded : fileReader) {
        Assert.assertNotNull(decoded);
      }
    }
  }
}
/**
 * Reads all records from the Avro data file at {@code path}.
 *
 * <p>Fix: the file reader was only closed on the success path; if iteration
 * threw, the handle leaked. It is now managed with try-with-resources so it is
 * closed on every path.
 *
 * @param path filesystem path of the Avro data file
 * @return all records in file order
 * @throws IOException if the file cannot be opened or read
 */
public static List<GenericRecord> getRecordFromFile(String path) throws IOException {
  Configuration config = new Configuration();
  SeekableInput input = new FsInput(new Path(path), config);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  List<GenericRecord> records = new ArrayList<>();
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, datumReader)) {
    for (GenericRecord record : fileReader) {
      records.add(record);
    }
  }
  return records;
}
/**
 * Reads all records from the Avro data file at {@code path}.
 *
 * <p>Fix: the file reader was only closed on the success path; if iteration
 * threw, the handle leaked. It is now managed with try-with-resources so it is
 * closed on every path.
 *
 * @param path filesystem path of the Avro data file
 * @return all records in file order
 * @throws IOException if the file cannot be opened or read
 */
public static List<GenericRecord> getRecordFromFile(String path) throws IOException {
  Configuration config = new Configuration();
  SeekableInput input = new FsInput(new Path(path), config);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  List<GenericRecord> records = new ArrayList<>();
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, datumReader)) {
    for (GenericRecord record : fileReader) {
      records.add(record);
    }
  }
  return records;
}
/**
 * Constructs a record reader for the given split: opens an Avro file reader over
 * the split's path, using a datum reader built from the job's configured input
 * data model and input schema, then delegates to the sibling constructor.
 *
 * <p>The {@code this(...)} call must be the first statement, so the reader is
 * built inline; ownership passes to the delegate constructor.
 *
 * @param job the job configuration supplying the data model and input schema
 * @param split the file split to read
 * @throws IOException if the underlying Avro file cannot be opened
 */
public AvroRecordReader(JobConf job, FileSplit split) throws IOException { this(DataFileReader.openReader (new FsInput(split.getPath(), job), AvroJob.createInputDataModel(job) .createDatumReader(AvroJob.getInputSchema(job))), split); }
/**
 * Deserializes exactly one {@code X} record from the Avro bytes in {@code bout},
 * asserting the stream contains one and only one record.
 *
 * <p>Fix: the {@code FileReader} was never closed; it is now managed with
 * try-with-resources. (Underlying input is an in-memory buffer, but the reader
 * still holds decoder state worth releasing.)
 *
 * @param schema the reflect schema for {@code X}
 * @param bout buffer holding a serialized Avro data file
 * @return the single decoded record
 * @throws IOException if the bytes cannot be read as an Avro data file
 */
private X readOneXFromAvro(Schema schema, ByteArrayOutputStream bout) throws IOException {
  SeekableByteArrayInput input = new SeekableByteArrayInput(bout.toByteArray());
  ReflectDatumReader<X> datumReader = new ReflectDatumReader<>(schema);
  try (FileReader<X> reader = DataFileReader.openReader(input, datumReader)) {
    Iterator<X> it = reader.iterator();
    assertTrue("missing first record",it.hasNext());
    X record = it.next();
    assertFalse("should be no more records - only wrote one out",it.hasNext());
    return record;
  }
}
/**
 * Globs the HDFS log files written for this application/schema version, decodes
 * every wrapped record, checks each record's header equals {@code header}, and
 * finally asserts the collected payloads equal {@code testLogs} in order.
 *
 * <p>Readers are closed quietly in {@code finally} so a decode failure in one
 * file still releases its handle without masking the original exception.
 *
 * @param header the expected header on every wrapped record
 * @param testLogs the expected payloads, in write order
 * @throws IOException if a file cannot be opened or read
 */
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
  Path logsPath = new Path("/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR + logSchemaVersion + Path.SEPARATOR + "data*");
  FileStatus[] matchedStatuses = fileSystem.globStatus(logsPath);
  List<TestLogData> collectedLogs = new ArrayList<>();
  // Records are stored wrapped (header + data); rebuild the wrapper schema to decode them.
  Schema wrapperSchema = RecordWrapperSchemaGenerator.generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
  for (FileStatus matched : matchedStatuses) {
    FileReader<GenericRecord> avroReader = null;
    try {
      SeekableInput seekable = new FsInput(matched.getPath(), fileSystem.getConf());
      DatumReader<GenericRecord> wrapperReader = new SpecificDatumReader<>(wrapperSchema);
      avroReader = DataFileReader.openReader(seekable, wrapperReader);
      for (GenericRecord wrapped : avroReader) {
        RecordHeader actualHeader = (RecordHeader) wrapped.get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
        Assert.assertEquals(header, actualHeader);
        TestLogData payload = (TestLogData) wrapped.get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD);
        collectedLogs.add(payload);
      }
    } finally {
      IOUtils.closeQuietly(avroReader);
    }
  }
  Assert.assertEquals(testLogs, collectedLogs);
}
// Reopen the data stream using a previously captured header (sync == true lets the
// reader scan forward to the next sync marker), then prepare a reusable generic
// record shaped by the file's schema.
// NOTE(review): fragment of a larger method — `sbai`, `datumReader`, and `fakeHeader`
// are defined above this view; presumably `fakeHeader` was obtained from an earlier
// reader's getHeader() — confirm against the enclosing method.
dataFileReader = DataFileReader.openReader(sbai, datumReader, fakeHeader, true); Schema schema = dataFileReader.getSchema(); GenericRecord gr = new GenericData.Record(schema);
public void testReadWithHeader() throws IOException { File file = makeFile(); DataFileReader<Object> reader = new DataFileReader<>(file, new GenericDatumReader<>()); // get a header for this file DataFileStream.Header header = reader.getHeader(); // re-open to an arbitrary position near the middle, with sync == true SeekableFileInput sin = new SeekableFileInput(file); sin.seek(sin.length() / 2); reader = DataFileReader.openReader(sin, new GenericDatumReader<>(), header, true); assertNotNull("Should be able to reopen from arbitrary point", reader.next()); long validPos = reader.previousSync(); // post sync, we know of a valid sync point: re-open with seek (sync == false) sin.seek(validPos); reader = DataFileReader.openReader(sin, new GenericDatumReader<>(), header, false); assertEquals("Should not move from sync point on reopen", validPos, sin.tell()); assertNotNull("Should be able to reopen at sync point", reader.next()); }
// Build a generic datum reader pinned to the expected user schema, then open the test
// file with try-with-resources so the reader is closed when the (unseen) block exits.
// NOTE(review): fragment — the try body continues past this view; `userSchema` and
// `testFile` are defined above it.
DatumReader<GenericData.Record> datumReader = new GenericDatumReader<>(userSchema); try (FileReader<GenericData.Record> dataFileReader = DataFileReader.openReader(testFile, datumReader)) {
// Open the test file and read its first User record; try-with-resources closes the
// reader when the (unseen) block exits.
// NOTE(review): fragment — the try body continues past this view; `testFile` and
// `datumReader` are defined above it.
try (FileReader<User> dataFileReader = DataFileReader.openReader(testFile, datumReader)) { User rec = dataFileReader.next();
/**
 * Round-trips one generic record through Avro serialization and verifies the
 * decoded record preserves the {@code name} field.
 *
 * <p>Improvement: the manual try/finally with a null check is replaced by
 * try-with-resources, which closes the reader on every path with less ceremony.
 * Assertions and behavior are unchanged.
 *
 * @throws Exception if serialization or deserialization fails
 */
@Test
public void testSerialization() throws Exception {
  Schema testSchema = getTestSchema();
  GenericRecord message = new Record(testSchema);
  message.put("name", "testValue");
  byte[] data = getSerializedMessage(message, testSchema);
  GenericDatumReader<IndexedRecord> reader = new GenericDatumReader<>(testSchema);
  SeekableInput in = new SeekableByteArrayInput(data);
  IndexedRecord result;
  try (FileReader<IndexedRecord> dfr = DataFileReader.openReader(in, reader)) {
    result = dfr.next();
  }
  Assert.assertNotNull(result);
  Assert.assertTrue(result instanceof GenericRecord);
  Assert.assertEquals(new Utf8("testValue"), ((GenericRecord) result).get("name"));
}
}
// Build a generic reader for the index file's key/value schema (key type from options,
// value fixed to LONG — presumably a file offset; confirm against the index writer)
// and open a reader over the index file.
// NOTE(review): fragment — `indexFileReader` is not closed within this view; verify
// the enclosing code closes it.
DatumReader<GenericRecord> indexReader = new GenericDatumReader<>( AvroKeyValue.getSchema(options.getKeySchema(), Schema.create(Schema.Type.LONG))); FileReader<GenericRecord> indexFileReader = DataFileReader.openReader(indexFile, indexReader);
/**
 * Deserializes the first record of an in-memory Avro data file into the given
 * specific-record type.
 *
 * <p>Fix: the file reader was never closed on any path; it is now managed with
 * try-with-resources, so it is released whether the read succeeds, the
 * precondition fails, or decoding throws.
 *
 * @param bytes a complete serialized Avro data file
 * @param clazz the generated specific-record class to decode into
 * @return the first record in the file
 * @throws IOException if the bytes cannot be read as an Avro data file
 * @throws IllegalArgumentException if the file contains no records
 */
public static <T extends SpecificRecordBase> T deserializeAvroMetadata(byte[] bytes, Class<T> clazz) throws IOException {
  DatumReader<T> reader = new SpecificDatumReader<>(clazz);
  try (FileReader<T> fileReader = DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader)) {
    Preconditions
        .checkArgument(fileReader.hasNext(), "Could not deserialize metadata of type " + clazz);
    return fileReader.next();
  }
}
}