/** {@inheritDoc} */
@Override
public void close() throws IOException {
  if (null != mAvroFileReader) {
    try {
      mAvroFileReader.close();
    } finally {
      mAvroFileReader = null;
    }
  }
}
/** {@inheritDoc} */
@Override
public void close() throws IOException {
  mDataFileReader.close();
}
@Override
public void close() throws IOException {
  reader.close();
}
public void close() throws IOException {
  reader.close();
}
@Override
public void close() {
  try {
    this.dataFileReader.close();
  } catch (IOException ioe) {
    // ignored
  }
}
@Override
public void close() throws IOException {
  if (isOpen) {
    writer.close();
    reader.close();
    isOpen = false;
  }
}
/** Reads and returns the first datum in a data file. */
static Object datumFromFile(Schema schema, String file) throws IOException {
  DataFileReader<Object> in =
      new DataFileReader<>(new File(file), new GenericDatumReader<>(schema));
  try {
    return in.next();
  } finally {
    in.close();
  }
}
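A minimal alternative sketch, not taken from the snippet above: since DataFileReader implements Closeable, the same read can use try-with-resources (Java 7+) so the reader is closed even when next() throws. The helper name firstDatum is hypothetical.

import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;

// Hypothetical variant of datumFromFile: try-with-resources closes the reader
// automatically, including when next() throws.
static Object firstDatum(Schema schema, String file) throws IOException {
  try (DataFileReader<Object> in =
      new DataFileReader<>(new File(file), new GenericDatumReader<>(schema))) {
    return in.next();
  }
}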
public Schema getAvroSchema(String file) throws FileBasedHelperException {
  DataFileReader<GenericRecord> dfr = null;
  try {
    if (this.getState().getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER,
        ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER)) {
      dfr = new DataFileReader<>(new ProxyFsInput(new Path(file), this.getFileSystem()),
          new GenericDatumReader<GenericRecord>());
    } else {
      dfr = new DataFileReader<>(new FsInput(new Path(file), this.getFileSystem().getConf()),
          new GenericDatumReader<GenericRecord>());
    }
    return dfr.getSchema();
  } catch (IOException e) {
    throw new FileBasedHelperException(
        "Failed to open avro file " + file + " due to error " + e.getMessage(), e);
  } finally {
    if (dfr != null) {
      try {
        dfr.close();
      } catch (IOException e) {
        LOGGER.error("Failed to close avro file " + file, e);
      }
    }
  }
}
static <T> T runOnPreview(byte[] bits, AvroPreviewProcessor<T> processor) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
  SeekableByteArrayInput sbai = new SeekableByteArrayInput(bits);
  DataFileReader<GenericRecord> dataFileReader = null;
  try {
    dataFileReader = new DataFileReader<>(sbai, datumReader);
    int headerLen = (int) dataFileReader.previousSync();
    byte[] header = Arrays.copyOf(bits, headerLen);
    if (dataFileReader.hasNext()) {
      GenericRecord gr = dataFileReader.next();
      return processor.process(header, gr, dataFileReader.getBlockCount(),
          dataFileReader.getBlockSize());
    } else {
      throw new RuntimeException("Empty Avro file - cannot run preview!");
    }
  } finally {
    try {
      if (dataFileReader != null) {
        dataFileReader.close();
      }
    } catch (IOException safeToIgnore) {
      // ignored
    }
  }
}
private void check(String... extraArgs) throws Exception {
  ArrayList<String> args = new ArrayList<>();
  args.addAll(Arrays.asList(new String[] {
      OUT_FILE.toString(),
      "--count", COUNT,
      "--schema-file", SCHEMA_FILE.toString(),
      "--seed", Long.toString(SEED)
  }));
  args.addAll(Arrays.asList(extraArgs));
  run(args);
  DataFileReader<Object> reader = new DataFileReader<Object>(OUT_FILE, new GenericDatumReader<>());
  Iterator<Object> found = reader.iterator();
  for (Object expected : new RandomData(schemaParser.parse(SCHEMA_FILE), Integer.parseInt(COUNT), SEED)) {
    assertEquals(expected, found.next());
  }
  reader.close();
}
/**
 * Open a writer appending to an existing file.
 * @param in reading the existing file.
 * @param out positioned at the end of the existing file.
 */
public DataFileWriter<D> appendTo(SeekableInput in, OutputStream out) throws IOException {
  assertNotOpen();
  DataFileReader<D> reader = new DataFileReader<D>(in, new GenericDatumReader<D>());
  this.schema = reader.getSchema();
  this.sync = reader.getHeader().sync;
  this.meta.putAll(reader.getHeader().meta);
  byte[] codecBytes = this.meta.get(DataFileConstants.CODEC);
  if (codecBytes != null) {
    String strCodec = new String(codecBytes, "UTF-8");
    this.codec = CodecFactory.fromString(strCodec).createInstance();
  } else {
    this.codec = CodecFactory.nullCodec().createInstance();
  }
  reader.close();
  init(out);
  return this;
}
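A minimal caller-side sketch of appending to an existing container file, assuming the DataFileWriter.appendTo(File) overload and a record that matches the schema already stored in the file; the helper name appendRecord is illustrative.

import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

// Illustrative helper: appends one record to an existing Avro container file.
// appendTo() picks up the schema, codec, and sync marker from the existing file,
// so the record must conform to the schema already stored there.
static void appendRecord(File existingFile, GenericRecord record) throws IOException {
  try (DataFileWriter<GenericRecord> writer =
      new DataFileWriter<>(new GenericDatumWriter<GenericRecord>())) {
    writer.appendTo(existingFile);
    writer.append(record);
  }
}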
private void testConversion(RestEntry<JsonObject> expected, WorkUnitState actualWorkUnitState)
    throws DataConversionException, IOException, JSONException {
  Schema schema = new Schema.Parser().parse(getClass().getResourceAsStream("/converter/nested.avsc"));
  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  File tmp = File.createTempFile(this.getClass().getSimpleName(), null);
  tmp.deleteOnExit();
  try {
    FileUtils.copyInputStreamToFile(getClass().getResourceAsStream("/converter/nested.avro"), tmp);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(tmp, datumReader);
    GenericRecord avroRecord = dataFileReader.next();
    AvroToRestJsonEntryConverter converter = new AvroToRestJsonEntryConverter();
    RestEntry<JsonObject> actual =
        converter.convertRecord(null, avroRecord, actualWorkUnitState).iterator().next();
    Assert.assertEquals(actual.getResourcePath(), expected.getResourcePath());
    JSONAssert.assertEquals(expected.getRestEntryVal().toString(), actual.getRestEntryVal().toString(), false);
    converter.close();
    dataFileReader.close();
  } finally {
    if (tmp != null) {
      tmp.delete();
    }
  }
}
counts.put(record.name.toString(), record.count);
reader.close();
public void testSplits() throws IOException {
  File file = makeFile();
  DataFileReader<Object> reader = new DataFileReader<>(file, new GenericDatumReader<>());
  Random rand = new Random(SEED);
  try {
    int splits = 10;                   // number of splits
    int length = (int) file.length();  // length of file
    int end = length;                  // end of split
    int remaining = end;               // bytes remaining
    int count = 0;                     // count of entries
    while (remaining > 0) {
      int start = Math.max(0, end - rand.nextInt(2 * length / splits));
      reader.sync(start);              // count entries in split
      while (!reader.pastSync(end)) {
        reader.next();
        count++;
      }
      remaining -= end - start;
      end = start;
    }
    assertEquals(COUNT, count);
  } finally {
    reader.close();
  }
}
/** Uses default mapper with no reduces for a map-only identity job. */
@Test
@SuppressWarnings("deprecation")
public void testMapOnly() throws Exception {
  JobConf job = new JobConf();
  String inDir = System.getProperty("share.dir", "../../../share") + "/test/data";
  Path input = new Path(inDir + "/weather.avro");
  Path output = new Path("target/test/weather-ident");
  output.getFileSystem(job).delete(output);
  job.setJobName("identity map weather");
  AvroJob.setInputSchema(job, Weather.SCHEMA$);
  AvroJob.setOutputSchema(job, Weather.SCHEMA$);
  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);
  FileOutputFormat.setCompressOutput(job, true);
  job.setNumReduceTasks(0); // map-only
  JobClient.runJob(job);

  // check output is correct
  DatumReader<Weather> reader = new SpecificDatumReader<>();
  DataFileReader<Weather> check =
      new DataFileReader<>(new File(inDir + "/weather.avro"), reader);
  DataFileReader<Weather> sorted =
      new DataFileReader<>(new File(output.toString() + "/part-00000.avro"), reader);
  for (Weather w : sorted) {
    assertEquals(check.next(), w);
  }
  check.close();
  sorted.close();
}
public void testGenericRead() throws IOException {
  DataFileReader<Object> reader = new DataFileReader<>(makeFile(), new GenericDatumReader<>());
  try {
    Object datum = null;
    if (VALIDATE) {
      for (Object expected : new RandomData(SCHEMA, COUNT, SEED)) {
        datum = reader.next(datum);
        assertEquals(expected, datum);
      }
    } else {
      for (int i = 0; i < COUNT; i++) {
        datum = reader.next(datum);
      }
    }
  } finally {
    reader.close();
  }
}
public void testSyncDiscovery() throws IOException {
  File file = makeFile();
  DataFileReader<Object> reader = new DataFileReader<>(file, new GenericDatumReader<>());
  try {
    // discover the sync points
    ArrayList<Long> syncs = new ArrayList<>();
    long previousSync = -1;
    while (reader.hasNext()) {
      if (reader.previousSync() != previousSync) {
        previousSync = reader.previousSync();
        syncs.add(previousSync);
      }
      reader.next();
    }
    // confirm that the first point is the one reached by sync(0)
    reader.sync(0);
    assertEquals((long) reader.previousSync(), (long) syncs.get(0));
    // and confirm that all points are reachable
    for (Long sync : syncs) {
      reader.seek(sync);
      assertNotNull(reader.next());
    }
  } finally {
    reader.close();
  }
}
@Test
public void testNamedCodecs() throws IOException {
  Configuration conf = new Configuration();
  Path myfile = new Path(mTempDir.getRoot().getPath(), "myfile");
  Schema key = Schema.create(Schema.Type.STRING);
  Schema value = Schema.create(Schema.Type.STRING);
  Schema recordSchema = AvroKeyValue.getSchema(key, value);
  DatumReader<GenericRecord> datumReader = SpecificData.get().createDatumReader(recordSchema);
  DataFileReader<GenericRecord> reader;

  SortedKeyValueFile.Writer.Options options = new SortedKeyValueFile.Writer.Options()
      .withKeySchema(key)
      .withValueSchema(value)
      .withConfiguration(conf)
      .withPath(myfile);
  SortedKeyValueFile.Writer<CharSequence, CharSequence> writer;

  for (String codec : new String[] { "null", "deflate", "snappy", "bzip2" }) {
    LOG.debug("Using " + codec + " codec for a SortedKeyValueFile...");
    options.withCodec(codec);
    writer = new SortedKeyValueFile.Writer<>(options);
    writer.close();
    reader = new DataFileReader<>(
        new FsInput(new Path(myfile, SortedKeyValueFile.DATA_FILENAME), conf), datumReader);
    assertEquals(codec, reader.getMetaString("avro.codec"));
    reader.close();
  }
}
dataFileReader.close();
@Test
public void testDeflateClassCodec() throws IOException {
  Configuration conf = new Configuration();
  Path myfile = new Path(mTempDir.getRoot().getPath(), "myfile");
  Schema key = Schema.create(Schema.Type.STRING);
  Schema value = Schema.create(Schema.Type.STRING);
  Schema recordSchema = AvroKeyValue.getSchema(key, value);
  DatumReader<GenericRecord> datumReader = SpecificData.get().createDatumReader(recordSchema);
  DataFileReader<GenericRecord> reader;

  LOG.debug("Using CodecFactory.deflateCodec() for a SortedKeyValueFile...");
  SortedKeyValueFile.Writer.Options options = new SortedKeyValueFile.Writer.Options()
      .withKeySchema(key)
      .withValueSchema(value)
      .withConfiguration(conf)
      .withPath(myfile)
      .withCodec(CodecFactory.deflateCodec(9));
  SortedKeyValueFile.Writer<CharSequence, CharSequence> writer =
      new SortedKeyValueFile.Writer<>(options);
  writer.close();

  reader = new DataFileReader<>(
      new FsInput(new Path(myfile, SortedKeyValueFile.DATA_FILENAME), conf), datumReader);
  assertEquals("deflate", reader.getMetaString("avro.codec"));
  reader.close();
}