@Override
public DatasetReader<Record> newReader(Path path, Schema schema) {
  return new FileSystemDatasetReader<Record>(fs, path, schema, Record.class);
}
public int count(FileSystem fs, Path path) {
  FileSystemDatasetReader<String> reader = new FileSystemDatasetReader<String>(
      fs, path, schema, String.class);
  int count = 0;
  reader.initialize();
  for (String s : reader) {
    count += 1;
    System.err.println(s); // echo each record to stderr for debugging
  }
  reader.close();
  return count;
}
@Test(expected = IllegalArgumentException.class)
public void testNullFileSystem() {
  DatasetReader<String> reader = new FileSystemDatasetReader<String>(
      null, new Path("/tmp/does-not-exist.avro"), STRING_SCHEMA, String.class);
}
@Test(expected = IllegalArgumentException.class)
public void testNullFile() {
  DatasetReader<String> reader = new FileSystemDatasetReader<String>(
      fileSystem, null, STRING_SCHEMA, String.class);
}
@Override
public DatasetReader<Record> newReader() throws IOException {
  return new FileSystemDatasetReader<Record>(
      LocalFileSystem.getInstance(),
      new Path(Resources.getResource("data/strings-100.avro").getFile()),
      STRING_SCHEMA, Record.class);
}
@Test(expected = DatasetIOException.class)
public void testMissingFile() {
  AbstractDatasetReader<String> reader = new FileSystemDatasetReader<String>(
      fileSystem, new Path("/tmp/does-not-exist.avro"), STRING_SCHEMA, String.class);

  // the reader should not fail until initialize()
  Assert.assertNotNull(reader);

  reader.initialize();
}
@SuppressWarnings("unchecked") // See https://github.com/Parquet/parquet-mr/issues/106
private void openNextReader() {
  // choose the reader implementation that matches the dataset's storage format
  if (Formats.PARQUET.equals(descriptor.getFormat())) {
    this.reader = new ParquetFileSystemDatasetReader(fileSystem, filesIter.next(),
        accessor.getReadSchema(), accessor.getType());
  } else if (Formats.JSON.equals(descriptor.getFormat())) {
    this.reader = new JSONFileReader<E>(fileSystem, filesIter.next(), accessor);
  } else if (Formats.CSV.equals(descriptor.getFormat())) {
    this.reader = new CSVFileReader<E>(fileSystem, filesIter.next(),
        descriptor, accessor);
  } else if (Formats.INPUTFORMAT.equals(descriptor.getFormat())) {
    this.reader = new InputFormatReader(fileSystem, filesIter.next(), descriptor);
  } else {
    this.reader = new FileSystemDatasetReader<E>(fileSystem, filesIter.next(),
        accessor.getReadSchema(), accessor.getType());
  }
  reader.initialize();
  // wrap the per-file reader in an iterator that filters by the constraints
  this.readerIterator = Iterators.filter(reader,
      constraints.toEntityPredicate(
          (pathIter != null ? pathIter.getStorageKey() : null), accessor));
}
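// Note: openNextReader() above only swaps in a reader for the next file; a
// companion hasNext()/next() pair is needed to advance across files as each
// per-file iterator drains. The following is a hypothetical sketch assuming
// the same filesIter, reader, and readerIterator fields from that snippet,
// not the actual Kite implementation.
@Override
public boolean hasNext() {
  while (!readerIterator.hasNext() && filesIter.hasNext()) {
    reader.close();     // release the exhausted per-file reader
    openNextReader();   // open a reader over the next file
  }
  return readerIterator.hasNext();
}

@Override
public E next() {
  if (!hasNext()) {
    throw new java.util.NoSuchElementException();
  }
  return readerIterator.next();
}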
@Test(expected = DatasetIOException.class)
public void testEmptyFile() throws IOException {
  final Path emptyFile = new Path("/tmp/empty-file.avro");

  // created outside the try block: if creation fails, the finally block
  // must not delete a file it did not create
  Assert.assertTrue("Failed to create a new empty file",
      fileSystem.createNewFile(emptyFile));

  try {
    AbstractDatasetReader<String> reader = new FileSystemDatasetReader<String>(
        fileSystem, emptyFile, STRING_SCHEMA, String.class);

    // the reader should not fail until initialize()
    Assert.assertNotNull(reader);

    reader.initialize();
  } finally {
    Assert.assertTrue("Failed to clean up empty file",
        fileSystem.delete(emptyFile, true));
  }
}
@Test
public void testEvolvedSchema() throws IOException {
  // evolve the schema by adding a field with a default value
  Schema schema = SchemaBuilder.record("mystring").fields()
      .requiredString("text")
      .name("text2").type().stringType().stringDefault("N/A")
      .endRecord();

  FileSystemDatasetReader<Record> reader = new FileSystemDatasetReader<Record>(
      fileSystem,
      new Path(Resources.getResource("data/strings-100.avro").getFile()),
      schema, Record.class);

  checkReaderBehavior(reader, 100, new RecordValidator<Record>() {
    @Override
    public void validate(Record record, int recordNum) {
      Assert.assertEquals(String.valueOf(recordNum), record.get("text").toString());
      // the new field is absent from the data file, so it takes its default
      Assert.assertEquals("N/A", record.get("text2").toString());
    }
  });
}
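// Taken together, the snippets above imply the reader lifecycle: construct
// (cheap, no file access), initialize() (opens the file; missing or empty
// files fail here with DatasetIOException), iterate, close(). A minimal
// usage sketch, assuming the same strings-100.avro test resource and
// STRING_SCHEMA used in the tests above:
FileSystem fs = LocalFileSystem.getInstance();
Path file = new Path(Resources.getResource("data/strings-100.avro").getFile());
FileSystemDatasetReader<Record> reader = new FileSystemDatasetReader<Record>(
    fs, file, STRING_SCHEMA, Record.class);
reader.initialize();                 // any I/O failure surfaces here, not in the constructor
try {
  for (Record record : reader) {     // the reader is iterable over its entities
    System.out.println(record.get("text"));
  }
} finally {
  reader.close();                    // always release the underlying stream
}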