@Override
public void process(InputStream in, OutputStream out) throws IOException {
  try (CSVFileReader<Record> reader = new CSVFileReader<>(
      in, props, schema, Record.class)) {
    reader.initialize();
    try (DataFileWriter<Record> w = writer.create(schema, out)) {
      while (reader.hasNext()) {
        try {
          Record record = reader.next();
          w.append(record);
          written.incrementAndGet();
        } catch (DatasetRecordException e) {
          failures.add(e);
        }
      }
    }
  }
}
@Override
public RecordReader<E, Void> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  Configuration conf = Hadoop.TaskAttemptContext
      .getConfiguration.invoke(context);
  Path path;
  if (split instanceof FileSplit) {
    path = ((FileSplit) split).getPath();
  } else {
    throw new DatasetOperationException(
        "Split is not a FileSplit: %s:%s",
        split.getClass().getCanonicalName(), split);
  }
  CSVFileReader<E> reader = new CSVFileReader<E>(
      path.getFileSystem(conf), path, descriptor, accessor);
  reader.initialize();
  return reader.asRecordReader();
}
public int count(FileSystem fs, Path path, DatasetDescriptor descriptor) {
  CSVFileReader<GenericRecord> reader = new CSVFileReader<GenericRecord>(
      fs, path, descriptor,
      DataModelUtil.accessor(GenericRecord.class, descriptor.getSchema()));
  int count = 0;
  reader.initialize();
  for (GenericRecord r : reader) {
    count += 1;
    System.err.println(r);
  }
  reader.close();
  return count;
}
@Test
public void testBadNumericSchema() {
  final DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .schema(TYPE_ERROR_SCHEMA)
      .build();
  final CSVFileReader<GenericData.Record> reader =
      new CSVFileReader<GenericData.Record>(localfs, csvFile, desc,
          DataModelUtil.accessor(GenericData.Record.class, desc.getSchema()));
  reader.initialize();
  Assert.assertTrue(reader.hasNext());
  TestHelpers.assertThrows("Should reject float value for integer schema",
      DatasetRecordException.class, new Runnable() {
        @Override
        public void run() {
          reader.next();
        }
      });
}
    DataModelUtil.accessor(TestBean.class, desc.getSchema()));
reader.initialize();
Assert.assertTrue(reader.hasNext());
TestBean bean = reader.next();
    DataModelUtil.accessor(GenericData.Record.class, desc.getSchema()));
reader.initialize();
Assert.assertTrue(reader.hasNext());
GenericData.Record rec = reader.next();
    DataModelUtil.accessor(TestGenericRecord.class, desc.getSchema()));
reader.initialize();
Assert.assertTrue(reader.hasNext());
TestGenericRecord record = reader.next();
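The snippets above all share one read pattern: construct a CSVFileReader with a FileSystem, Path, DatasetDescriptor, and entity accessor, call initialize() before the first hasNext()/next(), and close the reader when done. The sketch below is a minimal, self-contained illustration of that pattern, not code from any of the snippets; the Person schema and the target/people.csv path are placeholders assumed for the example.

// Minimal sketch of the common CSVFileReader read pattern.
// The schema and file path below are illustrative assumptions.
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.kitesdk.data.DatasetDescriptor;
import org.kitesdk.data.spi.DataModelUtil;
import org.kitesdk.data.spi.filesystem.CSVFileReader;

public class CsvReadSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical two-column schema matching "name,age" CSV rows.
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Person\",\"fields\":["
            + "{\"name\":\"name\",\"type\":\"string\"},"
            + "{\"name\":\"age\",\"type\":\"int\"}]}");

    DatasetDescriptor desc = new DatasetDescriptor.Builder()
        .schema(schema)
        .build();

    FileSystem localfs = FileSystem.getLocal(new Configuration());
    Path csvFile = new Path("target/people.csv");  // assumed sample file

    CSVFileReader<GenericData.Record> reader =
        new CSVFileReader<GenericData.Record>(localfs, csvFile, desc,
            DataModelUtil.accessor(GenericData.Record.class, desc.getSchema()));
    try {
      reader.initialize();  // must be called before hasNext()/next()
      while (reader.hasNext()) {
        GenericData.Record rec = reader.next();  // one CSV row as an Avro record
        System.out.println(rec);
      }
    } finally {
      reader.close();
    }
  }
}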