// Copies JSON records from the input stream into an Avro data file on the
// output stream. Records that fail conversion are collected in `failures`
// instead of aborting the copy; each successful append bumps `written`.
@Override
public void process(InputStream in, OutputStream out) throws IOException {
  try (JSONFileReader<Record> jsonReader = new JSONFileReader<>(
      in, schema, Record.class)) {
    jsonReader.initialize();
    try (DataFileWriter<Record> avroWriter = writer.create(schema, out)) {
      while (jsonReader.hasNext()) {
        try {
          avroWriter.append(jsonReader.next());
          written.incrementAndGet();
        } catch (final DatasetRecordException e) {
          // Bad record: remember the failure and keep going.
          failures.add(e);
        }
      }
    }
  }
}
});
/**
 * Creates a {@code RecordReader} over the JSON file backing the given split.
 *
 * <p>Only {@link FileSplit}s are supported; any other split type results in a
 * {@code DatasetOperationException}.
 */
@Override
public RecordReader<E, Void> createRecordReader(InputSplit split,
    TaskAttemptContext context) throws IOException, InterruptedException {
  // Reflective accessor keeps compatibility across Hadoop 1/2 APIs.
  Configuration conf = Hadoop.TaskAttemptContext
      .getConfiguration.invoke(context);
  // Guard: this reader can only consume file-based splits.
  if (!(split instanceof FileSplit)) {
    throw new DatasetOperationException(
        "Split is not a FileSplit: %s:%s",
        split.getClass().getCanonicalName(), split);
  }
  Path path = ((FileSplit) split).getPath();
  JSONFileReader<E> reader = new JSONFileReader<E>(
      path.getFileSystem(conf), path, accessor);
  reader.initialize();
  return reader.asRecordReader();
}
}
// Streams JSON records from `in` and writes them as Avro to `out`.
// Conversion failures are collected in `failures` rather than aborting
// the copy; each record successfully appended increments `written`.
@Override public void process(InputStream in, OutputStream out) throws IOException {
  try (JSONFileReader<Record> reader = new JSONFileReader<>(
      in, schema, Record.class)) {
    reader.initialize();
    try (DataFileWriter<Record> w = writer.create(schema, out)) {
      while (reader.hasNext()) {
        try {
          Record record = reader.next();
          w.append(record);
          written.incrementAndGet();
        } catch (final DatasetRecordException e) {
          // Bad record: remember the failure and continue with the rest.
          failures.add(e);
        }
      }
    }
  }
}
});
@SuppressWarnings("unchecked") // See https://github.com/Parquet/parquet-mr/issues/106 private void openNextReader() { if (Formats.PARQUET.equals(descriptor.getFormat())) { this.reader = new ParquetFileSystemDatasetReader(fileSystem, filesIter.next(), accessor.getReadSchema(), accessor.getType()); } else if (Formats.JSON.equals(descriptor.getFormat())) { this.reader = new JSONFileReader<E>( fileSystem, filesIter.next(), accessor); } else if (Formats.CSV.equals(descriptor.getFormat())) { this.reader = new CSVFileReader<E>(fileSystem, filesIter.next(), descriptor, accessor); } else if (Formats.INPUTFORMAT.equals(descriptor.getFormat())) { this.reader = new InputFormatReader(fileSystem, filesIter.next(), descriptor); } else { this.reader = new FileSystemDatasetReader<E>(fileSystem, filesIter.next(), accessor.getReadSchema(), accessor.getType()); } reader.initialize(); this.readerIterator = Iterators.filter(reader, constraints.toEntityPredicate( (pathIter != null ? pathIter.getStorageKey() : null), accessor)); }