@Override public RecordReader<E, Void> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { RecordReader<E, Void> unfilteredRecordReader = createUnfilteredRecordReader (inputSplit, taskAttemptContext); if (view != null) { // use the constraints to filter out entities from the reader return new FilteredRecordReader<E>(unfilteredRecordReader, ((AbstractRefinableView) view).getConstraints(), view.getAccessor()); } return unfilteredRecordReader; }
@Override
public DatasetReader<E> newReader() {
  // Fail fast if the requested read schema is incompatible with the dataset.
  checkSchemaForRead();
  // A multi-file reader walks every matching path under this view.
  final AbstractDatasetReader<E> multiFileReader = new MultiFileDatasetReader<E>(
      fs, pathIterator(), dataset.getDescriptor(), constraints, getAccessor());
  multiFileReader.initialize();
  return multiFileReader;
}
@Override
public DatasetWriter<E> newWriter() {
  // Fail fast if the requested write schema is incompatible with the dataset.
  checkSchemaForWrite();
  AbstractDatasetWriter<E> writer;
  if (!dataset.getDescriptor().isPartitioned()) {
    // Unpartitioned datasets write directly under the root directory.
    // NOTE(review): the -1, -1 roll-interval/target-size placeholders are
    // marked "get from descriptor" upstream — confirm whether they should be
    // read via DescriptorUtil as the partitioned writer does.
    writer = FileSystemWriter.newWriter(
        fs, root, -1, -1 /* get from descriptor */,
        dataset.getDescriptor(), this.getAccessor().getWriteSchema());
  } else {
    // Partitioned datasets fan records out to one writer per partition.
    writer = PartitionedDatasetWriter.newWriter(this);
  }
  writer.initialize();
  return writer;
}
@Override @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="BC_UNCONFIRMED_CAST_OF_RETURN_VALUE", justification="Writer is known to be IncrementalWriter") public FileSystemWriter.IncrementalWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState(view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), conf.getRollIntervalMillis(), conf.getTargetFileSize(), dataset.getDescriptor(), view.getAccessor().getWriteSchema()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded( dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return (FileSystemWriter.IncrementalWriter<E>) writer; }
@Override public FileSystemWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState(view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), conf.getRollIntervalMillis(), conf.getTargetFileSize(), dataset.getDescriptor(), view.getAccessor().getWriteSchema()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded( dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return writer; }
/**
 * Creates a writer that fans records out to per-partition writers for the
 * given view. The view's dataset must be partitioned.
 */
private PartitionedDatasetWriter(FileSystemView<E> view) {
  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();

  // Default the writer cache to the strategy's cardinality, but fall back to
  // DEFAULT_WRITER_CACHE_SIZE when the cardinality is negative or too large.
  int cacheSizeDefault = partitionStrategy.getCardinality();
  if (cacheSizeDefault < 0 || cacheSizeDefault > DEFAULT_WRITER_CACHE_SIZE) {
    cacheSizeDefault = DEFAULT_WRITER_CACHE_SIZE;
  }
  this.maxWriters = DescriptorUtil.getInt(
      WRITER_CACHE_SIZE_PROP, descriptor, cacheSizeDefault);

  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
  this.accessor = view.getAccessor();
  this.provided = view.getProvidedValues();

  // File rolling properties: size-based rolling is disabled (-1) for Parquet;
  // the roll interval is configured in seconds and stored in milliseconds.
  this.targetFileSize = Formats.PARQUET.equals(descriptor.getFormat())
      ? -1
      : DescriptorUtil.getLong(TARGET_FILE_SIZE_PROP, descriptor, -1);
  this.rollIntervalMillis = 1000 * DescriptorUtil.getLong(
      ROLL_INTERVAL_S_PROP, descriptor, -1);
}