/**
 * Projects this view to a new {@link FileSystemView} that reads and writes
 * entities of the given type using the given schema.
 */
@Override
protected <T> AbstractRefinableView<T> project(Schema schema, Class<T> type) {
  return new FileSystemView<T>(this, schema, type);
}
/**
 * Signals that this view is ready, delegating to the signal manager with this
 * view's constraints. A no-op when no signal manager is configured.
 */
@Override
public void signalReady() {
  if (signalManager == null) {
    return;
  }
  signalManager.signalReady(getConstraints());
}
/**
 * Returns an initialized reader over all files matched by this view,
 * filtered by the view's constraints.
 */
@Override
public DatasetReader<E> newReader() {
  checkSchemaForRead();
  AbstractDatasetReader<E> multiFileReader = new MultiFileDatasetReader<E>(
      fs, pathIterator(), dataset.getDescriptor(), constraints, getAccessor());
  multiFileReader.initialize();
  return multiFileReader;
}
/**
 * Creates an input format that reads the given view, copying the view's
 * format, schema, and type into the job configuration.
 */
public FileSystemViewKeyInputFormat(FileSystemView<E> view, Configuration conf) {
  this.view = view;
  this.dataset = (FileSystemDataset<E>) view.getDataset();
  LOG.debug("View: {}", view);
  setConfigProperties(conf, dataset.getDescriptor().getFormat(),
      view.getSchema(), view.getType());
}
@Override public long getLastModified() { long lastMod = -1; for (Iterator<Path> i = dirIterator(); i.hasNext(); ) { Path dir = i.next(); try { for (FileStatus st : fs.listStatus(dir)) { if (lastMod < st.getModificationTime()) { lastMod = st.getModificationTime(); } } } catch (IOException e) { throw new DatasetIOException("Cannot find last modified time of of " + dir, e); } } // if view was marked ready more recently count it as the modified time if (signalManager != null) { long readyTimestamp = signalManager.getReadyTimestamp(getConstraints()); if (lastMod < readyTimestamp) { lastMod = readyTimestamp; } } return lastMod; }
@Test public void testNullSignalManager() { FileSystemDataset<StandardEvent> ds = (FileSystemDataset<StandardEvent>) unbounded.getDataset(); FileSystemView<StandardEvent> view = new FileSystemView<StandardEvent>(ds, null, null, StandardEvent.class); // getlast modified Assert.assertTrue("Last modified does not require access to signal manager", view.getLastModified() >= -1); view.signalReady(); Assert.assertFalse("View should not be signaled without manager", view.isReady()); }
/**
 * Creates a writer for the given partitioned view. Sizes the per-partition
 * writer cache from the partition strategy's cardinality (bounded by
 * DEFAULT_WRITER_CACHE_SIZE) unless overridden by WRITER_CACHE_SIZE_PROP,
 * and reads file-rolling settings from the descriptor.
 *
 * @throws IllegalArgumentException if the view's dataset is not partitioned
 */
private PartitionedDatasetWriter(FileSystemView<E> view) {
  final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();

  // bound the writer cache by the partition cardinality when it is known
  // (non-negative) and does not exceed the default cache size
  int cacheSize = partitionStrategy.getCardinality();
  if (cacheSize < 0 || cacheSize > DEFAULT_WRITER_CACHE_SIZE) {
    cacheSize = DEFAULT_WRITER_CACHE_SIZE;
  }
  this.maxWriters = DescriptorUtil.getInt(
      WRITER_CACHE_SIZE_PROP, descriptor, cacheSize);

  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
  this.accessor = view.getAccessor();
  this.provided = view.getProvidedValues();

  // file rolling properties: size-based rolling is disabled (-1) for Parquet
  this.targetFileSize = Formats.PARQUET.equals(descriptor.getFormat())
      ? -1
      : DescriptorUtil.getLong(TARGET_FILE_SIZE_PROP, descriptor, -1);
  this.rollIntervalMillis = 1000 * DescriptorUtil.getLong(
      ROLL_INTERVAL_S_PROP, descriptor, -1);
}
@Override @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="BC_UNCONFIRMED_CAST_OF_RETURN_VALUE", justification="Writer is known to be IncrementalWriter") public FileSystemWriter.IncrementalWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState(view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), conf.getRollIntervalMillis(), conf.getTargetFileSize(), dataset.getDescriptor(), view.getAccessor().getWriteSchema()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded( dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return (FileSystemWriter.IncrementalWriter<E>) writer; }
/**
 * Returns an initialized writer for this view: a partitioned writer when the
 * dataset has a partition strategy, otherwise a plain file writer rooted at
 * this view's directory.
 */
@Override
public DatasetWriter<E> newWriter() {
  checkSchemaForWrite();

  final AbstractDatasetWriter<E> writer;
  if (dataset.getDescriptor().isPartitioned()) {
    writer = PartitionedDatasetWriter.newWriter(this);
  } else {
    writer = FileSystemWriter.newWriter(
        fs, root, -1, -1 /* get from descriptor */,
        dataset.getDescriptor(), this.getAccessor().getWriteSchema());
  }

  writer.initialize();
  return writer;
}
/**
 * Creates a cache loader that builds per-partition writers for the given
 * view, using the dataset schema to convert storage keys to paths.
 */
public DatasetWriterCacheLoader(FileSystemView<E> view, ConfAccessor conf) {
  this.view = view;
  this.conf = conf;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
}
/**
 * Deletes all data in this view.
 *
 * @throws UnsupportedOperationException if the view's constraints do not
 *     align with partition boundaries, since a partial partition cannot be
 *     deleted cleanly
 */
@Override
public boolean deleteAll() {
  if (constraints.alignedWithBoundaries()) {
    return deleteAllUnsafe(false);
  }
  throw new UnsupportedOperationException(
      "Cannot cleanly delete view: " + this);
}
@Override public Iterable<PartitionView<E>> getCoveringPartitions() { // filter the matching partitions to return those contained by this one return Iterables.filter(super.getCoveringPartitions(), new Predicate<PartitionView<E>>() { @Override public boolean apply(@Nullable PartitionView<E> input) { return input != null && contains( URI.create(location.toString()), root, input.getLocation()); } }); }
/**
 * Returns the total size in bytes of all files in this view's directories.
 *
 * @throws DatasetIOException if a directory cannot be listed
 */
@Override
public long getSize() {
  long totalBytes = 0;
  Iterator<Path> dirs = dirIterator();
  while (dirs.hasNext()) {
    Path dir = dirs.next();
    try {
      for (FileStatus status : fs.listStatus(dir)) {
        totalBytes += status.getLen();
      }
    } catch (IOException e) {
      throw new DatasetIOException("Cannot find size of " + dir, e);
    }
  }
  return totalBytes;
}
/**
 * Two partition views are equal when they are equal as views (per the
 * superclass), have the same concrete class, and share the same location.
 */
@Override
public boolean equals(Object other) {
  if (this == other) {
    return true;
  }
  if (!super.equals(other)) {
    return false;
  }
  if (other == null || getClass() != other.getClass()) {
    return false;
  }
  FileSystemPartitionView<?> that = (FileSystemPartitionView<?>) other;
  return Objects.equal(this.location, that.location);
}
@Override public RecordReader<E, Void> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { RecordReader<E, Void> unfilteredRecordReader = createUnfilteredRecordReader (inputSplit, taskAttemptContext); if (view != null) { // use the constraints to filter out entities from the reader return new FilteredRecordReader<E>(unfilteredRecordReader, ((AbstractRefinableView) view).getConstraints(), view.getAccessor()); } return unfilteredRecordReader; }
@Override public FileSystemWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState(view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), conf.getRollIntervalMillis(), conf.getTargetFileSize(), dataset.getDescriptor(), view.getAccessor().getWriteSchema()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded( dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return writer; }
/**
 * Creates a cache loader that builds per-partition incremental writers for
 * the given view, using the dataset schema to convert storage keys to paths.
 */
public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view,
    ConfAccessor conf) {
  this.view = view;
  this.conf = conf;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
}
/**
 * Moves all data in this view to the trash.
 *
 * @throws UnsupportedOperationException if the view's constraints do not
 *     align with partition boundaries, since a partial partition cannot be
 *     moved cleanly
 */
@Override
public boolean moveToTrash() {
  if (constraints.alignedWithBoundaries()) {
    return deleteAllUnsafe(true);
  }
  throw new UnsupportedOperationException(
      "Cannot cleanly move view to trash: " + this);
}
Iterable<PartitionView<E>> existingPartitions = dest .toConstraintsView() .getCoveringPartitions(); for (PartitionView<E> partition : existingPartitions) { FileSystemPartitionView<E> toReplace =
/**
 * Returns an iterator over the directories backing the given view.
 *
 * @throws DatasetException if the view is not backed by a FileSystemDataset
 */
public Iterator<Path> getDirectoryIterator(View view) {
  if (view instanceof FileSystemView) {
    return ((FileSystemView<?>) view).dirIterator();
  }
  if (view instanceof FileSystemDataset) {
    return ((FileSystemDataset<?>) view).dirIterator();
  }
  throw new DatasetException(
      "Underlying Dataset must be a FileSystemDataset");
}