/**
 * Builds a cache loader bound to the given view and writer configuration.
 *
 * <p>A {@link PathConversion} is created from the schema of the descriptor
 * of the view's dataset.
 *
 * @param view the view whose dataset the loaded writers target
 * @param conf accessor stored for use when writers are created
 */
public DatasetWriterCacheLoader(FileSystemView<E> view, ConfAccessor conf) {
  this.view = view;
  this.conf = conf;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
}
/**
 * Builds an incremental-writer cache loader bound to the given view and
 * writer configuration.
 *
 * <p>A {@link PathConversion} is created from the schema of the descriptor
 * of the view's dataset.
 *
 * @param view the view whose dataset the loaded writers target
 * @param conf accessor stored for use when writers are created
 */
public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view, ConfAccessor conf) {
  this.view = view;
  this.conf = conf;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
}
/**
 * Creates an input format over the given view and records the view's
 * format, schema and type in the supplied configuration via
 * {@code setConfigProperties}.
 *
 * @param view the view to read; its dataset is cast to
 *             {@code FileSystemDataset}
 * @param conf the configuration passed to {@code setConfigProperties}
 */
public FileSystemViewKeyInputFormat(FileSystemView<E> view, Configuration conf) {
  this.view = view;
  this.dataset = (FileSystemDataset<E>) view.getDataset();
  LOG.debug("View: {}", view);

  // The storage format comes from the dataset descriptor; schema and type
  // come from the view itself.
  setConfigProperties(
      conf,
      dataset.getDescriptor().getFormat(),
      view.getSchema(),
      view.getType());
}
@Override @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="BC_UNCONFIRMED_CAST_OF_RETURN_VALUE", justification="Writer is known to be IncrementalWriter") public FileSystemWriter.IncrementalWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState(view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), conf.getRollIntervalMillis(), conf.getTargetFileSize(), dataset.getDescriptor(), view.getAccessor().getWriteSchema()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded( dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return (FileSystemWriter.IncrementalWriter<E>) writer; }
@Override public FileSystemWriter<E> load(StorageKey key) throws Exception { Preconditions.checkState(view.getDataset() instanceof FileSystemDataset, "FileSystemWriters cannot create writer for " + view.getDataset()); FileSystemDataset dataset = (FileSystemDataset) view.getDataset(); Path partition = convert.fromKey(key); FileSystemWriter<E> writer = FileSystemWriter.newWriter( dataset.getFileSystem(), new Path(dataset.getDirectory(), partition), conf.getRollIntervalMillis(), conf.getTargetFileSize(), dataset.getDescriptor(), view.getAccessor().getWriteSchema()); PartitionListener listener = dataset.getPartitionListener(); if (listener != null) { listener.partitionAdded( dataset.getNamespace(), dataset.getName(), partition.toString()); } // initialize the writer after calling the listener // this lets the listener decide if and how to create the // partition directory writer.initialize(); return writer; }
/**
 * Opens this writer: verifies it is still in the {@code NEW} state,
 * validates the descriptor's format, builds the writer cache and moves
 * the state to {@code OPEN}.
 *
 * @throws IllegalStateException if the current state is not {@code NEW}
 */
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "Unable to open a writer from state:%s", state);

  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  boolean supported = FileSystemWriter.isSupportedFormat(descriptor);
  Format format = descriptor.getFormat();
  ValidationException.check(supported, "Not a supported format: %s", format);

  LOG.debug("Opening partitioned dataset writer w/strategy:{}",
      partitionStrategy);

  // The cache holds at most maxWriters entries; removed entries are passed
  // to a DatasetWriterCloser.
  cachedWriters = CacheBuilder.newBuilder()
      .maximumSize(maxWriters)
      .removalListener(new DatasetWriterCloser<E>())
      .build(createCacheLoader());

  state = ReaderWriterState.OPEN;
}
/**
 * Creates a writer in the {@code NEW} state for a partitioned view.
 *
 * <p>The writer cache size defaults to the partition strategy's
 * cardinality, replaced by {@code DEFAULT_WRITER_CACHE_SIZE} when the
 * cardinality is negative or larger than that constant; the descriptor
 * property {@code WRITER_CACHE_SIZE_PROP} is read with that value as its
 * default.
 *
 * @param view the view to write to; its dataset must be partitioned
 * @throws IllegalArgumentException if the dataset's descriptor is not
 *         partitioned
 */
private PartitionedDatasetWriter(FileSystemView<E> view) {
  final DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  Preconditions.checkArgument(descriptor.isPartitioned(),
      "Dataset " + view.getDataset() + " is not partitioned");

  this.view = view;
  this.partitionStrategy = descriptor.getPartitionStrategy();

  // Writer cache size.
  final int cardinality = partitionStrategy.getCardinality();
  final boolean outOfRange =
      cardinality < 0 || cardinality > DEFAULT_WRITER_CACHE_SIZE;
  final int defaultMaxWriters =
      outOfRange ? DEFAULT_WRITER_CACHE_SIZE : cardinality;
  this.maxWriters = DescriptorUtil.getInt(WRITER_CACHE_SIZE_PROP, descriptor,
      defaultMaxWriters);

  this.state = ReaderWriterState.NEW;
  this.reusedKey = new StorageKey(partitionStrategy);
  this.accessor = view.getAccessor();
  this.provided = view.getProvidedValues();

  // File rolling properties: the target file size property is not read
  // for Parquet; -1 is used instead.
  if (Formats.PARQUET.equals(descriptor.getFormat())) {
    this.targetFileSize = -1;
  } else {
    this.targetFileSize = DescriptorUtil.getLong(
        TARGET_FILE_SIZE_PROP, descriptor, -1);
  }

  // The roll interval property is multiplied by 1000 to obtain milliseconds.
  this.rollIntervalMillis = 1000 * DescriptorUtil.getLong(
      ROLL_INTERVAL_S_PROP, descriptor, -1);
}
static <E> PartitionedDatasetWriter<E, ?> newWriter(FileSystemView<E> view) { DatasetDescriptor descriptor = view.getDataset().getDescriptor(); Format format = descriptor.getFormat(); if (Formats.PARQUET.equals(format)) { // by default, Parquet is not durable if (DescriptorUtil.isDisabled( FileSystemProperties.NON_DURABLE_PARQUET_PROP, descriptor)) { return new IncrementalPartitionedDatasetWriter<E>(view); } else { return new NonDurablePartitionedDatasetWriter<E>(view); } } else if (Formats.AVRO.equals(format) || Formats.CSV.equals(format)) { return new IncrementalPartitionedDatasetWriter<E>(view); } else { return new NonDurablePartitionedDatasetWriter<E>(view); } }