.name(name) .fileSystem(fileSystem) .uri(uri) .descriptor(new DatasetDescriptor.Builder(descriptor) .location(partitionDirectory)
/**
 * Prepares the fixtures shared by the tests: a file system handle, a
 * qualified temporary directory, a partition strategy with hash partitioners
 * on {@code username} and {@code email}, and a dataset named
 * {@code partitioned-users} in namespace {@code ns} that is located in the
 * temporary directory.
 *
 * @throws IOException if the file system cannot be obtained
 */
@Before
public void setUp() throws IOException {
  fileSystem = FileSystem.get(new Configuration());

  // Qualify the temp directory so the path carries the file system's scheme.
  Path tempPath = new Path(Files.createTempDir().getAbsolutePath());
  testDirectory = fileSystem.makeQualified(tempPath);

  partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", "username_part", 2)
      .hash("email", 3)
      .build();

  DatasetDescriptor userDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .location(testDirectory)
      .partitionStrategy(partitionStrategy)
      .build();

  dataset = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("partitioned-users")
      .configuration(new Configuration())
      .uri(URI.create("test"))
      .descriptor(userDescriptor)
      .type(Record.class)
      .build();
}
.name(name) .fileSystem(fileSystem) .uri(uri) .descriptor(new DatasetDescriptor.Builder(descriptor) .location(p)
.descriptor(newDescriptor) .type(type) .uri(new URIBuilder(getUri(), namespace, name).build()) .partitionKey(newDescriptor.isPartitioned() ? new PartitionKey() : null) .partitionListener(getPartitionListener())
@Test public void testReadySignalUpdatesModifiedTime() { final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>() .namespace("ns") .name("users") .configuration(getConfiguration()) .descriptor( new DatasetDescriptor.Builder().schema(USER_SCHEMA).format(format) .location(testDirectory).build()) .type(Record.class) .uri(URIBuilder.build(URI.create("repo:" + testDirectory.toUri()), "ns", "name")) .build(); Assert.assertFalse("Dataset should not be ready before being signaled", ds.isReady()); // the modified time depends on the filesystem, and may only be granular to the second // signal and check until the modified time is after the current time, or until // enough time has past that the signal should have been distinguishable long signaledTime = 0; long currentTime = System.currentTimeMillis(); while(currentTime >= signaledTime && (System.currentTimeMillis() - currentTime) <= 2000) { ds.signalReady(); signaledTime = ds.getLastModified(); } Assert.assertTrue("Dataset should have been signaled as ready", ds.isReady()); Assert.assertTrue("Signal should update the modified time", signaledTime > currentTime); Assert.assertFalse("Only the dataset should have been signaled", ((Signalable)ds.with("username", "bob")).isReady()); }
@Override public <E> Dataset<E> update(String namespace, String name, DatasetDescriptor descriptor, Class<E> type) { Preconditions.checkNotNull(namespace, "Namespace cannot be null"); Preconditions.checkNotNull(name, "Dataset name cannot be null"); Preconditions.checkNotNull(descriptor, "Descriptor cannot be null"); DatasetDescriptor oldDescriptor = metadataProvider.load(namespace, name); // oldDescriptor is valid if load didn't throw NoSuchDatasetException Compatibility.checkUpdate(oldDescriptor, descriptor); DatasetDescriptor updatedDescriptor = metadataProvider.update(namespace, name, descriptor); LOG.debug("Updated dataset: {} schema: {} location: {}", new Object[] { name, updatedDescriptor.getSchema(), updatedDescriptor.getLocation() }); return new FileSystemDataset.Builder<E>() .namespace(namespace) .name(name) .configuration(conf) .descriptor(updatedDescriptor) .type(type) .uri(new URIBuilder(getUri(), namespace, name).build()) .partitionKey(updatedDescriptor.isPartitioned() ? new PartitionKey() : null) .partitionListener(getPartitionListener()) .build(); }
/**
 * Loads the descriptor for the named dataset from the metadata provider and
 * builds a {@code FileSystemDataset} around it.
 *
 * @param namespace the dataset's namespace; must not be null
 * @param name the dataset's name; must not be null
 * @param type the entity class for the returned dataset
 * @return the dataset built from the loaded descriptor
 */
@Override
public <E> Dataset<E> load(String namespace, String name, Class<E> type) {
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(name, "Dataset name cannot be null");

  LOG.debug("Loading dataset: {}", name);

  DatasetDescriptor loaded = metadataProvider.load(namespace, name);

  // Only partitioned datasets get a (new, empty) partition key.
  PartitionKey rootKey = null;
  if (loaded.isPartitioned()) {
    rootKey = new PartitionKey();
  }

  FileSystemDataset<E> result = new FileSystemDataset.Builder<E>()
      .namespace(namespace)
      .name(name)
      .configuration(conf)
      .descriptor(loaded)
      .type(type)
      .uri(new URIBuilder(getUri(), namespace, name).build())
      .partitionKey(rootKey)
      .partitionListener(getPartitionListener())
      .build();

  LOG.debug("Loaded dataset:{}", result);

  return result;
}
/**
 * Verifies that a dataset with no constraints applied reports not-ready
 * until {@code signalReady()} is called, and ready afterwards.
 */
@Test
public void signalReadyOnUnboundedDataset() {
  final DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(format)
      .location(testDirectory)
      .build();

  final FileSystemDataset<Record> unbounded =
      new FileSystemDataset.Builder<Record>()
          .namespace("ns")
          .name("users")
          .configuration(getConfiguration())
          .descriptor(descriptor)
          .type(Record.class)
          .uri(URIBuilder.build(URI.create("repo:" + testDirectory.toUri()), "ns", "name"))
          .build();

  Assert.assertFalse("Unbounded dataset has not been signaled",
      unbounded.isReady());

  unbounded.signalReady();

  Assert.assertTrue("Unbounded dataset has been signaled and should be ready",
      unbounded.isReady());
}