    DEFAULT_SYNCABLE_SYNC_ON_BATCH) && (Formats.AVRO.equals(format));
this.datasetName = view.getDataset().getName();
boolean deleteAllUnsafe(boolean useTrash) {
  boolean deleted = false;
  if (dataset.getDescriptor().isPartitioned()) {
    for (StorageKey key : partitionIterator()) {
      deleted = (useTrash ?
          FileSystemUtil.cleanlyMoveToTrash(fs, root, key.getPath()) :
          FileSystemUtil.cleanlyDelete(fs, root, key.getPath())) || deleted;
      if (listener != null) {
        // the relative path is the partition name, so we can simply delete it
        // in Hive
        listener.partitionDeleted(dataset.getNamespace(), dataset.getName(),
            key.getPath().toString());
      }
    }
  } else {
    for (Path path : pathIterator()) {
      deleted = (useTrash ?
          FileSystemUtil.cleanlyMoveToTrash(fs, root, path) :
          FileSystemUtil.cleanlyDelete(fs, root, path)) || deleted;
    }
  }
  return deleted;
}
@Test
public void testRelative() {
  DatasetRepository repo = DatasetRepositories.repositoryFor("repo:file:target/data");
  repo.delete("ns", "test");
  repo.create("ns", "test", descriptor);

  Dataset<Record> ds = Datasets.<Record, Dataset<Record>>load(
      "dataset:file:target/data/ns/test", Record.class);

  Assert.assertNotNull("Should load dataset", ds);
  Assert.assertTrue(ds instanceof FileSystemDataset);
  Path cwd = localFS.makeQualified(new Path("."));
  Assert.assertEquals("Locations should match",
      new Path(cwd, "target/data/ns/test").toUri(),
      ds.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repo.load("ns", "test").getDescriptor(), ds.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", ds.getNamespace());
  Assert.assertEquals("Should report correct name", "test", ds.getName());

  repo.delete("ns", "test");
}
ObjectMapper mapper = new ObjectMapper();
mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
JsonNode node = mapper.readTree(new URL(url));
node = node.get("duObjects");
TypeReference<List<Dataset>> typeRef = new TypeReference<List<Dataset>>() {};
List<Dataset> list = mapper.readValue(node.traverse(), typeRef);
for (Dataset dataSet : list) {
  System.out.println(dataSet.getName());
}
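// A slightly more defensive variant of the same parse (a sketch, not from the
// original source): it reuses the mapper, url, Dataset POJO, and "duObjects"
// key from the snippet above, and only adds a null guard for the case where
// the key is absent from the response. Assumes the same Jackson 2.x imports.
JsonNode duObjects = mapper.readTree(new URL(url)).get("duObjects");
if (duObjects != null) {
  List<Dataset> datasets = mapper.readValue(duObjects.traverse(),
      new TypeReference<List<Dataset>>() {});
  for (Dataset dataSet : datasets) {
    System.out.println(dataSet.getName());
  }
}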
@Test
public void testRelative() {
  DatasetRepository repo = DatasetRepositories
      .repositoryFor("repo:hdfs://" + hdfsAuth + "/data?absolute=false");
  repo.delete("ns", "test");
  repo.create("ns", "test", descriptor);

  Dataset<Object> ds = Datasets.<Object, Dataset<Object>>load(
      "dataset:hdfs://" + hdfsAuth + "/data/ns/test?absolute=false", Object.class);

  Assert.assertNotNull("Should load dataset", ds);
  Assert.assertTrue(ds instanceof FileSystemDataset);
  Path cwd = getDFS().makeQualified(new Path("."));
  Assert.assertEquals("Locations should match",
      new Path(cwd, "data/ns/test").toUri(), ds.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repo.load("ns", "test").getDescriptor(), ds.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", ds.getNamespace());
  Assert.assertEquals("Should report correct name", "test", ds.getName());

  repo.delete("ns", "test");
}
@Override
@SuppressWarnings("unchecked")
public void configureSource(Job job, int inputId) throws IOException {
  Configuration conf = job.getConfiguration();
  if (inputId == -1) {
    // this view is the job's only input, so configure it directly on the job
    job.setMapperClass(CrunchMapper.class);
    job.setInputFormatClass(formatBundle.getFormatClass());
    formatBundle.configure(conf);
  } else {
    // one of several inputs: register it under a dummy path keyed by input id
    Path dummy = new Path("/view/" + view.getDataset().getName());
    CrunchInputs.addInputPath(job, dummy, formatBundle, inputId);
  }
}
@Test
public void testAbsolute() {
  DatasetRepository repo = DatasetRepositories.repositoryFor("repo:file:/tmp/data");
  repo.delete("ns", "test");
  repo.create("ns", "test", descriptor);

  Dataset<Record> ds = Datasets.<Record, Dataset<Record>>load(
      "dataset:file:/tmp/data/ns/test", Record.class);

  Assert.assertNotNull("Should load dataset", ds);
  Assert.assertTrue(ds instanceof FileSystemDataset);
  Assert.assertEquals("Locations should match",
      URI.create("file:/tmp/data/ns/test"), ds.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repo.load("ns", "test").getDescriptor(), ds.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", ds.getNamespace());
  Assert.assertEquals("Should report correct name", "test", ds.getName());

  repo.delete("ns", "test");
}
    @Override
    public void run() {
      repo.load("ns", unbounded.getDataset().getName(), IncompatibleEvent.class);
    }
  });
@Test
public void testAbsoluteTrailingSlash() {
  DatasetRepository repo = DatasetRepositories
      .repositoryFor("repo:hdfs://" + hdfsAuth + "/tmp/data/");
  repo.delete("ns", "test");
  repo.create("ns", "test", descriptor);

  Dataset<Object> ds = Datasets.<Object, Dataset<Object>>load(
      "dataset:hdfs://" + hdfsAuth + "/tmp/data/ns/test/", Object.class);

  Assert.assertNotNull("Should load dataset", ds);
  Assert.assertTrue(ds instanceof FileSystemDataset);
  Assert.assertEquals("Locations should match",
      URI.create("hdfs://" + hdfsAuth + "/tmp/data/ns/test"),
      ds.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repo.load("ns", "test").getDescriptor(), ds.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", ds.getNamespace());
  Assert.assertEquals("Should report correct name", "test", ds.getName());

  repo.delete("ns", "test");
}
private DatasetWriter<GenericRecord> newWriter(
    final UserGroupInformation login, final URI uri) {
  View<GenericRecord> view = KerberosUtil.runPrivileged(login,
      new PrivilegedExceptionAction<Dataset<GenericRecord>>() {
        @Override
        public Dataset<GenericRecord> run() {
          return Datasets.load(uri);
        }
      });

  DatasetDescriptor descriptor = view.getDataset().getDescriptor();
  String formatName = descriptor.getFormat().getName();
  Preconditions.checkArgument(allowedFormats().contains(formatName),
      "Unsupported format: " + formatName);

  Schema newSchema = descriptor.getSchema();
  if (targetSchema == null || !newSchema.equals(targetSchema)) {
    this.targetSchema = descriptor.getSchema();
    // target dataset schema has changed, invalidate all readers based on it
    readers.invalidateAll();
  }

  this.reuseDatum = !("parquet".equals(formatName));
  this.datasetName = view.getDataset().getName();

  return view.newWriter();
}
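// A hypothetical call site for newWriter() above (a sketch; the login,
// targetUri, and record variables are assumptions, not part of the original
// code). The writer is opened once per dataset URI and reused until the
// dataset's schema changes, which is why newWriter() invalidates the cached
// readers when it sees a new schema.
DatasetWriter<GenericRecord> writer = newWriter(login, targetUri);
try {
  writer.write(record); // one Avro GenericRecord per incoming event
} finally {
  writer.close();
}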
@Test
public void testAbsoluteRoot() {
  DatasetRepository repo = DatasetRepositories
      .repositoryFor("repo:hdfs://" + hdfsAuth + "/");
  repo.delete("ns", "test");
  repo.create("ns", "test", descriptor);

  Dataset<Object> ds = Datasets.<Object, Dataset<Object>>load(
      "dataset:hdfs://" + hdfsAuth + "/ns/test", Object.class);

  Assert.assertNotNull("Should load dataset", ds);
  Assert.assertTrue(ds instanceof FileSystemDataset);
  Assert.assertEquals("Locations should match",
      URI.create("hdfs://" + hdfsAuth + "/ns/test"),
      ds.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repo.load("ns", "test").getDescriptor(), ds.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", ds.getNamespace());
  Assert.assertEquals("Should report correct name", "test", ds.getName());

  repo.delete("ns", "test");
}
@Test
public void testLoad() {
  ensureCreated();

  Dataset dataset = repo.load(NAMESPACE, NAME);

  Assert.assertNotNull("Dataset is loaded and produced", dataset);
  Assert.assertEquals("Dataset name is propagated", NAME, dataset.getName());
  Assert.assertEquals("Dataset schema is loaded",
      testSchema, dataset.getDescriptor().getSchema());
}
@Test
public void testAbsolute() {
  DatasetRepository repo = DatasetRepositories
      .repositoryFor("repo:hdfs://" + hdfsAuth + "/tmp/data");
  repo.delete("ns", "test");
  repo.create("ns", "test", descriptor);

  Dataset<Object> ds = Datasets.<Object, Dataset<Object>>load(
      "dataset:hdfs://" + hdfsAuth + "/tmp/data/ns/test", Object.class);

  Assert.assertNotNull("Should load dataset", ds);
  Assert.assertTrue(ds instanceof FileSystemDataset);
  Assert.assertEquals("Locations should match",
      URI.create("hdfs://" + hdfsAuth + "/tmp/data/ns/test"),
      ds.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repo.load("ns", "test").getDescriptor(), ds.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", ds.getNamespace());
  Assert.assertEquals("Should report correct name", "test", ds.getName());

  repo.delete("ns", "test");
}
@Test
public void testSpecificProjectionLoad() throws IOException {
  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = unbounded.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<SmallEvent> dataset = repo.load(
      "ns", unbounded.getDataset().getName(), SmallEvent.class);

  Set<SmallEvent> expected = Sets.newHashSet(toSmallEvent(sepEvent),
      toSmallEvent(octEvent), toSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
@Test
public void testAbsoluteWebHdfs() {
  Assume.assumeTrue(!Hadoop.isHadoop1());

  String webhdfsAuth = getConfiguration().get(
      DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
  DatasetRepository repo = DatasetRepositories
      .repositoryFor("repo:webhdfs://" + webhdfsAuth + "/tmp/data");
  repo.delete("ns", "test");
  repo.create("ns", "test", descriptor);

  Dataset<Object> ds = Datasets.<Object, Dataset<Object>>load(
      "dataset:webhdfs://" + webhdfsAuth + "/tmp/data/ns/test", Object.class);

  Assert.assertNotNull("Should load dataset", ds);
  Assert.assertTrue(ds instanceof FileSystemDataset);
  Assert.assertEquals("Locations should match",
      URI.create("webhdfs://" + webhdfsAuth + "/tmp/data/ns/test"),
      ds.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repo.load("ns", "test").getDescriptor(), ds.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", ds.getNamespace());
  Assert.assertEquals("Should report correct name", "test", ds.getName());

  repo.delete("ns", "test");
}
}
@Test
public void testMixedProjection() throws IOException {
  Dataset<StandardEvent> original = repo.create("ns", "mixedProjection",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .build(), StandardEvent.class);

  DatasetWriter<StandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(sepEvent);
    writer.write(octEvent);
    writer.write(novEvent);
  } finally {
    Closeables.close(writer, false);
  }

  Dataset<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
@Test
public void testViewConstraints() {
  DatasetRepository repo = DatasetRepositories.repositoryFor("repo:file:/tmp/data");
  repo.delete("ns", "test");
  repo.create("ns", "test", descriptor);

  RefinableView<Record> v = Datasets.<Record, RefinableView<Record>>load(
      "view:file:/tmp/data/ns/test?username=user", Record.class);

  Assert.assertNotNull("Should load view", v);
  Assert.assertTrue(v instanceof FileSystemView);
  Assert.assertEquals("Locations should match",
      URI.create("file:/tmp/data/ns/test"),
      v.getDataset().getDescriptor().getLocation());

  DatasetDescriptor loaded = repo.load("ns", "test").getDescriptor();
  Assert.assertEquals("Descriptors should match",
      loaded, v.getDataset().getDescriptor());
  Assert.assertEquals("Should report correct namespace",
      "ns", v.getDataset().getNamespace());
  Assert.assertEquals("Should report correct name",
      "test", v.getDataset().getName());

  Constraints withUser = new Constraints(loaded.getSchema())
      .with("username", new Utf8("user"));
  Assert.assertEquals("Constraints should be username=user",
      withUser, ((FileSystemView) v).getConstraints());

  repo.delete("ns", "test");
}
@Test
public void testReflectProjectionLoad() throws IOException {
  Dataset<ReflectStandardEvent> original = repo.create(
      "ns", "reflectProjection",
      new DatasetDescriptor.Builder()
          .schema(ReflectStandardEvent.class)
          .build(),
      ReflectStandardEvent.class);

  DatasetWriter<ReflectStandardEvent> writer = null;
  try {
    writer = original.newWriter();
    writer.write(new ReflectStandardEvent(sepEvent));
    writer.write(new ReflectStandardEvent(octEvent));
    writer.write(new ReflectStandardEvent(novEvent));
  } finally {
    Closeables.close(writer, false);
  }

  View<ReflectSmallEvent> dataset = repo.load("ns", original.getName(),
      ReflectSmallEvent.class);

  Set<ReflectSmallEvent> expected = Sets.newHashSet(
      new ReflectSmallEvent(sepEvent), new ReflectSmallEvent(octEvent),
      new ReflectSmallEvent(novEvent));

  assertContentEquals(expected, dataset);
}
@Test
public void testCreate() {
  Assert.assertFalse("Sanity check", testProvider.exists(NAMESPACE, NAME));

  Dataset dataset = repo.create(NAMESPACE, NAME, testDescriptor);

  Assert.assertNotNull("Dataset should be returned", dataset);
  Assert.assertTrue("Dataset should exist", repo.exists(NAMESPACE, NAME));

  DatasetDescriptor saved = testProvider.load(NAMESPACE, NAME);
  Assert.assertNotNull("Dataset metadata is stored under name", saved);
  Assert.assertEquals("Saved metadata is returned",
      saved, dataset.getDescriptor());

  // TODO: Add test for namespace accessor
  Assert.assertEquals("Dataset name is propagated",
      NAME, dataset.getName());
  Assert.assertEquals("Dataset schema is propagated",
      testDescriptor.getSchema(), saved.getSchema());
  Assert.assertNotNull("Dataset should have a URI location",
      saved.getLocation());
  Assert.assertNotNull("Dataset location should have a scheme",
      saved.getLocation().getScheme());
}
@Test
public void testCreatePartitioned() throws IOException {
  DatasetDescriptor requested = new DatasetDescriptor.Builder(testDescriptor)
      .partitionStrategy(
          new PartitionStrategy.Builder().hash("username", 3).build())
      .build();

  Assert.assertFalse("Sanity check", testProvider.exists(NAMESPACE, "test2"));

  Dataset dataset = repo.create(NAMESPACE, "test2", requested);

  DatasetDescriptor saved = testProvider.load(NAMESPACE, "test2");
  Assert.assertNotNull("Dataset metadata is stored under name", saved);
  Assert.assertEquals("Saved metadata is returned",
      saved, dataset.getDescriptor());
  Assert.assertEquals("Dataset name is propagated",
      "test2", dataset.getName());
  Assert.assertEquals("Dataset schema is propagated",
      requested.getSchema(), saved.getSchema());
  Assert.assertEquals("Dataset partition strategy propagated",
      requested.getPartitionStrategy(), saved.getPartitionStrategy());
}