/**
 * Configure the dataset's location (optional).
 *
 * @param uri A location String URI
 * @return An instance of the builder for method chaining.
 *
 * @since 0.8.0
 */
public Builder location(String uri) {
  // parse the string and delegate to the URI overload
  URI parsedUri = URI.create(uri);
  return location(parsedUri);
}
/**
 * Configure the dataset's location (optional).
 *
 * @param path A location Path
 * @return An instance of the builder for method chaining.
 *
 * @since 0.8.0
 */
public Builder location(Path path) {
  // render the Path as a string and delegate to the String overload
  String pathAsString = path.toString();
  return location(pathAsString);
}
@Override public DatasetDescriptor create(String namespace, String name, DatasetDescriptor descriptor) { Preconditions.checkNotNull(namespace, "Namespace cannot be null"); Preconditions.checkNotNull(name, "Name cannot be null"); Preconditions.checkNotNull(descriptor, "Descriptor cannot be null"); if (exists(namespace, name)) { throw new DatasetExistsException( "Dataset already exists for name:" + name); } DatasetDescriptor newDescriptor; if (descriptor.getLocation() == null) { newDescriptor = new DatasetDescriptor.Builder(descriptor) .location(fs.makeQualified(new Path(newLocation(name)))) .build(); } else { // don't need to modify it newDescriptor = descriptor; } // save and return if (!descriptors.containsKey(namespace)) { descriptors.put(namespace, Maps.<String, DatasetDescriptor>newHashMap()); } Map<String, DatasetDescriptor> datasets = descriptors.get(namespace); datasets.put(name, newDescriptor); return newDescriptor; }
@Test public void testUnpartitionedReplace() { // recreate temporary without a partition strategy Datasets.delete("dataset:file:/tmp/datasets/temporary"); DatasetDescriptor descriptor = new DatasetDescriptor .Builder(unpartitioned.getDescriptor()) .location((URI) null) // clear the location .build(); temporary = Datasets.create("dataset:file:/tmp/datasets/temporary", descriptor, TestRecord.class); Assert.assertTrue("Should allow replacing an unpartitioned dataset", unpartitioned.canReplace(unpartitioned)); // make sure there are multiple files writeTestRecords(unpartitioned); writeTestRecords(unpartitioned); writeTestRecords(temporary); writeTestRecords(temporary); Set<String> originalFiles = Sets.newHashSet( Iterators.transform(unpartitioned.pathIterator(), new GetFilename())); Set<String> replacementFiles = Sets.newHashSet( Iterators.transform(temporary.pathIterator(), new GetFilename())); Iterators.transform(temporary.pathIterator(), new GetFilename()); Assert.assertFalse("Sanity check", originalFiles.equals(replacementFiles)); unpartitioned.replace(unpartitioned, temporary); Set<String> replacedFiles = Sets.newHashSet( Iterators.transform(unpartitioned.pathIterator(), new GetFilename())); Assert.assertEquals("Should contain the replacement files", replacementFiles, replacedFiles); }
@Test(expected = ValidationException.class)
public void testCannotMergeDatasetsWithDifferentFormats() throws IOException {
  // build two otherwise-identical descriptors that differ only in format
  DatasetDescriptor avroDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(Formats.AVRO)
      .location(testDirectory)
      .build();
  DatasetDescriptor parquetDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(Formats.PARQUET)
      .location(testDirectory)
      .build();

  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(avroDescriptor)
      .type(Record.class)
      .build();
  FileSystemDataset<Record> dsUpdate = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(parquetDescriptor)
      .type(Record.class)
      .build();

  // merging datasets with mismatched formats must fail validation
  ds.merge(dsUpdate);
}
@Test
@SuppressWarnings("deprecation")
public void testGetPartitionReturnsNullIfNoAutoCreate() throws IOException {
  // hash-partition on username into two buckets
  PartitionStrategy partitionStrategy =
      new PartitionStrategy.Builder().hash("username", 2).build();
  DatasetDescriptor partitionedDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(format)
      .location(testDirectory)
      .partitionStrategy(partitionStrategy)
      .build();

  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("partitioned-users")
      .configuration(getConfiguration())
      .descriptor(partitionedDescriptor)
      .type(Record.class)
      .build();

  // with autoCreate=false, a missing partition yields null
  Assert.assertNull(ds.getPartition(new PartitionKey(1), false));
}
@Test(expected = ValidationException.class)
public void testCannotMergeDatasetsWithDifferentSchemas() throws IOException {
  // build two otherwise-identical descriptors that differ only in schema
  DatasetDescriptor stringDescriptor = new DatasetDescriptor.Builder()
      .schema(STRING_SCHEMA)
      .location(testDirectory)
      .build();
  DatasetDescriptor userDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .location(testDirectory)
      .build();

  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(stringDescriptor)
      .type(Record.class)
      .build();
  FileSystemDataset<Record> dsUpdate = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(userDescriptor)
      .type(Record.class)
      .build();

  // merging datasets with mismatched schemas must fail validation
  ds.merge(dsUpdate);
}
@Test public void testCreateWithLocation() throws URISyntaxException { Assert.assertFalse("Sanity check", provider.exists(NAMESPACE, NAME)); String auth = getDFS().getUri().getAuthority(); URI requestedLocation = new URI("hdfs://" + auth + "/tmp/data/my_data_set"); DatasetDescriptor requested = new DatasetDescriptor.Builder(testDescriptor) .location(requestedLocation) .build(); final DatasetDescriptor created; try { created = provider.create(NAMESPACE, NAME, requested); } catch (UnsupportedOperationException ex) { // this is expected if the provider doesn't support requested locations return; } // if supported, the location should be unchanged. Assert.assertNotNull("Descriptor should be returned", created); Assert.assertTrue("Descriptor should exist", provider.exists(NAMESPACE, NAME)); Assert.assertEquals("Requested locations should match", requestedLocation, created.getLocation()); }
@Test public void testUpdateFailsWithLocationChange() { ensureCreated(); Dataset<Record> dataset = repo.load(NAMESPACE, NAME); URI location = dataset.getDescriptor().getLocation(); DatasetDescriptor changed = new DatasetDescriptor.Builder(dataset.getDescriptor()) .location(new Path(testDirectory, "newDataLocation").toUri()) .build(); try { repo.update(NAMESPACE, NAME, changed); Assert.fail("Should fail due to data location change"); } catch (ValidationException ex) { // expected } Assert.assertEquals( location, repo.load(NAMESPACE, NAME).getDescriptor().getLocation()); }
@Before
public void setUp() throws IOException {
  // share a single Configuration between the FileSystem and the dataset
  // builder instead of constructing two equivalent instances
  Configuration conf = new Configuration();
  fileSystem = FileSystem.get(conf);
  testDirectory = fileSystem.makeQualified(
      new Path(Files.createTempDir().getAbsolutePath()));
  partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", "username_part", 2).hash("email", 3).build();

  dataset = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("partitioned-users")
      .configuration(conf)
      .uri(URI.create("test"))
      .descriptor(new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .location(testDirectory)
          .partitionStrategy(partitionStrategy)
          .build())
      .type(Record.class)
      .build();
}
@Test
public void testWriteAndRead() throws IOException {
  // unpartitioned dataset using the schema referenced by URL
  DatasetDescriptor flatDescriptor = new DatasetDescriptor.Builder()
      .schemaUri(USER_SCHEMA_URL)
      .format(format)
      .compressionType(compressionType)
      .location(testDirectory)
      .build();

  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("test")
      .configuration(getConfiguration())
      .descriptor(flatDescriptor)
      .type(Record.class)
      .build();

  Assert.assertFalse("Dataset is not partitioned",
      ds.getDescriptor().isPartitioned());

  // round-trip ten users through the dataset
  writeTestUsers(ds, 10);
  checkTestUsers(ds, 10);
}
@Test
public void testPathIterator_Directory() {
  DatasetDescriptor flatDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(format)
      .compressionType(compressionType)
      .location(testDirectory)
      .build();

  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(flatDescriptor)
      .type(Record.class)
      .build();

  // an unpartitioned dataset has exactly one directory: its root
  List<Path> dirPaths = Lists.newArrayList(ds.dirIterator());

  Assert.assertEquals(
      "dirIterator for non-partitioned dataset should yield a single path.",
      1, dirPaths.size());
  Assert.assertEquals("dirIterator should yield absolute paths.",
      testDirectory, dirPaths.get(0));
}
@Test
public void signalReadyOnUnboundedDataset() {
  DatasetDescriptor usersDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(format)
      .location(testDirectory)
      .build();
  URI repoUri = URI.create("repo:" + testDirectory.toUri());

  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(usersDescriptor)
      .type(Record.class)
      .uri(URIBuilder.build(repoUri, "ns", "name"))
      .build();

  // readiness flips only after the dataset is explicitly signaled
  Assert.assertFalse("Unbounded dataset has not been signaled",
      ds.isReady());
  ds.signalReady();
  Assert.assertTrue("Unbounded dataset has been signaled and should be ready",
      ds.isReady());
}
@Test
public void testMoveToTrashWithoutPartitions() {
  DatasetDescriptor usersDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(format)
      .location(testDirectory)
      .build();

  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(usersDescriptor)
      .type(Record.class)
      .build();

  // populate the dataset, trash everything, then expect zero records
  writeTestUsers(ds, 10);
  Assert.assertTrue(ds.moveToTrash());
  checkReaderBehavior(ds.newReader(), 0, (RecordValidator<Record>) null);
}
private static DatasetDescriptor descriptor(FileSystem fs, Result.Table table) throws IOException { // inspect the path to determine the partition strategy PartitionStrategy strategy = strategy(fs, table.location); DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder() .format(table.format) .schema(table.schema) .partitionStrategy(strategy) .location(table.location); if (table.depth < 0) { builder.property("kite.filesystem.mixed-depth", "true"); } return builder.build(); }
@Test
public void testDeleteAllWithoutPartitions() {
  DatasetDescriptor usersDescriptor = new DatasetDescriptor.Builder()
      .schema(USER_SCHEMA)
      .format(format)
      .location(testDirectory)
      .build();

  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(usersDescriptor)
      .type(Record.class)
      .build();

  // populate the dataset, delete everything, then expect zero records
  writeTestUsers(ds, 10);
  Assert.assertTrue(ds.deleteAll());
  checkReaderBehavior(ds.newReader(), 0, (RecordValidator<Record>) null);
}
private static DatasetDescriptor copy(DatasetDescriptor descriptor) { // don't reuse the previous dataset's location and don't use durable // parquet writers because fault-tolerance is handled by OutputCommitter return new DatasetDescriptor.Builder(descriptor) .property(FileSystemProperties.NON_DURABLE_PARQUET_PROP, "true") .location((URI) null) .build(); }
private static DatasetDescriptor getDatasetDescriptor(Schema schema,
    URI location) {
  // minimal descriptor: just a schema plus a storage location
  DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder();
  return builder.schema(schema).location(location).build();
}
private static DatasetDescriptor copy(DatasetDescriptor descriptor) { // don't reuse the previous dataset's location and don't use durable // parquet writers because fault-tolerance is handled by OutputCommitter return new DatasetDescriptor.Builder(descriptor) .property(FileSystemProperties.NON_DURABLE_PARQUET_PROP, "true") .location((URI) null) .build(); }
private static DatasetDescriptor getDatasetDescriptor(Schema schema,
    URI location) {
  // minimal descriptor: just a schema plus a storage location
  DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder();
  return builder.schema(schema).location(location).build();
}