public DatasetDescriptor addToDescriptor(DatasetDescriptor descriptor) {
  DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder(descriptor)
      .property(CHARSET_PROPERTY, charset)
      .property(DELIMITER_PROPERTY, delimiter)
      .property(ESCAPE_CHAR_PROPERTY, escape)
      .property(QUOTE_CHAR_PROPERTY, quote)
      .property(HAS_HEADER_PROPERTY, Boolean.toString(useHeader))
      .property(LINES_TO_SKIP_PROPERTY, Integer.toString(linesToSkip));
  if (header != null) {
    builder.property(HEADER_PROPERTY, header);
  }
  return builder.build();
}
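Properties added through DatasetDescriptor.Builder travel with the built descriptor and can be read back later, which is how the tests further down verify them. A minimal standalone sketch, assuming the Kite SDK org.kitesdk.data package is on the classpath; the key "my.example.prop" is made up for illustration:

import org.apache.avro.Schema;
import org.kitesdk.data.DatasetDescriptor;

public class DescriptorPropertyExample {
  public static void main(String[] args) {
    // build a descriptor carrying one custom property (hypothetical key)
    DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
        .schema(Schema.create(Schema.Type.STRING))
        .property("my.example.prop", "some-value")
        .build();

    // the property can be inspected on the built descriptor, as the
    // tests below do with hasProperty/getProperty/listProperties
    System.out.println(descriptor.hasProperty("my.example.prop"));  // true
    System.out.println(descriptor.getProperty("my.example.prop"));  // some-value
    System.out.println(descriptor.listProperties());                // contains the key
  }
}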
@Override
public FileSystemWriter<Record> newWriter(Path directory, Schema datasetSchema, Schema writerSchema) {
  return FileSystemWriter.newWriter(fs, directory, 100, 2 * 1024 * 1024,
      new DatasetDescriptor.Builder()
          .property("kite.writer.roll-interval-seconds", String.valueOf(10))
          .property("kite.writer.target-file-size", String.valueOf(32 * 1024 * 1024)) // 32 MB
          .property("kite.writer.fs-supports-rename", String.valueOf(false))
          .schema(datasetSchema)
          .format("avro")
          .build(),
      writerSchema);
}
@Override
public FileSystemWriter<Record> newWriter(Path directory, Schema datasetSchema, Schema writerSchema) {
  return FileSystemWriter.newWriter(fs, directory, 100, 2 * 1024 * 1024,
      new DatasetDescriptor.Builder()
          .property("kite.writer.roll-interval-seconds", String.valueOf(10))
          .property("kite.writer.target-file-size", String.valueOf(32 * 1024 * 1024)) // 32 MB
          .schema(datasetSchema)
          .format("avro")
          .build(),
      writerSchema);
}
@Before
public void createTestDatasets() {
  Datasets.delete("dataset:file:/tmp/datasets/unpartitioned");
  Datasets.delete("dataset:file:/tmp/datasets/partitioned");
  Datasets.delete("dataset:file:/tmp/datasets/temporary");

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(TestRecord.class)
      .build();
  unpartitioned = Datasets.create("dataset:file:/tmp/datasets/unpartitioned",
      descriptor, TestRecord.class);

  descriptor = new DatasetDescriptor.Builder(descriptor)
      .property("kite.writer.cache-size", "20")
      .partitionStrategy(new PartitionStrategy.Builder()
          .hash("id", 4)
          .build())
      .build();
  partitioned = Datasets.create("dataset:file:/tmp/datasets/partitioned",
      descriptor, TestRecord.class);

  // create a second dataset with the same partitioning for replacement parts
  temporary = Datasets.create("dataset:file:/tmp/datasets/temporary",
      descriptor, TestRecord.class);

  writeTestRecords(unpartitioned);
  writeTestRecords(partitioned);
  writeTestRecords(temporary);
}
@Test
public void testCustomProperties() {
  final String propName = "my.custom.property";
  final String propValue = "string";
  DatasetDescriptor descriptorWithProp = new DatasetDescriptor.Builder(testDescriptor)
      .property(propName, propValue)
      .build();

  DatasetDescriptor created = provider.create(NAMESPACE, NAME, descriptorWithProp);
  Assert.assertTrue("Should have custom property",
      created.hasProperty(propName));
  Assert.assertEquals("Should have correct custom property value",
      propValue, created.getProperty(propName));
  Assert.assertTrue("List should contain property name",
      created.listProperties().contains(propName));

  DatasetDescriptor loaded = provider.load(NAMESPACE, NAME);
  Assert.assertTrue("Should have custom property",
      loaded.hasProperty(propName));
  Assert.assertEquals("Should have correct custom property value",
      propValue, loaded.getProperty(propName));
  Assert.assertTrue("List should contain property name",
      loaded.listProperties().contains(propName));
}
@Test
public void testUpdate() throws IOException {
  DatasetDescriptor updated = new DatasetDescriptor.Builder(descriptor)
      .property("parquet.block.size", "1024")
      .build();

  DatasetDescriptor saved = provider.update("default", "old_2", updated);
  Assert.assertNotNull("Should find saved metadata", saved);
  Assert.assertEquals("Should update old dataset successfully",
      updated.getProperty("parquet.block.size"),
      saved.getProperty("parquet.block.size"));

  DatasetDescriptor loaded = provider.load("default", "old_2");
  Assert.assertNotNull("Should find saved metadata", loaded);
  Assert.assertEquals("Should make changes on disk",
      updated.getProperty("parquet.block.size"),
      loaded.getProperty("parquet.block.size"));
  Assert.assertFalse("Should not move metadata to new location",
      local.exists(new Path(root, new Path("default", "old_2"))));
}
@Before
public void setUp() throws Exception {
  super.setUp();
  inputDataset = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(STRING_SCHEMA)
      .format(format)
      .build(), GenericData.Record.class);
  outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(STATS_SCHEMA)
      .format(format)
      .build(), GenericData.Record.class);
}
private static DatasetDescriptor descriptor(FileSystem fs, Result.Table table)
    throws IOException {
  // inspect the path to determine the partition strategy
  PartitionStrategy strategy = strategy(fs, table.location);
  DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder()
      .format(table.format)
      .schema(table.schema)
      .partitionStrategy(strategy)
      .location(table.location);
  if (table.depth < 0) {
    builder.property("kite.filesystem.mixed-depth", "true");
  }
  return builder.build();
}
@Test
public void testConfigureDurableParquetAppender() throws IOException {
  FileSystem fs = LocalFileSystem.getInstance();
  FileSystemWriter<Object> writer = FileSystemWriter.newWriter(
      fs, new Path("/tmp"), -1, -1,
      new DatasetDescriptor.Builder()
          .property(FileSystemProperties.NON_DURABLE_PARQUET_PROP, "false")
          .schema(SCHEMA)
          .format("parquet")
          .build(),
      SCHEMA);
  Assert.assertEquals("Disabling the non-durable parquet appender should get us a durable appender",
      DurableParquetAppender.class,
      writer.newAppender(testDirectory).getClass());
}
@Override
public FileSystemWriter<Record> newWriter(Path directory, Schema datasetSchema, Schema writerSchema) {
  return FileSystemWriter.newWriter(fs, directory, 100, 2 * 1024 * 1024,
      new DatasetDescriptor.Builder()
          .property("kite.writer.roll-interval-seconds", String.valueOf(10))
          .property("kite.writer.target-file-size", String.valueOf(32 * 1024 * 1024)) // 32 MB
          .schema(datasetSchema)
          .format("parquet")
          .build(),
      writerSchema);
}
@Test
public void testParquetConfiguration() throws IOException {
  FileSystem fs = LocalFileSystem.getInstance();
  FileSystemWriter<Object> writer = FileSystemWriter.newWriter(
      fs, new Path("/tmp"), -1, -1,
      new DatasetDescriptor.Builder()
          .property("parquet.block.size", "34343434")
          .schema(SCHEMA)
          .format("parquet")
          .build(),
      SCHEMA);
  Assert.assertEquals("Should copy properties to Configuration",
      34343434, writer.conf.getInt("parquet.block.size", -1));
}
private static DatasetDescriptor copy(DatasetDescriptor descriptor) {
  // don't reuse the previous dataset's location and don't use durable
  // parquet writers because fault-tolerance is handled by OutputCommitter
  return new DatasetDescriptor.Builder(descriptor)
      .property(FileSystemProperties.NON_DURABLE_PARQUET_PROP, "true")
      .location((URI) null)
      .build();
}
@Override
public DatasetReader<Text> newReader() throws IOException {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .property(InputFormatUtil.INPUT_FORMAT_CLASS_PROP,
          "org.apache.hadoop.mapreduce.lib.input.TextInputFormat")
      .property(InputFormatUtil.INPUT_FORMAT_RECORD_PROP, "value")
      .schema(Schema.create(Schema.Type.STRING))
      .build();
  return new InputFormatReader<Text>(localfs, userFile, descriptor);
}
@Before
@Override
public void setUp() throws Exception {
  super.setUp();
  dataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(STATS_SCHEMA)
      .format(format)
      .build(), GenericData.Record.class);
}
@Test
public void testConfigureNonDurableParquetAppender() throws IOException {
  FileSystem fs = LocalFileSystem.getInstance();
  FileSystemWriter<Object> writer = FileSystemWriter.newWriter(
      fs, new Path("/tmp"), -1, -1,
      new DatasetDescriptor.Builder()
          .property(FileSystemProperties.NON_DURABLE_PARQUET_PROP, "true")
          .schema(SCHEMA)
          .format("parquet")
          .build(),
      SCHEMA);
  Assert.assertEquals("Enabling the non-durable parquet appender should get us a non-durable appender",
      ParquetAppender.class,
      writer.newAppender(testDirectory).getClass());
}
@Before
@Override
public void setUp() throws Exception {
  super.setUp();
  outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(STATS_SCHEMA)
      .format(format)
      .build(), GenericData.Record.class);
}
@Override
public DatasetReader<GenericData.Record> newReader() throws IOException {
  final DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .property("kite.csv.has-header", "true")
      .schema(VALIDATOR_SCHEMA)
      .build();
  return new CSVFileReader<GenericData.Record>(localfs, validatorFile, desc,
      DataModelUtil.accessor(GenericData.Record.class, desc.getSchema()));
}
@Override
public DatasetReader<LongWritable> newReader() throws IOException {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .property(InputFormatUtil.INPUT_FORMAT_CLASS_PROP,
          "org.apache.hadoop.mapreduce.lib.input.TextInputFormat")
      .property(InputFormatUtil.INPUT_FORMAT_RECORD_PROP, "key")
      .schema(Schema.create(Schema.Type.LONG))
      .build();
  return new InputFormatReader<LongWritable>(localfs, userFile, descriptor);
}