@Override
public int run(List<String> args) throws Exception {
  String inputUri = uri;
  String outputUri = "dataset:hive?dataset=correlated_events";
  if (args.size() == 1) {
    outputUri = args.get(0);
  }
  Preconditions.checkState(Datasets.exists(inputUri),
      "input dataset doesn't exist");
  if (!Datasets.exists(outputUri)) {
    Datasets.create(outputUri, new DatasetDescriptor.Builder()
        .format("avro")
        .schema(CorrelatedEvents.class)
        .build());
  }
  CorrelateEventsTask task = new CorrelateEventsTask(inputUri, outputUri);
  task.run();
  return 0;
}
@Override
public FileSystemWriter<Record> newWriter(Path directory, Schema datasetSchema,
    Schema writerSchema) {
  return FileSystemWriter.newWriter(fs, directory, 100, 2 * 1024 * 1024,
      new DatasetDescriptor.Builder()
          .property("kite.writer.roll-interval-seconds", String.valueOf(10))
          .property("kite.writer.target-file-size",
              String.valueOf(32 * 1024 * 1024)) // 32 MB
          .property("kite.writer.fs-supports-rename", String.valueOf(false))
          .schema(datasetSchema)
          .format("avro")
          .build(),
      writerSchema);
}
@BeforeClass
public static void setup() throws IOException {
  fs = LocalFileSystem.getInstance();
  testDirectory = new Path(Files.createTempDir().getAbsolutePath());
  FileSystemDatasetRepository repo =
      new FileSystemDatasetRepository(fs.getConf(), testDirectory);
  Dataset<StandardEvent> writerDataset = repo.create("ns", "test",
      new DatasetDescriptor.Builder()
          .schema(StandardEvent.class)
          .format(Formats.PARQUET)
          .build(),
      StandardEvent.class);
  DatasetWriter<StandardEvent> writer = writerDataset.newWriter();
  for (long i = 0; i < totalRecords; i++) {
    String text = String.valueOf(i);
    writer.write(new StandardEvent(text, text, i, text, text, i));
  }
  writer.close();
  readerDataset = repo.load("ns", "test", GenericRecord.class);
}
@Test
@SuppressWarnings("deprecation")
public void testGetPartitionReturnsNullIfNoAutoCreate() throws IOException {
  PartitionStrategy partitionStrategy = new PartitionStrategy.Builder()
      .hash("username", 2)
      .build();
  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("partitioned-users")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(format)
          .location(testDirectory)
          .partitionStrategy(partitionStrategy)
          .build())
      .type(Record.class)
      .build();
  Assert.assertNull(ds.getPartition(new PartitionKey(1), false));
}
@Test
public void signalReadyOnUnboundedDataset() {
  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(format)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .uri(URIBuilder.build(URI.create("repo:" + testDirectory.toUri()),
          "ns", "name"))
      .build();

  Assert.assertFalse("Unbounded dataset has not been signaled", ds.isReady());

  ds.signalReady();

  Assert.assertTrue("Unbounded dataset has been signaled and should be ready",
      ds.isReady());
}
@Test
public void testWriteAndRead() throws IOException {
  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("test")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schemaUri(USER_SCHEMA_URL)
          .format(format)
          .compressionType(compressionType)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .build();

  Assert.assertFalse("Dataset is not partitioned",
      ds.getDescriptor().isPartitioned());

  writeTestUsers(ds, 10);
  checkTestUsers(ds, 10);
}
@Test
public void testPathIterator_Directory() {
  FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(format)
          .compressionType(compressionType)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .build();

  List<Path> dirPaths = Lists.newArrayList(ds.dirIterator());

  Assert.assertEquals(
      "dirIterator for non-partitioned dataset should yield a single path.",
      1, dirPaths.size());
  Assert.assertEquals("dirIterator should yield absolute paths.",
      testDirectory, dirPaths.get(0));
}
@Test
public void testMoveToTrashWithoutPartitions() {
  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(format)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .build();

  writeTestUsers(ds, 10);
  Assert.assertTrue(ds.moveToTrash());

  checkReaderBehavior(ds.newReader(), 0, (RecordValidator<Record>) null);
}
private static DatasetDescriptor descriptor(FileSystem fs, Result.Table table)
    throws IOException {
  // inspect the path to determine the partition strategy
  PartitionStrategy strategy = strategy(fs, table.location);
  DatasetDescriptor.Builder builder = new DatasetDescriptor.Builder()
      .format(table.format)
      .schema(table.schema)
      .partitionStrategy(strategy)
      .location(table.location);
  if (table.depth < 0) {
    builder.property("kite.filesystem.mixed-depth", "true");
  }
  return builder.build();
}
@Before
public void setUp() throws Exception {
  super.setUp();
  inputDataset = repo.create("ns", "in", new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(STRING_SCHEMA)
      .format(format)
      .build(), GenericData.Record.class);
  outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(STATS_SCHEMA)
      .format(format)
      .build(), GenericData.Record.class);
}
/**
 * Configure the dataset's format from a format name String (optional). If
 * not specified, {@link Formats#AVRO} is used by default.
 *
 * @param formatName a String format name
 * @return An instance of the builder for method chaining.
 * @throws UnknownFormatException if the format name is not recognized.
 *
 * @since 0.8.0
 */
public Builder format(String formatName) {
  return this.format(Formats.fromString(formatName));
}
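For reference, a minimal usage sketch of this overload, assuming only the public org.kitesdk.data API; the inline User schema and the "parquet" choice are illustrative, not taken from the original source:

// Hedged sketch: schemaLiteral(String) and format(String) are standard
// DatasetDescriptor.Builder methods; the schema below is hypothetical.
DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
    .schemaLiteral("{\"type\": \"record\", \"name\": \"User\", \"fields\": ["
        + "{\"name\": \"username\", \"type\": \"string\"}]}")
    .format("parquet") // resolved internally via Formats.fromString("parquet")
    .build();

// An unrecognized name fails as soon as format(String) is called:
// new DatasetDescriptor.Builder().format("explode!"); // UnknownFormatException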
@Test
public void testParquetConfiguration() throws IOException {
  FileSystem fs = LocalFileSystem.getInstance();
  FileSystemWriter<Object> writer = FileSystemWriter.newWriter(
      fs, new Path("/tmp"), -1, -1,
      new DatasetDescriptor.Builder()
          .property("parquet.block.size", "34343434")
          .schema(SCHEMA)
          .format("parquet")
          .build(),
      SCHEMA);

  Assert.assertEquals("Should copy properties to Configuration",
      34343434, writer.conf.getInt("parquet.block.size", -1));
}
@Test(expected = UnknownFormatException.class)
public void testUnknownFormat() throws IOException {
  final DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(STRING_SCHEMA)
      .format(Accessor.getDefault().newFormat("explode!"))
      .build();
  MultiFileDatasetReader<Record> reader = new MultiFileDatasetReader<Record>(
      fileSystem, Lists.newArrayList(TEST_FILE), descriptor, CONSTRAINTS,
      ACCESSOR);
  try {
    reader.initialize();
  } finally {
    reader.close();
  }
}
@Test
public void testConfigureDurableParquetAppender() throws IOException {
  FileSystem fs = LocalFileSystem.getInstance();
  FileSystemWriter<Object> writer = FileSystemWriter.newWriter(
      fs, new Path("/tmp"), -1, -1,
      new DatasetDescriptor.Builder()
          .property(FileSystemProperties.NON_DURABLE_PARQUET_PROP, "false")
          .schema(SCHEMA)
          .format("parquet")
          .build(),
      SCHEMA);

  Assert.assertEquals(
      "Disabling the non-durable parquet appender should get us a durable appender",
      DurableParquetAppender.class,
      writer.newAppender(testDirectory).getClass());
}
@Override
public FileSystemWriter<Record> newWriter(Path directory, Schema datasetSchema,
    Schema writerSchema) {
  return FileSystemWriter.newWriter(fs, directory, 100, 2 * 1024 * 1024,
      new DatasetDescriptor.Builder()
          .property("kite.writer.roll-interval-seconds", String.valueOf(10))
          .property("kite.writer.target-file-size",
              String.valueOf(32 * 1024 * 1024)) // 32 MB
          .schema(datasetSchema)
          .format("parquet")
          .build(),
      writerSchema);
}
@Test
public void testDeleteAllWithoutPartitions() {
  final FileSystemDataset<Record> ds = new FileSystemDataset.Builder<Record>()
      .namespace("ns")
      .name("users")
      .configuration(getConfiguration())
      .descriptor(new DatasetDescriptor.Builder()
          .schema(USER_SCHEMA)
          .format(format)
          .location(testDirectory)
          .build())
      .type(Record.class)
      .build();

  writeTestUsers(ds, 10);
  Assert.assertTrue(ds.deleteAll());

  checkReaderBehavior(ds.newReader(), 0, (RecordValidator<Record>) null);
}
@Before
@Override
public void setUp() throws Exception {
  super.setUp();
  dataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(STATS_SCHEMA)
      .format(format)
      .build(), GenericData.Record.class);
}
@Override
public FileSystemWriter<Record> newWriter(Path directory, Schema datasetSchema,
    Schema writerSchema) {
  return FileSystemWriter.newWriter(fs, directory, 100, 2 * 1024 * 1024,
      new DatasetDescriptor.Builder()
          .property("kite.writer.roll-interval-seconds", String.valueOf(10))
          .property("kite.writer.target-file-size",
              String.valueOf(32 * 1024 * 1024)) // 32 MB
          .schema(datasetSchema)
          .format("avro")
          .build(),
      writerSchema);
}
@Test
public void testConfigureNonDurableParquetAppender() throws IOException {
  FileSystem fs = LocalFileSystem.getInstance();
  FileSystemWriter<Object> writer = FileSystemWriter.newWriter(
      fs, new Path("/tmp"), -1, -1,
      new DatasetDescriptor.Builder()
          .property(FileSystemProperties.NON_DURABLE_PARQUET_PROP, "true")
          .schema(SCHEMA)
          .format("parquet")
          .build(),
      SCHEMA);

  Assert.assertEquals(
      "Enabling the non-durable parquet appender should get us a non-durable appender",
      ParquetAppender.class,
      writer.newAppender(testDirectory).getClass());
}
@Before
@Override
public void setUp() throws Exception {
  super.setUp();
  outputDataset = repo.create("ns", "out", new DatasetDescriptor.Builder()
      .property("kite.allow.csv", "true")
      .schema(STATS_SCHEMA)
      .format(format)
      .build(), GenericData.Record.class);
}