.setPartitioning(PARTITIONING) .setEnableExploreOnCreate(true) .setOutputFormat(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class) .setOutputProperty(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, ",")
.setPartitioning(PARTITIONING_1) .setBasePath(absolutePath.getPath()) .setDataExternal(true) .build());
/**
 * Sets the partitioning of the partitioned file set. Stores the comma-separated
 * list of field names (in declaration order) plus one property per field
 * recording the field's type, so the partitioning can be reconstructed later.
 *
 * @param partitioning the partitioning to record
 * @return this builder, for method chaining
 */
public Builder setPartitioning(Partitioning partitioning) {
  // Comma-separated field names, preserving the partitioning's declaration order.
  StringBuilder names = new StringBuilder();
  for (String fieldName : partitioning.getFields().keySet()) {
    if (names.length() > 0) {
      names.append(',');
    }
    names.append(fieldName);
  }
  add(PARTITIONING_FIELDS, names.toString());
  // One property per field: key is the prefixed field name, value is the type name.
  for (Map.Entry<String, Partitioning.FieldType> field : partitioning.getFields().entrySet()) {
    add(PARTITIONING_FIELD_PREFIX + field.getKey(), field.getValue().name());
  }
  return this;
}
DatasetProperties props = PartitionedFileSetProperties.builder().setPartitioning( Partitioning.builder().addIntField("i").addStringField("s").build()).build(); DatasetSpecification spec = pfsDef.configure("pfs", props); DatasetProperties noIprops = PartitionedFileSetProperties.builder().setPartitioning( Partitioning.builder().addStringField("s").build()).build(); try { ((Reconfigurable) pfsDef).reconfigure("pfs", noIprops, spec); DatasetProperties longIprops = PartitionedFileSetProperties.builder().setPartitioning( Partitioning.builder().addLongField("i").addStringField("s").build()).build(); try { ((Reconfigurable) pfsDef).reconfigure("pfs", longIprops, spec); DatasetProperties revProps = PartitionedFileSetProperties.builder().setPartitioning( Partitioning.builder().addStringField("s").addIntField("i").build()).build(); try { ((Reconfigurable) pfsDef).reconfigure("pfs", revProps, spec); .setPartitioning(Partitioning.builder().addStringField("s").build()) .add(PartitionedFileSetDefinition.NAME_AS_BASE_PATH_DEFAULT, "false") .build(); DatasetSpecification oldSpec = pfsDef.configure("pfs", oldProps); .setPartitioning(Partitioning.builder().addStringField("s").build()) .build(); oldSpec = pfsDef.configure("pfs", props); newSpec = ((Reconfigurable) pfsDef).reconfigure("pfs", props, oldSpec);
addDatasetInstance(PartitionedFileSet.class.getName(), orcPFS, PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addLongField("time").build()) .setOutputFormat(OrcNewOutputFormat.class)
.setPartitioning(Partitioning.builder().addLongField("time").build()) .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") .setPartitioning(Partitioning.builder().addLongField("time").build()) .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",")
@Override
public void configure() {
  try {
    // NOTE: TextOutputFormat.SEPERATOR is the actual (misspelled) Hadoop constant name.
    // Plain file set with custom input and output formats, colon-delimited output.
    createDataset("fs", FileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(MyTextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build());
    // Partitioned file set keyed by a single string field "x".
    createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder()
      .setPartitioning(Partitioning.builder().addStringField("x").build())
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build());
    // Time-partitioned file set; configured with plain FileSetProperties here.
    createDataset("tpfs", TimePartitionedFileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build());
    // Custom dataset type that wraps a file set.
    createDataset("myfs", MyFileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build());
    addSpark(new FileCountSparkProgram());
    addSpark(new ScalaFileCountSparkProgram());
  } catch (Throwable t) {
    throw Throwables.propagate(t);
  }
}
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); Class<? extends InputFormat> inputFormatClass = getConfig().isUseCombineFileInputFormat() ? CombineTextInputFormat.class : TextInputFormat.class; createDataset(PARTITIONED, "partitionedFileSet", PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder() .addStringField("type") .addLongField("time") .build()) // properties for file set .setBasePath("partitioned") .setInputFormat(inputFormatClass) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build()); addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
@Test public void testMultipleTransitiveDependencies() throws DatasetManagementException, IOException { // Adding modules DatasetFramework framework = getFramework(); try { framework.addModule(IN_MEMORY, new InMemoryTableModule()); framework.addModule(CORE, new CoreDatasetsModule()); framework.addModule(FILE, new FileSetModule()); framework.addModule(PFS, new PartitionedFileSetModule()); framework.addModule(TWICE, new SingleTypeModule(EmbedsTableTwiceDataset.class)); // Creating an instances framework.addInstance(EmbedsTableTwiceDataset.class.getName(), MY_DS, PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addStringField("x").build()) .build()); Assert.assertTrue(framework.hasInstance(MY_DS)); framework.getDataset(MY_DS, DatasetProperties.EMPTY.getProperties(), null); } finally { framework.deleteAllInstances(NAMESPACE_ID); framework.deleteAllModules(NAMESPACE_ID); } }
@Test
public void testDefaultBasePath() throws Exception {
  // Create a PFS without an explicit base path; the base location should then
  // default to a directory named after the dataset.
  DatasetId id = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testDefaultPath");
  dsFrameworkUtil.createInstance("partitionedFileSet", id, PartitionedFileSetProperties.builder()
    .setPartitioning(PARTITIONING_1)
    .build());
  PartitionedFileSet pfs = dsFrameworkUtil.getInstance(id);
  Location baseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
  // Fix: JUnit's assertEquals takes (expected, actual); the original had them
  // swapped, which produces misleading failure messages.
  Assert.assertEquals(id.getDataset(), baseLocation.getName());
  Assert.assertTrue(baseLocation.exists());
  Assert.assertTrue(baseLocation.isDirectory());
  // A plain file set created with default properties should land under the same parent.
  DatasetId fid = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testDefaultPathFileSet");
  dsFrameworkUtil.createInstance("fileSet", fid, FileSetProperties.builder().build());
  FileSet fs = dsFrameworkUtil.getInstance(fid);
  Location fsBaseLocation = fs.getBaseLocation();
  Assert.assertEquals(Locations.getParent(baseLocation), Locations.getParent(fsBaseLocation));
  // Deleting the instances must remove the defaulted base path from the file system.
  dsFrameworkUtil.deleteInstance(fid);
  dsFrameworkUtil.deleteInstance(id);
  Assert.assertFalse(baseLocation.exists());
}
@Override
public void configure() {
  setName("AppWithMapReduce");
  setDescription("Application with MapReduce job");
  // Tables and key-value stores exercised by the MapReduce programs below.
  createDataset("beforeSubmit", KeyValueTable.class);
  createDataset("onFinish", KeyValueTable.class);
  createDataset("timeSeries", TimeseriesTable.class);
  createDataset("counters", Table.class);
  createDataset("countersFromContext", Table.class);
  createDataset("recorder", KeyValueTable.class);
  // Partitioned file set keyed by a single int field "x", text output.
  createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addIntField("x").build())
    .setOutputFormat(TextOutputFormat.class)
    .build());
  addMapReduce(new ClassicWordCount());
  addMapReduce(new AggregateTimeseriesByTag());
  addMapReduce(new FaiiingMR());
  addMapReduce(new ExplicitFaiiingMR());
  addMapReduce(new MapReduceWithFailingOutputCommitter());
}
@Override public void configure() { addService(new PartitionService()); // Create a partitioned file set, configure it to work with MapReduce and with Explore createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("partition").addIntField("sub-partition").build()) // Properties for file set .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true) .setExploreFormat("csv") .setExploreSchema("f1 STRING, f2 INT") .setDescription("App for testing authorization in partitioned filesets.") .build()); }
/**
 * Builds the base dataset properties for a snapshot file set: partitioned by a
 * single long snapshot field, with an optional base path and any extra file
 * properties supplied as a JSON object of string to string.
 */
public static PartitionedFileSetProperties.Builder getBaseProperties(SnapshotFileSetConfig config) {
  PartitionedFileSetProperties.Builder builder = PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addLongField(SNAPSHOT_FIELD).build());
  String basePath = config.getBasePath();
  if (!Strings.isNullOrEmpty(basePath)) {
    builder.setBasePath(basePath);
  }
  // Merge in extra file properties, if any; a malformed JSON value is surfaced
  // as an IllegalArgumentException with the parse error attached as the cause.
  try {
    Map<String, String> fileProperties = GSON.fromJson(config.getFileProperties(), MAP_TYPE);
    if (fileProperties != null) {
      builder.addAll(fileProperties);
    }
  } catch (Exception e) {
    throw new IllegalArgumentException("Could not decode the 'properties' setting. Please check that it " +
                                         "is a JSON Object of string to string. Failed with error: " + e.getMessage(), e);
  }
  return builder;
}
@Override public void configure() { createDataset(INPUT, KeyValueTable.class.getName(), DatasetProperties.EMPTY); // create two pfs, identical except for their (table) names for (String name : new String[] { PFS, OTHER }) { createDataset(name, PartitionedFileSet.class.getName(), PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addIntField("number").build()) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") .setEnableExploreOnCreate(true) .setExploreTableName(name) .setExploreSchema("key STRING, value STRING") .setExploreFormat("csv") .build()); } addMapReduce(new PartitionWriterMR()); }
@Before
public void before() throws Exception {
  txClient = new InMemoryTxSystemClient(dsFrameworkUtil.getTxManager());
  // Create a PFS with explicit table/file permissions and group so the tests
  // can verify they are applied.
  dsFrameworkUtil.createInstance("partitionedFileSet", pfsInstance, PartitionedFileSetProperties.builder()
    .setPartitioning(PARTITIONING_1)
    .setTablePermissions(tablePermissions)
    .setBasePath("testDir")
    .setFilePermissions(fsPermissions)
    .setFileGroup(group)
    .build());
  PartitionedFileSet pfs = (PartitionedFileSet) dsFrameworkUtil.getInstance(pfsInstance);
  pfsBaseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
  Assert.assertTrue(pfsBaseLocation.exists());
}
@Test
public void testFileSetReconfigure() throws IncompatibleUpdateException {
  // Plain and time-partitioned file sets need no extra properties.
  testFileSetReconfigure(registry.get(FileSet.class.getName()));
  // Partitioned file sets additionally require a partitioning to be set.
  testFileSetReconfigure(registry.get(PartitionedFileSet.class.getName()),
                         PartitionedFileSetProperties.builder()
                           .setPartitioning(Partitioning.builder().addIntField("i").build())
                           .build());
  testFileSetReconfigure(registry.get(TimePartitionedFileSet.class.getName()));
}
/**
 * Creates a new, empty builder for constructing dataset properties.
 *
 * @return a fresh {@link Builder} instance
 */
public static Builder builder() {
  return new Builder();
}
@Override public DatasetSpecification configure(String instanceName, DatasetProperties properties) { // add the partition key to the properties. properties = PartitionedFileSetProperties .builder() .setPartitioning(TimePartitionedFileSetDataset.PARTITIONING) .addAll(properties.getProperties()) .build(); return super.configure(instanceName, properties); }
@Override public void configure() { // A PFS for storing uploaded file createDataset(PFS_NAME, PartitionedFileSet.class, PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addLongField("time").build()) .setInputFormat(TextInputFormat.class) .build() ); // A KV table for tracking chunks sizes createDataset(KV_TABLE_NAME, KeyValueTable.class); addService(SERVICE_NAME, new FileHandler()); }
@Override public DatasetSpecification reconfigure(String instanceName, DatasetProperties properties, DatasetSpecification currentSpec) throws IncompatibleUpdateException { // add the partition key to the properties. properties = PartitionedFileSetProperties .builder() .setPartitioning(TimePartitionedFileSetDataset.PARTITIONING) .addAll(properties.getProperties()) .build(); return super.reconfigure(instanceName, properties, currentSpec); }