/**
 * Assembles the file set properties for the given output format provider.
 *
 * <p>The base path is only set when {@code tpfsSinkConfig.basePath} is neither null nor empty.
 * The provider's output format class name and all of its configuration entries are copied into
 * the builder, after which {@code addFileSetProperties} is given the builder before it is built.
 *
 * @param outputFormatProvider supplies the output format class name and its configuration
 * @return the built dataset properties
 */
private DatasetProperties createProperties(OutputFormatProvider outputFormatProvider) {
  FileSetProperties.Builder builder = FileSetProperties.builder();

  String basePath = tpfsSinkConfig.basePath;
  boolean hasBasePath = !Strings.isNullOrEmpty(basePath);
  if (hasBasePath) {
    builder.setBasePath(basePath);
  }

  builder.setOutputFormat(outputFormatProvider.getOutputFormatClassName());
  Map<String, String> formatConfiguration = outputFormatProvider.getOutputFormatConfiguration();
  for (Map.Entry<String, String> entry : formatConfiguration.entrySet()) {
    String key = entry.getKey();
    String value = entry.getValue();
    builder.setOutputProperty(key, value);
  }

  addFileSetProperties(builder);
  return builder.build();
}
/**
 * Assembles the file set properties for the given input format provider.
 *
 * <p>The base path is only set when {@code config.getBasePath()} is neither null nor empty.
 * The provider's input format class name and all of its configuration entries are copied into
 * the builder, after which {@code addFileSetProperties} is given the builder before it is built.
 *
 * @param inputFormatProvider supplies the input format class name and its configuration
 * @return the built dataset properties
 */
private DatasetProperties createProperties(InputFormatProvider inputFormatProvider) {
  FileSetProperties.Builder builder = FileSetProperties.builder();

  boolean basePathMissing = Strings.isNullOrEmpty(config.getBasePath());
  if (!basePathMissing) {
    builder.setBasePath(config.getBasePath());
  }

  builder.setInputFormat(inputFormatProvider.getInputFormatClassName());
  Map<String, String> formatConfiguration = inputFormatProvider.getInputFormatConfiguration();
  for (Map.Entry<String, String> entry : formatConfiguration.entrySet()) {
    String key = entry.getKey();
    String value = entry.getValue();
    builder.setInputProperty(key, value);
  }

  addFileSetProperties(builder);
  return builder.build();
}
/**
 * Assembles the file set properties for the given input format provider, starting from the
 * builder returned by {@code SnapshotFileSet.getBaseProperties(config)}.
 *
 * <p>The base path is only set when {@code config.getBasePath()} is neither null nor empty.
 * The provider's input format class name and all of its configuration entries are copied into
 * the builder, after which {@code addFileProperties} is given the builder before it is built.
 *
 * @param inputFormatProvider supplies the input format class name and its configuration
 * @return the built dataset properties
 */
private DatasetProperties createProperties(InputFormatProvider inputFormatProvider) {
  FileSetProperties.Builder builder = SnapshotFileSet.getBaseProperties(config);

  boolean basePathMissing = Strings.isNullOrEmpty(config.getBasePath());
  if (!basePathMissing) {
    builder.setBasePath(config.getBasePath());
  }

  builder.setInputFormat(inputFormatProvider.getInputFormatClassName());
  Map<String, String> formatConfiguration = inputFormatProvider.getInputFormatConfiguration();
  for (Map.Entry<String, String> entry : formatConfiguration.entrySet()) {
    String key = entry.getKey();
    String value = entry.getValue();
    builder.setInputProperty(key, value);
  }

  addFileProperties(builder);
  return builder.build();
}
.setBasePath(existingPath) .setPossessExisting(true) .build());
/**
 * Runs {@code testFix} over a series of dataset type names (both short names and class names),
 * each paired with a representative set of dataset properties.
 */
@Test
public void testFixProperties() throws DatasetManagementException, UnsupportedTypeException {
  // file sets, addressed by short type name and by class name
  DatasetProperties externalFileSet = FileSetProperties.builder()
    .setBasePath("/tmp/nn")
    .setDataExternal(true)
    .build();
  testFix("fileSet", externalFileSet);

  DatasetProperties exploreFileSet = FileSetProperties.builder()
    .setEnableExploreOnCreate(true)
    .setExploreFormat("csv")
    .build();
  testFix(FileSet.class.getName(), exploreFileSet);

  // time-partitioned file sets with a relative base path
  DatasetProperties relativeTpfs = FileSetProperties.builder()
    .setBasePath("relative")
    .build();
  testFix("timePartitionedFileSet", relativeTpfs);

  DatasetProperties relativeTpfsWithCustom = FileSetProperties.builder()
    .setBasePath("relative")
    .add("custom", "value")
    .build();
  testFix(TimePartitionedFileSet.class.getName(), relativeTpfsWithCustom);

  // object-mapped tables; the properties are built afresh for each call
  testFix("objectMappedTable",
          ObjectMappedTableProperties.builder()
            .setType(TestObject.class)
            .setRowKeyExploreName("x")
            .setRowKeyExploreType(Schema.Type.STRING)
            .setConflictDetection(ConflictDetection.NONE)
            .build());
  testFix(ObjectMappedTable.class.getName(),
          ObjectMappedTableProperties.builder()
            .setType(TestObject.class)
            .setRowKeyExploreName("x")
            .setRowKeyExploreType(Schema.Type.STRING)
            .setConflictDetection(ConflictDetection.NONE)
            .build());

  // lineage and usage datasets
  testFix("lineageDataset", DatasetProperties.EMPTY);
  testFix(LineageDataset.class.getName(), TableProperties.builder().setTTL(1000).build());
  testFix(UsageDataset.class.getSimpleName(), DatasetProperties.EMPTY);

  // plain and indexed tables
  testFix("table", TableProperties.builder().setColumnFamily("fam").build());
  testFix("indexedTable",
          DatasetProperties.builder()
            .add(IndexedTable.INDEX_COLUMNS_CONF_KEY, "a,c")
            .build());
}
static void performAdmin(RuntimeContext context) { Admin admin = context.getAdmin(); Map<String, String> args = context.getRuntimeArguments(); try { // if invoked with dropAll=true, clean up all datasets (a, b, c, d) if ("true".equals(args.get("dropAll"))) { for (String name : new String[]{"a", "b", "c", "d"}) { if (admin.datasetExists(name)) { admin.dropDataset(name); } } } else { // create a, update b with /extra in base path, truncate c, drop d admin.createDataset("a", Table.class.getName(), DatasetProperties.EMPTY); String type = admin.getDatasetType("b"); Assert.assertEquals(FileSet.class.getName(), type); DatasetProperties bProps = admin.getDatasetProperties("b"); String base = bProps.getProperties().get("base.path"); Assert.assertNotNull(base); String newBase = args.get("new.base.path"); DatasetProperties newBProps = ((FileSetProperties.Builder) FileSetProperties.builder() .addAll(bProps.getProperties())).setDataExternal(true).setBasePath(newBase).build(); admin.updateDataset("b", newBProps); admin.truncateDataset("c"); admin.dropDataset("d"); } } catch (DatasetManagementException e) { Throwables.propagate(e); } }
@Test public void testReuseDoesNotDelete() throws IOException, DatasetManagementException { String existingPath = tmpFolder.newFolder() + "/existing/path"; File existingDir = new File(existingPath); existingDir.mkdirs(); File someFile = new File(existingDir, "some.file"); someFile.createNewFile(); // create an external dataset dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(existingPath) .setUseExisting(true) .build()); Assert.assertTrue(someFile.exists()); // truncate the file set dsFrameworkUtil.getFramework().truncateInstance(testFileSetInstance5); Assert.assertTrue(someFile.exists()); // truncate the file set dsFrameworkUtil.getFramework().deleteInstance(testFileSetInstance5); Assert.assertTrue(someFile.exists()); }
/**
 * Configures the application: sets its name and description, creates the input file set and,
 * if it is named differently, a separate output file set, and adds the {@code ComputeSum}
 * MapReduce program.
 */
@Override
public void configure() {
  setName("AppWithMapReduceUsingFile");
  setDescription("Application with MapReduce job using file as dataset");

  String source = getConfig().inputDataset;
  String sink = getConfig().outputDataset;

  // the input file set has no explicit base path
  DatasetProperties sourceProps = FileSetProperties.builder()
    .setInputFormat(TextInputFormat.class)
    .setOutputFormat(TextOutputFormat.class)
    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
    .build();
  createDataset(source, "fileSet", sourceProps);

  // only create the output file set when it is a different dataset than the input
  boolean sameDataset = sink.equals(source);
  if (!sameDataset) {
    DatasetProperties sinkProps = FileSetProperties.builder()
      .setBasePath("foo/my-file-output")
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .build();
    createDataset(sink, "fileSet", sinkProps);
  }

  addMapReduce(new ComputeSum(getConfig()));
}
@Test public void testNonExternalExistentPath() throws Exception { // Create an instance at a location String absolutePath = tmpFolder.newFolder() + "/some/existing/location"; File file = new File(absolutePath); Assert.assertTrue(file.mkdirs()); // Try to add another instance of non external fileset at the same location try { dsFrameworkUtil.createInstance("fileSet", testFileSetInstance6, FileSetProperties.builder() .setBasePath(absolutePath) .setDataExternal(false) .build()); Assert.fail("Expected IOException from createInstance()"); } catch (IOException e) { // expected } }
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); createDataset(TIME_PARTITIONED, "timePartitionedFileSet", FileSetProperties.builder() // properties for file set .setBasePath("partitioned") .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build()); addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
@Test public void testAbsolutePath() throws IOException, DatasetManagementException { String absolutePath = tmpFolder.newFolder() + "/absolute/path"; dsFrameworkUtil.createInstance("fileSet", testFileSetInstance3, FileSetProperties.builder() .setBasePath(absolutePath).build()); // validate that the base path for the file set was created Assert.assertTrue(new File(absolutePath).isDirectory()); // instantiate the file set with an output path Map<String, String> fileArgs = Maps.newHashMap(); FileSetArguments.setOutputPath(fileArgs, "out"); FileSet fileSet = dsFrameworkUtil.getInstance(testFileSetInstance3, fileArgs); // write to the output path Assert.assertEquals(absolutePath + "/out", fileSet.getOutputLocation().toURI().getPath()); try (OutputStream out = fileSet.getOutputLocation().getOutputStream()) { out.write(42); } // validate that the file was created Assert.assertTrue(new File(absolutePath + "/out").isFile()); }
@Test(expected = IOException.class) public void testExternalNonExistentPath() throws IOException, DatasetManagementException { // create an external dir and create a file in it String absolutePath = tmpFolder.newFolder() + "/not/there"; // attempt to create an external dataset - should fail dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(absolutePath) .setDataExternal(true) .build()); }
/**
 * Expects a {@code DataSetException} when a file set is created with an absolute base path
 * located underneath the configured local data directory.
 */
@Test(expected = DataSetException.class)
public void testAbsolutePathInsideCDAP() throws IOException, DatasetManagementException {
  String localDataDir = dsFrameworkUtil.getConfiguration().get(Constants.CFG_LOCAL_DATA_DIR);
  String pathInsideDataDir = localDataDir.concat("/hello");

  DatasetProperties props = FileSetProperties.builder()
    .setBasePath(pathInsideDataDir)
    .build();
  dsFrameworkUtil.createInstance("fileSet",
                                 DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("badFileSet"),
                                 props);
}
/**
 * Creates a file set with base path {@code testDir} under the given dataset id and returns an
 * instance of it whose input and output paths are both set to {@code some?File1}.
 *
 * @param dsid the id under which the file set is created and then instantiated
 * @return the file set instance obtained with the input/output path arguments
 */
private FileSet createFileset(DatasetId dsid) throws IOException, DatasetManagementException {
  DatasetProperties props = FileSetProperties.builder()
    .setBasePath("testDir")
    .build();
  dsFrameworkUtil.createInstance("fileSet", dsid, props);

  // use the same relative path for both reading and writing
  Map<String, String> runtimeArgs = Maps.newHashMap();
  FileSetArguments.setInputPath(runtimeArgs, "some?File1");
  FileSetArguments.setOutputPath(runtimeArgs, "some?File1");

  return dsFrameworkUtil.getInstance(dsid, runtimeArgs);
}
@Test(expected = IOException.class) public void testReuseNonExistentPath() throws IOException, DatasetManagementException { // create an external dir and create a file in it String absolutePath = tmpFolder.newFolder() + "/not/there"; // attempt to create an external dataset - should fail dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(absolutePath) .setUseExisting(true) .build()); }
/**
 * Builds file set properties from the given extra properties, optionally setting the
 * data-external flag and the base path on top of them.
 *
 * @param extraProps properties whose entries are all copied into the result first
 * @param external if non-null, passed to {@code setDataExternal}; if null, the flag is not set
 * @param path if non-null, passed to {@code setBasePath}; if null, the base path is not set
 * @return the built dataset properties
 */
private DatasetProperties buildFileSetProps(DatasetProperties extraProps, Boolean external, String path) {
  FileSetProperties.Builder props = FileSetProperties.builder();
  props.addAll(extraProps.getProperties());

  boolean hasExternalFlag = external != null;
  if (hasExternalFlag) {
    props.setDataExternal(external);
  }

  boolean hasPath = path != null;
  if (hasPath) {
    props.setBasePath(path);
  }

  return props.build();
}
/**
 * Creates a file set whose absolute base path merely shares the local data directory as a
 * string prefix (the directory name with {@code -hello} appended). The test passes as long as
 * creation does not throw.
 */
@Test
public void testAbsolutePathLooksLikeCDAP() throws IOException, DatasetManagementException {
  String localDataDir = dsFrameworkUtil.getConfiguration().get(Constants.CFG_LOCAL_DATA_DIR);
  String lookAlikePath = localDataDir.concat("-hello");

  DatasetProperties props = FileSetProperties.builder()
    .setBasePath(lookAlikePath)
    .build();
  dsFrameworkUtil.createInstance("fileSet", testFileSetInstance4, props);
}
/**
 * Creates the time-partitioned file set instance {@code TPFS_INSTANCE} with base path
 * {@code testDir} before each test.
 */
@Before
public void before() throws Exception {
  DatasetProperties props = FileSetProperties.builder()
    .setBasePath("testDir")
    .build();
  dsFrameworkUtil.createInstance("timePartitionedFileSet", TPFS_INSTANCE, props);
}
@Test(expected = IOException.class) public void testPossessNonExistentPath() throws IOException, DatasetManagementException { // create an external dir and create a file in it String absolutePath = tmpFolder.newFolder() + "/not/there"; // attempt to create an external dataset - should fail dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(absolutePath) .setPossessExisting(true) .build()); }
@Test(expected = DataSetException.class) public void testAbsolutePathInsideCDAPDouble() throws IOException, DatasetManagementException { // test that it rejects also paths that have // in them String absolutePath = dsFrameworkUtil.getConfiguration() .get(Constants.CFG_LOCAL_DATA_DIR).replace("/", "//").concat("/hello"); dsFrameworkUtil.createInstance("fileSet", DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("badFileSet"), FileSetProperties.builder().setBasePath(absolutePath).build()); }