/**
 * Assembles the file set properties for the given output format provider.
 *
 * <p>The base path is only set when {@code tpfsSinkConfig.basePath} is non-empty. The output format
 * class name and every entry of the provider's output format configuration are copied onto the
 * builder, after which {@code addFileSetProperties} gets a chance to add to it.
 *
 * @param outputFormatProvider source of the output format class name and its configuration
 * @return the built dataset properties
 */
private DatasetProperties createProperties(OutputFormatProvider outputFormatProvider) {
  FileSetProperties.Builder builder = FileSetProperties.builder();

  String basePath = tpfsSinkConfig.basePath;
  boolean hasBasePath = !Strings.isNullOrEmpty(basePath);
  if (hasBasePath) {
    builder.setBasePath(basePath);
  }

  builder.setOutputFormat(outputFormatProvider.getOutputFormatClassName());
  Map<String, String> formatConfig = outputFormatProvider.getOutputFormatConfiguration();
  for (Map.Entry<String, String> entry : formatConfig.entrySet()) {
    builder.setOutputProperty(entry.getKey(), entry.getValue());
  }

  addFileSetProperties(builder);
  return builder.build();
}
/**
 * Assembles the file set properties for the given input format provider.
 *
 * <p>The base path is only set when {@code config.getBasePath()} is non-empty. The input format
 * class name and every entry of the provider's input format configuration are copied onto the
 * builder, after which {@code addFileSetProperties} gets a chance to add to it.
 *
 * @param inputFormatProvider source of the input format class name and its configuration
 * @return the built dataset properties
 */
private DatasetProperties createProperties(InputFormatProvider inputFormatProvider) {
  FileSetProperties.Builder builder = FileSetProperties.builder();

  String basePath = config.getBasePath();
  boolean hasBasePath = !Strings.isNullOrEmpty(basePath);
  if (hasBasePath) {
    builder.setBasePath(basePath);
  }

  builder.setInputFormat(inputFormatProvider.getInputFormatClassName());
  Map<String, String> formatConfig = inputFormatProvider.getInputFormatConfiguration();
  for (Map.Entry<String, String> entry : formatConfig.entrySet()) {
    builder.setInputProperty(entry.getKey(), entry.getValue());
  }

  addFileSetProperties(builder);
  return builder.build();
}
@Override public void initialize(SparkHttpServiceContext context) throws Exception { super.initialize(context); try { context.getAdmin().createDataset("wordcount", FileSet.class.getName(), FileSetProperties.builder() .setInputFormat(TextInputFormat.class) .build()); } catch (InstanceConflictException e) { // It's ok if the dataset already exists } }
@Test(expected = DataSetException.class) public void testAbsolutePathInsideCDAPDouble() throws IOException, DatasetManagementException { // test that it rejects also paths that have // in them String absolutePath = dsFrameworkUtil.getConfiguration() .get(Constants.CFG_LOCAL_DATA_DIR).replace("/", "//").concat("/hello"); dsFrameworkUtil.createInstance("fileSet", DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("badFileSet"), FileSetProperties.builder().setBasePath(absolutePath).build()); }
/**
 * Creates the "timePartitionedFileSet" instance under {@code TPFS_INSTANCE} with base path
 * "testDir" before each test.
 */
@Before
public void before() throws Exception {
  FileSetProperties.Builder props = FileSetProperties.builder();
  props.setBasePath("testDir");
  dsFrameworkUtil.createInstance("timePartitionedFileSet", TPFS_INSTANCE, props.build());
}
/**
 * Declares the input and output file sets, both configured with a text input format and a text
 * output format.
 */
@Override
protected void configure() {
  FileSetProperties.Builder inputProps = FileSetProperties.builder();
  inputProps.setInputFormat(TextInputFormat.class);
  inputProps.setOutputFormat(TextOutputFormat.class);
  createDataset(INPUT_FILE_SET, FileSet.class, inputProps.build());

  // The output file set gets its own builder with the same formats.
  FileSetProperties.Builder outputProps = FileSetProperties.builder();
  outputProps.setInputFormat(TextInputFormat.class);
  outputProps.setOutputFormat(TextOutputFormat.class);
  createDataset(OUTPUT_FILE_SET, FileSet.class, outputProps.build());
}
/**
 * Creates a "fileSet" instance with base path "testDir" and returns it, instantiated with runtime
 * arguments that set both the input path and the output path to "some?File1".
 *
 * @param dsid id of the dataset instance to create and fetch
 * @return the file set instance obtained with the input/output path arguments
 */
private FileSet createFileset(DatasetId dsid) throws IOException, DatasetManagementException {
  FileSetProperties.Builder props = FileSetProperties.builder();
  props.setBasePath("testDir");
  dsFrameworkUtil.createInstance("fileSet", dsid, props.build());

  Map<String, String> runtimeArgs = Maps.newHashMap();
  FileSetArguments.setInputPath(runtimeArgs, "some?File1");
  FileSetArguments.setOutputPath(runtimeArgs, "some?File1");

  return dsFrameworkUtil.getInstance(dsid, runtimeArgs);
}
/**
 * Creates a file set whose base path is the configured local data directory with "-hello"
 * appended; no exception is expected.
 */
@Test
public void testAbsolutePathLooksLikeCDAP() throws IOException, DatasetManagementException {
  String localDataDir = dsFrameworkUtil.getConfiguration().get(Constants.CFG_LOCAL_DATA_DIR);
  String absolutePath = localDataDir.concat("-hello");

  FileSetProperties.Builder props = FileSetProperties.builder();
  props.setBasePath(absolutePath);

  dsFrameworkUtil.createInstance("fileSet", testFileSetInstance4, props.build());
}
/**
 * Expects a {@code DataSetException} when the base path is the configured local data directory
 * with "/hello" appended.
 */
@Test(expected = DataSetException.class)
public void testAbsolutePathInsideCDAP() throws IOException, DatasetManagementException {
  String localDataDir = dsFrameworkUtil.getConfiguration().get(Constants.CFG_LOCAL_DATA_DIR);
  String absolutePath = localDataDir.concat("/hello");

  FileSetProperties.Builder props = FileSetProperties.builder();
  props.setBasePath(absolutePath);

  dsFrameworkUtil.createInstance(
    "fileSet", DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("badFileSet"), props.build());
}
/**
 * Builds file set properties from the given extra properties, optionally overlaying the
 * data-external flag and the base path.
 *
 * @param extraProps properties copied onto the builder first
 * @param external value for the data-external flag; skipped when {@code null}
 * @param path base path to set; skipped when {@code null}
 * @return the built dataset properties
 */
private DatasetProperties buildFileSetProps(DatasetProperties extraProps, Boolean external, String path) {
  FileSetProperties.Builder props = FileSetProperties.builder();
  // Copy the extra properties first so the explicit settings below are applied after them.
  props.addAll(extraProps.getProperties());

  if (null != external) {
    props.setDataExternal(external);
  }
  if (null != path) {
    props.setBasePath(path);
  }

  return props.build();
}
@Test(expected = IOException.class) public void testExternalNonExistentPath() throws IOException, DatasetManagementException { // create an external dir and create a file in it String absolutePath = tmpFolder.newFolder() + "/not/there"; // attempt to create an external dataset - should fail dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(absolutePath) .setDataExternal(true) .build()); }
@Test(expected = IOException.class) public void testPossessNonExistentPath() throws IOException, DatasetManagementException { // create an external dir and create a file in it String absolutePath = tmpFolder.newFolder() + "/not/there"; // attempt to create an external dataset - should fail dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(absolutePath) .setPossessExisting(true) .build()); }
/**
 * Expects an {@code IllegalArgumentException} when a file set is created with both the
 * data-external and use-existing options enabled.
 */
@Test(expected = IllegalArgumentException.class)
public void testReuseAndExternal() throws IOException, DatasetManagementException {
  FileSetProperties.Builder conflicting = FileSetProperties.builder();
  conflicting.setDataExternal(true);
  conflicting.setUseExisting(true);

  dsFrameworkUtil.createInstance(
    "fileSet", DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("badFileSet"), conflicting.build());
}
/**
 * Expects an {@code IllegalArgumentException} when a file set is created with both the
 * data-external and possess-existing options enabled.
 */
@Test(expected = IllegalArgumentException.class)
public void testPossessAndExternal() throws IOException, DatasetManagementException {
  FileSetProperties.Builder conflicting = FileSetProperties.builder();
  conflicting.setDataExternal(true);
  conflicting.setPossessExisting(true);

  dsFrameworkUtil.createInstance(
    "fileSet", DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("badFileSet"), conflicting.build());
}
/**
 * Expects an {@code IllegalArgumentException} when a file set is created with both the
 * use-existing and possess-existing options enabled.
 */
@Test(expected = IllegalArgumentException.class)
public void testPossessAndReuse() throws IOException, DatasetManagementException {
  FileSetProperties.Builder conflicting = FileSetProperties.builder();
  conflicting.setUseExisting(true);
  conflicting.setPossessExisting(true);

  dsFrameworkUtil.createInstance(
    "fileSet", DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("badFileSet"), conflicting.build());
}
@Test(expected = IOException.class) public void testReuseNonExistentPath() throws IOException, DatasetManagementException { // create an external dir and create a file in it String absolutePath = tmpFolder.newFolder() + "/not/there"; // attempt to create an external dataset - should fail dsFrameworkUtil.createInstance("fileSet", testFileSetInstance5, FileSetProperties.builder() .setBasePath(absolutePath) .setUseExisting(true) .build()); }
/**
 * Declares the "logs" file set (text input and output formats) and the "logStats" key-value
 * table, then registers the Java and Scala log parser Spark programs.
 */
@Override
public void configure() {
  FileSetProperties.Builder logsProps = FileSetProperties.builder();
  logsProps.setInputFormat(TextInputFormat.class);
  logsProps.setOutputFormat(TextOutputFormat.class);
  createDataset("logs", FileSet.class, logsProps.build());

  createDataset("logStats", KeyValueTable.class.getName());

  addSpark(new SparkLogParser());
  addSpark(new ScalaSparkLogParser());
}
/**
 * Configures the application: sets its name and description, declares the purchases file set
 * (text input format) and the separated-purchases file set (text output format with a single
 * space as separator), and registers the two MapReduce programs.
 */
@Override
public void configure() {
  setName("AppWithMapReduceUsingMultipleOutputs");
  setDescription("Application with MapReduce job using multiple outputs");

  FileSetProperties.Builder purchasesProps = FileSetProperties.builder();
  purchasesProps.setInputFormat(TextInputFormat.class);
  createDataset(PURCHASES, "fileSet", purchasesProps.build());

  FileSetProperties.Builder separatedProps = FileSetProperties.builder();
  separatedProps.setOutputFormat(TextOutputFormat.class);
  separatedProps.setOutputProperty(TextOutputFormat.SEPERATOR, " ");
  createDataset(SEPARATED_PURCHASES, "fileSet", separatedProps.build());

  addMapReduce(new SeparatePurchases());
  addMapReduce(new InvalidMapReduce());
}
/**
 * Configures the Spark program: sets its name and main class, declares the input file set (text
 * input/output formats with ":" as output separator) and the table dataset, and registers the
 * plugin "t1" with a single {@code KEY -> TEST} property.
 */
@Override
protected void configure() {
  setName(SPARK);
  setMainClass(getClass());

  FileSetProperties.Builder inputProps = FileSetProperties.builder();
  inputProps.setInputFormat(TextInputFormat.class);
  inputProps.setOutputFormat(TextOutputFormat.class);
  inputProps.setOutputProperty(TextOutputFormat.SEPERATOR, ":");
  createDataset(SPARK_INPUT, FileSet.class, inputProps.build());

  createDataset(SPARK_TABLE, Table.class);

  usePlugin("t1", "n1", "plugin", PluginProperties.builder().add(KEY, TEST).build());
}
/**
 * Configures the workflow: sets its name and description, declares three local datasets (a
 * key-value table for word counts, a file set with text input/output formats, and a key-value
 * table for unique ids), and adds the writer action, the Spark program, the MapReduce program
 * and the reader action in that order.
 */
@Override
protected void configure() {
  setName(WORKFLOW_NAME);
  setDescription("Workflow program with local datasets.");

  createLocalDataset(WORDCOUNT_DATASET, KeyValueTable.class);

  FileSetProperties.Builder csvProps = FileSetProperties.builder();
  csvProps.setInputFormat(TextInputFormat.class);
  csvProps.setOutputFormat(TextOutputFormat.class);
  createLocalDataset(CSV_FILESET_DATASET, FileSet.class, csvProps.build());

  createLocalDataset(UNIQUE_ID_DATASET, KeyValueTable.class);

  addAction(new LocalDatasetWriter());
  addSpark("JavaSparkCSVToSpaceConverter");
  addMapReduce("WordCount");
  addAction(new LocalDatasetReader("readerAction"));
}
}