/*
 * Looks up the dataset named DataStreamsApp.CHECKPOINT_FILESET as a FileSet through the given
 * DatasetContext and stores that file set's base location in checkpointBaseRef.
 *
 * NOTE(review): the tokens after the method's closing brace ("}, Exception.class);") close an enclosing
 * anonymous class and method call whose beginning is outside this view -- they must stay on this line.
 */
@Override public void run(DatasetContext context) throws Exception { FileSet checkpointFileSet = context.getDataset(DataStreamsApp.CHECKPOINT_FILESET); checkpointBaseRef.set(checkpointFileSet.getBaseLocation()); } }, Exception.class);
/**
 * Resolves {@code relativePath} against the embedded file set of {@code partitionedFileSetDataset}.
 *
 * @return the location that the embedded file set reports for {@code relativePath}
 */
@Override
public Location getLocation() {
  final Location resolved = partitionedFileSetDataset.getEmbeddedFileSet().getLocation(relativePath);
  return resolved;
}
/**
 * Chooses the input format class name based on whether any input partition keys are present.
 *
 * @return the class name of {@code EmptyInputFormat} when {@code getInputKeys()} is empty;
 *         otherwise the input format class name reported by {@code files}
 */
@Override
public String getInputFormatClassName() {
  return getInputKeys().isEmpty()
    ? EmptyInputFormat.class.getName()
    : files.getInputFormatClassName();
}
/**
 * Builds the input format configuration for all input partitions.
 *
 * <p>Every input partition key is resolved to its partition, and the partition's relative path is resolved
 * to a location via {@code files}. The configuration returned by {@code files} for those locations is then
 * extended with a JSON mapping (under {@code PATH_TO_PARTITIONING_MAPPING}) from each location URI to the
 * partition key it belongs to.
 *
 * @return the configuration obtained from {@code files}, with the path-to-key mapping added
 * @throws NullPointerException if {@code getPartition} returns null for one of the input keys
 */
@Override
public Map<String, String> getInputFormatConfiguration() {
  Collection<PartitionKey> partitionKeys = getInputKeys();
  int expectedSize = partitionKeys.size();
  List<Location> locations = new ArrayList<>(expectedSize);
  Map<String, PartitionKey> keysByUri = new HashMap<>(expectedSize);

  for (PartitionKey partitionKey : partitionKeys) {
    // a key without a partition is not tolerated: requireNonNull fails immediately
    String partitionPath = Objects.requireNonNull(getPartition(partitionKey)).getRelativePath();
    Location location = files.getLocation(partitionPath);
    locations.add(location);
    keysByUri.put(location.toURI().toString(), partitionKey);
  }

  // NOTE(review): the map returned by files is modified in place, so it is assumed to be mutable -- confirm
  Map<String, String> configuration = files.getInputFormatConfiguration(locations);
  configuration.put(PATH_TO_PARTITIONING_MAPPING, GSON.toJson(keysByUri));
  return configuration;
}
@Override public Map<String, String> getOutputFormatConfiguration() { checkNotExternal(); // copy the output properties of the embedded file set to the output arguments Map<String, String> outputArgs = new HashMap<>(files.getOutputFormatConfiguration()); // we set the file set's output path in the definition's getDataset(), so there is no need to configure it again. // here we just want to validate that an output partition key or dynamic partitioner was specified in the arguments. PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning()); if (outputKey == null) { String dynamicPartitionerClassName = PartitionedFileSetArguments.getDynamicPartitioner(runtimeArguments); if (dynamicPartitionerClassName == null) { throw new DataSetException( "Either a Partition key or a DynamicPartitioner class must be given as a runtime argument."); } copyDynamicPartitionerArguments(runtimeArguments, outputArgs); outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, files.getOutputFormatClassName()); outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET, getName()); } else { assertNotExists(outputKey, true); } return ImmutableMap.copyOf(outputArgs); }
location2 = pfs.getEmbeddedFileSet().getLocation(path2); try (Writer writer = new OutputStreamWriter(location2.append("file").getOutputStream())) { writer.write("2,2\n"); String basePath = pfs.getEmbeddedFileSet().getBaseLocation().toURI().getPath(); String absPath3 = location3.toURI().getPath(); Assert.assertTrue(absPath3.startsWith(basePath));
.builder().setBasePath("some/path").setInputFormat(TextInputFormat.class).build()); DataSetManager<FileSet> bManager = getDataset("b"); String bFormat = bManager.get().getInputFormatClassName(); String bPath = bManager.get().getBaseLocation().toURI().getPath(); Assert.assertTrue(bPath.endsWith("some/path/")); bManager.flush(); Assert.assertEquals(bFormat, bManager.get().getInputFormatClassName()); String newBPath = bManager.get().getBaseLocation().toURI().getPath(); Assert.assertTrue(newBPath.endsWith("/extra/"));
/**
 * Verifies that the two test file sets live under the directories of their respective namespaces, that their
 * input and output paths differ, and that a byte written to each output location can be read back from the
 * first input location of the same file set.
 *
 * @throws IOException if reading or writing a location fails
 * @throws DatasetManagementException if creating a file set fails
 */
@Test
public void testWriteRead() throws IOException, DatasetManagementException {
  FileSet fileSet1 = createFileset(testFileSetInstance1);
  FileSet fileSet2 = createFileset(testFileSetInstance2);
  Location fileSet1Output = fileSet1.getOutputLocation();
  Location fileSet2Output = fileSet2.getOutputLocation();

  // the namespace directory is expected three levels above the output location
  Location fileSet1NsDir = Locations.getParent(Locations.getParent(Locations.getParent(fileSet1Output)));
  Location fileSet2NsDir = Locations.getParent(Locations.getParent(Locations.getParent(fileSet2Output)));
  Assert.assertNotNull(fileSet1NsDir);
  Assert.assertNotNull(fileSet2NsDir);

  // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure messages accurate
  Assert.assertEquals(DatasetFrameworkTestUtil.NAMESPACE_ID.getNamespace(), fileSet1NsDir.getName());
  Assert.assertEquals(OTHER_NAMESPACE.getNamespace(), fileSet2NsDir.getName());

  Assert.assertNotEquals(fileSet1.getInputLocations().get(0).toURI().getPath(),
                         fileSet2.getInputLocations().get(0).toURI().getPath());
  Assert.assertNotEquals(fileSet1Output.toURI().getPath(), fileSet2Output.toURI().getPath());

  // write a distinct byte to each file set ...
  try (OutputStream out = fileSet1.getOutputLocation().getOutputStream()) {
    out.write(42);
  }
  try (OutputStream out = fileSet2.getOutputLocation().getOutputStream()) {
    out.write(54);
  }

  // ... and make sure each file set reads back its own byte
  try (InputStream in = fileSet1.getInputLocations().get(0).getInputStream()) {
    Assert.assertEquals(42, in.read());
  }
  try (InputStream in = fileSet2.getInputLocations().get(0).getInputStream()) {
    Assert.assertEquals(54, in.read());
  }
}
/*
 * Reads the "output.path" runtime argument from sec, passes it to FileSetArguments.setOutputPath to build
 * the dataset arguments, obtains the WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET file set with those
 * arguments, and writes every element of converted to the file set's output location, each followed by a
 * line separator (PrintWriter.println()).
 *
 * NOTE(review): sec and converted are defined outside this view. The PrintWriter is built directly on the
 * output stream without an explicit charset, and PrintWriter does not throw on write errors -- confirm
 * that both are acceptable here.
 * NOTE(review): the trailing "} });" closes an enclosing anonymous class and call that start outside this
 * view -- it must stay on this line.
 */
@Override public void run(DatasetContext context) throws Exception { Map<String, String> args = sec.getRuntimeArguments(); String outputPath = args.get("output.path"); Map<String, String> fileSetArgs = new HashMap<>(); FileSetArguments.setOutputPath(fileSetArgs, outputPath); FileSet fileSet = context.getDataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET, fileSetArgs); try (PrintWriter writer = new PrintWriter(fileSet.getOutputLocation().getOutputStream())) { for (String line : converted) { writer.write(line); writer.println(); } } } });
@Test public void testInputOutputFormatClassAtRuntime() throws Exception { // create a dataset with text input and output formats DatasetId datasetId = OTHER_NAMESPACE.dataset("testRuntimeFormats"); dsFrameworkUtil.createInstance("fileSet", datasetId, FileSetProperties.builder() .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .build()); // without passing anything in arguments, the input/output format classes will come from dataset properties FileSet fs = dsFrameworkUtil.getInstance(datasetId); Assert.assertEquals(TextInputFormat.class.getName(), fs.getInputFormatClassName()); Assert.assertEquals(TextOutputFormat.class.getName(), fs.getOutputFormatClassName()); // allow overriding the input format in dataset runtime args fs = dsFrameworkUtil.getInstance(datasetId, ImmutableMap.of( FileSetProperties.INPUT_FORMAT, CombineTextInputFormat.class.getName())); Assert.assertEquals(CombineTextInputFormat.class.getName(), fs.getInputFormatClassName()); Assert.assertEquals(TextOutputFormat.class.getName(), fs.getOutputFormatClassName()); // allow overriding both the input and output format in dataset runtime args fs = dsFrameworkUtil.getInstance(datasetId, ImmutableMap.of( FileSetProperties.INPUT_FORMAT, CombineTextInputFormat.class.getName(), FileSetProperties.OUTPUT_FORMAT, NullOutputFormat.class.getName())); Assert.assertEquals(CombineTextInputFormat.class.getName(), fs.getInputFormatClassName()); Assert.assertEquals(NullOutputFormat.class.getName(), fs.getOutputFormatClassName()); }
/**
 * Forwards the call to {@code delegate}.
 *
 * @return the output format class name reported by {@code delegate}
 */
@Override
public String getOutputFormatClassName() {
  final String outputFormatClassName = delegate.getOutputFormatClassName();
  return outputFormatClassName;
}
/**
 * Forwards the call to {@code delegate}.
 *
 * @return the input format configuration reported by {@code delegate}, returned as-is
 */
@Override
public Map<String, String> getInputFormatConfiguration() {
  final Map<String, String> inputFormatConfiguration = delegate.getInputFormatConfiguration();
  return inputFormatConfiguration;
}
/**
 * Builds the input format configuration for all input partitions.
 *
 * <p>Every input partition key is resolved to its partition, and the partition's relative path is resolved
 * to a location via {@code files}. The configuration returned by {@code files} for those locations is then
 * extended with a JSON mapping (under {@code PATH_TO_PARTITIONING_MAPPING}) from each location URI to the
 * partition key it belongs to.
 *
 * @return the configuration obtained from {@code files}, with the path-to-key mapping added
 * @throws NullPointerException if {@code getPartition} returns null for one of the input keys
 */
@Override
public Map<String, String> getInputFormatConfiguration() {
  Collection<PartitionKey> partitionKeys = getInputKeys();
  int expectedSize = partitionKeys.size();
  List<Location> locations = new ArrayList<>(expectedSize);
  Map<String, PartitionKey> keysByUri = new HashMap<>(expectedSize);

  for (PartitionKey partitionKey : partitionKeys) {
    // a key without a partition is not tolerated: requireNonNull fails immediately
    String partitionPath = Objects.requireNonNull(getPartition(partitionKey)).getRelativePath();
    Location location = files.getLocation(partitionPath);
    locations.add(location);
    keysByUri.put(location.toURI().toString(), partitionKey);
  }

  // NOTE(review): the map returned by files is modified in place, so it is assumed to be mutable -- confirm
  Map<String, String> configuration = files.getInputFormatConfiguration(locations);
  configuration.put(PATH_TO_PARTITIONING_MAPPING, GSON.toJson(keysByUri));
  return configuration;
}
Location input = fileSet.getInputLocations().iterator().next(); InputStream in = input.getInputStream(); in.close(); fileSet.getOutputLocation(); Assert.fail("Extrernal file set should not allow writing output."); } catch (UnsupportedOperationException e) {
@Test public void testAbsolutePath() throws IOException, DatasetManagementException { String absolutePath = tmpFolder.newFolder() + "/absolute/path"; dsFrameworkUtil.createInstance("fileSet", testFileSetInstance3, FileSetProperties.builder() .setBasePath(absolutePath).build()); // validate that the base path for the file set was created Assert.assertTrue(new File(absolutePath).isDirectory()); // instantiate the file set with an output path Map<String, String> fileArgs = Maps.newHashMap(); FileSetArguments.setOutputPath(fileArgs, "out"); FileSet fileSet = dsFrameworkUtil.getInstance(testFileSetInstance3, fileArgs); // write to the output path Assert.assertEquals(absolutePath + "/out", fileSet.getOutputLocation().toURI().getPath()); try (OutputStream out = fileSet.getOutputLocation().getOutputStream()) { out.write(42); } // validate that the file was created Assert.assertTrue(new File(absolutePath + "/out").isFile()); }
@Override public Map<String, String> getOutputFormatConfiguration() { checkNotExternal(); // copy the output properties of the embedded file set to the output arguments Map<String, String> outputArgs = new HashMap<>(files.getOutputFormatConfiguration()); // we set the file set's output path in the definition's getDataset(), so there is no need to configure it again. // here we just want to validate that an output partition key or dynamic partitioner was specified in the arguments. PartitionKey outputKey = PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning()); if (outputKey == null) { String dynamicPartitionerClassName = PartitionedFileSetArguments.getDynamicPartitioner(runtimeArguments); if (dynamicPartitionerClassName == null) { throw new DataSetException( "Either a Partition key or a DynamicPartitioner class must be given as a runtime argument."); } copyDynamicPartitionerArguments(runtimeArguments, outputArgs); outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, files.getOutputFormatClassName()); outputArgs.put(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET, getName()); } else { assertNotExists(outputKey, true); } return ImmutableMap.copyOf(outputArgs); }
/**
 * Chooses the output format class name based on whether the runtime arguments carry an output partition key.
 *
 * @return the output format class name reported by {@code files} when an output partition key is present;
 *         otherwise the fully qualified name of the dynamic partitioning output format
 */
@Override
public String getOutputFormatClassName() {
  checkNotExternal();
  boolean hasOutputKey =
    PartitionedFileSetArguments.getOutputPartitionKey(runtimeArguments, getPartitioning()) != null;
  // NOTE(review): the dynamic output format is referenced by name only -- presumably because the class is
  // not on this module's compile-time classpath; confirm
  return hasOutputKey
    ? files.getOutputFormatClassName()
    : "co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat";
}
private Location getQuarantineLocation() throws IOException { // each transaction must not share its quarantine directory with another transaction return files.getBaseLocation().append(QUARANTINE_DIR + "." + tx.getTransactionId()); }
/**
 * Resolves {@code relativePath} against the embedded file set of {@code partitionedFileSetDataset}.
 *
 * @return the location that the embedded file set reports for {@code relativePath}
 */
@Override
public Location getLocation() {
  final Location resolved = partitionedFileSetDataset.getEmbeddedFileSet().getLocation(relativePath);
  return resolved;
}
long sum = 0L, count = 1; long inputRecords = 0; for (Location inputLocation : input.getInputLocations()) { final PrintWriter writer = new PrintWriter(inputLocation.getOutputStream()); for (long value : values) { Location resultLocation = results.getOutputLocation(); if (resultLocation.isDirectory()) { for (Location child : resultLocation.list()) {