congrats Icon
New! Announcing Tabnine Chat Beta
Learn More
Tabnine Logo
PartitionedFileSet
Code Index · Add Tabnine to your IDE (free)

How to use
PartitionedFileSet
in
co.cask.cdap.api.dataset.lib

Best Java code snippets using co.cask.cdap.api.dataset.lib.PartitionedFileSet (Showing top 20 results out of 315)

origin: cdapio/cdap

/**
 * Purges from the working set every partition that has failed processing the configured number of times
 * (i.e. whose process state is {@code DISCARDED}) and hands those partitions back to the caller.
 *
 * @param workingSet the working set to scan; matching entries are removed from it in place
 * @return the details, as looked up in the partitioned file set, of each partition that was removed
 */
protected List<PartitionDetail> removeDiscardedPartitions(ConsumerWorkingSet workingSet) {
 List<PartitionDetail> discarded = new ArrayList<>();
 for (Iterator<ConsumablePartition> it = workingSet.getPartitions().iterator(); it.hasNext(); ) {
  ConsumablePartition candidate = it.next();
  if (candidate.getProcessState() != ProcessState.DISCARDED) {
   continue;
  }
  // look the partition up before removing it, so a failed lookup leaves the working set untouched
  // NOTE(review): getPartition presumably returns null for a partition that no longer exists, in which
  // case a null element ends up in the result - confirm whether callers expect that
  PartitionDetail detail = getPartitionedFileSet().getPartition(candidate.getPartitionKey());
  discarded.add(detail);
  it.remove();
 }
 return discarded;
}
origin: co.cask.hydrator/core-plugins

/**
 * Reads the value stored in the state file under the embedded file set's base location.
 *
 * @return the file's content parsed as a {@link Long}, or {@code null} if the state file does not exist
 * @throws IOException if the state file cannot be checked or read
 */
private Long getLatestSnapshot() throws IOException {
 Location stateFile = files.getEmbeddedFileSet().getBaseLocation().append(STATE_FILE_NAME);
 if (stateFile.exists()) {
  try (InputStreamReader reader = new InputStreamReader(stateFile.getInputStream(), Charsets.UTF_8)) {
   // the whole file content is parsed as-is
   return Long.valueOf(CharStreams.toString(reader));
  }
 }
 return null;
}
origin: caskdata/cdap

 /**
  * Creates a partition, verifies that it and its location exist, drops it, and verifies that both are gone.
  */
 @Override
 public void apply() throws Exception {
  Location outputLocation = createPartition(pfs, PARTITION_KEY, "file");
  // publish the location through the reference so it can be read outside this callback
  outputLocationRef.set(outputLocation);
  Assert.assertTrue(outputLocation.exists());
  Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
  Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists());
  // after the drop, neither the files nor the partition entry may remain
  pfs.dropPartition(PARTITION_KEY);
  Assert.assertFalse(outputLocation.exists());
  Assert.assertNull(pfs.getPartition(PARTITION_KEY));
  // second drop of the same key - presumably checks that dropping a missing partition does not throw; confirm
  pfs.dropPartition(PARTITION_KEY);
 }
});
origin: co.cask.hydrator/core-plugins

/**
 * Drops every partition matched by a range condition on the snapshot field that has no lower bound and
 * {@code upperLimit} as its upper bound.
 *
 * @param upperLimit the upper bound of the range; nothing is dropped unless it is strictly between
 *                   {@code 0} and {@link Long#MAX_VALUE}
 * @throws IOException declared by this method's signature
 */
public void deleteMatchingPartitionsByTime(long upperLimit) throws IOException {
 // out-of-range limits are a no-op
 if (upperLimit <= 0 || upperLimit >= Long.MAX_VALUE) {
  return;
 }
 PartitionFilter belowLimit = PartitionFilter.builder()
  .addRangeCondition(SNAPSHOT_FIELD, null, upperLimit)
  .build();
 for (PartitionDetail match : files.getPartitions(belowLimit)) {
  files.dropPartition(match.getPartitionKey());
 }
}
origin: caskdata/cdap

 /**
  * Replaces the dataset's partitions: drops every partition currently in the dataset, then adds one
  * partition for each key in {@code partitionKeys2}.
  */
 @Override
 public void apply() throws Exception {
  // drop all existing partitions (2 of which are not consumed)
  for (PartitionDetail partitionDetail : dataset.getPartitions(PartitionFilter.ALWAYS_MATCH)) {
   dataset.dropPartition(partitionDetail.getPartitionKey());
  }
  // add 5 new ones
  for (PartitionKey partitionKey : partitionKeys2) {
   dataset.getPartitionOutput(partitionKey).addPartition();
  }
 }
});
origin: caskdata/cdap

 /**
  * Verifies the behavior of an external partitioned file set: requesting a partition output is rejected,
  * an existing file can be registered as a partition, and dropping that partition leaves the file in place.
  */
 @Override
 public void apply() throws Exception {
  Assert.assertTrue(pfsBaseLocation.exists());
  // attempt to write a new partition - should fail
  try {
   pfs.getPartitionOutput(PARTITION_KEY);
   Assert.fail("External partitioned file set should not allow writing files");
  } catch (UnsupportedOperationException e) {
   // expected
  }
  // create an external file and add it as a partition
  File someFile = new File(absolutePath, "some.file");
  // opening and immediately closing the stream creates the (empty) file on disk
  OutputStream out = new FileOutputStream(someFile);
  out.close();
  Assert.assertTrue(someFile.exists());
  pfs.addPartition(PARTITION_KEY, "some.file");
  Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
  Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists());
  // now drop the partition and validate the file is still there
  pfs.dropPartition(PARTITION_KEY);
  Assert.assertNull(pfs.getPartition(PARTITION_KEY));
  Assert.assertTrue(someFile.exists());
 }
});
origin: caskdata/cdap

 /**
  * Adds one partition to the dataset for each key in {@code partitionKeys1}.
  */
 @Override
 public void apply() throws Exception {
  for (PartitionKey partitionKey : partitionKeys1) {
   dataset.getPartitionOutput(partitionKey).addPartition();
  }
 }
});
origin: caskdata/cdap

 /**
  * Adds a partition and verifies its metadata timestamps: the last modification time equals the creation
  * time, and the creation time falls within the wall-clock window in which the partition was added.
  */
 @Override
 public void apply() throws Exception {
  PartitionOutput partitionOutput = dataset.getPartitionOutput(PARTITION_KEY);
  // bracket addPartition() with timestamps so the recorded creation time can be bounds-checked below
  long beforeTime = System.currentTimeMillis();
  partitionOutput.addPartition();
  long afterTime = System.currentTimeMillis();
  PartitionDetail partitionDetail = dataset.getPartition(PARTITION_KEY);
  Assert.assertNotNull(partitionDetail);
  long creationTime = partitionDetail.getMetadata().getCreationTime();
  long lastModificationTime = partitionDetail.getMetadata().lastModificationTime();
  // lastModificationTime time should be equal to creationTime for a partition that has not been appended to
  Assert.assertEquals(creationTime, lastModificationTime);
  Assert.assertTrue(creationTime >= beforeTime && creationTime <= afterTime);
 }
});
origin: caskdata/cdap

@Override
public void apply() throws Exception {
 PartitionOutput partitionOutput = dataset.getPartitionOutput(PARTITION_KEY);
 ImmutableMap<String, String> originalEntries = ImmutableMap.of("key1", "value1", "key2", "value2");
 partitionOutput.setMetadata(originalEntries);
 dataset.addMetadata(PARTITION_KEY, updatedMetadata);
 PartitionDetail partitionDetail = dataset.getPartition(PARTITION_KEY);
 Assert.assertNotNull(partitionDetail);
 dataset.setMetadata(PARTITION_KEY, Collections.singletonMap("key3", "value4"));
 partitionDetail = dataset.getPartition(PARTITION_KEY);
 Assert.assertNotNull(partitionDetail);
 Assert.assertEquals(ImmutableMap.of("key1", "value1", "key2", "value2", "key3", "value4"),
  dataset.addMetadata(PARTITION_KEY, "key2", "value3");
  Assert.fail("Expected not to be able to update an existing metadata entry");
 } catch (DataSetException expected) {
 dataset.removeMetadata(PARTITION_KEY, ImmutableSet.of("key2", "key3", "key4"));
 partitionDetail = dataset.getPartition(PARTITION_KEY);
 Assert.assertNotNull(partitionDetail);
 Assert.assertEquals(ImmutableMap.of("key1", "value1"), partitionDetail.getMetadata().asMap());
   .addStringField("s", "nonexistent")
   .build();
  dataset.addMetadata(nonexistentPartitionKey, "key2", "value3");
origin: cdapio/cdap

DataSetManager<PartitionedFileSet> pfsManager = getDataset("pfs");
PartitionedFileSet pfs = pfsManager.get();
PartitionOutput partitionOutput = pfs.getPartitionOutput(PartitionKey.builder().addStringField("x", "nn").build());
Location location = partitionOutput.getLocation();
prepareFileInput(location);
PartitionDetail partition = pfs.getPartition(outputKey);
Assert.assertNotNull(partition);
validateFileOutput(partition.getLocation());
pfs.dropPartition(partitionOutput.getPartitionKey());
pfs.dropPartition(partition.getPartitionKey());
origin: caskdata/cdap

/**
 * Checks that calling {@code onFailure} and aborting the transaction leaves neither the partition nor its
 * output directory behind.
 */
@Test
public void testRollbackOnJobFailure() throws Exception {
 // exercises the #onFailure code path
 Map<String, String> runtimeArgs = new HashMap<>();
 FileSetArguments.setOutputPath(runtimeArgs, "custom/output/path");
 PartitionedFileSetArguments.setOutputPartitionKey(runtimeArgs, PARTITION_KEY);
 PartitionedFileSet fileSet = dsFrameworkUtil.getInstance(pfsInstance, runtimeArgs);

 TransactionContext tx = new TransactionContext(txClient, (TransactionAware) fileSet);
 tx.start();

 // create the output directory by hand inside the first transaction
 Location outputDir = fileSet.getEmbeddedFileSet().getOutputLocation();
 Assert.assertFalse(outputDir.exists());
 outputDir.mkdirs();
 Assert.assertTrue(outputDir.exists());

 ((PartitionedFileSetDataset) fileSet).onFailure();
 tx.abort();

 // the first transaction was aborted, so in a fresh one both the partition and its directory must be gone
 tx.start();
 Assert.assertNull(fileSet.getPartition(PARTITION_KEY));
 Assert.assertFalse(outputDir.exists());
 tx.finish();
}
origin: cdapio/cdap

final PartitionOutput output1 = pfs.getPartitionOutput(KEY_1);
location1 = output1.getLocation();
try (Writer writer = new OutputStreamWriter(location1.append("file").getOutputStream())) {
location2 = pfs.getEmbeddedFileSet().getLocation(path2);
try (Writer writer = new OutputStreamWriter(location2.append("file").getOutputStream())) {
 writer.write("2,2\n");
pfs.addPartition(KEY_2, path2);
final PartitionOutput output3 = pfs.getPartitionOutput(KEY_3);
location3 = output3.getLocation();
String basePath = pfs.getEmbeddedFileSet().getBaseLocation().toURI().getPath();
String absPath3 = location3.toURI().getPath();
Assert.assertTrue(absPath3.startsWith(basePath));
origin: caskdata/cdap

@Override
public void apply() throws Exception {
 try {
  pfs.getPartitionOutput(
   PartitionKey.builder().addField("i", 1).addField("l", 2L).build());
  Assert.fail("should have thrown exception due to missing field");
  pfs.addPartition(
   PartitionKey.builder().addField("i", 1).addField("l", "2").addField("s", "a").build(),
   "some/location");
  pfs.addPartition(
   PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").addField("x", "x").build(),
   "some/location", ImmutableMap.of("a", "b"));
 pfs.addPartition(
  PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").build(),
  "some/location", ImmutableMap.of("a", "b"));
 try {
  pfs.addMetadata(
   PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").addField("x", "x").build(),
   ImmutableMap.of("abc", "xyz"));
  pfs.dropPartition(PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", 0).build());
  Assert.fail("should have thrown exception due to incompatible field");
 } catch (IllegalArgumentException e) {
origin: co.cask.cdap/cdap-explore

Partitioning partitioning = null;
if (dataset instanceof PartitionedFileSet) {
 partitioning = ((PartitionedFileSet) dataset).getPartitioning();
 baseLocation = ((PartitionedFileSet) dataset).getEmbeddedFileSet().getBaseLocation();
} else {
 baseLocation = ((FileSet) dataset).getBaseLocation();
origin: cdapio/cdap

Location loc = pfs.get().getEmbeddedFileSet().getLocation("some/path");
OutputStream os = loc.append("part1").getOutputStream();
try (Writer writer = new OutputStreamWriter(os)) {
 writer.write("1,x\n");
pfs.get().addPartition(PartitionKey.builder().addStringField("x", "1").build(), "some/path");
pfs.flush();
origin: caskdata/cdap

 /**
  * Queries partitions with a value condition on the field "me-not-there" and expects an empty result.
  */
 @Override
 public void apply() throws Exception {
  PartitionFilter unknownFieldFilter =
   PartitionFilter.builder().addValueCondition("me-not-there", 42).build();
  // expected to complete without an error (though a warning gets logged)
  Assert.assertEquals(Collections.emptySet(), pfs.getPartitions(unknownFieldFilter));
 }
});
origin: caskdata/cdap

 /**
  * Drops the partition identified by {@code partitionKey1} from the dataset.
  */
 @Override
 public void apply() throws Exception {
  dataset.dropPartition(partitionKey1);
 }
});
origin: cdapio/cdap

PartitionOutput partitionOutput = cleanRecords.getPartitionOutput(outputPartition);
Location partitionLocation = partitionOutput.getLocation();
int numInputFiles = 100;
cleanRecords.concatenatePartition(outputPartition).get();
origin: cdapio/cdap

/**
 * Builds the wrapper from the job configuration of the given task attempt: instantiates and initializes the
 * configured {@link DynamicPartitioner}, resolves the partition write option, the task context, the output
 * file name, the output dataset with its partitioning, and the name of the file output format class.
 *
 * @param job the task attempt context whose configuration supplies all settings
 */
DynamicPartitionerWriterWrapper(TaskAttemptContext job) {
 this.job = job;
 Configuration configuration = job.getConfiguration();
 // NOTE(review): the lookup passes null as the default value; if the property is unset the class is
 // presumably null and the instantiation below would fail - confirm the property is always set
 Class<? extends DynamicPartitioner> partitionerClass = configuration
  .getClass(PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);
 this.dynamicPartitioner = new InstantiatorFactory(false).get(TypeToken.of(partitionerClass)).create();
 this.partitionWriteOption =
  DynamicPartitioner.PartitionWriteOption.valueOf(
   configuration.get(PartitionedFileSetArguments.DYNAMIC_PARTITIONER_WRITE_OPTION));
 MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
 this.taskContext = classLoader.getTaskContextProvider().get(job);
 // name the output file 'part-<RunId>-m-00000' instead of 'part-m-00000'
 String outputName = DynamicPartitioningOutputFormat.getOutputName(job);
 // the run id suffix is only applied for the CREATE_OR_APPEND write option
 if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND) {
  outputName = outputName + "-" + taskContext.getProgramRunId().getRun();
 }
 this.outputName = outputName;
 String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
 this.outputDataset = taskContext.getDataset(outputDatasetName);
 this.partitioning = outputDataset.getPartitioning();
 // the partitioner is initialized only after the task context has been resolved above
 this.dynamicPartitioner.initialize(taskContext);
 // NOTE(review): as with the partitioner class, the default here is null, so an unset property would
 // presumably fail on getName() - confirm
 this.fileOutputFormatName = job.getConfiguration()
  .getClass(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, null, FileOutputFormat.class)
  .getName();
}
origin: cdapio/cdap

 /**
  * Adds a partition for {@code KEY_4} at the path {@code path3}.
  */
 @Override
 public void run() {
  pfs.addPartition(KEY_4, path3);
 }
});
co.cask.cdap.api.dataset.lib.PartitionedFileSet

Javadoc

Represents a dataset that is split into partitions that can be uniquely addressed by partition keys along multiple dimensions. Each partition is a path in a file set, with the partition key attached as metadata. Note that the partitioning of the dataset is fixed; that is, all operations that accept a partition key as a parameter require that the key has exactly the same schema as the partitioning. This dataset can be made available for querying with SQL (explore). This is enabled through dataset properties when the dataset is created. See FileSetProperties for details. If it is enabled for explore, a Hive external table will be created when the dataset is created. The Hive table is partitioned by the same keys as this dataset.

Most used methods

  • getPartition
  • getEmbeddedFileSet
  • dropPartition
  • getPartitionOutput
    Return a partition output for a specific partition key, in preparation for creating a new partition.
  • getPartitions
  • addPartition
    Add a partition for a given partition key, stored at a given path (relative to the file set's base p
  • consumePartitions
  • getPartitioning
  • addMetadata
    Adds a set of new metadata entries for a particular partition. Note that existing entries cannot be
  • concatenatePartition
    Asynchronous operation to concatenate the partition in Hive. Note that Hive only supports certain fo
  • removeMetadata
    Removes a set of metadata entries for a particular partition. If any metadata key does not exist, no
  • setMetadata
    Sets metadata entries for a particular partition. If the metadata entry key does not already exist,
  • removeMetadata
  • setMetadata

Popular in Java

  • Reading from database using SQL prepared statement
  • startActivity (Activity)
  • orElseThrow (Optional)
    Return the contained value, if present, otherwise throw an exception to be created by the provided s
  • notifyDataSetChanged (ArrayAdapter)
  • System (java.lang)
    Provides access to system-related information and resources including standard input and output. Ena
  • DecimalFormat (java.text)
    A concrete subclass of NumberFormat that formats decimal numbers. It has a variety of features desig
  • MessageFormat (java.text)
    Produces concatenated messages in language-neutral way. New code should probably use java.util.Forma
  • Notification (javax.management)
  • JComboBox (javax.swing)
  • BasicDataSource (org.apache.commons.dbcp)
    Basic implementation of javax.sql.DataSource that is configured via JavaBeans properties. This is no
  • Github Copilot alternatives
Tabnine Logo
  • Products

    Search for Java code · Search for JavaScript code
  • IDE Plugins

    IntelliJ IDEA · WebStorm · Visual Studio · Android Studio · Eclipse · Visual Studio Code · PyCharm · Sublime Text · PhpStorm · Vim · GoLand · RubyMine · Emacs · Jupyter Notebook · Jupyter Lab · Rider · DataGrip · AppCode
  • Company

    About Us · Contact Us · Careers
  • Resources

    FAQ · Blog · Tabnine Academy · Terms of use · Privacy policy · Java Code Index · Javascript Code Index
Get Tabnine for your IDE now