/**
 * Strips every partition whose process state is {@link ProcessState#DISCARDED} (i.e. partitions that have failed
 * processing the configured number of times) out of the given working set and hands them back to the caller.
 *
 * @param workingSet the working set to prune; its partition collection is modified in place
 * @return the partition details looked up for each removed partition key, in iteration order
 */
protected List<PartitionDetail> removeDiscardedPartitions(ConsumerWorkingSet workingSet) {
  List<PartitionDetail> discarded = new ArrayList<>();
  for (Iterator<ConsumablePartition> it = workingSet.getPartitions().iterator(); it.hasNext(); ) {
    ConsumablePartition candidate = it.next();
    if (candidate.getProcessState() != ProcessState.DISCARDED) {
      // still eligible for processing; leave it in the working set
      continue;
    }
    discarded.add(getPartitionedFileSet().getPartition(candidate.getPartitionKey()));
    // remove through the iterator so the underlying collection can be modified while iterating
    it.remove();
  }
  return discarded;
}
/**
 * Reads the value stored in the state file ({@code STATE_FILE_NAME}) under the base location of the embedded
 * file set and parses it as a long.
 *
 * @return the parsed value, or {@code null} if the state file does not exist
 * @throws IOException if the state file cannot be checked for existence or read
 * @throws NumberFormatException if the file content, ignoring leading/trailing whitespace, is not a valid long
 */
private Long getLatestSnapshot() throws IOException {
  Location stateFile = files.getEmbeddedFileSet().getBaseLocation().append(STATE_FILE_NAME);
  if (!stateFile.exists()) {
    return null;
  }
  try (InputStreamReader reader = new InputStreamReader(stateFile.getInputStream(), Charsets.UTF_8)) {
    // Long.valueOf does not accept any whitespace, so a state file that ends with a newline (for example after
    // being edited by hand) would otherwise fail to parse even though it holds a valid number.
    String val = CharStreams.toString(reader).trim();
    return Long.valueOf(val);
  }
}
// Creates a partition for PARTITION_KEY via createPartition(pfs, PARTITION_KEY, "file"), records the returned
// location in outputLocationRef, and asserts that the location exists and that the partition (and its location)
// can be looked up. It then drops the partition and asserts that both the location and the partition entry are
// gone. The final dropPartition call targets the same, already dropped key and is followed by no assertion;
// presumably it checks that dropping a missing partition does not throw -- TODO confirm against dropPartition.
@Override public void apply() throws Exception { Location outputLocation = createPartition(pfs, PARTITION_KEY, "file"); outputLocationRef.set(outputLocation); Assert.assertTrue(outputLocation.exists()); Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists()); pfs.dropPartition(PARTITION_KEY); Assert.assertFalse(outputLocation.exists()); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); pfs.dropPartition(PARTITION_KEY); } });
/**
 * Drops every partition whose {@code SNAPSHOT_FIELD} value matches the range condition
 * {@code (null, upperLimit)}. Nothing is done unless {@code 0 < upperLimit < Long.MAX_VALUE}.
 *
 * @param upperLimit upper bound passed to the range condition on the snapshot field
 * @throws IOException declared by this method's signature
 */
public void deleteMatchingPartitionsByTime(long upperLimit) throws IOException {
  // non-positive limits and Long.MAX_VALUE are treated as "nothing to delete"
  if (upperLimit <= 0 || upperLimit >= Long.MAX_VALUE) {
    return;
  }
  PartitionFilter olderThanLimit = PartitionFilter.builder()
    .addRangeCondition(SNAPSHOT_FIELD, null, upperLimit)
    .build();
  for (PartitionDetail match : files.getPartitions(olderThanLimit)) {
    files.dropPartition(match.getPartitionKey());
  }
}
@Override public void apply() throws Exception { // drop all existing partitions (2 of which are not consumed) for (PartitionDetail partitionDetail : dataset.getPartitions(PartitionFilter.ALWAYS_MATCH)) { dataset.dropPartition(partitionDetail.getPartitionKey()); } // add 5 new ones for (PartitionKey partitionKey : partitionKeys2) { dataset.getPartitionOutput(partitionKey).addPartition(); } } });
@Override public void apply() throws Exception { Assert.assertTrue(pfsBaseLocation.exists()); // attempt to write a new partition - should fail try { pfs.getPartitionOutput(PARTITION_KEY); Assert.fail("External partitioned file set should not allow writing files"); } catch (UnsupportedOperationException e) { // expected } // create an external file and add it as a partition File someFile = new File(absolutePath, "some.file"); OutputStream out = new FileOutputStream(someFile); out.close(); Assert.assertTrue(someFile.exists()); pfs.addPartition(PARTITION_KEY, "some.file"); Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists()); // now drop the partition and validate the file is still there pfs.dropPartition(PARTITION_KEY); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(someFile.exists()); } });
/**
 * Registers one partition in the dataset for each key in partitionKeys1.
 */
@Override
public void apply() throws Exception {
  for (PartitionKey key : partitionKeys1) {
    // obtain the output handle for this key and register it as a partition
    dataset.getPartitionOutput(key).addPartition();
  }
}
});
@Override public void apply() throws Exception { PartitionOutput partitionOutput = dataset.getPartitionOutput(PARTITION_KEY); long beforeTime = System.currentTimeMillis(); partitionOutput.addPartition(); long afterTime = System.currentTimeMillis(); PartitionDetail partitionDetail = dataset.getPartition(PARTITION_KEY); Assert.assertNotNull(partitionDetail); long creationTime = partitionDetail.getMetadata().getCreationTime(); long lastModificationTime = partitionDetail.getMetadata().lastModificationTime(); // lastModificationTime time should be equal to creationTime for a partition that has not been appended to Assert.assertEquals(creationTime, lastModificationTime); Assert.assertTrue(creationTime >= beforeTime && creationTime <= afterTime); } });
@Override public void apply() throws Exception { PartitionOutput partitionOutput = dataset.getPartitionOutput(PARTITION_KEY); ImmutableMap<String, String> originalEntries = ImmutableMap.of("key1", "value1", "key2", "value2"); partitionOutput.setMetadata(originalEntries); dataset.addMetadata(PARTITION_KEY, updatedMetadata); PartitionDetail partitionDetail = dataset.getPartition(PARTITION_KEY); Assert.assertNotNull(partitionDetail); dataset.setMetadata(PARTITION_KEY, Collections.singletonMap("key3", "value4")); partitionDetail = dataset.getPartition(PARTITION_KEY); Assert.assertNotNull(partitionDetail); Assert.assertEquals(ImmutableMap.of("key1", "value1", "key2", "value2", "key3", "value4"), dataset.addMetadata(PARTITION_KEY, "key2", "value3"); Assert.fail("Expected not to be able to update an existing metadata entry"); } catch (DataSetException expected) { dataset.removeMetadata(PARTITION_KEY, ImmutableSet.of("key2", "key3", "key4")); partitionDetail = dataset.getPartition(PARTITION_KEY); Assert.assertNotNull(partitionDetail); Assert.assertEquals(ImmutableMap.of("key1", "value1"), partitionDetail.getMetadata().asMap()); .addStringField("s", "nonexistent") .build(); dataset.addMetadata(nonexistentPartitionKey, "key2", "value3");
// Excerpt from a larger method (the enclosing definition is not visible here). Visible steps: fetch the "pfs"
// dataset, obtain a partition output for the key x="nn" and pass its location to prepareFileInput; then look up
// the partition stored under outputKey (declared outside this excerpt), assert that it exists and pass its
// location to validateFileOutput; finally drop both the partition of partitionOutput and the looked-up partition.
// NOTE(review): whatever produces the partition under outputKey is not part of this excerpt.
DataSetManager<PartitionedFileSet> pfsManager = getDataset("pfs"); PartitionedFileSet pfs = pfsManager.get(); PartitionOutput partitionOutput = pfs.getPartitionOutput(PartitionKey.builder().addStringField("x", "nn").build()); Location location = partitionOutput.getLocation(); prepareFileInput(location); PartitionDetail partition = pfs.getPartition(outputKey); Assert.assertNotNull(partition); validateFileOutput(partition.getLocation()); pfs.dropPartition(partitionOutput.getPartitionKey()); pfs.dropPartition(partition.getPartitionKey());
@Test public void testRollbackOnJobFailure() throws Exception { // tests the logic of #onFailure method Map<String, String> args = new HashMap<>(); FileSetArguments.setOutputPath(args, "custom/output/path"); PartitionedFileSetArguments.setOutputPartitionKey(args, PARTITION_KEY); PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance, args); TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs); txContext.start(); Location outputLocation = pfs.getEmbeddedFileSet().getOutputLocation(); Assert.assertFalse(outputLocation.exists()); outputLocation.mkdirs(); Assert.assertTrue(outputLocation.exists()); ((PartitionedFileSetDataset) pfs).onFailure(); txContext.abort(); // because the previous transaction aborted, the partition as well as the directory for it will not exist txContext.start(); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Assert.assertFalse(outputLocation.exists()); txContext.finish(); }
final PartitionOutput output1 = pfs.getPartitionOutput(KEY_1); location1 = output1.getLocation(); try (Writer writer = new OutputStreamWriter(location1.append("file").getOutputStream())) { location2 = pfs.getEmbeddedFileSet().getLocation(path2); try (Writer writer = new OutputStreamWriter(location2.append("file").getOutputStream())) { writer.write("2,2\n"); pfs.addPartition(KEY_2, path2); final PartitionOutput output3 = pfs.getPartitionOutput(KEY_3); location3 = output3.getLocation(); String basePath = pfs.getEmbeddedFileSet().getBaseLocation().toURI().getPath(); String absPath3 = location3.toURI().getPath(); Assert.assertTrue(absPath3.startsWith(basePath));
@Override public void apply() throws Exception { try { pfs.getPartitionOutput( PartitionKey.builder().addField("i", 1).addField("l", 2L).build()); Assert.fail("should have thrown exception due to missing field"); pfs.addPartition( PartitionKey.builder().addField("i", 1).addField("l", "2").addField("s", "a").build(), "some/location"); pfs.addPartition( PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").addField("x", "x").build(), "some/location", ImmutableMap.of("a", "b")); pfs.addPartition( PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").build(), "some/location", ImmutableMap.of("a", "b")); try { pfs.addMetadata( PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", "a").addField("x", "x").build(), ImmutableMap.of("abc", "xyz")); pfs.dropPartition(PartitionKey.builder().addField("i", 1).addField("l", 2L).addField("s", 0).build()); Assert.fail("should have thrown exception due to incompatible field"); } catch (IllegalArgumentException e) {
Partitioning partitioning = null; if (dataset instanceof PartitionedFileSet) { partitioning = ((PartitionedFileSet) dataset).getPartitioning(); baseLocation = ((PartitionedFileSet) dataset).getEmbeddedFileSet().getBaseLocation(); } else { baseLocation = ((FileSet) dataset).getBaseLocation();
Location loc = pfs.get().getEmbeddedFileSet().getLocation("some/path"); OutputStream os = loc.append("part1").getOutputStream(); try (Writer writer = new OutputStreamWriter(os)) { writer.write("1,x\n"); pfs.get().addPartition(PartitionKey.builder().addStringField("x", "1").build(), "some/path"); pfs.flush();
@Override public void apply() throws Exception { // this should succeed without error (but log a warning) Assert.assertEquals(Collections.EMPTY_SET, pfs.getPartitions(PartitionFilter.builder().addValueCondition("me-not-there", 42).build())); } });
// Drops the partition identified by partitionKey1 from the dataset.
@Override public void apply() throws Exception { dataset.dropPartition(partitionKey1); } });
// Excerpt from a larger method (the enclosing definition is not visible here). Visible steps: obtain the
// partition output of cleanRecords for outputPartition and read its location, declare numInputFiles = 100, then
// call concatenatePartition(outputPartition) and invoke get() on its result.
// NOTE(review): partitionLocation and numInputFiles are not used in the visible lines; the code that uses them
// is presumably elided from this excerpt. get() presumably blocks until the concatenation finishes -- confirm.
PartitionOutput partitionOutput = cleanRecords.getPartitionOutput(outputPartition); Location partitionLocation = partitionOutput.getLocation(); int numInputFiles = 100; cleanRecords.concatenatePartition(outputPartition).get();
DynamicPartitionerWriterWrapper(TaskAttemptContext job) { this.job = job; Configuration configuration = job.getConfiguration(); Class<? extends DynamicPartitioner> partitionerClass = configuration .getClass(PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class); this.dynamicPartitioner = new InstantiatorFactory(false).get(TypeToken.of(partitionerClass)).create(); this.partitionWriteOption = DynamicPartitioner.PartitionWriteOption.valueOf( configuration.get(PartitionedFileSetArguments.DYNAMIC_PARTITIONER_WRITE_OPTION)); MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration); this.taskContext = classLoader.getTaskContextProvider().get(job); // name the output file 'part-<RunId>-m-00000' instead of 'part-m-00000' String outputName = DynamicPartitioningOutputFormat.getOutputName(job); if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE_OR_APPEND) { outputName = outputName + "-" + taskContext.getProgramRunId().getRun(); } this.outputName = outputName; String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET); this.outputDataset = taskContext.getDataset(outputDatasetName); this.partitioning = outputDataset.getPartitioning(); this.dynamicPartitioner.initialize(taskContext); this.fileOutputFormatName = job.getConfiguration() .getClass(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_FORMAT_CLASS_NAME, null, FileOutputFormat.class) .getName(); }
// Registers path3 as the partition for KEY_4 in pfs.
@Override public void run() { pfs.addPartition(KEY_4, path3); } });