/**
 * Prunes the partitions that have failed processing the configured number of times (i.e. whose
 * process state is {@code DISCARDED}) from the working set.
 *
 * @param workingSet the working set to prune; matching partitions are removed from it in place
 * @return the details of the removed partitions, looked up in the partitioned file set by key
 */
protected List<PartitionDetail> removeDiscardedPartitions(ConsumerWorkingSet workingSet) {
  List<PartitionDetail> discarded = new ArrayList<>();
  for (Iterator<ConsumablePartition> it = workingSet.getPartitions().iterator(); it.hasNext(); ) {
    ConsumablePartition candidate = it.next();
    if (candidate.getProcessState() != ProcessState.DISCARDED) {
      continue;
    }
    // look the detail up before removing the entry from the working set
    discarded.add(getPartitionedFileSet().getPartition(candidate.getPartitionKey()));
    it.remove();
  }
  return discarded;
}
@Override public void apply() throws Exception { Assert.assertTrue(pfsBaseLocation.exists()); // attempt to write a new partition - should fail try { pfs.getPartitionOutput(PARTITION_KEY); Assert.fail("External partitioned file set should not allow writing files"); } catch (UnsupportedOperationException e) { // expected } // create an external file and add it as a partition File someFile = new File(absolutePath, "some.file"); OutputStream out = new FileOutputStream(someFile); out.close(); Assert.assertTrue(someFile.exists()); pfs.addPartition(PARTITION_KEY, "some.file"); Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists()); // now drop the partition and validate the file is still there pfs.dropPartition(PARTITION_KEY); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(someFile.exists()); } });
/**
 * Looks up the partition that belongs to the latest recorded snapshot.
 *
 * @return the partition for the latest snapshot, or {@code null} if no latest snapshot is recorded
 * @throws IOException declared for the snapshot lookup
 * @throws IllegalStateException if a latest snapshot is recorded but no partition exists for it
 */
private PartitionDetail getLatestPartition() throws IOException {
  Long snapshotTime = getLatestSnapshot();
  if (snapshotTime == null) {
    return null;
  }

  PartitionDetail latest =
    files.getPartition(PartitionKey.builder().addLongField(SNAPSHOT_FIELD, snapshotTime).build());
  if (latest != null) {
    return latest;
  }

  // the recorded snapshot time points at a partition that does not exist
  throw new IllegalStateException(String.format(
    "No snapshot files found for latest recorded snapshot from '%d'. " +
      "This can happen if files are deleted manually without updating the state file. " +
      "Please fix the state file to contain the latest snapshot, or delete the file and write another snapshot.",
    snapshotTime));
}
@Override public void apply() throws Exception { PartitionOutput partitionOutput = dataset.getPartitionOutput(PARTITION_KEY); long beforeTime = System.currentTimeMillis(); partitionOutput.addPartition(); long afterTime = System.currentTimeMillis(); PartitionDetail partitionDetail = dataset.getPartition(PARTITION_KEY); Assert.assertNotNull(partitionDetail); long creationTime = partitionDetail.getMetadata().getCreationTime(); long lastModificationTime = partitionDetail.getMetadata().lastModificationTime(); // lastModificationTime time should be equal to creationTime for a partition that has not been appended to Assert.assertEquals(creationTime, lastModificationTime); Assert.assertTrue(creationTime >= beforeTime && creationTime <= afterTime); } });
TaskAttemptContext getKeySpecificContext(PartitionKey partitionKey) throws IOException { if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE) { if (outputDataset.getPartition(partitionKey) != null) { // TODO: throw PartitionAlreadyExists exception? (include dataset name also?) throw new DataSetException("Partition already exists: " + partitionKey); } } String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning); String finalPath = relativePath + "/" + outputName; return getTaskAttemptContext(job, finalPath); }
/**
 * Adds a partition with three metadata entries and checks that the same map is read back.
 */
@Override
public void apply() throws Exception {
  ImmutableMap<String, String> expectedMetadata =
    ImmutableMap.of("key1", "value", "key2", "value2", "key3", "value2");
  PartitionKey key = PartitionKey.builder()
    .addIntField("i", 42)
    .addLongField("l", 17L)
    .addStringField("s", "x")
    .build();

  // write the partition together with its metadata
  PartitionOutput output = dataset.getPartitionOutput(key);
  output.setMetadata(expectedMetadata);
  output.addPartition();

  // read it back and compare the metadata
  PartitionDetail detail = dataset.getPartition(key);
  Assert.assertNotNull(detail);
  Assert.assertEquals(expectedMetadata, detail.getMetadata().asMap());
}
});
TaskAttemptContext getKeySpecificContext(PartitionKey partitionKey) throws IOException { if (partitionWriteOption == DynamicPartitioner.PartitionWriteOption.CREATE) { if (outputDataset.getPartition(partitionKey) != null) { // TODO: throw PartitionAlreadyExists exception? (include dataset name also?) throw new DataSetException("Partition already exists: " + partitionKey); } } String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning); String finalPath = relativePath + "/" + outputName; return getTaskAttemptContext(job, finalPath); }
/**
 * Checks that the partition for {@code keyX} exists and that its relative path contains both
 * {@code "x"} and {@code "150000"}.
 */
@Override
public void apply() {
  Partition partition = dataset.getPartition(keyX);
  Assert.assertNotNull(partition);
  String path = partition.getRelativePath();
  // fail with an assertion error rather than a NullPointerException if no path is set
  Assert.assertNotNull(path);
  Assert.assertTrue(path.contains("x"));
  Assert.assertTrue(path.contains("150000"));
}
});
/**
 * Writes {@code value} under row {@code key} in column {@code COL} of the table, then looks up
 * the partition whose key has the single field {@code key=value}.
 *
 * @param key the row key, also used as the partition field name
 * @param value the cell value, also used as the partition field value
 * @throws Exception if the table write or the partition lookup fails
 */
public void put(String key, String value) throws Exception {
  byte[] row = Bytes.toBytes(key);
  byte[] cell = Bytes.toBytes(value);
  table.put(row, COL, cell);

  // NOTE(review): the lookup result is discarded; presumably only the access itself matters - confirm
  PartitionKey partitionKey = PartitionKey.builder().addField(key, value).build();
  files.getPartition(partitionKey);
}
/**
 * Checks that the partition for {@code keyY} exists and that its relative path is set and
 * contains both {@code "y"} and {@code "200000"}.
 */
@Override
public void apply() {
  Partition found = dataset.getPartition(keyY);
  Assert.assertNotNull(found);

  String relativePath = found.getRelativePath();
  Assert.assertNotNull(relativePath);
  // each expected fragment must show up in the path, checked in order
  for (String fragment : new String[] {"y", "200000"}) {
    Assert.assertTrue(relativePath.contains(fragment));
  }
}
});
/**
 * Checks that the partition for {@code key} exists and reports {@code path} as its relative path.
 */
@Override
public void apply() throws Exception {
  PartitionDetail found = dataset.getPartition(key);
  Assert.assertNotNull(found);

  String actualPath = found.getRelativePath();
  Assert.assertEquals(path, actualPath);
}
});
/**
 * Creates a partition, verifies it and its location exist, drops it, and verifies that both the
 * location and the partition entry are gone. The location is published through
 * {@code outputLocationRef}.
 */
@Override
public void apply() throws Exception {
  Location created = createPartition(pfs, PARTITION_KEY, "file");
  outputLocationRef.set(created);

  // the partition and its files exist after creation
  Assert.assertTrue(created.exists());
  Assert.assertNotNull(pfs.getPartition(PARTITION_KEY));
  Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists());

  // dropping removes the files as well as the partition entry
  pfs.dropPartition(PARTITION_KEY);
  Assert.assertFalse(created.exists());
  Assert.assertNull(pfs.getPartition(PARTITION_KEY));

  // a second drop of the now-missing partition must complete without throwing
  pfs.dropPartition(PARTITION_KEY);
}
});
/**
 * Drops the partition identified by the two path parameters.
 * Responds with 404 if the partition does not exist, otherwise drops it and responds with 200.
 *
 * @param request the incoming request (not used)
 * @param responder used to send the response
 * @param partition value of the string field {@code "partition"} of the partition key
 * @param subPartition value of the int field {@code "sub-partition"} of the partition key
 */
@DELETE
@Path("partitions/{partition}/subpartitions/{sub-partition}")
public void drop(HttpServiceRequest request, HttpServiceResponder responder,
                 @PathParam("partition") String partition,
                 @PathParam("sub-partition") int subPartition) {
  PartitionKey target = PartitionKey.builder()
    .addStringField("partition", partition)
    .addIntField("sub-partition", subPartition)
    .build();

  if (pfs.getPartition(target) != null) {
    pfs.dropPartition(target);
    responder.sendString(200, "Successfully dropped partition", Charsets.UTF_8);
  } else {
    responder.sendString(404, "Partition not found.", Charsets.UTF_8);
  }
}
}
/**
 * Serves the location named {@code "file"} under the partition identified by the two path
 * parameters.
 * Responds with 404 if the partition does not exist, with 200 and content type
 * {@code text/plain} on success, and with an error 400 if an {@link IOException} is raised.
 *
 * @param request the incoming request (not used)
 * @param responder used to send the response
 * @param partition value of the string field {@code "partition"} of the partition key
 * @param subPartition value of the int field {@code "sub-partition"} of the partition key
 */
@GET
@Path("partitions/{partition}/subpartitions/{sub-partition}")
public void read(HttpServiceRequest request, HttpServiceResponder responder,
                 @PathParam("partition") String partition,
                 @PathParam("sub-partition") int subPartition) {
  PartitionKey requested = PartitionKey.builder()
    .addStringField("partition", partition)
    .addIntField("sub-partition", subPartition)
    .build();

  PartitionDetail detail = pfs.getPartition(requested);
  if (detail == null) {
    responder.sendString(404, "Partition not found.", Charsets.UTF_8);
    return;
  }

  try {
    responder.send(200, detail.getLocation().append("file"), "text/plain");
  } catch (IOException e) {
    responder.sendError(400, String.format("Unable to read path '%s'", detail.getRelativePath()));
  }
}
@Test public void testRollbackOnTransactionAbort() throws Exception { PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance); TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs); txContext.start(); Location outputLocation = createPartition(pfs, PARTITION_KEY, "file");; Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists()); txContext.abort(); // because the previous transaction aborted, the partition as well as the file will not exist txContext.start(); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Assert.assertFalse(outputLocation.exists()); txContext.finish(); }
@Test public void testRollbackOfPartitionCreateThenDelete() throws Exception { PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance); TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs); txContext.start(); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Location outputLocation = createPartition(pfs, PARTITION_KEY, "file"); Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); pfs.dropPartition(PARTITION_KEY); txContext.abort(); // the file shouldn't exist because the transaction was aborted (AND because it was dropped at the end of the tx) Assert.assertFalse(outputLocation.exists()); }
Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Location file1Location = createPartition(pfs, PARTITION_KEY, "file1"); Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); txContext.finish(); Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); Assert.assertFalse(pfs.getPartition(PARTITION_KEY).getLocation().append("file2").exists()); txContext.finish();
Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(pfs.getPartition(PARTITION_KEY).getLocation().exists()); txContext.finish(); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Assert.assertFalse(outputLocation.exists()); Assert.assertNotNull(pfs.getPartition(PARTITION_KEY)); Assert.assertTrue(outputLocation.exists()); try (InputStream inputStream = outputLocation.getInputStream()) {
/**
 * Verifies that the partitions for {@code KEY_1} and {@code KEY_2} exist at {@code location1}
 * and {@code location2}, that no partition exists for {@code KEY_4}, and that all three
 * locations still contain a {@code "file"} entry. Any exception is rethrown via
 * {@code Throwables.propagate}.
 */
@Override
public void run() {
  try {
    // partition 1 is registered and its files exist
    PartitionDetail first = pfs.getPartition(KEY_1);
    Assert.assertNotNull(first);
    Assert.assertEquals(location1, first.getLocation());
    Assert.assertTrue(location1.exists());
    Assert.assertTrue(location1.append("file").exists());

    // partition 2 is registered and its files exist
    PartitionDetail second = pfs.getPartition(KEY_2);
    Assert.assertNotNull(second);
    Assert.assertEquals(location2, second.getLocation());
    Assert.assertTrue(location2.exists());
    Assert.assertTrue(location2.append("file").exists());

    // no partition for KEY_4, while the files under location3 are still present
    // NOTE(review): KEY_4 is looked up next to location3 - presumably intentional; confirm
    PartitionDetail absent = pfs.getPartition(KEY_4);
    Assert.assertNull(absent);
    Assert.assertTrue(location3.exists());
    Assert.assertTrue(location3.append("file").exists());
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
});
@Test public void testRollbackOnJobFailure() throws Exception { // tests the logic of #onFailure method Map<String, String> args = new HashMap<>(); FileSetArguments.setOutputPath(args, "custom/output/path"); PartitionedFileSetArguments.setOutputPartitionKey(args, PARTITION_KEY); PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance, args); TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs); txContext.start(); Location outputLocation = pfs.getEmbeddedFileSet().getOutputLocation(); Assert.assertFalse(outputLocation.exists()); outputLocation.mkdirs(); Assert.assertTrue(outputLocation.exists()); ((PartitionedFileSetDataset) pfs).onFailure(); txContext.abort(); // because the previous transaction aborted, the partition as well as the directory for it will not exist txContext.start(); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Assert.assertFalse(outputLocation.exists()); txContext.finish(); }