/**
 * Looks up the current values for a set of keys in a {@link KeyValueTable}.
 *
 * @param keys keys for which values should be fetched
 * @param tableManager manager for the table
 * @return map from each key to its string value (a value may be null if the key is absent)
 * @throws Exception if the lookup fails
 */
public static Map<String, String> getValues(Set<String> keys,
                                            DataSetManager<KeyValueTable> tableManager) throws Exception {
  // Flush first so the reads below observe the latest committed data.
  tableManager.flush();
  KeyValueTable table = tableManager.get();
  Map<String, String> result = new HashMap<>();
  keys.forEach(key -> result.put(key, Bytes.toString(table.read(key))));
  return result;
}
}
/**
 * Reads the value for the specified rowKey and columnKey.
 *
 * @param tableManager manager for the table to read from
 * @param rowKey row to look up
 * @param columnKey column to look up
 * @return the cell value as a string, or null if the cell is absent
 */
public static String readOutput(DataSetManager<Table> tableManager, String rowKey, String columnKey) {
  // Nothing here throws a checked exception, so the blanket "throws Exception"
  // was dropped; this also matches the signature of the other readOutput variant.
  Table table = tableManager.get();
  return Bytes.toString(table.get(Bytes.toBytes(rowKey), Bytes.toBytes(columnKey)));
}
}
/**
 * Reads the value stored at the given row and column of a {@link Table}.
 *
 * @param tableManager manager for the table to read from
 * @param rowKey row to look up
 * @param columnKey column to look up
 * @return the cell value as a string, or null if the cell is absent
 */
public static String readOutput(DataSetManager<Table> tableManager, String rowKey, String columnKey) {
  byte[] row = Bytes.toBytes(rowKey);
  byte[] column = Bytes.toBytes(columnKey);
  return Bytes.toString(tableManager.get().get(row, column));
}
}
private static void writeInput(DataSetManager<Table> tableManager, @Nullable String rowKey, Iterable<StructuredRecord> records) throws Exception { tableManager.flush(); Table table = tableManager.get(); // write each record as a separate row, with the serialized record as one column and schema as another // each rowkey will be a UUID. for (StructuredRecord record : records) { byte[] row = rowKey == null ? Bytes.toBytes(UUID.randomUUID()) : Bytes.toBytes(rowKey); table.put(row, SCHEMA_COL, Bytes.toBytes(record.getSchema().toString())); table.put(row, RECORD_COL, Bytes.toBytes(StructuredRecordStringConverter.toJsonString(record))); } tableManager.flush(); }
/**
 * Creates a time partition at the given time containing a single file "file"
 * with one CSV line of the form "i,xi".
 *
 * @param tpfsManager manager for the time-partitioned fileset
 * @param time partition time
 * @param i value used to build the CSV line
 * @throws Exception if writing the file or registering the partition fails
 */
private void createPartition(DataSetManager<TimePartitionedFileSet> tpfsManager, long time, int i)
    throws Exception {
  TimePartitionOutput output = tpfsManager.get().getPartitionOutput(time);
  try (PrintStream out = new PrintStream(output.getLocation().append("file").getOutputStream())) {
    out.println(String.format("%d,x%d", i, i));
  }
  // Register the partition only after its contents are fully written.
  output.addPartition();
  tpfsManager.flush();
}
}
/**
 * Clear any records written to this sink.
 *
 * @param tableManager dataset manager used to get the sink dataset
 */
public static void clear(DataSetManager<Table> tableManager) {
  tableManager.flush();
  Table table = tableManager.get();
  // Full-range scan: delete every row encountered.
  try (Scanner scanner = table.scan(null, null)) {
    for (Row row = scanner.next(); row != null; row = scanner.next()) {
      table.delete(row.getRow());
    }
  }
  tableManager.flush();
}
/**
 * Writes a single sentinel entry ("hello" -> "world") into the named KeyValueTable.
 *
 * @param namespaceId namespace containing the dataset
 * @param datasetName name of the KeyValueTable to write to
 * @throws Exception if the dataset cannot be obtained or written
 */
private void addDummyData(NamespaceId namespaceId, String datasetName) throws Exception {
  DataSetManager<KeyValueTable> tableManager = getDataset(namespaceId.dataset(datasetName));
  tableManager.get().write("hello", "world");
  tableManager.flush();
}
@Test
public void testClassicSpark() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);
  // Run both the Java and the Scala flavor of the classic Spark program to completion.
  for (Class<?> sparkClass
      : Arrays.asList(TestSparkApp.ClassicSpark.class, TestSparkApp.ScalaClassicSpark.class)) {
    SparkManager sparkManager = appManager.getSparkManager(sparkClass.getSimpleName());
    sparkManager.startAndWaitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  }
  // Each program records a single 1L under its own class name.
  KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
  Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ClassicSparkProgram.class.getName())));
  Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ScalaClassicSparkProgram.class.getName())));
}
private void assertWorkerDatasetWrites(byte[] startRow, byte[] endRow, int expectedCount, int expectedTotalCount) throws Exception { DataSetManager<KeyValueTable> datasetManager = getDataset(testSpace.dataset(AppUsingGetServiceURL.WORKER_INSTANCES_DATASET)); KeyValueTable instancesTable = datasetManager.get(); try (CloseableIterator<KeyValue<byte[], byte[]>> instancesIterator = instancesTable.scan(startRow, endRow)) { List<KeyValue<byte[], byte[]>> workerInstances = Lists.newArrayList(instancesIterator); // Assert that the worker starts with expectedCount instances Assert.assertEquals(expectedCount, workerInstances.size()); // Assert that each instance of the worker knows the total number of instances for (KeyValue<byte[], byte[]> keyValue : workerInstances) { Assert.assertEquals(expectedTotalCount, Bytes.toInt(keyValue.getValue())); } } }
@Test
public void testSparkWithService() throws Exception {
  ApplicationManager applicationManager = deployApplication(TestSparkServiceIntegrationApp.class);
  startService(applicationManager);
  String sparkName = TestSparkServiceIntegrationApp.SparkServiceProgram.class.getSimpleName();
  SparkManager sparkManager = applicationManager.getSparkManager(sparkName).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);
  KeyValueTable results = this.<KeyValueTable>getDataset("result").get();
  // The Spark program stores the square of each key 1..5, obtained through the service.
  for (int i = 1; i <= 5; i++) {
    byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
    Assert.assertEquals(i * i, Integer.parseInt(Bytes.toString(results.read(key))));
  }
}
/**
 * Adds a time partition at the given time, populating its location via prepareFileInput
 * before registering the partition.
 *
 * @param tpfsManager manager for the time-partitioned fileset
 * @param inputTime partition time
 * @throws IOException if writing the input files fails
 * @throws TransactionFailureException if committing the partition fails
 * @throws InterruptedException if the operation is interrupted
 */
private void addTimePartition(DataSetManager<TimePartitionedFileSet> tpfsManager, long inputTime)
    throws IOException, TransactionFailureException, InterruptedException {
  PartitionOutput partitionOutput = tpfsManager.get().getPartitionOutput(inputTime);
  // Populate the partition's backing location first, then register it.
  prepareFileInput(partitionOutput.getLocation());
  partitionOutput.addPartition();
  tpfsManager.flush();
}
}
/**
 * Asserts that the sentinel entry "hello" -> "world" is present in the named dataset.
 *
 * @param namespaceId namespace containing the dataset
 * @param datasetName name of the KeyValueTable to verify
 * @throws Exception if the dataset cannot be obtained
 */
private void verifyDummyData(NamespaceId namespaceId, String datasetName) throws Exception {
  DataSetManager<KeyValueTable> outTableManager = getDataset(namespaceId.dataset(datasetName));
  byte[] value = outTableManager.get().read("hello");
  Assert.assertEquals("world", Bytes.toString(value));
}
/**
 * Validates the files under the named PartitionedFileSet's base location against
 * the expected location, delegating to the Location-based overload.
 *
 * @param dataset name of the partitioned fileset
 * @param expectedExisting the location expected to exist
 * @throws Exception if the dataset cannot be obtained or validation fails
 */
private void validateFiles(String dataset, Location expectedExisting) throws Exception {
  DataSetManager<PartitionedFileSet> pfs = getDataset(testSpace.dataset(dataset));
  Location baseLocation = pfs.get().getEmbeddedFileSet().getBaseLocation();
  validateFiles(baseLocation, expectedExisting);
}
/**
 * Seeds the object store with the two test strings, each keyed by its own byte encoding.
 *
 * @param manager manager for the input object store
 */
private void prepareInputData(DataSetManager<ObjectStore<String>> manager) {
  ObjectStore<String> store = manager.get();
  // Each test string is stored under the bytes of the string itself.
  store.write(Bytes.toBytes(TEST_STRING_1), TEST_STRING_1);
  store.write(Bytes.toBytes(TEST_STRING_2), TEST_STRING_2);
  manager.flush();
}
/**
 * Asserts that the named KeyValueTable contains no entries at all.
 *
 * @param namespaceId namespace containing the dataset
 * @param datasetName name of the KeyValueTable to check
 * @throws Exception if the dataset cannot be obtained
 */
private void assertDatasetIsEmpty(NamespaceId namespaceId, String datasetName) throws Exception {
  DataSetManager<KeyValueTable> outTableManager = getDataset(namespaceId.dataset(datasetName));
  // A full-range scan over an empty table must yield nothing.
  try (CloseableIterator<KeyValue<byte[], byte[]>> scanner = outTableManager.get().scan(null, null)) {
    Assert.assertFalse(scanner.hasNext());
  }
}
private void checkOutputData(DataSetManager<KeyValueTable> manager) { KeyValueTable count = manager.get(); //read output and verify result byte[] val = count.read(Bytes.toBytes(TEST_STRING_1)); Assert.assertTrue(val != null); Assert.assertEquals(Bytes.toInt(val), TEST_STRING_1.length()); val = count.read(Bytes.toBytes(TEST_STRING_2)); Assert.assertTrue(val != null); Assert.assertEquals(Bytes.toInt(val), TEST_STRING_2.length()); }
@Test(timeout = 60000L) public void testDatasetWithoutApp() throws Exception { // TODO: Although this has nothing to do with this testcase, deploying a dummy app to create the default namespace deployApplication(testSpace, DummyApp.class); deployDatasetModule(testSpace.datasetModule("my-kv"), AppsWithDataset.KeyValueTableDefinition.Module.class); addDatasetInstance("myKeyValueTable", testSpace.dataset("myTable"), DatasetProperties.EMPTY); DataSetManager<AppsWithDataset.KeyValueTableDefinition.KeyValueTable> dataSetManager = getDataset(testSpace.dataset("myTable")); AppsWithDataset.KeyValueTableDefinition.KeyValueTable kvTable = dataSetManager.get(); kvTable.put("test", "hello"); dataSetManager.flush(); Assert.assertEquals("hello", dataSetManager.get().get("test")); }
@Test
public void testAppRedeployKeepsData() throws Exception {
  deployApplication(testSpace, AppWithTable.class);
  DataSetManager<Table> myTableManager = getDataset(testSpace.dataset("my_table"));
  myTableManager.get().put(new Put("key1", "column1", "value1"));
  myTableManager.flush();
  // Changes should be visible to other instances of datasets
  DataSetManager<Table> myTableManager2 = getDataset(testSpace.dataset("my_table"));
  Assert.assertEquals("value1", myTableManager2.get().get(new Get("key1", "column1")).getString("column1"));
  // Even after redeploy of an app: changes should be visible to other instances of datasets
  deployApplication(AppWithTable.class);
  DataSetManager<Table> myTableManager3 = getDataset(testSpace.dataset("my_table"));
  Assert.assertEquals("value1", myTableManager3.get().get(new Get("key1", "column1")).getString("column1"));
  // Calling commit again (to test we can call it multiple times)
  myTableManager.get().put(new Put("key1", "column1", "value2"));
  myTableManager.flush();
  // NOTE(review): this final assertion still expects "value1" even though "value2" was
  // flushed just above. Presumably myTableManager3 observes a transaction snapshot that
  // predates the second flush — confirm that this is intentional and not a stale assertion.
  Assert.assertEquals("value1", myTableManager3.get().get(new Get("key1", "column1")).getString("column1"));
}
@Test
public void testDynamicPartitioningWithFailure() throws Exception {
  // deploy app
  ApplicationManager appManager = deployApplication(testSpace, AppWithDynamicPartitioning.class);
  // setup inputs
  DataSetManager<KeyValueTable> dataSetManager = getDataset(testSpace.dataset("input"));
  KeyValueTable input = dataSetManager.get();
  for (int i = 0; i < 3; i++) {
    // NOTE(review): ('a' + i) is int arithmetic, so the written values are "97", "98", "99"
    // rather than "a", "b", "c". If letters were intended this needs a (char) cast —
    // confirm against what the MapReduce job consuming "input" expects.
    input.write(String.valueOf(i), "" + ('a' + i));
  }
  dataSetManager.flush();
  // run MR with one pfs
  testDynamicPartitioningMRWithFailure(appManager, "pfs1", "pfs1");
  // run MR with two pfs
  testDynamicPartitioningMRWithFailure(appManager, "pfs1", "pfs1", "pfs2");
  // run MR with two pfs in reverse order (because we don't want to rely on which one gets committed first)
  testDynamicPartitioningMRWithFailure(appManager, "pfs2", "pfs1", "pfs2");
}
private void testAppConfig(String appName, ApplicationManager appManager, ConfigTestApp.ConfigClass conf) throws Exception { String datasetName = conf == null ? ConfigTestApp.DEFAULT_TABLE : conf.getTableName(); ServiceManager serviceManager = appManager.getServiceManager(ConfigTestApp.SERVICE_NAME).start(); URL serviceURL = serviceManager.getServiceURL(5, TimeUnit.SECONDS); // Write data to the table using the service URL url = new URL(serviceURL, "write/abcd"); Assert.assertEquals(200, HttpRequests.execute(HttpRequest.put(url).build()).getResponseCode()); url = new URL(serviceURL, "write/xyz"); Assert.assertEquals(200, HttpRequests.execute(HttpRequest.put(url).build()).getResponseCode()); DataSetManager<KeyValueTable> dsManager = getDataset(datasetName); KeyValueTable table = dsManager.get(); Assert.assertEquals("abcd", Bytes.toString(table.read(appName + ".abcd"))); Assert.assertEquals("xyz", Bytes.toString(table.read(appName + ".xyz"))); }