/**
 * Get the values associated with the specified keys.
 *
 * @param keys keys whose values should be looked up
 * @param tableManager manager for the table
 * @return map from each key to its value
 * @throws Exception if the dataset cannot be read
 */
public static Map<String, String> getValues(Set<String> keys,
                                             DataSetManager<KeyValueTable> tableManager) throws Exception {
  tableManager.flush();
  KeyValueTable table = tableManager.get();
  Map<String, String> values = new HashMap<>();
  for (String key : keys) {
    values.put(key, Bytes.toString(table.read(key)));
  }
  return values;
}
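// A hedged usage sketch, not from the original source: it assumes a test class that extends CDAP's
// TestBase, a KeyValueTable dataset named "results" populated by the program under test, and the
// expected values shown. Only getValues above and the getDataset helper are taken as given.
@Test
public void exampleGetValuesUsage() throws Exception {
  DataSetManager<KeyValueTable> resultsManager = getDataset("results");
  Map<String, String> values = getValues(ImmutableSet.of("alice", "bob"), resultsManager);
  Assert.assertEquals("2", values.get("alice"));
  Assert.assertEquals("3", values.get("bob"));
}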
/**
 * Read the value for the specified rowKey and columnKey.
 */
public static String readOutput(DataSetManager<Table> tableManager, String rowKey, String columnKey)
  throws Exception {
  Table table = tableManager.get();
  return Bytes.toString(table.get(Bytes.toBytes(rowKey), Bytes.toBytes(columnKey)));
}
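// A hedged usage sketch, not from the original source: it assumes a test class extending CDAP's
// TestBase, a Table dataset named "output" written by the program under test, and the row key,
// column key, and expected value shown. Only readOutput above and getDataset are taken as given.
@Test
public void exampleReadOutputUsage() throws Exception {
  DataSetManager<Table> outputManager = getDataset("output");
  Assert.assertEquals("42", readOutput(outputManager, "row1", "count"));
}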
@Override
public Long call() throws Exception {
  tsTableManager.flush();
  return getCounts("Message", tsTable);
}
}, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
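// The snippet above is only the Callable body of a polling call; the trailing
// "1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS" reads as a one-minute timeout polled every second.
// A hedged reconstruction of the enclosing call, assuming CDAP's Tasks.waitFor helper and an
// expected count of 1 (both assumptions, not confirmed by the source):
Tasks.waitFor(1L, new Callable<Long>() {
  @Override
  public Long call() throws Exception {
    tsTableManager.flush();
    return getCounts("Message", tsTable);
  }
}, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);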
/**
 * Read the value for the specified rowKey and columnKey.
 */
public static String readOutput(DataSetManager<Table> tableManager, String rowKey, String columnKey) {
  Table table = tableManager.get();
  return Bytes.toString(table.get(Bytes.toBytes(rowKey), Bytes.toBytes(columnKey)));
}
/**
 * Clear any records written to this sink.
 *
 * @param tableManager dataset manager used to get the sink dataset
 */
public static void clear(DataSetManager<Table> tableManager) {
  tableManager.flush();
  Table table = tableManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    Row row;
    while ((row = scanner.next()) != null) {
      table.delete(row.getRow());
    }
  }
  tableManager.flush();
}
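// A hedged usage sketch, not from the original source: it resets the sink between pipeline runs and
// then verifies the sink is empty via readOutput below. The dataset name "sinkDataset" and the
// surrounding TestBase context are assumptions.
@Test
public void exampleClearUsage() throws Exception {
  DataSetManager<Table> sinkManager = getDataset("sinkDataset");
  clear(sinkManager);
  Assert.assertTrue(readOutput(sinkManager).isEmpty());
}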
/**
 * Used to read the records written by this sink.
 *
 * @param tableManager dataset manager used to get the sink dataset to read from
 */
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
  Table table = tableManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    List<StructuredRecord> records = new ArrayList<>();
    Row row;
    while ((row = scanner.next()) != null) {
      Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
      String recordStr = row.getString(RECORD_COL);
      records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
    }
    return records;
  }
}
@Override
public Long call() throws Exception {
  tsTableManager.flush();
  return getCounts(Integer.toString(finalI), tsTable);
}
}, 1, TimeUnit.MINUTES, 1, TimeUnit.SECONDS);
private static void writeInput(DataSetManager<Table> tableManager, @Nullable String rowKey,
                               Iterable<StructuredRecord> records) throws Exception {
  tableManager.flush();
  Table table = tableManager.get();
  // Write each record as a separate row, with the serialized record as one column and the schema as another.
  // If no row key is given, each row key is a random UUID; otherwise every record is written to the same row.
  for (StructuredRecord record : records) {
    byte[] row = rowKey == null ? Bytes.toBytes(UUID.randomUUID()) : Bytes.toBytes(rowKey);
    table.put(row, SCHEMA_COL, Bytes.toBytes(record.getSchema().toString()));
    table.put(row, RECORD_COL, Bytes.toBytes(StructuredRecordStringConverter.toJsonString(record)));
  }
  tableManager.flush();
}
@Test
public void testClassicSpark() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);

  for (Class<?> sparkClass : Arrays.asList(TestSparkApp.ClassicSpark.class,
                                           TestSparkApp.ScalaClassicSpark.class)) {
    final SparkManager sparkManager = appManager.getSparkManager(sparkClass.getSimpleName());
    sparkManager.startAndWaitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
  }

  KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
  Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ClassicSparkProgram.class.getName())));
  Assert.assertEquals(1L, Bytes.toLong(resultTable.read(ScalaClassicSparkProgram.class.getName())));
}
/**
 * Used to read the records written by this sink.
 *
 * @param tableManager dataset manager used to get the sink dataset to read from
 */
public static List<StructuredRecord> readOutput(DataSetManager<Table> tableManager) throws Exception {
  tableManager.flush();
  Table table = tableManager.get();
  try (Scanner scanner = table.scan(null, null)) {
    List<StructuredRecord> records = new ArrayList<>();
    Row row;
    while ((row = scanner.next()) != null) {
      Schema schema = Schema.parseJson(row.getString(SCHEMA_COL));
      String recordStr = row.getString(RECORD_COL);
      records.add(StructuredRecordStringConverter.fromJsonString(recordStr, schema));
    }
    return records;
  }
}
private void assertWorkerDatasetWrites(byte[] startRow, byte[] endRow,
                                       int expectedCount, int expectedTotalCount) throws Exception {
  DataSetManager<KeyValueTable> datasetManager =
    getDataset(testSpace.dataset(AppUsingGetServiceURL.WORKER_INSTANCES_DATASET));
  KeyValueTable instancesTable = datasetManager.get();
  try (CloseableIterator<KeyValue<byte[], byte[]>> instancesIterator = instancesTable.scan(startRow, endRow)) {
    List<KeyValue<byte[], byte[]>> workerInstances = Lists.newArrayList(instancesIterator);
    // Assert that the worker starts with expectedCount instances
    Assert.assertEquals(expectedCount, workerInstances.size());
    // Assert that each instance of the worker knows the total number of instances
    for (KeyValue<byte[], byte[]> keyValue : workerInstances) {
      Assert.assertEquals(expectedTotalCount, Bytes.toInt(keyValue.getValue()));
    }
  }
}
/**
 * Used to write the input records for the pipeline run. Should be called after the pipeline has been created.
 *
 * @param tableManager dataset manager used to write to the source dataset
 * @param records records that should be the input for the pipeline
 */
public static void writeInput(DataSetManager<Table> tableManager,
                              Iterable<StructuredRecord> records) throws Exception {
  tableManager.flush();
  Table table = tableManager.get();
  // Write each record as a separate row, with the serialized record as one column and the schema as another.
  // Each row key is a random UUID.
  for (StructuredRecord record : records) {
    byte[] row = Bytes.toBytes(UUID.randomUUID());
    table.put(row, SCHEMA_COL, Bytes.toBytes(record.getSchema().toString()));
    table.put(row, RECORD_COL, Bytes.toBytes(StructuredRecordStringConverter.toJsonString(record)));
  }
  tableManager.flush();
}
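// A hedged end-to-end sketch, not from the original source: it writes one record with writeInput,
// then reads the sink with readOutput after the pipeline run. The schema, field names, dataset
// names ("sourceDataset", "sinkDataset"), and the elided pipeline start are all illustrative assumptions.
@Test
public void exampleWriteAndReadPipelineData() throws Exception {
  Schema schema = Schema.recordOf("purchase",
                                  Schema.Field.of("user", Schema.of(Schema.Type.STRING)),
                                  Schema.Field.of("count", Schema.of(Schema.Type.INT)));
  StructuredRecord record = StructuredRecord.builder(schema).set("user", "alice").set("count", 3).build();

  DataSetManager<Table> sourceManager = getDataset("sourceDataset");
  writeInput(sourceManager, ImmutableList.of(record));

  // ... start the pipeline workflow here and wait for it to complete ...

  DataSetManager<Table> sinkManager = getDataset("sinkDataset");
  List<StructuredRecord> output = readOutput(sinkManager);
  Assert.assertEquals(1, output.size());
  Assert.assertEquals("alice", output.get(0).get("user"));
}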
@Test
public void testSparkWithService() throws Exception {
  ApplicationManager applicationManager = deployApplication(TestSparkServiceIntegrationApp.class);
  startService(applicationManager);

  SparkManager sparkManager = applicationManager.getSparkManager(
    TestSparkServiceIntegrationApp.SparkServiceProgram.class.getSimpleName()).start();
  sparkManager.waitForRun(ProgramRunStatus.COMPLETED, 120, TimeUnit.SECONDS);

  DataSetManager<KeyValueTable> datasetManager = getDataset("result");
  KeyValueTable results = datasetManager.get();
  for (int i = 1; i <= 5; i++) {
    byte[] key = String.valueOf(i).getBytes(Charsets.UTF_8);
    Assert.assertEquals((i * i), Integer.parseInt(Bytes.toString(results.read(key))));
  }
}
@Test
public void testDynamicSpark() throws Exception {
  ApplicationManager appManager = deploy(TestSparkApp.class);

  // Write some data to a local file
  File inputFile = TEMP_FOLDER.newFile();
  try (BufferedWriter writer = Files.newBufferedWriter(inputFile.toPath(), StandardCharsets.UTF_8)) {
    for (int i = 0; i < 10; i++) {
      writer.write("Line " + (i + 1));
      writer.newLine();
    }
  }

  SparkManager sparkManager = appManager.getSparkManager(ScalaDynamicSpark.class.getSimpleName());
  sparkManager.startAndWaitForRun(ImmutableMap.of("input", inputFile.getAbsolutePath(),
                                                  "output", "ResultTable",
                                                  "tmpdir", TMP_FOLDER.newFolder().getAbsolutePath()),
                                  ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);

  // Validate the result written to the dataset
  KeyValueTable resultTable = this.<KeyValueTable>getDataset("ResultTable").get();
  // The word "Line" should appear ten times
  Assert.assertEquals(10, Bytes.toInt(resultTable.read("Line")));
  // Each line number should appear exactly once
  for (int i = 0; i < 10; i++) {
    Assert.assertEquals(1, Bytes.toInt(resultTable.read(Integer.toString(i + 1))));
  }
}
private void createPartition(DataSetManager<TimePartitionedFileSet> tpfsManager, long time, int i) throws Exception {
  TimePartitionedFileSet tpfs = tpfsManager.get();
  TimePartitionOutput output = tpfs.getPartitionOutput(time);
  try (PrintStream out = new PrintStream(output.getLocation().append("file").getOutputStream())) {
    out.println(String.format("%d,x%d", i, i));
  }
  output.addPartition();
  tpfsManager.flush();
}
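// A hedged usage sketch, not from the original source: it drives createPartition from within the same
// test class to create three consecutive hourly partitions. The dataset name "tpfs", the base time, and
// the hourly spacing are illustrative assumptions.
@Test
public void exampleCreatePartitionUsage() throws Exception {
  DataSetManager<TimePartitionedFileSet> tpfsManager = getDataset("tpfs");
  long baseTime = System.currentTimeMillis();
  for (int i = 0; i < 3; i++) {
    createPartition(tpfsManager, baseTime + TimeUnit.HOURS.toMillis(i), i);
  }
}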
private void verifyWorkflowRun(String runId, boolean shouldKeepWordCountDataset,
                               boolean shouldKeepCSVFilesetDataset, String expectedRunStatus) throws Exception {
  // Once the Workflow run is complete, its local datasets should only be available if they were marked to be kept
  DataSetManager<KeyValueTable> localKeyValueDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORDCOUNT_DATASET + "." + runId));
  if (shouldKeepWordCountDataset) {
    Assert.assertNotNull(localKeyValueDataset.get());
  } else {
    Assert.assertNull(localKeyValueDataset.get());
  }

  DataSetManager<FileSet> localFileSetDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET + "." + runId));
  if (shouldKeepCSVFilesetDataset) {
    Assert.assertNotNull(localFileSetDataset.get());
  } else {
    Assert.assertNull(localFileSetDataset.get());
  }

  // Dataset which is not local should still be available
  DataSetManager<KeyValueTable> nonLocalKeyValueDataset =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.RESULT_DATASET));
  Assert.assertEquals("6", Bytes.toString(nonLocalKeyValueDataset.get().read("UniqueWordCount")));

  // There should not be any local copy of the non-local dataset
  nonLocalKeyValueDataset = getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.RESULT_DATASET + "." + runId));
  Assert.assertNull(nonLocalKeyValueDataset.get());

  DataSetManager<KeyValueTable> workflowRuns =
    getDataset(testSpace.dataset(WorkflowAppWithLocalDatasets.WORKFLOW_RUNS_DATASET));
  Assert.assertEquals(expectedRunStatus, Bytes.toString(workflowRuns.get().read(runId)));
}
private void addTimePartition(DataSetManager<TimePartitionedFileSet> tpfsManager, long inputTime)
  throws IOException, TransactionFailureException, InterruptedException {
  TimePartitionedFileSet tpfs = tpfsManager.get();
  PartitionOutput partitionOutput = tpfs.getPartitionOutput(inputTime);
  Location location = partitionOutput.getLocation();
  prepareFileInput(location);
  partitionOutput.addPartition();
  tpfsManager.flush();
}