/**
 * Maps a dataset summary to its name.
 *
 * @param input the summary to read the name from
 * @return the value of {@code input.getName()}
 */
@Override
public String apply(DatasetSpecificationSummary input) {
  String datasetName = input.getName();
  return datasetName;
}
});
/**
 * Responds with a JSON map describing the local datasets of a workflow run.
 *
 * <p>For every local dataset declared in the workflow specification, the run-scoped dataset name
 * is built as {@code <local-name>.<run-id>}. Only datasets for which
 * {@code datasetFramework.hasInstance} reports {@code true} are included; the map key is the
 * local (unmapped) name and the value is a summary carrying the mapped name, type and properties.
 *
 * @param request the incoming HTTP request (not read by this method)
 * @param responder used to send the JSON response with status OK
 * @param namespaceId namespace taken from the request path
 * @param applicationId application taken from the request path
 * @param workflowId workflow taken from the request path
 * @param runId run taken from the request path
 * @throws NotFoundException declared by this handler; presumably raised by
 *         {@code getWorkflowSpecForValidRun} for an unknown run - confirm against that helper
 * @throws DatasetManagementException declared by this handler; presumably raised by the
 *         dataset framework lookup - confirm
 */
@GET
@Path("/apps/{app-id}/workflows/{workflow-id}/runs/{run-id}/localdatasets")
public void getWorkflowLocalDatasets(HttpRequest request, HttpResponder responder,
                                     @PathParam("namespace-id") String namespaceId,
                                     @PathParam("app-id") String applicationId,
                                     @PathParam("workflow-id") String workflowId,
                                     @PathParam("run-id") String runId)
  throws NotFoundException, DatasetManagementException {
  WorkflowSpecification spec = getWorkflowSpecForValidRun(namespaceId, applicationId, workflowId, runId);
  Map<String, DatasetSpecificationSummary> existingLocalDatasets = new HashMap<>();

  for (Map.Entry<String, DatasetCreationSpec> declared : spec.getLocalDatasetSpecs().entrySet()) {
    String localName = declared.getKey();
    DatasetCreationSpec creationSpec = declared.getValue();

    // Local datasets are stored per run under "<local name>.<run id>".
    String runScopedName = localName + "." + runId;
    String typeName = creationSpec.getTypeName();
    Map<String, String> props = creationSpec.getProperties().getProperties();

    if (!datasetFramework.hasInstance(new DatasetId(namespaceId, runScopedName))) {
      // The dataset does not exist for this run; leave it out of the response.
      continue;
    }
    existingLocalDatasets.put(localName, new DatasetSpecificationSummary(runScopedName, typeName, props));
  }

  responder.sendJson(HttpResponseStatus.OK, GSON.toJson(existingLocalDatasets));
}
/**
 * Produces one table row for a dataset summary.
 *
 * @param object the summary to render
 * @return a new list holding, in order, the summary's name, type and description
 */
@Override
public List<?> makeRow(DatasetSpecificationSummary object) {
  List<Object> cells = Lists.newArrayList();
  cells.add(object.getName());
  cells.add(object.getType());
  cells.add(object.getDescription());
  return cells;
}
}).build();
@Override public void run() { try { LOG.info("Upgrading dataset in system namespace: {}, spec: {}", spec.getName(), spec.toString()); DatasetAdmin admin = dsFramework.getAdmin(datasetId, null); // we know admin is not null, since we are looping over existing datasets //noinspection ConstantConditions admin.upgrade(); LOG.info("Upgraded dataset: {}", spec.getName()); } catch (Exception e) { throw new RuntimeException(e); } } };
@Override public void run() { try { List<NamespaceMeta> list = namespaceAdmin.list(); for (NamespaceMeta namespaceMeta : list) { Collection<DatasetSpecificationSummary> specs = datasetFramework.getInstances(namespaceMeta.getNamespaceId(), PROPERTIES); if (specs.isEmpty()) { // avoid fetching run records continue; } Set<String> activeRuns = getActiveRuns(namespaceMeta.getNamespaceId()); for (DatasetSpecificationSummary spec : specs) { deleteLocalDataset(namespaceMeta.getName(), spec.getName(), activeRuns, spec.getProperties()); } } } catch (Throwable t) { LOG.warn("Failed to delete the local datasets.", t); } }
Assert.assertEquals("table1", specs.iterator().next().getProperties().get("tag")); specs = framework.getInstances(namespace2); Assert.assertEquals(1, specs.size()); Assert.assertEquals("table2", specs.iterator().next().getProperties().get("tag"));
@Override public void run() { try { List<NamespaceMeta> list = namespaceAdmin.list(); for (NamespaceMeta namespaceMeta : list) { Collection<DatasetSpecificationSummary> specs = datasetFramework.getInstances(namespaceMeta.getNamespaceId(), PROPERTIES); if (specs.isEmpty()) { // avoid fetching run records continue; } Set<String> activeRuns = getActiveRuns(namespaceMeta.getNamespaceId()); for (DatasetSpecificationSummary spec : specs) { deleteLocalDataset(namespaceMeta.getName(), spec.getName(), activeRuns, spec.getProperties()); } } } catch (Throwable t) { LOG.warn("Failed to delete the local datasets.", t); } }
/**
 * Finds the first summary whose name equals the given instance name.
 *
 * @param instanceName the dataset instance name to look for
 * @param summaries the summaries to search, in iteration order
 * @return the first matching summary, or {@code null} when none matches
 */
private DatasetSpecificationSummary getSummaryForInstance(String instanceName,
                                                          List<DatasetSpecificationSummary> summaries) {
  DatasetSpecificationSummary found = null;
  for (DatasetSpecificationSummary candidate : summaries) {
    if (instanceName.equals(candidate.getName())) {
      found = candidate;
      break;
    }
  }
  return found;
}
Assert.assertTrue(summary.getName().endsWith(pid)); Map<String, String> updatedProperties = new HashMap<>(summary.getProperties()); updatedProperties.remove(Constants.AppFabric.WORKFLOW_KEEP_LOCAL); datasetFramework.updateInstance(new DatasetId(TEST_NAMESPACE1, summary.getName()), DatasetProperties.of(updatedProperties));
/**
 * Produces one table row for a dataset summary.
 *
 * @param object the summary to render
 * @return a new list holding, in order, the summary's name, type and description
 */
@Override
public List<?> makeRow(DatasetSpecificationSummary object) {
  List<Object> cells = Lists.newArrayList();
  cells.add(object.getName());
  cells.add(object.getType());
  cells.add(object.getDescription());
  return cells;
}
}).build();
@Override public Collection<DatasetSpecificationSummary> getInstances(NamespaceId namespaceId, Map<String, String> properties) { readLock.lock(); try { // don't expect this to be called a lot. // might be better to maintain this collection separately and just return it, but seems like its not worth it. Collection<DatasetSpecification> specs = instances.row(namespaceId).values(); ImmutableList.Builder<DatasetSpecificationSummary> specSummaries = ImmutableList.builder(); for (DatasetSpecification spec : specs) { if (properties.isEmpty() || Maps.difference(properties, spec.getProperties()).entriesOnlyOnLeft().isEmpty()) { specSummaries.add(new DatasetSpecificationSummary(spec.getName(), spec.getType(), spec.getProperties())); } } return specSummaries.build(); } finally { readLock.unlock(); } }
private void upgradeSystemDatasets(ExecutorService executor) throws Exception { Map<String, Future<?>> futures = new HashMap<>(); for (final DatasetSpecificationSummary spec : dsFramework.getInstances(NamespaceId.SYSTEM)) { final DatasetId datasetId = NamespaceId.SYSTEM.dataset(spec.getName()); Runnable runnable = new Runnable() { @Override
/**
 * Responds with a JSON map describing the local datasets of a workflow run.
 *
 * <p>For every local dataset declared in the workflow specification, the run-scoped dataset name
 * is built as {@code <local-name>.<run-id>}. Only datasets for which
 * {@code datasetFramework.hasInstance} reports {@code true} are included; the map key is the
 * local (unmapped) name and the value is a summary carrying the mapped name, type and properties.
 *
 * @param request the incoming HTTP request (not read by this method)
 * @param responder used to send the JSON response with status OK
 * @param namespaceId namespace taken from the request path
 * @param applicationId application taken from the request path
 * @param workflowId workflow taken from the request path
 * @param runId run taken from the request path
 * @throws NotFoundException declared by this handler; presumably raised by
 *         {@code getWorkflowSpecForValidRun} for an unknown run - confirm against that helper
 * @throws DatasetManagementException declared by this handler; presumably raised by the
 *         dataset framework lookup - confirm
 */
@GET
@Path("/apps/{app-id}/workflows/{workflow-id}/runs/{run-id}/localdatasets")
public void getWorkflowLocalDatasets(HttpRequest request, HttpResponder responder,
                                     @PathParam("namespace-id") String namespaceId,
                                     @PathParam("app-id") String applicationId,
                                     @PathParam("workflow-id") String workflowId,
                                     @PathParam("run-id") String runId)
  throws NotFoundException, DatasetManagementException {
  WorkflowSpecification spec = getWorkflowSpecForValidRun(namespaceId, applicationId, workflowId, runId);
  Map<String, DatasetSpecificationSummary> existingLocalDatasets = new HashMap<>();

  for (Map.Entry<String, DatasetCreationSpec> declared : spec.getLocalDatasetSpecs().entrySet()) {
    String localName = declared.getKey();
    DatasetCreationSpec creationSpec = declared.getValue();

    // Local datasets are stored per run under "<local name>.<run id>".
    String runScopedName = localName + "." + runId;
    String typeName = creationSpec.getTypeName();
    Map<String, String> props = creationSpec.getProperties().getProperties();

    if (!datasetFramework.hasInstance(new DatasetId(namespaceId, runScopedName))) {
      // The dataset does not exist for this run; leave it out of the response.
      continue;
    }
    existingLocalDatasets.put(localName, new DatasetSpecificationSummary(runScopedName, typeName, props));
  }

  responder.sendJson(HttpResponseStatus.OK, GSON.toJson(existingLocalDatasets));
}
/**
 * Converts dataset summaries into the ids of same-named datasets in the default namespace.
 *
 * @param datasetSpecs the summaries whose names are used
 * @return an immutable set with one {@code DatasetId} per distinct summary name, in the
 *         iteration order of {@code datasetSpecs}
 */
private Set<DatasetId> summaryToDatasetIdSet(Collection<DatasetSpecificationSummary> datasetSpecs) {
  ImmutableSet.Builder<DatasetId> ids = ImmutableSet.builder();
  for (DatasetSpecificationSummary summary : datasetSpecs) {
    ids.add(NamespaceId.DEFAULT.dataset(summary.getName()));
  }
  return ids.build();
}
@Override public Collection<DatasetSpecificationSummary> getInstances(NamespaceId namespaceId, Map<String, String> properties) { readLock.lock(); try { // don't expect this to be called a lot. // might be better to maintain this collection separately and just return it, but seems like its not worth it. Collection<DatasetSpecification> specs = instances.row(namespaceId).values(); ImmutableList.Builder<DatasetSpecificationSummary> specSummaries = ImmutableList.builder(); for (DatasetSpecification spec : specs) { if (properties.isEmpty() || Maps.difference(properties, spec.getProperties()).entriesOnlyOnLeft().isEmpty()) { specSummaries.add(new DatasetSpecificationSummary(spec.getName(), spec.getType(), spec.getProperties())); } } return specSummaries.build(); } finally { readLock.unlock(); } }
private void report(Map<TableId, LevelDBTableService.TableStats> datasetStat) throws DatasetManagementException { for (Map.Entry<TableId, LevelDBTableService.TableStats> statEntry : datasetStat.entrySet()) { String namespace = statEntry.getKey().getNamespace(); // emit metrics for only user datasets, tables in system namespace are ignored if (NamespaceId.SYSTEM.getNamespace().equals(namespace)) { continue; } String tableName = statEntry.getKey().getTableName(); Collection<DatasetSpecificationSummary> instances = dsFramework.getInstances(new NamespaceId(namespace)); for (DatasetSpecificationSummary spec : instances) { DatasetSpecification specification = dsFramework.getDatasetSpec(new DatasetId(namespace, spec.getName())); if (specification.isParent(tableName)) { MetricsContext collector = metricsService.getContext(ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, namespace, Constants.Metrics.Tag.DATASET, spec.getName())); int sizeInMb = (int) (statEntry.getValue().getDiskSizeBytes() / BYTES_IN_MB); collector.gauge("dataset.size.mb", sizeInMb); break; } } } }
static Collection<DatasetSpecificationSummary> spec2Summary(Collection<DatasetSpecification> specs) { List<DatasetSpecificationSummary> datasetSummaries = Lists.newArrayList(); for (DatasetSpecification spec : specs) { // TODO: (CDAP-3097) handle system datasets specially within a namespace instead of filtering them out // by the handler. This filter is only in the list endpoint because the other endpoints are used by // HBaseQueueAdmin through DatasetFramework. spec = DatasetsUtil.fixOriginalProperties(spec); datasetSummaries.add(new DatasetSpecificationSummary(spec.getName(), spec.getType(), spec.getDescription(), spec.getOriginalProperties())); } return datasetSummaries; }
private void report(Map<TableId, LevelDBTableService.TableStats> datasetStat) throws DatasetManagementException { for (Map.Entry<TableId, LevelDBTableService.TableStats> statEntry : datasetStat.entrySet()) { String namespace = statEntry.getKey().getNamespace(); // emit metrics for only user datasets, tables in system namespace are ignored if (NamespaceId.SYSTEM.getNamespace().equals(namespace)) { continue; } String tableName = statEntry.getKey().getTableName(); Collection<DatasetSpecificationSummary> instances = dsFramework.getInstances(new NamespaceId(namespace)); for (DatasetSpecificationSummary spec : instances) { DatasetSpecification specification = dsFramework.getDatasetSpec(new DatasetId(namespace, spec.getName())); if (specification.isParent(tableName)) { MetricsContext collector = metricsService.getContext(ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, namespace, Constants.Metrics.Tag.DATASET, spec.getName())); int sizeInMb = (int) (statEntry.getValue().getDiskSizeBytes() / BYTES_IN_MB); collector.gauge("dataset.size.mb", sizeInMb); break; } } } }
static Collection<DatasetSpecificationSummary> spec2Summary(Collection<DatasetSpecification> specs) { List<DatasetSpecificationSummary> datasetSummaries = Lists.newArrayList(); for (DatasetSpecification spec : specs) { // TODO: (CDAP-3097) handle system datasets specially within a namespace instead of filtering them out // by the handler. This filter is only in the list endpoint because the other endpoints are used by // HBaseQueueAdmin through DatasetFramework. if (QueueConstants.STATE_STORE_NAME.equals(spec.getName())) { continue; } spec = DatasetsUtil.fixOriginalProperties(spec); datasetSummaries.add(new DatasetSpecificationSummary(spec.getName(), spec.getType(), spec.getDescription(), spec.getOriginalProperties())); } return datasetSummaries; }
/**
 * Tells whether a dataset lies outside the system namespace.
 *
 * <p>The check asks a {@code DefaultDatasetNamespace}, built from a freshly created
 * {@code CConfiguration}, whether the dataset name is contained in the system namespace.
 *
 * <p>NOTE(review): a new configuration and namespace object are created on every call; consider
 * whether they could be shared if this is called frequently.
 *
 * @param specification the summary whose name is tested
 * @return {@code true} if the name is not contained in the system namespace
 */
private boolean isUserDataset(DatasetSpecificationSummary specification) {
  DefaultDatasetNamespace namespaces = new DefaultDatasetNamespace(CConfiguration.create());
  boolean inSystemNamespace =
    namespaces.contains(specification.getName(), NamespaceId.SYSTEM.getNamespace());
  return !inSystemNamespace;
}