/** * Concatenate two {@link Dag}s. Modify the {@link ConfigurationKeys#JOB_DEPENDENCIES} in the {@link JobSpec}s of the child * {@link Dag} to reflect the concatenation operation. * @param dagLeft The parent dag. * @param dagRight The child dag. * @return The concatenated dag with modified {@link ConfigurationKeys#JOB_DEPENDENCIES}. */ @VisibleForTesting static Dag<JobExecutionPlan> concatenate(Dag<JobExecutionPlan> dagLeft, Dag<JobExecutionPlan> dagRight) { //Compute the fork nodes - set of nodes with no dependents in the concatenated dag. Set<DagNode<JobExecutionPlan>> forkNodes = dagLeft.getEndNodes().stream(). filter(endNode -> isNodeForkable(endNode)).collect(Collectors.toSet()); Set<DagNode<JobExecutionPlan>> dependencyNodes = dagLeft.getDependencyNodes(forkNodes); if (!dependencyNodes.isEmpty()) { List<String> dependenciesList = dependencyNodes.stream() .map(dagNode -> dagNode.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY)) .collect(Collectors.toList()); String dependencies = Joiner.on(",").join(dependenciesList); for (DagNode<JobExecutionPlan> childNode : dagRight.getStartNodes()) { JobSpec jobSpec = childNode.getValue().getJobSpec(); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef(dependencies))); } } return dagLeft.concatenate(dagRight, forkNodes); }
jobSpec.setConfig(jobSpec.getConfig().withoutPath(ConfigurationKeys.JOB_SCHEDULE_KEY)); jobSpec.setConfig(jobSpec.getConfig() .withValue(ConfigurationKeys.JOB_NAME_KEY, flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_NAME_KEY))); jobSpec.setConfig(jobSpec.getConfig() .withValue(ConfigurationKeys.JOB_GROUP_KEY, flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_GROUP_KEY))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, ConfigValueFactory.fromAnyRef(flowExecutionId)));
jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_GROUP_KEY, ConfigValueFactory.fromAnyRef(flowGroup))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_NAME_KEY, ConfigValueFactory.fromAnyRef(flowName))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_NAME_KEY, ConfigValueFactory.fromAnyRef(jobName))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, ConfigValueFactory.fromAnyRef(flowExecutionId))); jobSpec.setConfig(jobSpec.getConfig().withoutPath(ConfigurationKeys.JOB_SCHEDULE_KEY)); jobSpec.setConfig(jobSpec.getConfig().withoutPath(FSFlowCatalog.JOB_TEMPLATE_KEY)); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_NAME_KEY, ConfigValueFactory.fromAnyRef(jobName))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_GROUP_KEY, ConfigValueFactory.fromAnyRef(flowGroup))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_FAILURE_OPTION, ConfigValueFactory.fromAnyRef(flowFailureOption)));
jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef(updatedDependencies)));
jobSpec.setConfig(jobSpec.getConfig().withoutPath(ConfigurationKeys.JOB_SCHEDULE_KEY)); jobSpec.setConfig(jobSpec.getConfig() .withValue(ConfigurationKeys.JOB_NAME_KEY, ConfigValueFactory.fromAnyRef( flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_NAME_KEY).unwrapped().toString() jobSpec.setConfig(jobSpec.getConfig() .withValue(ConfigurationKeys.JOB_GROUP_KEY, flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_GROUP_KEY))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, ConfigValueFactory.fromAnyRef(flowExecutionId)));
/** * A method to add tracking event configurations to a JobSpec. * This enables {@link org.apache.gobblin.metrics.GobblinTrackingEvent}s * to be emitted from each Gobblin job orchestrated by Gobblin-as-a-Service, which will then be used for tracking the * execution status of the job. * @param jobSpec representing a fully resolved {@link JobSpec}. */ private static void addTrackingEventConfig(JobSpec jobSpec, Config sysConfig) { Config reportingConfig = ConfigUtils.getConfig(sysConfig, ConfigurationKeys.METRICS_REPORTING_CONFIGURATIONS_PREFIX, ConfigFactory.empty()); if (!reportingConfig.isEmpty()) { Config jobConfig = jobSpec.getConfig().withFallback(reportingConfig.atPath(ConfigurationKeys.METRICS_REPORTING_CONFIGURATIONS_PREFIX)); boolean isSchemaRegistryEnabled = ConfigUtils.getBoolean(sysConfig, ConfigurationKeys.METRICS_REPORTING_KAFKA_USE_SCHEMA_REGISTRY, false); if (isSchemaRegistryEnabled) { String schemaRegistryUrl = ConfigUtils.getString(sysConfig, KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_URL, ""); if (!Strings.isNullOrEmpty(schemaRegistryUrl)) { jobConfig = jobConfig.withValue(KafkaSchemaRegistry.KAFKA_SCHEMA_REGISTRY_URL, ConfigValueFactory.fromAnyRef(schemaRegistryUrl)); } String schemaOverrideNamespace = ConfigUtils .getString(sysConfig, KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_OVERRIDE_NAMESPACE, ""); if (!Strings.isNullOrEmpty(schemaOverrideNamespace)) { jobConfig = jobConfig.withValue(KafkaSchemaRegistryConfigurationKeys.KAFKA_SCHEMA_REGISTRY_OVERRIDE_NAMESPACE, ConfigValueFactory.fromAnyRef(schemaOverrideNamespace)); } } jobSpec.setConfig(jobConfig); } }
jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_GROUP_KEY, ConfigValueFactory.fromAnyRef(flowGroup))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_NAME_KEY, ConfigValueFactory.fromAnyRef(flowName))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_NAME_KEY, ConfigValueFactory.fromAnyRef(jobName))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, ConfigValueFactory.fromAnyRef(flowExecutionId))); jobSpec.setConfig(jobSpec.getConfig().withoutPath(ConfigurationKeys.JOB_SCHEDULE_KEY)); jobSpec.setConfig(jobSpec.getConfig().withoutPath(FSFlowCatalog.JOB_TEMPLATE_KEY)); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_NAME_KEY, ConfigValueFactory.fromAnyRef(jobName))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_GROUP_KEY, ConfigValueFactory.fromAnyRef(flowGroup))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_FAILURE_OPTION, ConfigValueFactory.fromAnyRef(flowFailureOption)));
jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef(updatedDependencies)));
jobSpec.setConfig(jobSpec.getConfig().withoutPath(ConfigurationKeys.JOB_SCHEDULE_KEY)); jobSpec.setConfig(jobSpec.getConfig() .withValue(ConfigurationKeys.JOB_NAME_KEY, flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_NAME_KEY))); jobSpec.setConfig(jobSpec.getConfig() .withValue(ConfigurationKeys.JOB_GROUP_KEY, flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_GROUP_KEY))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, ConfigValueFactory.fromAnyRef(flowExecutionId)));
/** * Concatenate two {@link Dag}s. Modify the {@link ConfigurationKeys#JOB_DEPENDENCIES} in the {@link JobSpec}s of the child * {@link Dag} to reflect the concatenation operation. * @param dagLeft The parent dag. * @param dagRight The child dag. * @return The concatenated dag with modified {@link ConfigurationKeys#JOB_DEPENDENCIES}. */ @VisibleForTesting static Dag<JobExecutionPlan> concatenate(Dag<JobExecutionPlan> dagLeft, Dag<JobExecutionPlan> dagRight) { //Compute the fork nodes - set of nodes with no dependents in the concatenated dag. Set<DagNode<JobExecutionPlan>> forkNodes = dagLeft.getEndNodes().stream(). filter(endNode -> isNodeForkable(endNode)).collect(Collectors.toSet()); Set<DagNode<JobExecutionPlan>> dependencyNodes = dagLeft.getDependencyNodes(forkNodes); if (!dependencyNodes.isEmpty()) { List<String> dependenciesList = dependencyNodes.stream() .map(dagNode -> dagNode.getValue().getJobSpec().getConfig().getString(ConfigurationKeys.JOB_NAME_KEY)) .collect(Collectors.toList()); String dependencies = Joiner.on(",").join(dependenciesList); for (DagNode<JobExecutionPlan> childNode : dagRight.getStartNodes()) { JobSpec jobSpec = childNode.getValue().getJobSpec(); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.JOB_DEPENDENCIES, ConfigValueFactory.fromAnyRef(dependencies))); } } return dagLeft.concatenate(dagRight, forkNodes); }
jobSpec.setConfig(jobSpec.getConfig().withoutPath(ConfigurationKeys.JOB_SCHEDULE_KEY)); jobSpec.setConfig(jobSpec.getConfig() .withValue(ConfigurationKeys.JOB_NAME_KEY, ConfigValueFactory.fromAnyRef( flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_NAME_KEY).unwrapped().toString() jobSpec.setConfig(jobSpec.getConfig() .withValue(ConfigurationKeys.JOB_GROUP_KEY, flowSpec.getConfig().getValue(ConfigurationKeys.FLOW_GROUP_KEY))); jobSpec.setConfig(jobSpec.getConfig().withValue(ConfigurationKeys.FLOW_EXECUTION_ID_KEY, ConfigValueFactory.fromAnyRef(flowExecutionId)));