/**
 * Gets all {@link ComponentJob}s contained in this job: transformer jobs,
 * analyzer jobs and filter jobs, in that order.
 *
 * @return a new mutable list containing every component job of this job
 */
default List<ComponentJob> getComponentJobs() {
    // Program to the List interface; declaring the local as ArrayList adds nothing.
    final List<ComponentJob> componentJobs = new ArrayList<>();
    componentJobs.addAll(getTransformerJobs());
    componentJobs.addAll(getAnalyzerJobs());
    componentJobs.addAll(getFilterJobs());
    return componentJobs;
}
/**
 * {@inheritDoc}
 *
 * <p>Delegates straight to the wrapped job.
 */
@Override
public AnalysisJobMetadata getMetadata() {
    final AnalysisJobMetadata delegateMetadata = _delegateJob.getMetadata();
    return delegateMetadata;
}
/**
 * Registers the source columns and all component jobs (filters, transformers,
 * analyzers) of the given job as available sources.
 *
 * @param job the job whose columns and components should be registered
 */
public void addSources(final AnalysisJob job) {
    final SourceColumns wrappedSourceColumns = new SourceColumns(job.getSourceColumns());
    addSources(wrappedSourceColumns);
    addSources(job.getFilterJobs());
    addSources(job.getTransformerJobs());
    addSources(job.getAnalyzerJobs());
}
/**
 * Creates a helper over every {@link AnalyzerJob} found in the given job,
 * including the jobs of its flattened (output data stream) children.
 *
 * @param analysisJob the job to collect analyzer jobs from
 */
public AnalyzerJobHelper(final AnalysisJob analysisJob) {
    this(analysisJob.flattened()
            .flatMap(childJob -> childJob.getAnalyzerJobs().stream())
            .collect(Collectors.toList()));
}
// Imports the given job into the wrapped _builder: datastore, metadata, source
// columns, and (apparently) component builders for filters, transformers and analyzers.
// NOTE(review): this snippet looks truncated — the for-loop and the method body are
// never closed, and `componentBuilders` is referenced but not declared anywhere in
// the visible code. Also, the addComponentBuilders calls appear to sit INSIDE the
// per-source-column loop, which would re-add them once per column — verify against
// the full source before relying on this.
public void importJob(final AnalysisJob job) { _builder.setDatastore(job.getDatastore()); _builder.setAnalysisJobMetadata(job.getMetadata()); final Collection<InputColumn<?>> sourceColumns = job.getSourceColumns(); for (final InputColumn<?> inputColumn : sourceColumns) { _builder.addSourceColumn((MetaModelInputColumn) inputColumn); addComponentBuilders(job.getFilterJobs(), componentBuilders); addComponentBuilders(job.getTransformerJobs(), componentBuilders); addComponentBuilders(job.getAnalyzerJobs(), componentBuilders);
// Recursively collects every referenceable configuration item used by the job:
// its datastore plus any Datastore / Dictionary / StringPattern / SynonymCatalog
// values held in configured properties of its components. Components are visited
// analyzer-first, then filter, then transformer, via a single concatenated iterator
// (presumably Guava's Iterators.concat — TODO confirm the import). For each
// component, the method also descends into its output data stream child jobs so
// that nested jobs contribute their configuration items to the same accumulator sets.
private static void addJobConfigurations(final AnalysisJob job, final Set<Datastore> datastores, final Set<Dictionary> dictionaries, final Set<StringPattern> stringPatterns, final Set<SynonymCatalog> synonymCatalogs) { datastores.add(job.getDatastore()); Iterators.concat(job.getAnalyzerJobs().iterator(), job.getFilterJobs().iterator(), job.getTransformerJobs().iterator()).forEachRemaining(component -> { component.getDescriptor().getConfiguredProperties().forEach(descriptor -> { final Class<?> type = descriptor.getBaseType(); if (type == Datastore.class) { datastores.addAll(getProperties(component, descriptor)); } else if (type == Dictionary.class) { dictionaries.addAll(getProperties(component, descriptor)); } else if (type == StringPattern.class) { stringPatterns.addAll(getProperties(component, descriptor)); } else if (type == SynonymCatalog.class) { synonymCatalogs.addAll(getProperties(component, descriptor)); } }); for (final OutputDataStreamJob outputDataStreamJob : component.getOutputDataStreamJobs()) { addJobConfigurations(outputDataStreamJob.getJob(), datastores, dictionaries, stringPatterns, synonymCatalogs); } }); }
/**
 * Resolves the name of the current job's datastore, if any.
 *
 * @return the datastore name, or {@code null} when there is no job, the job
 *         has no datastore, or the datastore's name is null/empty
 */
private String getDatastoreName() {
    // Guard clauses instead of nested ifs — same null-safe walk.
    if (_job == null) {
        return null;
    }
    final Datastore datastore = _job.getDatastore();
    if (datastore == null) {
        return null;
    }
    final String datastoreName = datastore.getName();
    if (StringUtils.isNullOrEmpty(datastoreName)) {
        return null;
    }
    return datastoreName;
}
// NOTE(review): this is a mid-method fragment — the enclosing method signature is
// not visible and the opened loops/ifs are never closed here. Verify against the
// full source. What the visible code does: for each metadata variable of the job,
// and only when the job's datastore is not an OutputDataStreamDatastore, it starts
// building a DataContextType and iterates the job's source columns, qualifying
// their paths via getColumnPathQualification(datastore, sourceColumns).
// Possible inefficiency to confirm: the datastore lookup and path qualification are
// recomputed inside the per-variable loop although they do not depend on the variable.
final Map<String, String> variables = analysisJob.getMetadata().getVariables(); if ((variables != null) && (variables.size() > 0)) { for (final Map.Entry<String, String> variableEntry : variables.entrySet()) { final Datastore datastore = analysisJob.getDatastore(); if (!(datastore instanceof OutputDataStreamDatastore)) { final DataContextType dataContextType = new DataContextType(); final Collection<InputColumn<?>> sourceColumns = analysisJob.getSourceColumns(); final String columnPathQualification = getColumnPathQualification(datastore, sourceColumns); for (final InputColumn<?> inputColumn : sourceColumns) {
/**
 * Finds a component job by its key, searching the given job and, recursively,
 * any jobs attached to the components' output data streams.
 *
 * @param job the job to search
 * @param queriedKey the component key to look for
 * @return the matching {@link ComponentJob}, or {@code null} if none is found
 */
private ComponentJob getComponentByKey(final AnalysisJob job, final String queriedKey) {
    // Bug fix: the original concatenated getTransformerJobs() twice and omitted
    // getFilterJobs(), so filter components could never be found by key. The
    // corrected order (filters, transformers, analyzers) matches the rest of
    // the codebase (see getAllComponents).
    final List<ComponentJob> componentJobs =
            CollectionUtils.concat(false, job.getFilterJobs(), job.getTransformerJobs(), job.getAnalyzerJobs());
    for (final ComponentJob componentJob : componentJobs) {
        final String componentKey = getComponentKey(componentJob);
        if (queriedKey.equals(componentKey)) {
            return componentJob;
        }
        // Depth-first descent into output data stream child jobs.
        final OutputDataStreamJob[] outputDataStreamJobs = componentJob.getOutputDataStreamJobs();
        for (final OutputDataStreamJob outputDataStreamJob : outputDataStreamJobs) {
            final AnalysisJob childJob = outputDataStreamJob.getJob();
            if (childJob != null) {
                final ComponentJob result = getComponentByKey(childJob, queriedKey);
                if (result != null) {
                    return result;
                }
            }
        }
    }
    return null;
}
/**
 * {@inheritDoc}
 *
 * <p>Delegates straight to the wrapped job.
 */
@Override
public List<InputColumn<?>> getSourceColumns() {
    final List<InputColumn<?>> delegateColumns = _delegateJob.getSourceColumns();
    return delegateColumns;
}
/**
 * {@inheritDoc}
 *
 * <p>Delegates straight to the wrapped job.
 */
@Override
public List<FilterJob> getFilterJobs() {
    final List<FilterJob> delegateFilterJobs = _delegateJob.getFilterJobs();
    return delegateFilterJobs;
}
/**
 * {@inheritDoc}
 *
 * <p>Delegates straight to the wrapped job.
 */
@Override
public List<TransformerJob> getTransformerJobs() {
    final List<TransformerJob> delegateTransformerJobs = _delegateJob.getTransformerJobs();
    return delegateTransformerJobs;
}
// NOTE(review): mid-method fragment — the loop opened here is never closed in the
// visible code; verify against the full source. Visible behavior: iterates the
// master job's analyzer jobs, allocating a fresh (empty) collection of slave
// results per analyzer — presumably to aggregate distributed/slave results per
// master analyzer; confirm with the enclosing method.
final Collection<AnalyzerJob> analyzerJobs = _masterJob.getAnalyzerJobs(); for (final AnalyzerJob masterAnalyzerJob : analyzerJobs) { final Collection<AnalyzerResult> slaveResults = new ArrayList<>();
/** * Validates a job before executing it. * * @param context * @param job * @param analysisJob * * @throws NoSuchDatastoreException */ private void preExecuteJob(TenantContext context, DataCleanerJobContext job, AnalysisJob analysisJob) throws NoSuchDatastoreException { final Datastore datastore = analysisJob.getDatastore(); if (datastore instanceof PlaceholderDatastore) { // the job was materialized using a placeholder datastore - ie. // the real datastore was not found! final String sourceDatastoreName = job.getSourceDatastoreName(); logger.warn( "Raising a NoSuchDatastoreException since a PlaceholderDatastore was found at execution time: {}", sourceDatastoreName); throw new NoSuchDatastoreException(sourceDatastoreName); } }
/**
 * Builds a list of {@link MetricGroup}s for a specific {@link AnalysisJob},
 * one group per analyzer job (including analyzers of flattened child jobs)
 * that yields a non-null group.
 *
 * @param jobContext the metric job context used to build each group
 * @param analysisJob the job whose analyzers are inspected
 *
 * @return the metric groups; empty when no analyzer produces one
 */
public List<MetricGroup> getMetricGroups(final MetricJobContext jobContext, final AnalysisJob analysisJob) {
    // Consistency: final parameters and loop variable, matching the codebase style.
    final List<MetricGroup> metricGroups = new ArrayList<>();
    final List<AnalyzerJob> analyzerJobs = analysisJob.flattened()
            .flatMap(analysisJob1 -> analysisJob1.getAnalyzerJobs().stream()).collect(Collectors.toList());
    for (final AnalyzerJob analyzerJob : analyzerJobs) {
        final Set<MetricDescriptor> metricDescriptors = analyzerJob.getDescriptor().getResultMetrics();
        final MetricGroup metricGroup = getMetricGroup(jobContext, analyzerJob, metricDescriptors);
        if (metricGroup != null) {
            metricGroups.add(metricGroup);
        }
    }
    return metricGroups;
}
/**
 * Builds a positional mapping from this transformer's input columns (parent
 * job) to the wrapped job's source columns (child job): the i-th input column
 * maps to the i-th source column, preserving order.
 *
 * @param wrappedAnalysisJob the child job whose source columns are mapped
 * @return an ordered parent-to-child column map
 * @throws IllegalStateException when the wrapped job declares more source
 *         columns than this transformer has input columns
 */
@Override
protected Map<InputColumn<?>, InputColumn<?>> getInputColumnConversion(final AnalysisJob wrappedAnalysisJob) {
    final Collection<InputColumn<?>> sourceColumns = wrappedAnalysisJob.getSourceColumns();
    if (input.length < sourceColumns.size()) {
        throw new IllegalStateException(
                "Wrapped job defines " + sourceColumns.size() + " columns, but transformer input only defines "
                        + input.length);
    }
    // Pair columns positionally; LinkedHashMap keeps the declaration order.
    final Map<InputColumn<?>, InputColumn<?>> result = new LinkedHashMap<>();
    int position = 0;
    for (final InputColumn<?> childColumn : sourceColumns) {
        result.put(input[position], childColumn);
        position++;
    }
    return result;
}
/**
 * Gets every component job of the given job: filters, transformers and
 * analyzers, concatenated in that order.
 *
 * @param job the job whose components are collected
 * @return all component jobs of the job
 */
public static Collection<ComponentJob> getAllComponents(final AnalysisJob job) {
    final Collection<ComponentJob> allComponents =
            CollectionUtils.concat(false, job.getFilterJobs(), job.getTransformerJobs(), job.getAnalyzerJobs());
    return allComponents;
}
// Builds the base SELECT query for row processing from the job's datastore and the
// collected physical columns, then wraps it in a RowProcessingQueryOptimizerImpl
// together with the sorted consumer chain. The connection is opened only for query
// construction (try-with-resources); the optimizer presumably re-opens its own
// connection when executed — TODO confirm. On failure the error is logged with its
// cause and rethrown unchanged.
// NOTE(review): the trailing "} };" closes an enclosing anonymous class not visible
// in this chunk — kept byte-identical.
@Override protected RowProcessingQueryOptimizer fetch() { final Datastore datastore = getAnalysisJob().getDatastore(); try (DatastoreConnection con = datastore.openConnection()) { final DataContext dataContext = con.getDataContext(); final Column[] columnArray = _physicalColumns.toArray(new Column[_physicalColumns.size()]); final Query baseQuery = dataContext.query().from(getTable()).select(columnArray).toQuery(); logger.debug("Base query for row processing: {}", baseQuery); // try to optimize return new RowProcessingQueryOptimizerImpl(datastore, getConsumersSorted(), baseQuery); } catch (final RuntimeException e) { logger.error("Failed to build query optimizer! {}", e.getMessage(), e); throw e; } } };
public Table[] getTables(final SourceColumnFinder sourceColumnFinder, final ComponentJob componentJob, Column[] physicalColumns) { if (physicalColumns == null) { physicalColumns = getPhysicalColumns(sourceColumnFinder, componentJob); } final Table[] tables; if (physicalColumns.length == 0) { // if not dependent on any specific tables, make component available // for all tables final Set<Table> allTables = new HashSet<>(); final Collection<InputColumn<?>> allSourceColumns = _analysisJob.getSourceColumns(); for (final InputColumn<?> inputColumn : allSourceColumns) { allTables.add(inputColumn.getPhysicalColumn().getTable()); } tables = allTables.toArray(new Table[allTables.size()]); } else { tables = MetaModelHelper.getTables(physicalColumns); } if (tables.length > 1) { if (!componentJob.getDescriptor().isMultiStreamComponent()) { throw new IllegalStateException("Component has input columns from multiple tables: " + componentJob); } } if (tables.length == 0) { throw new IllegalStateException("Component has no dependent tables: " + componentJob); } return tables; }
/**
 * Gets the metadata of the given job, never returning {@code null}.
 *
 * @param analysisJob the job whose metadata is requested
 * @return the job's metadata, or {@link AnalysisJobMetadata#EMPTY_METADATA}
 *         when the job carries none
 */
protected AnalysisJobMetadata getMetadata(final AnalysisJob analysisJob) {
    final AnalysisJobMetadata metadata = analysisJob.getMetadata();
    return metadata == null ? AnalysisJobMetadata.EMPTY_METADATA : metadata;
}
}