public String renderComponentJob(ComponentJob job) { final ComponentDescriptor<?> desc = job.getDescriptor(); if (desc instanceof HasAnalyzerResultComponentDescriptor) { // if descriptor is an HasAnalyzerResultComponentDescriptor return renderMetrics(job, (HasAnalyzerResultComponentDescriptor<?>) desc); } // or else we cannot handle it return ""; }
/**
 * Gets the identifying input column of a {@link ComponentJob}, if there is
 * such a column. With an identifying input column, an externalizable
 * reference to the {@link ComponentJob} can be built from the descriptor
 * name, the component name and the identifying column.
 *
 * A column is only considered identifying when the descriptor exposes
 * exactly one input property (queried with {@code false}) and that property
 * holds either a single {@link InputColumn} or an array containing exactly
 * one column.
 *
 * @param componentJob
 *            the job to inspect
 * @return the single identifying input column, or {@code null} if none can
 *         be determined
 */
public static InputColumn<?> getIdentifyingInputColumn(final ComponentJob componentJob) {
    final Set<ConfiguredPropertyDescriptor> candidates =
            componentJob.getDescriptor().getConfiguredPropertiesForInput(false);
    if (candidates.size() != 1) {
        return null;
    }

    final ConfiguredPropertyDescriptor soleProperty = candidates.iterator().next();
    final Object value = componentJob.getConfiguration().getProperty(soleProperty);

    if (value instanceof InputColumn) {
        return (InputColumn<?>) value;
    }

    if (value instanceof InputColumn[]) {
        final InputColumn<?>[] columns = (InputColumn[]) value;
        // Only a one-element array identifies the job unambiguously.
        return columns.length == 1 ? columns[0] : null;
    }

    return null;
}
final Collection<? extends ComponentJob> componentJobs) { for (final ComponentJob componentJob : componentJobs) { if (!componentJob.getDescriptor().isMultiStreamComponent()) { Table originatingTable = null; final InputColumn<?>[] input = componentJob.getInput(); final OutputDataStreamJob[] outputDataStreamJobs = componentJob.getOutputDataStreamJobs(); for (final OutputDataStreamJob outputDataStreamJob : outputDataStreamJobs) { validateSingleTableInput(outputDataStreamJob.getJob());
/**
 * Compares two component jobs using a cascade of tie-breakers: first their
 * descriptors, then their names, then the jobs themselves (both of the
 * latter via {@code ToStringComparator}), and finally their hash codes.
 *
 * @param o1
 *            the first job
 * @param o2
 *            the second job
 * @return a negative number, zero or a positive number as {@code o1} sorts
 *         before, equal to or after {@code o2}
 */
@Override
public int compare(final ComponentJob o1, final ComponentJob o2) {
    int diff = o1.getDescriptor().compareTo(o2.getDescriptor());
    if (diff == 0) {
        diff = ToStringComparator.getComparator().compare(o1.getName(), o2.getName());
    }
    if (diff == 0) {
        diff = ToStringComparator.getComparator().compare(o1, o2);
    }
    if (diff == 0) {
        // Subtracting hash codes can overflow int and flip the sign of the
        // result, which violates the comparator contract; Integer.compare
        // cannot overflow.
        diff = Integer.compare(o1.hashCode(), o2.hashCode());
    }
    return diff;
}
/**
 * Collects the distinct physical columns that a component job depends on.
 *
 * The result covers the originating columns of every input column of the
 * job and, when the job has a component requirement, the originating
 * columns of each filter outcome that the requirement depends on.
 *
 * @param sourceColumnFinder
 *            used to resolve input columns and filter outcomes to their
 *            originating columns
 * @param componentJob
 *            the job whose column dependencies are collected
 * @return the distinct originating columns, in no particular order
 */
public Column[] getPhysicalColumns(final SourceColumnFinder sourceColumnFinder, final ComponentJob componentJob) {
    final Set<Column> collected = new HashSet<>();

    for (final InputColumn<?> column : componentJob.getInput()) {
        collected.addAll(sourceColumnFinder.findOriginatingColumns(column));
    }

    final ComponentRequirement requirement = componentJob.getComponentRequirement();
    if (requirement != null) {
        for (final FilterOutcome outcome : requirement.getProcessingDependencies()) {
            collected.addAll(sourceColumnFinder.findOriginatingColumns(outcome));
        }
    }

    final Column[] result = new Column[collected.size()];
    return collected.toArray(result);
}
final String jobName = job.getName(); final StringBuilder label = new StringBuilder(); if (Strings.isNullOrEmpty(jobName)) { final ComponentDescriptor<?> descriptor = job.getDescriptor(); String baseName = descriptor.getDisplayName(); if (ReflectionUtils.is(descriptor.getComponentClass(), HasLabelAdvice.class)) { final LifeCycleHelper lch = new LifeCycleHelper((DataCleanerConfiguration) null, (AnalysisJob) null, false); lch.assignConfiguredProperties(descriptor, c, job.getConfiguration()); final String suggestedLabel = c.getSuggestedLabel(); if (!StringUtils.isNullOrEmpty(suggestedLabel)) {
final ComponentJob componentJob = entry.getKey(); final ComponentBuilder builder = entry.getValue(); final ComponentRequirement originalRequirement = componentJob.getComponentRequirement(); final ComponentRequirement componentRequirement = findImportedRequirement(originalRequirement, componentBuilders); final ComponentBuilder builder = entry.getValue(); final Set<ConfiguredPropertyDescriptor> inputColumnProperties = componentJob.getDescriptor().getConfiguredPropertiesForInput(true); componentJob.getConfiguration().getProperty(inputColumnProperty); final Object newInputColumnValue = findImportedInputColumns(originalInputColumnValue, componentBuilders, sourceColumnFinder); final ComponentJob componentJob = entry.getKey(); final ComponentBuilder builder = entry.getValue(); final OutputDataStreamJob[] outputDataStreamJobs = componentJob.getOutputDataStreamJobs(); for (final OutputDataStreamJob outputDataStreamJob : outputDataStreamJobs) { final OutputDataStream outputDataStream = outputDataStreamJob.getOutputDataStream();
/**
 * Creates a component builder that mirrors the incoming
 * {@link ComponentJob}: its name, configured properties and metadata
 * properties are copied onto a new builder created from the job's
 * descriptor.
 *
 * Note that input (columns and requirements) will not be mapped, since
 * these depend on the context of the {@link FilterJob} and may not be
 * matched in the {@link AnalysisJobBuilder}.
 *
 * If the job is an {@link InputColumnSourceJob}, the names of its output
 * columns are also copied, position by position, onto the builder's output
 * columns. In that case the builder is cast to
 * {@link TransformerComponentBuilder}.
 *
 * @param componentJob
 *            the job to replicate
 *
 * @return the builder object for the specific component
 */
protected ComponentBuilder addComponent(final ComponentJob componentJob) {
    final ComponentDescriptor<?> descriptor = componentJob.getDescriptor();
    final ComponentBuilder builder = addComponent(descriptor);

    builder.setName(componentJob.getName());
    builder.setConfiguredProperties(componentJob.getConfiguration());
    builder.setMetadataProperties(componentJob.getMetadataProperties());

    if (!(componentJob instanceof InputColumnSourceJob)) {
        return builder;
    }

    final InputColumn<?>[] sourceOutput = ((InputColumnSourceJob) componentJob).getOutput();
    final List<MutableInputColumn<?>> targetColumns =
            ((TransformerComponentBuilder<?>) builder).getOutputColumns();

    assert sourceOutput.length == targetColumns.size();

    // Carry over the output column names by position.
    for (int index = 0; index < sourceOutput.length; index++) {
        targetColumns.get(index).setName(sourceOutput[index].getName());
    }

    return builder;
}
/**
 * Gathers the configuration items referenced by a job into the supplied
 * sets.
 *
 * The job's datastore is always added. For every analyzer, filter and
 * transformer job, each configured property whose base type is exactly
 * {@link Datastore}, {@link Dictionary}, {@link StringPattern} or
 * {@link SynonymCatalog} contributes its value(s) to the matching set. The
 * jobs of all output data streams are processed recursively.
 *
 * @param job
 *            the job to inspect
 * @param datastores
 *            receives the referenced datastores
 * @param dictionaries
 *            receives the referenced dictionaries
 * @param stringPatterns
 *            receives the referenced string patterns
 * @param synonymCatalogs
 *            receives the referenced synonym catalogs
 */
private static void addJobConfigurations(final AnalysisJob job, final Set<Datastore> datastores,
        final Set<Dictionary> dictionaries, final Set<StringPattern> stringPatterns,
        final Set<SynonymCatalog> synonymCatalogs) {
    datastores.add(job.getDatastore());

    Iterators.concat(job.getAnalyzerJobs().iterator(), job.getFilterJobs().iterator(),
            job.getTransformerJobs().iterator()).forEachRemaining(component -> {
                for (final ConfiguredPropertyDescriptor property : component.getDescriptor()
                        .getConfiguredProperties()) {
                    final Class<?> baseType = property.getBaseType();
                    if (baseType == Datastore.class) {
                        datastores.addAll(getProperties(component, property));
                    } else if (baseType == Dictionary.class) {
                        dictionaries.addAll(getProperties(component, property));
                    } else if (baseType == StringPattern.class) {
                        stringPatterns.addAll(getProperties(component, property));
                    } else if (baseType == SynonymCatalog.class) {
                        synonymCatalogs.addAll(getProperties(component, property));
                    }
                }

                // Descend into the jobs that consume this component's output streams.
                for (final OutputDataStreamJob streamJob : component.getOutputDataStreamJobs()) {
                    addJobConfigurations(streamJob.getJob(), datastores, dictionaries, stringPatterns,
                            synonymCatalogs);
                }
            });
}
/**
 * Produces a stream of this job followed by all jobs nested below it.
 *
 * Nested jobs are those attached to the output data streams of this job's
 * components; each nested job is itself flattened recursively.
 *
 * @return a stream starting with this job, followed by its descendants
 */
default Stream<AnalysisJob> flattened() {
    final Stream<AnalysisJob> descendants = getComponentJobs().stream()
            .flatMap(componentJob -> Stream.of(componentJob.getOutputDataStreamJobs()))
            .map(OutputDataStreamJob::getJob)
            .flatMap(AnalysisJob::flattened);

    return Stream.concat(Stream.of(this), descendants);
}
}
/**
 * Resolves the input columns held by a configured property of a component
 * job.
 *
 * The property value may be a single {@link InputColumn}, an array of
 * input columns or a {@link List} of input columns. Any other value
 * (including {@code null}) yields an empty stream.
 *
 * @param componentJob
 *            the job whose configuration is read
 * @param property
 *            the property to read
 * @return a stream of the input columns found in the property value
 */
private Stream<InputColumn<?>> getInputColumns(ComponentJob componentJob, ConfiguredPropertyDescriptor property) {
    final Object value = componentJob.getConfiguration().getProperty(property);

    if (value instanceof InputColumn) {
        return Stream.of((InputColumn<?>) value);
    }

    if (value instanceof InputColumn[]) {
        // The original built this stream but never returned it, so
        // array-valued properties always produced an empty stream.
        return Arrays.stream((InputColumn<?>[]) value);
    }

    if (value instanceof List) {
        @SuppressWarnings("unchecked")
        final List<InputColumn<?>> list = (List<InputColumn<?>>) value;
        return list.stream();
    }

    return Stream.empty();
}
}
getLocalInputColumns(sourceColumnFinder, dataStream.getTable(), componentJob.getInput()); final OutputDataStreamJob[] outputDataStreamJobs = componentJob.getOutputDataStreamJobs(); for (final OutputDataStreamJob outputDataStreamJob : outputDataStreamJobs) { registerOutputDataStream(rowPublisher, consumer, outputDataStreamJob);
/**
 * Tests whether the given component job carries the analyzer name being
 * looked for.
 *
 * @param o
 *            the component job to test
 * @return {@code true} if {@code analyzerJobName} equals the job's name
 */
@Override
public Boolean eval(ComponentJob o) {
    return analyzerJobName.equals(o.getName());
}
});
/**
 * Builds the key of a component job from its metadata properties.
 *
 * The key is the value stored under
 * {@code METADATA_PROPERTY_COMPONENT_INDEX}. If the job also carries a
 * value under
 * {@code AnalyzerComponentBuilder.METADATA_PROPERTY_BUILDER_PARTITION_INDEX},
 * that value is appended after a dot.
 *
 * @param componentJob
 *            the job to build a key for
 * @return the component key, optionally suffixed with the partition index
 * @throws IllegalStateException
 *             if the job has no component index registered
 */
public String getComponentKey(final ComponentJob componentJob) {
    final String indexKey = componentJob.getMetadataProperties().get(METADATA_PROPERTY_COMPONENT_INDEX);
    if (indexKey == null) {
        throw new IllegalStateException("No key registered for component: " + componentJob);
    }

    final String partitionIndex = componentJob.getMetadataProperties()
            .get(AnalyzerComponentBuilder.METADATA_PROPERTY_BUILDER_PARTITION_INDEX);

    return partitionIndex == null ? indexKey : indexKey + "." + partitionIndex;
}
final ComponentJob componentJob = nextConsumer.getComponentJob(); if (componentJob instanceof HasComponentRequirement) { final ComponentRequirement componentRequirement = componentJob.getComponentRequirement(); if (componentRequirement != null) { final Collection<FilterOutcome> requirements = componentRequirement.getProcessingDependencies();
if (componentJob.getDescriptor().getComponentClass() == PatternFinderAnalyzer.class && !(componentJob .getConfiguration().getProperty(componentJob.getDescriptor().getConfiguredProperty( "Group column")) == null)) { logger.warn("Pattern finder analyzer doesn't support metrics if it has a Group column configured."); metricIdentifier.setAnalyzerDescriptorName(componentJob.getDescriptor().getDisplayName()); metricIdentifier.setAnalyzerName(componentJob.getName()); if (identifyingInputColumn != null) { metricIdentifier.setAnalyzerInputName(identifyingInputColumn.getName());
analyzerJobName = metricIdentifier.getAnalyzerName(); } else { componentJobDescriptorName = componentJob.getDescriptor().getDisplayName(); analyzerJobName = componentJob.getName();
/**
 * Adds one {@link OutputDataStreamType} to the component type for every
 * output data stream job of the given component job.
 *
 * Each entry is named after the output data stream and holds a child
 * {@link JobType} populated through {@code configureJobType}.
 *
 * @param componentType
 *            the element that receives the output data stream entries
 * @param componentJob
 *            the job whose output data streams are written
 * @param columnMappings
 *            the column mappings handed on to {@code configureJobType}
 */
private void addOutputDataStreams(final ComponentType componentType, final ComponentJob componentJob,
        final BiMap<InputColumn<?>, String> columnMappings) {
    for (final OutputDataStreamJob streamJob : componentJob.getOutputDataStreamJobs()) {
        final OutputDataStreamType streamType = new OutputDataStreamType();
        streamType.setName(streamJob.getOutputDataStream().getName());

        final JobType nestedJobType = new JobType();
        configureJobType(streamJob.getJob(), nestedJobType, columnMappings, false);
        streamType.setJob(nestedJobType);

        componentType.getOutputDataStream().add(streamType);
    }
}
/**
 * Reads the value(s) of a configured property as a list.
 *
 * Array properties are exposed as a fixed-size list view of the array;
 * other properties are wrapped in a single-element list. A property with
 * no value ({@code null}) yields an empty list, so callers can safely
 * {@code addAll} the result without hitting a
 * {@link NullPointerException} or collecting {@code null} elements.
 *
 * @param component
 *            the job whose configuration is read
 * @param descriptor
 *            the property to read
 * @param <T>
 *            the element type expected by the caller (unchecked)
 * @return the property value(s), never {@code null}
 */
@SuppressWarnings("unchecked")
private static <T> List<T> getProperties(final ComponentJob component,
        final ConfiguredPropertyDescriptor descriptor) {
    final Object value = component.getConfiguration().getProperty(descriptor);

    // Arrays.asList would throw on a null array, and singletonList would
    // wrap the null; treat a missing value as contributing nothing.
    if (value == null) {
        return Collections.emptyList();
    }

    if (descriptor.isArray()) {
        return Arrays.asList((T[]) value);
    }
    return Collections.singletonList((T) value);
}
protected ResultDescriptor getResultDescriptor(final ComponentJob componentJob, final AnalyzerResult analyzerResult) { final ComponentDescriptor<?> descriptor = componentJob.getDescriptor(); if (descriptor instanceof ResultDescriptor) { return (ResultDescriptor) descriptor; } // slightly more expensive, but potentially also better / more specific! return Descriptors.ofResult(analyzerResult); } }