/**
 * Produces a fresh {@link AnalysisJobBuilder} based on the root job builder of
 * the given one.
 *
 * The root builder is obtained, stripped of listeners via
 * {@code withoutListeners()}, turned into an {@link AnalysisJob} with
 * {@code toAnalysisJob(false)}, and finally wrapped in a new builder that uses
 * the configuration of {@code original}.
 *
 * @param original
 *            the builder whose root job is copied
 * @return a new builder constructed from the original's configuration and the
 *         job created from its root builder
 */
public static AnalysisJobBuilder copy(final AnalysisJobBuilder original) {
    final AnalysisJobBuilder root = original.getRootJobBuilder();
    final AnalysisJob snapshot = root.withoutListeners().toAnalysisJob(false);
    return new AnalysisJobBuilder(original.getConfiguration(), snapshot);
}
/**
 * Builds a short label describing the scope of the given job builder.
 *
 * @param sourceAnalysisJobBuilder
 *            the job builder to describe
 * @return {@code "default scope"} when the builder reports itself as the root
 *         job builder, otherwise {@code "scope "} followed by the name of the
 *         builder's datastore
 */
public static String getScopeLabel(final AnalysisJobBuilder sourceAnalysisJobBuilder) {
    if (sourceAnalysisJobBuilder.isRootJobBuilder()) {
        return "default scope";
    }
    return "scope " + sourceAnalysisJobBuilder.getDatastore().getName();
}
}
/**
 * Registers everything the given job builder offers as a source: first its
 * source columns (wrapped in a {@link SourceColumns}), then its filter,
 * transformer and analyzer component builders, in that order.
 *
 * @param job
 *            the job builder whose columns and component builders are added
 */
public void addSources(final AnalysisJobBuilder job) {
    final SourceColumns columnsOfJob = new SourceColumns(job.getSourceColumns());
    this.addSources(columnsOfJob);

    // component builders are added per kind, each through the matching overload
    this.addSources(job.getFilterComponentBuilders());
    this.addSources(job.getTransformerComponentBuilders());
    this.addSources(job.getAnalyzerComponentBuilders());
}
/**
 * Flips the membership of a column in the source selection: a column that
 * is currently a source column is removed, any other column is added.
 *
 * @param column
 *            the column to toggle
 */
public void toggleColumn(final Column column) {
    final boolean currentlySelected = _analysisJobBuilder.containsSourceColumn(column);
    if (!currentlySelected) {
        _analysisJobBuilder.addSourceColumn(column);
        return;
    }
    _analysisJobBuilder.removeSourceColumn(column);
}
}
/**
 * Adds an already constructed component builder by dispatching on its kind
 * to {@code addFilter}, {@code addTransformer} or {@code addAnalyzer}.
 *
 * @param builder
 *            the filter, transformer or analyzer builder to add
 * @return the same {@code builder} instance that was passed in
 * @throws UnsupportedOperationException
 *             if the builder is none of the three supported kinds
 */
public ComponentBuilder addComponent(final ComponentBuilder builder) {
    if (builder instanceof FilterComponentBuilder) {
        addFilter((FilterComponentBuilder<?, ?>) builder);
        return builder;
    }
    if (builder instanceof TransformerComponentBuilder) {
        addTransformer((TransformerComponentBuilder<?>) builder);
        return builder;
    }
    if (builder instanceof AnalyzerComponentBuilder) {
        addAnalyzer((AnalyzerComponentBuilder<?>) builder);
        return builder;
    }
    throw new UnsupportedOperationException("Unknown component type: " + builder);
}
final DatastoreConnection con = ds.openConnection(); final SchemaNavigator sn = con.getSchemaNavigator(); ajb.setDatastore(ds); ajb.addSourceColumns(sn.convertToTable("PUBLIC.CUSTOMERS").getColumns()); ajb.addAnalyzer(ValueDistributionAnalyzer.class); singleValueDist.addInputColumn(ajb.getSourceColumnByName("PUBLIC.CUSTOMERS.ADDRESSLINE2")); ajb.addAnalyzer(ValueDistributionAnalyzer.class); groupedValueDist.addInputColumn(ajb.getSourceColumnByName("PUBLIC.CUSTOMERS.CITY")); groupedValueDist.setConfiguredProperty("Group column", ajb.getSourceColumnByName("PUBLIC.CUSTOMERS.COUNTRY"));
originalAnalysisJobBuilder.getAnalysisJobMetadata().getProperties() .put(PreviewUtils.METADATA_PROPERTY_MARKER, jobBuilderIdentifier); final AnalysisJobBuilder ajb; try { final AnalysisJobBuilder copyAnalysisJobBuilder = PreviewUtils.copy(originalAnalysisJobBuilder.getRootJobBuilder()); ajb = PreviewUtils.findAnalysisJobBuilder(copyAnalysisJobBuilder, jobBuilderIdentifier); } finally { originalAnalysisJobBuilder.getAnalysisJobMetadata().getProperties() .remove(PreviewUtils.METADATA_PROPERTY_MARKER); final List<Table> tables = ajb.getSourceTables(); if (tables.size() > 1) { final Table originatingTable = sourceColumnFinder.findOriginatingTable(tjb.getOutputColumns().get(0)); tables.remove(originatingTable); for (final Table otherTable : tables) { ajb.removeSourceTable(otherTable); final List<MetaModelInputColumn> sourceColumns = ajb.getSourceColumns(); if (sourceColumns.isEmpty()) { logger.error("No source columns left after removing irrelevant source tables. Component: {}", ajb.addAnalyzer(Descriptors.ofAnalyzer(PreviewTransformedDataAnalyzer.class)) .addInputColumns(tjb.getInputColumns()).addInputColumns(tjb.getOutputColumns()); final AnalysisJobBuilder rootJobBuilder = ajb.getRootJobBuilder(); final Collection<? extends ComponentBuilder> componentBuilders; if (alreadyFiltered) {
/** * Creates a slave job by copying the original job and adding a * {@link MaxRowsFilter} as a default requirement. * * @param job * @param firstRow * @param maxRows * @return */ private AnalysisJob buildSlaveJob(final AnalysisJob job, final int slaveJobIndex, final int firstRow, final int maxRows) { logger.info("Building slave job {} with firstRow={} and maxRow={}", slaveJobIndex + 1, firstRow, maxRows); try (AnalysisJobBuilder jobBuilder = new AnalysisJobBuilder(_configuration, job)) { final FilterComponentBuilder<MaxRowsFilter, Category> maxRowsFilter = jobBuilder.addFilter(MaxRowsFilter.class); maxRowsFilter.getComponentInstance().setFirstRow(firstRow); maxRowsFilter.getComponentInstance().setMaxRows(maxRows); final boolean naturalRecordOrderConsistent = jobBuilder.getDatastore().getPerformanceCharacteristics().isNaturalRecordOrderConsistent(); if (!naturalRecordOrderConsistent) { final InputColumn<?> orderColumn = findOrderByColumn(jobBuilder); maxRowsFilter.getComponentInstance().setOrderColumn(orderColumn); } jobBuilder.setDefaultRequirement(maxRowsFilter, MaxRowsFilter.Category.VALID); // in assertion/test mode do an early validation assert jobBuilder.isConfigured(true); return jobBuilder.toAnalysisJob(); } }
final AnalysisJob jobCopy = analysisJobBuilder.toAnalysisJob(false); final AnalysisJobBuilder jobBuilderCopy = new AnalysisJobBuilder(analysisJobBuilder.getConfiguration(), jobCopy); final Set<ComponentBuilder> analyzers = jobBuilderCopy.getComponentBuilders().stream() .filter(o -> o instanceof AnalyzerComponentBuilder).collect(Collectors.toSet()); PreviewUtils.limitJobRows(jobBuilderCopy, jobBuilderCopy.getComponentBuilders(), maxRows); } else { PreviewUtils.limitJobRows(jobBuilderCopy, jobBuilderCopy.getFilterComponentBuilders(), maxRows);
assert _outputDataStreams.contains(outputDataStream); final Table table = outputDataStream.getTable(); analysisJobBuilder = new AnalysisJobBuilder(_analysisJobBuilder.getConfiguration(), _analysisJobBuilder); analysisJobBuilder.setDatastore(new OutputDataStreamDatastore(outputDataStream)); analysisJobBuilder.addSourceColumns(table.getColumns()); final List<MetaModelInputColumn> sourceColumns = analysisJobBuilder.getSourceColumns(); final List<String> sourceColumnsNames = new ArrayList<>(sourceColumns.size()); for (int i = 0; i < sourceColumns.size(); i++) { analysisJobBuilder.removeSourceColumn(sourceColumns.get(i)); analysisJobBuilder.addSourceColumns(columns);
ajb.setDatastore(ds); final Table table = con.getSchemaNavigator().convertToTable("PUBLIC.ORDERS"); ajb.addSourceColumn(table.getColumnByName("ORDERDATE")); ajb.addSourceColumn(table.getColumnByName("SHIPPEDDATE")); ajb.addSourceColumn(table.getColumnByName("CUSTOMERNUMBER")); (InputColumn<Date>) ajb.getSourceColumnByName("ORDERDATE"); @SuppressWarnings("unchecked") final InputColumn<Date> shippedDateColumn = (InputColumn<Date>) ajb.getSourceColumnByName("SHIPPEDDATE"); @SuppressWarnings("unchecked") final InputColumn<Integer> customerNumberColumn = (InputColumn<Integer>) ajb.getSourceColumnByName("CUSTOMERNUMBER"); @SuppressWarnings("unchecked") final MutableInputColumn<String> customerNumberAsStringColumn = (MutableInputColumn<String>) ajb.addTransformer(ConvertToStringTransformer.class) .addInputColumn(customerNumberColumn).getOutputColumns().get(0); final DateGapAnalyzer dga = ajb.addAnalyzer(DateGapAnalyzer.class).getComponentInstance(); dga.setFromColumn(orderDateColumn); dga.setToColumn(shippedDateColumn); dga.setGroupColumn(customerNumberAsStringColumn); final AnalysisResultFuture resultFuture = runner.run(ajb.toAnalysisJob());
/**
 * Creates an analysis job of this {@link AnalysisJobBuilder}. Delegates to
 * {@link #toAnalysisJob(boolean)} with {@code true}.
 *
 * @return the analysis job produced by the delegate call
 * @throws IllegalStateException
 *             if the job is invalidly configured. See
 *             {@link #isConfigured(boolean)} for detailed exception
 *             descriptions.
 */
public AnalysisJob toAnalysisJob() throws RuntimeException {
    final AnalysisJob builtJob = this.toAnalysisJob(true);
    return builtJob;
}
/**
 * Creates an action listener that, when triggered, copies the current job,
 * appends an analyzer of the given class fed with all available input
 * columns, points its "File" property at a date-stamped output resource,
 * closes this dialog and runs the copied job.
 *
 * @param analyzerClass
 *            the analyzer class to add to the copied job
 * @param filenameExtension
 *            the extension handed to {@code createResource} for the output
 * @return the listener performing the steps described above
 */
private ActionListener createWriteDataActionListener(final Class<? extends Analyzer<?>> analyzerClass,
        final String filenameExtension) {
    return actionEvent -> {
        // work on a copy so the job shown to the user is left untouched
        final AnalysisJob jobSnapshot = _analysisJobBuilder.toAnalysisJob(false);
        final AnalysisJobBuilder writerJobBuilder =
                new AnalysisJobBuilder(_analysisJobBuilder.getConfiguration(), jobSnapshot);

        final AnalyzerComponentBuilder<? extends Analyzer<?>> writer = writerJobBuilder.addAnalyzer(analyzerClass);
        writer.addInputColumns(writerJobBuilder.getAvailableInputColumns(Object.class));

        final String today = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
        final FileResource outputResource = createResource("datacleaner-" + today + "-output", filenameExtension);

        // the Excel analyzer gets a plain File, every other analyzer gets the resource itself
        if (analyzerClass != CreateExcelSpreadsheetAnalyzer.class) {
            writer.setConfiguredProperty("File", outputResource);
        } else {
            final File outputFile = outputResource.getFile();
            writer.setConfiguredProperty("File", outputFile);
        }

        // only set a sheet name on analyzers that declare such a property
        final ConfiguredPropertyDescriptor sheetName = writer.getDescriptor().getConfiguredProperty("Sheet name");
        if (sheetName != null) {
            writer.setConfiguredProperty(sheetName, "data");
        }

        final RunAnalysisActionListener runner = new RunAnalysisActionListener(_dcModule, writerJobBuilder);
        ExecuteJobWithoutAnalyzersDialog.this.close();
        runner.run();
    };
}
final Table sourceTable = jobBuilder.getSourceTables().get(0); if (primaryKeys.size() == 1) { final Column primaryKey = primaryKeys.get(0); final InputColumn<?> sourceColumn = jobBuilder.getSourceColumnByName(primaryKey.getName()); if (sourceColumn == null) { jobBuilder.addSourceColumn(primaryKey); logger.info("Added PK source column for ORDER BY clause on slave jobs: {}", sourceColumn); return jobBuilder.getSourceColumnByName(primaryKey.getName()); } else { logger.info("Using existing PK source column for ORDER BY clause on slave jobs: {}", sourceColumn); final List<MetaModelInputColumn> sourceColumns = jobBuilder.getSourceColumns(); final String tableName = sourceTable.getName().toLowerCase(); for (final MetaModelInputColumn sourceColumn : sourceColumns) {
/**
 * Runs the given action against a copy of the job builder. The copy is
 * created from {@code toAnalysisJob(false)} and uses a configuration
 * derived from the original one, whose environment is given a
 * {@link SingleThreadedTaskRunner}.
 *
 * @param analysisJobBuilder
 *            the job builder to copy
 * @param executeAction
 *            the action invoked with the copied job builder
 * @param analysisJobBuilderWindow
 *            not used by this implementation
 * @throws Exception
 *             declared by the signature; this method adds no handling of
 *             its own
 */
@Override
protected void run(final AnalysisJobBuilder analysisJobBuilder, final Action<AnalysisJobBuilder> executeAction,
        final AnalysisJobBuilderWindow analysisJobBuilderWindow) throws Exception {
    final DataCleanerConfiguration originalConfiguration = analysisJobBuilder.getConfiguration();

    // same configuration, but with the environment's task runner swapped for a single threaded one
    final DataCleanerConfigurationImpl singleThreadedConfiguration =
            new DataCleanerConfigurationImpl(originalConfiguration).withEnvironment(
                    new DataCleanerEnvironmentImpl(originalConfiguration.getEnvironment())
                            .withTaskRunner(new SingleThreadedTaskRunner()));

    final AnalysisJobBuilder isolatedJobBuilder =
            new AnalysisJobBuilder(singleThreadedConfiguration, analysisJobBuilder.toAnalysisJob(false));

    executeAction.run(isolatedJobBuilder);
}
});
/**
 * Builds a quick analysis job for the selected columns and runs it.
 *
 * A new job builder is set up with the datastore and columns, configured by
 * the {@link QuickAnalysisStrategy} loaded from the user preferences, and
 * then handed to a {@link RunAnalysisActionListener} obtained from a Guice
 * injector. Any exception raised during validation or execution is shown
 * to the user as an error message.
 *
 * @param event
 *            the event that is forwarded to the run-analysis listener
 */
@Override
public void actionPerformed(final ActionEvent event) {
    final AnalysisJobBuilder quickJobBuilder = new AnalysisJobBuilder(_configuration);
    quickJobBuilder.setDatastore(_datastore);
    quickJobBuilder.addSourceColumns(getColumns());

    final QuickAnalysisStrategy strategy = QuickAnalysisStrategy.loadFromUserPreferences(_userPreferences);
    strategy.configureAnalysisJobBuilder(quickJobBuilder);

    try {
        final boolean configured = quickJobBuilder.isConfigured(true);
        if (!configured) {
            throw new IllegalStateException("Unknown job configuration issue!");
        }

        final Injector jobInjector = Guice.createInjector(new DCModuleImpl(_dcModule, quickJobBuilder));
        jobInjector.getInstance(RunAnalysisActionListener.class).actionPerformed(event);
    } catch (final Exception e) {
        WidgetUtils.showErrorMessage("Error", "Could not perform quick analysis on table " + _table.getName(), e);
    }
}
_analysisJobBuilder.addSourceColumns(table.getColumns()); final Table table = column.getTable(); final List<MetaModelInputColumn> columnsOfSameTable = _analysisJobBuilder.getSourceColumnsOfTable(table); if (columnsOfSameTable.isEmpty()) { _analysisJobBuilder.addSourceColumn(column); final ComponentDescriptor<?> descriptor = (ComponentDescriptor<?>) data; final Map<String, String> metadata = JobGraphMetadata.createMetadataProperties(dropPoint); _analysisJobBuilder.addComponent(descriptor, null, null, metadata);
/**
 * Creates an immutable metadata snapshot for the job being built.
 *
 * The snapshot combines the values held by the mutable job metadata (name,
 * version, description, author, dates, variables and properties) with the
 * datastore name and the qualified labels and types of the physical columns
 * behind the current source columns.
 *
 * @return a new {@link ImmutableAnalysisJobMetadata}; its datastore name is
 *         {@code null} when no datastore is set
 */
public AnalysisJobMetadata createMetadata() {
    final MutableAnalysisJobMetadata current = getAnalysisJobMetadata();

    final Datastore datastore = getDatastore();
    final String datastoreName;
    if (datastore == null) {
        datastoreName = null;
    } else {
        datastoreName = datastore.getName();
    }

    // paths and types are parallel lists: index i of both describes the same source column
    final List<MetaModelInputColumn> inputColumns = getSourceColumns();
    final List<String> columnPaths = new ArrayList<>(inputColumns.size());
    final List<ColumnType> columnTypes = new ArrayList<>(inputColumns.size());
    for (final MetaModelInputColumn inputColumn : inputColumns) {
        final Column physicalColumn = inputColumn.getPhysicalColumn();
        columnPaths.add(physicalColumn.getQualifiedLabel());
        columnTypes.add(physicalColumn.getType());
    }

    final Map<String, String> properties = current.getProperties();
    final Map<String, String> variables = current.getVariables();

    final String name = current.getJobName();
    final String version = current.getJobVersion();
    final String description = current.getJobDescription();
    final String author = current.getAuthor();
    final Date created = current.getCreatedDate();
    final Date updated = current.getUpdatedDate();

    return new ImmutableAnalysisJobMetadata(name, version, description, author, created, updated, datastoreName,
            columnPaths, columnTypes, variables, properties);
}
/**
 * Determines if the job being built is going to be distributable in a
 * cluster execution environment.
 *
 * The job counts as distributable only when every component builder of this
 * job builder is distributable and every job builder that consumes one of
 * its output data streams is (recursively) distributable too.
 *
 * @return {@code false} as soon as one non-distributable component or child
 *         job builder is found, {@code true} otherwise
 */
public boolean isDistributable() {
    // own components first; child job builders are only looked up if all of these pass
    for (final ComponentBuilder component : getComponentBuilders()) {
        final boolean distributable = component.isDistributable();
        if (!distributable) {
            return false;
        }
    }

    for (final AnalysisJobBuilder consumer : getConsumedOutputDataStreamsJobBuilders()) {
        final boolean distributable = consumer.isDistributable();
        if (!distributable) {
            return false;
        }
    }

    return true;
}
}
/**
 * Points the {@code file} field at a CSV resource inside the user's save
 * datastore directory. The file name has the form
 * {@code <datastore name>-<filter display name>-<category name>.csv}.
 *
 * @param ajb
 *            the job builder whose datastore name is used in the file name
 * @param descriptor
 *            the filter descriptor whose display name is used in the file
 *            name
 * @param categoryName
 *            the filter category name used in the file name
 */
@Override
public void configureForFilterOutcome(final AnalysisJobBuilder ajb, final FilterDescriptor<?, ?> descriptor,
        final String categoryName) {
    final String datastoreName = ajb.getDatastore().getName();
    final File targetDirectory = userPreferences.getSaveDatastoreDirectory();
    final String filterName = descriptor.getDisplayName();

    final String csvFileName = datastoreName + "-" + filterName + "-" + categoryName + ".csv";
    file = new FileResource(new File(targetDirectory, csvFileName));
}