/**
 * Returns the jobs of all analyzer components registered on this publisher.
 *
 * @return the analyzer jobs, in consumer order; empty array when the
 *         publisher has no analyzer consumers
 */
@Override
public AnalyzerJob[] getAnalyzerJobs() {
    final List<AnalyzerJob> jobs = new ArrayList<>();
    for (final RowProcessingConsumer candidate : _publisher.getConsumers()) {
        // only AnalyzerConsumer wrappers carry an AnalyzerJob
        if (!(candidate instanceof AnalyzerConsumer)) {
            continue;
        }
        jobs.add(((AnalyzerConsumer) candidate).getComponentJob());
    }
    return jobs.toArray(new AnalyzerJob[jobs.size()]);
}
/**
 * Closes the row-processing consumers held by this publisher.
 *
 * <p>NOTE(review): despite the name, this delegates to the publisher's
 * {@code closeConsumers()}, which from here appears to close all consumers,
 * not only non-distributable ones — confirm against the publisher
 * implementation.
 */
private void closeNonDistributableComponents() {
    _publisher.closeConsumers();
}
/**
 * Convenience constructor that unpacks the analysis job, listener and
 * source-column finder from the given publisher and delegates to the main
 * constructor.
 *
 * @param publisher          publisher supplying job, listener and source-column finder
 * @param outcomeSinkJob     holder of this consumer's component requirement
 * @param inputColumnSinkJob provider of the input columns this consumer reads
 */
protected AbstractRowProcessingConsumer(final RowProcessingPublisher publisher,
        final HasComponentRequirement outcomeSinkJob, final InputColumnSinkJob inputColumnSinkJob) {
    this(publisher.getAnalysisJob(), publisher.getAnalysisListener(), outcomeSinkJob, inputColumnSinkJob,
            publisher.getSourceColumnFinder());
}
/**
 * Consumes a single physical row: fails fast when the job is cancelled or
 * already errornous, assigns the next row number, pushes the row through the
 * consume-row handler, then reports progress to the analysis listener.
 *
 * @param row the physical row to process
 * @throws PreviousErrorsExistException when the job was cancelled or an
 *         earlier row caused an error
 */
@Override
public void putRow(final Row row) {
    final ErrorAware errorState = _publisher.getErrorAware();
    if (errorState.isCancelled() || errorState.isErrornous()) {
        // stop feeding rows once the job is dead - earlier failure wins
        throw new PreviousErrorsExistException();
    }

    final int rowId = _rowCounter.incrementAndGet();
    final MetaModelInputRow wrappedRow = new MetaModelInputRow(rowId, row);
    _consumeRowHandler.consumeRow(wrappedRow);

    _publisher.getAnalysisListener().rowProcessingProgress(_publisher.getAnalysisJob(),
            _publisher.getRowProcessingMetrics(), wrappedRow, rowId);
}
}
/**
 * Prepares the output data stream for consumption: builds a select-all query
 * on the stream's table, wires a row collector backed by a freshly created
 * consume-row handler, hands both to the component, and finally notifies the
 * analysis listener that row processing has begun.
 */
public void initialize() {
    final Table streamTable = _outputDataStreamJob.getOutputDataStream().getTable();

    // select-all query over the stream's table; its select items define the
    // columns exposed to the collector
    final Query selectAllQuery = new Query();
    selectAllQuery.from(streamTable).selectAll();
    final List<SelectItem> items = selectAllQuery.getSelectClause().getItems();

    final ConsumeRowHandler handler = _publisher.createConsumeRowHandler();
    _outputRowCollector = new OutputDataStreamRowCollector(_publisher, items, handler);

    final OutputDataStream stream = _outputDataStreamJob.getOutputDataStream();
    _component.initializeOutputDataStream(stream, selectAllQuery, _outputRowCollector);

    _publisher.getAnalysisListener().rowProcessingBegin(_publisher.getAnalysisJob(),
            _publisher.getRowProcessingMetrics());
}
for (final RowProcessingPublisher thisPublisher : publishers) { if (thisPublisher != publisher) { if (thisPublisher.getStream().isSourceTable()) { throw new IllegalArgumentException( "Job consumes multiple source tables, but ConsumeRowHandler can only handle a single " publisher.initializeConsumers(new TaskListener() { @Override public void onError(final Task task, final Throwable throwable) { List<RowProcessingConsumer> consumers = publisher.getConsumers(); if (!rowConsumeConfiguration.includeAnalyzers) { consumers = removeAnalyzers(consumers);
final RowProcessingPublishers publishers = getRowProcessingPublishers(job, lifeCycleHelper); final RowProcessingPublisher publisher = getRowProcessingPublisher(publishers); publisher.initializeConsumers(new TaskListener() { @Override public void onError(final Task task, final Throwable throwable) { _analysisListener.jobBegin(job, analysisJobMetrics); final RowProcessingMetrics rowProcessingMetrics = publisher.getRowProcessingMetrics(); _analysisListener.rowProcessingBegin(job, rowProcessingMetrics);
/**
 * Initializes the given consumer exactly once across all publishers, and
 * recursively initializes the consumers of any output data streams it
 * activates.
 *
 * <p>A consumer can be shared between several row-processing streams; only
 * the first publisher to register (publisher count == 1) performs the actual
 * lifecycle work.
 *
 * @param consumer        the consumer to initialize
 * @param publisher       the publisher registering itself with the consumer
 * @param lifeCycleHelper helper performing property assignment, validation
 *                        and initialization
 */
private static void executeInternal(final RowProcessingConsumer consumer, final RowProcessingPublisher publisher,
        final LifeCycleHelper lifeCycleHelper) {
    // we synchronize to avoid a race condition where initialization
    // is on-going in one stream and therefore skipped in the other
    synchronized (consumer) {
        final int publisherCount = consumer.onPublisherInitialized(publisher);
        if (publisherCount == 1) {
            // first publisher to see this consumer - do the full lifecycle:
            // configured properties, provided properties, validation, then
            // (after descendant streams) initialization
            final ComponentConfiguration configuration = consumer.getComponentJob().getConfiguration();
            final ComponentDescriptor<?> descriptor = consumer.getComponentJob().getDescriptor();
            final Object component = consumer.getComponent();
            lifeCycleHelper.assignConfiguredProperties(descriptor, component, configuration);
            lifeCycleHelper.assignProvidedProperties(descriptor, component);
            lifeCycleHelper.validate(descriptor, component);
            // recurse into active output data streams BEFORE initializing the
            // component itself, so downstream consumers are ready first
            final Collection<ActiveOutputDataStream> activeOutputDataStreams = consumer.getActiveOutputDataStreams();
            for (final ActiveOutputDataStream activeOutputDataStream : activeOutputDataStreams) {
                activeOutputDataStream.initialize();
                final RowProcessingPublisher outputDataStreamPublisher = activeOutputDataStream.getPublisher();
                for (final RowProcessingConsumer outputDataStreamConsumer : outputDataStreamPublisher
                        .getConsumers()) {
                    // NOTE(review): the helper is looked up for the OUTER
                    // 'consumer', not 'outputDataStreamConsumer' — verify this
                    // is intentional (consumer-specific scope inheritance?)
                    final LifeCycleHelper outputDataStreamLifeCycleHelper =
                            outputDataStreamPublisher.getPublishers().getConsumerSpecificLifeCycleHelper(consumer);
                    executeInternal(outputDataStreamConsumer, outputDataStreamPublisher,
                            outputDataStreamLifeCycleHelper);
                }
            }
            lifeCycleHelper.initialize(descriptor, component);
        }
    }
}
/**
 * Looks up the row-processing metrics of the publisher that handles the
 * given table.
 *
 * @param table the table whose processing stream is requested
 * @return the publisher's metrics, or {@code null} when no publisher
 *         processes the table
 */
@Override
public RowProcessingMetrics getRowProcessingMetrics(final Table table) {
    final RowProcessingStream stream = _publishers.getStream(table);
    final RowProcessingPublisher tablePublisher = _publishers.getRowProcessingPublisher(stream);
    if (tablePublisher != null) {
        return tablePublisher.getRowProcessingMetrics();
    }
    return null;
}
/**
 * Returns the query used by the underlying publisher to fetch source rows.
 */
@Override
public Query getQuery() {
    return _publisher.getQuery();
}
/**
 * Returns the jobs of every consumer that will produce a result.
 *
 * @return the result-producing component jobs, in consumer order; empty
 *         array when none of the consumers produce results
 */
@Override
public ComponentJob[] getResultProducers() {
    final List<ComponentJob> producers = new ArrayList<>();
    for (final RowProcessingConsumer candidate : _publisher.getConsumers()) {
        if (!candidate.isResultProducer()) {
            continue;
        }
        producers.add(candidate.getComponentJob());
    }
    return producers.toArray(new ComponentJob[producers.size()]);
}
/**
 * Recursively collects analyzer results from the given consumers, including
 * the consumers of any output data streams they activate. Each result is
 * paired with the component key assigned by the Spark job context.
 *
 * @param rowProcessingConsumers consumers to harvest results from
 * @return (key, named result) tuples, in traversal order
 */
private List<Tuple2<String, NamedAnalyzerResult>> getAnalyzerResults(
        final Collection<RowProcessingConsumer> rowProcessingConsumers) {
    final List<Tuple2<String, NamedAnalyzerResult>> collected = new ArrayList<>();
    for (final RowProcessingConsumer consumer : rowProcessingConsumers) {
        if (consumer.isResultProducer()) {
            final HasAnalyzerResult<?> producer = (HasAnalyzerResult<?>) consumer.getComponent();
            final AnalyzerResult result = producer.getResult();
            final String key = _sparkJobContext.getComponentKey(consumer.getComponentJob());
            collected.add(new Tuple2<>(key, new NamedAnalyzerResult(key, result)));
        }
        // descend into the streams this consumer publishes to
        for (final ActiveOutputDataStream activeStream : consumer.getActiveOutputDataStreams()) {
            collected.addAll(getAnalyzerResults(activeStream.getPublisher().getConsumers()));
        }
    }
    return collected;
}
}