/**
 * Instantiates and initializes the plugin for the stage.
 *
 * @param stageInfo the stage info
 * @return the initialized Transformation
 * @throws InstantiationException if the plugin for the stage could not be instantiated
 * @throws Exception if there was a problem initializing the plugin
 */
private <T extends Transformation & StageLifecycle<BatchRuntimeContext>> Transformation
  getInitializedTransformation(StageSpec stageInfo) throws Exception {
  BatchRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
  T plugin = pluginInstantiator.newPluginInstance(stageInfo.getName(), macroEvaluator);
  plugin.initialize(runtimeContext);
  return plugin;
}
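// A minimal sketch, assuming hypothetical Doer/Initializable interfaces (not part of
// CDAP), of the intersection bound used above: declaring T extends A & B lets the
// method call through both interfaces on one variable, with no cast, while still
// returning it as plain A.
interface Doer {
  void doIt(Object input);
}

interface Initializable<C> {
  void initialize(C context);
}

class IntersectionBoundSketch {
  static <T extends Doer & Initializable<String>> Doer prepare(T plugin) {
    plugin.initialize("context"); // available through Initializable<String>
    return plugin;                // available through Doer
  }
}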
/**
 * Create a transform executor for the specified pipeline. Will instantiate and initialize all sources,
 * transforms, and sinks in the pipeline.
 *
 * @param pipeline the pipeline to create a transform executor for
 * @param outputWriter writes output records to the mapreduce context
 * @return executor for the pipeline
 * @throws InstantiationException if there was an error instantiating a plugin
 * @throws Exception if there was an error initializing a plugin
 */
public <KEY_OUT, VAL_OUT> PipeTransformExecutor<T> create(PipelinePhase pipeline,
                                                          OutputWriter<KEY_OUT, VAL_OUT> outputWriter)
  throws Exception {
  // populate the pipe stages in reverse topological order to ensure that an output is always created before its
  // input. this will allow us to set up all outputs for a stage when we get to it.
  List<String> traversalOrder = pipeline.getDag().getTopologicalOrder();
  Collections.reverse(traversalOrder);

  Map<String, PipeStage> pipeStages = new HashMap<>();
  for (String stageName : traversalOrder) {
    pipeStages.put(stageName, getPipeStage(pipeline, stageName, pipeStages, outputWriter));
  }

  // sourceStageName will be null in reducers, so need to handle that case
  Set<String> startingPoints = (sourceStageName == null) ? pipeline.getSources() : Sets.newHashSet(sourceStageName);
  return new PipeTransformExecutor<>(pipeStages, startingPoints);
}
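// Hypothetical, self-contained illustration (the stage names are made up) of why
// create() reverses the topological order: for source -> transform -> sink, building
// in [sink, transform, source] order means every stage's downstream PipeStage already
// exists in the map when the stage itself is constructed.
import java.util.*;

class ReverseTopoSketch {
  public static void main(String[] args) {
    List<String> topo = new ArrayList<>(Arrays.asList("source", "transform", "sink"));
    Collections.reverse(topo); // [sink, transform, source]

    Map<String, String> pipeStages = new LinkedHashMap<>();
    for (String stage : topo) {
      // any downstream stage this one writes to is already present in pipeStages
      pipeStages.put(stage, "PipeStage(" + stage + ")");
    }
    System.out.println(pipeStages.keySet()); // [sink, transform, source]
  }
}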
if (BatchAggregator.PLUGIN_TYPE.equals(pluginType)) {
  BatchAggregator<?, ?, ?> batchAggregator = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
  BatchRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
  batchAggregator.initialize(runtimeContext);
  if (isMapPhase) {
    return getTrackedEmitKeyStep(
      new MapperAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName),
      stageMetrics, taskContext.getDataTracer(stageName), collector);
  } else {
    return getTrackedAggregateStep(
      new ReducerAggregatorTransformation(batchAggregator, mapOutputKeyClassName, mapOutputValClassName),
      stageMetrics, taskContext.getDataTracer(stageName), collector);
  }
} else if (BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
  BatchJoiner<?, ?, ?> batchJoiner = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
  BatchJoinerRuntimeContext runtimeContext = createRuntimeContext(stageSpec);
  batchJoiner.initialize(runtimeContext);
  if (isMapPhase) {
    return getTrackedEmitKeyStep(
      new MapperJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName),
      stageMetrics, taskContext.getDataTracer(stageName), collector);
  } else {
    return getTrackedMergeStep(
      new ReducerJoinerTransformation(batchJoiner, mapOutputKeyClassName, mapOutputValClassName,
                                      runtimeContext.getInputSchemas().size()),
      stageMetrics, taskContext.getDataTracer(stageName), collector);
  }
}

Transformation transformation = getInitializedTransformation(stageSpec);
boolean isLimitingSource =
  taskContext.getDataTracer(stageName).isEnabled() && BatchSource.PLUGIN_TYPE.equals(pluginType) && isMapPhase;
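// A rough sketch with hypothetical types (not the CDAP BatchAggregator API) of the
// map/reduce split above: the map phase only assigns a group key to each record
// (getTrackedEmitKeyStep), and the reduce phase sees all records for one key and
// collapses them (getTrackedAggregateStep).
import java.util.Iterator;

class AggregatorPhasesSketch {
  // map phase: record -> group key
  static String emitKey(String csvRecord) {
    return csvRecord.split(",")[0];
  }

  // reduce phase: group key plus all grouped records -> one aggregated value
  static String aggregate(String key, Iterator<String> group) {
    int count = 0;
    while (group.hasNext()) {
      group.next();
      count++;
    }
    return key + ":" + count;
  }
}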
MapReduceTransformExecutorFactory<Object> transformExecutorFactory =
  new MapReduceTransformExecutorFactory<>(context, pluginInstantiator, metrics,
                                          new BasicArguments(context.getWorkflowToken(), runtimeArgs),
                                          sourceStage, phaseSpec.getNumOfRecordsPreview(),
                                          phaseSpec.pipelineContainsCondition());
this.transformExecutor = transformExecutorFactory.create(phase, outputWriter);
if (pipeline.getSinks().contains(stageName)) {
  if (Constants.Connector.PLUGIN_TYPE.equals(pluginType) || BatchJoiner.PLUGIN_TYPE.equals(pluginType)) {
    // connectors and joiners at the end of a pipeline consume the RecordInfo directly
    Transformation<RecordInfo<Object>, Object> sink = getTransformation(stageSpec);
    return new DirectOutputPipeStage<>(stageName, sink, new SinkEmitter<>(stageName, outputWriter));
  } else {
    // other sinks only see the value inside the RecordInfo
    return new UnwrapPipeStage<>(stageName, getTransformation(stageSpec),
                                 new SinkEmitter<>(stageName, outputWriter));
  }
}

// non-sink stages emit to their downstream PipeStages through a PipeEmitter
// (its construction is elided in this excerpt)
if (SplitterTransform.PLUGIN_TYPE.equals(pluginType)) {
  return new MultiOutputTransformPipeStage<>(stageName, getMultiOutputTransform(stageSpec), pipeEmitter);
} else {
  return new UnwrapPipeStage<>(stageName, getTransformation(stageSpec), pipeEmitter);
}
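// Hypothetical sketch (RecordInfoSketch is not the real RecordInfo class) of the
// distinction above: DirectOutputPipeStage hands the wrapper itself to connector and
// joiner sinks, which can also inspect the originating stage, while UnwrapPipeStage
// strips the wrapper so ordinary sinks only see the payload.
class RecordInfoSketch<T> {
  private final T value;
  private final String fromStage;

  RecordInfoSketch(T value, String fromStage) {
    this.value = value;
    this.fromStage = fromStage;
  }

  // what an UnwrapPipeStage-style consumer receives
  T unwrap() {
    return value;
  }

  // what a DirectOutputPipeStage-style consumer can additionally inspect
  String fromStage() {
    return fromStage;
  }
}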
private <IN, ERROR> TrackedMultiOutputTransform<IN, ERROR> getMultiOutputTransform(StageSpec stageSpec)
  throws Exception {
  String stageName = stageSpec.getName();
  DefaultMacroEvaluator macroEvaluator = new DefaultMacroEvaluator(arguments, taskContext.getLogicalStartTime(),
                                                                   taskContext, taskContext.getNamespace());
  SplitterTransform<IN, ERROR> splitterTransform = pluginInstantiator.newPluginInstance(stageName, macroEvaluator);
  TransformContext transformContext = createRuntimeContext(stageSpec);
  splitterTransform.initialize(transformContext);

  StageMetrics stageMetrics = new DefaultStageMetrics(metrics, stageName);
  TaskAttemptContext taskAttemptContext = (TaskAttemptContext) taskContext.getHadoopContext();
  StageStatisticsCollector collector = isPipelineContainsCondition
    ? new MapReduceStageStatisticsCollector(stageName, taskAttemptContext)
    : new NoopStageStatisticsCollector();
  return new TrackedMultiOutputTransform<>(splitterTransform, stageMetrics,
                                           taskContext.getDataTracer(stageName), collector);
}
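// A minimal sketch with made-up port names of what a splitter-style transform does:
// each input record goes to exactly one named output port, which is why it is wrapped
// in TrackedMultiOutputTransform rather than a single-output Transformation.
import java.util.function.BiConsumer;

class SplitterSketch {
  static void split(int record, BiConsumer<String, Integer> portEmitter) {
    // route by a simple predicate; a real splitter plugin chooses ports per its own logic
    portEmitter.accept(record % 2 == 0 ? "evens" : "odds", record);
  }
}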