  @Override
  public void destroy() {
    transformRunner.destroy();
  }
}
  @Override
  public MapReduceBatchContext getContext(DatasetContext datasetContext) {
    return new MapReduceBatchContext(context, pipelineRuntime, stageSpec, connectorDatasets, datasetContext);
  }
}
private OutputWriter<Object, Object> getSinkWriter(MapReduceTaskContext<Object, Object> context,
                                                   PipelinePhase pipelinePhase, Configuration hConf) {
  Set<StageSpec> reducers = pipelinePhase.getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
  JobContext hadoopContext = context.getHadoopContext();
  // If the phase contains an aggregator or joiner and this task is the mapper, output goes to the
  // shuffle rather than to the sinks, so a single output writer is always enough.
  if (!reducers.isEmpty() && hadoopContext instanceof Mapper.Context) {
    return new SingleOutputWriter<>(context);
  }

  String sinkOutputsStr = hConf.get(ETLMapReduce.SINK_OUTPUTS_KEY);
  // This should never be null, since it is set in initialize.
  Preconditions.checkNotNull(sinkOutputsStr, "Sink outputs not found in Hadoop conf.");
  Map<String, SinkOutput> sinkOutputs = GSON.fromJson(sinkOutputsStr, ETLMapReduce.SINK_OUTPUTS_TYPE);
  return hasSingleOutput(sinkOutputs)
    ? new SingleOutputWriter<>(context)
    : new MultiOutputWriter<>(context, sinkOutputs);
}
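// Hedged sketch (placeholder key and value type, not the real ETLMapReduce.SINK_OUTPUTS_KEY or
// SinkOutput class): how a sink-to-outputs mapping can be carried from initialize() to the tasks
// as a JSON string inside the Hadoop Configuration, using Gson on both ends.
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.hadoop.conf.Configuration;

import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ConfRoundTripSketch {
  public static void main(String[] args) {
    Gson gson = new Gson();
    Type mapType = new TypeToken<Map<String, List<String>>>() { }.getType();

    // Driver side: serialize the mapping into the job configuration.
    Map<String, List<String>> sinkOutputs = new HashMap<>();
    sinkOutputs.put("sinkA", Arrays.asList("datasetX", "datasetY"));
    Configuration hConf = new Configuration();
    hConf.set("sketch.sink.outputs", gson.toJson(sinkOutputs)); // placeholder key

    // Task side: read it back and deserialize with the same Type token.
    Map<String, List<String>> deserialized = gson.fromJson(hConf.get("sketch.sink.outputs"), mapType);
    System.out.println(deserialized); // {sinkA=[datasetX, datasetY]}
  }
}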
@Nullable
@Override
public JoinElement<INPUT_RECORD> apply(@Nullable TaggedWritable<REDUCE_VALUE> input) {
  // The parameter is declared @Nullable, so guard before dereferencing.
  if (input == null) {
    return null;
  }
  return new JoinElement<>(input.getStageName(), inputConversion.fromWritable(input.getRecord()));
}
});
MapperJoinerTransformation(Joiner<JOIN_KEY, INPUT_RECORD, OUT> joiner, String joinKeyClassName,
                           String joinInputClassName) {
  this.joiner = joiner;
  WritableConversion<JOIN_KEY, OUT_KEY> keyConversion = WritableConversions.getConversion(joinKeyClassName);
  WritableConversion<INPUT_RECORD, OUT_VALUE> inputConversion = WritableConversions.getConversion(joinInputClassName);
  // A null conversion means the type is already a Writable, so a plain cast suffices.
  this.keyConversion = keyConversion == null ? new CastConversion<JOIN_KEY, OUT_KEY>() : keyConversion;
  this.inputConversion = inputConversion == null ? new CastConversion<INPUT_RECORD, OUT_VALUE>() : inputConversion;
}
private boolean hasSingleOutput(Map<String, SinkOutput> sinkOutputs) {
  // Collect the distinct output datasets across all sinks; multiple sinks may share one output.
  Set<String> allOutputs = new HashSet<>();
  for (SinkOutput sinkOutput : sinkOutputs.values()) {
    allOutputs.addAll(sinkOutput.getSinkOutputs());
  }
  return allOutputs.size() == 1;
}
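// Hedged sketch (plain collections stand in for the real SinkOutput class): hasSingleOutput()
// treats two sinks that write to the same physical dataset as a single output, which is why a
// SingleOutputWriter can still be used with more than one logical sink.
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class SingleOutputSketch {
  public static void main(String[] args) {
    Map<String, Set<String>> sinkOutputs = new HashMap<>();
    sinkOutputs.put("sinkA", new HashSet<>(Arrays.asList("users")));
    sinkOutputs.put("sinkB", new HashSet<>(Arrays.asList("users"))); // same dataset as sinkA

    Set<String> allOutputs = new HashSet<>();
    for (Set<String> outputs : sinkOutputs.values()) {
      allOutputs.addAll(outputs);
    }
    System.out.println(allOutputs.size() == 1); // true: one SingleOutputWriter is enough
  }
}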
@Override
public void map(Object key, Object value, Mapper.Context context) throws IOException, InterruptedException {
  try {
    transformRunner.transform(key, value);
  } catch (StageFailureException e) {
    // Log the stage-aware message, then propagate the underlying cause.
    PIPELINE_LOG.error("{}", e.getMessage(), e.getCause());
    throw Throwables.propagate(e.getCause());
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
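// Hedged sketch (hypothetical exception type, not CDAP's StageFailureException): the
// wrap-log-unwrap pattern used in map()/reduce() above. The wrapper carries a stage-qualified
// message for the pipeline log, while only the root cause is rethrown.
public class StageFailureSketch {
  static class StageFailure extends RuntimeException {
    StageFailure(String stageName, Throwable cause) {
      super("Failed to execute stage '" + stageName + "'", cause);
    }
  }

  static void runStage() {
    try {
      throw new IllegalArgumentException("record is missing field 'id'");
    } catch (Exception cause) {
      throw new StageFailure("parser", cause); // wrap with a stage-qualified message
    }
  }

  public static void main(String[] args) {
    try {
      runStage();
    } catch (StageFailure e) {
      System.err.println(e.getMessage());       // stage-aware line for the pipeline log
      throw new RuntimeException(e.getCause()); // only the root cause propagates
    }
  }
}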
@Override
public void initialize(MapReduceTaskContext<Object, Object> context) throws Exception {
  // Get source, transform, and sink ids from the program properties.
  Map<String, String> properties = context.getSpecification().getProperties();
  if (Boolean.parseBoolean(properties.get(Constants.STAGE_LOGGING_ENABLED))) {
    LogStageInjector.start();
  }
  transformRunner = new TransformRunner<>(context, reducerMetrics);
}
@Override
public void emit(Object value) {
  try {
    // The emitter is untyped at this point; the value is expected to be a KeyValue here.
    outputWriter.write(stageName, (KeyValue<Object, Object>) value);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
private MapReduceRuntimeContext createRuntimeContext(StageSpec stageInfo) {
  PipelineRuntime pipelineRuntime = new PipelineRuntime(taskContext, metrics, arguments);
  return new MapReduceRuntimeContext(taskContext, pipelineRuntime, stageInfo);
}
@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void destroy() {
  boolean isSuccessful = getContext().getState().getStatus() == ProgramStatus.COMPLETED;
  // The finisher can be null if the initialize() method failed.
  if (finisher != null) {
    finisher.onFinish(isSuccessful);
  }
  LOG.info("Batch run finished: status = {}", getContext().getState());
}
/**
 * Instantiates and initializes the plugin for the stage.
 *
 * @param stageInfo the stage info
 * @return the initialized Transformation
 * @throws InstantiationException if the plugin for the stage could not be instantiated
 * @throws Exception if there was a problem initializing the plugin
 */
private <T extends Transformation & StageLifecycle<BatchRuntimeContext>> Transformation
    getInitializedTransformation(StageSpec stageInfo) throws Exception {
  BatchRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
  T plugin = pluginInstantiator.newPluginInstance(stageInfo.getName(), macroEvaluator);
  plugin.initialize(runtimeContext);
  return plugin;
}
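// Hedged sketch (hypothetical types, not the CDAP API): the intersection bound
// <T extends Transformation & StageLifecycle<...>> above requires the plugin to be both a
// transformation and lifecycle-aware, so it can be initialized before use. A minimal analogue
// with standard-library types:
import java.util.function.Function;

public class IntersectionTypeSketch {
  interface Lifecycle<C> {
    void initialize(C context);
  }

  /** A plugin that is both a Function and a Lifecycle, mirroring Transformation & StageLifecycle. */
  static class PrefixTransform implements Function<String, String>, Lifecycle<String> {
    private String prefix;

    @Override
    public void initialize(String context) {
      this.prefix = context;
    }

    @Override
    public String apply(String in) {
      return prefix + in;
    }
  }

  // T must satisfy both bounds; the caller gets back the narrower Function view, already initialized.
  static <T extends Function<String, String> & Lifecycle<String>>
      Function<String, String> getInitialized(T plugin, String context) {
    plugin.initialize(context);
    return plugin;
  }

  public static void main(String[] args) {
    Function<String, String> transform = getInitialized(new PrefixTransform(), ">> ");
    System.out.println(transform.apply("hello")); // >> hello
  }
}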
ReducerJoinerTransformation(Joiner<JOIN_KEY, INPUT_RECORD, OUT> joiner, String joinKeyClassName,
                            String joinInputClassName, int numOfInputs) {
  this.joiner = joiner;
  WritableConversion<JOIN_KEY, REDUCE_KEY> keyConversion = WritableConversions.getConversion(joinKeyClassName);
  WritableConversion<INPUT_RECORD, REDUCE_VALUE> inputConversion = WritableConversions.getConversion(joinInputClassName);
  // A null conversion means the type is already a Writable, so a plain cast suffices.
  this.keyConversion = keyConversion == null ? new CastConversion<JOIN_KEY, REDUCE_KEY>() : keyConversion;
  this.inputConversion = inputConversion == null ? new CastConversion<INPUT_RECORD, REDUCE_VALUE>() : inputConversion;
  this.numOfInputs = numOfInputs;
}
  @Override
  public void destroy() {
    transformRunner.destroy();
  }
}
  @Override
  public void write(String sinkName, KeyValue<KEY_OUT, VAL_OUT> output) throws Exception {
    // A single logical sink can map to multiple named outputs; write the pair to each of them.
    for (String outputName : sinkOutputs.get(sinkName).getSinkOutputs()) {
      context.write(outputName, output.getKey(), output.getValue());
    }
  }
}
@Override
protected void reduce(Object key, Iterable values, Context context) throws IOException, InterruptedException {
  try {
    transformRunner.transform(key, values.iterator());
  } catch (StageFailureException e) {
    // Log the stage-aware message, then propagate the underlying cause.
    PIPELINE_LOG.error("{}", e.getMessage(), e.getCause());
    throw Throwables.propagate(e.getCause());
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
@Override
public void initialize(MapReduceTaskContext<Object, Object> context) throws Exception {
  // Get source, transform, and sink ids from the program properties.
  Map<String, String> properties = context.getSpecification().getProperties();
  if (Boolean.parseBoolean(properties.get(Constants.STAGE_LOGGING_ENABLED))) {
    LogStageInjector.start();
  }
  transformRunner = new TransformRunner<>(context, mapperMetrics);
}
ReducerAggregatorTransformation(Aggregator<GROUP_KEY, GROUP_VAL, OUT> aggregator, String groupKeyClassName,
                                String groupValClassName) {
  this.aggregator = aggregator;
  WritableConversion<GROUP_KEY, REDUCE_KEY> keyConversion = WritableConversions.getConversion(groupKeyClassName);
  WritableConversion<GROUP_VAL, REDUCE_VAL> valConversion = WritableConversions.getConversion(groupValClassName);
  // A null conversion means the type is already a Writable, so a plain cast suffices.
  this.keyConversion = keyConversion == null ? new CastConversion<GROUP_KEY, REDUCE_KEY>() : keyConversion;
  this.valConversion = valConversion == null ? new CastConversion<GROUP_VAL, REDUCE_VAL>() : valConversion;
}
MapperAggregatorTransformation(Aggregator<GROUP_KEY, GROUP_VAL, ?> aggregator, String groupKeyClassName,
                               String groupValClassName) {
  this.aggregator = aggregator;
  this.groupKeyEmitter =
    new NoErrorEmitter<>("Errors and Alerts cannot be emitted from the groupBy method of an aggregator");
  WritableConversion<GROUP_KEY, OUT_KEY> keyConversion = WritableConversions.getConversion(groupKeyClassName);
  WritableConversion<GROUP_VAL, OUT_VAL> valConversion = WritableConversions.getConversion(groupValClassName);
  // If the conversion is null, it means the user is already using a Writable type.
  this.keyConversion = keyConversion == null ? new CastConversion<GROUP_KEY, OUT_KEY>() : keyConversion;
  this.valConversion = valConversion == null ? new CastConversion<GROUP_VAL, OUT_VAL>() : valConversion;
}
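// Hedged sketch (hypothetical types, not CDAP's WritableConversions API): the fallback pattern
// shared by all four constructors above. A registry lookup returns null when the class is
// already a Writable, and a cast-through conversion fills the gap.
public class ConversionFallbackSketch {
  interface Conversion<FROM, TO> {
    TO convert(FROM value);
  }

  /** Pass-through conversion for values that are already the target type. */
  static class CastConversion<FROM, TO> implements Conversion<FROM, TO> {
    @SuppressWarnings("unchecked")
    @Override
    public TO convert(FROM value) {
      return (TO) value;
    }
  }

  /** Stand-in registry: only knows how to convert Integer to String. */
  @SuppressWarnings("unchecked")
  static <F, T> Conversion<F, T> getConversion(String className) {
    if ("java.lang.Integer".equals(className)) {
      return (Conversion<F, T>) (Conversion<Integer, String>) String::valueOf;
    }
    return null; // unknown class: caller falls back to CastConversion
  }

  public static void main(String[] args) {
    // Registered type: a real conversion is found.
    Conversion<Integer, String> intConv = getConversion("java.lang.Integer");
    System.out.println(intConv.convert(42)); // "42"

    // Unregistered type: the null result triggers the cast-through fallback.
    Conversion<String, String> strConv = getConversion("java.lang.String");
    strConv = strConv == null ? new CastConversion<>() : strConv;
    System.out.println(strConv.convert("already the right type"));
  }
}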