  @Override
  public void destroy() {
    transformRunner.destroy();
  }
}
  @Override
  public MapReduceBatchContext getContext(DatasetContext datasetContext) {
    return new MapReduceBatchContext(context, pipelineRuntime, stageSpec, connectorDatasets, datasetContext);
  }
}
private OutputWriter<Object, Object> getSinkWriter(MapReduceTaskContext<Object, Object> context,
                                                   PipelinePhase pipelinePhase, Configuration hConf) {
  Set<StageSpec> reducers = pipelinePhase.getStagesOfType(BatchAggregator.PLUGIN_TYPE, BatchJoiner.PLUGIN_TYPE);
  JobContext hadoopContext = context.getHadoopContext();
  // If the phase contains an aggregator or joiner and this task is the mapper, output goes to the
  // shuffle rather than to the sinks, so a single output writer is always enough.
  if (!reducers.isEmpty() && hadoopContext instanceof Mapper.Context) {
    return new SingleOutputWriter<>(context);
  }

  String sinkOutputsStr = hConf.get(ETLMapReduce.SINK_OUTPUTS_KEY);
  // This should never be null, since it is set in initialize.
  Preconditions.checkNotNull(sinkOutputsStr, "Sink outputs not found in Hadoop conf.");
  Map<String, SinkOutput> sinkOutputs = GSON.fromJson(sinkOutputsStr, ETLMapReduce.SINK_OUTPUTS_TYPE);
  return hasSingleOutput(sinkOutputs)
    ? new SingleOutputWriter<>(context)
    : new MultiOutputWriter<>(context, sinkOutputs);
}
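// Hedged sketch (placeholder key and value type, not the real ETLMapReduce.SINK_OUTPUTS_KEY or
// SinkOutput class): how a sink-to-outputs mapping can be carried from initialize() to the tasks
// as a JSON string inside the Hadoop Configuration, using Gson on both ends.
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.hadoop.conf.Configuration;

import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class ConfRoundTripSketch {
  public static void main(String[] args) {
    Gson gson = new Gson();
    Type mapType = new TypeToken<Map<String, List<String>>>() { }.getType();

    // Driver side: serialize the mapping into the job configuration.
    Map<String, List<String>> sinkOutputs = new HashMap<>();
    sinkOutputs.put("sinkA", Arrays.asList("datasetX", "datasetY"));
    Configuration hConf = new Configuration();
    hConf.set("sketch.sink.outputs", gson.toJson(sinkOutputs)); // placeholder key

    // Task side: read it back and deserialize with the same Type token.
    Map<String, List<String>> deserialized = gson.fromJson(hConf.get("sketch.sink.outputs"), mapType);
    System.out.println(deserialized); // {sinkA=[datasetX, datasetY]}
  }
}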
@Nullable
@Override
public JoinElement<INPUT_RECORD> apply(@Nullable TaggedWritable<REDUCE_VALUE> input) {
  // The parameter is declared @Nullable, so guard before dereferencing.
  if (input == null) {
    return null;
  }
  return new JoinElement<>(input.getStageName(), inputConversion.fromWritable(input.getRecord()));
}
});
MapperJoinerTransformation(Joiner<JOIN_KEY, INPUT_RECORD, OUT> joiner, String joinKeyClassName,
                           String joinInputClassName) {
  this.joiner = joiner;
  WritableConversion<JOIN_KEY, OUT_KEY> keyConversion = WritableConversions.getConversion(joinKeyClassName);
  WritableConversion<INPUT_RECORD, OUT_VALUE> inputConversion = WritableConversions.getConversion(joinInputClassName);
  // A null conversion means the type is already a Writable, so a plain cast suffices.
  this.keyConversion = keyConversion == null ? new CastConversion<JOIN_KEY, OUT_KEY>() : keyConversion;
  this.inputConversion = inputConversion == null ? new CastConversion<INPUT_RECORD, OUT_VALUE>() : inputConversion;
}
private boolean hasSingleOutput(Map<String, SinkOutput> sinkOutputs) {
  // Collect the distinct output datasets across all sinks; multiple sinks may share one output.
  Set<String> allOutputs = new HashSet<>();
  for (SinkOutput sinkOutput : sinkOutputs.values()) {
    allOutputs.addAll(sinkOutput.getSinkOutputs());
  }
  return allOutputs.size() == 1;
}
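// Hedged sketch (plain collections stand in for the real SinkOutput class): hasSingleOutput()
// treats two sinks that write to the same physical dataset as a single output, which is why a
// SingleOutputWriter can still be used with more than one logical sink.
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class SingleOutputSketch {
  public static void main(String[] args) {
    Map<String, Set<String>> sinkOutputs = new HashMap<>();
    sinkOutputs.put("sinkA", new HashSet<>(Arrays.asList("users")));
    sinkOutputs.put("sinkB", new HashSet<>(Arrays.asList("users"))); // same dataset as sinkA

    Set<String> allOutputs = new HashSet<>();
    for (Set<String> outputs : sinkOutputs.values()) {
      allOutputs.addAll(outputs);
    }
    System.out.println(allOutputs.size() == 1); // true: one SingleOutputWriter is enough
  }
}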
@Override
public void map(Object key, Object value, Mapper.Context context) throws IOException, InterruptedException {
  try {
    transformRunner.transform(key, value);
  } catch (StageFailureException e) {
    // Log the stage-aware message, then propagate the underlying cause.
    PIPELINE_LOG.error("{}", e.getMessage(), e.getCause());
    throw Throwables.propagate(e.getCause());
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
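// Hedged sketch (hypothetical exception type, not CDAP's StageFailureException): the
// wrap-log-unwrap pattern used in map()/reduce() above. The wrapper carries a stage-qualified
// message for the pipeline log, while only the root cause is rethrown.
public class StageFailureSketch {
  static class StageFailure extends RuntimeException {
    StageFailure(String stageName, Throwable cause) {
      super("Failed to execute stage '" + stageName + "'", cause);
    }
  }

  static void runStage() {
    try {
      throw new IllegalArgumentException("record is missing field 'id'");
    } catch (Exception cause) {
      throw new StageFailure("parser", cause); // wrap with a stage-qualified message
    }
  }

  public static void main(String[] args) {
    try {
      runStage();
    } catch (StageFailure e) {
      System.err.println(e.getMessage());       // stage-aware line for the pipeline log
      throw new RuntimeException(e.getCause()); // only the root cause propagates
    }
  }
}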
@Override
public void initialize(MapReduceTaskContext<Object, Object> context) throws Exception {
  // Get source, transform, and sink ids from the program properties.
  Map<String, String> properties = context.getSpecification().getProperties();
  if (Boolean.parseBoolean(properties.get(Constants.STAGE_LOGGING_ENABLED))) {
    LogStageInjector.start();
  }
  transformRunner = new TransformRunner<>(context, reducerMetrics);
}
@Override
public void emit(Object value) {
  try {
    // The emitter is untyped at this point; the value is expected to be a KeyValue here.
    outputWriter.write(stageName, (KeyValue<Object, Object>) value);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
private MapReduceRuntimeContext createRuntimeContext(StageSpec stageInfo) {
  PipelineRuntime pipelineRuntime = new PipelineRuntime(taskContext, metrics, arguments);
  return new MapReduceRuntimeContext(taskContext, pipelineRuntime, stageInfo);
}
@Override
@TransactionPolicy(TransactionControl.EXPLICIT)
public void destroy() {
  boolean isSuccessful = getContext().getState().getStatus() == ProgramStatus.COMPLETED;
  // The finisher can be null if the initialize() method failed.
  if (finisher != null) {
    finisher.onFinish(isSuccessful);
  }
  LOG.info("Batch run finished: status = {}", getContext().getState());
}
/**
 * Instantiates and initializes the plugin for the stage.
 *
 * @param stageInfo the stage info
 * @return the initialized Transformation
 * @throws InstantiationException if the plugin for the stage could not be instantiated
 * @throws Exception if there was a problem initializing the plugin
 */
private <T extends Transformation & StageLifecycle<BatchRuntimeContext>> Transformation
    getInitializedTransformation(StageSpec stageInfo) throws Exception {
  BatchRuntimeContext runtimeContext = createRuntimeContext(stageInfo);
  T plugin = pluginInstantiator.newPluginInstance(stageInfo.getName(), macroEvaluator);
  plugin.initialize(runtimeContext);
  return plugin;
}
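// Hedged sketch (hypothetical types, not the CDAP API): the intersection bound
// <T extends Transformation & StageLifecycle<...>> above requires the plugin to be both a
// transformation and lifecycle-aware, so it can be initialized before use. A minimal analogue
// with standard-library types:
import java.util.function.Function;

public class IntersectionTypeSketch {
  interface Lifecycle<C> {
    void initialize(C context);
  }

  /** A plugin that is both a Function and a Lifecycle, mirroring Transformation & StageLifecycle. */
  static class PrefixTransform implements Function<String, String>, Lifecycle<String> {
    private String prefix;

    @Override
    public void initialize(String context) {
      this.prefix = context;
    }

    @Override
    public String apply(String in) {
      return prefix + in;
    }
  }

  // T must satisfy both bounds; the caller gets back the narrower Function view, already initialized.
  static <T extends Function<String, String> & Lifecycle<String>>
      Function<String, String> getInitialized(T plugin, String context) {
    plugin.initialize(context);
    return plugin;
  }

  public static void main(String[] args) {
    Function<String, String> transform = getInitialized(new PrefixTransform(), ">> ");
    System.out.println(transform.apply("hello")); // >> hello
  }
}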
ReducerJoinerTransformation(Joiner<JOIN_KEY, INPUT_RECORD, OUT> joiner, String joinKeyClassName,
                            String joinInputClassName, int numOfInputs) {
  this.joiner = joiner;
  WritableConversion<JOIN_KEY, REDUCE_KEY> keyConversion = WritableConversions.getConversion(joinKeyClassName);
  WritableConversion<INPUT_RECORD, REDUCE_VALUE> inputConversion = WritableConversions.getConversion(joinInputClassName);
  // A null conversion means the type is already a Writable, so a plain cast suffices.
  this.keyConversion = keyConversion == null ? new CastConversion<JOIN_KEY, REDUCE_KEY>() : keyConversion;
  this.inputConversion = inputConversion == null ? new CastConversion<INPUT_RECORD, REDUCE_VALUE>() : inputConversion;
  this.numOfInputs = numOfInputs;
}
  @Override
  public void destroy() {
    transformRunner.destroy();
  }
}
  @Override
  public void write(String sinkName, KeyValue<KEY_OUT, VAL_OUT> output) throws Exception {
    // A single logical sink can map to multiple named outputs; write the pair to each of them.
    for (String outputName : sinkOutputs.get(sinkName).getSinkOutputs()) {
      context.write(outputName, output.getKey(), output.getValue());
    }
  }
}
@Override
protected void reduce(Object key, Iterable values, Context context) throws IOException, InterruptedException {
  try {
    transformRunner.transform(key, values.iterator());
  } catch (StageFailureException e) {
    // Log the stage-aware message, then propagate the underlying cause.
    PIPELINE_LOG.error("{}", e.getMessage(), e.getCause());
    throw Throwables.propagate(e.getCause());
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
@Override
public void initialize(MapReduceTaskContext<Object, Object> context) throws Exception {
  // Get source, transform, and sink ids from the program properties.
  Map<String, String> properties = context.getSpecification().getProperties();
  if (Boolean.parseBoolean(properties.get(Constants.STAGE_LOGGING_ENABLED))) {
    LogStageInjector.start();
  }
  transformRunner = new TransformRunner<>(context, mapperMetrics);
}
ReducerAggregatorTransformation(Aggregator<GROUP_KEY, GROUP_VAL, OUT> aggregator, String groupKeyClassName,
                                String groupValClassName) {
  this.aggregator = aggregator;
  WritableConversion<GROUP_KEY, REDUCE_KEY> keyConversion = WritableConversions.getConversion(groupKeyClassName);
  WritableConversion<GROUP_VAL, REDUCE_VAL> valConversion = WritableConversions.getConversion(groupValClassName);
  // A null conversion means the type is already a Writable, so a plain cast suffices.
  this.keyConversion = keyConversion == null ? new CastConversion<GROUP_KEY, REDUCE_KEY>() : keyConversion;
  this.valConversion = valConversion == null ? new CastConversion<GROUP_VAL, REDUCE_VAL>() : valConversion;
}
MapperAggregatorTransformation(Aggregator<GROUP_KEY, GROUP_VAL, ?> aggregator, String groupKeyClassName,
                               String groupValClassName) {
  this.aggregator = aggregator;
  this.groupKeyEmitter =
    new NoErrorEmitter<>("Errors and Alerts cannot be emitted from the groupBy method of an aggregator");
  WritableConversion<GROUP_KEY, OUT_KEY> keyConversion = WritableConversions.getConversion(groupKeyClassName);
  WritableConversion<GROUP_VAL, OUT_VAL> valConversion = WritableConversions.getConversion(groupValClassName);
  // If the conversion is null, it means the user is already using a Writable type.
  this.keyConversion = keyConversion == null ? new CastConversion<GROUP_KEY, OUT_KEY>() : keyConversion;
  this.valConversion = valConversion == null ? new CastConversion<GROUP_VAL, OUT_VAL>() : valConversion;
}
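// Hedged sketch (hypothetical types, not CDAP's WritableConversions API): the fallback pattern
// shared by all four constructors above. A registry lookup returns null when the class is
// already a Writable, and a cast-through conversion fills the gap.
public class ConversionFallbackSketch {
  interface Conversion<FROM, TO> {
    TO convert(FROM value);
  }

  /** Pass-through conversion for values that are already the target type. */
  static class CastConversion<FROM, TO> implements Conversion<FROM, TO> {
    @SuppressWarnings("unchecked")
    @Override
    public TO convert(FROM value) {
      return (TO) value;
    }
  }

  /** Stand-in registry: only knows how to convert Integer to String. */
  @SuppressWarnings("unchecked")
  static <F, T> Conversion<F, T> getConversion(String className) {
    if ("java.lang.Integer".equals(className)) {
      return (Conversion<F, T>) (Conversion<Integer, String>) String::valueOf;
    }
    return null; // unknown class: caller falls back to CastConversion
  }

  public static void main(String[] args) {
    // Registered type: a real conversion is found.
    Conversion<Integer, String> intConv = getConversion("java.lang.Integer");
    System.out.println(intConv.convert(42)); // "42"

    // Unregistered type: the null result triggers the cast-through fallback.
    Conversion<String, String> strConv = getConversion("java.lang.String");
    strConv = strConv == null ? new CastConversion<>() : strConv;
    System.out.println(strConv.convert("already the right type"));
  }
}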