private void report(Stats stats, FlowProcess<Properties> flowProcess) { // report current stats for (Counter count : Counter.ALL) { flowProcess.increment(count, count.get(stats)); } }
/**
 * Flow-configuration hook: registers the serialization token on the flow's
 * job configuration before the flow is submitted.
 *
 * @param flow flow whose configuration is amended.
 */
@Override
public void flowConfInit(Flow<JobConf> flow) {
  final JobConf jobConf = flow.getConfig();
  CascadingUtils.addSerializationToken(jobConf);
}
/**
 * Looks up the ambrose DAG node registered for the given flow step.
 *
 * @param step step whose node is requested.
 * @return the node registered under the step's name.
 * @throws IllegalStateException if no node is registered for the step's name.
 */
private DAGNode<CascadingJob> getNode(FlowStep step) {
  final String stepName = step.getName();
  final DAGNode<CascadingJob> node = nodesByName.get(stepName);
  if (node != null) {
    return node;
  }
  throw new IllegalStateException(String.format("Node with name '%s' not found", stepName));
}
// NOTE(review): this method appears truncated by extraction — the braces do not
// balance, the EdgeFactory anonymous-class body is missing, and the method never
// closes. Do not edit from this view; recover the full source before changing it.
// It seems to mirror the complete onStarting(Flow) elsewhere in this file, then
// rebuild the step graph as a JGraphT DirectedGraph — TODO confirm against the
// original file.
public void onStarting(Flow flow) {
  // capture flow-level bookkeeping before any step runs
  List<FlowStep> steps = flow.getFlowSteps();
  totalNumberOfJobs = steps.size();
  currentFlowId = flow.getID();
  props.putAll(flow.getConfigAsProperties());
  try {
    statsWriteService.initWriteService(props);
    FlowStepGraph flowStepGraph = Flows.getStepGraphFrom(flow);
    DirectedGraph graph = new DefaultDirectedGraph<BaseFlowStep, FlowGraphEdge>(
        new EdgeFactory<BaseFlowStep, FlowGraphEdge>() {
    // copy every vertex of the cascading step graph into the new graph
    for (FlowStep v: flowStepGraph.vertexSet()) {
      graph.addVertex(v);
    // mirror each process edge by source/sink process id
    for (ProcessEdge e: flowStepGraph.edgeSet()) {
      graph.addEdge(e.getSourceProcessID(), e.getSinkProcessID());
@Override public void sinkPrepare(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException { super.sinkPrepare(flowProcess, sinkCall); Object[] context = new Object[SINK_CTX_SIZE]; // the tuple is fixed, so we can just use a collection/index Settings settings = loadSettings(flowProcess.getConfigCopy(), false); context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields()); sinkCall.setContext(context); }
/** * The onStarting event is fired when a Flow instance receives the start() message. A Flow is cut * down into executing units called stepFlow. A stepFlow contains a stepFlowJob which represents * the mapreduce job to be submitted to Hadoop. The ambrose graph is constructed from the step * graph found in flow object. * * @param flow the flow. */ @Override @SuppressWarnings("unchecked") public void onStarting(Flow flow) { // init flow List<FlowStep> steps = flow.getFlowSteps(); totalNumberOfJobs = steps.size(); currentFlowId = flow.getID(); Properties props = new Properties(); props.putAll(flow.getConfigAsProperties()); try { statsWriteService.initWriteService(props); } catch (IOException e) { LOG.error("Failed to initialize statsWriteService", e); } // convert graph from cascading to ambrose AmbroseCascadingGraphConverter converter = new AmbroseCascadingGraphConverter(Flows.getStepGraphFrom(flow), nodesByName); converter.convert(); AmbroseUtils.sendDagNodeNameMap(statsWriteService, currentFlowId, nodesByName); }
// Builds a new FlowSession wrapping this flow's cascading services.
// NOTE(review): a fresh FlowSession is created on every call — confirm callers
// do not expect a cached instance.
public FlowSession getFlowSession() { return new FlowSession( getCascadingServices() ); }
/**
 * Fetches the ambrose node for the given step and refreshes its job state
 * from the step's Hadoop statistics.
 *
 * @param step step whose ambrose node is refreshed.
 * @return the refreshed node.
 */
private DAGNode<CascadingJob> updateNode(FlowStep step) {
  final DAGNode<CascadingJob> node = getNode(step);
  final HadoopStepStats stepStats = (HadoopStepStats) step.getFlowStepStats();
  final CascadingJob job = node.getJob();
  job.setId(stepStats.getProcessStepID());
  job.setJobStats(stepStats);
  // Pull the live MapReduce job state via the step's job client.
  mapReduceHelper.addMapReduceJobState(job, stepStats.getJobClient());
  return node;
}
/**
 * Creates a trace writer bound to the given flow; a null flow yields an
 * unconfigured (no-op) writer.
 *
 * @param flow flow to trace, may be null.
 */
public TraceWriter( Flow flow )
  {
  if( flow != null )
    {
    this.properties = flow.getConfigAsProperties();
    this.flowName = Flows.getNameOrID( flow );
    this.processLogger = (ProcessLogger) flow;
    }
  }
/**
 * Reports whether any step of this flow has at least one step listener.
 *
 * @return true if any step has listeners.
 */
@Override
public boolean hasStepListeners()
  {
  // Short-circuit: one listening step is enough to answer true.
  // (hasListeners() is a pure query, so skipping the remaining steps is safe.)
  for( FlowStep step : getFlowSteps() )
    {
    if( step.hasListeners() )
      return true;
    }

  return false;
  }
/**
 * Registers the given listener on every step of this flow.
 *
 * @param flowStepListener listener to fan out to all steps.
 */
@Override
public void addStepListener( FlowStepListener flowStepListener )
  {
  for( FlowStep flowStep : getFlowSteps() )
    {
    flowStep.addListener( flowStepListener );
    }
  }
/**
 * Removes the given listener from every step of this flow.
 *
 * @param flowStepListener listener to remove.
 * @return true if the listener was removed from every step (vacuously true
 *         when the flow has no steps).
 */
@Override
public boolean removeStepListener( FlowStepListener flowStepListener )
  {
  boolean removedFromAll = true;

  // Deliberately no short-circuit: removal must be attempted on every step.
  for( FlowStep flowStep : getFlowSteps() )
    removedFromAll = flowStep.removeListener( flowStepListener ) && removedFromAll;

  return removedFromAll;
  }
/**
 * Prepares the source call: builds the context array carrying the alias map
 * and the output-as-JSON flag used while reading scroll results.
 *
 * @param flowProcess current flow process (source of the configuration copy).
 * @param sourceCall  source call that receives the prepared context.
 * @throws IOException if the superclass preparation fails.
 */
@Override
public void sourcePrepare(FlowProcess<Properties> flowProcess, SourceCall<Object[], ScrollQuery> sourceCall) throws IOException {
  super.sourcePrepare(flowProcess, sourceCall);
  final Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
  final Object[] context = new Object[SRC_CTX_SIZE];
  context[SRC_CTX_ALIASES] = CascadingUtils.alias(settings);
  context[SRC_CTX_OUTPUT_JSON] = settings.getOutputAsJson();
  sourceCall.setContext(context);
}
/**
 * Returns the ambrose DAG node registered under the given step's name.
 *
 * @param step step whose node is requested.
 * @return node associated with step.
 * @throws IllegalStateException if the step's name has no registered node.
 */
private DAGNode<CascadingJob> getNode(FlowStep step) {
  String stepName = step.getName();
  DAGNode<CascadingJob> dagNode = nodesByName.get(stepName);
  if (dagNode == null) {
    throw new IllegalStateException(String.format("Node with name '%s' not found", stepName));
  }
  return dagNode;
}
/**
 * Fetches the ambrose node for the given step and refreshes its job id,
 * statistics, and live MapReduce state.
 *
 * @param step step whose ambrose node is refreshed.
 * @return the refreshed node.
 */
private DAGNode<CascadingJob> updateNode(FlowStep step) {
  final DAGNode<CascadingJob> dagNode = getNode(step);
  final HadoopStepStats hadoopStats = (HadoopStepStats) step.getFlowStepStats();
  final CascadingJob cascadingJob = dagNode.getJob();
  cascadingJob.setId(hadoopStats.getJobID());
  cascadingJob.setJobStats(hadoopStats);
  // Refresh the running MapReduce job state through the step's job client.
  mapReduceHelper.addMapReduceJobState(cascadingJob, hadoopStats.getJobClient());
  return dagNode;
}
/**
 * Prepares the sink call: builds the context array carrying the
 * field-to-alias mapping used while writing tuples.
 *
 * @param flowProcess current flow process (source of the configuration copy).
 * @param sinkCall    sink call that receives the prepared context.
 * @throws IOException if the superclass preparation fails.
 */
@Override
public void sinkPrepare(FlowProcess<Properties> flowProcess, SinkCall<Object[], Object> sinkCall) throws IOException {
  super.sinkPrepare(flowProcess, sinkCall);
  final Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
  final Object[] context = new Object[SINK_CTX_SIZE];
  context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields());
  sinkCall.setContext(context);
}
/** * onStepProgressing event is fired whenever a job makes progress. * * @param step the step in the flow that represents the MapReduce job. */ @Override public void onStepRunning(FlowStep step) { // first we report the scripts progress int progress = (int) ((((double) runningJobs) / totalNumberOfJobs) * 100); AmbroseUtils.pushWorkflowProgressEvent(statsWriteService, currentFlowId, progress); // only push job progress events for a completed step once if (completedStepNames.contains(step.getName())) { return; } try { // update node DAGNode<CascadingJob> node = updateNode(step); if (node.getJob().getMapReduceJobState() != null) { AmbroseUtils.pushEvent(statsWriteService, currentFlowId, new Event.JobProgressEvent(node)); if (node.getJob().getMapReduceJobState().isComplete()) { completedStepNames.add(step.getName()); } } } catch (Exception e) { LOG.error("Failed to handle onStepRunning event", e); } }
@Override public void sourcePrepare(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException { super.sourcePrepare(flowProcess, sourceCall); Object[] context = new Object[SRC_CTX_SIZE]; context[SRC_CTX_KEY] = sourceCall.getInput().createKey(); context[SRC_CTX_VALUE] = sourceCall.getInput().createValue(); // as the tuple _might_ vary (some objects might be missing), we use a map rather then a collection Settings settings = loadSettings(flowProcess.getConfigCopy(), true); context[SRC_CTX_ALIASES] = CascadingUtils.alias(settings); context[SRC_CTX_OUTPUT_JSON] = settings.getOutputAsJson(); sourceCall.setContext(context); }
/** * onStepProgressing event is fired whenever a job makes progress. * * @param step the step in the flow that represents the MapReduce job. */ @Override public void onStepRunning(FlowStep step) { // first we report the scripts progress int progress = (int) ((((double) runningJobs) / totalNumberOfJobs) * 100); AmbroseUtils.pushWorkflowProgressEvent(statsWriteService, currentFlowId, progress); // only push job progress events for a completed step once if (completedStepNames.contains(step.getName())) { return; } try { // update node DAGNode<CascadingJob> node = updateNode(step); if (node.getJob().getMapReduceJobState() != null) { AmbroseUtils.pushEvent(statsWriteService, currentFlowId, new Event.JobProgressEvent(node)); if (node.getJob().getMapReduceJobState().isComplete()) { completedStepNames.add(step.getName()); } } } catch (Exception e) { LOG.error("Failed to handle onStepRunning event", e); } }
if (input == null) { Settings settings = CascadingUtils.addDefaultsToSettings(CascadingUtils.extractOriginalProperties(flowProcess.getConfigCopy()), tapProperties, log);