private void report(Stats stats, FlowProcess<Properties> flowProcess) { // report current stats for (Counter count : Counter.ALL) { flowProcess.increment(count, count.get(stats)); } }
/**
 * Flow-configuration hook: registers the serialization token on the flow's
 * job configuration before the flow is submitted.
 *
 * @param flow flow whose configuration is amended.
 */
@Override
public void flowConfInit(Flow<JobConf> flow) {
  final JobConf jobConf = flow.getConfig();
  CascadingUtils.addSerializationToken(jobConf);
}
/**
 * Looks up the ambrose DAG node registered for the given flow step.
 *
 * @param step step whose node is requested.
 * @return the node registered under the step's name.
 * @throws IllegalStateException if no node is registered for the step's name.
 */
private DAGNode<CascadingJob> getNode(FlowStep step) {
  final String stepName = step.getName();
  final DAGNode<CascadingJob> node = nodesByName.get(stepName);
  if (node != null) {
    return node;
  }
  throw new IllegalStateException(String.format("Node with name '%s' not found", stepName));
}
// NOTE(review): this method appears truncated by extraction — the braces do not
// balance, the EdgeFactory anonymous-class body is missing, and the method never
// closes. Do not edit from this view; recover the full source before changing it.
// It seems to mirror the complete onStarting(Flow) elsewhere in this file, then
// rebuild the step graph as a JGraphT DirectedGraph — TODO confirm against the
// original file.
public void onStarting(Flow flow) {
  // capture flow-level bookkeeping before any step runs
  List<FlowStep> steps = flow.getFlowSteps();
  totalNumberOfJobs = steps.size();
  currentFlowId = flow.getID();
  props.putAll(flow.getConfigAsProperties());
  try {
    statsWriteService.initWriteService(props);
    FlowStepGraph flowStepGraph = Flows.getStepGraphFrom(flow);
    DirectedGraph graph = new DefaultDirectedGraph<BaseFlowStep, FlowGraphEdge>(
        new EdgeFactory<BaseFlowStep, FlowGraphEdge>() {
    // copy every vertex of the cascading step graph into the new graph
    for (FlowStep v: flowStepGraph.vertexSet()) {
      graph.addVertex(v);
    // mirror each process edge by source/sink process id
    for (ProcessEdge e: flowStepGraph.edgeSet()) {
      graph.addEdge(e.getSourceProcessID(), e.getSinkProcessID());
@Override public void sinkPrepare(FlowProcess<JobConf> flowProcess, SinkCall<Object[], OutputCollector> sinkCall) throws IOException { super.sinkPrepare(flowProcess, sinkCall); Object[] context = new Object[SINK_CTX_SIZE]; // the tuple is fixed, so we can just use a collection/index Settings settings = loadSettings(flowProcess.getConfigCopy(), false); context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields()); sinkCall.setContext(context); }
/** * The onStarting event is fired when a Flow instance receives the start() message. A Flow is cut * down into executing units called stepFlow. A stepFlow contains a stepFlowJob which represents * the mapreduce job to be submitted to Hadoop. The ambrose graph is constructed from the step * graph found in flow object. * * @param flow the flow. */ @Override @SuppressWarnings("unchecked") public void onStarting(Flow flow) { // init flow List<FlowStep> steps = flow.getFlowSteps(); totalNumberOfJobs = steps.size(); currentFlowId = flow.getID(); Properties props = new Properties(); props.putAll(flow.getConfigAsProperties()); try { statsWriteService.initWriteService(props); } catch (IOException e) { LOG.error("Failed to initialize statsWriteService", e); } // convert graph from cascading to ambrose AmbroseCascadingGraphConverter converter = new AmbroseCascadingGraphConverter(Flows.getStepGraphFrom(flow), nodesByName); converter.convert(); AmbroseUtils.sendDagNodeNameMap(statsWriteService, currentFlowId, nodesByName); }
// Builds a new FlowSession wrapping this flow's cascading services.
// NOTE(review): a fresh FlowSession is created on every call — confirm callers
// do not expect a cached instance.
public FlowSession getFlowSession() { return new FlowSession( getCascadingServices() ); }
/**
 * Fetches the ambrose node for the given step and refreshes its job state
 * from the step's Hadoop statistics.
 *
 * @param step step whose ambrose node is refreshed.
 * @return the refreshed node.
 */
private DAGNode<CascadingJob> updateNode(FlowStep step) {
  final DAGNode<CascadingJob> node = getNode(step);
  final HadoopStepStats stepStats = (HadoopStepStats) step.getFlowStepStats();
  final CascadingJob job = node.getJob();
  job.setId(stepStats.getProcessStepID());
  job.setJobStats(stepStats);
  // Pull the live MapReduce job state via the step's job client.
  mapReduceHelper.addMapReduceJobState(job, stepStats.getJobClient());
  return node;
}
/**
 * Creates a trace writer bound to the given flow; a null flow yields an
 * unconfigured (no-op) writer.
 *
 * @param flow flow to trace, may be null.
 */
public TraceWriter( Flow flow )
  {
  if( flow != null )
    {
    this.properties = flow.getConfigAsProperties();
    this.flowName = Flows.getNameOrID( flow );
    this.processLogger = (ProcessLogger) flow;
    }
  }
/**
 * Reports whether any step of this flow has at least one step listener.
 *
 * @return true if any step has listeners.
 */
@Override
public boolean hasStepListeners()
  {
  // Short-circuit: one listening step is enough to answer true.
  // (hasListeners() is a pure query, so skipping the remaining steps is safe.)
  for( FlowStep step : getFlowSteps() )
    {
    if( step.hasListeners() )
      return true;
    }

  return false;
  }
/**
 * Registers the given listener on every step of this flow.
 *
 * @param flowStepListener listener to fan out to all steps.
 */
@Override
public void addStepListener( FlowStepListener flowStepListener )
  {
  for( FlowStep flowStep : getFlowSteps() )
    {
    flowStep.addListener( flowStepListener );
    }
  }
/**
 * Removes the given listener from every step of this flow.
 *
 * @param flowStepListener listener to remove.
 * @return true if the listener was removed from every step (vacuously true
 *         when the flow has no steps).
 */
@Override
public boolean removeStepListener( FlowStepListener flowStepListener )
  {
  boolean removedFromAll = true;

  // Deliberately no short-circuit: removal must be attempted on every step.
  for( FlowStep flowStep : getFlowSteps() )
    removedFromAll = flowStep.removeListener( flowStepListener ) && removedFromAll;

  return removedFromAll;
  }
/**
 * Prepares the source call: builds the context array carrying the alias map
 * and the output-as-JSON flag used while reading scroll results.
 *
 * @param flowProcess current flow process (source of the configuration copy).
 * @param sourceCall  source call that receives the prepared context.
 * @throws IOException if the superclass preparation fails.
 */
@Override
public void sourcePrepare(FlowProcess<Properties> flowProcess, SourceCall<Object[], ScrollQuery> sourceCall) throws IOException {
  super.sourcePrepare(flowProcess, sourceCall);
  final Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
  final Object[] context = new Object[SRC_CTX_SIZE];
  context[SRC_CTX_ALIASES] = CascadingUtils.alias(settings);
  context[SRC_CTX_OUTPUT_JSON] = settings.getOutputAsJson();
  sourceCall.setContext(context);
}
/**
 * Returns the ambrose DAG node registered under the given step's name.
 *
 * @param step step whose node is requested.
 * @return node associated with step.
 * @throws IllegalStateException if the step's name has no registered node.
 */
private DAGNode<CascadingJob> getNode(FlowStep step) {
  String stepName = step.getName();
  DAGNode<CascadingJob> dagNode = nodesByName.get(stepName);
  if (dagNode == null) {
    throw new IllegalStateException(String.format("Node with name '%s' not found", stepName));
  }
  return dagNode;
}
/**
 * Fetches the ambrose node for the given step and refreshes its job id,
 * statistics, and live MapReduce state.
 *
 * @param step step whose ambrose node is refreshed.
 * @return the refreshed node.
 */
private DAGNode<CascadingJob> updateNode(FlowStep step) {
  final DAGNode<CascadingJob> dagNode = getNode(step);
  final HadoopStepStats hadoopStats = (HadoopStepStats) step.getFlowStepStats();
  final CascadingJob cascadingJob = dagNode.getJob();
  cascadingJob.setId(hadoopStats.getJobID());
  cascadingJob.setJobStats(hadoopStats);
  // Refresh the running MapReduce job state through the step's job client.
  mapReduceHelper.addMapReduceJobState(cascadingJob, hadoopStats.getJobClient());
  return dagNode;
}
/**
 * Prepares the sink call: builds the context array carrying the
 * field-to-alias mapping used while writing tuples.
 *
 * @param flowProcess current flow process (source of the configuration copy).
 * @param sinkCall    sink call that receives the prepared context.
 * @throws IOException if the superclass preparation fails.
 */
@Override
public void sinkPrepare(FlowProcess<Properties> flowProcess, SinkCall<Object[], Object> sinkCall) throws IOException {
  super.sinkPrepare(flowProcess, sinkCall);
  final Settings settings = HadoopSettingsManager.loadFrom(flowProcess.getConfigCopy()).merge(props);
  final Object[] context = new Object[SINK_CTX_SIZE];
  context[SINK_CTX_ALIASES] = CascadingUtils.fieldToAlias(settings, getSinkFields());
  sinkCall.setContext(context);
}
/** * onStepProgressing event is fired whenever a job makes progress. * * @param step the step in the flow that represents the MapReduce job. */ @Override public void onStepRunning(FlowStep step) { // first we report the scripts progress int progress = (int) ((((double) runningJobs) / totalNumberOfJobs) * 100); AmbroseUtils.pushWorkflowProgressEvent(statsWriteService, currentFlowId, progress); // only push job progress events for a completed step once if (completedStepNames.contains(step.getName())) { return; } try { // update node DAGNode<CascadingJob> node = updateNode(step); if (node.getJob().getMapReduceJobState() != null) { AmbroseUtils.pushEvent(statsWriteService, currentFlowId, new Event.JobProgressEvent(node)); if (node.getJob().getMapReduceJobState().isComplete()) { completedStepNames.add(step.getName()); } } } catch (Exception e) { LOG.error("Failed to handle onStepRunning event", e); } }
@Override public void sourcePrepare(FlowProcess<JobConf> flowProcess, SourceCall<Object[], RecordReader> sourceCall) throws IOException { super.sourcePrepare(flowProcess, sourceCall); Object[] context = new Object[SRC_CTX_SIZE]; context[SRC_CTX_KEY] = sourceCall.getInput().createKey(); context[SRC_CTX_VALUE] = sourceCall.getInput().createValue(); // as the tuple _might_ vary (some objects might be missing), we use a map rather then a collection Settings settings = loadSettings(flowProcess.getConfigCopy(), true); context[SRC_CTX_ALIASES] = CascadingUtils.alias(settings); context[SRC_CTX_OUTPUT_JSON] = settings.getOutputAsJson(); sourceCall.setContext(context); }
/** * onStepProgressing event is fired whenever a job makes progress. * * @param step the step in the flow that represents the MapReduce job. */ @Override public void onStepRunning(FlowStep step) { // first we report the scripts progress int progress = (int) ((((double) runningJobs) / totalNumberOfJobs) * 100); AmbroseUtils.pushWorkflowProgressEvent(statsWriteService, currentFlowId, progress); // only push job progress events for a completed step once if (completedStepNames.contains(step.getName())) { return; } try { // update node DAGNode<CascadingJob> node = updateNode(step); if (node.getJob().getMapReduceJobState() != null) { AmbroseUtils.pushEvent(statsWriteService, currentFlowId, new Event.JobProgressEvent(node)); if (node.getJob().getMapReduceJobState().isComplete()) { completedStepNames.add(step.getName()); } } } catch (Exception e) { LOG.error("Failed to handle onStepRunning event", e); } }
if (input == null) { Settings settings = CascadingUtils.addDefaultsToSettings(CascadingUtils.extractOriginalProperties(flowProcess.getConfigCopy()), tapProperties, log);