protected int getParallelism( FlowNode flowNode, JobConf conf ) { // only count streamed taps, accumulated taps are always annotated HashSet<Tap> sourceStreamedTaps = new HashSet<>( flowNode.getSourceTaps() ); sourceStreamedTaps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) ); if( sourceStreamedTaps.size() != 0 ) return -1; int parallelism = Integer.MAX_VALUE; for( Tap tap : flowNode.getSinkTaps() ) { int numSinkParts = tap.getScheme().getNumSinkParts(); if( numSinkParts == 0 ) continue; if( parallelism != Integer.MAX_VALUE ) LOG.info( "multiple sink taps in flow node declaring numSinkParts, choosing lowest value. see cascading.flow.FlowRuntimeProps for broader control." ); parallelism = Math.min( parallelism, numSinkParts ); } if( parallelism != Integer.MAX_VALUE ) return parallelism; return conf.getInt( FlowRuntimeProps.GATHER_PARTITIONS, 0 ); }
/** Returns the identifier of the wrapped flow node. */
@Override
public String getID()
  {
  return flowNode.getID();
  }
/**
 * Enables JDWP remote debugging on the vertex's task JVM when the
 * {@code test.debug.node} system property names this node (by source element
 * name or ordinal).
 *
 * @param flowNode the node being configured
 * @param vertex   the Tez vertex whose launch options are amended
 */
private void addRemoteDebug( FlowNode flowNode, Vertex vertex )
  {
  String debugNode = System.getProperty( "test.debug.node", null );

  if( Util.isEmpty( debugNode ) )
    return;

  // match either a source element name or the node ordinal
  boolean matchesName = flowNode.getSourceElementNames().contains( debugNode );
  boolean matchesOrdinal = asInt( debugNode ) == flowNode.getOrdinal();

  if( !matchesName && !matchesOrdinal )
    return;

  LOG.warn( "remote debugging enabled with property: {}, on node: {}, with node id: {}", "test.debug.node", debugNode, flowNode.getID() );

  String launchOpts = vertex.getTaskLaunchCmdOpts();

  if( launchOpts == null )
    launchOpts = "";

  String debugAddress = System.getProperty( "test.debug.address", "localhost:5005" ).trim();

  // suspend=y so the JVM waits for the debugger to attach before running tasks
  launchOpts += " -agentlib:jdwp=transport=dt_socket,server=n,address=" + debugAddress + ",suspend=y";

  vertex.setTaskLaunchCmdOpts( launchOpts );
  }
/**
 * Creates a stream graph for the given node, scoped either to the full element
 * graph or to the pipeline rooted at the given streamed source.
 *
 * @param flowProcess    current flow process
 * @param node           the node to build the graph for
 * @param streamedSource the streamed source selecting a pipeline, or null for the whole graph
 */
public NodeStreamGraph( FlowProcess flowProcess, FlowNode node, FlowElement streamedSource )
  {
  this.flowProcess = flowProcess;
  this.node = node;
  this.streamedSource = streamedSource;

  if( streamedSource == null )
    this.elementGraph = node.getElementGraph();
  else
    this.elementGraph = node.getPipelineGraphFor( streamedSource );
  }
/**
 * Current rule sets do not guarantee setting Streamed annotation, but do for Accumulated,
 * so streamed taps are derived by removing the accumulated taps from the full source set.
 *
 * @param flowNode the node whose streamed source taps are requested
 * @return the source taps that are not annotated as Accumulated
 */
private Set<Tap> getStreamedTaps( FlowNode flowNode )
  {
  Set<Tap> taps = new HashSet<>( flowNode.getSourceTaps() );

  // BUGFIX: was taps.remove( set ), which attempts to remove the Set object itself
  // as a single element and never removes any accumulated tap; removeAll removes
  // each member of the accumulated set (matching getParallelism's handling)
  taps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) );

  return taps;
  }
/**
 * Returns the single outgoing scope of the node's single sink element.
 *
 * @param node the node whose out scope is resolved
 * @return the sole scope entering the node's sink
 * @throws RuntimeException when the node has more than one sink or more than one incoming scope
 */
private Scope getOutScope( FlowNode node )
  {
  Set<FlowElement> nodeSinks = node.getSinkElements();

  if( nodeSinks.size() != 1 )
    throw new RuntimeException( "Only nodes with one output supported right now" );

  FlowElement sink = nodeSinks.iterator().next();

  Collection<Scope> outScopes = (Collection<Scope>) node.getPreviousScopes( sink );

  if( outScopes.size() != 1 )
    throw new RuntimeException( "Only one incoming scope for last node of mapper allowed" );

  return outScopes.iterator().next();
  }
// NOTE(review): incomplete fragment — the thrown exception's enclosing block is not closed in this view.
// Derives a synthetic Hadoop attempt id from the flow node id (interpreted as a hex big integer,
// truncated to its low 64 bits via longValue) plus the task number, then builds the Flink flow
// process and validates that this TapOutputFormat node has exactly one source element.
BigInteger numId = new BigInteger(flowNode.getID(), 16); String hadoopTaskId = String.format( "attempt_%012d_0000_%s_%06d_0", numId.longValue(), "m", taskNumber ); flowProcess = new FlinkFlowProcess(this.config, this.getRuntimeContext(), flowNode.getID()); Set<FlowElement> sources = flowNode.getSourceElements(); if(sources.size() != 1) { throw new RuntimeException("FlowNode for TapOutputFormat may only have a single source");
// NOTE(review): fragment of the reduce-side setup path — logs the node identity, builds the
// reduce stream graph rooted at the first source element, then logs the sink element, any
// trap taps, and a memory snapshot; presumably statement order matters — confirm in full file.
LOG.info( "flow node id: {}, ordinal: {}", flowNode.getID(), flowNode.getOrdinal() ); streamGraph = new HadoopReduceStreamGraph( currentProcess, flowNode, Util.getFirst( flowNode.getSourceElements() ) ); LOG.info( "sinking to: " + ( (ElementDuct) tail ).getFlowElement() ); for( Tap trap : flowNode.getTraps() ) LOG.info( "trapping to: " + trap ); logMemory( LOG, "flow node id: " + flowNode.getID() + ", mem on start" );
// NOTE(review): fragment of the map-side setup path — logs the node identity, resolves the
// source tap selected by the "cascading.step.source" job property, then logs sink/trap
// targets and a memory snapshot.
LOG.info( "flow node id: {}, ordinal: {}", flowNode.getID(), flowNode.getOrdinal() ); Tap source = Flows.getTapForID( flowNode.getSourceTaps(), jobConf.get( "cascading.step.source" ) ); LOG.info( "sinking to: " + ( (ElementDuct) tail ).getFlowElement() ); for( Tap trap : flowNode.getTraps() ) LOG.info( "trapping to: " + trap ); logMemory( LOG, "flow node id: " + flowNode.getID() + ", mem on start" );
/** Returns the source elements feeding the given node. */
private Set<FlowElement> getSources( FlowNode node )
  {
  return node.getSourceElements();
  }
/**
 * Initializes the Tez processor: materializes the configuration from the user payload,
 * wires up logging, creates the flow process, and deserializes the flow node definition.
 *
 * @throws Exception wrapped as {@link FlowException} unless already a {@link CascadingException}
 */
@Override
public void initialize() throws Exception
  {
  configuration = new TezConfiguration( TezUtils.createConfFromUserPayload( getContext().getUserPayload() ) );

  TezUtil.setMRProperties( getContext(), configuration, true );

  try
    {
    HadoopUtil.initLog4j( configuration );

    String cascadingVersion = configuration.get( "cascading.version", "" );

    LOG.info( "cascading version: {}", cascadingVersion );

    currentProcess = new Hadoop2TezFlowProcess( new FlowSession(), getContext(), configuration );

    // the flow node definition travels base64-encoded inside the configuration
    flowNode = deserializeBase64( configuration.getRaw( FlowNode.CASCADING_FLOW_NODE ), configuration, BaseFlowNode.class );

    LOG.info( "flow node id: {}, ordinal: {}", flowNode.getID(), flowNode.getOrdinal() );

    logMemory( LOG, "flow node id: " + flowNode.getID() + ", mem on start" );
    }
  catch( Throwable throwable )
    {
    // boundary: re-throw cascading exceptions as-is, wrap anything else
    if( throwable instanceof CascadingException )
      throw (CascadingException) throwable;

    throw new FlowException( "internal error during processor configuration", throwable );
    }
  }
@Override public int compare( FlowNode lhs, FlowNode rhs ) { // larger graph first int lhsSize = lhs.getElementGraph().vertexSet().size(); int rhsSize = rhs.getElementGraph().vertexSet().size(); int result = ( lhsSize < rhsSize ) ? -1 : ( ( lhsSize == rhsSize ) ? 0 : 1 ); if( result != 0 ) return result; // more inputs second lhsSize = lhs.getSourceElements().size(); rhsSize = rhs.getSourceElements().size(); return ( lhsSize < rhsSize ) ? -1 : ( ( lhsSize == rhsSize ) ? 0 : 1 ); } }
/**
 * Creates a stream graph spanning the node's full element graph.
 *
 * @param flowProcess current flow process
 * @param node        the node to build the graph for
 */
public NodeStreamGraph( FlowProcess flowProcess, FlowNode node )
  {
  this.flowProcess = flowProcess;
  this.node = node;
  this.elementGraph = node.getElementGraph();
  }
// Builds the Tez stream graph for a node: graph construction, trap/scope wiring,
// diagnostic prints, then duct binding. The call sequence (buildGraph -> setTraps ->
// setScopes -> printGraph -> bind -> printBoundGraph) is order-dependent; bind()
// presumably requires the fully wired graph — do not reorder.
public Hadoop2TezStreamGraph( Hadoop2TezFlowProcess currentProcess, FlowNode flowNode, Map<String, LogicalInput> inputMap, Map<String, LogicalOutput> outputMap ) { super( currentProcess, flowNode ); this.inputMap = inputMap; this.outputMap = outputMap; buildGraph(); setTraps(); setScopes(); printGraph( node.getID(), node.getName(), flowProcess.getCurrentSliceNum() ); bind(); printBoundGraph( node.getID(), node.getName(), flowProcess.getCurrentSliceNum() ); }
// NOTE(review): disconnected fragments from the vertex-configuration path — records the node
// ordinal in the conf, a validation failure for missing gather partitions, and the annotations
// that expose the chosen parallelism and task launch options; the throw and the statements
// after it belong to different branches in the full file.
conf.set( "cascading.flow.node.num", Integer.toString( flowNode.getOrdinal() ) ); throw new FlowException( getName(), "the default number of gather partitions must be set, see cascading.flow.FlowRuntimeProps" ); flowNode.addProcessAnnotation( FlowRuntimeProps.GATHER_PARTITIONS, Integer.toString( parallelism ) ); flowNode.addProcessAnnotation( TezConfiguration.TEZ_TASK_LAUNCH_CMD_OPTS, vertex.getTaskLaunchCmdOpts() );
// NOTE(review): fragments from the processor-run path — logs the sink element and trap taps
// with their element ids, and the boundary rethrow (cascading exceptions pass through, others
// are wrapped with the node ordinal for context); the two throws belong to different branches
// in the full file.
LOG.info( "sinking to: {}, id: {}", ( (ElementDuct) tail ).getFlowElement(), FlowElements.id( ( (ElementDuct) tail ).getFlowElement() ) ); for( Tap trap : flowNode.getTraps() ) LOG.info( "trapping to: {}, id: {}", trap, FlowElements.id( trap ) ); throw (CascadingException) throwable; throw new FlowException( "internal error during processor execution on node: " + flowNode.getOrdinal(), throwable );
// NOTE(review): fragment of a debug/plan-writing path — derives a .dot file name for the
// node's element sub-graph from the step/node ordinals and the graph's canonical hash,
// and fetches the node's pipeline graphs (presumably written out by code past this view).
ElementGraph nodeGraph = flowNode.getElementGraph(); int nodeOrdinal = flowNode.getOrdinal(); String nodeGraphName = String.format( "%s/%04d-%04d-step-node-sub-graph-%s.dot", rootPath, stepOrdinal, nodeOrdinal, canonicalHash( nodeGraph ) ); List<? extends ElementGraph> pipelineGraphs = flowNode.getPipelineGraphs();
/**
 * Builds a per-node configuration: a copy of the step config with node- and
 * step-level ConfigDef properties applied and the node ordinal recorded.
 *
 * @param node the node whose configuration is created
 * @return a new configuration specific to the given node
 */
private Configuration getNodeConfig( FlowNode node )
  {
  // copy so per-node settings do not leak into the shared step config
  Configuration nodeConfig = HadoopUtil.copyConfiguration( this.getConfig() );
  ConfigurationSetter setter = new ConfigurationSetter( nodeConfig );

  this.initConfFromNodeConfigDef( node.getElementGraph(), setter );
  this.initConfFromStepConfigDef( setter );

  nodeConfig.set( "cascading.flow.node.num", Integer.toString( node.getOrdinal() ) );

  return nodeConfig;
  }
/**
 * Builds the stream graph heads: one {@code SourceStage} per source tap,
 * registered as a head duct and wired into the downstream graph.
 * <p>
 * Iterates the source taps with their declared element type instead of the
 * original raw {@code Object} loop with repeated casts (getSourceTaps returns
 * a typed tap set, as used elsewhere in this file).
 */
protected void buildGraph()
  {
  for( Tap sourceTap : node.getSourceTaps() )
    {
    Duct rhsDuct = new SourceStage( tapFlowProcess( sourceTap ), sourceTap );

    addHead( rhsDuct );

    handleDuct( sourceTap, rhsDuct );
    }
  }
/** Returns the ordinal of the wrapped flow node. */
public int getOrdinal()
  {
  return flowNode.getOrdinal();
  }