/**
 * Returns the source taps of the given node that are not annotated as {@code Accumulated}.
 * <p>
 * Current rule sets do not guarantee setting Streamed annotation, but do for Accumulated,
 * so the streamed taps are derived by subtracting the accumulated ones from all source taps.
 *
 * @param flowNode the node whose source taps are inspected
 * @return a new mutable set holding every source tap that is not accumulated
 */
private Set<Tap> getStreamedTaps( FlowNode flowNode ) {
  Set<Tap> taps = new HashSet<>( flowNode.getSourceTaps() );

  // removeAll, not remove: the accumulated elements arrive as a collection, and
  // remove( Object ) would look for that collection itself as a single member,
  // leaving every accumulated tap in the result.
  taps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) );

  return taps;
}
/**
 * Returns the source taps of the given node that are not annotated as {@code Accumulated}.
 * <p>
 * Current rule sets do not guarantee setting Streamed annotation, but do for Accumulated,
 * so the streamed taps are derived by subtracting the accumulated ones from all source taps.
 *
 * @param flowNode the node whose source taps are inspected
 * @return a new mutable set holding every source tap that is not accumulated
 */
private Set<Tap> getStreamedTaps( FlowNode flowNode ) {
  Set<Tap> taps = new HashSet<>( flowNode.getSourceTaps() );

  // removeAll, not remove: the accumulated elements arrive as a collection, and
  // remove( Object ) would look for that collection itself as a single member,
  // leaving every accumulated tap in the result.
  taps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) );

  return taps;
}
protected int getParallelism( FlowNode flowNode, JobConf conf ) { // only count streamed taps, accumulated taps are always annotated HashSet<Tap> sourceStreamedTaps = new HashSet<>( flowNode.getSourceTaps() ); sourceStreamedTaps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) ); if( sourceStreamedTaps.size() != 0 ) return -1; int parallelism = Integer.MAX_VALUE; for( Tap tap : flowNode.getSinkTaps() ) { int numSinkParts = tap.getScheme().getNumSinkParts(); if( numSinkParts == 0 ) continue; if( parallelism != Integer.MAX_VALUE ) LOG.info( "multiple sink taps in flow node declaring numSinkParts, choosing lowest value. see cascading.flow.FlowRuntimeProps for broader control." ); parallelism = Math.min( parallelism, numSinkParts ); } if( parallelism != Integer.MAX_VALUE ) return parallelism; return conf.getInt( FlowRuntimeProps.GATHER_PARTITIONS, 0 ); }
protected int getParallelism( FlowNode flowNode, JobConf conf ) { // only count streamed taps, accumulated taps are always annotated HashSet<Tap> sourceStreamedTaps = new HashSet<>( flowNode.getSourceTaps() ); sourceStreamedTaps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) ); if( sourceStreamedTaps.size() != 0 ) return -1; int parallelism = Integer.MAX_VALUE; for( Tap tap : flowNode.getSinkTaps() ) { int numSinkParts = tap.getScheme().getNumSinkParts(); if( numSinkParts == 0 ) continue; if( parallelism != Integer.MAX_VALUE ) LOG.info( "multiple sink taps in flow node declaring numSinkParts, choosing lowest value. see cascading.flow.FlowRuntimeProps for broader control." ); parallelism = Math.min( parallelism, numSinkParts ); } if( parallelism != Integer.MAX_VALUE ) return parallelism; return conf.getInt( FlowRuntimeProps.GATHER_PARTITIONS, 0 ); }
/**
 * Seeds the graph from the node's source taps.
 * <p>
 * For every source tap a {@code SourceStage} is created, registered as a head
 * via {@code addHead}, and then passed to {@code handleDuct} together with the
 * tap as its flow element.
 */
protected void buildGraph() {
  for( Object element : node.getSourceTaps() ) {
    Tap sourceTap = (Tap) element;
    Duct headDuct = new SourceStage( tapFlowProcess( sourceTap ), sourceTap );

    addHead( headDuct );
    handleDuct( (FlowElement) element, headDuct );
  }
}
// Look up the source tap among this node's source taps, using the value stored in the
// job configuration under "cascading.step.source" as the tap id.
Tap source = Flows.getTapForID( flowNode.getSourceTaps(), jobConf.get( "cascading.step.source" ) );
// Look up the source tap among this node's source taps, using the value stored in the
// job configuration under "cascading.step.source" as the tap id.
Tap source = Flows.getTapForID( flowNode.getSourceTaps(), jobConf.get( "cascading.step.source" ) );
/**
 * Creates the input {@code DataSet} for the single source tap of the given node.
 * <p>
 * A tap-specific configuration is initialised by the tap itself and tagged with
 * the tap id under {@code "cascading.step.source"}; it is then added to a second
 * node configuration through {@code MultiInputFormat.addInputFormat}, and that
 * second configuration is handed to the created input as its parameters.
 *
 * @param flowProcess process passed to the tap's {@code sourceConfInit}
 * @param env         environment used to create the input
 * @param node        node whose single source tap is translated
 * @param dop         parallelism set on the created input
 * @return the data set reading from the node's source tap
 */
private DataSet<Tuple> translateSource(FlowProcess flowProcess, ExecutionEnvironment env, FlowNode node, int dop) {
    Tap sourceTap = getSingle(node.getSourceTaps());

    // Let the tap initialise its own copy of the node config, then record its id in it.
    JobConf perTapConf = new JobConf(getNodeConfig(node));
    sourceTap.sourceConfInit(flowProcess, perTapConf);
    perTapConf.set( "cascading.step.source", Tap.id( sourceTap ) );

    Fields sourceFields = sourceTap.getSourceFields();
    registerKryoTypes(sourceFields);

    // Fresh copy of the node config that receives the tap config as an input format entry.
    JobConf inputConf = new JobConf(getNodeConfig(node));
    MultiInputFormat.addInputFormat(inputConf, perTapConf);

    return env
        .createInput(new TapInputFormat(node), new TupleTypeInfo(sourceFields))
        .name(sourceTap.getIdentifier())
        .setParallelism(dop)
        .withParameters(FlinkConfigConverter.toFlinkConfig(new Configuration(inputConf)));
}