/**
 * Performs job-level cleanup for the sink of this flow node.
 *
 * <p>A copy of {@code this.config} is obtained through
 * {@code HadoopUtil.copyConfiguration}, flagged as no longer executing a
 * Cascading flow, pointed at the identifier of the first sink tap returned by
 * the flow node, and then handed to {@code Hadoop18TapUtil.cleanupJob}.
 *
 * @param parallelism not read by this implementation
 * @throws IOException declared by the signature; presumably raised by the
 *                     cleanup call -- confirm against Hadoop18TapUtil
 */
@Override
public void finalizeGlobal(int parallelism) throws IOException {
    // All changes below are applied to the copy, not to this.config itself.
    final org.apache.hadoop.conf.Configuration cleanupConfig =
        HadoopUtil.copyConfiguration(this.config);

    // Only the first sink tap handed out by the iterator is considered.
    final Tap sinkTap = this.flowNode.getSinkTaps().iterator().next();

    cleanupConfig.setBoolean(HadoopUtil.CASCADING_FLOW_EXECUTING, false);

    final Path outputPath = new Path(sinkTap.getIdentifier());
    HadoopUtil.setOutputPath(cleanupConfig, outputPath);

    Hadoop18TapUtil.cleanupJob(cleanupConfig);
}
}
protected int getParallelism( FlowNode flowNode, JobConf conf ) { // only count streamed taps, accumulated taps are always annotated HashSet<Tap> sourceStreamedTaps = new HashSet<>( flowNode.getSourceTaps() ); sourceStreamedTaps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) ); if( sourceStreamedTaps.size() != 0 ) return -1; int parallelism = Integer.MAX_VALUE; for( Tap tap : flowNode.getSinkTaps() ) { int numSinkParts = tap.getScheme().getNumSinkParts(); if( numSinkParts == 0 ) continue; if( parallelism != Integer.MAX_VALUE ) LOG.info( "multiple sink taps in flow node declaring numSinkParts, choosing lowest value. see cascading.flow.FlowRuntimeProps for broader control." ); parallelism = Math.min( parallelism, numSinkParts ); } if( parallelism != Integer.MAX_VALUE ) return parallelism; return conf.getInt( FlowRuntimeProps.GATHER_PARTITIONS, 0 ); }
protected int getParallelism( FlowNode flowNode, JobConf conf ) { // only count streamed taps, accumulated taps are always annotated HashSet<Tap> sourceStreamedTaps = new HashSet<>( flowNode.getSourceTaps() ); sourceStreamedTaps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) ); if( sourceStreamedTaps.size() != 0 ) return -1; int parallelism = Integer.MAX_VALUE; for( Tap tap : flowNode.getSinkTaps() ) { int numSinkParts = tap.getScheme().getNumSinkParts(); if( numSinkParts == 0 ) continue; if( parallelism != Integer.MAX_VALUE ) LOG.info( "multiple sink taps in flow node declaring numSinkParts, choosing lowest value. see cascading.flow.FlowRuntimeProps for broader control." ); parallelism = Math.min( parallelism, numSinkParts ); } if( parallelism != Integer.MAX_VALUE ) return parallelism; return conf.getInt( FlowRuntimeProps.GATHER_PARTITIONS, 0 ); }
private void translateSink(FlowProcess flowProcess, DataSet<Tuple> input, FlowNode node) { Tap tap = this.getSingle(node.getSinkTaps()); Configuration sinkConfig = this.getNodeConfig(node); tap.sinkConfInit(flowProcess, sinkConfig);