private Set<FlowElement> getSources(FlowNode node) {
	return node.getSourceElements();
}
/**
 * Current rule sets do not guarantee setting the Streamed annotation, but do for Accumulated.
 */
private Set<Tap> getStreamedTaps( FlowNode flowNode )
  {
  Set<Tap> taps = new HashSet<>( flowNode.getSourceTaps() );

  // use removeAll(): getSourceElements() returns a Set of elements, so every accumulated tap must be dropped individually
  taps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) );

  return taps;
  }
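For context, a minimal sketch of the accumulated-side counterpart, assuming a helper named getAccumulatedTaps that does not appear in the snippet above: it keeps only the source taps carrying the Accumulated annotation, so the two helpers partition the node's source taps.

// Hypothetical counterpart to getStreamedTaps() (name and placement are assumptions):
// retain only the source taps the planner annotated as Accumulated.
private Set<Tap> getAccumulatedTaps( FlowNode flowNode )
  {
  Set<Tap> taps = new HashSet<>( flowNode.getSourceTaps() );

  taps.retainAll( flowNode.getSourceElements( StreamMode.Accumulated ) );

  return taps;
  }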
@Override
public int compare( FlowNode lhs, FlowNode rhs )
  {
  // larger graph first
  int lhsSize = lhs.getElementGraph().vertexSet().size();
  int rhsSize = rhs.getElementGraph().vertexSet().size();

  int result = ( lhsSize < rhsSize ) ? -1 : ( ( lhsSize == rhsSize ) ? 0 : 1 );

  if( result != 0 )
    return result;

  // more inputs second
  lhsSize = lhs.getSourceElements().size();
  rhsSize = rhs.getSourceElements().size();

  return ( lhsSize < rhsSize ) ? -1 : ( ( lhsSize == rhsSize ) ? 0 : 1 );
  }
}
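As written, compare() orders nodes ascending by element-graph vertex count, so the "larger graph first" intent presumably depends on how the caller applies the comparator. A minimal usage sketch, assuming the method above lives in a comparator passed in by the caller (the class and method names below are hypothetical):

import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import cascading.flow.FlowNode;

public class FlowNodeOrdering {

  // Hypothetical helper: sort nodes so the one with the most element-graph vertices comes
  // first, by reversing the ascending order produced by the compare() method shown above.
  public static void sortLargestFirst( List<FlowNode> nodes, Comparator<FlowNode> comparator ) {
    Collections.sort( nodes, Collections.reverseOrder( comparator ) );
  }
}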
protected int getParallelism( FlowNode flowNode, JobConf conf )
  {
  // only count streamed taps, accumulated taps are always annotated
  HashSet<Tap> sourceStreamedTaps = new HashSet<>( flowNode.getSourceTaps() );

  sourceStreamedTaps.removeAll( flowNode.getSourceElements( StreamMode.Accumulated ) );

  if( sourceStreamedTaps.size() != 0 )
    return -1;

  int parallelism = Integer.MAX_VALUE;

  for( Tap tap : flowNode.getSinkTaps() )
    {
    int numSinkParts = tap.getScheme().getNumSinkParts();

    if( numSinkParts == 0 )
      continue;

    if( parallelism != Integer.MAX_VALUE )
      LOG.info( "multiple sink taps in flow node declaring numSinkParts, choosing lowest value. see cascading.flow.FlowRuntimeProps for broader control." );

    parallelism = Math.min( parallelism, numSinkParts );
    }

  if( parallelism != Integer.MAX_VALUE )
    return parallelism;

  return conf.getInt( FlowRuntimeProps.GATHER_PARTITIONS, 0 );
  }
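A hedged sketch of how the returned value might be applied to a Flink DataSet operator, treating -1 (a streamed source tap is present) and 0 as "leave it to the input splits". The helper name and the Operator-based wiring are assumptions, not part of the snippet above:

import org.apache.flink.api.java.operators.Operator;

public class ParallelismUtil {

  // Hypothetical call site for getParallelism(): only set an explicit degree of parallelism
  // when a positive value was derived; otherwise let Flink derive it from the input splits.
  public static <T, O extends Operator<T, O>> O applyParallelism( O operator, int parallelism ) {
    if( parallelism > 0 ) {
      operator.setParallelism( parallelism );
    }
    return operator;
  }
}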
private DataSet<Tuple> translateHashJoin(List<DataSet<Tuple>> inputs, FlowNode node) {

	HashJoin hashJoin = (HashJoin) getCommonSuccessor(node.getSourceElements(), node);
	Joiner joiner = hashJoin.getJoiner();
Set<FlowElement> sources = flowNode.getSourceElements();
if(sources.size() != 1) {
	throw new RuntimeException("FlowNode for TapOutputFormat may only have a single source");
@Override
public void open(Configuration config) {

	try {
		currentProcess = new FlinkFlowProcess(FlinkConfigConverter.toHadoopConfig(config), getRuntimeContext(), flowNode.getID());

		Set<FlowElement> sources = flowNode.getSourceElements();
		if(sources.size() != 1) {
			throw new RuntimeException("FlowNode for EachMapper may only have a single source");
		}
		FlowElement sourceElement = sources.iterator().next();
		if(!(sourceElement instanceof Boundary)) {
			throw new RuntimeException("Source of EachMapper must be a single Boundary");
		}
		Boundary source = (Boundary)sourceElement;

		streamGraph = new EachStreamGraph( currentProcess, flowNode, source );

		sourceStage = this.streamGraph.getSourceStage();

		for( Duct head : streamGraph.getHeads() ) {
			LOG.info("sourcing from: " + ((ElementDuct) head).getFlowElement());
		}

		for( Duct tail : streamGraph.getTails() ) {
			LOG.info("sinking to: " + ((ElementDuct) tail).getFlowElement());
		}
	}
	catch( Throwable throwable ) {
		if( throwable instanceof CascadingException) {
			throw (CascadingException) throwable;
		}

		throw new FlowException( "internal error during EachMapper configuration", throwable );
	}
}
@Override
public void open(Configuration config) {

	this.calledPrepare = false;

	try {
		currentProcess = new FlinkFlowProcess(FlinkConfigConverter.toHadoopConfig(config), getRuntimeContext(), flowNode.getID());

		Set<FlowElement> sources = flowNode.getSourceElements();
		if(sources.size() != 1) {
			throw new RuntimeException("FlowNode for GroupByReducer may only have a single source");
		}
		FlowElement sourceElement = sources.iterator().next();
		if(!(sourceElement instanceof GroupBy)) {
			throw new RuntimeException("Source of GroupByReducer must be a GroupBy");
		}
		GroupBy source = (GroupBy)sourceElement;

		streamGraph = new GroupByStreamGraph( currentProcess, flowNode, source );

		groupSource = this.streamGraph.getGroupSource();

		for( Duct head : streamGraph.getHeads() ) {
			LOG.info("sourcing from: " + ((ElementDuct) head).getFlowElement());
		}

		for( Duct tail : streamGraph.getTails() ) {
			LOG.info("sinking to: " + ((ElementDuct) tail).getFlowElement());
		}
	}
	catch( Throwable throwable ) {
		if( throwable instanceof CascadingException) {
			throw (CascadingException) throwable;
		}

		throw new FlowException( "internal error during GroupByReducer configuration", throwable );
	}
}
@Override
public void open(Configuration config) {

	this.calledPrepare = false;

	try {
		currentProcess = new FlinkFlowProcess(FlinkConfigConverter.toHadoopConfig(config), getRuntimeContext(), flowNode.getID());

		Set<FlowElement> sources = flowNode.getSourceElements();
		if(sources.size() != 1) {
			throw new RuntimeException("FlowNode for CoGroupBufferReducer may only have a single CoGroup source");
		}
		FlowElement sourceElement = sources.iterator().next();
		if(!(sourceElement instanceof CoGroup)) {
			throw new RuntimeException("Source of CoGroupBufferReducer must be a CoGroup");
		}
		CoGroup source = (CoGroup)sourceElement;

		streamGraph = new CoGroupBufferReduceStreamGraph( currentProcess, flowNode, source );

		groupSource = this.streamGraph.getGroupSource();

		for( Duct head : streamGraph.getHeads() ) {
			LOG.info("sourcing from: " + ((ElementDuct) head).getFlowElement());
		}

		for( Duct tail : streamGraph.getTails() ) {
			LOG.info("sinking to: " + ((ElementDuct) tail).getFlowElement());
		}
	}
	catch( Throwable throwable ) {
		if (throwable instanceof CascadingException) {
			throw (CascadingException) throwable;
		}

		throw new FlowException("internal error during CoGroupBufferReducer configuration", throwable);
	}
}
protected Map<FlowElement, Configuration> initFromSources( FlowNode flowNode, FlowProcess<TezConfiguration> flowProcess,
                                                           Configuration conf, Map<String, LocalResource> taskLocalResources )
  {
  Set<? extends FlowElement> accumulatedSources = flowNode.getSourceElements( StreamMode.Accumulated );

  Set<FlowElement> sources = new HashSet<>( flowNode.getSourceElements() );
@Override
public void open(Configuration config) {

	this.calledPrepare = false;

	try {
		currentProcess = new FlinkFlowProcess(FlinkConfigConverter.toHadoopConfig(config), getRuntimeContext(), flowNode.getID());

		Set<FlowElement> sources = flowNode.getSourceElements();
		if(sources.size() != 1) {
			throw new RuntimeException("FlowNode for CoGroupReducer may only have a single source");
		}
		FlowElement sourceElement = sources.iterator().next();
		if(!(sourceElement instanceof CoGroup)) {
			throw new RuntimeException("Source of CoGroupReducer must be a CoGroup");
		}
		CoGroup source = (CoGroup)sourceElement;

		streamGraph = new CoGroupReduceStreamGraph( currentProcess, flowNode, source );

		groupSource = this.streamGraph.getGroupSource();

		for( Duct head : streamGraph.getHeads() ) {
			LOG.info("sourcing from: " + ((ElementDuct) head).getFlowElement());
		}

		for( Duct tail : streamGraph.getTails() ) {
			LOG.info("sinking to: " + ((ElementDuct) tail).getFlowElement());
		}
	}
	catch( Throwable throwable ) {
		if (throwable instanceof CascadingException) {
			throw (CascadingException) throwable;
		}

		throw new FlowException("internal error during CoGroupReducer configuration", throwable);
	}
}
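The open() implementations above (EachMapper, GroupByReducer, CoGroupBufferReducer, CoGroupReducer) repeat the same validation: exactly one source element of an expected type. A hedged refactoring sketch, with a hypothetical helper name getSingleSource, shows how that check could be centralized:

// Hypothetical helper (not present in the code above): validate that the flow node has
// exactly one source element of the expected type and return it, mirroring the checks
// repeated in each open() method.
private static <T extends FlowElement> T getSingleSource(FlowNode flowNode, Class<T> expectedType, String taskName) {
	Set<FlowElement> sources = flowNode.getSourceElements();
	if(sources.size() != 1) {
		throw new RuntimeException("FlowNode for " + taskName + " may only have a single source");
	}
	FlowElement sourceElement = sources.iterator().next();
	if(!expectedType.isInstance(sourceElement)) {
		throw new RuntimeException("Source of " + taskName + " must be a " + expectedType.getSimpleName());
	}
	return expectedType.cast(sourceElement);
}

A call such as GroupBy source = getSingleSource(flowNode, GroupBy.class, "GroupByReducer") would then replace the inline checks.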
protected void buildGraph()
  {
  Group group = (Group) Util.getFirst( node.getSourceElements() );

  Duct rhsDuct;

  if( group.isGroupBy() )
    rhsDuct = new HadoopGroupByGate( flowProcess, (GroupBy) group, IORole.source );
  else
    rhsDuct = new HadoopCoGroupGate( flowProcess, (CoGroup) group, IORole.source );

  addHead( rhsDuct );

  handleDuct( group, rhsDuct );
  }
@Override
public void open(Configuration config) {

	try {
		currentProcess = new FlinkFlowProcess(FlinkConfigConverter.toHadoopConfig(config), getRuntimeContext(), flowNode.getID());

		Set<FlowElement> sources = flowNode.getSourceElements();
		// pick one (arbitrary) source
		FlowElement sourceElement = sources.iterator().next();
		if(!(sourceElement instanceof Boundary)) {
			throw new RuntimeException("Source of HashJoinMapper must be a boundary");
		}
		Boundary source = (Boundary)sourceElement;

		streamGraph = new HashJoinMapperStreamGraph( currentProcess, flowNode, source );

		sourceStage = this.streamGraph.getSourceStage();

		for( Duct head : streamGraph.getHeads() ) {
			LOG.info("sourcing from: " + ((ElementDuct) head).getFlowElement());
		}

		for( Duct tail : streamGraph.getTails() ) {
			LOG.info("sinking to: " + ((ElementDuct) tail).getFlowElement());
		}
	}
	catch( Throwable throwable ) {
		if( throwable instanceof CascadingException) {
			throw (CascadingException) throwable;
		}

		throw new FlowException( "internal error during HashJoinMapper configuration", throwable );
	}
}
@Override
public void open(Configuration config) {

	try {
		joinedTuples = new Tuple[numJoinInputs];

		currentProcess = new FlinkFlowProcess(FlinkConfigConverter.toHadoopConfig(config), getRuntimeContext(), flowNode.getID());

		Set<FlowElement> sources = flowNode.getSourceElements();
		// pick one (arbitrary) source
		FlowElement sourceElement = sources.iterator().next();
		if(!(sourceElement instanceof Boundary)) {
			throw new RuntimeException("Source of NaryHashJoinJoiner must be a boundary");
		}
		Boundary source = (Boundary)sourceElement;

		streamGraph = new HashJoinStreamGraph( currentProcess, flowNode, source );

		sourceStage = this.streamGraph.getSourceStage();

		for( Duct head : streamGraph.getHeads() ) {
			LOG.info("sourcing from: " + ((ElementDuct) head).getFlowElement());
		}

		for( Duct tail : streamGraph.getTails() ) {
			LOG.info("sinking to: " + ((ElementDuct) tail).getFlowElement());
		}
	}
	catch( Throwable throwable ) {
		if( throwable instanceof CascadingException) {
			throw (CascadingException) throwable;
		}

		throw new FlowException( "internal error during NaryHashJoinJoiner configuration", throwable );
	}

	this.prepareCalled = false;
}
streamGraph = new HadoopReduceStreamGraph( currentProcess, flowNode, Util.getFirst( flowNode.getSourceElements() ) );
@Override
public void open(Configuration config) {

	try {
		this.joinedTuples = new Tuple[2];
		this.joinInput = new Tuple2<>(new Tuple(), this.joinedTuples);

		currentProcess = new FlinkFlowProcess(FlinkConfigConverter.toHadoopConfig(config), getRuntimeContext(), flowNode.getID());

		Set<FlowElement> sources = flowNode.getSourceElements();
		// pick one (arbitrary) source
		FlowElement sourceElement = sources.iterator().next();
		if(!(sourceElement instanceof Boundary)) {
			throw new RuntimeException("Source of BinaryHashJoinJoiner must be a boundary");
		}
		Boundary source = (Boundary)sourceElement;

		streamGraph = new HashJoinStreamGraph( currentProcess, flowNode, source );

		sourceStage = this.streamGraph.getSourceStage();

		for( Duct head : streamGraph.getHeads() ) {
			LOG.info("sourcing from: " + ((ElementDuct) head).getFlowElement());
		}

		for( Duct tail : streamGraph.getTails() ) {
			LOG.info("sinking to: " + ((ElementDuct) tail).getFlowElement());
		}
	}
	catch( Throwable throwable ) {
		if( throwable instanceof CascadingException) {
			throw (CascadingException) throwable;
		}

		throw new FlowException( "internal error during BinaryHashJoinJoiner configuration", throwable );
	}

	this.prepareCalled = false;
}
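The prepareCalled / calledPrepare flags initialized in these open() methods suggest a lazy-prepare step on the first record. A hedged sketch of that pattern, assuming the stream graph exposes a prepare() call (an assumption about the Cascading StreamGraph API, not shown above):

// Hypothetical first-record hook: run the stream graph's prepare step exactly once,
// mirroring how prepareCalled is initialized to false at the end of open().
private void ensurePrepared() {
	if(!this.prepareCalled) {
		this.streamGraph.prepare(); // assumption: prepare() exists on the stream graph
		this.prepareCalled = true;
	}
}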