/**
 * Plans and runs a three-stage chained assembly in which every named stage
 * ("first", "second", "third") is bound to its own tab-delimited sink.
 * <p>
 * Stated intent of the test: if the sinks have the same scheme as a temp tap,
 * the sink replaces the temp tap. On MapReduce platforms the planned flow is
 * therefore expected to consist of exactly three steps.
 *
 * @throws Exception propagated from any platform, planner, or flow call made here
 */
@Test
public void testChainedTaps() throws Exception {
  getPlatform().copyFromLocal( inputFileApache );

  Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );

  // Stage "first": pull the leading run of non-space characters out of "line" as "ip".
  Pipe firstHead = new Pipe( "first" );
  RegexParser ipParser = new RegexParser( new Fields( "ip" ), "^[^ ]*" );
  Pipe firstParsed = new Each( firstHead, new Fields( "line" ), ipParser, new Fields( "ip" ) );
  Pipe firstGrouped = new GroupBy( firstParsed, new Fields( "ip" ) );

  // Stage "second": keep going from the first grouping, filtering "ip" with the pattern "7".
  Pipe secondHead = new Pipe( "second", firstGrouped );
  Pipe secondFiltered = new Each( secondHead, new Fields( "ip" ), new RegexFilter( "7" ) );
  Pipe secondGrouped = new GroupBy( secondFiltered, new Fields( "ip" ) );

  // Stage "third": same shape again, this time filtering "ip" with the pattern "6".
  Pipe thirdHead = new Pipe( "third", secondGrouped );
  Pipe thirdFiltered = new Each( thirdHead, new Fields( "ip" ), new RegexFilter( "6" ) );
  Pipe pipe = new GroupBy( thirdFiltered, new Fields( "ip" ) );

  // One REPLACE-mode tab-delimited sink per named stage.
  Tap sinkFirst = getPlatform().getTabDelimitedFile(
      new Fields( "ip" ), getOutputPath( "chainedtaps/first" ), SinkMode.REPLACE );
  Tap sinkSecond = getPlatform().getTabDelimitedFile(
      new Fields( "ip" ), getOutputPath( "chainedtaps/second" ), SinkMode.REPLACE );
  Tap sinkThird = getPlatform().getTabDelimitedFile(
      new Fields( "ip" ), getOutputPath( "chainedtaps/third" ), SinkMode.REPLACE );

  String[] sinkNames = new String[]{"first", "second", "third"};
  Map<String, Tap> sinks = Cascades.tapsMap( sinkNames, Tap.taps( sinkFirst, sinkSecond, sinkThird ) );

  FlowConnector flowConnector = getPlatform().getFlowConnector();
  Flow flow = flowConnector.connect( source, sinks, pipe );

  // The step count is only asserted on MapReduce platforms.
  boolean onMapReduce = getPlatform().isMapReduce();
  if( onMapReduce ) {
    int plannedSteps = flow.getFlowSteps().size();
    assertEquals( "wrong number of steps", 3, plannedSteps );
  }

  flow.complete();

  validateLength( flow, 3 );
}
} // closes the enclosing test class (its header is outside this chunk)
/**
 * Builds a source -> "first" -> "second" -> "third" pipe chain, where each named
 * pipe is followed by a GroupBy on "ip", and connects it to three tab-delimited
 * sinks keyed by those pipe names.
 * <p>
 * Purpose as originally documented: if the sinks have the same scheme as a temp
 * tap, replace the temp tap. When the platform reports MapReduce, the test
 * asserts that the resulting flow has three steps.
 *
 * @throws Exception propagated from the platform, the flow connector, or the flow itself
 */
@Test
public void testChainedTaps() throws Exception {
  getPlatform().copyFromLocal( inputFileApache );

  Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );

  // "first": the regex "^[^ ]*" captures everything up to the first space of "line" as "ip".
  Pipe parsed = new Each(
      new Pipe( "first" ),
      new Fields( "line" ),
      new RegexParser( new Fields( "ip" ), "^[^ ]*" ),
      new Fields( "ip" ) );
  Pipe groupedOnce = new GroupBy( parsed, new Fields( "ip" ) );

  // "second": RegexFilter( "7" ) applied to "ip", then grouped again.
  Pipe filteredBySeven = new Each(
      new Pipe( "second", groupedOnce ),
      new Fields( "ip" ),
      new RegexFilter( "7" ) );
  Pipe groupedTwice = new GroupBy( filteredBySeven, new Fields( "ip" ) );

  // "third": RegexFilter( "6" ) applied to "ip", then the final grouping.
  Pipe filteredBySix = new Each(
      new Pipe( "third", groupedTwice ),
      new Fields( "ip" ),
      new RegexFilter( "6" ) );
  Pipe pipe = new GroupBy( filteredBySix, new Fields( "ip" ) );

  Tap sinkFirst = getPlatform().getTabDelimitedFile(
      new Fields( "ip" ),
      getOutputPath( "chainedtaps/first" ),
      SinkMode.REPLACE );

  Tap sinkSecond = getPlatform().getTabDelimitedFile(
      new Fields( "ip" ),
      getOutputPath( "chainedtaps/second" ),
      SinkMode.REPLACE );

  Tap sinkThird = getPlatform().getTabDelimitedFile(
      new Fields( "ip" ),
      getOutputPath( "chainedtaps/third" ),
      SinkMode.REPLACE );

  // Bind each sink to the pipe name it should receive output from.
  Map<String, Tap> sinks = Cascades.tapsMap(
      new String[]{"first", "second", "third"},
      Tap.taps( sinkFirst, sinkSecond, sinkThird ) );

  FlowConnector flowConnector = getPlatform().getFlowConnector();
  Flow flow = flowConnector.connect( source, sinks, pipe );

  // Step count is checked only when the platform is MapReduce.
  if( getPlatform().isMapReduce() ) {
    assertEquals( "wrong number of steps", 3, flow.getFlowSteps().size() );
  }

  flow.complete();

  validateLength( flow, 3 );
}
} // closes the enclosing test class (its header is outside this chunk)