/**
 * Drives one {@code Limit} filter through {@code tasks} rounds and checks how many
 * tuples it lets through in total.
 *
 * <p>For every task index a fresh {@code TestFlowProcess( tasks, index )} is created,
 * the filter is prepared against it, and {@code isRemove} is invoked {@code values}
 * times with the same single-value argument tuple. The number of calls that did not
 * request removal must equal {@code min( limit, values * tasks )}.
 *
 * @param limit  the limit handed to the {@code Limit} constructor
 * @param tasks  number of task rounds to run (also passed to each {@code TestFlowProcess})
 * @param values number of {@code isRemove} calls made per task
 */
private void performLimitTest( int limit, int tasks, int values ) {
  Filter filter = new Limit( limit );
  int accepted = 0;

  for( int taskIndex = 0; taskIndex < tasks; taskIndex++ ) {
    FlowProcess process = new TestFlowProcess( tasks, taskIndex );

    // the same filter instance is re-prepared for every task round
    filter.prepare( process, operationCall );
    operationCall.setArguments( getEntry( new Tuple( 1 ) ) );

    for( int remaining = values; remaining > 0; remaining-- ) {
      boolean removed = filter.isRemove( process, operationCall );

      if( removed ) {
        continue;
      }

      accepted++;
    }
  }

  int expected = Math.min( limit, values * tasks );
  String message = String.format( "limit:%d tasks:%d values:%d", limit, tasks, values );

  assertEquals( message, expected, accepted );
}
} // closes an enclosing scope that was opened before this excerpt
/**
 * Joins the "lower" and "upper" inputs on {@code num} with a {@code CoGroup} after each
 * branch has passed through {@code Stop( Limit( 2 ) )}, applies another
 * {@code Stop( Limit( 2 ) )} to the joined stream, and checks the result.
 *
 * <p>Expected outcome asserted below: exactly two sink lines ({@code 1 a 1 A} and
 * {@code 2 b 2 B}, tab separated), {@code Tuples_Written == 2} and
 * {@code Tuples_Read == 6}.
 *
 * @throws Exception if copying the inputs, running the flow, or reading the sink fails
 */
@Test
public void testCoGroup() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower );
  Tap sourceUpper = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper );

  // Parameterized instead of a raw Map/HashMap: the keys are the head pipe names,
  // the values the taps bound to them. Matches the generic usage elsewhere in this method.
  Map<String, Tap> sources = new HashMap<String, Tap>();

  sources.put( "lower", sourceLower );
  sources.put( "upper", sourceUpper );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "cogroup" ), SinkMode.REPLACE );

  // one splitter instance is shared by both branches; it turns "line" into "num" and "char"
  Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " );

  // each branch gets its own Stop/Limit instance
  Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );
  pipeLower = new Each( pipeLower, new Fields( "num", "char" ), new Stop( new Limit( 2 ) ) );

  Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter );
  pipeUpper = new Each( pipeUpper, new Fields( "num", "char" ), new Stop( new Limit( 2 ) ) );

  // inner join on "num"; Fields.size( 4 ) declares four positional result fields
  Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new InnerJoin( Fields.size( 4 ) ) );

  splice = new Each( splice, Fields.ALL, new Stop( new Limit( 2 ) ) );

  Map<Object, Object> properties = getProperties();

  Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice );

  flow.complete();

  validateLength( flow, 2 );

  List<Tuple> values = getSinkAsList( flow );

  assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) );
  assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) );

  assertEquals( 2, flow.getFlowStats().getCounterValue( StepCounters.Tuples_Written ) );
  assertEquals( 6, flow.getFlowStats().getCounterValue( StepCounters.Tuples_Read ) );
}
} // closes an enclosing scope that was opened before this excerpt
/**
 * Parses the leading non-space token of every input line into an {@code ip} field,
 * passes the stream through {@code Stop( Limit( 100 ) )} and writes it to a text sink.
 *
 * <p>Asserts that the sink holds 100 entries, that {@code Tuples_Written} is 100 and
 * that {@code Tuples_Read} is 101.
 *
 * @throws Exception if copying the input, running the flow, or opening the sink fails
 */
@Test
public void testSimple() throws Exception {
  getPlatform().copyFromLocal( inputFileApache200 );

  Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache200 );

  // stage 1: keep only the "ip" field, taken from the start of "line" up to the first space
  RegexParser ipParser = new RegexParser( new Fields( "ip" ), "^[^ ]*" );
  Pipe head = new Pipe( "test" );
  Pipe parsed = new Each( head, new Fields( "line" ), ipParser, new Fields( "ip" ) );

  // stage 2: Stop wrapping a Limit of 100
  Stop stopAfterLimit = new Stop( new Limit( 100 ) );
  Pipe limited = new Each( parsed, new Fields( "ip" ), stopAfterLimit );

  Tap sink = getPlatform().getTextFile( getOutputPath(), SinkMode.REPLACE );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, limited );

  flow.complete();

  validateLength( flow.openSink(), 100 );

  assertEquals( 100, flow.getFlowStats().getCounterValue( StepCounters.Tuples_Written ) );
  assertEquals( 101, flow.getFlowStats().getCounterValue( StepCounters.Tuples_Read ) );
}
/**
 * Parses an {@code ip} field out of every input line, groups by it, counts each group,
 * then passes the grouped output through {@code Stop( Limit( 100 ) )} into a text sink.
 *
 * <p>Asserts that the sink holds 100 entries, that {@code Tuples_Written} is 100 and
 * that {@code Tuples_Read} is 200.
 *
 * @throws Exception if copying the input, running the flow, or opening the sink fails
 */
@Test
public void testSimpleGroup() throws Exception {
  getPlatform().copyFromLocal( inputFileApache200 );

  Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache200 );

  // stage 1: keep only the "ip" field, taken from the start of "line" up to the first space
  RegexParser ipParser = new RegexParser( new Fields( "ip" ), "^[^ ]*" );
  Pipe head = new Pipe( "test" );
  Pipe parsed = new Each( head, new Fields( "line" ), ipParser, new Fields( "ip" ) );

  // stage 2: group on "ip" and emit ( ip, count ) per group
  Pipe grouped = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe counted = new Every( grouped, new Count(), new Fields( "ip", "count" ) );

  // stage 3: Stop wrapping a Limit of 100, applied after the aggregation
  Stop stopAfterLimit = new Stop( new Limit( 100 ) );
  Pipe limited = new Each( counted, new Fields( "ip" ), stopAfterLimit );

  Tap sink = getPlatform().getTextFile( getOutputPath(), SinkMode.REPLACE );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, limited );

  flow.complete();

  validateLength( flow.openSink(), 100 );

  assertEquals( 100, flow.getFlowStats().getCounterValue( StepCounters.Tuples_Written ) );
  assertEquals( 200, flow.getFlowStats().getCounterValue( StepCounters.Tuples_Read ) );
}