@Test public void testNoParamExpression() { Fields fields = new Fields( "a", "b" ).applyTypes( String.class, double.class ); String expression = "(int) (Math.random() * Integer.MAX_VALUE)"; Number integer = (Number) evaluate( new ExpressionFunction( new Fields( "result" ), expression ), getEntry( fields, "1", 2.0 ) ); assertNotNull( integer ); // Fields.NONE as argument selector integer = (Number) evaluate( new ExpressionFunction( new Fields( "result" ), expression ), TupleEntry.NULL ); assertNotNull( integer ); try { evaluate( new ExpressionFunction( new Fields( "result" ), "(int) (Math.random() * Integer.MAX_VALUE) + parameter" ), getEntry( fields, "1", 2.0 ) ); fail( "should throw exception" ); } catch( Exception exception ) { // ignore } }
@Test public void testNoParamExpression() { Fields fields = new Fields( "a", "b" ).applyTypes( String.class, double.class ); String expression = "(int) (Math.random() * Integer.MAX_VALUE)"; Number integer = (Number) evaluate( new ExpressionFunction( new Fields( "result" ), expression ), getEntry( fields, "1", 2.0 ) ); assertNotNull( integer ); // Fields.NONE as argument selector integer = (Number) evaluate( new ExpressionFunction( new Fields( "result" ), expression ), TupleEntry.NULL ); assertNotNull( integer ); try { evaluate( new ExpressionFunction( new Fields( "result" ), "(int) (Math.random() * Integer.MAX_VALUE) + parameter" ), getEntry( fields, "1", 2.0 ) ); fail( "should throw exception" ); } catch( Exception exception ) { // ignore } }
/**
 * This test checks for a deadlock when the same input is forked, adapted on one edge, then hashjoined back together.
 * <p>
 * The "lower" branch splits each line into "num" and "text"; the "upper" branch forks off it and
 * upper-cases "text". Both branches are hash-joined on "num" and the sink is checked for two of
 * the joined rows.
 *
 * @throws Exception if copying the input, planning or running the flow fails
 */
@Test
public void testForkThenJoin() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );

  Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower );

  // parameterized (was a raw Map/HashMap) so the compiler checks the name -> tap pairing
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put( "lower", sourceLower );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "join" ), SinkMode.REPLACE );

  Function splitter = new RegexSplitter( new Fields( "num", "text" ), " " );

  Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );

  // fork: "upper" is built on top of pipeLower, so both join inputs share the single "lower" source
  Pipe pipeUpper = new Each( new Pipe( "upper", pipeLower ), new Fields( "text" ),
    new ExpressionFunction( Fields.ARGS, "text.toUpperCase(java.util.Locale.ROOT)", String.class ), Fields.REPLACE );

  // both sides declare "num" and "text"; Fields.size( 4 ) is passed as the join's declared fields
  Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) );

  Map<Object, Object> properties = getProperties();

  Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice );

  flow.complete();

  validateLength( flow, 5 );

  List<Tuple> values = getSinkAsList( flow );

  assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) );
  assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) );
}
/**
 * This test checks for a deadlock when the same input is forked, adapted on one edge, then hashjoined back together.
 * <p>
 * The "lower" branch splits each line into "num" and "text"; the "upper" branch forks off it and
 * upper-cases "text". Both branches are hash-joined on "num" and the sink is checked for two of
 * the joined rows.
 *
 * @throws Exception if copying the input, planning or running the flow fails
 */
@Test
public void testForkThenJoin() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );

  Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower );

  // parameterized (was a raw Map/HashMap) so the compiler checks the name -> tap pairing
  Map<String, Tap> sources = new HashMap<String, Tap>();
  sources.put( "lower", sourceLower );

  Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "join" ), SinkMode.REPLACE );

  Function splitter = new RegexSplitter( new Fields( "num", "text" ), " " );

  Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter );

  // fork: "upper" is built on top of pipeLower, so both join inputs share the single "lower" source
  Pipe pipeUpper = new Each( new Pipe( "upper", pipeLower ), new Fields( "text" ),
    new ExpressionFunction( Fields.ARGS, "text.toUpperCase(java.util.Locale.ROOT)", String.class ), Fields.REPLACE );

  // both sides declare "num" and "text"; Fields.size( 4 ) is passed as the join's declared fields
  Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) );

  Map<Object, Object> properties = getProperties();

  Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice );

  flow.complete();

  validateLength( flow, 5 );

  List<Tuple> values = getSinkAsList( flow );

  assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) );
  assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) );
}
// Appends an Each to pipeLower that selects the "num" field, evaluates the expression
// "Integer.parseInt( num )" on it, and passes Fields.ARGS / Fields.REPLACE as the declared and output selectors.
// NOTE(review): String.class is presumably the type of the incoming "num" argument, not the result type - confirm against ExpressionFunction.
pipeLower = new Each( pipeLower, new Fields( "num" ), new ExpressionFunction( Fields.ARGS, "Integer.parseInt( num )", String.class ), Fields.REPLACE );
// Appends an Each to pipeLower that selects the "num" field, evaluates the expression
// "Integer.parseInt( num )" on it, and passes Fields.ARGS / Fields.REPLACE as the declared and output selectors.
// NOTE(review): String.class is presumably the type of the incoming "num" argument, not the result type - confirm against ExpressionFunction.
pipeLower = new Each( pipeLower, new Fields( "num" ), new ExpressionFunction( Fields.ARGS, "Integer.parseInt( num )", String.class ), Fields.REPLACE );
/**
 * Wraps "offset" in a LongWritable and "line" in a Text, writes the pair to a
 * WritableSequenceFile, then runs two further flows that re-read that file and write only
 * the key and only the value. All three flows run as one cascade and each sink's length
 * and width is validated.
 *
 * @throws Exception if copying the input or running the cascade fails
 */
@Test
public void testWritable() throws Exception {
  getPlatform().copyFromLocal( inputFileApache );

  Tap textSource = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache );

  // replace each raw field with its Writable wrapper
  Pipe keyValuePipe = new Pipe( "keyvalue" );

  ExpressionFunction toLongWritable =
    new ExpressionFunction( Fields.ARGS, "new org.apache.hadoop.io.LongWritable($0)", long.class );
  keyValuePipe = new Each( keyValuePipe, new Fields( "offset" ), toLongWritable, Fields.REPLACE );

  ExpressionFunction toText =
    new ExpressionFunction( Fields.ARGS, "new org.apache.hadoop.io.Text($0)", String.class );
  keyValuePipe = new Each( keyValuePipe, new Fields( "line" ), toText, Fields.REPLACE );

  // one sink holding key and value, one holding only the key, one holding only the value
  Tap keyValueTap = new Hfs(
    new WritableSequenceFile( new Fields( "offset", "line" ), LongWritable.class, Text.class ),
    getOutputPath( "keyvalue" ), SinkMode.REPLACE );
  Tap keyTap = new Hfs(
    new WritableSequenceFile( new Fields( "offset" ), LongWritable.class, null ),
    getOutputPath( "key" ), SinkMode.REPLACE );
  Tap valueTap = new Hfs(
    new WritableSequenceFile( new Fields( "line" ), Text.class ),
    getOutputPath( "value" ), SinkMode.REPLACE );

  // the key-only and value-only flows read what the first flow writes
  Flow keyValueFlow = getPlatform().getFlowConnector( getProperties() ).connect( textSource, keyValueTap, keyValuePipe );
  Flow keyFlow = getPlatform().getFlowConnector( getProperties() ).connect( keyValueTap, keyTap, new Pipe( "key" ) );
  Flow valueFlow = getPlatform().getFlowConnector( getProperties() ).connect( keyValueTap, valueTap, new Pipe( "value" ) );

  Cascade allFlows = new CascadeConnector( getProperties() ).connect( "keyvalues", keyValueFlow, keyFlow, valueFlow );

  allFlows.complete();

  validateLength( keyValueFlow, 10, 2 );
  validateLength( keyFlow, 10, 1 );
  validateLength( valueFlow, 10, 1 );
}
} // closes a scope opened outside this view - presumably the enclosing test class
/**
 * Wraps "offset" in a LongWritable and "line" in a Text, writes the pair to a
 * WritableSequenceFile, then runs two further flows that re-read that file and write only
 * the key and only the value. All three flows run as one cascade and each sink's length
 * and width is validated.
 *
 * @throws Exception if copying the input or running the cascade fails
 */
@Test
public void testWritable() throws Exception {
  getPlatform().copyFromLocal( inputFileApache );

  Tap textSource = new Hfs( new TextLine( new Fields( "offset", "line" ) ), inputFileApache );

  // replace each raw field with its Writable wrapper
  Pipe keyValuePipe = new Pipe( "keyvalue" );

  ExpressionFunction toLongWritable =
    new ExpressionFunction( Fields.ARGS, "new org.apache.hadoop.io.LongWritable($0)", long.class );
  keyValuePipe = new Each( keyValuePipe, new Fields( "offset" ), toLongWritable, Fields.REPLACE );

  ExpressionFunction toText =
    new ExpressionFunction( Fields.ARGS, "new org.apache.hadoop.io.Text($0)", String.class );
  keyValuePipe = new Each( keyValuePipe, new Fields( "line" ), toText, Fields.REPLACE );

  // one sink holding key and value, one holding only the key, one holding only the value
  Tap keyValueTap = new Hfs(
    new WritableSequenceFile( new Fields( "offset", "line" ), LongWritable.class, Text.class ),
    getOutputPath( "keyvalue" ), SinkMode.REPLACE );
  Tap keyTap = new Hfs(
    new WritableSequenceFile( new Fields( "offset" ), LongWritable.class, null ),
    getOutputPath( "key" ), SinkMode.REPLACE );
  Tap valueTap = new Hfs(
    new WritableSequenceFile( new Fields( "line" ), Text.class ),
    getOutputPath( "value" ), SinkMode.REPLACE );

  // the key-only and value-only flows read what the first flow writes
  Flow keyValueFlow = getPlatform().getFlowConnector( getProperties() ).connect( textSource, keyValueTap, keyValuePipe );
  Flow keyFlow = getPlatform().getFlowConnector( getProperties() ).connect( keyValueTap, keyTap, new Pipe( "key" ) );
  Flow valueFlow = getPlatform().getFlowConnector( getProperties() ).connect( keyValueTap, valueTap, new Pipe( "value" ) );

  Cascade allFlows = new CascadeConnector( getProperties() ).connect( "keyvalues", keyValueFlow, keyFlow, valueFlow );

  allFlows.complete();

  validateLength( keyValueFlow, 10, 2 );
  validateLength( keyFlow, 10, 1 );
  validateLength( valueFlow, 10, 1 );
}
} // closes a scope opened outside this view - presumably the enclosing test class
/**
 * Builds a chain that parses an "ip" field out of each line, groups on it, applies two
 * Count aggregators and ends with an Each that evaluates "count1 + count2"; then runs
 * the flow and validates the sink length.
 *
 * @throws Exception if copying the input or running the flow fails
 */
@Test
public void testChainEndingWithEach() throws Exception {
  getPlatform().copyFromLocal( inputFileApache );

  // head -> parse "ip" -> group by "ip" -> count twice
  Pipe head = new Pipe( "test" );
  Pipe parsed = new Each( head, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe grouped = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe countedOnce = new Every( grouped, new Count( new Fields( "count1" ) ) );
  Pipe countedTwice = new Every( countedOnce, new Count( new Fields( "count2" ) ) );

  // the chain ends on an Each, as the test name indicates
  Pipe tail = new Each( countedTwice, new Fields( "count1", "count2" ),
    new ExpressionFunction( new Fields( "sum" ), "count1 + count2", int.class ), Fields.ALL );

  Tap apacheSource = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );
  Tap chainSink = getPlatform().getTextFile( getOutputPath( "chaineach" ), SinkMode.REPLACE );

  Flow chainFlow = getPlatform().getFlowConnector().connect( apacheSource, chainSink, tail );

  chainFlow.complete();

  validateLength( chainFlow, 8, null );
}
/**
 * Lower-cases the "char" field of the right-hand input, feeds both inputs into a single
 * SumBy grouped on "char" that sums "num" into "sum", and compares the sink contents, in
 * order, with the expected rows.
 *
 * @throws IOException if copying the inputs or reading the sink fails
 */
@Test
public void testSumMerge() throws IOException {
  getPlatform().copyFromLocal( inputFileLhs );
  getPlatform().copyFromLocal( inputFileRhs );

  Tap lhs = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs );
  Tap rhs = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileRhs );

  Tap sink = getPlatform().getDelimitedFile( new Fields( "char", "sum" ), "\t",
    new Class[]{String.class, Integer.TYPE}, getOutputPath( "mergesum" ), SinkMode.REPLACE );

  Pipe lhsPipe = new Pipe( "sum-lhs" );
  Pipe rhsPipe = new Pipe( "sum-rhs" );

  rhsPipe = new Each( rhsPipe, new Fields( "char" ),
    new ExpressionFunction( Fields.ARGS, "$0.toLowerCase()", String.class ), Fields.REPLACE );

  Pipe sumPipe = new SumBy( Pipe.pipes( lhsPipe, rhsPipe ), new Fields( "char" ), new Fields( "num" ),
    new Fields( "sum" ), long.class, 2 );

  Map<String, Tap> tapMap = Cascades.tapsMap( Pipe.pipes( lhsPipe, rhsPipe ), Tap.taps( lhs, rhs ) );

  Flow flow = getPlatform().getFlowConnector().connect( tapMap, sink, sumPipe );

  flow.complete();

  validateLength( flow, 5, 2, Pattern.compile( "^\\w+\\s\\d+$" ) );

  Tuple[] results = new Tuple[]{
    new Tuple( "a", 12 ),
    new Tuple( "b", 24 ),
    new Tuple( "c", 20 ),
    new Tuple( "d", 12 ),
    new Tuple( "e", 10 ),
  };

  TupleEntryIterator iterator = flow.openSink();

  // close the iterator even when an assertion fails part-way through the sink
  try {
    int count = 0;

    while( iterator.hasNext() ) {
      assertEquals( results[ count++ ], iterator.next().getTuple() );
    }

    // without this the loop would pass vacuously on a sink with fewer rows than expected
    assertEquals( "sink tuple count", results.length, count );
  } finally {
    iterator.close();
  }
}
/**
 * Lower-cases the "char" field of the right-hand input, feeds both inputs into a single
 * CountBy grouped on "char" that writes "count", and compares the sink contents, in
 * order, with the expected rows.
 *
 * @throws IOException if copying the inputs or reading the sink fails
 */
@Test
public void testCountMerge() throws IOException {
  getPlatform().copyFromLocal( inputFileLhs );
  getPlatform().copyFromLocal( inputFileRhs );

  Tap lhs = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs );
  Tap rhs = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileRhs );

  Tap sink = getPlatform().getDelimitedFile( new Fields( "char", "count" ), "\t",
    new Class[]{String.class, Integer.TYPE}, getOutputPath( "mergecount" ), SinkMode.REPLACE );

  Pipe lhsPipe = new Pipe( "count-lhs" );
  Pipe rhsPipe = new Pipe( "count-rhs" );

  rhsPipe = new Each( rhsPipe, new Fields( "char" ),
    new ExpressionFunction( Fields.ARGS, "$0.toLowerCase()", String.class ), Fields.REPLACE );

  Pipe countPipe = new CountBy( Pipe.pipes( lhsPipe, rhsPipe ), new Fields( "char" ), new Fields( "count" ), 2 );

  Map<String, Tap> tapMap = Cascades.tapsMap( Pipe.pipes( lhsPipe, rhsPipe ), Tap.taps( lhs, rhs ) );

  Flow flow = getPlatform().getFlowConnector().connect( tapMap, sink, countPipe );

  flow.complete();

  validateLength( flow, 5, 2, Pattern.compile( "^\\w+\\s\\d+$" ) );

  Tuple[] results = new Tuple[]{
    new Tuple( "a", 4 ),
    new Tuple( "b", 8 ),
    new Tuple( "c", 8 ),
    new Tuple( "d", 4 ),
    new Tuple( "e", 2 ),
  };

  TupleEntryIterator iterator = flow.openSink();

  // close the iterator even when an assertion fails part-way through the sink
  try {
    int count = 0;

    while( iterator.hasNext() ) {
      assertEquals( results[ count++ ], iterator.next().getTuple() );
    }

    // without this the loop would pass vacuously on a sink with fewer rows than expected
    assertEquals( "sink tuple count", results.length, count );
  } finally {
    iterator.close();
  }
}
/**
 * Builds a chain that parses an "ip" field out of each line, groups on it, applies two
 * Count aggregators and ends with an Each that evaluates "count1 + count2"; then runs
 * the flow and validates the sink length.
 *
 * @throws Exception if copying the input or running the flow fails
 */
@Test
public void testChainEndingWithEach() throws Exception {
  getPlatform().copyFromLocal( inputFileApache );

  // head -> parse "ip" -> group by "ip" -> count twice
  Pipe head = new Pipe( "test" );
  Pipe parsed = new Each( head, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) );
  Pipe grouped = new GroupBy( parsed, new Fields( "ip" ) );
  Pipe countedOnce = new Every( grouped, new Count( new Fields( "count1" ) ) );
  Pipe countedTwice = new Every( countedOnce, new Count( new Fields( "count2" ) ) );

  // the chain ends on an Each, as the test name indicates
  Pipe tail = new Each( countedTwice, new Fields( "count1", "count2" ),
    new ExpressionFunction( new Fields( "sum" ), "count1 + count2", int.class ), Fields.ALL );

  Tap apacheSource = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache );
  Tap chainSink = getPlatform().getTextFile( getOutputPath( "chaineach" ), SinkMode.REPLACE );

  Flow chainFlow = getPlatform().getFlowConnector().connect( apacheSource, chainSink, tail );

  chainFlow.complete();

  validateLength( chainFlow, 8, null );
}
/**
 * Lower-cases the "char" field of the right-hand input, feeds both inputs into a single
 * AverageBy grouped on "char" that averages "num" into "average", and compares the sink
 * contents, in order, with the expected rows.
 *
 * @throws IOException if copying the inputs or reading the sink fails
 */
@Test
public void testAverageMerge() throws IOException {
  getPlatform().copyFromLocal( inputFileLhs );
  getPlatform().copyFromLocal( inputFileRhs );

  Tap lhs = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs );
  Tap rhs = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileRhs );

  Tap sink = getPlatform().getDelimitedFile( new Fields( "char", "average" ), "\t",
    new Class[]{String.class, Double.TYPE}, getOutputPath( "mergeaverage" ), SinkMode.REPLACE );

  Pipe lhsPipe = new Pipe( "average-lhs" );
  Pipe rhsPipe = new Pipe( "average-rhs" );

  rhsPipe = new Each( rhsPipe, new Fields( "char" ),
    new ExpressionFunction( Fields.ARGS, "$0.toLowerCase()", String.class ), Fields.REPLACE );

  // local was previously named sumPipe although it holds an AverageBy
  Pipe averagePipe = new AverageBy( Pipe.pipes( lhsPipe, rhsPipe ), new Fields( "char" ), new Fields( "num" ),
    new Fields( "average" ), 2 );

  Map<String, Tap> tapMap = Cascades.tapsMap( Pipe.pipes( lhsPipe, rhsPipe ), Tap.taps( lhs, rhs ) );

  Flow flow = getPlatform().getFlowConnector().connect( tapMap, sink, averagePipe );

  flow.complete();

  validateLength( flow, 5, 2, Pattern.compile( "^\\w+\\s[\\d.]+$" ) );

  // each expected average is written as sum / count
  Tuple[] results = new Tuple[]{
    new Tuple( "a", (double) 12 / 4 ),
    new Tuple( "b", (double) 24 / 8 ),
    new Tuple( "c", (double) 20 / 8 ),
    new Tuple( "d", (double) 12 / 4 ),
    new Tuple( "e", (double) 10 / 2 ),
  };

  TupleEntryIterator iterator = flow.openSink();

  // close the iterator even when an assertion fails part-way through the sink
  try {
    int count = 0;

    while( iterator.hasNext() ) {
      assertEquals( results[ count++ ], iterator.next().getTuple() );
    }

    // without this the loop would pass vacuously on a sink with fewer rows than expected
    assertEquals( "sink tuple count", results.length, count );
  } finally {
    iterator.close();
  }
}
/**
 * Lower-cases the "char" field of the right-hand input, feeds both inputs into a single
 * SumBy grouped on "char" that sums "num" into "sum", and compares the sink contents, in
 * order, with the expected rows.
 *
 * @throws IOException if copying the inputs or reading the sink fails
 */
@Test
public void testSumMerge() throws IOException {
  getPlatform().copyFromLocal( inputFileLhs );
  getPlatform().copyFromLocal( inputFileRhs );

  Tap lhs = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs );
  Tap rhs = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileRhs );

  Tap sink = getPlatform().getDelimitedFile( new Fields( "char", "sum" ), "\t",
    new Class[]{String.class, Integer.TYPE}, getOutputPath( "mergesum" ), SinkMode.REPLACE );

  Pipe lhsPipe = new Pipe( "sum-lhs" );
  Pipe rhsPipe = new Pipe( "sum-rhs" );

  rhsPipe = new Each( rhsPipe, new Fields( "char" ),
    new ExpressionFunction( Fields.ARGS, "$0.toLowerCase()", String.class ), Fields.REPLACE );

  Pipe sumPipe = new SumBy( Pipe.pipes( lhsPipe, rhsPipe ), new Fields( "char" ), new Fields( "num" ),
    new Fields( "sum" ), long.class, 2 );

  Map<String, Tap> tapMap = Cascades.tapsMap( Pipe.pipes( lhsPipe, rhsPipe ), Tap.taps( lhs, rhs ) );

  Flow flow = getPlatform().getFlowConnector().connect( tapMap, sink, sumPipe );

  flow.complete();

  validateLength( flow, 5, 2, Pattern.compile( "^\\w+\\s\\d+$" ) );

  Tuple[] results = new Tuple[]{
    new Tuple( "a", 12 ),
    new Tuple( "b", 24 ),
    new Tuple( "c", 20 ),
    new Tuple( "d", 12 ),
    new Tuple( "e", 10 ),
  };

  TupleEntryIterator iterator = flow.openSink();

  // close the iterator even when an assertion fails part-way through the sink
  try {
    int count = 0;

    while( iterator.hasNext() ) {
      assertEquals( results[ count++ ], iterator.next().getTuple() );
    }

    // without this the loop would pass vacuously on a sink with fewer rows than expected
    assertEquals( "sink tuple count", results.length, count );
  } finally {
    iterator.close();
  }
}
/**
 * Replaces every "char" value equal to "c" with null, then counts the non-null and the
 * null "char" values in one AggregateBy with no grouping fields, and compares the single
 * sink row with the expected ( notnull, null ) pair.
 *
 * @throws IOException if copying the input or reading the sink fails
 */
@Test
public void testCountNullNotNull() throws IOException {
  getPlatform().copyFromLocal( inputFileLhs );

  Tap source = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs );
  Tap sink = getPlatform().getDelimitedFile( new Fields( "notnull", "null" ), "\t",
    new Class[]{Integer.TYPE, Integer.TYPE}, getOutputPath( "countnullnotnull" ), SinkMode.REPLACE );

  Pipe pipe = new Pipe( "count" );

  // turn every "c" into null so both counters have something to count
  ExpressionFunction function = new ExpressionFunction( Fields.ARGS, "\"c\".equals($0) ? null : $0", String.class );
  pipe = new Each( pipe, new Fields( "char" ), function, Fields.REPLACE );

  CountBy countNotNull = new CountBy( new Fields( "char" ), new Fields( "notnull" ), CountBy.Include.NO_NULLS );
  CountBy countNull = new CountBy( new Fields( "char" ), new Fields( "null" ), CountBy.Include.ONLY_NULLS );

  pipe = new AggregateBy( pipe, Fields.NONE, 2, countNotNull, countNull );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

  flow.complete();

  validateLength( flow, 1, 2, Pattern.compile( "^\\d+\t\\d+$" ) );

  Tuple[] results = new Tuple[]{
    new Tuple( 9, 4 )
  };

  TupleEntryIterator iterator = flow.openSink();

  // close the iterator even when an assertion fails part-way through the sink
  try {
    int count = 0;

    while( iterator.hasNext() ) {
      assertEquals( results[ count++ ], iterator.next().getTuple() );
    }

    // without this the loop would pass vacuously on an empty sink
    assertEquals( "sink tuple count", results.length, count );
  } finally {
    iterator.close();
  }
}
/**
 * Replaces every "char" value equal to "c" with null, then counts the non-null and the
 * null "char" values in one AggregateBy with no grouping fields, and compares the single
 * sink row with the expected ( notnull, null ) pair.
 *
 * @throws IOException if copying the input or reading the sink fails
 */
@Test
public void testCountNullNotNull() throws IOException {
  getPlatform().copyFromLocal( inputFileLhs );

  Tap source = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs );
  Tap sink = getPlatform().getDelimitedFile( new Fields( "notnull", "null" ), "\t",
    new Class[]{Integer.TYPE, Integer.TYPE}, getOutputPath( "countnullnotnull" ), SinkMode.REPLACE );

  Pipe pipe = new Pipe( "count" );

  // turn every "c" into null so both counters have something to count
  ExpressionFunction function = new ExpressionFunction( Fields.ARGS, "\"c\".equals($0) ? null : $0", String.class );
  pipe = new Each( pipe, new Fields( "char" ), function, Fields.REPLACE );

  CountBy countNotNull = new CountBy( new Fields( "char" ), new Fields( "notnull" ), CountBy.Include.NO_NULLS );
  CountBy countNull = new CountBy( new Fields( "char" ), new Fields( "null" ), CountBy.Include.ONLY_NULLS );

  pipe = new AggregateBy( pipe, Fields.NONE, 2, countNotNull, countNull );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

  flow.complete();

  validateLength( flow, 1, 2, Pattern.compile( "^\\d+\t\\d+$" ) );

  Tuple[] results = new Tuple[]{
    new Tuple( 9, 4 )
  };

  TupleEntryIterator iterator = flow.openSink();

  // close the iterator even when an assertion fails part-way through the sink
  try {
    int count = 0;

    while( iterator.hasNext() ) {
      assertEquals( results[ count++ ], iterator.next().getTuple() );
    }

    // without this the loop would pass vacuously on an empty sink
    assertEquals( "sink tuple count", results.length, count );
  } finally {
    iterator.close();
  }
}
/**
 * Replaces every "num" value equal to 3 with null, averages "num" per "char" with
 * {@code AverageBy.Include.NO_NULLS}, and compares the sink contents, in order, with the
 * expected rows.
 *
 * @throws IOException if copying the input or reading the sink fails
 */
@Test
public void testAverageByNull() throws IOException {
  getPlatform().copyFromLocal( inputFileLhs );

  Tap source = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs );
  Tap sink = getPlatform().getDelimitedFile( new Fields( "char", "average" ), "\t",
    new Class[]{String.class, Double.TYPE}, getOutputPath( "averagenull" ), SinkMode.REPLACE );

  Pipe pipe = new Pipe( "average" );

  // null out every 3 so the NO_NULLS handling has values to skip
  ExpressionFunction function = new ExpressionFunction( Fields.ARGS, "3 == $0 ? null : $0", Integer.class );
  pipe = new Each( pipe, new Fields( "num" ), function, Fields.REPLACE );

  pipe = new AverageBy( pipe, new Fields( "char" ), new Fields( "num" ), new Fields( "average" ),
    AverageBy.Include.NO_NULLS, 2 );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

  flow.complete();

  validateLength( flow, 5, 2, Pattern.compile( "^\\w+\\s[\\d.]+$" ) );

  // each expected average is written as sum / count
  Tuple[] results = new Tuple[]{
    new Tuple( "a", (double) 6 / 2 ),
    new Tuple( "b", (double) 12 / 4 ),
    new Tuple( "c", (double) 7 / 3 ),
    new Tuple( "d", (double) 6 / 2 ),
    new Tuple( "e", (double) 5 / 1 ),
  };

  TupleEntryIterator iterator = flow.openSink();

  // close the iterator even when an assertion fails part-way through the sink
  try {
    int count = 0;

    while( iterator.hasNext() ) {
      assertEquals( results[ count++ ], iterator.next().getTuple() );
    }

    // without this the loop would pass vacuously on a sink with fewer rows than expected
    assertEquals( "sink tuple count", results.length, count );
  } finally {
    iterator.close();
  }
}
@Test public void testSumByNulls() throws IOException { getPlatform().copyFromLocal( inputFileLhs ); Tap source = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs ); Tap sink = getPlatform().getDelimitedFile( new Fields( "char", "sum" ), "\t", new Class[]{String.class, Integer.class}, getOutputPath( "sumnulls" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "sum" ); ExpressionFunction function = new ExpressionFunction( Fields.ARGS, "5 == $0 ? null : $0", Integer.class ); pipe = new Each( pipe, new Fields( "num" ), function, Fields.REPLACE ); // Long.class denotes return null for null, not zero pipe = new SumBy( pipe, new Fields( "char" ), new Fields( "num" ), new Fields( "sum" ), Integer.class, 2 ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 5, 2, Pattern.compile( "^\\w+\\s(\\d+|null)$" ) ); Tuple[] results = new Tuple[]{ new Tuple( "a", 1 ), new Tuple( "b", 7 ), new Tuple( "c", 10 ), new Tuple( "d", 6 ), new Tuple( "e", null ), }; TupleEntryIterator iterator = flow.openSink(); int count = 0; while( iterator.hasNext() ) assertEquals( results[ count++ ], iterator.next().getTuple() ); iterator.close(); }
/**
 * Replaces every "num" value equal to 3 with null, averages "num" per "char" with
 * {@code AverageBy.Include.NO_NULLS}, and compares the sink contents, in order, with the
 * expected rows.
 *
 * @throws IOException if copying the input or reading the sink fails
 */
@Test
public void testAverageByNull() throws IOException {
  getPlatform().copyFromLocal( inputFileLhs );

  Tap source = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs );
  Tap sink = getPlatform().getDelimitedFile( new Fields( "char", "average" ), "\t",
    new Class[]{String.class, Double.TYPE}, getOutputPath( "averagenull" ), SinkMode.REPLACE );

  Pipe pipe = new Pipe( "average" );

  // null out every 3 so the NO_NULLS handling has values to skip
  ExpressionFunction function = new ExpressionFunction( Fields.ARGS, "3 == $0 ? null : $0", Integer.class );
  pipe = new Each( pipe, new Fields( "num" ), function, Fields.REPLACE );

  pipe = new AverageBy( pipe, new Fields( "char" ), new Fields( "num" ), new Fields( "average" ),
    AverageBy.Include.NO_NULLS, 2 );

  Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe );

  flow.complete();

  validateLength( flow, 5, 2, Pattern.compile( "^\\w+\\s[\\d.]+$" ) );

  // each expected average is written as sum / count
  Tuple[] results = new Tuple[]{
    new Tuple( "a", (double) 6 / 2 ),
    new Tuple( "b", (double) 12 / 4 ),
    new Tuple( "c", (double) 7 / 3 ),
    new Tuple( "d", (double) 6 / 2 ),
    new Tuple( "e", (double) 5 / 1 ),
  };

  TupleEntryIterator iterator = flow.openSink();

  // close the iterator even when an assertion fails part-way through the sink
  try {
    int count = 0;

    while( iterator.hasNext() ) {
      assertEquals( results[ count++ ], iterator.next().getTuple() );
    }

    // without this the loop would pass vacuously on a sink with fewer rows than expected
    assertEquals( "sink tuple count", results.length, count );
  } finally {
    iterator.close();
  }
}
@Test public void testSumByNulls() throws IOException { getPlatform().copyFromLocal( inputFileLhs ); Tap source = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLhs ); Tap sink = getPlatform().getDelimitedFile( new Fields( "char", "sum" ), "\t", new Class[]{String.class, Integer.class}, getOutputPath( "sumnulls" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "sum" ); ExpressionFunction function = new ExpressionFunction( Fields.ARGS, "5 == $0 ? null : $0", Integer.class ); pipe = new Each( pipe, new Fields( "num" ), function, Fields.REPLACE ); // Long.class denotes return null for null, not zero pipe = new SumBy( pipe, new Fields( "char" ), new Fields( "num" ), new Fields( "sum" ), Integer.class, 2 ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 5, 2, Pattern.compile( "^\\w+\\s(\\d+|null)$" ) ); Tuple[] results = new Tuple[]{ new Tuple( "a", 1 ), new Tuple( "b", 7 ), new Tuple( "c", 10 ), new Tuple( "d", 6 ), new Tuple( "e", null ), }; TupleEntryIterator iterator = flow.openSink(); int count = 0; while( iterator.hasNext() ) assertEquals( results[ count++ ], iterator.next().getTuple() ); iterator.close(); }