@Test public void testGeneratorAggregator() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new GroupBy( pipe, new Fields( "ip" ) ); pipe = new Every( pipe, new TestAggregator( new Fields( "count1" ), new Fields( "ip" ), new Tuple( "first1" ), new Tuple( "first2" ) ) ); pipe = new Every( pipe, new TestAggregator( new Fields( "count2" ), new Fields( "ip" ), new Tuple( "second" ), new Tuple( "second2" ), new Tuple( "second3" ) ) ); Tap sink = getPlatform().getTextFile( getOutputPath( "generatoraggregator" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 8 * 2 * 3, null ); }
public Flow connect( FlowDef flowDef ) { FlowPlanner flowPlanner = createFlowPlanner(); flowPlanner.initialize( this, properties ); RuleRegistrySet ruleRegistrySet = getRuleRegistrySet(); return flowPlanner.buildFlow( flowDef, ruleRegistrySet ); }
void runTestCount( String name, Fields argumentSelector, Fields fieldDeclaration, Fields outputSelector ) throws Exception { getPlatform().copyFromLocal( inputFileIps ); Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileIps ); Tap sink = getPlatform().getTextFile( Fields.size( 1 ), getOutputPath( name ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "count" ); pipe = new GroupBy( pipe, new Fields( 1 ) ); pipe = new Every( pipe, argumentSelector, new Count( fieldDeclaration ), outputSelector ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.start(); // simple test for start flow.complete(); validateLength( flow, 17 ); assertTrue( getSinkAsList( flow ).contains( new Tuple( "63.123.238.8\t2" ) ) ); }
@Test public void testSinkUnknown() throws IOException { getPlatform().copyFromLocal( inputFileCross ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileCross ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new RegexSplitter( new Fields( "first", "second", "third" ), "\\s" ), Fields.RESULTS ); Tap sink = getPlatform().getTabDelimitedFile( Fields.UNKNOWN, getOutputPath( "unknownsinks" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 37, null ); TupleEntryIterator iterator = flow.openSink(); String line = iterator.next().getTuple().toString(); assertTrue( "not equal: wrong values: " + line, line.matches( "[0-9]\t[a-z]\t[A-Z]" ) ); iterator.close(); }
@Override public void initialize( FlowConnector flowConnector, Map<Object, Object> properties ) { super.initialize( flowConnector, properties ); defaultJobConf = HadoopUtil.createJobConf( properties, createJobConf( properties ) ); checkPlatform( defaultJobConf ); intermediateSchemeClass = flowConnector.getIntermediateSchemeClass( properties ); Class type = AppProps.getApplicationJarClass( properties ); if( defaultJobConf.getJar() == null && type != null ) defaultJobConf.setJarByClass( type ); String path = AppProps.getApplicationJarPath( properties ); if( defaultJobConf.getJar() == null && path != null ) defaultJobConf.setJar( path ); if( defaultJobConf.getJar() == null ) defaultJobConf.setJarByClass( HadoopUtil.findMainClass( HadoopPlanner.class ) ); AppProps.setApplicationJarPath( properties, defaultJobConf.getJar() ); LOG.info( "using application jar: {}", defaultJobConf.getJar() ); }
void runTestCount( String name, Fields argumentSelector, Fields fieldDeclaration, Fields outputSelector ) throws Exception { getPlatform().copyFromLocal( inputFileIps ); Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileIps ); Tap sink = getPlatform().getTextFile( Fields.size( 1 ), getOutputPath( name ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "count" ); pipe = new GroupBy( pipe, new Fields( 1 ) ); pipe = new Every( pipe, argumentSelector, new Count( fieldDeclaration ), outputSelector ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.start(); // simple test for start flow.complete(); validateLength( flow, 17 ); assertTrue( getSinkAsList( flow ).contains( new Tuple( "63.123.238.8\t2" ) ) ); }
@Test public void testSinkUnknown() throws IOException { getPlatform().copyFromLocal( inputFileCross ); Tap source = getPlatform().getTextFile( new Fields( "line" ), inputFileCross ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new RegexSplitter( new Fields( "first", "second", "third" ), "\\s" ), Fields.RESULTS ); Tap sink = getPlatform().getTabDelimitedFile( Fields.UNKNOWN, getOutputPath( "unknownsinks" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 37, null ); TupleEntryIterator iterator = flow.openSink(); String line = iterator.next().getTuple().toString(); assertTrue( "not equal: wrong values: " + line, line.matches( "[0-9]\t[a-z]\t[A-Z]" ) ); iterator.close(); }
@Test public void testGeneratorAggregator() throws Exception { getPlatform().copyFromLocal( inputFileApache ); Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexParser( new Fields( "ip" ), "^[^ ]*" ), new Fields( "ip" ) ); pipe = new GroupBy( pipe, new Fields( "ip" ) ); pipe = new Every( pipe, new TestAggregator( new Fields( "count1" ), new Fields( "ip" ), new Tuple( "first1" ), new Tuple( "first2" ) ) ); pipe = new Every( pipe, new TestAggregator( new Fields( "count2" ), new Fields( "ip" ), new Tuple( "second" ), new Tuple( "second2" ), new Tuple( "second3" ) ) ); Tap sink = getPlatform().getTextFile( getOutputPath( "generatoraggregator" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 8 * 2 * 3, null ); }
@Test public void testUnGroup() throws Exception { getPlatform().copyFromLocal( inputFileJoined ); Tap source = getPlatform().getTextFile( inputFileJoined ); Tap sink = getPlatform().getTextFile( getOutputPath( "ungrouped" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ) ) ); pipe = new Each( pipe, new UnGroup( new Fields( "num", "char" ), new Fields( "num" ), Fields.fields( new Fields( "lower" ), new Fields( "upper" ) ) ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); }
@Test public void testUnGroup() throws Exception { copyFromLocal( inputFileJoined ); Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileJoined ); Tap sink = getPlatform().getTextFile( getOutputPath( "ungrouped" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( 1 ), new RegexSplitter( Fields.size( 3 ) ) ); pipe = new Each( pipe, new UnGroup( Fields.size( 2 ), new Fields( 0 ), Fields.fields( new Fields( 1 ), new Fields( 2 ) ) ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); }
@Test public void testUnGroup() throws Exception { getPlatform().copyFromLocal( inputFileJoined ); Tap source = getPlatform().getTextFile( inputFileJoined ); Tap sink = getPlatform().getTextFile( getOutputPath( "ungrouped" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ) ) ); pipe = new Each( pipe, new UnGroup( new Fields( "num", "char" ), new Fields( "num" ), Fields.fields( new Fields( "lower" ), new Fields( "upper" ) ) ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); }
@Test public void testUnGroup() throws Exception { copyFromLocal( inputFileJoined ); Tap source = getPlatform().getTextFile( Fields.size( 2 ), inputFileJoined ); Tap sink = getPlatform().getTextFile( getOutputPath( "ungrouped" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( 1 ), new RegexSplitter( Fields.size( 3 ) ) ); pipe = new Each( pipe, new UnGroup( Fields.size( 2 ), new Fields( 0 ), Fields.fields( new Fields( 1 ), new Fields( 2 ) ) ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); }
@Test public void testSwap() throws Exception { Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileApache ); Tap sink = getPlatform().getTextFile( new Fields( "offset", "line" ), new Fields( "count", "ipaddress" ), getOutputPath( "swap" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); Function parser = new RegexParser( new Fields( "ip" ), "^[^ ]*" ); pipe = new Each( pipe, new Fields( "line" ), parser, Fields.SWAP ); pipe = new GroupBy( pipe, new Fields( "ip" ) ); pipe = new Every( pipe, new Fields( "ip" ), new Count( new Fields( "count" ) ) ); pipe = new Each( pipe, new Fields( "ip" ), new Identity( new Fields( "ipaddress" ) ), Fields.SWAP ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 8, 2, Pattern.compile( "^\\d+\\s\\d+\\s[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}$" ) ); }
@Test public void testUnGroupAnon() throws Exception { getPlatform().copyFromLocal( inputFileJoined ); Tap source = getPlatform().getTextFile( inputFileJoined ); Tap sink = getPlatform().getTextFile( getOutputPath( "ungroupedanon" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ) ) ); pipe = new Each( pipe, new UnGroup( new Fields( "num" ), Fields.fields( new Fields( "lower" ), new Fields( "upper" ) ) ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); }
@Test public void testUnGroupAnon() throws Exception { getPlatform().copyFromLocal( inputFileJoined ); Tap source = getPlatform().getTextFile( inputFileJoined ); Tap sink = getPlatform().getTextFile( getOutputPath( "ungroupedanon" ), SinkMode.REPLACE ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new RegexSplitter( new Fields( "num", "lower", "upper" ) ) ); pipe = new Each( pipe, new UnGroup( new Fields( "num" ), Fields.fields( new Fields( "lower" ), new Fields( "upper" ) ) ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10 ); }
private Flow secondFlow( String name, Tap source ) { Pipe pipe = new Pipe( name ); pipe = new Each( pipe, new RegexSplitter( new Fields( "first", "second", "third", "fourth" ), "\\." ) ); pipe = new Each( pipe, new FieldJoiner( new Fields( "mangled" ), "-" ) ); Tap sink = getPlatform().getTabDelimitedFile( new Fields( "mangled" ), getOutputPath( name ), SinkMode.REPLACE ); return getPlatform().getFlowConnector().connect( source, sink, pipe ); }
@Test public void testLastEachNotModified() throws Exception { copyFromLocal( inputFileApache ); Tap source = getPlatform().getTextFile( inputFileApache ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new TestFunction( new Fields( "insert" ), new Tuple( "inserted" ) ) ); pipe = new GroupBy( pipe, new Fields( "insert" ) ); Tap sink = getPlatform().getTextFile( getOutputPath( "lasteachmodified" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10, null ); }
private Flow firstFlow( String path ) { Tap source = getPlatform().getTextFile( inputFileIps ); Pipe pipe = new Pipe( "first" ); pipe = new Each( pipe, new Fields( "line" ), new Identity( new Fields( "ip" ) ), new Fields( "ip" ) ); Tap sink = getPlatform().getTabDelimitedFile( new Fields( "ip" ), getOutputPath( path + "/first" ), SinkMode.REPLACE ); return getPlatform().getFlowConnector().connect( source, sink, pipe ); }
@Test public void testLastEachNotModified() throws Exception { copyFromLocal( inputFileApache ); Tap source = getPlatform().getTextFile( inputFileApache ); Pipe pipe = new Pipe( "test" ); pipe = new Each( pipe, new Fields( "line" ), new TestFunction( new Fields( "insert" ), new Tuple( "inserted" ) ) ); pipe = new GroupBy( pipe, new Fields( "insert" ) ); Tap sink = getPlatform().getTextFile( getOutputPath( "lasteachmodified" ), SinkMode.REPLACE ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 10, null ); }
@Test public void testJoinSamePipe2() throws Exception { getPlatform().copyFromLocal( inputFileLower ); Tap source = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Map sources = new HashMap(); sources.put( "lower", source ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "samepipe2" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe join = new HashJoin( pipeLower, new Fields( "num" ), pipeLower, new Fields( "num" ), new Fields( "num1", "char1", "num2", "char2" ) ); Flow flow = getPlatform().getFlowConnector().connect( sources, sink, join ); flow.complete(); validateLength( flow, 5, null ); List<Tuple> actual = getSinkAsList( flow ); assertTrue( actual.contains( new Tuple( "1\ta\t1\ta" ) ) ); assertTrue( actual.contains( new Tuple( "2\tb\t2\tb" ) ) ); }