/**
 * Opens a {@link MultiSourceTap} for reading in two configurations and hands each
 * resulting iterator to {@code validateLength} with an expected length of 10:
 * first with two separate {@link GlobHfs} children, then with a single
 * {@link GlobHfs} whose pattern lists both alternatives.
 *
 * @throws Exception if copying the inputs or opening a tap fails
 */
@Test
public void testMultiSourceIterator() throws Exception {
  // Make both input files available on the platform under test.
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  // Configuration 1: one glob per alternative, combined by the MultiSourceTap.
  String firstPattern = InputData.inputPath + "?{ppe[_r]}.txt";
  String secondPattern = InputData.inputPath + "?{owe?}.txt";

  GlobHfs firstGlob = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), firstPattern );
  GlobHfs secondGlob = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), secondPattern );

  MultiSourceTap pairedSource = new MultiSourceTap( firstGlob, secondGlob );

  validateLength( pairedSource.openForRead( getPlatform().getFlowProcess() ), 10 );

  // Configuration 2: both alternatives inside one glob, wrapped on its own.
  String combinedPattern = InputData.inputPath + "?{ppe[_r],owe?}.txt";

  GlobHfs combinedGlob = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), combinedPattern );

  MultiSourceTap singleSource = new MultiSourceTap( combinedGlob );

  validateLength( singleSource.openForRead( getPlatform().getFlowProcess() ), 10, null );
}
/**
 * Opens a {@link MultiSourceTap} for reading in two configurations and hands each
 * resulting iterator to {@code validateLength} with an expected length of 10:
 * first with two separate {@link GlobHfs} children, then with a single
 * {@link GlobHfs} whose pattern lists both alternatives.
 *
 * @throws Exception if copying the inputs or opening a tap fails
 */
@Test
public void testMultiSourceIterator() throws Exception {
  // Make both input files available on the platform under test.
  getPlatform().copyFromLocal( inputFileLower );
  getPlatform().copyFromLocal( inputFileUpper );

  // Configuration 1: one glob per alternative, combined by the MultiSourceTap.
  String firstPattern = InputData.inputPath + "?{ppe[_r]}.txt";
  String secondPattern = InputData.inputPath + "?{owe?}.txt";

  GlobHfs firstGlob = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), firstPattern );
  GlobHfs secondGlob = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), secondPattern );

  MultiSourceTap pairedSource = new MultiSourceTap( firstGlob, secondGlob );

  validateLength( pairedSource.openForRead( getPlatform().getFlowProcess() ), 10 );

  // Configuration 2: both alternatives inside one glob, wrapped on its own.
  String combinedPattern = InputData.inputPath + "?{ppe[_r],owe?}.txt";

  GlobHfs combinedGlob = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), combinedPattern );

  MultiSourceTap singleSource = new MultiSourceTap( combinedGlob );

  validateLength( singleSource.openForRead( getPlatform().getFlowProcess() ), 10, null );
}
@Test public void testNestedMultiSourceGlobHfs() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); GlobHfs source1 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r]}.txt" ); GlobHfs source2 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{owe?}.txt" ); MultiSourceTap source = new MultiSourceTap( source1, source2 ); assertEquals( 2, source.getNumChildTaps() ); // using null pos so all fields are written Tap sink = new Hfs( new TextLine(), getOutputPath( "globmultisource" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), "\\s" ); Pipe concatPipe = new Each( new Pipe( "concat" ), new Fields( "line" ), splitter ); Flow concatFlow = getPlatform().getFlowConnector( getProperties() ).connect( "first", source, sink, concatPipe ); Tap nextSink = new Hfs( new TextLine(), getOutputPath( "globmultiource2" ), SinkMode.REPLACE ); Flow nextFlow = getPlatform().getFlowConnector( getProperties() ).connect( "second", sink, nextSink, concatPipe ); Cascade cascade = new CascadeConnector( getProperties() ).connect( concatFlow, nextFlow ); cascade.complete(); validateLength( concatFlow, 10 ); }
@Test public void testGlobHfs() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); GlobHfs source = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r],owe?}.txt" ); assertEquals( 2, source.getTaps().length ); // show globhfs will just match a directory if ended with a / assertEquals( 1, new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "../?ata/" ).getTaps().length ); Tap sink = new Hfs( new TextLine(), getOutputPath( "glob" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), "\\s" ); Pipe concatPipe = new Each( new Pipe( "concat" ), new Fields( "line" ), splitter ); Flow concatFlow = getPlatform().getFlowConnector( getProperties() ).connect( "first", source, sink, concatPipe ); Tap nextSink = new Hfs( new TextLine(), getOutputPath( "glob2" ), SinkMode.REPLACE ); Flow nextFlow = getPlatform().getFlowConnector( getProperties() ).connect( "second", sink, nextSink, concatPipe ); Cascade cascade = new CascadeConnector( getProperties() ).connect( concatFlow, nextFlow ); cascade.complete(); validateLength( concatFlow, 10 ); }
@Test public void testNestedMultiSourceGlobHfs() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); GlobHfs source1 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r]}.txt" ); GlobHfs source2 = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{owe?}.txt" ); MultiSourceTap source = new MultiSourceTap( source1, source2 ); assertEquals( 2, source.getNumChildTaps() ); // using null pos so all fields are written Tap sink = new Hfs( new TextLine(), getOutputPath( "globmultisource" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), "\\s" ); Pipe concatPipe = new Each( new Pipe( "concat" ), new Fields( "line" ), splitter ); Flow concatFlow = getPlatform().getFlowConnector( getProperties() ).connect( "first", source, sink, concatPipe ); Tap nextSink = new Hfs( new TextLine(), getOutputPath( "globmultiource2" ), SinkMode.REPLACE ); Flow nextFlow = getPlatform().getFlowConnector( getProperties() ).connect( "second", sink, nextSink, concatPipe ); Cascade cascade = new CascadeConnector( getProperties() ).connect( concatFlow, nextFlow ); cascade.complete(); validateLength( concatFlow, 10 ); }
@Test public void testGlobHfs() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); GlobHfs source = new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "?{ppe[_r],owe?}.txt" ); assertEquals( 2, source.getTaps().length ); // show globhfs will just match a directory if ended with a / assertEquals( 1, new GlobHfs( new TextLine( new Fields( "offset", "line" ) ), InputData.inputPath + "../?ata/" ).getTaps().length ); Tap sink = new Hfs( new TextLine(), getOutputPath( "glob" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), "\\s" ); Pipe concatPipe = new Each( new Pipe( "concat" ), new Fields( "line" ), splitter ); Flow concatFlow = getPlatform().getFlowConnector( getProperties() ).connect( "first", source, sink, concatPipe ); Tap nextSink = new Hfs( new TextLine(), getOutputPath( "glob2" ), SinkMode.REPLACE ); Flow nextFlow = getPlatform().getFlowConnector( getProperties() ).connect( "second", sink, nextSink, concatPipe ); Cascade cascade = new CascadeConnector( getProperties() ).connect( concatFlow, nextFlow ); cascade.complete(); validateLength( concatFlow, 10 ); }