/**
 * Wires up this assembly: co-groups the incoming pipes, applies the given
 * multi-buffer operation to the grouping, and retains only the output fields.
 *
 * @param pipes       pipes handed to the CoGroup
 * @param groupFields grouping fields handed to the CoGroup alongside {@code pipes}
 * @param groupRename fields passed to {@link MultiBufferOperation} and used as the
 *                    leading part of the output fields
 * @param operation   buffer whose result fields are appended to {@code groupRename}
 */
protected void init(Pipe[] pipes, Fields[] groupFields, Fields groupRename, MultiBuffer operation) {
    // Output = group rename fields followed by whatever the operation declares as results.
    Fields resultFields = groupRename.append(operation.getResultFields());

    Pipe joined = new CoGroup(pipes, groupFields, null, null, new BufferJoin());
    Pipe buffered = new Every(joined, new MultiBufferOperation(groupRename, operation), resultFields);
    Pipe retained = new Retain(buffered, resultFields);

    setTails(retained);
}
} // closes the enclosing class (its header lies outside this excerpt)
/**
 * Plans a flow in which a single pipe is co-grouped on "offset" through the
 * int-argument CoGroup constructor (value 1; presumably the self-join count --
 * confirm against the CoGroup Javadoc) and asserts the plan has exactly one step.
 */
@Test
public void testDupeSourceRepeat() {
    Tap mergeSource = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
    Tap output = new Hfs( new TextLine(), "foo" );

    Pipe head = new Pipe( "pipe" );
    Pipe selfJoin = new CoGroup( "cogroup", head, new Fields( "offset" ), 1, Fields.size( 4 ) );

    Map sourceTaps = new HashMap();
    sourceTaps.put( "pipe", mergeSource );

    Map sinkTaps = new HashMap();
    sinkTaps.put( "cogroup", output );

    Flow planned = getPlatform().getFlowConnector().connect( sourceTaps, sinkTaps, selfJoin );

    List<FlowStep> plannedSteps = planned.getFlowSteps();
    assertEquals( "not equal: steps.size()", 1, plannedSteps.size() );
}
/**
 * Binds one tap to a single head pipe, co-groups that pipe on "offset" using the
 * CoGroup constructor that takes an int (here 1; presumably the number of
 * self joins -- confirm), and checks that the connected flow reports one step.
 */
@Test
public void testDupeSourceRepeat() {
    Tap input = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
    Tap result = new Hfs( new TextLine(), "foo" );

    Pipe source = new Pipe( "pipe" );
    Pipe cogroup = new CoGroup( "cogroup", source, new Fields( "offset" ), 1, Fields.size( 4 ) );

    // Heads are keyed by pipe name, tails by the CoGroup's name.
    Map heads = new HashMap();
    heads.put( "pipe", input );
    Map tails = new HashMap();
    tails.put( "cogroup", result );

    Flow flow = getPlatform().getFlowConnector().connect( heads, tails, cogroup );
    List<FlowStep> flowSteps = flow.getFlowSteps();

    assertEquals( "not equal: steps.size()", 1, flowSteps.size() );
}
/**
 * Connects a flow where the very same tap instance is bound to both the "left"
 * and "right" heads of a CoGroup on "offset". The test makes no assertions; it
 * passes as long as connecting the flow does not throw.
 */
@Test
public void testDupeSource2() {
    Tap sharedSource = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
    Tap output = new Hfs( new TextLine(), "foo" );

    Pipe lhs = new Pipe( "left" );
    Pipe rhs = new Pipe( "right" );
    Fields joinKey = new Fields( "offset" );
    Pipe joined = new CoGroup( "cogroup", lhs, joinKey, rhs, new Fields( "offset" ), Fields.size( 4 ) );

    Map sourceTaps = new HashMap();
    sourceTaps.put( "left", sharedSource );
    sourceTaps.put( "right", sharedSource );

    Map sinkTaps = new HashMap();
    sinkTaps.put( "cogroup", output );

    Flow planned = getPlatform().getFlowConnector().connect( sourceTaps, sinkTaps, joined );
}
/**
 * Plans a two-headed CoGroup ("left" and "right", both grouped on "offset")
 * whose heads are fed by one and the same tap. Nothing is asserted afterwards,
 * so the only failure mode is an exception from the connect call.
 */
@Test
public void testDupeSource2() {
    Tap source = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo/merge" );
    Tap sink = new Hfs( new TextLine(), "foo" );

    Pipe leftHead = new Pipe( "left" );
    Pipe rightHead = new Pipe( "right" );
    Pipe cogroup = new CoGroup(
        "cogroup",
        leftHead, new Fields( "offset" ),
        rightHead, new Fields( "offset" ),
        Fields.size( 4 ) );

    // The same tap is intentionally registered under both head names.
    Map heads = new HashMap();
    heads.put( "left", source );
    heads.put( "right", source );

    Map tails = new HashMap();
    tails.put( "cogroup", sink );

    Flow flow = getPlatform().getFlowConnector().connect( heads, tails, cogroup );
}
/**
 * Wraps a two-pipe CoGroup in three further Pipe instances and asserts that the
 * first head reported by the outermost pipe is one of the two original pipes.
 */
@Test
public void testGetFirstJoin() {
    Pipe first = new Pipe( "first" );
    Pipe second = new Pipe( "second" );

    Pipe tail = new CoGroup( first, second );
    for( int wraps = 0; wraps < 3; wraps++ ) {
        tail = new Pipe( tail );
    }

    // Identity comparison on purpose: the head must be the very same instance.
    Pipe firstHead = tail.getHeads()[ 0 ];
    assertTrue( firstHead == first || firstHead == second );
}
/**
 * Builds a graph in which one "lower" head pipe is split into an "lhs" and an
 * "rhs" branch, each branch splits "line" into "num" and "char", and the two
 * branches are co-grouped on "num" under the name "sink".
 */
public SelfCoGroupGraph() {
    Map sourceTaps = new HashMap();
    sourceTaps.put( "lower", new NonTap( "lower", new Fields( "offset", "line" ) ) );

    Map sinkTaps = new HashMap();
    sinkTaps.put( "sink", new NonTap( "sink", new Fields( "offset", "line" ) ) );

    Function lineSplitter = new RegexSplitter( new Fields( "num", "char" ), " " );

    // Both branches hang off the same head pipe.
    Pipe head = new Pipe( "lower" );
    Pipe lhsBranch = new Each( new Pipe( "lhs", head ), new Fields( "line" ), lineSplitter );
    Pipe rhsBranch = new Each( new Pipe( "rhs", head ), new Fields( "line" ), lineSplitter );

    Pipe joined = new CoGroup(
        "sink",
        lhsBranch, new Fields( "num" ),
        rhsBranch, new Fields( "num" ),
        Fields.size( 4 ) );

    initialize( sourceTaps, sinkTaps, joined );
}
} // closes the enclosing class (its header lies outside this excerpt)
@Test public void testCoGroupAroundCoGroup() throws Exception { Tap source10 = new Hfs( new TextLine( new Fields( "num" ) ), "foo" ); Tap source20 = new Hfs( new TextLine( new Fields( "num" ) ), "bar" ); Map sources = new HashMap(); sources.put( "source20", source20 ); sources.put( "source101", source10 ); sources.put( "source102", source10 ); // using null pos so all fields are written Tap sink = new Hfs( new TextLine(), "baz", SinkMode.REPLACE ); Pipe pipeNum20 = new Pipe( "source20" ); Pipe pipeNum101 = new Pipe( "source101" ); Pipe pipeNum102 = new Pipe( "source102" ); Pipe splice1 = new CoGroup( pipeNum20, new Fields( "num" ), pipeNum101, new Fields( "num" ), new Fields( "num1", "num2" ) ); Pipe splice2 = new CoGroup( splice1, new Fields( "num1" ), pipeNum102, new Fields( "num" ), new Fields( "num1", "num2", "num3" ) ); Flow flow = getPlatform().getFlowConnector().connect( sources, sink, splice2 ); assertEquals( "not equal: steps.size()", 2, flow.getFlowSteps().size() ); }
/**
 * Builds Pipe -> Each -> CoGroup inside this method and asserts that the
 * CoGroup's trace matches the expected string naming this test method.
 */
@Test
public void testPipeCoGroup() {
    Pipe head = new Pipe( "foo" );
    Pipe identity = new Each( head, new Fields( "a" ), new Identity() );

    // Constructed directly in this method: the asserted trace names this call site.
    Pipe coGroup = new CoGroup( identity, new Fields( "b" ), 4 );

    String trace = coGroup.getTrace();
    assertEqualsTrace( "cascading.TraceTest.testPipeCoGroup(TraceTest.java", trace );
}
/**
 * Runs a CoGroup on "num" of two named branches ("lhs", "rhs") that both derive
 * from the same "lower" head pipe, reading a space-delimited file. After the
 * flow completes, validateLength is called with 5 and the sink must contain the
 * two expected tab-separated lines.
 */
@Test
public void testCoGroupSamePipe3() throws Exception {
    getPlatform().copyFromLocal( inputFileLower );

    Tap input = getPlatform().getDelimitedFile( new Fields( "num", "char" ), " ", inputFileLower );
    Map sourceTaps = new HashMap();
    sourceTaps.put( "lower", input );

    Tap output = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "samepipe3" ), SinkMode.REPLACE );

    Pipe head = new Pipe( "lower" );
    Pipe left = new Pipe( "lhs", head );
    Pipe right = new Pipe( "rhs", head );

    Fields declared = new Fields( "num1", "char1", "num2", "char2" );
    Pipe joined = new CoGroup( left, new Fields( "num" ), right, new Fields( "num" ), declared );

    Flow flow = getPlatform().getFlowConnector().connect( sourceTaps, output, joined );
    flow.complete();

    validateLength( flow, 5, null );

    List<Tuple> results = getSinkAsList( flow );
    assertTrue( results.contains( new Tuple( "1\ta\t1\ta" ) ) );
    assertTrue( results.contains( new Tuple( "2\tb\t2\tb" ) ) );
}
/**
 * Runs a CoGroup of a single split pipe with itself, using the CoGroup
 * constructor that takes an int (here 1; presumably the self-join count --
 * confirm against the CoGroup Javadoc). After completion, validateLength is
 * called with 5 and the sink must contain the two expected lines.
 */
@Test
public void testCoGroupSamePipe() throws Exception {
    getPlatform().copyFromLocal( inputFileLower );

    Tap input = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower );
    Map sourceTaps = new HashMap();
    sourceTaps.put( "lower", input );

    Tap output = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "samepipe" ), SinkMode.REPLACE );

    // Split each "line" on a single space into "num" and "char".
    Function lineSplitter = new RegexSplitter( new Fields( "num", "char" ), " " );
    Pipe lower = new Each( new Pipe( "lower" ), new Fields( "line" ), lineSplitter );

    Fields declared = new Fields( "num1", "char1", "num2", "char2" );
    Pipe selfJoin = new CoGroup( lower, new Fields( "num" ), 1, declared );

    Flow flow = getPlatform().getFlowConnector().connect( sourceTaps, output, selfJoin );
    flow.complete();

    validateLength( flow, 5, null );

    List<Tuple> results = getSinkAsList( flow );
    assertTrue( results.contains( new Tuple( "1\ta\t1\ta" ) ) );
    assertTrue( results.contains( new Tuple( "2\tb\t2\tb" ) ) );
}
/**
 * Executes a flow that co-groups one split pipe on "num" through the
 * int-argument CoGroup constructor (value 1; likely the number of self joins --
 * verify), then calls validateLength with 5 and checks the sink for the lines
 * "1 a 1 a" and "2 b 2 b" in tab-separated form.
 */
@Test
public void testCoGroupSamePipe() throws Exception {
    getPlatform().copyFromLocal( inputFileLower );

    Tap lowerTap = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower );

    Map heads = new HashMap();
    heads.put( "lower", lowerTap );

    Tap sinkTap = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "samepipe" ), SinkMode.REPLACE );

    Function split = new RegexSplitter( new Fields( "num", "char" ), " " );
    Pipe splitLower = new Each( new Pipe( "lower" ), new Fields( "line" ), split );

    Pipe joined = new CoGroup(
        splitLower, new Fields( "num" ),
        1,
        new Fields( "num1", "char1", "num2", "char2" ) );

    Flow completed = getPlatform().getFlowConnector().connect( heads, sinkTap, joined );
    completed.complete();

    validateLength( completed, 5, null );

    List<Tuple> sinkTuples = getSinkAsList( completed );
    for( String expectedLine : new String[]{ "1\ta\t1\ta", "2\tb\t2\tb" } ) {
        assertTrue( sinkTuples.contains( new Tuple( expectedLine ) ) );
    }
}
/**
 * Runs a CoGroup in which the very same Each pipe instance is passed as both
 * the left and the right side, grouped on "num". After completion,
 * validateLength is called with 5 and the sink must contain the two expected lines.
 */
@Test
public void testCoGroupSamePipe2() throws Exception {
    getPlatform().copyFromLocal( inputFileLower );

    Tap input = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower );
    Map sourceTaps = new HashMap();
    sourceTaps.put( "lower", input );

    Tap output = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "samepipe2" ), SinkMode.REPLACE );

    // Split each "line" on a single space into "num" and "char".
    Function lineSplitter = new RegexSplitter( new Fields( "num", "char" ), " " );
    Pipe lower = new Each( new Pipe( "lower" ), new Fields( "line" ), lineSplitter );

    Fields declared = new Fields( "num1", "char1", "num2", "char2" );
    Pipe selfJoin = new CoGroup( lower, new Fields( "num" ), lower, new Fields( "num" ), declared );

    Flow flow = getPlatform().getFlowConnector().connect( sourceTaps, output, selfJoin );
    flow.complete();

    validateLength( flow, 5, null );

    List<Tuple> results = getSinkAsList( flow );
    assertTrue( results.contains( new Tuple( "1\ta\t1\ta" ) ) );
    assertTrue( results.contains( new Tuple( "2\tb\t2\tb" ) ) );
}
/**
 * Executes a flow whose CoGroup receives one split pipe instance on both sides
 * (each grouped on "num"), then calls validateLength with 5 and checks the sink
 * for the two expected tab-separated lines.
 */
@Test
public void testCoGroupSamePipe2() throws Exception {
    getPlatform().copyFromLocal( inputFileLower );

    Tap lowerTap = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower );

    Map heads = new HashMap();
    heads.put( "lower", lowerTap );

    Tap sinkTap = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "samepipe2" ), SinkMode.REPLACE );

    Function split = new RegexSplitter( new Fields( "num", "char" ), " " );
    Pipe splitLower = new Each( new Pipe( "lower" ), new Fields( "line" ), split );

    // Same pipe instance deliberately used for both join sides.
    Pipe joined = new CoGroup(
        splitLower, new Fields( "num" ),
        splitLower, new Fields( "num" ),
        new Fields( "num1", "char1", "num2", "char2" ) );

    Flow completed = getPlatform().getFlowConnector().connect( heads, sinkTap, joined );
    completed.complete();

    validateLength( completed, 5, null );

    List<Tuple> sinkTuples = getSinkAsList( completed );
    for( String expectedLine : new String[]{ "1\ta\t1\ta", "2\tb\t2\tb" } ) {
        assertTrue( sinkTuples.contains( new Tuple( expectedLine ) ) );
    }
}
/**
 * Connects a flow that co-groups the two given taps on their first field
 * (position 0) and writes to a text file at the "third" output path.
 *
 * @param lhs tap bound to the "lhs" head pipe
 * @param rhs tap bound to the "rhs" head pipe
 * @return the connected flow; this method does not run it
 */
private Flow thirdFlow( Tap lhs, Tap rhs ) {
    Pipe left = new Pipe( "lhs" );
    Pipe right = new Pipe( "rhs" );
    Pipe joined = new CoGroup( left, new Fields( 0 ), right, new Fields( 0 ), Fields.size( 2 ) );

    Tap output = getPlatform().getTextFile( getOutputPath( "third" ), SinkMode.REPLACE );

    // Pair each head pipe with its tap, in matching order.
    Pipe[] heads = Pipe.pipes( left, right );
    Tap[] inputs = Tap.taps( lhs, rhs );

    return getPlatform().getFlowConnector().connect( Cascades.tapsMap( heads, inputs ), output, joined );
}
/**
 * Builds the "third" flow: a CoGroup of an "lhs" and an "rhs" head pipe, each
 * grouped on field position 0, sinking into a text file at the "third" output path.
 *
 * @param lhs source tap for the "lhs" head
 * @param rhs source tap for the "rhs" head
 * @return the flow returned by the connector (not yet completed here)
 */
private Flow thirdFlow( Tap lhs, Tap rhs ) {
    Pipe lhsHead = new Pipe( "lhs" );
    Pipe rhsHead = new Pipe( "rhs" );

    Pipe cogroup = new CoGroup(
        lhsHead, new Fields( 0 ),
        rhsHead, new Fields( 0 ),
        Fields.size( 2 ) );

    Tap thirdSink = getPlatform().getTextFile( getOutputPath( "third" ), SinkMode.REPLACE );

    return getPlatform().getFlowConnector().connect(
        Cascades.tapsMap( Pipe.pipes( lhsHead, rhsHead ), Tap.taps( lhs, rhs ) ),
        thirdSink,
        cogroup );
}
@Test public void testCoGroupWithResultGroupFieldsDefault() throws Exception { Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo" ); Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "bar" ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); // using null pos so all fields are written Tap sink = new Hfs( new TextLine(), "/complex/cogroup/", SinkMode.REPLACE ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new Fields( "num1", "lhs", "num2", "rhs" ) ); splice = new Every( splice, new First( new Fields( "value" ) ), new Fields( "num1", "value" ) ); Flow countFlow = getPlatform().getFlowConnector().connect( sources, sink, splice ); }
@Test public void testCoGroupWithResultGroupFieldsDefault() throws Exception { Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo" ); Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "bar" ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); // using null pos so all fields are written Tap sink = new Hfs( new TextLine(), "/complex/cogroup/", SinkMode.REPLACE ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new Fields( "num1", "lhs", "num2", "rhs" ) ); splice = new Every( splice, new First( new Fields( "value" ) ), new Fields( "num1", "value" ) ); Flow countFlow = getPlatform().getFlowConnector().connect( sources, sink, splice ); }
@Test public void testCoGroupWithResultGroupFields() throws Exception { Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo" ); Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "bar" ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); // using null pos so all fields are written Tap sink = new Hfs( new TextLine(), "/complex/cogroup/", SinkMode.REPLACE ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new Fields( "num1", "lhs", "num2", "rhs" ), new Fields( "somenum", "somenum2" ) ); splice = new Every( splice, new First( new Fields( "value" ) ), new Fields( "somenum", "value" ) ); Flow countFlow = getPlatform().getFlowConnector().connect( sources, sink, splice ); }
@Test public void testCoGroupWithResultGroupFields() throws Exception { Tap sourceLower = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "foo" ); Tap sourceUpper = new Hfs( new TextLine( new Fields( "offset", "line" ) ), "bar" ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); // using null pos so all fields are written Tap sink = new Hfs( new TextLine(), "/complex/cogroup/", SinkMode.REPLACE ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new CoGroup( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), new Fields( "num1", "lhs", "num2", "rhs" ), new Fields( "somenum", "somenum2" ) ); splice = new Every( splice, new First( new Fields( "value" ) ), new Fields( "somenum", "value" ) ); Flow countFlow = getPlatform().getFlowConnector().connect( sources, sink, splice ); }