List<Tuple> values = getSinkAsList( flow );
List<Tuple> values = getSinkAsList( flow );
List<Tuple> values = getSinkAsList( flow );
List<Tuple> values = getSinkAsList( flow );
@Test public void testHashJoinDistCacheTapRHS() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = ( (BaseHadoopPlatform) getPlatform() ).getDistCacheTap( (Hfs) getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ) ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( getTestName() + "join" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); Map<Object, Object> properties = getProperties(); Flow flow = getPlatform().getFlowConnector( properties ).connect( "distcache test", sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); assertTrue( values.contains( new Tuple( "3\tc\t3\tC" ) ) ); assertTrue( values.contains( new Tuple( "4\td\t4\tD" ) ) ); assertTrue( values.contains( new Tuple( "5\te\t5\tE" ) ) ); }
@Test public void testHashJoinDistCacheTapLHS() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = ( (BaseHadoopPlatform) getPlatform() ).getDistCacheTap( (Hfs) getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ) ); Tap sourceUpper = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( getTestName() + "join" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); Map<Object, Object> properties = getProperties(); Flow flow = getPlatform().getFlowConnector( properties ).connect( "distcache test", sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); assertTrue( values.contains( new Tuple( "3\tc\t3\tC" ) ) ); assertTrue( values.contains( new Tuple( "4\td\t4\tD" ) ) ); assertTrue( values.contains( new Tuple( "5\te\t5\tE" ) ) ); }
@Test public void testHashJoinDistCacheTapRHS() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = ( (BaseHadoopPlatform) getPlatform() ).getDistCacheTap( (Hfs) getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ) ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( getTestName() + "join" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); Map<Object, Object> properties = getProperties(); Flow flow = getPlatform().getFlowConnector( properties ).connect( "distcache test", sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); assertTrue( values.contains( new Tuple( "3\tc\t3\tC" ) ) ); assertTrue( values.contains( new Tuple( "4\td\t4\tD" ) ) ); assertTrue( values.contains( new Tuple( "5\te\t5\tE" ) ) ); }
@Test public void testHashJoinDistCacheTapLHS() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = ( (BaseHadoopPlatform) getPlatform() ).getDistCacheTap( (Hfs) getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ) ); Tap sourceUpper = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( getTestName() + "join" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); Map<Object, Object> properties = getProperties(); Flow flow = getPlatform().getFlowConnector( properties ).connect( "distcache test", sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); assertTrue( values.contains( new Tuple( "3\tc\t3\tC" ) ) ); assertTrue( values.contains( new Tuple( "4\td\t4\tD" ) ) ); assertTrue( values.contains( new Tuple( "5\te\t5\tE" ) ) ); }
@Test public void testHashJoinCheckpointWithDistCacheDecorator() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "join" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); pipeUpper = new Checkpoint( pipeUpper ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); Map<Object, Object> properties = getProperties(); FlowConnectorProps.setCheckpointTapDecoratorClass( properties, "cascading.tap.hadoop.DistCacheTap" ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); }
@Test public void testHashJoinCheckpointWithDistCacheDecorator() throws Exception { getPlatform().copyFromLocal( inputFileLower ); getPlatform().copyFromLocal( inputFileUpper ); Tap sourceLower = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileLower ); Tap sourceUpper = getPlatform().getTextFile( new Fields( "offset", "line" ), inputFileUpper ); Map sources = new HashMap(); sources.put( "lower", sourceLower ); sources.put( "upper", sourceUpper ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "join" ), SinkMode.REPLACE ); Function splitter = new RegexSplitter( new Fields( "num", "char" ), " " ); Pipe pipeLower = new Each( new Pipe( "lower" ), new Fields( "line" ), splitter ); Pipe pipeUpper = new Each( new Pipe( "upper" ), new Fields( "line" ), splitter ); pipeUpper = new Checkpoint( pipeUpper ); Pipe splice = new HashJoin( pipeLower, new Fields( "num" ), pipeUpper, new Fields( "num" ), Fields.size( 4 ) ); Map<Object, Object> properties = getProperties(); FlowConnectorProps.setCheckpointTapDecoratorClass( properties, "cascading.tap.hadoop.DistCacheTap" ); Flow flow = getPlatform().getFlowConnector( properties ).connect( sources, sink, splice ); flow.complete(); validateLength( flow, 5 ); List<Tuple> values = getSinkAsList( flow ); assertTrue( values.contains( new Tuple( "1\ta\t1\tA" ) ) ); assertTrue( values.contains( new Tuple( "2\tb\t2\tB" ) ) ); }