/**
 * Round-trips the lower-case input through a partition tap with no source partition filter.
 * <p>
 * The first flow writes the {@code lower} field through a partition tap built from a
 * {@link DelimitedPartition} over the {@code number} field. The second flow reads the data back
 * through a newly created partition tap over the same parent tap, and the test asserts that all
 * five {@code (number, lower)} tuples reach the final sink.
 * <p>
 * NOTE(review): an identical definition of this method sits directly next to this one in the
 * file; one of the two copies should be removed.
 *
 * @throws Exception propagated from staging the input, running either flow, or reading the sink
 */
@Test
public void testFilteredPartitionTap_NoFilters() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );

  Tap lowerInput = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), " ", inputFileLower );

  // Parent tap and partition shared by the write-side and read-side partition taps.
  Tap parentTap = getPlatform().getDelimitedFile( new Fields( "lower" ), "+", getOutputPath( "/filteredpartition/partitioned" ), SinkMode.REPLACE );
  Partition byNumber = new DelimitedPartition( new Fields( "number" ) );

  // Write phase: push the input into the partitioned output.
  Tap partitionSink = getPlatform().getPartitionTap( parentTap, byNumber, 1 );
  Flow writeFlow = getPlatform().getFlowConnector().connect( lowerInput, partitionSink, new Pipe( "partition" ) );
  writeFlow.complete();

  // Read phase: a fresh partition tap instance is used as the source, with no filter added.
  Tap partitionSource = getPlatform().getPartitionTap( parentTap, byNumber, 1 );
  Tap finalSink = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), "+", getOutputPath( "/filteredpartition/final" ), SinkMode.REPLACE );
  Flow readFlow = getPlatform().getFlowConnector().connect( partitionSource, finalSink, new Pipe( "copy" ) );
  readFlow.complete();

  List<Tuple> results = getSinkAsList( readFlow );

  Tuple[] expected = {
    new Tuple( "1", "a" ),
    new Tuple( "2", "b" ),
    new Tuple( "3", "c" ),
    new Tuple( "4", "d" ),
    new Tuple( "5", "e" )
  };

  assertEquals( expected.length, results.size() );
  for( Tuple tuple : expected ) {
    assertTrue( results.contains( tuple ) );
  }
}
/**
 * Round-trips the lower-case input through a partition tap with no source partition filter.
 * <p>
 * The first flow writes the {@code lower} field through a partition tap built from a
 * {@link DelimitedPartition} over the {@code number} field. The second flow reads the data back
 * through a newly created partition tap over the same parent tap, and the test asserts that all
 * five {@code (number, lower)} tuples reach the final sink.
 * <p>
 * NOTE(review): an identical definition of this method sits directly next to this one in the
 * file; one of the two copies should be removed.
 *
 * @throws Exception propagated from staging the input, running either flow, or reading the sink
 */
@Test
public void testFilteredPartitionTap_NoFilters() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );

  Tap lowerInput = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), " ", inputFileLower );

  // Parent tap and partition shared by the write-side and read-side partition taps.
  Tap parentTap = getPlatform().getDelimitedFile( new Fields( "lower" ), "+", getOutputPath( "/filteredpartition/partitioned" ), SinkMode.REPLACE );
  Partition byNumber = new DelimitedPartition( new Fields( "number" ) );

  // Write phase: push the input into the partitioned output.
  Tap partitionSink = getPlatform().getPartitionTap( parentTap, byNumber, 1 );
  Flow writeFlow = getPlatform().getFlowConnector().connect( lowerInput, partitionSink, new Pipe( "partition" ) );
  writeFlow.complete();

  // Read phase: a fresh partition tap instance is used as the source, with no filter added.
  Tap partitionSource = getPlatform().getPartitionTap( parentTap, byNumber, 1 );
  Tap finalSink = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), "+", getOutputPath( "/filteredpartition/final" ), SinkMode.REPLACE );
  Flow readFlow = getPlatform().getFlowConnector().connect( partitionSource, finalSink, new Pipe( "copy" ) );
  readFlow.complete();

  List<Tuple> results = getSinkAsList( readFlow );

  Tuple[] expected = {
    new Tuple( "1", "a" ),
    new Tuple( "2", "b" ),
    new Tuple( "3", "c" ),
    new Tuple( "4", "d" ),
    new Tuple( "5", "e" )
  };

  assertEquals( expected.length, results.size() );
  for( Tuple tuple : expected ) {
    assertTrue( results.contains( tuple ) );
  }
}
/**
 * Round-trips the lower-case input through a partition tap and reads it back with a source
 * partition filter installed.
 * <p>
 * The first flow writes the {@code lower} field through a partition tap built from a
 * {@link DelimitedPartition} over the {@code number} field. Before the second flow runs, a
 * {@code PartitionFilter} constructed from the values {@code "2"} and {@code "4"} is registered
 * on the read-side partition tap for the {@code number} field. The test then expects exactly
 * three tuples in the final sink: those numbered 1, 3 and 5.
 * <p>
 * NOTE(review): an identical definition of this method sits directly next to this one in the
 * file; one of the two copies should be removed.
 *
 * @throws Exception propagated from staging the input, running either flow, or reading the sink
 */
@Test
public void testFilteredPartitionTap_Typical() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );

  Tap lowerInput = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), " ", inputFileLower );

  // Parent tap and partition shared by the write-side and read-side partition taps.
  Tap parentTap = getPlatform().getDelimitedFile( new Fields( "lower" ), "+", getOutputPath( "/filteredpartition/partitioned" ), SinkMode.REPLACE );
  Partition byNumber = new DelimitedPartition( new Fields( "number" ) );

  // Write phase: push the input into the partitioned output.
  PartitionTap partitionSink = (PartitionTap) getPlatform().getPartitionTap( parentTap, byNumber, 1 );
  Flow writeFlow = getPlatform().getFlowConnector().connect( lowerInput, partitionSink, new Pipe( "partition" ) );
  writeFlow.complete();

  // Read phase: a fresh partition tap instance carries the filter on the "number" field.
  PartitionTap partitionSource = (PartitionTap) getPlatform().getPartitionTap( parentTap, byNumber, 1 );
  PartitionFilter numberFilter = new PartitionFilter( Arrays.asList( "2", "4" ) );
  partitionSource.addSourcePartitionFilter( new Fields( "number" ), numberFilter );

  Tap finalSink = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), "+", getOutputPath( "/filteredpartition/final" ), SinkMode.REPLACE );
  Flow readFlow = getPlatform().getFlowConnector().connect( partitionSource, finalSink, new Pipe( "copy" ) );
  readFlow.complete();

  List<Tuple> results = getSinkAsList( readFlow );

  Tuple[] expected = {
    new Tuple( "1", "a" ),
    new Tuple( "3", "c" ),
    new Tuple( "5", "e" )
  };

  assertEquals( expected.length, results.size() );
  for( Tuple tuple : expected ) {
    assertTrue( results.contains( tuple ) );
  }
}
/**
 * Round-trips the lower-case input through a partition tap and reads it back with a source
 * partition filter installed.
 * <p>
 * The first flow writes the {@code lower} field through a partition tap built from a
 * {@link DelimitedPartition} over the {@code number} field. Before the second flow runs, a
 * {@code PartitionFilter} constructed from the values {@code "2"} and {@code "4"} is registered
 * on the read-side partition tap for the {@code number} field. The test then expects exactly
 * three tuples in the final sink: those numbered 1, 3 and 5.
 * <p>
 * NOTE(review): an identical definition of this method sits directly next to this one in the
 * file; one of the two copies should be removed.
 *
 * @throws Exception propagated from staging the input, running either flow, or reading the sink
 */
@Test
public void testFilteredPartitionTap_Typical() throws Exception {
  getPlatform().copyFromLocal( inputFileLower );

  Tap lowerInput = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), " ", inputFileLower );

  // Parent tap and partition shared by the write-side and read-side partition taps.
  Tap parentTap = getPlatform().getDelimitedFile( new Fields( "lower" ), "+", getOutputPath( "/filteredpartition/partitioned" ), SinkMode.REPLACE );
  Partition byNumber = new DelimitedPartition( new Fields( "number" ) );

  // Write phase: push the input into the partitioned output.
  PartitionTap partitionSink = (PartitionTap) getPlatform().getPartitionTap( parentTap, byNumber, 1 );
  Flow writeFlow = getPlatform().getFlowConnector().connect( lowerInput, partitionSink, new Pipe( "partition" ) );
  writeFlow.complete();

  // Read phase: a fresh partition tap instance carries the filter on the "number" field.
  PartitionTap partitionSource = (PartitionTap) getPlatform().getPartitionTap( parentTap, byNumber, 1 );
  PartitionFilter numberFilter = new PartitionFilter( Arrays.asList( "2", "4" ) );
  partitionSource.addSourcePartitionFilter( new Fields( "number" ), numberFilter );

  Tap finalSink = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), "+", getOutputPath( "/filteredpartition/final" ), SinkMode.REPLACE );
  Flow readFlow = getPlatform().getFlowConnector().connect( partitionSource, finalSink, new Pipe( "copy" ) );
  readFlow.complete();

  List<Tuple> results = getSinkAsList( readFlow );

  Tuple[] expected = {
    new Tuple( "1", "a" ),
    new Tuple( "3", "c" ),
    new Tuple( "5", "e" )
  };

  assertEquals( expected.length, results.size() );
  for( Tuple tuple : expected ) {
    assertTrue( results.contains( tuple ) );
  }
}
@Test public void testCombinedPartitionTap() throws Exception { getPlatform().copyFromLocal( inputFileLower ); Tap source = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), " ", inputFileLower ); Tap partitionTap = getPlatform().getDelimitedFile( new Fields( "lower" ), "+", getOutputPath( "/combinedpartition/partitioned" ), SinkMode.REPLACE ); Partition partition = new DelimitedPartition( new Fields( "number" ) ); partitionTap = getPlatform().getPartitionTap( partitionTap, partition, 1 ); Flow firstFlow = getPlatform().getFlowConnector().connect( source, partitionTap, new Pipe( "partition" ) ); firstFlow.complete(); // Configure combine inputs for reading from the partition tap Map<Object, Object> properties = getProperties(); HfsProps.setUseCombinedInput( properties, true ); //set to lots of bytes so the test will combine all input files HfsProps.setCombinedInputMaxSize( properties, 100000000L ); Tap sink = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), "+", getOutputPath( "/combinedpartition/final" ), SinkMode.REPLACE ); Flow secondFlow = getPlatform().getFlowConnector( properties ).connect( partitionTap, sink, new Pipe( "copy" ) ); secondFlow.complete(); //Asserting we combined all partition files into one mapper if( getPlatform().isUseCluster() ) assertEquals( 1, secondFlow.getStats().getCounterValue( JobInProgress.Counter.TOTAL_LAUNCHED_MAPS ) ); List<Tuple> values = getSinkAsList( secondFlow ); assertEquals( 5, values.size() ); assertTrue( values.contains( new Tuple( "1", "a" ) ) ); assertTrue( values.contains( new Tuple( "2", "b" ) ) ); assertTrue( values.contains( new Tuple( "3", "c" ) ) ); assertTrue( values.contains( new Tuple( "4", "d" ) ) ); assertTrue( values.contains( new Tuple( "5", "e" ) ) ); }
@Test public void testCombinedPartitionTap() throws Exception { getPlatform().copyFromLocal( inputFileLower ); Tap source = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), " ", inputFileLower ); Tap partitionTap = getPlatform().getDelimitedFile( new Fields( "lower" ), "+", getOutputPath( "/combinedpartition/partitioned" ), SinkMode.REPLACE ); Partition partition = new DelimitedPartition( new Fields( "number" ) ); partitionTap = getPlatform().getPartitionTap( partitionTap, partition, 1 ); Flow firstFlow = getPlatform().getFlowConnector().connect( source, partitionTap, new Pipe( "partition" ) ); firstFlow.complete(); // Configure combine inputs for reading from the partition tap Map<Object, Object> properties = getProperties(); HfsProps.setUseCombinedInput( properties, true ); //set to lots of bytes so the test will combine all input files HfsProps.setCombinedInputMaxSize( properties, 100000000L ); Tap sink = getPlatform().getDelimitedFile( new Fields( "number", "lower" ), "+", getOutputPath( "/combinedpartition/final" ), SinkMode.REPLACE ); Flow secondFlow = getPlatform().getFlowConnector( properties ).connect( partitionTap, sink, new Pipe( "copy" ) ); secondFlow.complete(); //Asserting we combined all partition files into one mapper if( getPlatform().isUseCluster() ) assertEquals( 1, secondFlow.getStats().getCounterValue( JobInProgress.Counter.TOTAL_LAUNCHED_MAPS ) ); List<Tuple> values = getSinkAsList( secondFlow ); assertEquals( 5, values.size() ); assertTrue( values.contains( new Tuple( "1", "a" ) ) ); assertTrue( values.contains( new Tuple( "2", "b" ) ) ); assertTrue( values.contains( new Tuple( "3", "c" ) ) ); assertTrue( values.contains( new Tuple( "4", "d" ) ) ); assertTrue( values.contains( new Tuple( "5", "e" ) ) ); }