@Test public void testPearsonDistanceComposite() throws Exception { getPlatform().copyFromLocal( inputFileCritics ); Tap source = getPlatform().getTextFile( inputFileCritics ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "pearson/composite" ), SinkMode.REPLACE ); // unknown number of elements Pipe pipe = new Each( "pearson", new Fields( "line" ), new RegexSplitter( "\t" ) ); // break not names and movies pipe = new Each( pipe, new UnGroup( new Fields( "name", "movie", "rate" ), Fields.FIRST, 2 ) ); // name and rate against others of same movie pipe = new PearsonDistance( pipe, new Fields( "name", "movie", "rate" ), new Fields( "name1", "name2", "distance" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 21 ); List<Tuple> results = getSinkAsList( flow ); assertTrue( results.contains( new Tuple( "GeneSeymour\tLisaRose\t0.39605901719066977" ) ) ); } }
@Test public void testPearsonDistanceComposite() throws Exception { getPlatform().copyFromLocal( inputFileCritics ); Tap source = getPlatform().getTextFile( inputFileCritics ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "pearson/composite" ), SinkMode.REPLACE ); // unknown number of elements Pipe pipe = new Each( "pearson", new Fields( "line" ), new RegexSplitter( "\t" ) ); // break not names and movies pipe = new Each( pipe, new UnGroup( new Fields( "name", "movie", "rate" ), Fields.FIRST, 2 ) ); // name and rate against others of same movie pipe = new PearsonDistance( pipe, new Fields( "name", "movie", "rate" ), new Fields( "name1", "name2", "distance" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 21 ); List<Tuple> results = getSinkAsList( flow ); assertTrue( results.contains( new Tuple( "GeneSeymour\tLisaRose\t0.39605901719066977" ) ) ); } }