/**
 * Constructs a EuclideanDistance by handing every argument to the superclass constructor,
 * together with a freshly created {@code Euclidean} instance as the third argument.
 *
 * @param previous              pipe forwarded unchanged as the first superclass argument
 *                              (presumably the upstream pipe -- confirm against the superclass)
 * @param argumentFieldSelector fields forwarded unchanged as the second superclass argument
 * @param fieldDeclaration      fields forwarded unchanged as the last superclass argument
 */
public EuclideanDistance( Pipe previous, Fields argumentFieldSelector, Fields fieldDeclaration ) { super( previous, argumentFieldSelector, new Euclidean(), fieldDeclaration ); }
/**
 * Constructs a PearsonDistance by handing every argument to the superclass constructor,
 * together with a freshly created {@code Pearson} instance as the third argument.
 *
 * @param previous              pipe forwarded unchanged as the first superclass argument
 *                              (presumably the upstream pipe -- confirm against the superclass)
 * @param argumentFieldSelector fields forwarded unchanged as the second superclass argument
 * @param fieldDeclaration      fields forwarded unchanged as the last superclass argument
 */
public PearsonDistance( Pipe previous, Fields argumentFieldSelector, Fields fieldDeclaration ) { super( previous, argumentFieldSelector, new Pearson(), fieldDeclaration ); }
// Wrap the current pipe in an Each that applies SortElements to the ("n1", "v1") and
// ("n2", "v2") field pairs, then pass the resulting pipe to setTails.
// NOTE(review): presumably this normalizes the order of each pair -- confirm in SortElements.
pipe = new Each( pipe, new SortElements( new Fields( "n1", "v1" ), new Fields( "n2", "v2" ) ) ); setTails( pipe );
// Wrap the current pipe in an Each that applies SortElements to the ("name1", "rate1") and
// ("name2", "rate2") field pairs.
// NOTE(review): presumably this normalizes the order of each pair -- confirm in SortElements.
pipe = new Each( pipe, new SortElements( new Fields( "name1", "rate1" ), new Fields( "name2", "rate2" ) ) );
@Test public void testEuclideanDistanceComposite() throws Exception { getPlatform().copyFromLocal( inputFileCritics ); Tap source = getPlatform().getTextFile( inputFileCritics ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "euclidean/composite" ), SinkMode.REPLACE ); // unknown number of elements Pipe pipe = new Each( "euclidean", new Fields( "line" ), new RegexSplitter( "\t" ) ); // break not names and movies pipe = new Each( pipe, new UnGroup( new Fields( "name", "movie", "rate" ), Fields.FIRST, 2 ) ); // name and rate against others of same movie pipe = new EuclideanDistance( pipe, new Fields( "name", "movie", "rate" ), new Fields( "name1", "name2", "distance" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 21 ); List<Tuple> results = getSinkAsList( flow ); assertTrue( results.contains( new Tuple( "GeneSeymour\tLisaRose\t0.14814814814814814" ) ) ); }
@Test public void testPearsonDistanceComposite() throws Exception { getPlatform().copyFromLocal( inputFileCritics ); Tap source = getPlatform().getTextFile( inputFileCritics ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "pearson/composite" ), SinkMode.REPLACE ); // unknown number of elements Pipe pipe = new Each( "pearson", new Fields( "line" ), new RegexSplitter( "\t" ) ); // break not names and movies pipe = new Each( pipe, new UnGroup( new Fields( "name", "movie", "rate" ), Fields.FIRST, 2 ) ); // name and rate against others of same movie pipe = new PearsonDistance( pipe, new Fields( "name", "movie", "rate" ), new Fields( "name1", "name2", "distance" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 21 ); List<Tuple> results = getSinkAsList( flow ); assertTrue( results.contains( new Tuple( "GeneSeymour\tLisaRose\t0.39605901719066977" ) ) ); } }
// Wrap the current pipe in an Each that applies SortElements to the ("name1", "rate1") and
// ("name2", "rate2") field pairs.
// NOTE(review): presumably this normalizes the order of each pair -- confirm in SortElements.
pipe = new Each( pipe, new SortElements( new Fields( "name1", "rate1" ), new Fields( "name2", "rate2" ) ) );
@Test public void testEuclideanDistanceComposite() throws Exception { getPlatform().copyFromLocal( inputFileCritics ); Tap source = getPlatform().getTextFile( inputFileCritics ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "euclidean/composite" ), SinkMode.REPLACE ); // unknown number of elements Pipe pipe = new Each( "euclidean", new Fields( "line" ), new RegexSplitter( "\t" ) ); // break not names and movies pipe = new Each( pipe, new UnGroup( new Fields( "name", "movie", "rate" ), Fields.FIRST, 2 ) ); // name and rate against others of same movie pipe = new EuclideanDistance( pipe, new Fields( "name", "movie", "rate" ), new Fields( "name1", "name2", "distance" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 21 ); List<Tuple> results = getSinkAsList( flow ); assertTrue( results.contains( new Tuple( "GeneSeymour\tLisaRose\t0.14814814814814814" ) ) ); }
@Test public void testPearsonDistanceComposite() throws Exception { getPlatform().copyFromLocal( inputFileCritics ); Tap source = getPlatform().getTextFile( inputFileCritics ); Tap sink = getPlatform().getTextFile( new Fields( "line" ), getOutputPath( "pearson/composite" ), SinkMode.REPLACE ); // unknown number of elements Pipe pipe = new Each( "pearson", new Fields( "line" ), new RegexSplitter( "\t" ) ); // break not names and movies pipe = new Each( pipe, new UnGroup( new Fields( "name", "movie", "rate" ), Fields.FIRST, 2 ) ); // name and rate against others of same movie pipe = new PearsonDistance( pipe, new Fields( "name", "movie", "rate" ), new Fields( "name1", "name2", "distance" ) ); Flow flow = getPlatform().getFlowConnector().connect( source, sink, pipe ); flow.complete(); validateLength( flow, 21 ); List<Tuple> results = getSinkAsList( flow ); assertTrue( results.contains( new Tuple( "GeneSeymour\tLisaRose\t0.39605901719066977" ) ) ); } }
// Wrap the current pipe in an Each that applies SortElements to the ("name1", "rate1") and
// ("name2", "rate2") field pairs.
// NOTE(review): presumably this normalizes the order of each pair -- confirm in SortElements.
pipe = new Each( pipe, new SortElements( new Fields( "name1", "rate1" ), new Fields( "name2", "rate2" ) ) );
// Wrap the current pipe in an Each that applies SortElements to the ("name1", "rate1") and
// ("name2", "rate2") field pairs.
// NOTE(review): presumably this normalizes the order of each pair -- confirm in SortElements.
pipe = new Each( pipe, new SortElements( new Fields( "name1", "rate1" ), new Fields( "name2", "rate2" ) ) );