/**
 * Configures the rule to consider 10 candidates while the data point only has 3 dimensions.
 * The test passes when {@code select} completes without throwing; if the mismatch were not
 * handled an out of bounds exception would be expected.
 */
@Test
public void splitExceedsDimension() {
	Random rand = new Random(234);
	AxisSplitRuleRandomK rule = new AxisSplitRuleRandomK(rand,10);

	// fewer dimensions than the number of candidates requested above
	rule.setDimension(3);

	double[] variance = new double[3];
	rule.select(variance);
}
/**
 * Creates an approximate {@link NearestNeighbor} search built from a forest of randomly generated
 * K-D trees which is searched using Best-Bin-First. Intended for high dimensional data. Distance
 * between points is computed by the supplied {@code distance}.
 *
 * @see KdForestBbfSearch
 * @see AxisSplitterMedian
 * @see AxisSplitRuleRandomK
 *
 * @param distance Specifies how distance is computed between two points.
 * @param maxNodesSearched Maximum number of nodes it will search. Controls speed and accuracy.
 * @param numTrees Number of trees that are considered. Try 10 and tune.
 * @param numConsiderSplit Number of candidates considered for the split when generating a tree.
 *                         Must be less than the point's dimension. Try 5
 * @param randomSeed Seed used by the random number generator
 * @param <P> Point type.
 * @return {@link NearestNeighbor} implementation
 */
public static <P> NearestNeighbor<P> kdRandomForest( KdTreeDistance<P> distance ,
													 int maxNodesSearched ,
													 int numTrees ,
													 int numConsiderSplit ,
													 long randomSeed )
{
	// Components are created inner-most first, the same order the nested constructor calls would run
	Random seededRandom = new Random(randomSeed);
	AxisSplitRuleRandomK splitRule = new AxisSplitRuleRandomK(seededRandom,numConsiderSplit);
	AxisSplitterMedian<P> splitter = new AxisSplitterMedian<>(distance,splitRule);

	return new KdForestBbfSearch<>(numTrees,maxNodesSearched,distance,splitter);
}
/**
 * Creates an approximate {@link NearestNeighbor} search built from a forest of randomly generated
 * K-D trees which is searched using Best-Bin-First. Intended for high dimensional data. Distance
 * between points is computed by the supplied {@code distance}.
 *
 * @see KdForestBbfSearch
 * @see AxisSplitterMedian
 * @see AxisSplitRuleRandomK
 *
 * @param distance Specifies how distance is computed between two points.
 * @param maxNodesSearched Maximum number of nodes it will search. Controls speed and accuracy.
 * @param numTrees Number of trees that are considered. Try 10 and tune.
 * @param numConsiderSplit Number of candidates considered for the split when generating a tree.
 *                         Must be less than the point's dimension. Try 5
 * @param randomSeed Seed used by the random number generator
 * @param <P> Point type.
 * @return {@link NearestNeighbor} implementation
 */
public static <P> NearestNeighbor<P> kdRandomForest( KdTreeDistance<P> distance ,
													 int maxNodesSearched ,
													 int numTrees ,
													 int numConsiderSplit ,
													 long randomSeed )
{
	// Components are created inner-most first, the same order the nested constructor calls would run
	Random seededRandom = new Random(randomSeed);
	AxisSplitRuleRandomK splitRule = new AxisSplitRuleRandomK(seededRandom,numConsiderSplit);
	AxisSplitterMedian<P> splitter = new AxisSplitterMedian<>(distance,splitRule);

	return new KdForestBbfSearch<>(numTrees,maxNodesSearched,distance,splitter);
}
@Test public void basic() { AxisSplitRuleRandomK alg = new AxisSplitRuleRandomK(new Random(234),3); // results are random. Test to see if only the expected numbers are returned int num10 = 0; int num11 = 0; int num12 = 0; for( int i = 0; i < 20; i++ ) { double[] var = new double[]{1,2,3,10,4,5,5,6,11,12}; alg.setDimension(var.length); int found = alg.select(var); switch( found ) { case 3: num10++; break; case 8: num11++; break; case 9: num12++; break; default: fail("Unexpected value"); } } assertTrue(num10 > 2); assertTrue(num11 > 2); assertTrue(num12 > 2); }
public TestKdForestBbfSearch() { // set the max nodes so it that it will produce perfect results AxisSplitRule rule = new AxisSplitRuleRandomK(new Random(234),1); KdTreeEuclideanSq_F64 distance = new KdTreeEuclideanSq_F64(N); setAlg(new KdForestBbfSearch<>(5,10000,distance, new AxisSplitterMedian<>(distance,rule))); } }