/**
 * Draws 10001 values from a default {@link Normal} sampler, sorts them, and checks that the
 * middle value is within 0.04 of the 0.5 quantile reported by a reference
 * {@code NormalDistribution} built with parameters 0 and 1.
 */
@Test
public void testSample() throws Exception {
  Sampler<Double> normal = new Normal();
  double[] draws = new double[10001];
  for (int k = 0; k < draws.length; k++) {
    draws[k] = normal.sample();
  }
  // Sorting puts the sample median at the middle index.
  Arrays.sort(draws);

  NormalDistribution reference = new NormalDistribution(
      RandomUtils.getRandom().getRandomGenerator(),
      0,
      1,
      NormalDistribution.DEFAULT_INVERSE_ABSOLUTE_ACCURACY);
  double expectedMedian = reference.inverseCumulativeProbability(0.5);
  double observedMedian = draws[draws.length / 2];
  assertEquals("Median", expectedMedian, observedMedian, 0.04);
}
}
/**
 * Feeds 10001 samples from {@code new Normal(2, 5)} into an {@link OnlineSummarizer} and checks
 * that the summarized mean is close to 2 and the summarized standard deviation is close to 5.
 */
@Test
public void testOffset() {
  Sampler<Double> shifted = new Normal(2, 5);
  OnlineSummarizer summary = new OnlineSummarizer();
  int remaining = 10001;
  while (remaining-- > 0) {
    summary.add(shifted.sample());
  }

  // The failure message reports the observed statistics.
  String message = String.format("m = %.3f, sd = %.3f", summary.getMean(), summary.getSD());
  assertEquals(message, 2, summary.getMean(), 0.04 * summary.getSD());
  assertEquals(5, summary.getSD(), 0.12);
}
/**
 * Generates a list of projectedVectorSize unit-length vectors, each of size vectorSize. This
 * looks like a matrix of size (projectedVectorSize, vectorSize).
 *
 * Each vector is filled with samples from a {@link Normal} sampler and then normalized.
 *
 * @param projectedVectorSize final projected size of a vector (number of projection vectors)
 * @param vectorSize initial vector size
 * @return a list of normalized projection vectors
 */
public static List<Vector> generateVectorBasis(int projectedVectorSize, int vectorSize) {
  DoubleFunction random = new Normal();
  List<Vector> basisVectors = Lists.newArrayList();
  for (int i = 0; i < projectedVectorSize; ++i) {
    Vector basisVector = new DenseVector(vectorSize);
    basisVector.assign(random);
    // normalize() returns a new vector instead of scaling in place, so its result must be
    // kept; discarding it would leave the basis vector un-normalized.
    basisVectors.add(basisVector.normalize());
  }
  return basisVectors;
}
}
/**
 * Generates a list of projectedVectorSize unit-length vectors, each of size vectorSize. This
 * looks like a matrix of size (projectedVectorSize, vectorSize).
 *
 * Each vector is filled with samples from a {@link Normal} sampler and then normalized.
 *
 * @param projectedVectorSize final projected size of a vector (number of projection vectors)
 * @param vectorSize initial vector size
 * @return a list of normalized projection vectors
 */
public static List<Vector> generateVectorBasis(int projectedVectorSize, int vectorSize) {
  DoubleFunction random = new Normal();
  List<Vector> basisVectors = Lists.newArrayList();
  for (int i = 0; i < projectedVectorSize; ++i) {
    Vector basisVector = new DenseVector(vectorSize);
    basisVector.assign(random);
    // normalize() returns a new vector instead of scaling in place, so its result must be
    // kept; discarding it would leave the basis vector un-normalized.
    basisVectors.add(basisVector.normalize());
  }
  return basisVectors;
}
}
/**
 * Generates a list of projectedVectorSize unit-length vectors, each of size vectorSize. This
 * looks like a matrix of size (projectedVectorSize, vectorSize).
 *
 * Each vector is filled with samples from a {@link Normal} sampler and then normalized.
 *
 * @param projectedVectorSize final projected size of a vector (number of projection vectors)
 * @param vectorSize initial vector size
 * @return a list of normalized projection vectors
 */
public static List<Vector> generateVectorBasis(int projectedVectorSize, int vectorSize) {
  DoubleFunction random = new Normal();
  List<Vector> basisVectors = Lists.newArrayList();
  for (int i = 0; i < projectedVectorSize; ++i) {
    Vector basisVector = new DenseVector(vectorSize);
    basisVector.assign(random);
    // normalize() returns a new vector instead of scaling in place, so its result must be
    // kept; discarding it would leave the basis vector un-normalized.
    basisVectors.add(basisVector.normalize());
  }
  return basisVectors;
}
}
/**
 * Builds a projection matrix with projectedVectorSize rows and vectorSize columns. Multiplying
 * a vector by this matrix yields the projected vector.
 *
 * Every entry is first filled from a {@link Normal} sampler; each row is then overwritten
 * with its normalized form.
 *
 * @param projectedVectorSize final projected size of a vector (number of projection vectors)
 * @param vectorSize initial vector size
 * @return a projection matrix
 */
public static Matrix generateBasisNormal(int projectedVectorSize, int vectorSize) {
  Matrix projection = new DenseMatrix(projectedVectorSize, vectorSize);
  Normal gaussian = new Normal();
  projection.assign(gaussian);

  for (MatrixSlice slice : projection) {
    // Write the normalized row back into the matrix through the slice's vector.
    slice.vector().assign(slice.normalize());
  }
  return projection;
}
/**
 * Builds a projection matrix with projectedVectorSize rows and vectorSize columns. Multiplying
 * a vector by this matrix yields the projected vector.
 *
 * Every entry is first filled from a {@link Normal} sampler; each row is then overwritten
 * with its normalized form.
 *
 * @param projectedVectorSize final projected size of a vector (number of projection vectors)
 * @param vectorSize initial vector size
 * @return a projection matrix
 */
public static Matrix generateBasisNormal(int projectedVectorSize, int vectorSize) {
  Matrix projection = new DenseMatrix(projectedVectorSize, vectorSize);
  Normal gaussian = new Normal();
  projection.assign(gaussian);

  for (MatrixSlice slice : projection) {
    // Write the normalized row back into the matrix through the slice's vector.
    slice.vector().assign(slice.normalize());
  }
  return projection;
}
/**
 * Builds a projection matrix with projectedVectorSize rows and vectorSize columns. Multiplying
 * a vector by this matrix yields the projected vector.
 *
 * Every entry is first filled from a {@link Normal} sampler; each row is then overwritten
 * with its normalized form.
 *
 * @param projectedVectorSize final projected size of a vector (number of projection vectors)
 * @param vectorSize initial vector size
 * @return a projection matrix
 */
public static Matrix generateBasisNormal(int projectedVectorSize, int vectorSize) {
  Matrix projection = new DenseMatrix(projectedVectorSize, vectorSize);
  Normal gaussian = new Normal();
  projection.assign(gaussian);

  for (MatrixSlice slice : projection) {
    // Write the normalized row back into the matrix through the slice's vector.
    slice.vector().assign(slice.normalize());
  }
  return projection;
}
/**
 * Exploratory test with no assertions. It hashes a random query and 500000 random vectors
 * against a random 64 x 10 projection, and prints how the number of differing hash bits
 * relates to the cosine between each vector and the query.
 *
 * For each differing-bit count, cosines are printed only while the tally for that count is
 * below 200. The full tally per differing-bit count (0 through 64) is printed at the end.
 */
@Test
public void testDotCorrelation() {
  final Normal sampler = new Normal();

  Matrix projection = new DenseMatrix(64, 10);
  projection.assign(sampler);

  Vector query = new DenseVector(10);
  query.assign(sampler);
  long queryHash = HashedVector.computeHash64(query, projection);

  int[] histogram = new int[65];
  Vector candidate = new DenseVector(10);
  for (int trial = 0; trial < 500000; trial++) {
    candidate.assign(sampler);
    long candidateHash = HashedVector.computeHash64(candidate, projection);

    // XOR leaves a 1 in every position where the two hashes disagree.
    final int differingBits = Long.bitCount(queryHash ^ candidateHash);
    histogram[differingBits]++;
    if (histogram[differingBits] < 200) {
      double norms = Math.sqrt(candidate.getLengthSquared() * query.getLengthSquared());
      System.out.printf("%d, %.3f\n", differingBits, candidate.dot(query) / norms);
    }
  }

  int bucket = 0;
  while (bucket < 65) {
    System.out.printf("%d, %d\n", bucket, histogram[bucket]);
    ++bucket;
  }
}
}
/**
 * Loads the same 100000 x 10 matrix of {@link Normal} samples into a {@link BruteSearch} and a
 * {@link LocalitySensitiveHashSearch}, then sweeps the hash-limit strategy from -0.1 to 1.0 in
 * steps of 0.1.
 *
 * For each strategy it prints the speedup (10.0e6 divided by the evaluation count) and the
 * three quartiles of the summary returned by {@code evaluateStrategy}. It asserts that the
 * median exceeds 0.45, that either speedup exceeds 4 or the median exceeds 0.9, and that
 * either speedup exceeds 15 or the median exceeds 0.8.
 */
@Test
public void testNormal() {
  Matrix points = new DenseMatrix(100000, 10);
  final Normal sampler = new Normal();
  points.assign(sampler);

  final EuclideanDistanceMeasure metric = new EuclideanDistanceMeasure();

  BruteSearch exhaustive = new BruteSearch(metric);
  exhaustive.addAllMatrixSlicesAsWeightedVectors(points);

  LocalitySensitiveHashSearch lsh = new LocalitySensitiveHashSearch(metric, 10);
  lsh.addAllMatrixSlicesAsWeightedVectors(points);
  lsh.setSearchSize(200);
  lsh.resetEvaluationCount();

  System.out.printf("speedup,q1,q2,q3\n");
  // Twelve steps covering strategy values -0.1, 0.0, 0.1, ..., 1.0.
  for (int step = -1; step <= 10; step++) {
    double strategy = step / 10.0;
    lsh.setRaiseHashLimitStrategy(strategy);

    OnlineSummarizer quality = evaluateStrategy(points, exhaustive, lsh);
    int evaluations = lsh.resetEvaluationCount();
    final double speedup = 10.0e6 / evaluations;

    System.out.printf("%.1f,%.2f,%.2f,%.2f\n",
        speedup, quality.getQuartile(1), quality.getQuartile(2), quality.getQuartile(3));

    double median = quality.getQuartile(2);
    assertTrue(median > 0.45);
    assertTrue(speedup > 4 || median > 0.9);
    assertTrue(speedup > 15 || median > 0.8);
  }
}