/**
 * Verifies that {@code getDistanceSquared(other)} matches the squared length of
 * {@code test.minus(other)} to within 10.0E-7.
 */
@Test
public void testGetDistanceSquared() {
  Vector rhs = new RandomAccessSparseVector(test.size());
  rhs.set(1, -2);
  rhs.set(2, -5);
  rhs.set(3, -9);
  rhs.set(4, 1);

  // Reference value obtained the long way round: subtract, then take the squared length.
  double viaMinus = test.minus(rhs).getLengthSquared();
  double viaDistance = test.getDistanceSquared(rhs);
  double gap = Math.abs(viaMinus - viaDistance);

  assertTrue("a.getDistanceSquared(b) != a.minus(b).getLengthSquared", gap < 10.0E-7);
}
// NOTE(review): this line is an excerpt, not a complete definition. Braces are unbalanced,
// `double f` is declared twice, and `g`, `i`, `n`, `m`, `high`, `ort`, `v` and `hessenBerg`
// are declared outside what is visible here. Do not restructure without the full method.
// NOTE(review): the names (hessenBerg, ort) suggest a Householder-style reduction to
// Hessenberg form followed by accumulation into `v` -- confirm against the full source.
// What the visible statements do, in order:
//  - take the part of column (m - 1) of hessenBerg starting at m with length (high - m + 1),
//    and use its norm(1) as `scale`;
//  - combine that column part into the matching part of `ort` via Functions.plusMult(1 / scale),
//    then read the squared length of that part of `ort` into `h`;
//  - when ort[m] > 0, negate `g`; then adjust `h` and ort[m] using `g`;
//  - update column parts and row parts of hessenBerg from `ortPiece` with
//    Functions.plusMult(-f), where f is a dot product divided by `h`;
//  - rescale ort[m] by `scale` and store scale * g at hessenBerg(m, m - 1);
//  - set the diagonal of `v` to 1 and begin a loop over columns m..high of `v`.
Vector hColumn = hessenBerg.viewColumn(m - 1).viewPart(m, high - m + 1); double scale = hColumn.norm(1); ort.viewPart(m, high - m + 1).assign(hColumn, Functions.plusMult(1 / scale)); double h = ort.viewPart(m, high - m + 1).getLengthSquared(); if (ort.getQuick(m) > 0) { g = -g; h -= ort.getQuick(m) * g; ort.setQuick(m, ort.getQuick(m) - g); Vector ortPiece = ort.viewPart(m, high - m + 1); for (int j = m; j < n; j++) { double f = ortPiece.dot(hessenBerg.viewColumn(j).viewPart(m, high - m + 1)) / h; hessenBerg.viewColumn(j).viewPart(m, high - m + 1).assign(ortPiece, Functions.plusMult(-f)); double f = ortPiece.dot(hessenBerg.viewRow(i).viewPart(m, high - m + 1)) / h; hessenBerg.viewRow(i).viewPart(m, high - m + 1).assign(ortPiece, Functions.plusMult(-f)); ort.setQuick(m, scale * ort.getQuick(m)); hessenBerg.setQuick(m, m - 1, scale * g); v.viewDiagonal().assign(1); ort.viewPart(m + 1, high - m).assign(hessenBerg.viewColumn(m - 1).viewPart(m + 1, high - m)); for (int j = m; j <= high; j++) { double g = ort.viewPart(m, high - m + 1).dot(v.viewColumn(j).viewPart(m, high - m + 1));
/**
 * Verifies {@code plus(double)}: the result keeps the size of {@code test}, even
 * positions are expected to read 1.0 and odd position {@code i} is expected to read
 * {@code values[i/2] + 1.0}.
 */
@Test
public void testPlusDouble() {
  Vector shifted = test.plus(1);
  assertEquals("size", test.size(), shifted.size());

  for (int pos = 0; pos < test.size(); pos++) {
    boolean evenPosition = pos % 2 == 0;
    double want = evenPosition ? 1.0 : values[pos / 2] + 1.0;
    assertEquals("get [" + pos + ']', want, shifted.get(pos), EPSILON);
  }
}
public RandomAccessSparseVector(Vector other) { this(other.size(), other.getNumNondefaultElements()); for (Element e : other.nonZeroes()) { values.put(e.index(), e.get()); } }
/**
 * Writes the initial contents of M to {@code part-m-00000} under {@code pathToM(-1)}.
 * For each element of {@code averageRatings.nonZeroes()} one dense row of
 * {@code numFeatures} entries is appended, keyed by the element's index: entry 0 holds
 * the element's value and the remaining entries are filled from {@code random.nextDouble()}.
 *
 * @param averageRatings vector whose non-zero elements supply the row keys and first entries
 * @throws IOException if the sequence file cannot be created, written or closed
 */
private void initializeM(Vector averageRatings) throws IOException {
  Random random = RandomUtils.getRandom();

  FileSystem fs = FileSystem.get(pathToM(-1).toUri(), getConf());
  SequenceFile.Writer writer = null;
  try {
    writer = new SequenceFile.Writer(fs, getConf(), new Path(pathToM(-1), "part-m-00000"),
        IntWritable.class, VectorWritable.class);

    // Key and value holders are allocated once and reused for every appended row.
    IntWritable rowKey = new IntWritable();
    VectorWritable rowValue = new VectorWritable();

    for (Vector.Element rating : averageRatings.nonZeroes()) {
      Vector features = new DenseVector(numFeatures);
      features.setQuick(0, rating.get());

      int slot = 1;
      while (slot < numFeatures) {
        features.setQuick(slot, random.nextDouble());
        slot++;
      }

      rowKey.set(rating.index());
      rowValue.set(features);
      writer.append(rowKey, rowValue);
    }
  } finally {
    Closeables.close(writer, false);
  }
}
@Test public void toyData() throws Exception { TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob(); trainNaiveBayes.setConf(conf); trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(), "--output", outputDir.getAbsolutePath(), "-el", "--tempDir", tempDir.getAbsolutePath() }); NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf); AbstractVectorClassifier classifier = new StandardNaiveBayesClassifier(naiveBayesModel); assertEquals(2, classifier.numCategories()); Vector prediction = classifier.classifyFull(trainingInstance(COLOR_RED, TYPE_SUV, ORIGIN_DOMESTIC).get()); // should be classified as not stolen assertTrue(prediction.get(0) < prediction.get(1)); }
/**
 * Reads a model from {@code naiveBayesModel.bin} under {@code output}.
 * The stream is consumed in this order: one float ({@code alphaI}), the per-feature
 * weights, the per-label weights, the per-label theta normalizer, and then one row
 * vector for each row of the label-by-feature matrix. The stream is closed in a
 * {@code finally} block and the assembled model is validated before being returned.
 *
 * @param output directory containing {@code naiveBayesModel.bin}
 * @param conf configuration used to resolve the file system of {@code output}
 * @return the validated model
 * @throws IOException if opening or reading the model file fails
 */
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
  FileSystem fs = output.getFileSystem(conf);

  float alphaI;
  Vector weightsPerFeature;
  Vector weightsPerLabel;
  Vector perLabelThetaNormalizer;
  Matrix weightsPerLabelAndFeature;

  FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
  try {
    alphaI = in.readFloat();
    weightsPerFeature = VectorWritable.readVector(in);
    weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
    perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));

    int labelCount = weightsPerLabel.size();
    int featureCount = weightsPerFeature.size();
    weightsPerLabelAndFeature = new SparseRowMatrix(labelCount, featureCount);

    int row = 0;
    while (row < weightsPerLabelAndFeature.numRows()) {
      weightsPerLabelAndFeature.assignRow(row, VectorWritable.readVector(in));
      row++;
    }
  } finally {
    Closeables.close(in, true);
  }

  NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature,
      weightsPerLabel, perLabelThetaNormalizer, alphaI);
  model.validate();
  return model;
}
// NOTE(review): this line is an excerpt, not a complete definition. Braces are unbalanced and
// `s`, `in`, `samplesInCurrFile`, `sampleNum`, `samplePath`, `dimension`, SAMPLES_PER_FILE and
// createNewFile(...) are declared outside what is visible here.
// What the visible statements do:
//  - open a first output via createNewFile on samplePath/"file<fileNo>" (fileNo post-incremented);
//  - for every line read from `in`, build a RandomAccessSparseVector of `dimension` entries and
//    assign the contents of `vec` to it (how `vec` gets filled from `s` is not visible here);
//  - once samplesInCurrFile reaches SAMPLES_PER_FILE, close the current writer, open the next
//    numbered file and reset samplesInCurrFile to 0;
//  - append the vector keyed by the post-incremented samplesInCurrFile and bump sampleNum.
SequenceFile.Writer out; int fileNo = 0; out = createNewFile(new Path(samplePath, "file" + (fileNo++))); double[] vec = new double[this.dimension]; while ((s = in.readLine()) != null) { Vector p = new RandomAccessSparseVector(dimension); p.assign(vec); if (samplesInCurrFile >= SAMPLES_PER_FILE) { out.close(); out = createNewFile(new Path(samplePath, "file" + (fileNo++))); samplesInCurrFile = 0; out.append(new LongWritable(samplesInCurrFile++), new VectorWritable(p)); sampleNum++;
/**
 * For each non-zero element of the incoming vector, emits a single-entry sparse vector
 * keyed by that element's index; the entry sits at position {@code r.get()} (the incoming
 * key) and carries the element's value. The key object {@code r} is reused for output.
 */
@Override
protected void map(IntWritable r, VectorWritable v, Context ctx) throws IOException, InterruptedException {
  // Capture the incoming key first: r is overwritten below for every emitted record.
  int sourceRow = r.get();
  Vector input = v.get();
  for (Vector.Element cell : input.nonZeroes()) {
    RandomAccessSparseVector single = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
    single.setQuick(sourceRow, cell.get());
    r.set(cell.index());
    ctx.write(r, new VectorWritable(single));
  }
}
}
/**
 * Accumulates one instance into the running totals: adds the instance element-wise into
 * {@code weightsPerFeature} (created on first use, sized like the instance) and adds the
 * instance's {@code zSum()} to the entry of {@code weightsPerLabel} at the key's value.
 */
@Override
protected void map(IntWritable index, VectorWritable value, Context ctx) throws IOException, InterruptedException {
  Vector features = value.get();

  // Lazily create the per-feature accumulator from the first instance seen.
  if (weightsPerFeature == null) {
    int cardinality = features.size();
    int initialCapacity = features.getNumNondefaultElements();
    weightsPerFeature = new RandomAccessSparseVector(cardinality, initialCapacity);
  }

  int label = index.get();
  weightsPerFeature.assign(features, Functions.PLUS);

  double updatedLabelWeight = weightsPerLabel.get(label) + features.zSum();
  weightsPerLabel.set(label, updatedLabelWeight);
}
/**
 * Asserts that {@code vectorPath} exists and then checks every vector read from it:
 * with {@code prune} set, each listed dictionary index must hold exactly 0.0; otherwise
 * each listed index must hold a non-zero value.
 *
 * @param vectorPath location of the vectors to inspect
 * @param highDFWordsDictionaryIndices dictionary indices to check, parallel to HIGH_DF_WORDS
 * @param prune whether the listed words are expected to have been pruned
 */
private void validateVectors(Path vectorPath, int[] highDFWordsDictionaryIndices, boolean prune) throws Exception {
  assertTrue("Path does not exist", vectorPath.getFileSystem(conf).exists(vectorPath));

  Iterable<VectorWritable> vectors = new SequenceFileDirValueIterable<VectorWritable>(
      vectorPath, PathType.LIST, PathFilters.partFilter(), null, true, conf);

  for (VectorWritable writable : vectors) {
    Vector delegate = ((NamedVector) writable.get()).getDelegate();
    for (int w = 0; w < highDFWordsDictionaryIndices.length; w++) {
      if (!prune) {
        String message = "Found vector for which word '" + HIGH_DF_WORDS[w]
            + "' is pruned, and shouldn't have been";
        assertTrue(message, delegate.get(highDFWordsDictionaryIndices[w]) != 0.0);
        continue;
      }
      String message = "Found vector for which word '" + HIGH_DF_WORDS[w] + "' is not pruned";
      assertEquals(message, 0.0, delegate.get(highDFWordsDictionaryIndices[w]), 0.0);
    }
  }
}
}
/**
 * Forwards one similarity-matrix row after overwriting the entry at the row's own index
 * with {@code Double.NaN}, using the reusable {@code index} and {@code vectorOrPref} holders.
 */
@Override
protected void map(IntWritable key, VectorWritable value, Context context) throws IOException, InterruptedException {
  Vector row = value.get();
  int self = key.get();

  /* remove self similarity */
  row.set(self, Double.NaN);

  index.set(self);
  vectorOrPref.set(row);
  context.write(index, vectorOrPref);
}
// NOTE(review): this line is an excerpt from a test body. `v0`, `gen`, `z`, FUZZ and
// vectorToTest(...) are declared outside what is visible here.
// What the visible statements check:
//  - assign(...) hands back the receiver itself: v1 tracks a later write to v0 and
//    assertSame(v0, v1) holds;
//  - plus, times and minus give the same result (squared distance within FUZZ of 0) as the
//    DenseVector computation, for mixes of the vector under test, DenseVector copies and
//    RandomAccessSparseVector copies as operands;
//  - divide(z) agrees with the DenseVector result to within 1.0e-12.
Vector v1 = v0.assign(new Normal(0, 1, gen)); assertEquals(v0.get(12), v1.get(12), 0); v0.set(12, gen.nextDouble()); assertEquals(v0.get(12), v1.get(12), 0); assertSame(v0, v1); Vector v2 = vectorToTest(20).assign(new Normal(0, 1, gen)); Vector dv1 = new DenseVector(v1); Vector dv2 = new DenseVector(v2); Vector sv1 = new RandomAccessSparseVector(v1); Vector sv2 = new RandomAccessSparseVector(v2); assertEquals(0, dv1.plus(dv2).getDistanceSquared(v1.plus(v2)), FUZZ); assertEquals(0, dv1.plus(dv2).getDistanceSquared(v1.plus(dv2)), FUZZ); assertEquals(0, dv1.plus(dv2).getDistanceSquared(v1.plus(sv2)), FUZZ); assertEquals(0, dv1.plus(dv2).getDistanceSquared(sv1.plus(v2)), FUZZ); assertEquals(0, dv1.times(dv2).getDistanceSquared(v1.times(v2)), FUZZ); assertEquals(0, dv1.times(dv2).getDistanceSquared(v1.times(dv2)), FUZZ); assertEquals(0, dv1.times(dv2).getDistanceSquared(v1.times(sv2)), FUZZ); assertEquals(0, dv1.times(dv2).getDistanceSquared(sv1.times(v2)), FUZZ); assertEquals(0, dv1.minus(dv2).getDistanceSquared(v1.minus(v2)), FUZZ); assertEquals(0, dv1.minus(dv2).getDistanceSquared(v1.minus(dv2)), FUZZ); assertEquals(0, dv1.minus(dv2).getDistanceSquared(v1.minus(sv2)), FUZZ); assertEquals(0, dv1.minus(dv2).getDistanceSquared(sv1.minus(v2)), FUZZ); assertEquals(0, dv1.divide(z).getDistanceSquared(v1.divide(z)), 1.0e-12);
/**
 * Builds one sparse row for {@code itemIndex1}: every occurrence of an index in
 * {@code itemIndex2s} adds 1.0 to that index's entry. The finished row is written
 * out under {@code itemIndex1}.
 */
public void reduce(IntWritable itemIndex1, Iterable<IntWritable> itemIndex2s, Context context) throws IOException, InterruptedException {
  Vector counts = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
  for (IntWritable other : itemIndex2s) {
    int column = other.get();
    double seenSoFar = counts.get(column);
    counts.set(column, seenSoFar + 1.0);
  }
  VectorWritable row = new VectorWritable(counts);
  context.write(itemIndex1, row);
}
}
@Override protected void map(IntWritable r, VectorWritable v, Context ctx) throws IOException, InterruptedException { RunningAverage avg = new FullRunningAverage(); for (Vector.Element e : v.get().nonZeroes()) { avg.addDatum(e.get()); } featureVector.setQuick(r.get(), avg.getAverage()); featureVectorWritable.set(featureVector); ctx.write(firstIndex, featureVectorWritable); // prepare instance for reuse featureVector.setQuick(r.get(), 0.0d); } }
/**
 * Evaluates {@code pdf} of every model in {@code prior} on {@code data} and returns the
 * resulting vector after applying {@code TimesFunction} with the reciprocal of its
 * {@code zSum()}.
 *
 * @param data the vector to classify
 * @param prior supplies the list of cluster models
 * @return a dense vector with one entry per model, in model order
 */
@Override
public Vector classify(Vector data, ClusterClassifier prior) {
  List<Cluster> models = prior.getModels();
  Vector pdfs = new DenseVector(models.size());

  int slot = 0;
  for (Cluster model : models) {
    double density = model.pdf(new VectorWritable(data));
    pdfs.set(slot, density);
    slot++;
  }

  double reciprocalOfSum = 1.0 / pdfs.zSum();
  return pdfs.assign(new TimesFunction(), reciprocalOfSum);
}
// NOTE(review): this line is an excerpt from a test body; `vec1` is declared outside what is
// visible here and the final statement is cut off from whatever follows it.
// First group: indices 0..2 are set to -1, -3, -2 and the test expects maxValue() == -1.0
// at maxValueIndex() == 0.
// Later groups set only indices 0 and 2 and expect maxValue() == 0.0 at index 1.
// NOTE(review): that expectation presumably relies on vec1 being re-created between the
// groups (index 1 still holds -3 from the first group otherwise) -- those statements
// are not visible here; confirm against the full test.
vec1.setQuick(0, -1); vec1.setQuick(1, -3); vec1.setQuick(2, -2); double max = vec1.maxValue(); assertEquals(-1.0, max, 0.0); int idx = vec1.maxValueIndex(); assertEquals(0, idx); vec1.setQuick(0, -1); vec1.setQuick(2, -2); max = vec1.maxValue(); assertEquals(0.0, max, 0.0); idx = vec1.maxValueIndex(); assertEquals(1, idx); vec1.setQuick(0, -1); vec1.setQuick(2, -2); max = vec1.maxValue(); assertEquals(0.0, max, 0.0); idx = vec1.maxValueIndex(); assertEquals(1, idx); vec1.setQuick(0, -1);
/**
 * Exercises the projecting {@code WeightedVector} constructor and
 * {@code WeightedVector.project}: the weight is compared against {@code v1.dot(v1)} when
 * a vector is paired with itself, against 0 when paired with the residual of a column
 * after removing its component in the span of Q, and against {@code sqrt(0.5)} for the
 * two cases involving the first Q column and the normalized sum of the first two.
 */
@Test
public void testProjection() {
  Vector base = new DenseVector(10).assign(Functions.random());
  WeightedVector selfProjected = new WeightedVector(base, base, 31);
  assertEquals(base.dot(base), selfProjected.getWeight(), 1.0e-13);
  assertEquals(31, selfProjected.getIndex());

  Matrix y = new DenseMatrix(10, 4).assign(Functions.random());
  Matrix firstThreeColumns = y.viewPart(0, 10, 0, 3);
  Matrix q = new QRDecomposition(firstThreeColumns).getQ();

  // Residual of column 3 after subtracting q * (q^T * column 3).
  Vector inSpan = q.times(q.transpose().times(y.viewColumn(3)));
  Vector nullSpace = y.viewColumn(3).minus(inSpan);

  Vector firstTwoSummed = q.viewColumn(0).plus(q.viewColumn(1));
  WeightedVector againstResidual = new WeightedVector(firstTwoSummed, nullSpace, 1);
  assertEquals(0, againstResidual.getWeight(), 1.0e-13);

  double rootHalf = Math.sqrt(0.5);
  Vector qx = q.viewColumn(0).plus(q.viewColumn(1)).normalize();

  WeightedVector viaConstructor = new WeightedVector(qx, q.viewColumn(0), 2);
  assertEquals(rootHalf, viaConstructor.getWeight(), 1.0e-13);

  WeightedVector viaFactory = WeightedVector.project(q.viewColumn(0), qx);
  assertEquals(rootHalf, viaFactory.getWeight(), 1.0e-13);
}