/**
 * Returns the key set of the wrapped vector {@code v}.
 *
 * @return the result of {@code v.keySet()}, passed through unchanged
 */
@Override
public IntSet keySet() {
    return v.keySet();
}
/**
 * Cosine similarity of two vectors, skipping every index contained in the
 * stop word list.
 * <p>
 * The dot product and the squared norm of {@code vec1} are accumulated over
 * the keys of {@code vec1}; the squared norm of {@code vec2} is accumulated
 * over the keys of {@code vec2}.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param stopWordList indices for which {@code contains} is true are ignored
 * @return {@code dot / sqrt(|vec1|^2) / sqrt(|vec2|^2)}, or 0 unless both
 *         squared norms are strictly positive
 */
public static <M extends Number, N extends Number> double cosSim(Vector<M> vec1, Vector<N> vec2, final StopWordList stopWordList) {
    double dot = 0.0;
    double normSq1 = 0.0;
    for (int idx : vec1.keySet()) {
        if (!stopWordList.contains(idx)) {
            final double right = vec2.value(idx).doubleValue();
            final double left = vec1.value(idx).doubleValue();
            dot += right * left;
            normSq1 += left * left;
        }
    }

    double normSq2 = 0.0;
    for (int idx : vec2.keySet()) {
        if (!stopWordList.contains(idx)) {
            final double right = vec2.value(idx).doubleValue();
            normSq2 += right * right;
        }
    }

    // Written as a positive test so that a NaN norm also falls through to 0.
    if (normSq1 > 0 && normSq2 > 0) {
        return dot / Math.sqrt(normSq1) / Math.sqrt(normSq2);
    }
    return 0;
}
/**
 * Cosine similarity between {@code vec1} (from the enclosing scope) and
 * {@code vec2}, skipping every index contained in {@code stopWordList}.
 * <p>
 * The squared norm of {@code vec1} is kept in {@code a2}; a NaN value there
 * means "not computed yet" and triggers the computation on this call.
 *
 * @param vec2 the vector to score; the dot product and its squared norm are
 *             accumulated over its keys
 * @return {@code dot / sqrt(a2) / sqrt(|vec2|^2)}, or 0 unless both squared
 *         norms are strictly positive
 */
@Override
public double score(Vector<Integer> vec2) {
    if (Double.isNaN(a2)) {
        // Accumulate straight into a2, exactly as the cache expects.
        a2 = 0.0;
        for (int idx : vec1.keySet()) {
            if (!stopWordList.contains(idx)) {
                final double left = vec1.value(idx).doubleValue();
                a2 += left * left;
            }
        }
    }

    double dot = 0.0;
    double normSq2 = 0.0;
    for (int idx : vec2.keySet()) {
        if (!stopWordList.contains(idx)) {
            final double right = vec2.value(idx).doubleValue();
            final double left = vec1.value(idx).doubleValue();
            dot += right * left;
            normSq2 += right * right;
        }
    }

    if (a2 > 0 && normSq2 > 0) {
        return dot / Math.sqrt(a2) / Math.sqrt(normSq2);
    }
    return 0;
}
};
/**
 * Cosine similarity between a dense array and a keyed vector, skipping every
 * index contained in the stop word list.
 * <p>
 * The dot product and the squared norm of {@code vec2} are accumulated over
 * the keys of {@code vec2}; the squared norm of {@code vec1} is accumulated
 * over all array positions.
 * <p>
 * NOTE(review): {@code vec1} is indexed directly with the keys of
 * {@code vec2}, so a key outside {@code [0, vec1.length)} raises
 * {@code ArrayIndexOutOfBoundsException} - confirm callers guarantee
 * matching dimensions.
 *
 * @param vec1 the dense vector
 * @param vec2 the keyed vector
 * @param stopWordList indices for which {@code contains} is true are ignored
 * @return {@code dot / sqrt(|vec1|^2) / sqrt(|vec2|^2)}, or 0 unless both
 *         squared norms are strictly positive
 */
public static double cosSim(double[] vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    double dot = 0.0;
    double keyedNormSq = 0.0;
    for (int idx : vec2.keySet()) {
        if (!stopWordList.contains(idx)) {
            final double right = vec2.value(idx).doubleValue();
            dot += right * vec1[idx];
            keyedNormSq += right * right;
        }
    }

    double denseNormSq = 0.0;
    for (int idx = 0; idx < vec1.length; idx++) {
        if (!stopWordList.contains(idx)) {
            denseNormSq += vec1[idx] * vec1[idx];
        }
    }

    if (denseNormSq > 0 && keyedNormSq > 0) {
        return dot / Math.sqrt(denseNormSq) / Math.sqrt(keyedNormSq);
    }
    return 0;
}

private static final double KLD_NEG_COST = -5;
/**
 * Dice-style overlap of the key sets of two vectors.
 * <p>
 * Counts the keys of {@code vec1} that are not in the stop word list and are
 * also keys of {@code vec2}, then returns twice that count divided by
 * {@code vec1.size() + vec2.size()}.
 * <p>
 * NOTE(review): the two {@code size()} values in the denominator are not
 * filtered by the stop word list, while the overlap count is - confirm this
 * asymmetry is intended.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param stopWordList keys for which {@code contains} is true are not counted
 *                     as overlap
 * @return the coefficient, or 0 if either vector reports a size of 0
 */
public static double diceCoefficient(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    final int size1 = vec1.size();
    final int size2 = vec2.size();
    if (size1 == 0 || size2 == 0) {
        return 0;
    }

    int shared = 0;
    for (Integer key : vec1.keySet()) {
        if (!stopWordList.contains(key) && vec2.containsKey(key)) {
            shared++;
        }
    }
    return 2.0 * (double) shared / (size1 + size2);
}
/**
 * Rogers-Tanimoto similarity computed from the key sets of two vectors.
 * <p>
 * With {@code n = vec1.length()} and {@code d} the number of keys (outside
 * the stop word list) that appear in exactly one of the two vectors, the
 * result is {@code (n - d) / (n + d)}.
 *
 * @param vec1 the first vector; its {@code length()} supplies {@code n}
 * @param vec2 the second vector; asserted to have the same {@code length()}
 * @param stopWordList keys for which {@code contains} is true are ignored
 * @return {@code (n - d) / (n + d)} as a double
 */
public static double rogersTanimoto(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    final int dimension = vec1.length();
    assert (vec2.length() == vec1.length());

    int mismatches = 0;
    // Keys present in vec1 only.
    for (Integer key : vec1.keySet()) {
        if (!stopWordList.contains(key) && !vec2.containsKey(key)) {
            mismatches++;
        }
    }
    // Keys present in vec2 only.
    for (Integer key : vec2.keySet()) {
        if (!stopWordList.contains(key) && !vec1.containsKey(key)) {
            mismatches++;
        }
    }

    final double numerator = (double) (dimension - mismatches);
    final double denominator = (double) (dimension + mismatches);
    return numerator / denominator;
}
/**
 * Computes and stores the single entry {@code l(i, k)} of a Cholesky factor.
 * <p>
 * First forms the sum of {@code l(i, j) * l(k, j)} over the keys {@code j}
 * of row {@code k} of {@code l}. For a diagonal entry ({@code i == k}) the
 * stored value is {@code sqrt(a(i, i) - sum)}; otherwise it is
 * {@code (a(i, k) - sum) / l(k, k)}.
 *
 * @param l the factor being built; entry {@code (i, k)} is written
 * @param k the column index of the entry
 * @param i the row index of the entry
 * @param a the matrix being decomposed
 * @throws IllegalArgumentException if {@code i == k} and
 *         {@code a(i, i) - sum} is negative
 */
public static void chol(SparseMatrix<Double> l, int k, int i, SparseMatrix<Double> a) throws IllegalArgumentException {
    final Vector<Double> rowK = l.row(k);
    double dot = 0;
    for (int j : rowK.keySet()) {
        dot += l.doubleValue(i, j) * rowK.doubleValue(j);
    }

    final double diagonal = a.doubleValue(i, i);
    if (i != k) {
        // Off-diagonal entry: scale by the already computed diagonal of row k.
        l.set(i, k, (a.doubleValue(i, k) - dot) / l.doubleValue(k, k));
        return;
    }

    if (diagonal - dot < 0) {
        throw new IllegalArgumentException("Matrix not positive definite");
    }
    l.set(i, k, Math.sqrt(diagonal - dot));
}
/**
 * Multiplies this matrix by {@code B}.
 * <p>
 * Only the stored keys of each row of this matrix and of the matching rows
 * of {@code B} are visited, so both operands must have a default value of
 * zero.
 *
 * @param B the right-hand operand; its row count must equal this matrix's
 *          column count
 * @return a new {@code SparseMatrix} with {@code this.rows()} rows, each row
 *         created by {@code using.make(B.cols(), 0.0)}
 * @throws IllegalArgumentException if {@code this.cols() != B.rows()}
 * @throws UnsupportedOperationException if this matrix, or {@code B} when it
 *         is a {@code SparseMatrix}, has a non-zero default value
 */
@Override
public <M extends Number> Matrix<N> product(Matrix<M> B) {
    if (this.cols() != B.rows()) {
        throw new IllegalArgumentException("Matrix dimensions not suitable for product");
    }
    if (defaultValue != 0.0) {
        throw new UnsupportedOperationException();
    }
    if (B instanceof SparseMatrix && ((SparseMatrix) B).defaultValue != 0.0) {
        throw new UnsupportedOperationException();
    }

    Vector<N>[] res = new Vector[this.rows()];
    for (int i = 0; i < this.rows(); i++) {
        res[i] = using.make(B.cols(), 0.0);
        for (int j : this.arr[i].keySet()) {
            final Vector<M> rowOfB = B.row(j);
            for (int k : rowOfB.keySet()) {
                // Accumulate this(i, j) * B(j, k) into result(i, k).
                final double term = this.arr[i].doubleValue(j) * B.doubleValue(j, k);
                res[i].add(k, term);
            }
        }
    }
    return new SparseMatrix<N>(this.rows(), res, using);
}
/**
 * Dice-style overlap of two key sets in which every key {@code i} carries
 * the weight {@code 1 - df(i)}.
 * <p>
 * The numerator sums the weights of keys of {@code vec1} that are also keys
 * of {@code vec2}; the denominator sums the weights of all keys of
 * {@code vec1} plus all keys of {@code vec2}. Keys in the stop word list and
 * keys {@code >= df.size()} are skipped in both loops.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param df per-key values from which the weights are derived
 * @param stopWordList keys for which {@code contains} is true are ignored
 * @return {@code 2 * numerator / denominator}, or 0 when the denominator is 0
 */
public static double dfDiceCoefficient(Vector<Integer> vec1, Vector<Integer> vec2, Vector<Double> df, final StopWordList stopWordList) {
    double sharedWeight = 0.0;
    double totalWeight = 0.0;

    for (Integer key : vec1.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        if (vec2.containsKey(key)) {
            sharedWeight += (1.0 - df.doubleValue(key));
        }
        totalWeight += (1.0 - df.doubleValue(key));
    }

    for (int key : vec2.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        totalWeight += (1.0 - df.doubleValue(key));
    }

    if (totalWeight == 0.0) {
        return 0.0;
    }
    return 2.0 * sharedWeight / totalWeight;
}
/**
 * Jaccard-style overlap of two key sets in which every key {@code i} carries
 * the weight {@code 1 - df(i)}.
 * <p>
 * The numerator sums the weights of keys of {@code vec1} that are also keys
 * of {@code vec2}. The denominator sums the weights of keys of {@code vec1}
 * that are not in {@code vec2}, plus the weights of all keys of
 * {@code vec2}. Keys in the stop word list and keys {@code >= df.size()} are
 * skipped in both loops.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param df per-key values from which the weights are derived
 * @param stopWordList keys for which {@code contains} is true are ignored
 * @return {@code numerator / denominator}, or 0 when the denominator is 0
 */
public static double dfJaccardCoefficient(Vector<Integer> vec1, Vector<Integer> vec2, Vector<Double> df, final StopWordList stopWordList) {
    double sharedWeight = 0.0;
    double unionWeight = 0.0;

    for (Integer key : vec1.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        if (vec2.containsKey(key)) {
            sharedWeight += (1.0 - df.doubleValue(key));
        } else {
            // Only in vec1; shared keys reach the denominator via the vec2 loop.
            unionWeight += (1.0 - df.doubleValue(key));
        }
    }

    for (int key : vec2.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        unionWeight += (1.0 - df.doubleValue(key));
    }

    if (unionWeight == 0.0) {
        return 0.0;
    }
    return sharedWeight / unionWeight;
}
public static SparseMatrix<Double> decomp(SparseMatrix<Double> a, boolean complete) { int m = a.rows(); SparseMatrix<Double> l = new SparseMatrix<Double>(m, m, Vectors.AS_SPARSE_REALS); //automatically initialzed to 0's for (int i = 0; i < m; i++) { if (complete) { for (int k = 0; k < (i + 1); k++) { chol(l, k, i, a); } } else { final Vector<Double> l_i = l.row(i); for (int k : l_i.keySet()) { if (k >= (i + 1)) { break; } chol(l, k, i, a); } } } return l; }