/**
 * Computes the dot product of this dense vector with {@code y}.
 *
 * <p>Three paths are taken depending on {@code y}:
 * <ul>
 *   <li>{@code y} is a {@code RealVector}: both backing arrays are walked in lock-step;</li>
 *   <li>{@code y} reports a default value of zero: only the entries exposed by
 *       {@code y.entrySet()} are visited;</li>
 *   <li>otherwise: every index is read through {@code y.doubleValue(int)}.</li>
 * </ul>
 *
 * @param y the other operand; asserted to have the same length as this vector
 * @return the sum of the element-wise products
 */
@Override
public <M extends Number> double innerProduct(Vector<M> y) {
    assert (y.length() == data.length);
    double total = 0.0;
    if (y instanceof RealVector) {
        final double[] other = ((RealVector) y).data;
        int idx = 0;
        while (idx < data.length) {
            total += data[idx] * other[idx];
            idx++;
        }
        return total;
    }
    if (y.defaultValue().doubleValue() == 0.0) {
        // Cells that y does not expose contribute nothing when its default is zero.
        for (Map.Entry<Integer, M> entry : y.entrySet()) {
            total += data[entry.getKey()] * entry.getValue().doubleValue();
        }
        return total;
    }
    int idx = 0;
    while (idx < data.length) {
        total += data[idx] * y.doubleValue(idx);
        idx++;
    }
    return total;
}
public static double normalCosSim(Vector<Integer> vec1, Vector<Integer> vec2, Vector<Double> mu, double sumMu2, final StopWordList stopWordList) { final int v1sum = vec1.sum(); final int v2sum = vec2.sum(); if (v1sum == 0 || v2sum == 0) { return 0; for (int i : vec1.keySet()) { if (stopWordList.contains(i)) { continue; if (i > mu.size()) { continue; final double v1i = vec1.value(i).doubleValue();// / v1sum; final double v2i = vec2.value(i).doubleValue();// / v2sum; ab += v2i * v1i - v2sum * mu.doubleValue(i) * v1i - v1sum * mu.doubleValue(i) * v2i; a2 += v1i * v1i - 2 * v1sum * mu.doubleValue(i) * v1i; for (int i : vec2.keySet()) { if (stopWordList.contains(i)) { continue; if (i > mu.size()) { continue; final double v2i = vec2.value(i).doubleValue();// / v2sum; if (!vec1.containsKey(i)) { ab -= v1sum * mu.doubleValue(i) * v2i; b2 += v2i * v2i - 2 * v2sum * mu.doubleValue(i) * v2i;
/**
 * Forwards the call to the wrapped vector {@code v}.
 *
 * @param idx the index passed through to {@code v.add}
 * @param val the value passed through to {@code v.add}
 * @return whatever {@code v.add(idx, val)} returns
 */
@Override
public int add(int idx, int val) {
    final int delegated = v.add(idx, val);
    return delegated;
}
/**
 * Dice-style overlap score between the key sets of two vectors.
 *
 * <p>The numerator counts keys of {@code vec1} that are not stop words and are also
 * present in {@code vec2}; the denominator is {@code vec1.size() + vec2.size()}.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param stopWordList keys contained in this list are not counted as shared
 * @return {@code 2 * shared / (vec1.size() + vec2.size())}, or 0 when either size is 0
 */
public static double diceCoefficient(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    final int size1 = vec1.size();
    final int size2 = vec2.size();
    if (size1 == 0 || size2 == 0) {
        return 0.0;
    }
    int shared = 0;
    for (Integer key : vec1.keySet()) {
        if (!stopWordList.contains(key) && vec2.containsKey(key)) {
            shared++;
        }
    }
    return 2.0 * (double) shared / (size1 + size2);
}
/**
 * Multiplies this matrix by the vector {@code x}.
 *
 * <p>Row {@code r} of the result combines up to three terms: {@code x[r-1] * beta[r-1]}
 * (when {@code r > 0}), {@code x[r] * alpha[r]}, and {@code x[r+1] * beta[r]}
 * (when {@code r < beta.length}).
 *
 * @param x the vector to multiply; asserted to have {@code alpha.length} elements
 * @param using factory used to create the result vector
 * @return a new vector of length {@code alpha.length} holding the product
 */
@Override
public <M extends Number, O extends Number> Vector<O> mult(Vector<M> x, Vectors.Factory<O> using) {
    assert (x.length() == alpha.length);
    final int size = alpha.length;
    final Vector<O> result = using.make(size, 0.0);
    int row = 0;
    while (row < size) {
        double acc = 0.0;
        final boolean hasLeftNeighbour = row > 0;
        if (hasLeftNeighbour) {
            acc += x.doubleValue(row - 1) * beta[row - 1];
        }
        acc += x.doubleValue(row) * alpha[row];
        final boolean hasRightNeighbour = row < beta.length;
        if (hasRightNeighbour) {
            acc += x.doubleValue(row + 1) * beta[row];
        }
        result.put(row, acc);
        row++;
    }
    return result;
}
/**
 * Subtracts {@code vector} from this vector in place, element by element.
 *
 * <p>Values taken from a non-{@code IntVector} operand are converted with
 * {@code intValue()} before being subtracted.
 *
 * @param vector the vector to subtract; asserted to have the same length as this vector
 */
@Override
public <M extends Number> void sub(Vector<M> vector) {
    assert (vector.length() == data.length);
    if (vector instanceof IntVector) {
        final int[] data2 = ((IntVector) vector).data;
        for (int i = 0; i < data.length; i++) {
            data[i] -= data2[i];
        }
    } else if (vector.defaultValue().doubleValue() == 0.0) {
        // With a zero default, cells the operand does not expose subtract nothing,
        // so visiting its entries is sufficient.
        for (Map.Entry<Integer, M> e : vector.entrySet()) {
            data[e.getKey()] -= e.getValue().intValue();
        }
    } else {
        // A non-zero default must be subtracted from every cell, including those
        // the operand's entry set may not list, so read each index explicitly.
        for (int i = 0; i < data.length; i++) {
            data[i] -= vector.intValue(i);
        }
    }
}
/**
 * Computes the Householder vector for the tail {@code x[j..n)} of a vector.
 *
 * <p>The returned vector {@code v} is zero before index {@code j}, has
 * {@code v[j] = 1}, and has {@code v[i] = x[i] / beta} for {@code i > j}, where
 * {@code beta = x[j] + signum(x[j]) * ||x[j..n)||}. When the tail norm is zero no
 * scaling is applied. With {@code j == 0} this is the plain Householder vector of
 * {@code x}.
 *
 * @param x the input vector; it is not modified (a clone is returned)
 * @param j index of the pivot component, expected to satisfy {@code 0 <= j < x.length()}
 * @return a vector {@code v} such that {@code (I - 2vv^T / v^Tv) x} is zero in every
 *         component after {@code j}
 */
public static Vector<Double> house(Vector<Double> x, int j) {
    final int n = x.length();
    final double[] x2 = x.toDoubleArray();
    double mu = 0.0;
    for (int i = j; i < n; i++) {
        mu += x2[i] * x2[i];
    }
    mu = Math.sqrt(mu);
    final Vector<Double> v = x.clone();
    if (mu != 0.0) {
        // The pivot must be the first component of the same tail the norm was taken over.
        final double pivot = x.doubleValue(j);
        final double beta = pivot + Math.signum(pivot) * mu;
        for (int i = j + 1; i < n; i++) {
            v.divide(i, beta);
        }
    }
    // Components before the pivot must be zero so the reflection leaves them untouched.
    for (int i = 0; i < j; i++) {
        v.put(i, 0);
    }
    v.put(j, 1);
    return v;
}
/**
 * Builds the outer product of this vector with {@code y}; cell {@code (r, c)} holds
 * {@code data[r] * y[c]}.
 *
 * <p>The representation of the result depends on {@code using}:
 * {@code Vectors.AS_INTS} yields an {@code IntArrayMatrix} (using {@code y.intValue}),
 * {@code Vectors.AS_REALS} yields a {@code DoubleArrayMatrix}, and any other factory
 * yields a {@code SparseMatrix} populated from the entries exposed by
 * {@code y.entrySet()}.
 *
 * @param y the right-hand vector
 * @param using factory that selects the result representation
 * @return a matrix with {@code data.length} rows and {@code y.length()} columns
 */
@Override
@SuppressWarnings("unchecked")
public <M extends Number, O extends Number> Matrix<O> outerProduct(Vector<M> y, Factory<O> using) {
    final int rows = data.length;
    if (using == Vectors.AS_INTS) {
        final int cols = y.length();
        final int[][] cells = new int[rows][cols];
        for (int r = 0; r < rows; r++) {
            final int[] line = cells[r];
            for (int c = 0; c < cols; c++) {
                line[c] = data[r] * y.intValue(c);
            }
        }
        return (Matrix<O>) new IntArrayMatrix(cells);
    }
    if (using == Vectors.AS_REALS) {
        final int cols = y.length();
        final double[][] cells = new double[rows][cols];
        for (int r = 0; r < rows; r++) {
            final double[] line = cells[r];
            for (int c = 0; c < cols; c++) {
                line[c] = y.doubleValue(c) * data[r];
            }
        }
        return (Matrix<O>) new DoubleArrayMatrix(cells);
    }
    final SparseMatrix<O> sparse = new SparseMatrix<O>(rows, y.length(), using);
    for (int r = 0; r < rows; r++) {
        for (Map.Entry<Integer, M> entry : y.entrySet()) {
            sparse.set(r, entry.getKey(), entry.getValue().doubleValue() * data[r]);
        }
    }
    return sparse;
}
/**
 * Dice-style overlap score in which every key is weighted by {@code 1 - df[key]}.
 *
 * <p>Keys that are stop words, or whose index is not below {@code df.size()}, are
 * ignored. The numerator sums the weights of the remaining {@code vec1} keys that
 * are also present in {@code vec2}; the denominator sums the weights of the
 * remaining keys of {@code vec1} plus those of {@code vec2}.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param df per-key values subtracted from 1 to form the weights
 * @param stopWordList keys to ignore
 * @return {@code 2 * numerator / denominator}, or 0 when the denominator is 0
 */
public static double dfDiceCoefficient(Vector<Integer> vec1, Vector<Integer> vec2, Vector<Double> df, final StopWordList stopWordList) {
    double sharedWeight = 0.0;
    double totalWeight = 0.0;
    for (Integer key : vec1.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        if (vec2.containsKey(key)) {
            sharedWeight += (1.0 - df.doubleValue(key));
        }
        totalWeight += (1.0 - df.doubleValue(key));
    }
    for (int key : vec2.keySet()) {
        if (stopWordList.contains(key) || key >= df.size()) {
            continue;
        }
        totalWeight += (1.0 - df.doubleValue(key));
    }
    if (totalWeight == 0.0) {
        return 0.0;
    }
    return 2.0 * sharedWeight / totalWeight;
}
/**
 * Computes a floored Kullback-Leibler divergence of the term distribution of
 * {@code vec1} from that of {@code vec2}.
 *
 * <p>For each non-stop-word entry of {@code vec1} with count {@code tf_i}, the term
 * probability is {@code p = tf_i / N1}, and the matching probability in
 * {@code vec2} is {@code q = tf_j / N2}, where {@code N1} and {@code N2} are the
 * vector sums. The contribution is {@code p * max(KLD_NEG_COST, log(p / q))}; when
 * the term is absent from {@code vec2} ({@code tf_j == 0}) the contribution is
 * {@code p * KLD_NEG_COST}.
 *
 * @param vec1 the vector whose entries are iterated
 * @param vec2 the vector compared against
 * @param stopWordList keys to ignore
 * @return the accumulated divergence
 */
public static double kullbackLeiblerDivergence(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    final double N1 = vec1.sum(), N2 = vec2.sum();
    double kld = 0.0;
    for (Map.Entry<Integer, Integer> e : vec1.entrySet()) {
        if (stopWordList.contains(e.getKey())) {
            continue;
        }
        final int tfj = vec2.value(e.getKey()).intValue();
        if (tfj != 0) {
            // p / q = (tf_i / N1) / (tf_j / N2) = N2 / N1 * tf_i / tf_j
            kld += (double) e.getValue() / N1 * Math.max(KLD_NEG_COST, Math.log(N2 / N1 * e.getValue() / tfj));
        } else {
            kld += (double) e.getValue() / N1 * KLD_NEG_COST;
        }
    }
    return kld;
}
/**
 * Builds the outer product of this vector with {@code y}; cell {@code (i, j)} holds
 * {@code this[i] * y[j]}.
 *
 * <p>Only the entries exposed by this vector's {@code entrySet()} are visited, so
 * rows for other indices keep the matrix's initial content. The representation of
 * the result depends on {@code using}: {@code Vectors.AS_INTS} yields an
 * {@code IntArrayMatrix} (products truncated to {@code int}),
 * {@code Vectors.AS_REALS} yields a {@code DoubleArrayMatrix}, and any other factory
 * yields a {@code SparseMatrix} filled from the entries of both vectors.
 *
 * @param y the right-hand vector
 * @param using factory that selects the result representation
 * @return a matrix with {@code n} rows and {@code y.length()} columns
 */
@Override
@SuppressWarnings("unchecked")
public <M extends Number, O extends Number> Matrix<O> outerProduct(Vector<M> y, Vectors.Factory<O> using) {
    if (using == Vectors.AS_INTS) {
        int[][] data2 = new int[n][y.length()];
        for (Map.Entry<Integer, Double> e : entrySet()) {
            for (int j = 0; j < y.length(); j++) {
                data2[e.getKey()][j] = (int) (e.getValue().doubleValue() * y.doubleValue(j));
            }
        }
        return (Matrix<O>) new IntArrayMatrix(data2);
    } else if (using == Vectors.AS_REALS) {
        double[][] data2 = new double[n][y.length()];
        for (Map.Entry<Integer, Double> e : entrySet()) {
            for (int j = 0; j < y.length(); j++) {
                data2[e.getKey()][j] = y.doubleValue(j) * e.getValue().doubleValue();
            }
        }
        return (Matrix<O>) new DoubleArrayMatrix(data2);
    } else {
        final SparseMatrix<O> matrix = new SparseMatrix<O>(n, y.length(), using);
        for (Map.Entry<Integer, Double> e : entrySet()) {
            for (Map.Entry<Integer, M> e2 : y.entrySet()) {
                // Multiply by this vector's element value; the key is only the row index.
                matrix.set(e.getKey(), e2.getKey(), e2.getValue().doubleValue() * e.getValue().doubleValue());
            }
        }
        return matrix;
    }
}
assert (m == x.length()); if (x instanceof RealVector) { double[] x2 = ((RealVector) x).data(); value = arr[i].innerProduct(x); y.put(i, using.valueOf(value)); } else { double v = 0.0; for (Map.Entry<Integer, M> e : x.entrySet()) { v += defaultValue * e.getValue().doubleValue(); value = arr[i].innerProduct(x); y.put(i, using.valueOf(value));
/**
 * Subtracts {@code vector} from this vector in place.
 *
 * <p>The backing buffer is walked in steps of {@code SIZE_OF_DOUBLE} bytes; the
 * double at byte offset {@code i} corresponds to element {@code i / SIZE_OF_DOUBLE}
 * of {@code vector}.
 *
 * @param vector the vector to subtract; asserted to have
 *        {@code length / SIZE_OF_DOUBLE} elements
 */
@Override
public <M extends Number> void sub(Vector<M> vector) {
    // length is a byte count, so the element counts match when this equality holds.
    assert (vector.length() == length / SIZE_OF_DOUBLE);
    final ByteBuffer d = data();
    for (int i = 0; i < length; i += SIZE_OF_DOUBLE) {
        d.position(i);
        double v = d.getDouble();
        // getDouble advanced the position; rewind to overwrite the same slot.
        d.position(i);
        d.putDouble(v - vector.doubleValue(i / SIZE_OF_DOUBLE));
    }
}
/**
 * Multiplies the transpose of this matrix by {@code x}.
 *
 * <p>Component {@code j} of the result is {@code sum_i x[i] * A[i][j]}. Cells that a
 * row does not store explicitly are taken to hold {@code defaultValue}, so the sum
 * is computed as {@code defaultValue * sum_i x[i]} plus, for every stored cell,
 * {@code x[i] * (A[i][j] - defaultValue)}.
 *
 * @param x the vector to multiply; read at indices {@code 0 .. arr.length - 1}
 * @return a vector of length {@code m} created through {@code using}
 */
@Override
public <M extends Number> Vector<N> multTransposed(Vector<M> x) {
    assert (m == cols());
    // Baseline contribution of the default value to every column.
    double xSum = 0.0;
    if (defaultValue != 0.0) {
        for (int i = 0; i < arr.length; i++) {
            xSum += x.doubleValue(i);
        }
    }
    final double[] result = new double[m];
    Arrays.fill(result, defaultValue * xSum);
    for (int i = 0; i < arr.length; i++) {
        if (arr[i] == null) {
            // NOTE(review): assumes a null row means "all default", which the
            // baseline above already covers — confirm against how arr is populated.
            continue;
        }
        final double xi = x.doubleValue(i);
        for (Map.Entry<Integer, N> e : arr[i].entrySet()) {
            // Replace the default already counted for this cell with its stored value.
            result[e.getKey().intValue()] += xi * (e.getValue().doubleValue() - defaultValue);
        }
    }
    return using.make(result);
}
/**
 * Multiplies this matrix by a vector and returns the product as a plain array.
 *
 * <p>A row whose storage is {@code null} yields 0 when {@code defaultValue} is zero;
 * otherwise it yields the sum of {@code defaultValue * value} over the entries
 * exposed by {@code x.entrySet()}. Any other row delegates to its
 * {@code innerProduct} with {@code x}.
 *
 * @param x the vector; asserted to have length {@code m}
 * @return an array of length {@code m} holding the result
 */
public double[] multRealDense(Vector<Double> x) {
    assert (m == x.length());
    final double[] out = new double[m];
    for (int row = 0; row < m; row++) {
        if (arr[row] != null) {
            out[row] = arr[row].innerProduct(x);
            continue;
        }
        if (defaultValue == 0.0) {
            // Nothing to add; the slot keeps its initial 0.
            continue;
        }
        double acc = 0.0;
        for (Map.Entry<Integer, Double> entry : x.entrySet()) {
            acc += defaultValue * entry.getValue();
        }
        out[row] = acc;
    }
    return out;
}
/**
 * Rogers-Tanimoto style similarity computed from key presence.
 *
 * <p>{@code diff} counts the non-stop-word keys that occur in exactly one of the two
 * vectors; with {@code N = vec1.length()} the result is
 * {@code (N - diff) / (N + diff)}.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector; asserted to have the same length as {@code vec1}
 * @param stopWordList keys to ignore
 * @return the similarity value
 */
public static double rogersTanimoto(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    final int total = vec1.length();
    assert (vec2.length() == vec1.length());
    int mismatches = 0;
    for (Integer key : vec1.keySet()) {
        if (!stopWordList.contains(key) && !vec2.containsKey(key)) {
            mismatches++;
        }
    }
    for (Integer key : vec2.keySet()) {
        if (!stopWordList.contains(key) && !vec1.containsKey(key)) {
            mismatches++;
        }
    }
    final double numerator = (double) (total - mismatches);
    final double denominator = (double) (total + mismatches);
    return numerator / denominator;
}
/**
 * Cosine similarity of two vectors, ignoring stop-word keys.
 *
 * <p>The dot product and the squared norm of {@code vec1} are accumulated over the
 * keys of {@code vec1}; the squared norm of {@code vec2} is accumulated over the
 * keys of {@code vec2}.
 *
 * @param vec1 the first vector
 * @param vec2 the second vector
 * @param stopWordList keys to ignore
 * @return {@code ab / sqrt(a2) / sqrt(b2)}, or 0 unless both squared norms are positive
 */
public static <M extends Number, N extends Number> double cosSim(Vector<M> vec1, Vector<N> vec2, final StopWordList stopWordList) {
    double dot = 0.0;
    double normSq1 = 0.0;
    for (int key : vec1.keySet()) {
        if (!stopWordList.contains(key)) {
            final double right = vec2.value(key).doubleValue();
            final double left = vec1.value(key).doubleValue();
            dot += right * left;
            normSq1 += left * left;
        }
    }
    double normSq2 = 0.0;
    for (int key : vec2.keySet()) {
        if (!stopWordList.contains(key)) {
            final double right = vec2.value(key).doubleValue();
            normSq2 += right * right;
        }
    }
    if (normSq1 > 0 && normSq2 > 0) {
        return dot / Math.sqrt(normSq1) / Math.sqrt(normSq2);
    }
    return 0;
}
/**
 * Assigns one of {@code K} labels to every token of {@code termVec} and returns the
 * histogram of the final labels.
 *
 * <p>Each entry {@code (key, count)} of {@code termVec} is expanded into
 * {@code count} tokens with word id {@code key - 1}; each token starts with a label
 * drawn from {@code random.nextInt(K)}. Then 100 sweeps are run in which every
 * token's label is redrawn via {@code sample(...)} and the per-label and
 * per-word/label counters are updated. This looks like a Gibbs-style sampler, but
 * that depends on {@code sample}, which is defined elsewhere.
 *
 * @param termVec term counts; its sum determines the number of tokens
 * @param l passed through unchanged to {@code sample}
 * @return an {@code Integer2DoubleVector} wrapping the histogram of labels over {@code K} bins
 */
private Vector<Double> simVec(Vector<Integer> termVec, int l) {
    final int tokenCount = termVec.sum();
    final int[] words = new int[tokenCount];
    final int[] labels = new int[tokenCount];
    final int[] labelTotals = new int[K];
    final int[][] wordLabelCounts = new int[W][K];

    // Random initial assignment, one draw per token occurrence.
    int pos = 0;
    for (Map.Entry<Integer, Integer> entry : termVec.entrySet()) {
        final int word = entry.getKey() - 1;
        final int occurrences = entry.getValue();
        for (int rep = 0; rep < occurrences; rep++) {
            final int label = random.nextInt(K);
            words[pos] = word;
            labels[pos] = label;
            labelTotals[label]++;
            wordLabelCounts[word][label]++;
            pos++;
        }
    }

    for (int sweep = 0; sweep < 100; sweep++) {
        for (int t = 0; t < tokenCount; t++) {
            final int word = words[t];
            final int previous = labels[t];
            // The new label is drawn while the counters still include this token.
            final int next = sample(word, l, previous, labelTotals, wordLabelCounts);
            labelTotals[previous]--;
            wordLabelCounts[word][previous]--;
            labels[t] = next;
            wordLabelCounts[word][next]++;
            labelTotals[next]++;
        }
    }
    return new Integer2DoubleVector(SparseIntArray.histogram(labels, K));
}