/**
 * Returns the element at position {@code idx} of the wrapped vector {@code v},
 * converted to a double.
 *
 * @param idx the index to look up in {@code v}
 * @return the value at {@code idx} as a (boxed) double
 */
@Override
public Double value(int idx) {
    final double asDouble = v.value(idx).doubleValue();
    return asDouble;
}
/**
 * Looks up the cell at row {@code i}, column {@code j}.
 *
 * @param i the row index into {@code arr}
 * @param j the column index within the row
 * @return the row's value at {@code j}, or {@code using.valueOf(defaultValue)}
 *         when row {@code i} has not been allocated
 */
@Override
public N value(int i, int j) {
    if (arr[i] != null) {
        return arr[i].value(j);
    }
    // Unallocated row: every cell reads as the default value.
    return using.valueOf(defaultValue);
}
/**
 * Computes the cosine similarity of two vectors, ignoring every index contained
 * in {@code stopWordList}.
 *
 * @param vec1 the first vector; its key set drives the dot product and its own norm
 * @param vec2 the second vector
 * @param stopWordList indices to leave out of all three sums
 * @return {@code ab / sqrt(a2) / sqrt(b2)}, or 0 unless both squared norms are positive
 */
public static <M extends Number, N extends Number> double cosSim(Vector<M> vec1, Vector<N> vec2, final StopWordList stopWordList) {
    double ab = 0.0;
    double a2 = 0.0;
    // Dot product and squared norm of vec1, visiting only the keys of vec1.
    for (int key : vec1.keySet()) {
        if (!stopWordList.contains(key)) {
            final double right = vec2.value(key).doubleValue();
            final double left = vec1.value(key).doubleValue();
            ab += right * left;
            a2 += left * left;
        }
    }

    // Squared norm of vec2 over its own keys.
    double b2 = 0.0;
    for (int key : vec2.keySet()) {
        if (!stopWordList.contains(key)) {
            final double right = vec2.value(key).doubleValue();
            b2 += right * right;
        }
    }

    if (a2 > 0 && b2 > 0) {
        return ab / Math.sqrt(a2) / Math.sqrt(b2);
    }
    return 0;
}
/**
 * Scores {@code vec2} against the enclosing scope's {@code vec1} using cosine
 * similarity, skipping indices contained in {@code stopWordList}.
 *
 * <p>The squared norm of {@code vec1} is kept in {@code a2}; it is computed on
 * the first call, which is detected by {@code a2} being NaN.
 *
 * @param vec2 the vector to compare against {@code vec1}
 * @return {@code ab / sqrt(a2) / sqrt(b2)}, or 0 unless both squared norms are positive
 */
@Override
public double score(Vector<Integer> vec2) {
    // NaN marks "not yet computed": fill in the squared norm of vec1 once.
    if (Double.isNaN(a2)) {
        a2 = 0.0;
        for (int key : vec1.keySet()) {
            if (!stopWordList.contains(key)) {
                final double left = vec1.value(key).doubleValue();
                a2 += left * left;
            }
        }
    }

    // Dot product and squared norm of vec2, visiting only the keys of vec2.
    double ab = 0.0;
    double b2 = 0.0;
    for (int key : vec2.keySet()) {
        if (!stopWordList.contains(key)) {
            final double right = vec2.value(key).doubleValue();
            final double left = vec1.value(key).doubleValue();
            ab += right * left;
            b2 += right * right;
        }
    }

    if (a2 > 0 && b2 > 0) {
        return ab / Math.sqrt(a2) / Math.sqrt(b2);
    }
    return 0;
}
};
/**
 * Computes the cosine similarity between a dense array and a sparse integer
 * vector, ignoring every index contained in {@code stopWordList}.
 *
 * @param vec1 the dense vector
 * @param vec2 the sparse vector; its keys are used as indices into {@code vec1}
 * @param stopWordList indices to leave out of all three sums
 * @return {@code ab / sqrt(a2) / sqrt(b2)}, or 0 unless both squared norms are positive
 */
public static double cosSim(double[] vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    double ab = 0.0;
    double a2 = 0.0;
    double b2 = 0.0;

    // Dot product and squared norm of vec2, visiting only the keys of vec2.
    // NOTE(review): vec1[key] is not bounds-checked; assumes every key of vec2
    // is below vec1.length - confirm against callers.
    for (int key : vec2.keySet()) {
        if (!stopWordList.contains(key)) {
            final double sparse = vec2.value(key).doubleValue();
            ab += sparse * vec1[key];
            b2 += sparse * sparse;
        }
    }

    // Squared norm of the dense vector.
    for (int idx = 0; idx < vec1.length; idx++) {
        if (!stopWordList.contains(idx)) {
            a2 += vec1[idx] * vec1[idx];
        }
    }

    if (a2 > 0 && b2 > 0) {
        return ab / Math.sqrt(a2) / Math.sqrt(b2);
    }
    return 0;
}

/**
 * Lower bound for the per-term log value in {@code kullbackLeiblerDivergence};
 * also used there as the factor when the second vector has a zero count.
 */
private static final double KLD_NEG_COST = -5;
/**
 * Checks whether this matrix equals its transpose.
 *
 * <p>A matrix with {@code m != n} is never symmetric. Otherwise every stored
 * entry {@code (row, col)} must have an allocated row {@code col} whose value
 * at {@code row} has the same {@code doubleValue()}.
 *
 * @return {@code true} if no stored entry violates the check above
 */
@Override
public boolean isSymmetric() {
    if (m != n) {
        return false;
    }
    for (int row = 0; row < m; row++) {
        if (arr[row] != null) {
            for (Map.Entry<Integer, N> cell : arr[row].entrySet()) {
                final int col = cell.getKey();
                // NOTE(review): an unallocated mirror row is reported as asymmetric
                // even if the stored value happens to equal the default - confirm
                // this is intended.
                if (arr[col] == null) {
                    return false;
                }
                final double mirrored = arr[col].value(row).doubleValue();
                final double stored = cell.getValue().doubleValue();
                if (mirrored != stored) {
                    return false;
                }
            }
        }
    }
    return true;
}
/**
 * Adds {@code v} to the cell at row {@code i}, column {@code j}.
 *
 * <p>If row {@code i} is not allocated yet it is created with
 * {@code using.make(n, defaultValue)} and the cell is set to
 * {@code v + defaultValue}.
 *
 * @param i the row index
 * @param j the column index
 * @param v the amount to add
 */
@Override
public void add(int i, int j, N v) {
    if (arr[i] != null) {
        final double updated = arr[i].value(j).doubleValue() + v.doubleValue();
        arr[i].put(j, updated);
        return;
    }
    // First write to this row: allocate it, then seed the cell from the default.
    arr[i] = using.make(n, defaultValue);
    arr[i].put(j, v.doubleValue() + defaultValue);
}
/**
 * Adds the double {@code v} to the cell at row {@code i}, column {@code j}.
 *
 * <p>If row {@code i} is not allocated yet it is created with
 * {@code using.make(n, defaultValue)} and the cell is set to
 * {@code using.valueOf(v + defaultValue)}.
 *
 * @param i the row index
 * @param j the column index
 * @param v the amount to add
 */
@Override
public void add(int i, int j, double v) {
    if (arr[i] != null) {
        final double updated = arr[i].value(j).doubleValue() + v;
        arr[i].put(j, updated);
        return;
    }
    // First write to this row: allocate it, then seed the cell from the default.
    arr[i] = using.make(n, defaultValue);
    arr[i].put(j, using.valueOf(v + defaultValue));
}
/**
 * Adds the int {@code v} to the cell at row {@code i}, column {@code j}.
 *
 * <p>If row {@code i} is not allocated yet it is created with
 * {@code using.make(n, defaultValue)} and the cell is set to
 * {@code using.valueOf(v + defaultValue)}. For an existing row the current
 * value is read via {@code intValue()} before the addition.
 *
 * @param i the row index
 * @param j the column index
 * @param v the amount to add
 */
@Override
public void add(int i, int j, int v) {
    if (arr[i] != null) {
        final int updated = arr[i].value(j).intValue() + v;
        arr[i].put(j, updated);
        return;
    }
    // First write to this row: allocate it, then seed the cell from the default.
    arr[i] = using.make(n, defaultValue);
    arr[i].put(j, using.valueOf(v + defaultValue));
}
/**
 * Accumulates a Kullback-Leibler style score of {@code vec1} against
 * {@code vec2}, skipping keys contained in {@code stopWordList}.
 *
 * <p>Each entry of {@code vec1} contributes its count divided by
 * {@code vec1.sum()}, multiplied by either a log term floored at
 * {@code KLD_NEG_COST} (when {@code vec2} has a non-zero count for the key) or
 * {@code KLD_NEG_COST} itself (when it does not).
 *
 * @param vec1 the vector whose entries are iterated
 * @param vec2 the vector compared against
 * @param stopWordList keys to skip
 * @return the accumulated score
 */
public static double kullbackLeiblerDivergence(Vector<Integer> vec1, Vector<Integer> vec2, final StopWordList stopWordList) {
    final double N1 = vec1.sum();
    final double N2 = vec2.sum();
    double kld = 0.0;
    for (Map.Entry<Integer, Integer> e : vec1.entrySet()) {
        if (stopWordList.contains(e.getKey())) {
            continue;
        }
        final int tfj = vec2.value(e.getKey()).intValue();
        final double weight = (double) e.getValue() / N1;
        final double factor;
        if (tfj == 0) {
            factor = KLD_NEG_COST;
        } else {
            // NOTE(review): the log argument multiplies both counts
            // (N2 / N1 * tf1 * tf2) rather than forming a ratio of relative
            // frequencies - confirm this is the intended formula.
            factor = Math.max(KLD_NEG_COST, Math.log(N2 / N1 * e.getValue() * tfj));
        }
        kld += weight * factor;
    }
    return kld;
}
/**
 * Builds a new sparse vector holding the values of {@code source} at the
 * indices listed in {@code salients}.
 *
 * @param salients the indices to copy
 * @param source the vector to read from; its {@code length()} sizes the result
 * @return a {@code SparseIntArray} populated by adding {@code source.value(k)}
 *         at each index {@code k} of {@code salients}
 */
public static Vector<Integer> filter(final IntSet salients, final Vector<Integer> source) {
    final SparseIntArray kept = new SparseIntArray(source.length());
    for (int key : salients) {
        kept.add(key, source.value(key));
    }
    return kept;
}
continue; final double v1i = vec1.value(i).doubleValue();// / v1sum; final double v2i = vec2.value(i).doubleValue();// / v2sum; ab += v2i * v1i - v2sum * mu.doubleValue(i) * v1i - v1sum * mu.doubleValue(i) * v2i; a2 += v1i * v1i - 2 * v1sum * mu.doubleValue(i) * v1i; continue; final double v2i = vec1.value(i).doubleValue();// / v2sum;
continue; final double v1i = vec1.value(i).doubleValue();// / v1sum; final double v2i = vec2.value(i).doubleValue();// / v2sum; ab += v2i * v1i - v2sum * mu.doubleValue(i) * v1i - v1sum * mu.doubleValue(i) * v2i; a2 += v1i * v1i - 2 * v1sum * mu.doubleValue(i) * v1i; continue; final double v2i = vec2.value(i).doubleValue();// / v2sum; if (!vec1.containsKey(i)) { ab -= v1sum * mu.doubleValue(i) * v2i;
/**
 * Selects up to {@code topN} word indices with the highest salience, where the
 * salience of word {@code w} is its {@code mu} value in the reference divided
 * by its {@code mu} value in the corpus. Words whose {@code mu} is zero in
 * either source are never selected.
 *
 * @param reference the reference file; always precomputed as {@code SourceType.SIMPLE}
 * @param corpus the corpus file; precomputed with {@code sourceType}
 * @param W the number of word indices to consider ({@code 0 .. W-1})
 * @param topN the maximum number of indices to return; a value of zero or less
 *             yields an empty set
 * @param sourceType the source type used for {@code corpus}
 * @return the selected indices, ordered by ascending salience (ties by index)
 * @throws IOException propagated from {@code PrecomputedValues.precompute}
 */
public static IntSet mostSalient(final File reference, final File corpus, int W, int topN, SourceType sourceType) throws IOException {
    final PrecomputedValues precomp1 = PrecomputedValues.precompute(reference, W, SourceType.SIMPLE);
    final PrecomputedValues precomp2 = PrecomputedValues.precompute(corpus, W, sourceType);
    final double[] salience = new double[W];
    // Ordered by salience, lowest first, so first() is always the weakest
    // retained word; the index breaks ties so distinct words never compare equal.
    final IntRBTreeSet topNWords = new IntRBTreeSet(new IntComparator() {
        @Override
        public int compare(int i, int i1) {
            return salience[i] < salience[i1] ? -1 : (salience[i] > salience[i1] ? 1 : i - i1);
        }

        @Override
        public int compare(Integer o1, Integer o2) {
            return compare(o1.intValue(), o2.intValue());
        }
    });
    for (int w = 0; w < W; w++) {
        final double val = precomp1.mu.value(w);
        final double val2 = precomp2.mu.value(w);
        if (val != 0.0 && val2 != 0.0) {
            // The comparator reads salience[], so the score must be stored
            // before w is inserted into the set.
            salience[w] = val / val2;
            if (topNWords.size() < topN) {
                topNWords.add(w);
            } else if (!topNWords.isEmpty() && salience[w] > salience[topNWords.firstInt()]) {
                // Set is full: evict the weakest word in favour of w. The
                // isEmpty() guard covers topN <= 0, where firstInt() on the
                // empty set would otherwise throw.
                topNWords.remove(topNWords.first());
                topNWords.add(w);
            }
        }
    }
    return topNWords;
}