/**
 * Builds the heuristic instance for this parser/builder: a fresh {@link GND}.
 *
 * @param includeNegatives     not read by this method
 * @param backgroundIsSuperset forwarded unchanged to the {@code GND} constructor
 * @return a newly constructed {@code GND}
 */
@Override
protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
    final GND heuristic = new GND(backgroundIsSuperset);
    return heuristic;
}
/** * Calculates Google Normalized Distance, as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 * link: http://arxiv.org/pdf/cs/0412098v3.pdf */ @Override public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) { Frequencies frequencies = computeNxys(subsetFreq, subsetSize, supersetFreq, supersetSize, "GND"); double fx = frequencies.N1_; double fy = frequencies.N_1; double fxy = frequencies.N11; double N = frequencies.N; if (fxy == 0) { // no co-occurrence return 0.0; } if ((fx == fy) && (fx == fxy)) { // perfect co-occurrence return 1.0; } double score = (Math.max(Math.log(fx), Math.log(fy)) - Math.log(fxy)) / (Math.log(N) - Math.min(Math.log(fx), Math.log(fy))); //we must invert the order of terms because GND scores relevant terms low score = Math.exp(-1.0d * score); return score; }
/**
 * Creates the significance heuristic produced here, which is always a {@link GND}.
 *
 * @param includeNegatives     ignored; this method never reads it
 * @param backgroundIsSuperset passed straight through to the {@code GND} constructor
 * @return a new {@code GND} instance
 */
@Override
protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
    final GND gnd = new GND(backgroundIsSuperset);
    return gnd;
}
/** * Calculates Google Normalized Distance, as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 * link: http://arxiv.org/pdf/cs/0412098v3.pdf */ @Override public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) { Frequencies frequencies = computeNxys(subsetFreq, subsetSize, supersetFreq, supersetSize, "GND"); double fx = frequencies.N1_; double fy = frequencies.N_1; double fxy = frequencies.N11; double N = frequencies.N; if (fxy == 0) { // no co-occurrence return 0.0; } if ((fx == fy) && (fx == fxy)) { // perfect co-occurrence return 1.0; } double score = (Math.max(Math.log(fx), Math.log(fy)) - Math.log(fxy)) / (Math.log(N) - Math.min(Math.log(fx), Math.log(fy))); //we must invert the order of terms because GND scores relevant terms low score = Math.exp(-1.0d * score); return score; }
/**
 * Supplies the heuristic object for this component by constructing a {@link GND}.
 *
 * @param includeNegatives     unused in this method body
 * @param backgroundIsSuperset the only value given to the {@code GND} constructor
 * @return the freshly created {@code GND}
 */
@Override
protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
    final SignificanceHeuristic created = new GND(backgroundIsSuperset);
    return created;
}
/** * Calculates Google Normalized Distance, as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 * link: http://arxiv.org/pdf/cs/0412098v3.pdf */ @Override public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) { Frequencies frequencies = computeNxys(subsetFreq, subsetSize, supersetFreq, supersetSize, "GND"); double fx = frequencies.N1_; double fy = frequencies.N_1; double fxy = frequencies.N11; double N = frequencies.N; if (fxy == 0) { // no co-occurrence return 0.0; } if ((fx == fy) && (fx == fxy)) { // perfect co-occurrence return 1.0; } double score = (Math.max(Math.log(fx), Math.log(fy)) - Math.log(fxy)) / (Math.log(N) - Math.min(Math.log(fx), Math.log(fy))); //we must invert the order of terms because GND scores relevant terms low score = Math.exp(-1.0d * score); return score; }
/**
 * Factory hook that returns a new {@link GND} heuristic.
 *
 * @param includeNegatives     not consulted by this method
 * @param backgroundIsSuperset handed to the {@code GND} constructor as-is
 * @return a new {@code GND}
 */
@Override
protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
    final GND result = new GND(backgroundIsSuperset);
    return result;
}
/** * Calculates Google Normalized Distance, as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 * link: http://arxiv.org/pdf/cs/0412098v3.pdf */ @Override public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) { Frequencies frequencies = computeNxys(subsetFreq, subsetSize, supersetFreq, supersetSize, "GND"); double fx = frequencies.N1_; double fy = frequencies.N_1; double fxy = frequencies.N11; double N = frequencies.N; if (fxy == 0) { // no co-occurrence return 0.0; } if ((fx == fy) && (fx == fxy)) { // perfect co-occurrence return 1.0; } double score = (Math.max(Math.log(fx), Math.log(fy)) - Math.log(fxy)) / (Math.log(N) - Math.min(Math.log(fx), Math.log(fy))); //we must invert the order of terms because GND scores relevant terms low score = Math.exp(-1.0d * score); return score; }
/**
 * Instantiates the heuristic for this component; the result is always a {@link GND}.
 *
 * @param includeNegatives     has no effect inside this method
 * @param backgroundIsSuperset given to the {@code GND} constructor
 * @return the new {@code GND} instance
 */
@Override
protected SignificanceHeuristic newHeuristic(boolean includeNegatives, boolean backgroundIsSuperset) {
    final GND instance = new GND(backgroundIsSuperset);
    return instance;
}
/** * Calculates Google Normalized Distance, as described in "The Google Similarity Distance", Cilibrasi and Vitanyi, 2007 * link: http://arxiv.org/pdf/cs/0412098v3.pdf */ @Override public double getScore(long subsetFreq, long subsetSize, long supersetFreq, long supersetSize) { Frequencies frequencies = computeNxys(subsetFreq, subsetSize, supersetFreq, supersetSize, "GND"); double fx = frequencies.N1_; double fy = frequencies.N_1; double fxy = frequencies.N11; double N = frequencies.N; if (fxy == 0) { // no co-occurrence return 0.0; } if ((fx == fy) && (fx == fxy)) { // perfect co-occurrence return 1.0; } double score = (Math.max(Math.log(fx), Math.log(fy)) - Math.log(fxy)) / (Math.log(N) - Math.min(Math.log(fx), Math.log(fy))); //we must invert the order of terms because GND scores relevant terms low score = Math.exp(-1.0d * score); return score; }
/**
 * Reads a {@link GND} heuristic from the stream.
 * <p>
 * Consumes exactly one boolean from {@code in} and passes it to the {@code GND} constructor.
 *
 * @param in the stream to read from
 * @return a new {@code GND} built from the boolean that was read
 * @throws IOException if reading the boolean from the stream fails
 */
@Override
public SignificanceHeuristic readResult(StreamInput in) throws IOException {
    final boolean backgroundIsSuperset = in.readBoolean();
    return new GND(backgroundIsSuperset);
}