/** * Calculates the Jensen-Shannon divergence between the two counters. That is, * it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))] . * This code assumes that the Counters have only non-negative values in them. * * @return The Jensen-Shannon divergence between the distributions */ public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) { // need to normalize the counters first before averaging them! Else buggy if not a probability distribution Counter<E> d1 = asNormalizedCounter(c1); Counter<E> d2 = asNormalizedCounter(c2); Counter<E> average = average(d1, d2); double kl1 = klDivergence(d1, average); double kl2 = klDivergence(d2, average); return (kl1 + kl2) / 2.0; }
/**
 * Calculates the skew divergence between the two counters. That is, it
 * calculates KL(c1 || (c2*skew + c1*(1-skew))) . In other words, how well can
 * c1 be represented by a "smoothed" c2.
 *
 * @return The skew divergence between the distributions
 */
public static <E> double skewDivergence(Counter<E> c1, Counter<E> c2, double skew) {
  // Work on normalized copies so the linear combination is a proper mixture.
  Counter<E> p = asNormalizedCounter(c1);
  Counter<E> q = asNormalizedCounter(c2);
  // Smooth q toward p: skew*q + (1-skew)*p.
  Counter<E> smoothed = linearCombination(q, skew, p, 1.0 - skew);
  return klDivergence(p, smoothed);
}
// Score this key by how far its rule distribution diverges from the
// reference distribution cntr, weighted by the amount of evidence for it.
ClassicCounter<List<String>> cntr2 = pr.get(key);
double support2 = cntr2.totalCount();
double kl = Counters.klDivergence(cntr2, cntr);
// Fixed: the original declared `kl` twice and added the pair twice in the
// same scope (a copy-paste error that does not compile). Autoboxing replaces
// the deprecated new Double(...) constructor.
answers.add(new Pair<>(key, kl * support2));
// Fixed: the original computed klDivergence(cntr2, cntr) before cntr2 was
// declared, and declared `kl` twice — neither compiles. Declare first, then use.
// The raw ClassicCounter/HashMap types mirror the surrounding legacy code.
ClassicCounter cntr2 = (ClassicCounter) ((HashMap) rightRules.get(label)).get(sis);
double support2 = cntr2.totalCount();
// KL(annotated || unannotated): how much the right-sister annotation `sis`
// changes the rule distribution for this label.
double kl = Counters.klDivergence(cntr2, cntr);
String annotatedLabel = label + "=r=" + sis;
System.out.println("KL(" + annotatedLabel + "||" + label + ") = " + nf.format(kl) + "\t" + "support(" + sis + ") = " + support2);
// Parent-annotated rules: score the candidate split by KL divergence from the
// unsplit distribution, weighted by its support.
ClassicCounter<List<String>> cntr2 = pRules.get(key);
double support2 = cntr2.totalCount();
double kl = Counters.klDivergence(cntr2, cntr);
System.out.println("KL(" + key + "||" + node + ") = " + nf.format(kl) + "\t" + "support(" + key + ") = " + support2);
double score = kl * support2;
// Grandparent-annotated rules: same scoring. Fixed: the original redeclared
// cntr2/support2/kl/score in the same scope (does not compile); the
// grandparent pass now uses distinct locals.
ClassicCounter<List<String>> gCntr2 = gPRules.get(key);
double gSupport2 = gCntr2.totalCount();
double gKl = Counters.klDivergence(gCntr2, cntr);
System.out.println("KL(" + key + "||" + node + ") = " + nf.format(gKl) + "\t" + "support(" + key + ") = " + gSupport2);
double gScore = gKl * gSupport2;
/**
 * Calculate sister annotation statistics suitable for doing
 * selective sister splitting in the PCFGParser inside the
 * FactoredParser.
 *
 * @param args First argument: path to the Treebank; optional second
 *             argument: character encoding (defaults to UTF-8)
 */
public static void main(String[] args) {
  // Removed leftover debug code that built a throwaway ClassicCounter and
  // printed KL(c, c) on every run — unrelated to sister-annotation stats.
  String encoding = "UTF-8";
  if (args.length > 1) {
    encoding = args[1];
  }
  if (args.length < 1) {
    // Fixed: the usage message previously named the wrong class
    // (ParentAnnotationStats) — this is SisterAnnotationStats.
    System.out.println("Usage: SisterAnnotationStats treebankPath");
  } else {
    SisterAnnotationStats pas = new SisterAnnotationStats();
    Treebank treebank = new DiskTreebank(in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new StringLabelFactory()), new BobChrisTreeNormalizer()), encoding);
    treebank.loadPath(args[0]);
    treebank.apply(pas);
    pas.printStats();
  }
}
/**
 * Calculates the Jensen-Shannon divergence between the two counters.
 * That is, it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))] .
 * This code assumes that the Counters have only non-negative values in them.
 *
 * @return The Jensen-Shannon divergence between the distributions
 */
public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) {
  // Bug fix: normalize the counters before averaging. Averaging raw counts
  // produces the wrong midpoint distribution whenever the inputs are not
  // already probability distributions (e.g. unequal total counts).
  Counter<E> d1 = asNormalizedCounter(c1);
  Counter<E> d2 = asNormalizedCounter(c2);
  Counter<E> average = average(d1, d2);
  double kl1 = klDivergence(d1, average);
  double kl2 = klDivergence(d2, average);
  return (kl1 + kl2) / 2.0;
}
/**
 * Calculates the Jensen-Shannon divergence between the two counters. That is,
 * it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))] .
 * This code assumes that the Counters have only non-negative values in them.
 *
 * @return The Jensen-Shannon divergence between the distributions
 */
public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) {
  // Bug fix: normalize before averaging — otherwise the result is wrong for
  // counters that are not already probability distributions.
  Counter<E> d1 = asNormalizedCounter(c1);
  Counter<E> d2 = asNormalizedCounter(c2);
  Counter<E> average = average(d1, d2);
  double kl1 = klDivergence(d1, average);
  double kl2 = klDivergence(d2, average);
  return (kl1 + kl2) / 2.0;
}
/**
 * Calculates the skew divergence between the two counters. That is, it
 * calculates KL(c1 || (c2*skew + c1*(1-skew))) . In other words, how well can
 * c1 be represented by a "smoothed" c2.
 *
 * @return The skew divergence between the distributions
 */
public static <E> double skewDivergence(Counter<E> c1, Counter<E> c2, double skew) {
  // Bug fix: normalize the counters first. Mixing raw counts gives a
  // smoothed "distribution" with the wrong weights unless both inputs are
  // already probability distributions.
  Counter<E> d1 = asNormalizedCounter(c1);
  Counter<E> d2 = asNormalizedCounter(c2);
  Counter<E> average = linearCombination(d2, skew, d1, (1.0 - skew));
  return klDivergence(d1, average);
}
/**
 * Calculates the skew divergence between the two counters.
 * That is, it calculates KL(c1 || (c2*skew + c1*(1-skew))) .
 * In other words, how well can c1 be represented by a "smoothed" c2.
 *
 * @return The skew divergence between the distributions
 */
public static <E> double skewDivergence(Counter<E> c1, Counter<E> c2, double skew) {
  // Bug fix: normalize first so the linear combination is a true mixture of
  // distributions rather than of raw counts.
  Counter<E> d1 = asNormalizedCounter(c1);
  Counter<E> d2 = asNormalizedCounter(c2);
  Counter<E> average = linearCombination(d2, skew, d1, (1.0 - skew));
  return klDivergence(d1, average);
}
/** * Calculates the Jensen-Shannon divergence between the two counters. That is, * it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))] . * This code assumes that the Counters have only non-negative values in them. * * @return The Jensen-Shannon divergence between the distributions */ public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) { // need to normalize the counters first before averaging them! Else buggy if not a probability distribution Counter<E> d1 = asNormalizedCounter(c1); Counter<E> d2 = asNormalizedCounter(c2); Counter<E> average = average(d1, d2); double kl1 = klDivergence(d1, average); double kl2 = klDivergence(d2, average); return (kl1 + kl2) / 2.0; }
/** * Calculates the Jensen-Shannon divergence between the two counters. That is, * it calculates 1/2 [KL(c1 || avg(c1,c2)) + KL(c2 || avg(c1,c2))] . * This code assumes that the Counters have only non-negative values in them. * * @return The Jensen-Shannon divergence between the distributions */ public static <E> double jensenShannonDivergence(Counter<E> c1, Counter<E> c2) { // need to normalize the counters first before averaging them! Else buggy if not a probability distribution Counter<E> d1 = asNormalizedCounter(c1); Counter<E> d2 = asNormalizedCounter(c2); Counter<E> average = average(d1, d2); double kl1 = klDivergence(d1, average); double kl2 = klDivergence(d2, average); return (kl1 + kl2) / 2.0; }
/**
 * Calculates the skew divergence between the two counters. That is, it
 * calculates KL(c1 || (c2*skew + c1*(1-skew))) . In other words, how well can
 * c1 be represented by a "smoothed" c2.
 *
 * @return The skew divergence between the distributions
 */
public static <E> double skewDivergence(Counter<E> c1, Counter<E> c2, double skew) {
  // Normalize both counters so the mixture below combines distributions,
  // not raw counts.
  Counter<E> target = asNormalizedCounter(c1);
  Counter<E> approx = asNormalizedCounter(c2);
  Counter<E> mixture = linearCombination(approx, skew, target, 1.0 - skew);
  return klDivergence(target, mixture);
}
/**
 * Calculates the skew divergence between the two counters. That is, it
 * calculates KL(c1 || (c2*skew + c1*(1-skew))) . In other words, how well can
 * c1 be represented by a "smoothed" c2.
 *
 * @return The skew divergence between the distributions
 */
public static <E> double skewDivergence(Counter<E> c1, Counter<E> c2, double skew) {
  // Both inputs are normalized up front; the combination below then mixes
  // two proper distributions with weights skew and (1 - skew).
  Counter<E> dist1 = asNormalizedCounter(c1);
  Counter<E> dist2 = asNormalizedCounter(c2);
  double complement = 1.0 - skew;
  Counter<E> smoothedDist2 = linearCombination(dist2, skew, dist1, complement);
  return klDivergence(dist1, smoothedDist2);
}
// Weight this key's KL divergence from the reference distribution by its
// total evidence and record it as a candidate answer.
ClassicCounter<List<String>> cntr2 = pr.get(key);
double support2 = cntr2.totalCount();
double kl = Counters.klDivergence(cntr2, cntr);
// Fixed: `kl` was declared twice and the pair was added twice in one scope
// (copy-paste error, does not compile). Autoboxing replaces the deprecated
// new Double(...) constructor.
answers.add(new Pair<>(key, kl * support2));
// Score the key: KL divergence of its distribution from the reference cntr,
// scaled by the amount of support.
ClassicCounter<List<String>> cntr2 = pr.get(key);
double support2 = cntr2.totalCount();
double kl = Counters.klDivergence(cntr2, cntr);
// Fixed: removed the duplicated `double kl` declaration and duplicated add
// (the original fragment did not compile); use autoboxing instead of the
// deprecated new Double(...).
answers.add(new Pair<>(key, kl * support2));
// KL divergence of the counter with itself, printed as a quick sanity check
// (expected to be zero for a distribution compared against itself).
c.setCount("B", 1);
final double selfDivergence = Counters.klDivergence(c, c);
System.out.println("KL Divergence: " + selfDivergence);
c.setCount("B", 1);
// Compare the counter against itself and report the divergence;
// KL(c || c) serves as a sanity check here.
double divergence = Counters.klDivergence(c, c);
System.out.println("KL Divergence: " + divergence);
/**
 * Calculate sister annotation statistics suitable for doing
 * selective sister splitting in the PCFGParser inside the
 * FactoredParser.
 *
 * @param args First argument: path to the Treebank; optional second
 *             argument: character encoding (defaults to UTF-8)
 */
public static void main(String[] args) {
  // Removed stray debug code (a throwaway counter and a KL(c, c) println)
  // that had no connection to the statistics being computed.
  String encoding = "UTF-8";
  if (args.length > 1) {
    encoding = args[1];
  }
  if (args.length < 1) {
    // Fixed: usage line previously referred to ParentAnnotationStats, but
    // this entry point drives SisterAnnotationStats.
    System.out.println("Usage: SisterAnnotationStats treebankPath");
  } else {
    SisterAnnotationStats pas = new SisterAnnotationStats();
    Treebank treebank = new DiskTreebank(in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new StringLabelFactory()), new BobChrisTreeNormalizer()), encoding);
    treebank.loadPath(args[0]);
    treebank.apply(pas);
    pas.printStats();
  }
}
/**
 * Calculate sister annotation statistics suitable for doing
 * selective sister splitting in the PCFGParser inside the
 * FactoredParser.
 *
 * @param args First argument: path to the Treebank; optional second
 *             argument: character encoding (defaults to UTF-8)
 */
public static void main(String[] args) {
  // Removed leftover debug scaffolding (ClassicCounter + KL(c, c) print)
  // that polluted stdout on every invocation.
  String encoding = "UTF-8";
  if (args.length > 1) {
    encoding = args[1];
  }
  if (args.length < 1) {
    // Fixed: the usage string named the wrong class; this main belongs to
    // SisterAnnotationStats.
    System.out.println("Usage: SisterAnnotationStats treebankPath");
  } else {
    SisterAnnotationStats pas = new SisterAnnotationStats();
    Treebank treebank = new DiskTreebank(in -> new PennTreeReader(in, new LabeledScoredTreeFactory(new StringLabelFactory()), new BobChrisTreeNormalizer()), encoding);
    treebank.loadPath(args[0]);
    treebank.apply(pas);
    pas.printStats();
  }
}