/**
 * Positions the nested iteration state on an inner iterator that still has an
 * element to return, pulling further outer keys as needed.
 *
 * <p>When the current inner iterator is missing or exhausted, the next key is taken
 * from {@code outerIt}, remembered in {@code curKey}, and a new inner iterator is
 * opened over the key set of that key's counter. Outer keys whose counter has no
 * keys are skipped, so a {@code true} result always means {@code innerIt.hasNext()}
 * holds.
 *
 * @return {@code true} if {@code innerIt} has a next element; {@code false} once
 *         the outer keys are exhausted as well
 */
private boolean hasInside() {
    // Loop instead of checking once: the counter behind the freshly pulled outer key
    // may itself be empty, in which case reporting "true" would make the following
    // innerIt.next() fail.
    while (innerIt == null || !innerIt.hasNext()) {
        if (!outerIt.hasNext()) {
            return false;
        }
        curKey = outerIt.next();
        innerIt = getCounter(curKey).keySet().iterator();
    }
    return true;
}
/**
 * Removes from this counter every element whose count is strictly less than
 * the given threshold. The dirty flag is raised for each removal.
 *
 * @param threshold smallest count an element may have and still be kept
 */
public void dropElementsBelowThreshold(double threshold) {
    for (Iterator<T> keys = keySet().iterator(); keys.hasNext(); ) {
        T key = keys.next();
        double count = map.get(key).get();
        if (count < threshold) {
            // Remove through the iterator so the traversal stays valid.
            keys.remove();
            dirty.set(true);
        }
    }
}
/**
 * Adds the counts stored in another counter onto this counter, one element at a time.
 *
 * @param other counter whose per-element counts are added to this counter
 * @param <T2> element type of the other counter; must be a subtype of this
 *             counter's element type
 */
public <T2 extends T> void incrementAll(Counter<T2> other) {
    for (T2 key : other.keySet()) {
        incrementCount(key, other.getCount(key));
    }
}
/**
 * Normalizes this counter: every count is divided by the sum of all counts, and the
 * cached total is rebuilt afterwards.
 *
 * <p>The total is recomputed and captured once before any count is rewritten, so all
 * elements are divided by the same, up-to-date denominator. If the sum of counts is
 * zero, the division follows ordinary {@code double} semantics (NaN or infinity).
 */
public void normalize() {
    // The cached total may be out of date when entries were changed without
    // adjusting it, so recompute it from the current counts first.
    rebuildTotals();
    // Read the denominator once; re-reading it inside the loop would pick up any
    // change made to the total while counts are being rewritten.
    final double total = totalCount.get();
    for (T key : keySet()) {
        setCount(key, getCount(key) / total);
    }
    rebuildTotals();
}
/**
 * Recomputes the cached total from scratch by summing the count of every key,
 * then clears the dirty flag.
 */
protected void rebuildTotals() {
    // Reset first, then accumulate each element's current count.
    totalCount.set(0);
    for (T element : keySet()) {
        double elementCount = getCount(element);
        totalCount.addAndGet(elementCount);
    }
    dirty.set(false);
}
/**
 * Computes the cosine similarity between the character-count vectors of the first
 * two strings supplied.
 *
 * <p>Each string is reduced to a counter of its individual characters. The result is
 * the dot product of the two count vectors divided by the product of their Euclidean
 * norms. Only the first two arguments are used; any further arguments are ignored.
 *
 * @param strings the strings to compare; at least two are expected
 * @return the cosine similarity of the first two strings, or {@code 0} when fewer than
 *         two strings are given, when either of the first two is {@code null}, or when
 *         either of them is empty
 */
public static double stringSimilarity(String... strings) {
    // Guard every input the loops below would otherwise dereference blindly.
    if (strings == null || strings.length < 2 || strings[0] == null || strings[1] == null) {
        return 0;
    }

    Counter<String> counter = new Counter<>();
    Counter<String> counter2 = new Counter<>();
    for (int i = 0; i < strings[0].length(); i++) {
        counter.incrementCount(String.valueOf(strings[0].charAt(i)), 1.0f);
    }
    for (int i = 0; i < strings[1].length(); i++) {
        counter2.incrementCount(String.valueOf(strings[1].charAt(i)), 1.0f);
    }

    Set<String> v1 = counter.keySet();
    Set<String> v2 = counter2.keySet();
    Set<String> both = SetUtils.intersection(v1, v2);

    double scalar = 0;
    double norm1 = 0;
    double norm2 = 0;
    // Only characters present in both strings contribute to the dot product.
    for (String k : both) {
        scalar += counter.getCount(k) * counter2.getCount(k);
    }
    for (String k : v1) {
        norm1 += counter.getCount(k) * counter.getCount(k);
    }
    for (String k : v2) {
        norm2 += counter2.getCount(k) * counter2.getCount(k);
    }

    double denominator = Math.sqrt(norm1 * norm2);
    // An empty string yields a zero vector; avoid 0/0 (NaN) and report no similarity.
    if (denominator == 0) {
        return 0;
    }
    return scalar / denominator;
}
/**
 * Positions the nested iteration state on an inner iterator that still has an
 * element to return, pulling further outer keys as needed.
 *
 * <p>When the current inner iterator is missing or exhausted, the next key is taken
 * from {@code outerIt}, remembered in {@code curKey}, and a new inner iterator is
 * opened over the key set of that key's counter. Outer keys whose counter has no
 * keys are skipped, so a {@code true} result always means {@code innerIt.hasNext()}
 * holds.
 *
 * @return {@code true} if {@code innerIt} has a next element; {@code false} once
 *         the outer keys are exhausted as well
 */
private boolean hasInside() {
    // Loop instead of checking once: the counter behind the freshly pulled outer key
    // may itself be empty, in which case reporting "true" would make the following
    // innerIt.next() fail.
    while (innerIt == null || !innerIt.hasNext()) {
        if (!outerIt.hasNext()) {
            return false;
        }
        curKey = outerIt.next();
        innerIt = getCounter(curKey).keySet().iterator();
    }
    return true;
}
/**
 * Removes from this counter every element whose count is strictly less than
 * the given threshold. The dirty flag is raised for each removal.
 *
 * @param threshold smallest count an element may have and still be kept
 */
public void dropElementsBelowThreshold(double threshold) {
    for (Iterator<T> keys = keySet().iterator(); keys.hasNext(); ) {
        T key = keys.next();
        double count = map.get(key).get();
        if (count < threshold) {
            // Remove through the iterator so the traversal stays valid.
            keys.remove();
            dirty.set(true);
        }
    }
}
/**
 * Adds the counts stored in another counter onto this counter, one element at a time.
 *
 * @param other counter whose per-element counts are added to this counter
 * @param <T2> element type of the other counter; must be a subtype of this
 *             counter's element type
 */
public <T2 extends T> void incrementAll(Counter<T2> other) {
    for (T2 key : other.keySet()) {
        incrementCount(key, other.getCount(key));
    }
}
/**
 * Computes the cosine similarity between the character-count vectors of the first
 * two strings supplied.
 *
 * <p>Each string is reduced to a counter of its individual characters. The result is
 * the dot product of the two count vectors divided by the product of their Euclidean
 * norms. Only the first two arguments are used; any further arguments are ignored.
 *
 * @param strings the strings to compare; at least two are expected
 * @return the cosine similarity of the first two strings, or {@code 0} when fewer than
 *         two strings are given, when either of the first two is {@code null}, or when
 *         either of them is empty
 */
public static double stringSimilarity(String... strings) {
    // Guard every input the loops below would otherwise dereference blindly.
    if (strings == null || strings.length < 2 || strings[0] == null || strings[1] == null) {
        return 0;
    }

    Counter<String> counter = new Counter<>();
    Counter<String> counter2 = new Counter<>();
    for (int i = 0; i < strings[0].length(); i++) {
        counter.incrementCount(String.valueOf(strings[0].charAt(i)), 1.0f);
    }
    for (int i = 0; i < strings[1].length(); i++) {
        counter2.incrementCount(String.valueOf(strings[1].charAt(i)), 1.0f);
    }

    Set<String> v1 = counter.keySet();
    Set<String> v2 = counter2.keySet();
    Set<String> both = SetUtils.intersection(v1, v2);

    double scalar = 0;
    double norm1 = 0;
    double norm2 = 0;
    // Only characters present in both strings contribute to the dot product.
    for (String k : both) {
        scalar += counter.getCount(k) * counter2.getCount(k);
    }
    for (String k : v1) {
        norm1 += counter.getCount(k) * counter.getCount(k);
    }
    for (String k : v2) {
        norm2 += counter2.getCount(k) * counter2.getCount(k);
    }

    double denominator = Math.sqrt(norm1 * norm2);
    // An empty string yields a zero vector; avoid 0/0 (NaN) and report no similarity.
    if (denominator == 0) {
        return 0;
    }
    return scalar / denominator;
}
/**
 * Recomputes the cached total from scratch by summing the count of every key,
 * then clears the dirty flag.
 */
protected void rebuildTotals() {
    // Reset first, then accumulate each element's current count.
    totalCount.set(0);
    for (T element : keySet()) {
        double elementCount = getCount(element);
        totalCount.addAndGet(elementCount);
    }
    dirty.set(false);
}
/**
 * Computes the cosine similarity between the character-count vectors of the first
 * two strings supplied.
 *
 * <p>Each string is reduced to a counter of its individual characters. The result is
 * the dot product of the two count vectors divided by the product of their Euclidean
 * norms. Only the first two arguments are used; any further arguments are ignored.
 *
 * @param strings the strings to compare; at least two are expected
 * @return the cosine similarity of the first two strings, or {@code 0} when fewer than
 *         two strings are given, when either of the first two is {@code null}, or when
 *         either of them is empty
 */
public static double stringSimilarity(String... strings) {
    // Guard every input the loops below would otherwise dereference blindly.
    if (strings == null || strings.length < 2 || strings[0] == null || strings[1] == null) {
        return 0;
    }

    Counter<String> counter = new Counter<>();
    Counter<String> counter2 = new Counter<>();
    for (int i = 0; i < strings[0].length(); i++) {
        counter.incrementCount(String.valueOf(strings[0].charAt(i)), 1.0f);
    }
    for (int i = 0; i < strings[1].length(); i++) {
        counter2.incrementCount(String.valueOf(strings[1].charAt(i)), 1.0f);
    }

    Set<String> v1 = counter.keySet();
    Set<String> v2 = counter2.keySet();
    Set<String> both = SetUtils.intersection(v1, v2);

    double scalar = 0;
    double norm1 = 0;
    double norm2 = 0;
    // Only characters present in both strings contribute to the dot product.
    for (String k : both) {
        scalar += counter.getCount(k) * counter2.getCount(k);
    }
    for (String k : v1) {
        norm1 += counter.getCount(k) * counter.getCount(k);
    }
    for (String k : v2) {
        norm2 += counter2.getCount(k) * counter2.getCount(k);
    }

    double denominator = Math.sqrt(norm1 * norm2);
    // An empty string yields a zero vector; avoid 0/0 (NaN) and report no similarity.
    if (denominator == 0) {
        return 0;
    }
    return scalar / denominator;
}
/**
 * Normalizes this counter: every count is divided by the sum of all counts, and the
 * cached total is rebuilt afterwards.
 *
 * <p>The total is recomputed and captured once before any count is rewritten, so all
 * elements are divided by the same, up-to-date denominator. If the sum of counts is
 * zero, the division follows ordinary {@code double} semantics (NaN or infinity).
 */
public void normalize() {
    // The cached total may be out of date when entries were changed without
    // adjusting it, so recompute it from the current counts first.
    rebuildTotals();
    // Read the denominator once; re-reading it inside the loop would pick up any
    // change made to the total while counts are being rewritten.
    final double total = totalCount.get();
    for (T key : keySet()) {
        setCount(key, getCount(key) / total);
    }
    rebuildTotals();
}
/**
 * Collects the rows whose value in the given column appears at least twice in that
 * column.
 *
 * @param column index of the column to inspect; checked with
 *               {@code checkInvalidColumn} before use
 * @return a new grid, built with this grid's separator and column count, holding the
 *         matching rows in this grid's iteration order
 */
public StringGrid getRowsWithDuplicateValuesInColumn(int column) {
    checkInvalidColumn(column);

    StringGrid grid = new StringGrid(sep, numColumns);

    // Count how often each value occurs in the column.
    Counter<String> counter = new Counter<>();
    for (String val : getColumn(column)) {
        counter.incrementCount(val, 1.0f);
    }
    // Discard values seen fewer than two times; what remains are the duplicated values.
    counter.dropElementsBelowThreshold(2.0f);
    Set<String> duplicatedValues = counter.keySet();

    // One set lookup per row instead of comparing the row against every key.
    for (List<String> row : this) {
        if (duplicatedValues.contains(row.get(column))) {
            grid.addRow(row);
        }
    }
    return grid;
}
/**
 * Collects the rows whose value in the given column is not repeated anywhere else in
 * that column.
 *
 * @param column index of the column to inspect; checked with
 *               {@code checkInvalidColumn} before use
 * @return a new grid, built with this grid's separator and column count, holding the
 *         matching rows in this grid's iteration order
 */
public StringGrid getRowWithOnlyOneOccurrence(int column) {
    checkInvalidColumn(column);

    StringGrid grid = new StringGrid(sep, numColumns);

    // Count how often each value occurs in the column.
    Counter<String> counter = new Counter<>();
    for (String val : getColumn(column)) {
        counter.incrementCount(val, 1.0f);
    }

    // Build the set of values that are NOT repeated. Filtering must happen on the set
    // that is used for matching below; pruning only the counter would leave a copied
    // key set untouched and every row would match.
    Set<String> singleValues = new HashSet<>();
    for (String key : counter.keySet()) {
        if (counter.getCount(key) > 1) {
            continue;
        }
        singleValues.add(key);
    }

    for (List<String> row : this) {
        if (singleValues.contains(row.get(column))) {
            grid.addRow(row);
        }
    }
    return grid;
}