/**
 * Stores a newly constructed {@code CosineDistance} in {@code d}.
 */
@Override
public void setup() {
  d = new org.apache.commons.text.similarity.CosineDistance();
}
/**
 * Stores a newly constructed {@code JaccardDistance} in {@code d}.
 */
@Override
public void setup() {
  d = new org.apache.commons.text.similarity.JaccardDistance();
}
/**
 * Stores a newly constructed {@code LevenshteinDistance} in {@code d}.
 */
@Override
public void setup() {
  d = new org.apache.commons.text.similarity.LevenshteinDistance();
}
/** * Compares two Strings with respect to the base String, by Levenshtein distance. * <p/> * The input that is the closest match to the base String will sort before the other. * * @param a an input to compare relative to the base. * @param b an input to compare relative to the base. * * @return -1 if {@code a} is closer to the base than {@code b}; 1 if {@code b} is * closer to the base than {@code a}; 0 if both {@code a} and {@code b} are * equally close to the base. */ @Override public int compare(String a, String b) { // shortcuts if (a.equals(b)) { return 0; // comparing the same value; don't bother } else if (a.equals(base)) { return -1; // a is equal to the base, so it's always first } else if (b.equals(base)) { return 1; // b is equal to the base, so it's always first } // determine which of the two is closer to the base and order it first return Integer.compare(LEVENSHTEIN_DISTANCE.apply(a, base), LEVENSHTEIN_DISTANCE.apply(b, base)); }
/**
 * Tells whether {@code term} is within an allowed Levenshtein distance of
 * {@code value}. The allowed distance depends on the length of {@code term}
 * after {@link String#trim()}:
 * <ul>
 *   <li>0 for fewer than three characters</li>
 *   <li>1 for three, four or five characters</li>
 *   <li>2 for more than five characters</li>
 * </ul>
 *
 * @param term the term to test; it is trimmed before being measured and compared
 * @param value the value the trimmed term is compared against (used as given)
 * @return true if {@code term} is similar to {@code value}
 */
private static boolean isFuzzy(String term, String value) {
    String trimmedTerm = term.trim();
    int termLength = trimmedTerm.length();

    // Longer terms tolerate more edits.
    int maxDistance = termLength < 3 ? 0 : (termLength < 6 ? 1 : 2);

    return LevenshteinDistance.getDefaultInstance().apply(value, trimmedTerm) <= maxDistance;
}
/**
 * Applies a newly created {@code JaroWinklerDistance} to {@code text} and the
 * string form of {@code other}.
 *
 * @param env   not used by this method
 * @param other value whose {@code toString()} result is the second argument
 * @return the value returned by {@code JaroWinklerDistance.apply}
 */
@Signature
public Double jaroWinklerDistance(Environment env, Memory other) {
    return new JaroWinklerDistance().apply(text, other.toString());
}
/**
 * Applies a newly created {@code CosineDistance} to {@code text} and the
 * string form of {@code other}.
 *
 * @param env   not used by this method
 * @param other value whose {@code toString()} result is the second argument
 * @return the value returned by {@code CosineDistance.apply}
 */
@Signature
public Double cosineDistance(Environment env, Memory other) {
    return new CosineDistance().apply(text, other.toString());
}
/**
 * Applies a newly created {@code JaccardDistance} to {@code text} and the
 * string form of {@code other}.
 *
 * @param env   not used by this method
 * @param other value whose {@code toString()} result is the second argument
 * @return the value returned by {@code JaccardDistance.apply}
 */
@Signature
public Double jaccardDistance(Environment env, Memory other) {
    return new JaccardDistance().apply(text, other.toString());
}
/**
 * Applies a newly created {@code HammingDistance} to {@code text} and the
 * string form of {@code other}.
 *
 * @param env   not used by this method
 * @param other value whose {@code toString()} result is the second argument
 * @return the value returned by {@code HammingDistance.apply}
 */
@Signature
public Integer hammingDistance(Environment env, Memory other) {
    return new HammingDistance().apply(text, other.toString());
}
/**
 * Stores in {@code d} a newly constructed {@code FuzzyScore} configured with
 * {@code Locale.ENGLISH}.
 */
@Override
public void setup() {
  java.util.Locale scoringLocale = java.util.Locale.ENGLISH;
  d = new org.apache.commons.text.similarity.FuzzyScore(scoringLocale);
}
/**
 * Converts both raw inputs to Strings with
 * {@code StringFunctionHelpers.toStringFromUTF8} and writes the result of
 * {@code d.apply} on them to {@code out.value}.
 */
@Override
public void eval() {
  // NOTE(review): class names are kept fully qualified as in the original;
  // presumably the surrounding framework requires that -- confirm.
  String left = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput1.start, rawInput1.end, rawInput1.buffer);
  String right = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput2.start, rawInput2.end, rawInput2.buffer);

  out.value = d.apply(left, right);
}
} // closes an enclosing scope that opens outside this excerpt
/**
 * Converts both raw inputs to Strings with
 * {@code StringFunctionHelpers.toStringFromUTF8} and writes the result of
 * {@code d.fuzzyScore} on them to {@code out.value}.
 */
@Override
public void eval() {
  // NOTE(review): class names are kept fully qualified as in the original;
  // presumably the surrounding framework requires that -- confirm.
  String left = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput1.start, rawInput1.end, rawInput1.buffer);
  String right = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput2.start, rawInput2.end, rawInput2.buffer);

  out.value = d.fuzzyScore(left, right);
}
} // closes an enclosing scope that opens outside this excerpt
/**
 * Converts both raw inputs to Strings with
 * {@code StringFunctionHelpers.toStringFromUTF8} and writes the result of
 * {@code d.apply} on them to {@code out.value}.
 */
@Override
public void eval() {
  // NOTE(review): class names are kept fully qualified as in the original;
  // presumably the surrounding framework requires that -- confirm.
  String left = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput1.start, rawInput1.end, rawInput1.buffer);
  String right = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput2.start, rawInput2.end, rawInput2.buffer);

  out.value = d.apply(left, right);
}
} // closes an enclosing scope that opens outside this excerpt
/**
 * Converts both raw inputs to Strings with
 * {@code StringFunctionHelpers.toStringFromUTF8} and writes the result of
 * {@code d.apply} on them to {@code out.value}.
 */
@Override
public void eval() {
  // NOTE(review): class names are kept fully qualified as in the original;
  // presumably the surrounding framework requires that -- confirm.
  String left = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput1.start, rawInput1.end, rawInput1.buffer);
  String right = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput2.start, rawInput2.end, rawInput2.buffer);

  out.value = d.apply(left, right);
}
} // closes an enclosing scope that opens outside this excerpt
/**
 * Converts both raw inputs to Strings with
 * {@code StringFunctionHelpers.toStringFromUTF8} and writes the result of
 * {@code d.apply} on them to {@code out.value}.
 */
@Override
public void eval() {
  // NOTE(review): class names are kept fully qualified as in the original;
  // presumably the surrounding framework requires that -- confirm.
  String left = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput1.start, rawInput1.end, rawInput1.buffer);
  String right = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput2.start, rawInput2.end, rawInput2.buffer);

  out.value = d.apply(left, right);
}
} // closes an enclosing scope that opens outside this excerpt
/**
 * Stores a newly constructed {@code JaroWinklerDistance} in {@code d}.
 */
@Override
public void setup() {
  d = new org.apache.commons.text.similarity.JaroWinklerDistance();
}
/**
 * Stores a newly constructed {@code HammingDistance} in {@code d}.
 */
@Override
public void setup() {
  d = new org.apache.commons.text.similarity.HammingDistance();
}
/**
 * Stores a newly constructed {@code LongestCommonSubsequenceDistance} in {@code d}.
 */
@Override
public void setup() {
  d = new org.apache.commons.text.similarity.LongestCommonSubsequenceDistance();
}
/**
 * Converts both raw inputs to Strings with
 * {@code StringFunctionHelpers.toStringFromUTF8} and writes the result of
 * {@code d.apply} on them to {@code out.value}.
 */
@Override
public void eval() {
  // NOTE(review): class names are kept fully qualified as in the original;
  // presumably the surrounding framework requires that -- confirm.
  String left = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput1.start, rawInput1.end, rawInput1.buffer);
  String right = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput2.start, rawInput2.end, rawInput2.buffer);

  out.value = d.apply(left, right);
}
} // closes an enclosing scope that opens outside this excerpt
/**
 * Converts both raw inputs to Strings with
 * {@code StringFunctionHelpers.toStringFromUTF8} and writes the result of
 * {@code d.apply} on them to {@code out.value}.
 */
@Override
public void eval() {
  // NOTE(review): class names are kept fully qualified as in the original;
  // presumably the surrounding framework requires that -- confirm.
  String left = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput1.start, rawInput1.end, rawInput1.buffer);
  String right = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(
      rawInput2.start, rawInput2.end, rawInput2.buffer);

  out.value = d.apply(left, right);
}
} // closes an enclosing scope that opens outside this excerpt