/**
 * Biomedical variant of the "dan2" word shape: delegates to
 * {@code wordShapeDan2} and marks tokens that contain a Greek letter.
 *
 * <p>The underlying shape is a fine-grained one that equivalence-classes
 * lower case, upper case and digits, collapses sequences of the same type,
 * and keeps all punctuation. When {@code containsGreekLetter(s)} is true,
 * the suffix {@code "-GREEK"} is appended to that shape.
 *
 * @param s the token whose shape is computed
 * @param knownLCWords passed unchanged to {@code wordShapeDan2}
 *     (presumably a set of known lower-case words; confirm against that method)
 * @return the dan2 shape of {@code s}, with {@code "-GREEK"} appended when
 *     {@code s} contains a Greek letter
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // Check for Greek first, then compute the shape, mirroring the call order
  // of the branch-based formulation.
  final boolean hasGreek = containsGreekLetter(s);
  final String shape = wordShapeDan2(s, knownLCWords);
  return hasGreek ? shape + "-GREEK" : shape;
}
/**
 * Returns the "dan2" word shape of a token, extended with a Greek-letter
 * marker that is useful for biomedical text.
 *
 * <p>The base shape comes from {@code wordShapeDan2}: a fine-grained shape
 * that equivalence-classes lower case, upper case and digits, collapses
 * sequences of the same type, and keeps all punctuation. If
 * {@code containsGreekLetter(s)} reports a Greek letter in the token,
 * {@code "-GREEK"} is appended to the base shape.
 *
 * @param s the token whose shape is computed
 * @param knownLCWords forwarded as-is to {@code wordShapeDan2}
 * @return the base shape, or the base shape followed by {@code "-GREEK"}
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // Common case: no Greek letter, so the plain dan2 shape is the answer.
  if (!containsGreekLetter(s)) {
    return wordShapeDan2(s, knownLCWords);
  }
  return wordShapeDan2(s, knownLCWords) + "-GREEK";
}
/**
 * Biomedical variant of the "dan2" word shape: delegates to
 * {@code wordShapeDan2} and marks tokens that contain a Greek letter.
 *
 * <p>The underlying shape is a fine-grained one that equivalence-classes
 * lower case, upper case and digits, collapses sequences of the same type,
 * and keeps all punctuation. When {@code containsGreekLetter(s)} is true,
 * the suffix {@code "-GREEK"} is appended to that shape.
 *
 * @param s the token whose shape is computed
 * @param knownLCWords passed unchanged to {@code wordShapeDan2}
 *     (presumably a set of known lower-case words; confirm against that method)
 * @return the dan2 shape of {@code s}, with {@code "-GREEK"} appended when
 *     {@code s} contains a Greek letter
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // Check for Greek first, then compute the shape, mirroring the call order
  // of the branch-based formulation.
  final boolean hasGreek = containsGreekLetter(s);
  final String shape = wordShapeDan2(s, knownLCWords);
  return hasGreek ? shape + "-GREEK" : shape;
}
/**
 * Returns the "dan2" word shape of a token, extended with a Greek-letter
 * marker that is useful for biomedical text.
 *
 * <p>The base shape comes from {@code wordShapeDan2}: a fine-grained shape
 * that equivalence-classes lower case, upper case and digits, collapses
 * sequences of the same type, and keeps all punctuation. If
 * {@code containsGreekLetter(s)} reports a Greek letter in the token,
 * {@code "-GREEK"} is appended to the base shape.
 *
 * @param s the token whose shape is computed
 * @param knownLCWords forwarded as-is to {@code wordShapeDan2}
 * @return the base shape, or the base shape followed by {@code "-GREEK"}
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // Common case: no Greek letter, so the plain dan2 shape is the answer.
  if (!containsGreekLetter(s)) {
    return wordShapeDan2(s, knownLCWords);
  }
  return wordShapeDan2(s, knownLCWords) + "-GREEK";
}
/**
 * Biomedical variant of the "dan2" word shape: delegates to
 * {@code wordShapeDan2} and marks tokens that contain a Greek letter.
 *
 * <p>The underlying shape is a fine-grained one that equivalence-classes
 * lower case, upper case and digits, collapses sequences of the same type,
 * and keeps all punctuation. When {@code containsGreekLetter(s)} is true,
 * the suffix {@code "-GREEK"} is appended to that shape.
 *
 * @param s the token whose shape is computed
 * @param knownLCWords passed unchanged to {@code wordShapeDan2}
 *     (presumably a set of known lower-case words; confirm against that method)
 * @return the dan2 shape of {@code s}, with {@code "-GREEK"} appended when
 *     {@code s} contains a Greek letter
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // Check for Greek first, then compute the shape, mirroring the call order
  // of the branch-based formulation.
  final boolean hasGreek = containsGreekLetter(s);
  final String shape = wordShapeDan2(s, knownLCWords);
  return hasGreek ? shape + "-GREEK" : shape;
}