/**
 * Bio-oriented variant of the Dan2 word shape.
 *
 * <p>The base shape comes from {@code wordShapeDan2}: a fine-grained
 * classifier that equivalence-classes lower case, upper case and digits,
 * collapses runs of the same type, and keeps all punctuation. On top of
 * that, a {@code "-GREEK"} suffix is appended when
 * {@code containsGreekLetter} reports a Greek letter embedded in the
 * String, which is useful for biomedical text.
 *
 * @param s the String to compute the shape of
 * @param knownLCWords passed through unchanged to {@code wordShapeDan2}
 * @return the Dan2 shape of {@code s}, with {@code "-GREEK"} appended
 *         when {@code containsGreekLetter(s)} is true
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // The Greek check is evaluated before the shape is computed.
  final boolean hasGreek = containsGreekLetter(s);
  final String baseShape = wordShapeDan2(s, knownLCWords);
  return hasGreek ? baseShape + "-GREEK" : baseShape;
}
return wordShapeChris1(inStr); case WORDSHAPEDAN2: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2USELC: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2BIO: return wordShapeDan2Bio(inStr, knownLCWords);
return wordShapeChris1(inStr); case WORDSHAPEDAN2: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2USELC: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2BIO: return wordShapeDan2Bio(inStr, knownLCWords);
/**
 * Bio-oriented variant of the Dan2 word shape.
 *
 * <p>The base shape comes from {@code wordShapeDan2}: a fine-grained
 * classifier that equivalence-classes lower case, upper case and digits,
 * collapses runs of the same type, and keeps all punctuation. On top of
 * that, a {@code "-GREEK"} suffix is appended when
 * {@code containsGreekLetter} reports a Greek letter embedded in the
 * String, which is useful for biomedical text.
 *
 * @param s the String to compute the shape of
 * @param knownLCWords passed through unchanged to {@code wordShapeDan2}
 * @return the Dan2 shape of {@code s}, with {@code "-GREEK"} appended
 *         when {@code containsGreekLetter(s)} is true
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // The Greek check is evaluated before the shape is computed.
  final boolean hasGreek = containsGreekLetter(s);
  final String baseShape = wordShapeDan2(s, knownLCWords);
  return hasGreek ? baseShape + "-GREEK" : baseShape;
}
return wordShapeChris1(inStr); case WORDSHAPEDAN2: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2USELC: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2BIO: return wordShapeDan2Bio(inStr, knownLCWords);
/**
 * Bio-oriented variant of the Dan2 word shape.
 *
 * <p>The base shape comes from {@code wordShapeDan2}: a fine-grained
 * classifier that equivalence-classes lower case, upper case and digits,
 * collapses runs of the same type, and keeps all punctuation. On top of
 * that, a {@code "-GREEK"} suffix is appended when
 * {@code containsGreekLetter} reports a Greek letter embedded in the
 * String, which is useful for biomedical text.
 *
 * @param s the String to compute the shape of
 * @param knownLCWords passed through unchanged to {@code wordShapeDan2}
 * @return the Dan2 shape of {@code s}, with {@code "-GREEK"} appended
 *         when {@code containsGreekLetter(s)} is true
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // The Greek check is evaluated before the shape is computed.
  final boolean hasGreek = containsGreekLetter(s);
  final String baseShape = wordShapeDan2(s, knownLCWords);
  return hasGreek ? baseShape + "-GREEK" : baseShape;
}
return wordShapeChris1(inStr); case WORDSHAPEDAN2: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2USELC: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2BIO: return wordShapeDan2Bio(inStr, knownLCWords);
/**
 * Bio-oriented variant of the Dan2 word shape.
 *
 * <p>The base shape comes from {@code wordShapeDan2}: a fine-grained
 * classifier that equivalence-classes lower case, upper case and digits,
 * collapses runs of the same type, and keeps all punctuation. On top of
 * that, a {@code "-GREEK"} suffix is appended when
 * {@code containsGreekLetter} reports a Greek letter embedded in the
 * String, which is useful for biomedical text.
 *
 * @param s the String to compute the shape of
 * @param knownLCWords passed through unchanged to {@code wordShapeDan2}
 * @return the Dan2 shape of {@code s}, with {@code "-GREEK"} appended
 *         when {@code containsGreekLetter(s)} is true
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // The Greek check is evaluated before the shape is computed.
  final boolean hasGreek = containsGreekLetter(s);
  final String baseShape = wordShapeDan2(s, knownLCWords);
  return hasGreek ? baseShape + "-GREEK" : baseShape;
}
return wordShapeChris1(inStr); case WORDSHAPEDAN2: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2USELC: return wordShapeDan2(inStr, knownLCWords); case WORDSHAPEDAN2BIO: return wordShapeDan2Bio(inStr, knownLCWords);
/**
 * Bio-oriented variant of the Dan2 word shape.
 *
 * <p>The base shape comes from {@code wordShapeDan2}: a fine-grained
 * classifier that equivalence-classes lower case, upper case and digits,
 * collapses runs of the same type, and keeps all punctuation. On top of
 * that, a {@code "-GREEK"} suffix is appended when
 * {@code containsGreekLetter} reports a Greek letter embedded in the
 * String, which is useful for biomedical text.
 *
 * @param s the String to compute the shape of
 * @param knownLCWords passed through unchanged to {@code wordShapeDan2}
 * @return the Dan2 shape of {@code s}, with {@code "-GREEK"} appended
 *         when {@code containsGreekLetter(s)} is true
 */
private static String wordShapeDan2Bio(String s, Collection<String> knownLCWords) {
  // The Greek check is evaluated before the shape is computed.
  final boolean hasGreek = containsGreekLetter(s);
  final String baseShape = wordShapeDan2(s, knownLCWords);
  return hasGreek ? baseShape + "-GREEK" : baseShape;
}