/** * Removes any characters in the String that we don't care about in the matching procedure * TODO Currently limited to certain 'western' languages */ private String prepareName(String name) { // \s = A whitespace character: [ \t\n\x0B\f\r] String[] arr = name.split("\\s"); List<String> list = new ArrayList<>(arr.length); for (int i = 0; i < arr.length; i++) { String rewrite = NON_WORD_CHAR.matcher(toLowerCase(arr[i])).replaceAll(""); String tmp = rewriteMap.get(rewrite); if (tmp != null) rewrite = tmp; // Ignore matching short frases like de, la, ... if (!rewrite.isEmpty() && rewrite.length() > 2) { list.add(rewrite); } } return listToString(list); }
/** * Removes any characters in the String that we don't care about in the matching procedure * TODO: Remove common street names like: street, road, avenue? */ private String prepareName(String name) { // TODO make this better, also split at ',' and others? // TODO This limits the approach to certain 'western' languages // \s = A whitespace character: [ \t\n\x0B\f\r] String[] arr = name.split("\\s"); String tmp; List<String> list = new ArrayList<>(arr.length); for (int i = 0; i < arr.length; i++) { tmp = NON_WORD_CHAR.matcher(arr[i].toLowerCase()).replaceAll(""); // Ignore matching short frases like, de, rue, st, etc. if (!tmp.isEmpty() && tmp.length() > 3) { list.add(tmp); } } return listToString(list); }
/** * Removes any characters in the String that we don't care about in the matching procedure * TODO Currently limited to certain 'western' languages */ private String prepareName(String name) { // \s = A whitespace character: [ \t\n\x0B\f\r] String[] arr = name.split("\\s"); List<String> list = new ArrayList<>(arr.length); for (int i = 0; i < arr.length; i++) { String rewrite = NON_WORD_CHAR.matcher(toLowerCase(arr[i])).replaceAll(""); String tmp = rewriteMap.get(rewrite); if (tmp != null) rewrite = tmp; // Ignore matching short frases like de, la, ... if (!rewrite.isEmpty() && rewrite.length() > 2) { list.add(rewrite); } } return listToString(list); }