String result = userdictLookup(text, pos); if (result != null) { g2pMethod.append("userdict"); result = lexiconLookup(text, pos); if (result != null) { g2pMethod.append("lexicon"); String normalised = MaryUtils.normaliseUnicodeLetters(text, getLocale()); if (!normalised.equals(text)) { result = userdictLookup(normalised, pos); if (result != null) { g2pMethod.append("userdict"); return result; result = lexiconLookup(normalised, pos); if (result != null) { g2pMethod.append("lexicon");
if (isPosPunctuation(pos)) { return false; if (isUnpronounceable(pos)) { return false;
/** * Look a given text up in the (standard) lexicon. part-of-speech is used in case of ambiguity. * * @param text * text * @param pos * pos * @return null if text == null or text.length is 0, null if entries.length is 0, entries[0] otherwise */ public String lexiconLookup(String text, String pos) { if (text == null || text.length() == 0) return null; String[] entries; entries = lexiconLookupPrimitive(text, pos); // If entry is not found directly, try the following changes: // - lowercase the word // - all lowercase but first uppercase if (entries.length == 0) { text = text.toLowerCase(getLocale()); entries = lexiconLookupPrimitive(text, pos); } if (entries.length == 0) { text = text.substring(0, 1).toUpperCase(getLocale()) + text.substring(1); entries = lexiconLookupPrimitive(text, pos); } if (entries.length == 0) return null; return entries[0]; }
if (maybePronounceable(text, pos)) { String graph = st.nextToken(); StringBuilder helper = new StringBuilder(); String phon = phonemise(graph, pos, helper); setPh(t, ph.toString()); t.setAttribute("g2p_method", g2pMethod); MaryData result = new MaryData(outputType(), d.getLocale()); result.setDocument(doc); return result;
userdict = readLexicon(userdictFilename); } else { logger.info("User dictionary '" + userdictFilename + "' for locale '" + getLocale() + "' does not exist. Ignoring.");
/**
 * Compile the regex pattern used to determine whether tokens are processed as unpronounceable or not, based on whether their
 * <code>pos</code> attribute matches the pattern.
 * <p>
 * The regex is read from the property <code>&lt;language&gt;.pos.unprounounceable.regex</code>, where <code>language</code>
 * is the language code of {@code getLocale()}. If the property is not set, or its value cannot be compiled, the default
 * <code>^[^a-zA-Z]+$</code> is used. The result is stored in {@code unpronounceablePosRegex}.
 */
protected void setUnpronounceablePosRegex() {
	String language = getLocale().getLanguage();
	// The spelling of the key ("unprounounceable") is deliberately left untouched: changing it would change which
	// property is read.
	String propertyName = language + ".pos.unprounounceable.regex";
	String defaultRegex = "^[^a-zA-Z]+$";

	String regex = MaryProperties.getProperty(propertyName);
	if (regex == null) {
		logger.debug(String.format("Property %s not set, using default", propertyName));
		regex = defaultRegex;
	} else {
		logger.debug(String.format("Using property %s", propertyName));
	}

	try {
		unpronounceablePosRegex = Pattern.compile(regex);
	} catch (PatternSyntaxException e) {
		// Pass the exception along so the log shows why the configured regex was rejected.
		logger.error(String.format("Could not compile regex pattern /%s/, using default instead", regex), e);
		unpronounceablePosRegex = Pattern.compile(defaultRegex);
	}
	// This message used to say "Punctuation regex pattern", copied from setPunctuationPosRegex().
	logger.debug(String.format("Unpronounceable regex pattern set to /%s/", unpronounceablePosRegex));
}
if (maybePronounceable(text, pos)) { String graph = st.nextToken(); StringBuilder helper = new StringBuilder(); String phon = phonemise(graph, pos, helper); setPh(t, ph.toString()); t.setAttribute("g2p_method", g2pMethod); MaryData result = new MaryData(outputType(), d.getLocale()); result.setDocument(doc); return result;
userdict = readLexicon(userdictFilename); } else { logger.info("User dictionary '" + userdictFilename + "' for locale '" + getLocale() + "' does not exist. Ignoring.");
/**
 * Compile the regex pattern used to determine whether tokens are processed as punctuation or not, based on whether their
 * <code>pos</code> attribute matches the pattern.
 * <p>
 * The regex is read from the property <code>&lt;language&gt;.pos.punct.regex</code>, where <code>language</code> is the
 * language code of {@code getLocale()}. If the property is not set, or its value cannot be compiled, the default
 * <code>\$PUNCT</code> is used. The result is stored in {@code punctuationPosRegex}.
 */
protected void setPunctuationPosRegex() {
	final String defaultRegex = "\\$PUNCT";
	final String propertyName = getLocale().getLanguage() + ".pos.punct.regex";

	// Decide which regex source to use, logging the choice.
	final String configured = MaryProperties.getProperty(propertyName);
	final String regex;
	if (configured != null) {
		logger.debug(String.format("Using property %s", propertyName));
		regex = configured;
	} else {
		logger.debug(String.format("Property %s not set, using default", propertyName));
		regex = defaultRegex;
	}

	// Compile into a local first; fall back to the default if the chosen regex is malformed.
	Pattern compiled;
	try {
		compiled = Pattern.compile(regex);
	} catch (PatternSyntaxException e) {
		logger.error(String.format("Could not compile regex pattern /%s/, using default instead", regex));
		compiled = Pattern.compile(defaultRegex);
	}
	punctuationPosRegex = compiled;
	logger.debug(String.format("Punctuation regex pattern set to /%s/", punctuationPosRegex));
}
if (maybePronounceable(text, pos)) { String graph = st.nextToken(); StringBuilder helper = new StringBuilder(); String phon = phonemise(graph, pos, helper); setPh(t, ph.toString()); t.setAttribute("g2p_method", g2pMethod); MaryData result = new MaryData(outputType(), d.getLocale()); result.setDocument(doc); return result;
String result = userdictLookup(text, pos); if (result != null) { g2pMethod.append("userdict"); result = lexiconLookup(text, pos); if (result != null) { g2pMethod.append("lexicon"); String normalised = MaryUtils.normaliseUnicodeLetters(text, getLocale()); if (!normalised.equals(text)) { result = userdictLookup(normalised, pos); if (result != null) { g2pMethod.append("userdict"); return result; result = lexiconLookup(normalised, pos); if (result != null) { g2pMethod.append("lexicon");
/** * Look a given text up in the (standard) lexicon. part-of-speech is used in case of ambiguity. * * @param text * text * @param pos * pos * @return null if text == null or text.length is 0, null if entries.length is 0, entries[0] otherwise */ public String lexiconLookup(String text, String pos) { if (text == null || text.length() == 0) return null; String[] entries; entries = lexiconLookupPrimitive(text, pos); // If entry is not found directly, try the following changes: // - lowercase the word // - all lowercase but first uppercase if (entries.length == 0) { text = text.toLowerCase(getLocale()); entries = lexiconLookupPrimitive(text, pos); } if (entries.length == 0) { text = text.substring(0, 1).toUpperCase(getLocale()) + text.substring(1); entries = lexiconLookupPrimitive(text, pos); } if (entries.length == 0) return null; return entries[0]; }
if (isPosPunctuation(pos)) { return false; if (isUnpronounceable(pos)) { return false;
userdict = readLexicon(userdictFilename); } else { logger.info("User dictionary '" + userdictFilename + "' for locale '" + getLocale() + "' does not exist. Ignoring.");
/**
 * Compile the regex pattern used to determine whether tokens are processed as unpronounceable or not, based on whether their
 * <code>pos</code> attribute matches the pattern.
 * <p>
 * The regex is read from the property <code>&lt;language&gt;.pos.unprounounceable.regex</code>, where <code>language</code>
 * is the language code of {@code getLocale()}. If the property is not set, or its value cannot be compiled, the default
 * <code>^[^a-zA-Z]+$</code> is used. The result is stored in {@code unpronounceablePosRegex}.
 */
protected void setUnpronounceablePosRegex() {
	String language = getLocale().getLanguage();
	// The spelling of the key ("unprounounceable") is deliberately left untouched: changing it would change which
	// property is read.
	String propertyName = language + ".pos.unprounounceable.regex";
	String defaultRegex = "^[^a-zA-Z]+$";

	String regex = MaryProperties.getProperty(propertyName);
	if (regex == null) {
		logger.debug(String.format("Property %s not set, using default", propertyName));
		regex = defaultRegex;
	} else {
		logger.debug(String.format("Using property %s", propertyName));
	}

	try {
		unpronounceablePosRegex = Pattern.compile(regex);
	} catch (PatternSyntaxException e) {
		// Pass the exception along so the log shows why the configured regex was rejected.
		logger.error(String.format("Could not compile regex pattern /%s/, using default instead", regex), e);
		unpronounceablePosRegex = Pattern.compile(defaultRegex);
	}
	// This message used to say "Punctuation regex pattern", copied from setPunctuationPosRegex().
	logger.debug(String.format("Unpronounceable regex pattern set to /%s/", unpronounceablePosRegex));
}
String result = userdictLookup(text, pos); if (result != null) { g2pMethod.append("userdict"); result = lexiconLookup(text, pos); if (result != null) { g2pMethod.append("lexicon"); String normalised = MaryUtils.normaliseUnicodeLetters(text, getLocale()); if (!normalised.equals(text)) { result = userdictLookup(normalised, pos); if (result != null) { g2pMethod.append("userdict"); return result; result = lexiconLookup(normalised, pos); if (result != null) { g2pMethod.append("lexicon");
/** * Look a given text up in the (standard) lexicon. part-of-speech is used in case of ambiguity. * * @param text * text * @param pos * pos * @return null if text == null or text.length is 0, null if entries.length is 0, entries[0] otherwise */ public String lexiconLookup(String text, String pos) { if (text == null || text.length() == 0) return null; String[] entries; entries = lexiconLookupPrimitive(text, pos); // If entry is not found directly, try the following changes: // - lowercase the word // - all lowercase but first uppercase if (entries.length == 0) { text = text.toLowerCase(getLocale()); entries = lexiconLookupPrimitive(text, pos); } if (entries.length == 0) { text = text.substring(0, 1).toUpperCase(getLocale()) + text.substring(1); entries = lexiconLookupPrimitive(text, pos); } if (entries.length == 0) return null; return entries[0]; }
if (isPosPunctuation(pos)) { return false; if (isUnpronounceable(pos)) { return false;
/**
 * Compile the regex pattern used to determine whether tokens are processed as punctuation or not, based on whether their
 * <code>pos</code> attribute matches the pattern.
 * <p>
 * The regex is read from the property <code>&lt;language&gt;.pos.punct.regex</code>, where <code>language</code> is the
 * language code of {@code getLocale()}. If the property is not set, or its value cannot be compiled, the default
 * <code>\$PUNCT</code> is used. The result is stored in {@code punctuationPosRegex}.
 */
protected void setPunctuationPosRegex() {
	final String defaultRegex = "\\$PUNCT";
	final String propertyName = getLocale().getLanguage() + ".pos.punct.regex";

	// Decide which regex source to use, logging the choice.
	final String configured = MaryProperties.getProperty(propertyName);
	final String regex;
	if (configured != null) {
		logger.debug(String.format("Using property %s", propertyName));
		regex = configured;
	} else {
		logger.debug(String.format("Property %s not set, using default", propertyName));
		regex = defaultRegex;
	}

	// Compile into a local first; fall back to the default if the chosen regex is malformed.
	Pattern compiled;
	try {
		compiled = Pattern.compile(regex);
	} catch (PatternSyntaxException e) {
		logger.error(String.format("Could not compile regex pattern /%s/, using default instead", regex));
		compiled = Pattern.compile(defaultRegex);
	}
	punctuationPosRegex = compiled;
	logger.debug(String.format("Punctuation regex pattern set to /%s/", punctuationPosRegex));
}