org.apache.commons.codec.language.bm.Rule java code examples

/**
 * Gets the rules for one name type, one rule type and exactly one language.
 * <p>
 * The language name is wrapped in a one-element set, turned into a {@code LanguageSet} and handed to the
 * language-set overload of {@code getInstance}.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
  final HashSet<String> onlyLanguage = new HashSet<String>(Arrays.asList(lang));
  return getInstance(nameType, rt, LanguageSet.from(onlyLanguage));
}

/**
 * Applies the rules at the current position <code>i</code> of the input.
 * <p>
 * The candidate rules are looked up in <code>finalRules</code> using the single character at <code>i</code>.
 * They are tried in list order; the first rule whose pattern and context match is applied to the phoneme
 * builder and <code>i</code> moves forward by the length of that rule's pattern. When nothing matches
 * (or no candidates exist for the character), <code>i</code> moves forward by one and the character
 * contributes nothing to the phonetic spelling.
 *
 * @return <code>this</code>
 */
public RulesApplication invoke() {
  this.found = false;
  // Number of input characters to step over; stays 1 unless a rule matches.
  int consumed = 1;
  final List<Rule> candidates = this.finalRules.get(input.subSequence(i, i + 1));
  if (candidates != null) {
    for (final Rule candidate : candidates) {
      final int candidateLength = candidate.getPattern().length();
      if (candidate.patternAndContextMatches(this.input, this.i)) {
        this.phonemeBuilder.apply(candidate.getPhoneme(), maxPhonemes);
        this.found = true;
        consumed = candidateLength;
        break;
      }
    }
  }
  this.i += consumed;
  return this;
}

/**
 * Opens a Scanner over the rules resource for the given name type, rule type and language.
 * <p>
 * The resource name comes from {@code createResourceName} and is resolved through the class loader of
 * {@code Languages}; the stream is decoded with {@code ResourceConstants.ENCODING}.
 *
 * @param nameType
 *            the NameType used to build the resource name
 * @param rt
 *            the RuleType used to build the resource name
 * @param lang
 *            the language used to build the resource name
 * @return a Scanner reading the resource
 * @throws IllegalArgumentException
 *             if the class loader cannot find the resource
 */
private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) {
  final String resourcePath = createResourceName(nameType, rt, lang);
  final InputStream stream = Languages.class.getClassLoader().getResourceAsStream(resourcePath);
  if (stream != null) {
    return new Scanner(stream, ResourceConstants.ENCODING);
  }
  throw new IllegalArgumentException("Unable to load resource: " + resourcePath);
}

                    location);
final Scanner hashIncludeScanner = createScanner(incl);
try {
  lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl));
} finally {
  hashIncludeScanner.close();
  final String pat = stripQuotes(parts[0]);
  final String lCon = stripQuotes(parts[1]);
  final String rCon = stripQuotes(parts[2]);
  final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
  final int cLine = currentLine;
  final Rule r = new Rule(pat, lCon, rCon, ph) {

// Matching a rule at index -1 is expected to raise IndexOutOfBoundsException.
@Test(expected = IndexOutOfBoundsException.class)
public void testNegativeIndexForRuleMatchIndexOutOfBoundsException() {
  final Rule.Phoneme emptyPhoneme = new Rule.Phoneme("", Languages.ANY_LANGUAGE);
  final Rule rule = new Rule("a", "", "", emptyPhoneme);
  rule.patternAndContextMatches("bob", -1);
}

/**
 * Gets the rules for a name type, a rule type and a set of languages as one flat list.
 * <p>
 * The grouped rules returned by {@code getInstanceMap} are concatenated in the iteration order of the
 * map's values.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
                   final Languages.LanguageSet langs) {
  final List<Rule> flattened = new ArrayList<Rule>();
  for (final List<Rule> group : getInstanceMap(nameType, rt, langs).values()) {
    flattened.addAll(group);
  }
  return flattened;
}

/**
 * Parses a phoneme expression.
 * <p>
 * An expression that starts with '(' is a bracketed list of options separated by '|'. Each option is
 * parsed with {@code parsePhoneme} and the results are wrapped in a {@code PhonemeList}. If the text
 * between the brackets starts or ends with '|', an additional empty phoneme for
 * {@code Languages.ANY_LANGUAGE} is appended. Any other expression is parsed as a single phoneme.
 *
 * @param ph
 *            the phoneme expression text
 * @return the parsed expression
 * @throws IllegalArgumentException
 *             if the text starts with '(' but does not end with ')'
 */
private static PhonemeExpr parsePhonemeExpr(final String ph) {
  if (!ph.startsWith("(")) {
    // not a bracketed list: a single phoneme
    return parsePhoneme(ph);
  }
  if (!ph.endsWith(")")) {
    throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
  }

  final String inner = ph.substring(1, ph.length() - 1);
  final String[] options = inner.split("[|]");
  final List<Phoneme> parsed = new ArrayList<Phoneme>();
  for (int k = 0; k < options.length; k++) {
    parsed.add(parsePhoneme(options[k]));
  }

  final boolean edgeSeparator = inner.startsWith("|") || inner.endsWith("|");
  if (edgeSeparator) {
    parsed.add(new Phoneme("", Languages.ANY_LANGUAGE));
  }
  return new PhonemeList(parsed);
}

                    location);
} else {
  lines.putAll(parseRules(createScanner(incl), location + "->" + incl));
} else {
  try {
    final String pat = stripQuotes(parts[0]);
    final String lCon = stripQuotes(parts[1]);
    final String rCon = stripQuotes(parts[2]);
    final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
    final int cLine = currentLine;
    final Rule r = new Rule(pat, lCon, rCon, ph) {

/**
 * Gets rules for a combination of name type, rule type and languages, as a map.
 * <p>
 * When the language set is a singleton, the rules of that one language are returned; otherwise the
 * rules registered under {@code Languages.ANY} are returned.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a map containing all Rules that apply, grouped by the first character of the rule pattern
 * @since 1.9
 */
public static Map<String, List<Rule>> getInstanceMap(final NameType nameType, final RuleType rt,
                           final Languages.LanguageSet langs) {
  if (langs.isSingleton()) {
    return getInstanceMap(nameType, rt, langs.getAny());
  }
  return getInstanceMap(nameType, rt, Languages.ANY);
}

/**
 * Parses a phoneme expression.
 * <p>
 * A bracketed expression, {@code (a|b|...)}, is split on '|' and each option is parsed with
 * {@code parsePhoneme}; the options are returned as a {@code PhonemeList}. A '|' at the very start or
 * end of the bracketed text adds one more empty phoneme for {@code Languages.ANY_LANGUAGE}. An
 * unbracketed expression is parsed as a single phoneme.
 *
 * @param ph
 *            the phoneme expression text
 * @return the parsed expression
 * @throws IllegalArgumentException
 *             if the text starts with '(' but does not end with ')'
 */
private static PhonemeExpr parsePhonemeExpr(final String ph) {
  final boolean bracketed = ph.startsWith("(");
  if (!bracketed) {
    return parsePhoneme(ph);
  }
  if (!ph.endsWith(")")) {
    throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
  }

  final String inner = ph.substring(1, ph.length() - 1);
  final List<Phoneme> alternatives = new ArrayList<Phoneme>();
  final String[] pieces = inner.split("[|]");
  for (int idx = 0; idx < pieces.length; idx++) {
    alternatives.add(parsePhoneme(pieces[idx]));
  }
  if (inner.startsWith("|") || inner.endsWith("|")) {
    // a leading or trailing separator adds an empty option
    alternatives.add(new Phoneme("", Languages.ANY_LANGUAGE));
  }
  return new PhonemeList(alternatives);
}

                    location);
} else {
  lines.putAll(parseRules(createScanner(incl), location + "->" + incl));
} else {
  try {
    final String pat = stripQuotes(parts[0]);
    final String lCon = stripQuotes(parts[1]);
    final String rCon = stripQuotes(parts[2]);
    final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
    final int cLine = currentLine;
    final Rule r = new Rule(pat, lCon, rCon, ph) {

/**
 * Runs one step of rule application at input position <code>i</code>.
 * <p>
 * Rules registered in <code>finalRules</code> for the character at <code>i</code> are tried in order.
 * The first rule whose pattern and context match is applied to the phoneme builder, <code>found</code>
 * is set, and <code>i</code> advances by that rule's pattern length. If no rule matches,
 * <code>i</code> advances by one and the character is dropped from the phonetic spelling.
 *
 * @return <code>this</code>
 */
public RulesApplication invoke() {
  this.found = false;
  int step = 1; // default advance when no rule matches
  final List<Rule> matchable = this.finalRules.get(input.subSequence(i, i + 1));
  if (matchable != null) {
    for (final Rule current : matchable) {
      final int currentLength = current.getPattern().length();
      if (current.patternAndContextMatches(this.input, this.i)) {
        this.phonemeBuilder.apply(current.getPhoneme(), maxPhonemes);
        this.found = true;
        step = currentLength;
        break;
      }
    }
  }
  this.i += step;
  return this;
}

// Three rule maps are fetched here: the RuleType.RULES rules for the language set, then the rules of
// this object's own rule type, first for the fixed "common" language and then for the language set.
final Map<String, List<Rule>> rules = Rule.getInstanceMap(this.nameType, RuleType.RULES, languageSet);
final Map<String, List<Rule>> finalRules1 = Rule.getInstanceMap(this.nameType, this.ruleType, "common");
final Map<String, List<Rule>> finalRules2 = Rule.getInstanceMap(this.nameType, this.ruleType, languageSet);

// Requesting rules for a language name that does not exist is expected to raise IllegalArgumentException.
@Test(expected = IllegalArgumentException.class)
public void testInvalidLangIllegalArgumentException() {
  final String unknownLanguage = "noSuchLanguage";
  Rule.getInstance(NameType.GENERIC, RuleType.APPROX, unknownLanguage);
}

/**
 * Creates a Scanner for the rules resource identified by name type, rule type and language.
 * <p>
 * The resource is located via the class loader of {@code Languages} and read using
 * {@code ResourceConstants.ENCODING}.
 *
 * @param nameType
 *            the NameType used to build the resource name
 * @param rt
 *            the RuleType used to build the resource name
 * @param lang
 *            the language used to build the resource name
 * @return a Scanner reading the resource
 * @throws IllegalArgumentException
 *             if no such resource can be loaded
 */
private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) {
  final String location = createResourceName(nameType, rt, lang);
  final InputStream source = Languages.class.getClassLoader().getResourceAsStream(location);
  if (source != null) {
    return new Scanner(source, ResourceConstants.ENCODING);
  }
  throw new IllegalArgumentException("Unable to load resource: " + location);
}

/**
 * Parses a phoneme expression into either a single phoneme or a list of alternatives.
 * <p>
 * Text starting with '(' must also end with ')'; what lies between is split on '|' and every piece is
 * parsed with {@code parsePhoneme}, producing a {@code PhonemeList}. When the bracketed text begins or
 * ends with '|', an extra empty phoneme for {@code Languages.ANY_LANGUAGE} is added at the end of the
 * list. Text not starting with '(' is passed straight to {@code parsePhoneme}.
 *
 * @param ph
 *            the phoneme expression text
 * @return the parsed expression
 * @throws IllegalArgumentException
 *             if the text starts with '(' but does not end with ')'
 */
private static PhonemeExpr parsePhonemeExpr(final String ph) {
  if (!ph.startsWith("(")) {
    return parsePhoneme(ph);
  }

  // From here on we have a bracketed list of options.
  if (!ph.endsWith(")")) {
    throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
  }
  final String content = ph.substring(1, ph.length() - 1);
  final List<Phoneme> choices = new ArrayList<Phoneme>();
  final String[] tokens = content.split("[|]");
  int pos = 0;
  while (pos < tokens.length) {
    choices.add(parsePhoneme(tokens[pos]));
    pos++;
  }
  final boolean separatorAtEdge = content.startsWith("|") || content.endsWith("|");
  if (separatorAtEdge) {
    choices.add(new Phoneme("", Languages.ANY_LANGUAGE));
  }
  return new PhonemeList(choices);
}

                    location);
} else {
  lines.putAll(parseRules(createScanner(incl), location + "->" + incl));
} else {
  try {
    final String pat = stripQuotes(parts[0]);
    final String lCon = stripQuotes(parts[1]);
    final String rCon = stripQuotes(parts[2]);
    final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
    final int cLine = currentLine;
    final Rule r = new Rule(pat, lCon, rCon, ph) {

/**
 * Tries the rules that apply at the current input position and advances past what was consumed.
 * <p>
 * The rule list is fetched from <code>finalRules</code> by the one-character sequence at <code>i</code>.
 * The first rule in that list whose pattern and context match is applied to the phoneme builder; the
 * position then moves on by that rule's pattern length. With no match, the position moves on by one
 * and the character is silently left out of the phonetic spelling.
 *
 * @return <code>this</code>
 */
public RulesApplication invoke() {
  this.found = false;
  int advanceBy = 1;
  final List<Rule> applicable = this.finalRules.get(input.subSequence(i, i + 1));
  if (applicable != null) {
    for (final Rule r : applicable) {
      final int len = r.getPattern().length();
      if (r.patternAndContextMatches(this.input, this.i)) {
        this.phonemeBuilder.apply(r.getPhoneme(), maxPhonemes);
        this.found = true;
        // only a matching rule changes how far we advance
        advanceBy = len;
        break;
      }
    }
  }
  this.i += advanceBy;
  return this;
}

/**
 * Gets rules for a combination of name type, rule type and languages, flattened into a single list.
 * <p>
 * Every rule list held in the map returned by {@code getInstanceMap} is appended to the result, in the
 * order the map's values are iterated.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param langs
 *            the set of languages to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt,
                   final Languages.LanguageSet langs) {
  final List<Rule> combined = new ArrayList<Rule>();
  for (final List<Rule> perKey : getInstanceMap(nameType, rt, langs).values()) {
    combined.addAll(perKey);
  }
  return combined;
}

/**
 * Gets rules for a combination of name type, rule type and a single language.
 * <p>
 * Convenience overload: builds a {@code LanguageSet} containing only {@code lang} and delegates to the
 * language-set overload of {@code getInstance}.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
  final HashSet<String> singleLanguage = new HashSet<String>(Arrays.asList(lang));
  return getInstance(nameType, rt, LanguageSet.from(singleLanguage));
}

Javadoc

A phoneme rule.

Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply and a logical flag indicating if all languages must be in play. A rule matches if:

the pattern matches at the current position
the string up until the beginning of the pattern matches the left context
the string from the end of the pattern matches the right context
logical is ALL and all languages are in scope; or
logical is any other value and at least one language is in scope

Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user to explicitly construct their own.

Rules are immutable and thread-safe.

Rules resources

Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically named following the pattern:

org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt

The format of these resources is the following:

Rules: whitespace separated, double-quoted strings. There should be 4 columns to each row, and these will be interpreted as:
1. pattern
2. left context
3. right context
4. phoneme
End-of-line comments: Any occurrence of '//' will cause all text following on that line to be discarded as a comment.
Multi-line comments: Any line starting with '/*' will start multi-line commenting mode. This will skip all content until a line ending in '*' and '/' is found.
Blank lines: All blank lines will be skipped.

Most used methods

getInstance
Gets rules for a combination of name type, rule type and languages.
patternAndContextMatches
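Decides if the pattern and context match the input starting at a position. It is a match if the left context matches the input up to that position, the pattern matches at the position, and the right context matches the input from the end of the pattern.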
createResourceName
createScanner
getInstanceMap
Gets rules for a combination of name type, rule type and languages.
getPattern
Gets the pattern. This is a string-literal that must exactly match.
getPhoneme
Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match.
parsePhoneme
parsePhonemeExpr
parseRules
pattern
Attempts to compile the regex into direct string ops, falling back to Pattern and Matcher in the worst case.
stripQuotes

Popular in Java

Running tasks concurrently on multiple threads
startActivity (Activity)
notifyDataSetChanged (ArrayAdapter)
getSystemService (Context)
FileReader (java.io)
A specialized Reader that reads from a file in the file system. All read requests made by calling me
Format (java.text)
The base class for all formats. This is an abstract base class which specifies the protocol for clas
Dictionary (java.util)
Note: Do not use this class since it is obsolete. Please use the Map interface for new implementations.
JarFile (java.util.jar)
JarFile is used to read jar entries and their associated data from jar files.
Manifest (java.util.jar)
The Manifest class is used to obtain attribute information for a JarFile and its entries.
JList (javax.swing)
Top Vim plugins

How to use Rule in org.apache.commons.codec.language.bm

Best Java code snippets using org.apache.commons.codec.language.bm.Rule (Showing top 20 results out of 315)

How to use
Rule
in
org.apache.commons.codec.language.bm