edu.cmu.sphinx.alignment Java code examples

/**
 * Returns the value of this node.
 *
 * @return the larger of {@code queryIndex} and {@code databaseIndex} when
 *         {@code isBoundary()} holds; otherwise 0 if {@code hasMatch()}
 *         is true and 1 if it is not
 */
public int getValue() {
  if (isBoundary()) {
    return max(queryIndex, databaseIndex);
  }
  if (hasMatch()) {
    return 0;
  }
  return 1;
}

/**
 * Compares the query word with the database word of this node.
 *
 * @return true if {@code getQueryWord()} equals {@code getDatabaseWord()}
 */
public boolean hasMatch() {
  final String queryWord = getQueryWord();
  final String databaseWord = getDatabaseWord();
  return queryWord.equals(databaseWord);
}

/**
 * Looks up the query element this node refers to.
 *
 * @return {@code query.get(getQueryIndex())} when {@code queryIndex} is
 *         positive, otherwise {@code null}
 */
public String getQueryWord() {
  if (queryIndex <= 0) {
    return null;
  }
  return query.get(getQueryIndex());
}

/**
 * Aligns query sequence with the previously built database.
 *
 * <p>When either the range or the query is shorter than {@code tupleSize},
 * the work is delegated to {@code alignTextSimple}. Otherwise every
 * position of the result starts at -1 and is overwritten for each query
 * position covered by a node returned from {@code Alignment#getIndices()}.
 *
 * @param words list of words to look for
 * @param range range of database to look for alignment
 *
 * @return indices of alignment
 */
public int[] align(List<String> words, Range range) {
  final int lower = range.lowerEndpoint();
  final int upper = range.upperEndpoint();

  if (upper - lower < tupleSize || words.size() < tupleSize) {
    return alignTextSimple(refWords.subList(lower, upper), words, lower);
  }

  final int[] alignment = new int[words.size()];
  fill(alignment, -1);

  final Alignment tupleAlignment = new Alignment(getTuples(words), range);
  int cursor = 0;
  for (Alignment.Node node : tupleAlignment.getIndices()) {
    final int queryStart = node.getQueryIndex();
    final int queryEnd = queryStart + tupleSize;
    // Never move backwards: positions already written by an earlier node
    // are kept.
    cursor = max(cursor, queryStart);
    while (cursor < queryEnd) {
      alignment[cursor] = node.getDatabaseIndex() + cursor - queryStart;
      ++cursor;
    }
  }
  return alignment;
}

/**
 * Adds a word as an Item to this WordRelation object.
 *
 * <p>The new item is created as a daughter of the token item obtained from
 * {@code tokenToWords}, gets the word stored under its "name" feature, and
 * is then appended to {@code relation}.
 *
 * @param word the word to add
 */
public void addWord(String word) {
  final Item wordItem = tokenToWords.getTokenItem().createDaughter();
  wordItem.getFeatures().setString("name", word);
  relation.appendItem(wordItem);
}

/**
 * Strips trailing postpunctuation characters from the current token and
 * stores them as the token's postpunctuation. When the token has no
 * trailing postpunctuation, its postpunctuation is set to the empty
 * string. Does nothing if there is no current token.
 */
private void removeTokenPostpunctuation() {
  if (token == null) {
    return;
  }
  final String word = token.getWord();
  final int lastIndex = word.length() - 1;

  // Scan backwards over trailing postpunctuation. Index 0 is never
  // examined, so the first character always stays in the word.
  int cut = lastIndex;
  while (cut > 0
      && postpunctuationSymbols.indexOf((int) word.charAt(cut)) != -1) {
    cut--;
  }

  if (cut == lastIndex) {
    token.setPostpunctuation("");
    return;
  }
  // Copy postpunctuation from the token, then truncate the token there.
  token.setPostpunctuation(word.substring(cut + 1));
  token.setWord(word.substring(0, cut + 1));
}

  /**
   * Builds a {@code LongTextAligner} over {@code database} with a tuple
   * size of 1, aligns {@code query} against it and asserts that the
   * resulting indices are exactly {@code result}, in order.
   */
  private void align(List<String> database, List<String> query,
      Integer... result) {
    int[] alignment = new LongTextAligner(database, 1).align(query);
    assertThat(Utilities.asList(alignment), contains(result));
  }
}

/**
 * Returns true if the given token item contains a token that is in a
 * section-like context, e.g., "chapter" or "act".
 *
 * <p>The "p.name" feature of the item is lower-cased and looked up in
 * {@code SECTION_TYPES}. Lower-casing uses {@code Locale.ROOT} so that the
 * lookup does not depend on the JVM default locale (under a Turkish
 * locale, for example, "SECTION" would otherwise lower-case with a
 * dotless i and never match).
 *
 * @param tokenItem the token item to check
 *
 * @return true or false
 */
public static boolean sectionLike(Item tokenItem) {
  String sectionType = ((String) tokenItem.findFeature("p.name"))
      .toLowerCase(java.util.Locale.ROOT);
  return inKingSectionLikeMap(sectionType, SECTION_TYPES);
}

/**
 * Returns true if the given token matches part of a phone number
 *
 * <p>The decision combines {@code tokenVal} with the "n.name", "n.n.name",
 * "p.name" and "p.p.name" features of {@code tokenItem} (presumably the
 * names of the next, next-but-one, previous and previous-but-one tokens --
 * TODO confirm). The result is true when either:
 * <ul>
 * <li>{@code tokenVal} matches {@code threeDigitsPattern} and one of:
 *   <ul>
 *   <li>p.name does not match {@code digitsPattern}, n.name matches
 *       {@code threeDigitsPattern} and n.n.name matches
 *       {@code fourDigitsPattern};</li>
 *   <li>n.name matches {@code sevenPhoneNumberPattern};</li>
 *   <li>p.p.name does not match {@code digitsPattern}, p.name matches
 *       {@code threeDigitsPattern} and n.name matches
 *       {@code fourDigitsPattern};</li>
 *   </ul></li>
 * <li>or {@code tokenVal} matches {@code fourDigitsPattern}, n.name does
 *     not match {@code digitsPattern}, p.name matches
 *     {@code threeDigitsPattern} and p.p.name matches
 *     {@code threeDigitsPattern}.</li>
 * </ul>
 *
 * <p>NOTE(review): {@code tokenItem} is not a parameter of this method; it
 * is not declared in this block, so it is presumably a field of the
 * enclosing class -- confirm.
 *
 * @param tokenVal the string value of the token
 *
 * @return true or false
 */
private boolean matchesPartPhoneNumber(String tokenVal) {
  String n_name = (String) tokenItem.findFeature("n.name");
  String n_n_name = (String) tokenItem.findFeature("n.n.name");
  String p_name = (String) tokenItem.findFeature("p.name");
  String p_p_name = (String) tokenItem.findFeature("p.p.name");
  // Computed once because it is used in two branches below.
  boolean matches3DigitsP_name = matches(threeDigitsPattern, p_name);
  return ((matches(threeDigitsPattern, tokenVal) && ((!matches(
      digitsPattern, p_name) && matches(threeDigitsPattern, n_name) && matches(
        fourDigitsPattern, n_n_name))
      || (matches(sevenPhoneNumberPattern, n_name)) || (!matches(
      digitsPattern, p_p_name) && matches3DigitsP_name && matches(
        fourDigitsPattern, n_name)))) || (matches(
      fourDigitsPattern, tokenVal) && (!matches(digitsPattern,
      n_name) && matches3DigitsP_name && matches(threeDigitsPattern,
        p_p_name))));
}

/**
 * Expands the transcript with the current tokenizer and delegates to the
 * overload of {@code align} that takes the expanded word list.
 *
 * @param audioUrl   location of the audio, passed through unchanged
 * @param transcript text to expand and align
 * @return the result of the delegated {@code align} call
 * @throws IOException declared by the signature; propagated from the
 *         delegated call
 */
public List<WordResult> align(URL audioUrl, String transcript) throws IOException {
  final List<String> expanded = getTokenizer().expand(transcript);
  return align(audioUrl, expanded);
}

/**
 * Aligns query sequence with the previously built database, using a range
 * built from 0 and the size of {@code refWords}.
 *
 * @param query list of words to look for
 *
 * @return indices of alignment
 */
public int[] align(List<String> query) {
  final Range wholeDatabase = new Range(0, refWords.size());
  return align(query, wholeDatabase);
}

/**
 * Fills {@code database} with 100000 words drawn from a four-word
 * dictionary using a {@code Random} seeded with 42, then builds
 * {@code aligner} over it with a tuple size of 3.
 */
@BeforeClass
public void setUp() {
  final String[] vocabulary = {"foo", "bar", "baz", "quz"};
  final Random random = new Random(42);
  database = new ArrayList<String>();
  int remaining = 100000;
  while (remaining-- > 0) {
    database.add(vocabulary[random.nextInt(vocabulary.length)]);
  }
  aligner = new LongTextAligner(database, 3);
}

/**
 * Looks up the database element this node refers to.
 *
 * @return {@code reftup.get(getDatabaseIndex())} when
 *         {@code databaseIndex} is positive, otherwise {@code null}
 */
public String getDatabaseWord() {
  if (databaseIndex <= 0) {
    return null;
  }
  return reftup.get(getDatabaseIndex());
}

/**
 * Lists the nodes reachable from this node in one step.
 *
 * <p>A step may advance the query index, the database index, or both, as
 * long as the advanced index stays within {@code indices.size()} /
 * {@code shifts.size()} respectively. The result is ordered: both
 * advanced, database advanced, query advanced.
 *
 * @return up to three neighbouring nodes
 */
public List<Node> adjacent() {
  final boolean canAdvanceQuery = queryIndex < indices.size();
  final boolean canAdvanceDatabase = databaseIndex < shifts.size();

  final List<Node> neighbours = new ArrayList<Node>(3);
  if (canAdvanceQuery && canAdvanceDatabase) {
    neighbours.add(new Node(queryIndex + 1, databaseIndex + 1));
  }
  if (canAdvanceDatabase) {
    neighbours.add(new Node(queryIndex, databaseIndex + 1));
  }
  if (canAdvanceQuery) {
    neighbours.add(new Node(queryIndex + 1, databaseIndex));
  }
  return neighbours;
}

/**
 * Creates an aligner from the given model and dictionary locations.
 *
 * <p>A {@code Configuration} carrying the acoustic model and dictionary
 * paths is used to build the {@code Context}; local properties are then
 * set on the context before the recognizer, grammar and language model
 * instances are obtained from it. The tokenizer is set to a new
 * {@code SimpleTokenizer}.
 *
 * @param amPath   acoustic model path, passed to
 *                 {@code Configuration#setAcousticModelPath}
 * @param dictPath dictionary path, passed to
 *                 {@code Configuration#setDictionaryPath}
 * @param g2pPath  g2p model path; when {@code null} the two
 *                 "dictionary->g2p..." properties are left unset
 * @throws MalformedURLException declared by the signature (a subclass of
 *         {@code IOException}); presumably raised while the context
 *         resolves the given paths -- TODO confirm
 * @throws IOException declared by the signature; presumably raised while
 *         the context is built -- TODO confirm
 */
public SpeechAligner(String amPath, String dictPath, String g2pPath) throws MalformedURLException, IOException {
  Configuration configuration = new Configuration();
  configuration.setAcousticModelPath(amPath);
  configuration.setDictionaryPath(dictPath);
  context = new Context(configuration);
  // The g2p properties are only set when a g2p model path was supplied.
  if (g2pPath != null) {
    context.setLocalProperty("dictionary->g2pModelPath", g2pPath);
    context.setLocalProperty("dictionary->g2pMaxPron", "2");
  }
  // All local properties are set before any instance is requested below.
  context.setLocalProperty("lexTreeLinguist->languageModel", "dynamicTrigramModel");
  recognizer = context.getInstance(Recognizer.class);
  grammar = context.getInstance(AlignerGrammar.class);
  languageModel = context.getInstance(DynamicTrigramModel.class);
  setTokenizer(new SimpleTokenizer());
}

/**
 * Replaces {@code expander} with a new {@code USEnglishTokenizer} before
 * each test method.
 */
@BeforeMethod
public void setupMethod() {
  this.expander = new USEnglishTokenizer();
}

/**
 * Checks alignment of short queries against short databases. Each call
 * passes the database, the query and the expected database index for
 * every query word, with -1 expected where a word has no counterpart.
 */
@Test
public void shouldAlignText() {
  final List<String> foo = asList("foo");
  final List<String> fooBar = asList("foo", "bar");

  // Single-word queries.
  align(foo, asList("bar"), -1);
  align(foo, asList("foo"), 0);
  align(fooBar, asList("foo"), 0);
  align(fooBar, asList("bar"), 1);

  // Queries longer than the database.
  align(foo, fooBar, 0, -1);
  align(asList("bar"), fooBar, -1, 0);

  // Multi-word queries inside longer databases.
  align(asList("foo", "bar", "baz"), asList("foo", "baz"), 0, 2);
  align(asList("foo", "bar", "42", "baz", "qux"), asList("42", "baz"),
      2, 3);
}

/**
 * Returns true if the given token item contains a token that is in a
 * king-like context, e.g., "King" or "Louis".
 *
 * <p>The lower-cased "p.name" feature is looked up in {@code KING_NAMES}
 * first; only if that fails is the lower-cased "p.p.name" feature looked
 * up in {@code KING_TITLES}. Lower-casing uses {@code Locale.ROOT} so
 * that the lookups do not depend on the JVM default locale (under a
 * Turkish locale, for example, "KING" would otherwise lower-case with a
 * dotless i and never match).
 *
 * @param tokenItem the token item to check
 *
 * @return true or false
 */
public static boolean kingLike(Item tokenItem) {
  String kingName = ((String) tokenItem.findFeature("p.name"))
      .toLowerCase(java.util.Locale.ROOT);
  if (inKingSectionLikeMap(kingName, KING_NAMES)) {
    return true;
  }
  // The title feature is only read when the name lookup failed.
  String kingTitle = ((String) tokenItem.findFeature("p.p.name"))
      .toLowerCase(java.util.Locale.ROOT);
  return inKingSectionLikeMap(kingTitle, KING_TITLES);
}

  /**
   * Expands {@code text} with {@code expander} and asserts that the joined
   * tokens equal {@code expanded}.
   */
  @Test(dataProvider = "data")
  public void textToWords(String text, String expanded) {
    assertThat(Utilities.join(expander.expand(text)), equalTo(expanded));
  }
}

/**
 * Aligns {@code words} with the shared {@code aligner} and checks the
 * resulting indices against {@code matcher}.
 */
@Test(dataProvider = "words")
public void align(List<String> words, Matcher<List<Integer>> matcher) {
  final int[] indices = aligner.align(words);
  assertThat(Utilities.asList(indices), matcher);
}

How to use edu.cmu.sphinx.alignment

Best Java code snippets using edu.cmu.sphinx.alignment (Showing top 20 results out of 315)