opennlp.tools.namefind.RegexNameFinder Java code examples

/**
 * Builds a {@link RegexNameFinder} backed only by the selected built-in
 * regexes.
 *
 * @param defaults the OpenNLP default regexes to enable; must not be null
 * @return {@link RegexNameFinder}
 * @throws NullPointerException if {@code defaults} is null
 */
public static synchronized RegexNameFinder getDefaultRegexNameFinders(
  DEFAULT_REGEX_NAME_FINDER... defaults) {
 final DEFAULT_REGEX_NAME_FINDER[] selected =
   Objects.requireNonNull(defaults, "defaults must not be null");
 return new RegexNameFinder(defaultsToMap(selected));
}

/**
 * Finds names directly in raw text, so the caller does not have to tokenize
 * it first. The returned spans carry character indices rather than word
 * (token) indices.
 *
 * @param text the untokenized text to search
 * @return the spans produced by {@code getAnnotations(text)}
 */
public Span[] find(String text) {
 final Span[] annotations = getAnnotations(text);
 return annotations;
}

 /**
  * A pattern whose only possible match would begin in the middle of a token
  * must not yield any span.
  */
 @Test
 public void testFindMatchingPatternWithoutMatchingTokenBounds() {
  // The regex can match the text "0 year", but that text would start inside
  // the token "80" rather than on a token boundary.
  Pattern testPattern = Pattern.compile("[0-8] year");

  String[] sentence = new String[]{"a", "80", "year", "c"};
  Pattern[] patterns = new Pattern[]{testPattern};
  Map<String, Pattern[]> regexMap = new HashMap<>();
  String type = "testtype";

  regexMap.put(type, patterns);

  RegexNameFinder finder = new RegexNameFinder(regexMap);

  Span[] result = finder.find(sentence);

  // assertEquals reports the actual length if the expectation is violated.
  Assert.assertEquals(0, result.length);
 }
}

/**
 * Checks that the finder reports a "URL" span over token 13 of the shared
 * sample text and that this token is the expected URL.
 */
@Test
public void testURL() throws Exception {
 // text and regexNameFinder are fixtures declared outside this snippet.
 final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
 final Span expectedUrl = new Span(13, 14, "URL");
 final List<Span> found = Arrays.asList(regexNameFinder.find(words));
 Assert.assertTrue(found.contains(expectedUrl));
 Assert.assertEquals("https://www.google.com", words[expectedUrl.getStart()]);
}

/**
 * Checks that the finder reports a "PHONE_NUM" span over token 9 of the
 * shared sample text and that this token is the expected phone number.
 */
@Test
public void testPhoneNumber() throws Exception {
 // text and regexNameFinder are fixtures declared outside this snippet.
 final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
 final Span expectedPhone = new Span(9, 10, "PHONE_NUM");
 final List<Span> found = Arrays.asList(regexNameFinder.find(words));
 Assert.assertTrue(found.contains(expectedPhone));
 Assert.assertEquals("123-234-5678", words[expectedPhone.getStart()]);
}

/**
 * Builds a {@link RegexNameFinder} from a combination of selected built-in
 * regexes and externally configured ones.
 *
 * @param config   a map where the key is a type, and the value is a
 *                 Pattern[]; must not be null. Its entries are added after
 *                 the defaults, so on a key clash the config entry wins
 * @param defaults the OpenNLP default regexes; may be null, in which case
 *                 only {@code config} is used
 * @return {@link RegexNameFinder}
 * @throws NullPointerException if {@code config} is null
 */
public static synchronized RegexNameFinder getDefaultRegexNameFinders(
  Map<String, Pattern[]> config, DEFAULT_REGEX_NAME_FINDER... defaults) {
 Objects.requireNonNull(config, "config must not be null");
 final Map<String, Pattern[]> merged;
 if (defaults == null) {
  merged = new HashMap<>();
 } else {
  merged = defaultsToMap(defaults);
 }
 // Config goes in last so that it overrides any default with the same key.
 merged.putAll(config);
 return new RegexNameFinder(merged);
}

/**
 * A pattern equal to a single token must yield exactly one span covering
 * that token (token indices 1 to 2, end exclusive).
 */
@Test
public void testFindSingleTokenPattern() {
 Pattern testPattern = Pattern.compile("test");
 String[] sentence = new String[]{"a", "test", "b", "c"};
 Pattern[] patterns = new Pattern[]{testPattern};
 Map<String, Pattern[]> regexMap = new HashMap<>();
 String type = "testtype";
 regexMap.put(type, patterns);
 RegexNameFinder finder =
     new RegexNameFinder(regexMap);
 Span[] result = finder.find(sentence);
 // assertEquals reports the actual value on failure, unlike assertTrue(a == b).
 Assert.assertEquals(1, result.length);
 Assert.assertEquals(1, result[0].getStart());
 Assert.assertEquals(2, result[0].getEnd());
}

/**
 * Finds names directly in raw text, so the caller does not have to tokenize
 * it first. The returned spans carry character indices rather than word
 * (token) indices.
 *
 * @param text the untokenized text to search
 * @return the spans produced by {@code getAnnotations(text)}
 */
public Span[] find(String text) {
 final Span[] annotations = getAnnotations(text);
 return annotations;
}

/**
 * Checks that the finder reports an "EMAIL" span over token 3 of the shared
 * sample text and that this token is the expected address.
 */
@Test
public void testEmail() throws Exception {
 // text and regexNameFinder are fixtures declared outside this snippet.
 final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
 final Span expectedEmail = new Span(3, 4, "EMAIL");
 final List<Span> found = Arrays.asList(regexNameFinder.find(words));
 Assert.assertTrue(found.contains(expectedEmail));
 Assert.assertEquals("opennlp@gmail.com", words[expectedEmail.getStart()]);
}

/**
 * Builds a {@link RegexNameFinder} backed only by the selected built-in
 * regexes.
 *
 * @param defaults the OpenNLP default regexes to enable; must not be null
 * @return {@link RegexNameFinder}
 * @throws NullPointerException if {@code defaults} is null
 */
public static synchronized RegexNameFinder getDefaultRegexNameFinders(
  DEFAULT_REGEX_NAME_FINDER... defaults) {
 final DEFAULT_REGEX_NAME_FINDER[] selected =
   Objects.requireNonNull(defaults, "defaults must not be null");
 return new RegexNameFinder(defaultsToMap(selected));
}

/**
 * A pattern spanning two adjacent tokens ("80" and "year") must yield one
 * span over token indices 1 to 3 (end exclusive) carrying the configured
 * type.
 */
@Test
public void testFindTokenizdPattern() {
 Pattern testPattern = Pattern.compile("[0-9]+ year");
 String[] sentence = new String[]{"a", "80", "year", "b", "c"};
 Pattern[] patterns = new Pattern[]{testPattern};
 Map<String, Pattern[]> regexMap = new HashMap<>();
 String type = "match";
 regexMap.put(type, patterns);
 RegexNameFinder finder =
     new RegexNameFinder(regexMap);
 Span[] result = finder.find(sentence);
 // assertEquals reports the actual value on failure, unlike assertTrue(a == b).
 Assert.assertEquals(1, result.length);
 Assert.assertEquals(1, result[0].getStart());
 Assert.assertEquals(3, result[0].getEnd());
 Assert.assertEquals("match", result[0].getType());
}

/**
 * Finds names directly in raw text, so the caller does not have to tokenize
 * it first. The returned spans carry character indices rather than word
 * (token) indices.
 *
 * @param text the untokenized text to search
 * @return the spans produced by {@code getAnnotations(text)}
 */
public Span[] find(String text) {
 final Span[] annotations = getAnnotations(text);
 return annotations;
}

 /**
  * Checks that the finder reports two "MGRS" spans in the shared sample
  * text (tokens 18-19 and 20-24) and that each span starts on the expected
  * token.
  */
 @Test
 public void testMgrs() throws Exception {
  // text and regexNameFinder are fixtures declared outside this snippet.
  final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
  final Span compactMgrs = new Span(18, 19, "MGRS");
  final Span spacedMgrs = new Span(20, 24, "MGRS");
  final List<Span> found = Arrays.asList(regexNameFinder.find(words));
  Assert.assertTrue(found.contains(compactMgrs));
  Assert.assertTrue(found.contains(spacedMgrs));
  Assert.assertEquals("11SKU528111".toLowerCase(), words[compactMgrs.getStart()]);
  Assert.assertEquals("11S", words[spacedMgrs.getStart()]);
 }
}

/**
 * Builds a {@link RegexNameFinder} backed only by the selected built-in
 * regexes.
 *
 * @param defaults the OpenNLP default regexes to enable; must not be null
 * @return {@link RegexNameFinder}
 * @throws NullPointerException if {@code defaults} is null
 */
public static synchronized RegexNameFinder getDefaultRegexNameFinders(
  DEFAULT_REGEX_NAME_FINDER... defaults) {
 final DEFAULT_REGEX_NAME_FINDER[] selected =
   Objects.requireNonNull(defaults, "defaults must not be null");
 return new RegexNameFinder(defaultsToMap(selected));
}

/**
 * Checks that the finder reports two "DEGREES_MIN_SEC_LAT_LON" spans in the
 * shared sample text (tokens 22-24 and 35-41) and that each span starts on
 * the expected token.
 */
@Test
public void testLatLong() throws Exception {
 // text and regexNameFinder are fixtures declared outside this snippet.
 final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
 final Span firstLatLong = new Span(22, 24, "DEGREES_MIN_SEC_LAT_LON");
 final Span secondLatLong = new Span(35, 41, "DEGREES_MIN_SEC_LAT_LON");
 final List<Span> found = Arrays.asList(regexNameFinder.find(words));
 Assert.assertTrue(found.contains(firstLatLong));
 Assert.assertTrue(found.contains(secondLatLong));
 Assert.assertEquals("528", words[firstLatLong.getStart()]);
 Assert.assertEquals("45", words[secondLatLong.getStart()]);
}

/**
 * Builds a {@link RegexNameFinder} from a combination of selected built-in
 * regexes and externally configured ones.
 *
 * @param config   a map where the key is a type, and the value is a
 *                 Pattern[]; must not be null. Its entries are added after
 *                 the defaults, so on a key clash the config entry wins
 * @param defaults the OpenNLP default regexes; may be null, in which case
 *                 only {@code config} is used
 * @return {@link RegexNameFinder}
 * @throws NullPointerException if {@code config} is null
 */
public static synchronized RegexNameFinder getDefaultRegexNameFinders(
  Map<String, Pattern[]> config, DEFAULT_REGEX_NAME_FINDER... defaults) {
 Objects.requireNonNull(config, "config must not be null");
 final Map<String, Pattern[]> merged;
 if (defaults == null) {
  merged = new HashMap<>();
 } else {
  merged = defaultsToMap(defaults);
 }
 // Config goes in last so that it overrides any default with the same key.
 merged.putAll(config);
 return new RegexNameFinder(merged);
}

/**
 * Builds a {@link RegexNameFinder} from a combination of selected built-in
 * regexes and externally configured ones.
 *
 * @param config   a map where the key is a type, and the value is a
 *                 Pattern[]; must not be null. Its entries are added after
 *                 the defaults, so on a key clash the config entry wins
 * @param defaults the OpenNLP default regexes; may be null, in which case
 *                 only {@code config} is used
 * @return {@link RegexNameFinder}
 * @throws NullPointerException if {@code config} is null
 */
public static synchronized RegexNameFinder getDefaultRegexNameFinders(
  Map<String, Pattern[]> config, DEFAULT_REGEX_NAME_FINDER... defaults) {
 Objects.requireNonNull(config, "config must not be null");
 final Map<String, Pattern[]> merged;
 if (defaults == null) {
  merged = new HashMap<>();
 } else {
  merged = defaultsToMap(defaults);
 }
 // Config goes in last so that it overrides any default with the same key.
 merged.putAll(config);
 return new RegexNameFinder(merged);
}

Javadoc

Name finder based on a series of regular expressions.

Most used methods

Popular in Java

Reactive rest calls using spring rest template
setScale (BigDecimal)
putExtra (Intent)
onCreateOptionsMenu (Activity)
BufferedInputStream (java.io)
A BufferedInputStream adds functionality to another input stream-namely, the ability to buffer the i
Random (java.util)
This class provides methods that return pseudo-random values. It is dangerous to seed Random with the
XPath (javax.xml.xpath)
XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
Container (java.awt)
A generic Abstract Window Toolkit (AWT) container object is a component that can contain other AWT co
Reference (javax.naming)
JFrame (javax.swing)
Top plugins for Android Studio

How to use RegexNameFinder in opennlp.tools.namefind

Best Java code snippets using opennlp.tools.namefind.RegexNameFinder (Showing top 17 results out of 315)

How to use
RegexNameFinder
in
opennlp.tools.namefind