@Test public void testFindMatchingPatternWithoutMatchingTokenBounds() { Pattern testPattern = Pattern.compile("[0-8] year"); // does match "0 year" String[] sentence = new String[]{"a", "80", "year", "c"}; Pattern[] patterns = new Pattern[]{testPattern}; Map<String, Pattern[]> regexMap = new HashMap<>(); String type = "testtype"; regexMap.put(type, patterns); RegexNameFinder finder = new RegexNameFinder(regexMap); Span[] result = finder.find(sentence); Assert.assertTrue(result.length == 0); } }
/**
 * Checks that the finder reports a "PHONE_NUM" span covering token 9 of the
 * whitespace-tokenized sample text, and that this token is the expected
 * phone number.
 *
 * <p>{@code text} and {@code regexNameFinder} are fixture members declared
 * outside this method.
 */
@Test
public void testPhoneNumber() throws Exception {
  final Span expectedPhone = new Span(9, 10, "PHONE_NUM");

  final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
  final List<Span> detected = Arrays.asList(regexNameFinder.find(words));

  Assert.assertTrue(detected.contains(expectedPhone));
  Assert.assertEquals("123-234-5678", words[expectedPhone.getStart()]);
}
/**
 * Checks that the finder reports a "URL" span covering token 13 of the
 * whitespace-tokenized sample text, and that this token is the expected URL.
 *
 * <p>{@code text} and {@code regexNameFinder} are fixture members declared
 * outside this method.
 */
@Test
public void testURL() throws Exception {
  final Span expectedUrl = new Span(13, 14, "URL");

  final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
  final Span[] found = regexNameFinder.find(words);
  final List<Span> detected = Arrays.asList(found);

  Assert.assertTrue(detected.contains(expectedUrl));

  final String tokenAtSpanStart = words[expectedUrl.getStart()];
  Assert.assertEquals("https://www.google.com", tokenAtSpanStart);
}
/**
 * Checks that a pattern matching exactly one token yields exactly one span
 * covering that token: start index 1 (inclusive) and end index 2 (exclusive)
 * for the token "test" in the sentence below.
 */
@Test
public void testFindSingleTokenPattern() {
  Pattern testPattern = Pattern.compile("test");
  String[] sentence = new String[]{"a", "test", "b", "c"};

  Pattern[] patterns = new Pattern[]{testPattern};
  Map<String, Pattern[]> regexMap = new HashMap<>();
  String type = "testtype";
  regexMap.put(type, patterns);

  RegexNameFinder finder = new RegexNameFinder(regexMap);

  Span[] result = finder.find(sentence);

  // assertEquals prints expected vs. actual on failure; assertTrue(a == b) does not.
  Assert.assertEquals(1, result.length);
  Assert.assertEquals(1, result[0].getStart());
  Assert.assertEquals(2, result[0].getEnd());
}
/**
 * Checks that the finder reports an "EMAIL" span covering token 3 of the
 * whitespace-tokenized sample text, and that this token is the expected
 * e-mail address.
 *
 * <p>{@code text} and {@code regexNameFinder} are fixture members declared
 * outside this method.
 */
@Test
public void testEmail() throws Exception {
  String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(text);
  Span[] find = regexNameFinder.find(tokens);
  List<Span> spanList = Arrays.asList(find);

  // Build the expected span once and use it for both checks, as the sibling tests do.
  Span emailSpan = new Span(3, 4, "EMAIL");
  Assert.assertTrue(spanList.contains(emailSpan));
  Assert.assertEquals("opennlp@gmail.com", tokens[emailSpan.getStart()]);
}
/**
 * Checks that the finder reports two "MGRS" spans in the whitespace-tokenized
 * sample text: a single-token span at index 18 and a multi-token span over
 * indices 20 to 24 (end exclusive). It also verifies the token found at the
 * start of each span; the first one is compared in lower case.
 *
 * <p>{@code text} and {@code regexNameFinder} are fixture members declared
 * outside this method.
 */
@Test
public void testMgrs() throws Exception {
  final Span compactMgrs = new Span(18, 19, "MGRS");
  final Span spacedMgrs = new Span(20, 24, "MGRS");

  final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
  final List<Span> detected = Arrays.asList(regexNameFinder.find(words));

  Assert.assertTrue(detected.contains(compactMgrs));
  Assert.assertTrue(detected.contains(spacedMgrs));

  final String expectedCompactToken = "11SKU528111".toLowerCase();
  Assert.assertEquals(expectedCompactToken, words[compactMgrs.getStart()]);
  Assert.assertEquals("11S", words[spacedMgrs.getStart()]);
}
}
/**
 * Checks that a pattern spanning two adjacent tokens ("80" and "year") yields
 * exactly one span from token index 1 (inclusive) to 3 (exclusive), labelled
 * with the type under which the pattern was registered.
 */
@Test
public void testFindTokenizdPattern() {
  Pattern testPattern = Pattern.compile("[0-9]+ year");
  String[] sentence = new String[]{"a", "80", "year", "b", "c"};

  Pattern[] patterns = new Pattern[]{testPattern};
  Map<String, Pattern[]> regexMap = new HashMap<>();
  String type = "match";
  regexMap.put(type, patterns);

  RegexNameFinder finder = new RegexNameFinder(regexMap);

  Span[] result = finder.find(sentence);

  // assertEquals prints expected vs. actual on failure; assertTrue(...) only says "false".
  Assert.assertEquals(1, result.length);
  Assert.assertEquals(1, result[0].getStart());
  Assert.assertEquals(3, result[0].getEnd());
  Assert.assertEquals("match", result[0].getType());
}
/**
 * Checks that the finder reports two "DEGREES_MIN_SEC_LAT_LON" spans in the
 * whitespace-tokenized sample text, over token indices 22 to 24 and 35 to 41
 * (end exclusive), and verifies the token found at the start of each span.
 *
 * <p>{@code text} and {@code regexNameFinder} are fixture members declared
 * outside this method.
 */
@Test
public void testLatLong() throws Exception {
  final String spanType = "DEGREES_MIN_SEC_LAT_LON";
  final Span firstCoordinate = new Span(22, 24, spanType);
  final Span secondCoordinate = new Span(35, 41, spanType);

  final String[] words = WhitespaceTokenizer.INSTANCE.tokenize(text);
  final List<Span> detected = Arrays.asList(regexNameFinder.find(words));

  Assert.assertTrue(detected.contains(firstCoordinate));
  Assert.assertTrue(detected.contains(secondCoordinate));

  Assert.assertEquals("528", words[firstCoordinate.getStart()]);
  Assert.assertEquals("45", words[secondCoordinate.getStart()]);
}