/**
 * Appends a token to the sample text and records the character span it occupies.
 *
 * @param sample the text buffer being built; the token is appended at its current end
 * @param tokenSpans receives a new span holding the token's begin and end offsets
 *     within {@code sample}
 * @param token the token text to append
 * @param isNextMerged when {@code false}, a single space is appended after the token;
 *     when {@code true}, nothing is appended after it
 */
private static void addToken(StringBuilder sample, List<Span> tokenSpans,
    String token, boolean isNextMerged) {
  final int begin = sample.length();
  sample.append(token);
  // The buffer length right after the append is the exclusive end of the token.
  tokenSpans.add(new Span(begin, sample.length()));

  if (isNextMerged) {
    return;
  }
  sample.append(" ");
}
/**
 * Replaces, in place, every span of the given array with a copy that keeps the
 * original start, end and probability but carries {@code this.defaultType} as its type.
 *
 * @param names the spans to rewrite; the array itself is modified
 */
private void overrideType(Span[] names) {
  for (int idx = 0; idx < names.length; idx++) {
    final Span original = names[idx];
    final int start = original.getStart();
    final int end = original.getEnd();
    names[idx] = new Span(start, end, this.defaultType, original.getProb());
  }
}
/**
 * Constructing a span with a negative start offset is expected to raise an
 * {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testTooSmallStart() throws Exception {
  final int negativeStart = -1;
  final int end = 100;
  new Span(negativeStart, end);
}
/**
 * Constructing a span whose start offset exceeds its end offset is expected to
 * raise an {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testStartLargerThanEnd() throws Exception {
  final int start = 100;
  final int smallerEnd = 50;
  new Span(start, smallerEnd);
}
}
/**
 * Appends the given daughter to this parse's parts, extends this parse's span to
 * the daughter's end, and recomputes the head using the supplied head rules.
 *
 * @param daughter the parse to append; if its {@code prevPunctSet} is non-null,
 *     that set's contents are added to the parts before the daughter itself
 * @param rules the head rules asked for the new head of this parse
 */
public void add(Parse daughter, HeadRules rules) {
  // Entries from the daughter's prevPunctSet, if any, go into parts ahead of the daughter.
  if (daughter.prevPunctSet != null) {
    parts.addAll(daughter.prevPunctSet);
  }
  parts.add(daughter);
  // Keep the current start; the end becomes the end of the daughter's span.
  this.span = new Span(span.getStart(),daughter.getSpan().getEnd());
  // The head is looked up only after parts has been updated.
  // NOTE(review): presumably getChildren() reflects parts - confirm.
  this.head = rules.getHead(getChildren(),type);
  // NOTE(review): dereferences head without a null check; confirm getHead never returns null here.
  this.headIndex = head.headIndex;
}
/**
 * Tag sequence: Start, Start, Other.
 * Expects two adjacent single-token spans, [0, 1) and [1, 2), both of type B.
 */
@Test
public void testDecodeAdjacentSingletonFirst() {
  List<String> tags = Arrays.asList(B_START, B_START, OTHER);

  Span firstSingleton = new Span(0, 1, B_TYPE);
  Span secondSingleton = new Span(1, 2, B_TYPE);

  Assert.assertArrayEquals(
      new Span[] {firstSingleton, secondSingleton},
      codec.decode(tags));
}
/**
 * Tag sequence: Start, Continue, Start, Other.
 * Expects a two-token span [0, 2) directly followed by a single-token span [2, 3),
 * both of type B.
 */
@Test
public void testDecodeAdjacentPairSingleton() {
  List<String> tags = Arrays.asList(B_START, B_CONTINUE, B_START, OTHER);

  Span pair = new Span(0, 2, B_TYPE);
  Span singleton = new Span(2, 3, B_TYPE);

  Assert.assertArrayEquals(
      new Span[] {pair, singleton},
      codec.decode(tags));
}
/**
 * Tag sequence: Unit, Unit, Other.
 * Expects two adjacent single-token spans, [0, 1) and [1, 2), both of type A.
 */
@Test
public void testDecodeAdjacentSingletonFirst() {
  List<String> tags = Arrays.asList(A_UNIT, A_UNIT, OTHER);

  Span firstUnit = new Span(0, 1, A_TYPE);
  Span secondUnit = new Span(1, 2, A_TYPE);

  Assert.assertArrayEquals(
      new Span[] {firstUnit, secondUnit},
      codec.decode(tags));
}
/**
 * Checks the first three names of the sample at index 7: each is expected to be a
 * "person" span, covering [0, 1), [3, 4) and [5, 6) respectively.
 */
@Test
public void testMissingRightContraction() throws IOException {
  // One row per expected name: {start, end}, in the order the names are checked.
  final int[][] expectedBounds = {{0, 1}, {3, 4}, {5, 6}};

  for (int nameIndex = 0; nameIndex < expectedBounds.length; nameIndex++) {
    final int[] bounds = expectedBounds[nameIndex];
    Assert.assertEquals(
        new Span(bounds[0], bounds[1], "person"),
        samples.get(7).getNames()[nameIndex]);
  }
}
/**
 * Tag sequence: Start, Other.
 * Expects exactly one single-token span [0, 1) of type B.
 */
@Test
public void testDecodeSingletonFirst() {
  List<String> tags = Arrays.asList(B_START, OTHER);

  Span singleton = new Span(0, 1, B_TYPE);

  Assert.assertArrayEquals(new Span[] {singleton}, codec.decode(tags));
}
/**
 * Tag sequence: Start, Continue, Continue, Other.
 * Expects exactly one three-token span [0, 3) of type B.
 */
@Test
public void testDecodeTripletFirst() {
  List<String> tags = Arrays.asList(B_START, B_CONTINUE, B_CONTINUE, OTHER);

  Span triplet = new Span(0, 3, B_TYPE);

  Assert.assertArrayEquals(new Span[] {triplet}, codec.decode(tags));
}
/**
 * Encodes a four-token sentence with two adjacent single-token spans of type A
 * (tokens 1 and 2) and expects: Other, Unit, Unit, Other.
 */
@Test
public void testEncodeAdjacentUnitSpans() {
  String[] tokens = "word PersonA PersonB word".split(" ");
  Span[] adjacentSpans = {new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE)};
  NameSample sample = new NameSample(tokens, adjacentSpans, true);

  String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, A_UNIT, A_UNIT, OTHER};
  Assert.assertArrayEquals("Both PersonA and PersonB are 'unit' tags", expected, actual);
}
/**
 * A span covering an entire whitespace-only string is expected, once trimmed
 * against that string, to cover the empty text.
 */
@Test
public void testTrimWhitespaceSpan() {
  final String blank = " ";
  final Span whole = new Span(0, blank.length());

  Assert.assertEquals("", whole.trim(blank).getCoveredText(blank));
}
/**
 * Encodes a five-token sentence with one two-token span of type A (tokens 2 and 3)
 * and expects: Other, Other, Start, Continue, Other.
 */
@Test
public void testEncodeDoubleTokenSpan() {
  String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  Span[] nameSpans = {new Span(2, 4, A_TYPE)};
  NameSample sample = new NameSample(tokens, nameSpans, true);

  String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
  String message = "'Stefanie' should be 'start' only, 'Schmidt' is "
      + "'continue' and the rest should be 'other'.";
  Assert.assertArrayEquals(message, expected, encoded);
}
/**
 * Encodes a five-token sentence with one two-token span whose type is {@code null}
 * (tokens 2 and 3) and expects the start/continue tags to carry the "default" prefix:
 * Other, Other, default-Start, default-Continue, Other.
 */
@Test
public void testEncodeDoubleTokenSpanNoType() {
  final String defaultStart = "default-" + BioCodec.START;
  final String defaultContinue = "default-" + BioCodec.CONTINUE;

  String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  Span[] untypedSpans = {new Span(2, 4, null)};
  NameSample sample = new NameSample(tokens, untypedSpans, true);

  String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, OTHER, defaultStart, defaultContinue, OTHER};
  String message = "'Stefanie' should be 'start' only, 'Schmidt' is "
      + "'continue' and the rest should be 'other'.";
  Assert.assertArrayEquals(message, expected, encoded);
}
/**
 * Encodes a four-token sentence with one single-token span of type A (token 2)
 * and expects: Other, Other, Unit, Other.
 */
@Test
public void testEncodeSingleUnitTokenSpan() {
  String[] tokens = "I called Julie again.".split(" ");
  Span[] nameSpans = {new Span(2, 3, A_TYPE)};
  NameSample sample = new NameSample(tokens, nameSpans, true);

  String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, OTHER, A_UNIT, OTHER};
  Assert.assertArrayEquals(
      "'Julie' should be 'unit' only, the rest should be 'other'.", expected, actual);
}
/**
 * Encodes a nine-token sentence with one three-token span of type A (tokens 3 to 5)
 * and expects: three Other tags, then Start, Continue, Last, then three Other tags.
 */
@Test
public void testEncodeTripleTokenSpan() {
  String[] tokens = "Secretary - General Anders Fogh Rasmussen is from Denmark.".split(" ");
  Span[] nameSpans = {new Span(3, 6, A_TYPE)};
  NameSample sample = new NameSample(tokens, nameSpans, true);

  String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {
      OTHER, OTHER, OTHER,
      A_START, A_CONTINUE, A_LAST,
      OTHER, OTHER, OTHER};
  String message = "'Anders' should be 'start' only, 'Fogh' is 'inside', "
      + "'Rasmussen' is 'last' and the rest should be 'other'.";
  Assert.assertArrayEquals(message, expected, actual);
}