/**
 * Supplies the token array of the given name sample.
 *
 * @param sample the sample whose sentence is requested
 * @return the array returned by {@code sample.getSentence()}
 */
@Override
protected String[] toSentence(NameSample sample) {
  final String[] tokens = sample.getSentence();
  return tokens;
}
}
/**
 * Combines the hashes of the sentence, the names, the additional context and the
 * clear-adaptive-data flag into a single hash code.
 *
 * <p>NOTE(review): {@code Arrays.hashCode(Object[])} hashes every element with that element's
 * own {@code hashCode()}. If the additional context is a nested array, its rows are therefore
 * hashed by identity -- confirm that this matches what {@code equals} compares.
 *
 * @return the combined hash code
 */
@Override
public int hashCode() {
  final int sentenceHash = Arrays.hashCode(getSentence());
  final int namesHash = Arrays.hashCode(getNames());
  final int contextHash = Arrays.hashCode(getAdditionalContext());
  return Objects.hash(sentenceHash, namesHash, contextHash, isClearAdaptiveDataSet());
}
/**
 * Reads the next name sample from the wrapped stream and turns it into a token sample.
 *
 * @return a {@code TokenSample} created from the detokenizer and the sample's sentence, or
 *     {@code null} when the wrapped stream returned {@code null}
 * @throws IOException propagated from reading the wrapped stream
 */
public TokenSample read() throws IOException {
  final NameSample next = samples.read();
  if (next == null) {
    // Nothing was read, so there is nothing to convert.
    return null;
  }
  return new TokenSample(detokenizer, next.getSentence());
}
private void statsAdd(NameSample reference, NameSample prediction) { String[] refTags = sequenceCodec.encode(reference.getNames(), reference.getSentence().length); String[] predTags = sequenceCodec.encode(prediction.getNames(), prediction.getSentence().length); // we don' want it to compute token frequency, so we pass an array of empty strings instead // of tokens getStats().add(new String[reference.getSentence().length], refTags, predTags); }
/**
 * Spot-checks individual tokens of the first four samples at fixed positions.
 */
@Test
public void testCheckMergedContractions() throws IOException {
  final String[] first = samples.get(0).getSentence();
  Assert.assertEquals("no", first[1]);
  Assert.assertEquals("no", first[11]);

  final String[] second = samples.get(1).getSentence();
  Assert.assertEquals("Com", second[0]);
  Assert.assertEquals("relação", second[1]);
  Assert.assertEquals("à", second[2]);

  final String[] third = samples.get(2).getSentence();
  Assert.assertEquals("mais", third[4]);
  Assert.assertEquals("de", third[5]);
  Assert.assertEquals("da", third[8]);

  final String[] fourth = samples.get(3).getSentence();
  Assert.assertEquals("num", fourth[26]);
}
/**
 * Verifies the number of tokens in each of the first four samples.
 */
@Test
public void testSize() throws IOException {
  // Expected token count, indexed by sample position.
  final int[] expectedLengths = {25, 12, 59, 33};
  for (int i = 0; i < expectedLengths.length; i++) {
    Assert.assertEquals(expectedLengths[i], samples.get(i).getSentence().length);
  }
}
/**
 * Forwards a reference/prediction pair to {@code printError}, together with the reference id,
 * the names of both samples and the reference sentence.
 *
 * @param reference the expected sample
 * @param prediction the predicted sample
 */
@Override
public void missclassified(NameSample reference, NameSample prediction) {
  final String[] referenceTokens = reference.getSentence();
  printError(
      reference.getId(),
      reference.getNames(),
      prediction.getNames(),
      reference,
      prediction,
      referenceTokens);
}
/**
 * Tags the source sentence of the given sequence with a name finder built from the supplied
 * model, encodes the found names as tags and generates the events for that tagging.
 *
 * @param sequence a sequence whose source is a {@code NameSample}
 * @param model the model wrapped into a temporary {@code TokenNameFinderModel}
 * @return the events generated for the sentence and its tags
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  TokenNameFinder tagger = new NameFinderME(new TokenNameFinderModel(
      "x-unspecified", model, Collections.emptyMap(), null));

  // The parameter is a raw Sequence, so the source has to be reached through an unchecked cast.
  String[] sentence = ((Sequence<NameSample>) sequence).getSource().getSentence();
  String[] tags = seqCodec.encode(tagger.find(sentence), sentence.length);

  // toArray(T[]) only fills the array it is given when the list fits into it; otherwise it
  // allocates and returns a new array. Returning its result (rather than the array passed in)
  // guarantees the caller never receives an untouched, all-null array.
  return NameFinderEventStream.generateEvents(sentence, tags, pcg)
      .toArray(new Event[sentence.length]);
}
/**
 * Checks that parsing the annotated line below yields a sentence of eight tokens.
 *
 * <p>NOTE(review): the test is meant to cover an additional space being treated as a single
 * space, but the literal as seen here contains only single spaces -- confirm that the extra
 * space is still present in the file.
 *
 * @throws Exception if parsing the line fails
 */
@Test
public void testParseWithAdditionalSpace() throws Exception {
  final String annotated =
      "<START> M . K . <END> <START> Schwitters <END> ? <START> Heartfield <END> ?";
  final NameSample parsed = NameSample.parse(annotated, false);

  Assert.assertEquals(8, parsed.getSentence().length);
}
/**
 * A sentence without any name span must be encoded with the 'other' tag for every token.
 */
@Test
public void testEncodeNoNames() {
  final String[] tokens = "Once upon a time.".split(" ");
  final NameSample sample = new NameSample(tokens, new Span[] {}, true);

  final String[] expected = {OTHER, OTHER, OTHER, OTHER};
  final String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
}
/**
 * A span covering a single token must be encoded as 'start', with 'other' everywhere else.
 */
@Test
public void testEncodeSingleTokenSpan() {
  final String[] tokens = "I called Julie again.".split(" ");
  final Span[] names = {new Span(2, 3, A_TYPE)};
  final NameSample sample = new NameSample(tokens, names, true);

  final String[] expected = {OTHER, OTHER, A_START, OTHER};
  final String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("'Julie' should be 'start' only, the rest should be 'other'.",
      expected, actual);
}
/**
 * A span covering two tokens must be encoded as 'start' followed by 'last', with 'other'
 * everywhere else.
 */
@Test
public void testEncodeDoubleTokenSpan() {
  final String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  final Span[] names = {new Span(2, 4, A_TYPE)};
  final NameSample sample = new NameSample(tokens, names, true);

  final String[] expected = {OTHER, OTHER, A_START, A_LAST, OTHER};
  final String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is 'last' "
      + "and the rest should be 'other'.", expected, actual);
}
/**
 * A span covering two tokens must be encoded as 'start' followed by 'continue', with 'other'
 * everywhere else.
 */
@Test
public void testEncodeDoubleTokenSpan() {
  final String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  final Span[] names = {new Span(2, 4, A_TYPE)};
  final NameSample sample = new NameSample(tokens, names, true);

  final String[] expected = {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
  final String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is "
      + "'continue' and the rest should be 'other'.", expected, encoded);
}
/**
 * A two-token span created with a {@code null} type must be encoded with tags that carry the
 * "default" prefix.
 */
@Test
public void testEncodeDoubleTokenSpanNoType() {
  // Expected tags for a span without a type: "default" joined to the codec suffix by "-".
  final String defaultStart = "default" + "-" + BioCodec.START;
  final String defaultContinue = "default" + "-" + BioCodec.CONTINUE;

  final String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  final Span[] names = {new Span(2, 4, null)};
  final NameSample sample = new NameSample(tokens, names, true);

  final String[] expected = {OTHER, OTHER, defaultStart, defaultContinue, OTHER};
  final String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("'Stefanie' should be 'start' only, 'Schmidt' is "
      + "'continue' and the rest should be 'other'.", expected, encoded);
}
/**
 * A span covering a single token must be encoded as 'unit', with 'other' everywhere else.
 */
@Test
public void testEncodeSingleUnitTokenSpan() {
  final String[] tokens = "I called Julie again.".split(" ");
  final Span[] names = {new Span(2, 3, A_TYPE)};
  final NameSample sample = new NameSample(tokens, names, true);

  final String[] expected = {OTHER, OTHER, A_UNIT, OTHER};
  final String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("'Julie' should be 'unit' only, the rest should be 'other'.",
      expected, actual);
}
/**
 * A span covering three tokens must be encoded as 'start', 'continue', 'last', with 'other'
 * everywhere else.
 */
@Test
public void testEncodeTripleTokenSpan() {
  final String[] tokens =
      "Secretary - General Anders Fogh Rasmussen is from Denmark.".split(" ");
  final Span[] names = {new Span(3, 6, A_TYPE)};
  final NameSample sample = new NameSample(tokens, names, true);

  final String[] expected = {
      OTHER, OTHER, OTHER, A_START, A_CONTINUE, A_LAST, OTHER, OTHER, OTHER};
  final String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("'Anders' should be 'start' only, 'Fogh' is 'inside', "
      + "'Rasmussen' is 'last' and the rest should be 'other'.", expected, actual);
}
/**
 * Two adjacent single-token spans must each be encoded as 'unit'.
 */
@Test
public void testEncodeAdjacentUnitSpans() {
  final String[] tokens = "word PersonA PersonB word".split(" ");
  final Span[] adjacentNames = {new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE)};
  final NameSample sample = new NameSample(tokens, adjacentNames, true);

  final String[] expected = {OTHER, A_UNIT, A_UNIT, OTHER};
  final String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("Both PersonA and PersonB are 'unit' tags", expected, actual);
}
/**
 * Two adjacent single-token spans must each be encoded as 'start'.
 */
@Test
public void testEncodeAdjacentSingleSpans() {
  final String[] tokens = "something PersonA PersonB Something".split(" ");
  final Span[] adjacentNames = {new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE)};
  final NameSample sample = new NameSample(tokens, adjacentNames, true);

  final String[] expected = {OTHER, A_START, A_START, OTHER};
  final String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals(expected, encoded);
}
/**
 * A two-token span directly followed by a single-token span must be encoded as
 * 'start', 'continue', 'start'.
 */
@Test
public void testEncodeAdjacentSpans() {
  final String[] tokens = "something PersonA PersonA PersonB Something".split(" ");
  final Span[] adjacentNames = {new Span(1, 3, A_TYPE), new Span(3, 4, A_TYPE)};
  final NameSample sample = new NameSample(tokens, adjacentNames, true);

  final String[] expected = {OTHER, A_START, A_CONTINUE, A_START, OTHER};
  final String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals(expected, encoded);
}
/**
 * Reads the first sample of the German test data and checks its token count, that it carries
 * no names and that its clear-adaptive-data flag is set.
 *
 * @throws IOException if opening, reading or closing the sample stream fails
 */
@Test
public void testParsingGermanSample() throws IOException {
  // try-with-resources closes the stream even when one of the assertions fails.
  try (ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE)) {
    NameSample personName = sampleStream.read();

    Assert.assertNotNull(personName);
    Assert.assertEquals(5, personName.getSentence().length);
    Assert.assertEquals(0, personName.getNames().length);
    Assert.assertTrue(personName.isClearAdaptiveDataSet());
  }
}