/**
 * Computes a hash code from the sentence tokens, the name spans, the
 * additional context and the clear-adaptive-data flag.
 *
 * @return the combined hash of the four components, in that order
 */
@Override
public int hashCode() {
  final int sentenceHash = Arrays.hashCode(getSentence());
  final int namesHash = Arrays.hashCode(getNames());
  // NOTE(review): Arrays.hashCode is shallow; if the additional context is a
  // nested array its inner arrays are hashed by identity - confirm intended.
  final int contextHash = Arrays.hashCode(getAdditionalContext());
  final boolean clearAdaptive = isClearAdaptiveDataSet();

  return Objects.hash(sentenceHash, namesHash, contextHash, clearAdaptive);
}
private void statsAdd(NameSample reference, NameSample prediction) { String[] refTags = sequenceCodec.encode(reference.getNames(), reference.getSentence().length); String[] predTags = sequenceCodec.encode(prediction.getNames(), prediction.getSentence().length); // we don' want it to compute token frequency, so we pass an array of empty strings instead // of tokens getStats().add(new String[reference.getSentence().length], refTags, predTags); }
/**
 * Checks that the sample at index 7 carries three "person" spans at the
 * expected token offsets.
 */
@Test
public void testMissingRightContraction() throws IOException {
  final Span[] expectedSpans = {
      new Span(0, 1, "person"),
      new Span(3, 4, "person"),
      new Span(5, 6, "person")
  };

  for (int i = 0; i < expectedSpans.length; i++) {
    Assert.assertEquals(expectedSpans[i], samples.get(7).getNames()[i]);
  }
}
/**
 * Filters the sample stream down to the organization type only and checks
 * that the first sample read keeps exactly one name, of that type.
 */
@Test
public void testSingleFilter() throws IOException {
  final String[] acceptedTypes = {organization};
  filter = new NameSampleTypeFilter(acceptedTypes, sampleStream(text));

  NameSample filtered = filter.read();
  Span[] remaining = filtered.getNames();

  Assert.assertEquals(1, remaining.length);
  Assert.assertEquals(organization, remaining[0].getType());
}
/**
 * Encoding a four-token sentence without any name spans must yield the
 * 'other' tag for every token.
 */
@Test
public void testEncodeNoNames() {
  String[] tokens = "Once upon a time.".split(" ");
  NameSample sample = new NameSample(tokens, new Span[0], true);

  String[] expectedTags = {OTHER, OTHER, OTHER, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("Only 'Other' is expected.", expectedTags, encodedTags);
}
/**
 * A span covering a single token (index 2) must be encoded as a lone
 * 'start' tag, with 'other' everywhere else.
 */
@Test
public void testEncodeSingleTokenSpan() {
  String[] tokens = "I called Julie again.".split(" ");
  Span[] names = {new Span(2, 3, A_TYPE)};
  NameSample sample = new NameSample(tokens, names, true);

  String[] expectedTags = {OTHER, OTHER, A_START, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  final String failureMessage = "'Julie' should be 'start' only, the rest should be 'other'.";
  Assert.assertArrayEquals(failureMessage, expectedTags, encodedTags);
}
/**
 * A span covering two tokens (indices 2 and 3) must be encoded as 'start'
 * followed by 'continue', with 'other' everywhere else.
 */
@Test
public void testEncodeDoubleTokenSpan() {
  String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  Span[] names = {new Span(2, 4, A_TYPE)};
  NameSample sample = new NameSample(tokens, names, true);

  String[] expectedTags = {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  final String failureMessage = "'Stefanie' should be 'start' only, 'Schmidt' is "
      + "'continue' and the rest should be 'other'.";
  Assert.assertArrayEquals(failureMessage, expectedTags, encodedTags);
}
/**
 * A two-token span whose type is {@code null} is expected to be encoded
 * with tags prefixed by "default-" instead of a type name.
 */
@Test
public void testEncodeDoubleTokenSpanNoType() {
  final String defaultPrefix = "default" + "-";
  final String defaultStart = defaultPrefix + BioCodec.START;
  final String defaultContinue = defaultPrefix + BioCodec.CONTINUE;

  String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  Span[] untypedNames = {new Span(2, 4, null)};
  NameSample sample = new NameSample(tokens, untypedNames, true);

  String[] expectedTags = {OTHER, OTHER, defaultStart, defaultContinue, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  final String failureMessage = "'Stefanie' should be 'start' only, 'Schmidt' is "
      + "'continue' and the rest should be 'other'.";
  Assert.assertArrayEquals(failureMessage, expectedTags, encodedTags);
}
/**
 * A span covering a single token (index 2) must be encoded as a 'unit'
 * tag, with 'other' everywhere else.
 */
@Test
public void testEncodeSingleUnitTokenSpan() {
  String[] tokens = "I called Julie again.".split(" ");
  Span[] names = {new Span(2, 3, A_TYPE)};
  NameSample sample = new NameSample(tokens, names, true);

  String[] expectedTags = {OTHER, OTHER, A_UNIT, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  final String failureMessage = "'Julie' should be 'unit' only, the rest should be 'other'.";
  Assert.assertArrayEquals(failureMessage, expectedTags, encodedTags);
}
/**
 * A span covering two tokens (indices 2 and 3) must be encoded as 'start'
 * followed by 'last', with 'other' everywhere else.
 */
@Test
public void testEncodeDoubleTokenSpan() {
  String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  Span[] names = {new Span(2, 4, A_TYPE)};
  NameSample sample = new NameSample(tokens, names, true);

  String[] expectedTags = {OTHER, OTHER, A_START, A_LAST, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  final String failureMessage = "'Stefanie' should be 'start' only, 'Schmidt' is 'last' "
      + "and the rest should be 'other'.";
  Assert.assertArrayEquals(failureMessage, expectedTags, encodedTags);
}
/**
 * A span covering three tokens (indices 3 to 5) must be encoded as
 * 'start', 'continue', 'last', with 'other' everywhere else.
 */
@Test
public void testEncodeTripleTokenSpan() {
  String[] tokens = "Secretary - General Anders Fogh Rasmussen is from Denmark.".split(" ");
  Span[] names = {new Span(3, 6, A_TYPE)};
  NameSample sample = new NameSample(tokens, names, true);

  String[] expectedTags = {
      OTHER, OTHER, OTHER,
      A_START, A_CONTINUE, A_LAST,
      OTHER, OTHER, OTHER
  };
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  final String failureMessage = "'Anders' should be 'start' only, 'Fogh' is 'inside', "
      + "'Rasmussen' is 'last' and the rest should be 'other'.";
  Assert.assertArrayEquals(failureMessage, expectedTags, encodedTags);
}
/**
 * Two directly adjacent single-token spans must each be encoded with
 * their own 'start' tag.
 */
@Test
public void testEncodeAdjacentSingleSpans() {
  String[] tokens = "something PersonA PersonB Something".split(" ");
  Span[] adjacentNames = {
      new Span(1, 2, A_TYPE),
      new Span(2, 3, A_TYPE)
  };
  NameSample sample = new NameSample(tokens, adjacentNames, true);

  String[] expectedTags = {OTHER, A_START, A_START, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals(expectedTags, encodedTags);
}
/**
 * A two-token span directly followed by a single-token span must be
 * encoded as 'start', 'continue' and then a fresh 'start'.
 */
@Test
public void testEncodeAdjacentSpans() {
  String[] tokens = "something PersonA PersonA PersonB Something".split(" ");
  Span[] adjacentNames = {
      new Span(1, 3, A_TYPE),
      new Span(3, 4, A_TYPE)
  };
  NameSample sample = new NameSample(tokens, adjacentNames, true);

  String[] expectedTags = {OTHER, A_START, A_CONTINUE, A_START, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals(expectedTags, encodedTags);
}
/**
 * Two directly adjacent single-token spans must each be encoded as a
 * 'unit' tag.
 */
@Test
public void testEncodeAdjacentUnitSpans() {
  String[] tokens = "word PersonA PersonB word".split(" ");
  Span[] adjacentNames = {
      new Span(1, 2, A_TYPE),
      new Span(2, 3, A_TYPE)
  };
  NameSample sample = new NameSample(tokens, adjacentNames, true);

  String[] expectedTags = {OTHER, A_UNIT, A_UNIT, OTHER};
  String[] encodedTags = codec.encode(sample.getNames(), sample.getSentence().length);

  Assert.assertArrayEquals("Both PersonA and PersonB are 'unit' tags", expectedTags, encodedTags);
}
/**
 * With an empty list of accepted types, the first sample read from the
 * filter must contain no names at all.
 */
@Test
public void testNoFilter() throws IOException {
  final String[] noTypes = new String[0];
  filter = new NameSampleTypeFilter(noTypes, sampleStream(text));

  NameSample filtered = filter.read();
  int nameCount = filtered.getNames().length;

  Assert.assertEquals(0, nameCount);
}
/**
 * Evaluates sample A against a name finder that returns exactly sample A's
 * names: the F-measure must be 1.0 and nothing may be written to the
 * listener's output stream.
 */
@Test
public void testPositive() {
  OutputStream errorOutput = new ByteArrayOutputStream();
  TokenNameFinderEvaluationMonitor errorListener = new NameEvaluationErrorListener(errorOutput);

  Span[] predictedNames = createSimpleNameSampleA().getNames();
  DummyNameFinder nameFinder = new DummyNameFinder(predictedNames);
  TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(nameFinder, errorListener);

  evaluator.evaluateSample(createSimpleNameSampleA());

  double fMeasure = evaluator.getFMeasure().getFMeasure();
  Assert.assertEquals(1.0, fMeasure, 0.0);

  int writtenLength = errorOutput.toString().length();
  Assert.assertEquals(0, writtenLength);
}
/**
 * Evaluates sample A against a name finder that returns sample B's names:
 * the F-measure must be 0.8 and the listener must have written something
 * to its output stream.
 */
@Test
public void testNegative() {
  OutputStream stream = new ByteArrayOutputStream();
  TokenNameFinderEvaluationMonitor listener = new NameEvaluationErrorListener(stream);

  Span[] pred = createSimpleNameSampleB().getNames();
  TokenNameFinderEvaluator eval = new TokenNameFinderEvaluator(new DummyNameFinder(pred), listener);

  eval.evaluateSample(createSimpleNameSampleA());

  Assert.assertEquals(0.8, eval.getFMeasure().getFMeasure(), 0.0);

  // Compare by value: assertNotSame would box both ints and compare object
  // references, which only gives the intended answer by accident of the
  // Integer cache.
  Assert.assertTrue(stream.toString().length() > 0);
}
/**
 * Reads the first sample of the German data and checks that it has five
 * tokens, no names, and the clear-adaptive-data flag set.
 */
@Test
public void testParsingGermanSample() throws IOException {
  ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE);

  NameSample personName = sampleStream.read();
  Assert.assertNotNull(personName);

  Assert.assertEquals(5, personName.getSentence().length);
  Assert.assertEquals(0, personName.getNames().length);
  // assertTrue states the intent directly instead of comparing to a boxed 'true'.
  Assert.assertTrue(personName.isClearAdaptiveDataSet());
}
/**
 * Reads the Dutch sample file: the first sample has no names and sets the
 * clear-adaptive-data flag, the second does not set the flag, and the
 * stream is exhausted after that.
 */
@Test
public void testParsingDutchSample() throws IOException {
  ObjectStream<NameSample> dutchStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");

  NameSample firstSample = dutchStream.read();
  Assert.assertEquals(0, firstSample.getNames().length);
  Assert.assertTrue(firstSample.isClearAdaptiveDataSet());

  NameSample secondSample = dutchStream.read();
  Assert.assertFalse(secondSample.isClearAdaptiveDataSet());

  NameSample afterEnd = dutchStream.read();
  Assert.assertNull(afterEnd);
}