/**
 * Computes a hash code from the contents of the sentence and tag arrays.
 *
 * @return the hash combining the content hash of {@code getSentence()} with
 *     the content hash of {@code getTags()}.
 */
@Override
public int hashCode() {
  int sentenceHash = Arrays.hashCode(getSentence());
  int tagsHash = Arrays.hashCode(getTags());
  return Objects.hash(sentenceHash, tagsHash);
}
/**
 * Returns the tags held by {@code sample}.
 *
 * @param sentence the tokens to tag; this implementation does not consult them.
 * @return the array returned by {@code sample.getTags()}.
 */
public String[] tag(String[] sentence) {
  final String[] sampleTags = sample.getTags();
  return sampleTags;
}
@Override public String toString() { StringBuilder result = new StringBuilder(); for (int i = 0; i < getSentence().length; i++) { result.append(getSentence()[i]); result.append('_'); result.append(getTags()[i]); result.append(' '); } if (result.length() > 0) { // get rid of last space result.setLength(result.length() - 1); } return result.toString(); }
/**
 * Returns the tags held by {@code sample} as a list.
 *
 * @param sentence the tokens to tag; this implementation does not consult them.
 * @return a fixed-size list view backed by the array from {@code sample.getTags()}.
 */
public List<String> tag(List<String> sentence) {
  final String[] sampleTags = sample.getTags();
  return Arrays.asList(sampleTags);
}
/**
 * Creates the events for a single sample.
 *
 * @param sample the sample whose sentence, tags and additional context are
 *     handed to {@code generateEvents} together with {@code cg}.
 * @return an iterator over the events produced by {@code generateEvents}.
 */
@Override
protected Iterator<Event> createEvents(POSSample sample) {
  String[] tokens = sample.getSentence();
  String[] tokenTags = sample.getTags();
  Object[] extraContext = sample.getAddictionalContext();
  return generateEvents(tokens, tokenTags, extraContext, cg).iterator();
}
@Override public Sequence read() throws IOException { POSSample sample = psi.read(); if (sample != null) { String[] sentence = sample.getSentence(); String[] tags = sample.getTags(); Event[] events = new Event[sentence.length]; for (int i = 0; i < sentence.length; i++) { // it is safe to pass the tags as previous tags because // the context generator does not look for non predicted tags String[] context = pcg.getContext(i, sentence, tags, null); events[i] = new Event(tags[i], context); } Sequence<POSSample> sequence = new Sequence<POSSample>(events,sample); return sequence; } return null; }
/**
 * Tests if it can parse an empty tag.
 *
 * The input's second token ends in the separator with nothing after it, so
 * its tag is expected to be the empty string.
 */
@Test
public void testParseEmtpyTag() throws InvalidFormatException {
  String sentence = "the_DT stories_";

  POSSample sample = POSSample.parse(sentence);

  // assertEquals takes (expected, actual); keeping that order makes the
  // failure message report the right value as "expected".
  Assert.assertEquals("", sample.getTags()[1]);
}
/**
 * Evaluates the given reference {@link POSSample} object.
 *
 * The reference sentence (with its additional context) is tagged by the
 * {@link POSTagger}; each predicted tag is then compared with the reference
 * tag at the same position and the word accuracy score is updated with
 * 1 for a match and 0 for a mismatch.
 *
 * @param reference the reference {@link POSSample}.
 *
 * @return the predicted {@link POSSample}, built from the reference sentence
 *     and the predicted tags.
 */
@Override
protected POSSample processSample(POSSample reference) {
  String[] predictedTags =
      tagger.tag(reference.getSentence(), reference.getAddictionalContext());
  String[] referenceTags = reference.getTags();

  for (int idx = 0; idx < referenceTags.length; idx++) {
    boolean correct = referenceTags[idx].equals(predictedTags[idx]);
    wordAccuracy.add(correct ? 1 : 0);
  }

  return new POSSample(reference.getSentence(), predictedTags);
}
@Test public void testPOSSampleSerDe() throws IOException { POSSample posSample = createGoldSample(); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream); out.writeObject(posSample); out.flush(); byte[] bytes = byteArrayOutputStream.toByteArray(); ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(bytes); ObjectInput objectInput = new ObjectInputStream(byteArrayInputStream); POSSample deSerializedPOSSample = null; try { deSerializedPOSSample = (POSSample) objectInput.readObject(); } catch (ClassNotFoundException e) { // do nothing } Assert.assertNotNull(deSerializedPOSSample); Assert.assertArrayEquals(posSample.getAddictionalContext(), deSerializedPOSSample.getAddictionalContext()); Assert.assertArrayEquals(posSample.getSentence(), deSerializedPOSSample.getSentence()); Assert.assertArrayEquals(posSample.getTags(), deSerializedPOSSample.getTags()); }
/**
 * Tags and chunks every line of the line-wise test stream, feeds the UTF-8
 * bytes of each produced chunk label into a message digest, and compares
 * the final digest with a fixed expected value.
 *
 * @throws Exception if the digest algorithm is unavailable, a model cannot
 *     be loaded, or reading the stream fails.
 */
@Test
public void evalChunkerModel() throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);

  File posModelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin");
  POSTagger tagger = new POSTaggerME(new POSModel(posModelFile));

  File chunkerModelFile = new File(getOpennlpDataDir(), "models-sf/en-chunker.bin");
  Chunker chunker = new ChunkerME(new ChunkerModel(chunkerModelFile));

  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    for (LeipzigTestSample line = lines.read(); line != null; line = lines.read()) {
      POSSample tagged = new POSSample(line.getText(), tagger.tag(line.getText()));

      String[] chunks = chunker.chunk(tagged.getSentence(), tagged.getTags());
      for (String chunk : chunks) {
        digest.update(chunk.getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  BigInteger expected = new BigInteger("226003515785585284478071030961407561943");
  BigInteger actual = new BigInteger(1, digest.digest());
  Assert.assertEquals(expected, actual);
}
/**
 * Tests if it can parse an empty {@link String}.
 *
 * Parsing the empty string is expected to yield a sample with no tokens
 * and no tags.
 */
@Test
public void testParseEmptyString() throws InvalidFormatException {
  String sentence = "";

  POSSample sample = POSSample.parse(sentence);

  // assertEquals takes (expected, actual); keeping that order makes the
  // failure message report the right value as "expected".
  Assert.assertEquals(0, sample.getSentence().length);
  Assert.assertEquals(0, sample.getTags().length);
}
@Test public void testExpandME() throws IOException { // add one sentence with expandME = true try (ADPOSSampleStream stream = new ADPOSSampleStream( new PlainTextByLineStream(new ResourceAsStreamFactory( ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"), StandardCharsets.UTF_8), true, false)) { POSSample sample = stream.read(); Assert.assertEquals(27, sample.getSentence().length); Assert.assertEquals("Inicia", sample.getSentence()[0]); Assert.assertEquals("v-fin", sample.getTags()[0]); Assert.assertEquals("em", sample.getSentence()[1]); Assert.assertEquals("prp", sample.getTags()[1]); Assert.assertEquals("o", sample.getSentence()[2]); Assert.assertEquals("art", sample.getTags()[2]); Assert.assertEquals("Porto", sample.getSentence()[9]); Assert.assertEquals("B-prop", sample.getTags()[9]); Assert.assertEquals("Poesia", sample.getSentence()[10]); Assert.assertEquals("I-prop", sample.getTags()[10]); } }
@Test public void testIncludeFeats() throws IOException { // add one sentence with includeFeats = true try (ADPOSSampleStream stream = new ADPOSSampleStream( new PlainTextByLineStream(new ResourceAsStreamFactory( ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"), StandardCharsets.UTF_8), false, true)) { POSSample sample = stream.read(); Assert.assertEquals(23, sample.getSentence().length); Assert.assertEquals("Inicia", sample.getSentence()[0]); Assert.assertEquals("v-fin=PR=3S=IND=VFIN", sample.getTags()[0]); Assert.assertEquals("em", sample.getSentence()[1]); Assert.assertEquals("prp", sample.getTags()[1]); Assert.assertEquals("o", sample.getSentence()[2]); Assert.assertEquals("art=DET=M=S", sample.getTags()[2]); Assert.assertEquals("Porto_Poesia", sample.getSentence()[9]); Assert.assertEquals("prop=M=S", sample.getTags()[9]); } }
@Test public void testSimple() throws IOException { // add one sentence with expandME = includeFeats = false try (ADPOSSampleStream stream = new ADPOSSampleStream( new PlainTextByLineStream(new ResourceAsStreamFactory( ADParagraphStreamTest.class, "/opennlp/tools/formats/ad.sample"), StandardCharsets.UTF_8), false, false)) { POSSample sample = stream.read(); Assert.assertEquals(23, sample.getSentence().length); Assert.assertEquals("Inicia", sample.getSentence()[0]); Assert.assertEquals("v-fin", sample.getTags()[0]); Assert.assertEquals("em", sample.getSentence()[1]); Assert.assertEquals("prp", sample.getTags()[1]); Assert.assertEquals("o", sample.getSentence()[2]); Assert.assertEquals("art", sample.getTags()[2]); Assert.assertEquals("Porto_Poesia", sample.getSentence()[9]); Assert.assertEquals("prop", sample.getTags()[9]); } }
Assert.assertEquals(".", words[5]); String[] tags = sample.getTags(); Assert.assertEquals("x1", tags[0]); Assert.assertEquals("x2", tags[1]);