/**
 * Adds a part-of-speech feature of the form {@code pos=<tag>} for one token.
 *
 * <p>The tags of the most recently seen token array are cached. The tagger is
 * invoked again only when {@code toks} differs element-wise from the cached array.
 *
 * @param feats the list the generated feature is appended to.
 * @param toks the tokens of the current sentence.
 * @param index the position in {@code toks} of the token to describe.
 * @param preds previous outcomes; not used by this method.
 */
public void createFeatures(List<String> feats, String[] toks, int index, String[] preds) {
  boolean sameSentence = Arrays.equals(this.cachedTokens, toks);

  if (!sameSentence) {
    // Remember the new sentence first, then recompute its tags.
    this.cachedTokens = toks;
    this.cachedTags = this.posTagger.tag(toks);
  }

  String posFeature = "pos=" + this.cachedTags[index];
  feats.add(posFeature);
}
private void evalPosModel(POSModel model, BigInteger expectedHash) throws Exception { // break the input stream into sentences // The input stream is tokenized and can be processed here directly MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM); POSTagger tagger = new POSTaggerME(model); try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) { LeipzigTestSample line; while ((line = lines.read()) != null) { String[] tags = tagger.tag(line.getText()); for (String tag : tags) { digest.update(tag.getBytes(StandardCharsets.UTF_8)); } } } Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest())); }
/**
 * Tags the sentence of the given reference sample and scores the prediction.
 *
 * <p>The tagger is run on the reference's sentence and additional context. Each
 * reference tag is then compared with the predicted tag at the same position, and
 * {@code 1} (match) or {@code 0} (mismatch) is added to the word accuracy.
 *
 * @param reference the reference {@link POSSample}.
 *
 * @return a new {@link POSSample} pairing the reference sentence with the predicted tags.
 */
@Override
protected POSSample processSample(POSSample reference) {
  String[] predicted = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
  String[] gold = reference.getTags();

  for (int pos = 0; pos < gold.length; pos++) {
    wordAccuracy.add(gold[pos].equals(predicted[pos]) ? 1 : 0);
  }

  return new POSSample(reference.getSentence(), predicted);
}
/**
 * Re-tags the source sentence of the given sequence with a tagger built around
 * {@code model} and returns the events generated from those tags.
 *
 * @param sequence the sequence whose source {@link POSSample} is re-tagged.
 * @param model the model wrapped in a {@link POSModel} to drive the tagger.
 *
 * @return the events produced for the sentence, the new tags, the sample's
 *     additional context and the {@code pcg} field.
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  Sequence<POSSample> sampleSequence = sequence;

  POSModel wrappedModel = new POSModel("x-unspecified", model, null, new POSTaggerFactory());
  POSTagger tagger = new POSTaggerME(wrappedModel);

  POSSample source = sampleSequence.getSource();
  String[] tokens = source.getSentence();
  Object[] additionalContext = source.getAddictionalContext();
  String[] newTags = tagger.tag(source.getSentence());

  // Assumes generateEvents yields at most tokens.length events, so toArray
  // fills the pre-sized array in place -- TODO confirm.
  Event[] result = new Event[tokens.length];
  POSSampleEventStream.generateEvents(tokens, newTags, additionalContext, pcg).toArray(result);
  return result;
}
/**
 * Tags and chunks every sample from {@code createLineWiseStream()} and checks a
 * digest of all produced chunk labels against a fixed expected value.
 *
 * <p>The POS tagger and the chunker are loaded from model files below
 * {@code getOpennlpDataDir()}.
 *
 * @throws Exception if a model cannot be loaded, the digest cannot be created,
 *     or reading the samples fails.
 */
@Test
public void evalChunkerModel() throws Exception {
  MessageDigest chunkDigest = MessageDigest.getInstance(HASH_ALGORITHM);

  File posModelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin");
  POSTagger posTagger = new POSTaggerME(new POSModel(posModelFile));

  File chunkerModelFile = new File(getOpennlpDataDir(), "models-sf/en-chunker.bin");
  Chunker chunker = new ChunkerME(new ChunkerModel(chunkerModelFile));

  try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
    for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
      String[] tokens = sample.getText();
      String[] posTags = posTagger.tag(sample.getText());
      POSSample tagged = new POSSample(tokens, posTags);

      String[] chunkLabels = chunker.chunk(tagged.getSentence(), tagged.getTags());
      for (int i = 0; i < chunkLabels.length; i++) {
        chunkDigest.update(chunkLabels[i].getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  BigInteger expectedHash = new BigInteger("226003515785585284478071030961407561943");
  // Signum 1 makes the digest bytes a non-negative magnitude.
  BigInteger actualHash = new BigInteger(1, chunkDigest.digest());
  Assert.assertEquals(expectedHash, actualHash);
}
/**
 * Emits the {@code pos=<tag>} feature for the token at {@code index}.
 *
 * <p>Tagging results are kept between calls and reused for as long as the
 * incoming tokens are element-wise equal to the cached tokens.
 *
 * @param feats receives the generated feature.
 * @param toks the sentence tokens.
 * @param index the index of the token whose tag is emitted.
 * @param preds previous outcomes; not used by this method.
 */
public void createFeatures(List<String> feats, String[] toks, int index, String[] preds) {
  final String[] previousTokens = this.cachedTokens;

  if (!Arrays.equals(previousTokens, toks)) {
    // Different sentence: replace the cached tokens, then re-run the tagger.
    this.cachedTokens = toks;
    this.cachedTags = this.posTagger.tag(toks);
  }

  final String tag = this.cachedTags[index];
  feats.add("pos=" + tag);
}
/**
 * Appends a feature naming the part-of-speech tag of the token at {@code index},
 * written as {@code pos=<tag>}.
 *
 * <p>The tagger runs only when {@code toks} is not element-wise equal to the
 * tokens seen on the previous call; otherwise the cached tags are reused.
 *
 * @param feats the feature list to append to.
 * @param toks the tokens of the sentence being processed.
 * @param index the token position the feature is created for.
 * @param preds previous outcomes; not used by this method.
 */
public void createFeatures(List<String> feats, String[] toks, int index, String[] preds) {
  boolean cacheIsStale = !Arrays.equals(this.cachedTokens, toks);

  if (cacheIsStale) {
    this.cachedTokens = toks;
    String[] freshTags = this.posTagger.tag(toks);
    this.cachedTags = freshTags;
  }

  String feature = "pos=" + this.cachedTags[index];
  feats.add(feature);
}
/**
 * Trains a {@code MAXENT} POS model via {@code trainPOSModel} and checks the tags
 * predicted for a fixed six-token sentence.
 *
 * @throws IOException declared for model training ({@code trainPOSModel}).
 */
@Test
public void testPOSTagger() throws IOException {
  POSModel trainedModel = trainPOSModel(ModelType.MAXENT);
  POSTagger tagger = new POSTaggerME(trainedModel);

  String[] sentence = {"The", "driver", "got", "badly", "injured", "."};
  String[] expectedTags = {"DT", "NN", "VBD", "RB", "VBN", "."};

  String[] tags = tagger.tag(sentence);

  Assert.assertEquals(6, tags.length);
  for (int i = 0; i < expectedTags.length; i++) {
    Assert.assertEquals(expectedTags[i], tags[i]);
  }
}
posTags = posTagger.tag(tokens); npChunkTags = chunker.chunk(tokens, posTags); } catch (NullPointerException e) {
posTags = posTagger.tag(tokens); npChunkTags = chunker.chunk(tokens, posTags); } catch (NullPointerException e) {
/**
 * Runs the {@link POSTagger} on the reference sample and records how many tags agree.
 *
 * <p>For every position in the reference tags, the word accuracy receives
 * {@code 1} when the predicted tag equals the reference tag and {@code 0} otherwise.
 *
 * @param reference the reference {@link POSSample}.
 *
 * @return the predicted {@link POSSample}, built from the reference sentence
 *     and the predicted tags.
 */
@Override
protected POSSample processSample(POSSample reference) {
  final String[] predictedTags =
      tagger.tag(reference.getSentence(), reference.getAddictionalContext());
  final String[] expectedTags = reference.getTags();

  for (int tokenIndex = 0; tokenIndex < expectedTags.length; tokenIndex++) {
    final boolean correct = expectedTags[tokenIndex].equals(predictedTags[tokenIndex]);
    int score;
    if (!correct) {
      score = 0;
    } else {
      score = 1;
    }
    wordAccuracy.add(score);
  }

  return new POSSample(reference.getSentence(), predictedTags);
}
/**
 * Predicts tags for the reference sample's sentence and updates the word accuracy.
 *
 * <p>The sentence and its additional context are passed to the {@link POSTagger}.
 * Each reference tag is compared with the prediction at the same index; a hit
 * contributes {@code 1} and a miss contributes {@code 0} to the word accuracy.
 *
 * @param reference the reference {@link POSSample}.
 *
 * @return a {@link POSSample} holding the reference sentence and the predicted tags.
 */
@Override
protected POSSample processSample(POSSample reference) {
  String[] guessed = tagger.tag(reference.getSentence(), reference.getAddictionalContext());
  String[] truth = reference.getTags();

  int k = 0;
  while (k < truth.length) {
    if (!truth[k].equals(guessed[k])) {
      wordAccuracy.add(0);
    } else {
      wordAccuracy.add(1);
    }
    k++;
  }

  POSSample prediction = new POSSample(reference.getSentence(), guessed);
  return prediction;
}
/**
 * Builds a tagger from {@code model}, tags the sentence of the sequence's source
 * sample with it, and converts the result into events.
 *
 * @param sequence the sequence providing the source {@link POSSample}.
 * @param model the model used to construct the temporary {@link POSTaggerME}.
 *
 * @return the events generated from the sentence, the predicted tags, the
 *     additional context of the sample and the {@code pcg} field.
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  final Sequence<POSSample> typed = sequence;

  final POSTaggerFactory factory = new POSTaggerFactory();
  final POSTagger tagger = new POSTaggerME(new POSModel("x-unspecified", model, null, factory));

  final POSSample sample = typed.getSource();
  final String[] words = sample.getSentence();
  final Object[] extraContext = sample.getAddictionalContext();
  final String[] predicted = tagger.tag(sample.getSentence());

  // Assumes one event per word at most, so toArray writes into `out`
  // rather than allocating a new array -- TODO confirm.
  final Event[] out = new Event[words.length];
  POSSampleEventStream.generateEvents(words, predicted, extraContext, pcg)
      .toArray(out);
  return out;
}
/**
 * Produces the events for a sequence after re-tagging its sentence with the given model.
 *
 * <p>A {@link POSTaggerME} is created for this call from a {@link POSModel}
 * wrapping {@code model}. Its tags for the source sentence, together with the
 * sentence, the sample's additional context and the {@code pcg} field, are
 * passed to {@code POSSampleEventStream.generateEvents}.
 *
 * @param sequence the sequence whose source {@link POSSample} is processed.
 * @param model the model to tag with.
 *
 * @return an array sized to the sentence length, holding the generated events.
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  Sequence<POSSample> posSequence = sequence;

  POSModel posModel = new POSModel("x-unspecified", model, null, new POSTaggerFactory());
  POSTagger modelTagger = new POSTaggerME(posModel);

  POSSample origin = posSequence.getSource();
  String[] sentenceTokens = origin.getSentence();
  Object[] context = origin.getAddictionalContext();

  String[] retagged = modelTagger.tag(origin.getSentence());

  Event[] generated = new Event[sentenceTokens.length];
  POSSampleEventStream
      .generateEvents(sentenceTokens, retagged, context, pcg)
      .toArray(generated);

  return generated;
}
String tags[] = posTagger.tag(sentence);
// Run the POS tagger over the sentence and keep the returned tags
// (presumably one tag per token, in token order -- confirm against the tagger's contract).
String[] tags = posTagger.tag(sentence);