public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
/**
 * Builds a {@link POSModel} from the given stream.
 *
 * @param modelIn stream handed to the {@link POSModel} constructor
 * @return the model constructed from {@code modelIn}
 * @throws IOException propagated from the {@link POSModel} constructor
 */
@Override
protected POSModel loadModel(InputStream modelIn) throws IOException {
    final POSModel model = new POSModel(modelIn);
    return model;
}
}
/**
 * Builds a {@link POSModel} from the given stream.
 *
 * @param in stream handed to the {@link POSModel} constructor
 * @return the model constructed from {@code in}
 * @throws IOException propagated from the {@link POSModel} constructor
 */
@Override
protected POSModel loadModel(InputStream in) throws IOException {
    final POSModel model = new POSModel(in);
    return model;
}
}
public POSModel create(InputStream in) throws IOException { POSModel posModel = new POSModel(new UncloseableInputStream(in)); // The 1.6.x models write the non-default beam size into the model itself. // In 1.5.x the parser configured the beam size when the model was loaded, // this is not possible anymore with the new APIs Version version = posModel.getVersion(); if (version.getMajor() == 1 && version.getMinor() == 5) { if (posModel.getManifestProperty(BeamSearch.BEAM_SIZE_PARAMETER) == null) { Map<String, String> manifestInfoEntries = new HashMap<>(); // The version in the model must be correct or otherwise version // dependent code branches in other places fail manifestInfoEntries.put("OpenNLP-Version", "1.5.0"); posModel = new POSModel(posModel.getLanguage(), posModel.getPosModel(), 10, manifestInfoEntries, posModel.getFactory()); } } return posModel; }
@Test public void testPOSModelSerializationMaxent() throws IOException { POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.MAXENT); ByteArrayOutputStream out = new ByteArrayOutputStream(); try { posModel.serialize(out); } finally { out.close(); } POSModel recreatedPosModel = new POSModel(new ByteArrayInputStream(out.toByteArray())); // TODO: add equals to pos model }
/**
 * Loads the maxent POS model from the OpenNLP data directory and passes it to
 * {@code evalPosModel} together with the expected value for this model.
 *
 * @throws Exception if loading or evaluating the model fails
 */
@Test
public void evalMaxentModel() throws Exception {
    final File modelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-maxent.bin");
    final POSModel maxentModel = new POSModel(modelFile);

    final BigInteger expected = new BigInteger("231995214522232523777090597594904492687");
    evalPosModel(maxentModel, expected);
}
/**
 * Loads the perceptron POS model from the OpenNLP data directory and passes it to
 * {@code evalPosModel} together with the expected value for this model.
 *
 * @throws Exception if loading or evaluating the model fails
 */
@Test
public void evalPerceptronModel() throws Exception {
    final File modelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin");
    final POSModel perceptronModel = new POSModel(modelFile);

    final BigInteger expected = new BigInteger("209440430718727101220960491543652921728");
    evalPosModel(perceptronModel, expected);
}
@Test public void testPOSModelSerializationPerceptron() throws IOException { POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.PERCEPTRON); ByteArrayOutputStream out = new ByteArrayOutputStream(); try { posModel.serialize(out); } finally { out.close(); } POSModel recreatedPosModel = new POSModel(new ByteArrayInputStream(out.toByteArray())); // TODO: add equals to pos model } }
/**
 * Tags the sentence behind {@code sequence} with a tagger built from {@code model} and
 * returns the events generated from the sentence, the resulting tags and the sample's
 * additional context.
 *
 * @param sequence sequence whose source is a {@link POSSample}
 * @param model model used to build the temporary {@link POSTaggerME}
 * @return the generated events as an array
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
    // Raw-to-generic assignment; this is what the unchecked suppression covers.
    Sequence<POSSample> pss = sequence;
    POSSample sample = pss.getSource();
    POSTagger tagger = new POSTaggerME(new POSModel("x-unspecified", model, null, new POSTaggerFactory()));
    String[] sentence = sample.getSentence();
    Object[] ac = sample.getAddictionalContext();
    String[] tags = tagger.tag(sentence);
    // Return what toArray hands back: when the list fits, that is the array passed in;
    // when it does not, toArray allocates a larger one and the passed-in array stays empty.
    return POSSampleEventStream.generateEvents(sentence, tags, ac, pcg)
            .toArray(new Event[sentence.length]);
}
/**
 * Tags and chunks every sample of the line-wise stream with the perceptron POS model and
 * the chunker model, feeds the UTF-8 bytes of each chunk label into a message digest and
 * compares the digest with the expected value.
 *
 * @throws Exception if a model cannot be loaded, the stream cannot be read or the digest
 *         algorithm is unavailable
 */
@Test
public void evalChunkerModel() throws Exception {
    final MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);

    final File taggerModelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin");
    final POSTagger tagger = new POSTaggerME(new POSModel(taggerModelFile));

    final File chunkerModelFile = new File(getOpennlpDataDir(), "models-sf/en-chunker.bin");
    final Chunker chunker = new ChunkerME(new ChunkerModel(chunkerModelFile));

    try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
        for (LeipzigTestSample line = lines.read(); line != null; line = lines.read()) {
            POSSample tagged = new POSSample(line.getText(), tagger.tag(line.getText()));
            String[] chunks = chunker.chunk(tagged.getSentence(), tagged.getTags());
            for (int i = 0; i < chunks.length; i++) {
                digest.update(chunks[i].getBytes(StandardCharsets.UTF_8));
            }
        }
    }

    final BigInteger expected = new BigInteger("226003515785585284478071030961407561943");
    final BigInteger actual = new BigInteger(1, digest.digest());
    Assert.assertEquals(expected, actual);
}
/**
 * Builds a {@link POSModel} from the given stream.
 *
 * @param modelIn stream handed to the {@link POSModel} constructor
 * @return the model constructed from {@code modelIn}
 * @throws IOException propagated from the {@link POSModel} constructor
 */
@Override
protected POSModel loadModel(InputStream modelIn) throws IOException {
    final POSModel model = new POSModel(modelIn);
    return model;
}
}
/**
 * Builds a {@link POSModel} from the given stream.
 *
 * @param modelIn stream handed to the {@link POSModel} constructor
 * @return the model constructed from {@code modelIn}
 * @throws IOException propagated from the {@link POSModel} constructor
 */
@Override
protected POSModel loadModel(InputStream modelIn) throws IOException {
    final POSModel model = new POSModel(modelIn);
    return model;
}
}
/**
 * Trains a model with the default {@link POSTaggerFactory} and a tag dictionary, then checks
 * the factory's components both on the trained model and on a copy obtained by serializing
 * the model to memory and reading it back.
 *
 * @throws IOException if the dictionary cannot be read or (de)serialization fails
 */
@Test
public void testPOSTaggerWithDefaultFactory() throws IOException {
    final POSDictionary tagDictionary = POSDictionary.create(
            POSDictionaryTest.class.getResourceAsStream("TagDictionaryCaseSensitive.xml"));
    final POSModel trained = trainPOSModel(new POSTaggerFactory(null, null, tagDictionary));

    assertDefaultFactoryComponents(trained.getFactory());

    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    trained.serialize(buffer);
    final POSModel fromSerialized = new POSModel(new ByteArrayInputStream(buffer.toByteArray()));

    assertDefaultFactoryComponents(fromSerialized.getFactory());
}

/** Asserts that the factory exposes the default dictionary, context generator and validator. */
private static void assertDefaultFactoryComponents(POSTaggerFactory factory) {
    Assert.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
    Assert.assertTrue(factory.getPOSContextGenerator() != null);
    Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
}
/**
 * Trains a model with {@code DummyPOSTaggerFactory} and a dummy tag dictionary, then checks
 * the factory's components both on the trained model and on a copy obtained by serializing
 * the model to memory and reading it back.
 *
 * @throws IOException if the dictionary cannot be read or (de)serialization fails
 */
@Test
public void testPOSTaggerWithCustomFactory() throws IOException {
    final DummyPOSDictionary tagDictionary = new DummyPOSDictionary(POSDictionary.create(
            POSDictionaryTest.class.getResourceAsStream("TagDictionaryCaseSensitive.xml")));
    final POSModel trained = trainPOSModel(new DummyPOSTaggerFactory(tagDictionary));

    assertDummyFactoryComponents(trained.getFactory());

    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    trained.serialize(buffer);
    final POSModel fromSerialized = new POSModel(new ByteArrayInputStream(buffer.toByteArray()));

    assertDummyFactoryComponents(fromSerialized.getFactory());
}

/** Asserts that the factory exposes the dummy dictionary, context generator and validator. */
private static void assertDummyFactoryComponents(POSTaggerFactory factory) {
    Assert.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
    Assert.assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
    Assert.assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
}
/**
 * Creates a {@link POSTaggerME} from the model resource named by {@code taggerModelFile}.
 *
 * <p>The resource stream is closed once the model has been constructed, including when
 * construction fails.
 *
 * @return a new tagger backed by the loaded model
 * @throws IOException if the model cannot be read or the stream cannot be closed
 */
public static POSTagger getDefaultPosTagger() throws IOException {
    // The stream is only needed while the model is being read, so release it right after.
    try (java.io.InputStream modelIn = getResourceAsStream(taggerModelFile)) {
        return new POSTaggerME(new POSModel(modelIn));
    }
}
/**
 * Creates a tagger from the given model stream.
 *
 * <p>The model is constructed once; the {@code tagger} thread-local then builds a separate
 * {@link POSTaggerME} from that model for each thread that requests one.
 *
 * @param model Trained opennlp model (en-pos-maxent.bin)
 * @throws IOException propagated from the {@link POSModel} constructor
 */
public OpenNlpPosTagger(InputStream model) throws IOException {
    final POSModel loadedModel = new POSModel(model);
    tagger = ThreadLocal.withInitial(() -> {
        return new POSTaggerME(loadedModel);
    });
}
/**
 * Tags the sentence behind {@code sequence} with a tagger built from {@code model} and
 * returns the events generated from the sentence, the resulting tags and the sample's
 * additional context.
 *
 * @param sequence sequence whose source is a {@link POSSample}
 * @param model model used to build the temporary {@link POSTaggerME}
 * @return the generated events as an array
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
    // Raw-to-generic assignment; this is what the unchecked suppression covers.
    Sequence<POSSample> pss = sequence;
    POSSample sample = pss.getSource();
    POSTagger tagger = new POSTaggerME(new POSModel("x-unspecified", model, null, new POSTaggerFactory()));
    String[] sentence = sample.getSentence();
    Object[] ac = sample.getAddictionalContext();
    String[] tags = tagger.tag(sentence);
    // Return what toArray hands back: when the list fits, that is the array passed in;
    // when it does not, toArray allocates a larger one and the passed-in array stays empty.
    return POSSampleEventStream.generateEvents(sentence, tags, ac, pcg)
            .toArray(new Event[sentence.length]);
}
/**
 * Tags the sentence behind {@code sequence} with a tagger built from {@code model} and
 * returns the events generated from the sentence, the resulting tags and the sample's
 * additional context.
 *
 * @param sequence sequence whose source is a {@link POSSample}
 * @param model model used to build the temporary {@link POSTaggerME}
 * @return the generated events as an array
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
    // Raw-to-generic assignment; this is what the unchecked suppression covers.
    Sequence<POSSample> pss = sequence;
    POSSample sample = pss.getSource();
    POSTagger tagger = new POSTaggerME(new POSModel("x-unspecified", model, null, new POSTaggerFactory()));
    String[] sentence = sample.getSentence();
    Object[] ac = sample.getAddictionalContext();
    String[] tags = tagger.tag(sentence);
    // Return what toArray hands back: when the list fits, that is the array passed in;
    // when it does not, toArray allocates a larger one and the passed-in array stays empty.
    return POSSampleEventStream.generateEvents(sentence, tags, ac, pcg)
            .toArray(new Event[sentence.length]);
}