public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
@Test public void testPOSModelSerializationMaxent() throws IOException { POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.MAXENT); ByteArrayOutputStream out = new ByteArrayOutputStream(); try { posModel.serialize(out); } finally { out.close(); } POSModel recreatedPosModel = new POSModel(new ByteArrayInputStream(out.toByteArray())); // TODO: add equals to pos model }
/**
 * Creates a {@link POSModel} from the given input stream.
 * <p>
 * Models written by OpenNLP 1.5.x do not carry a beam size in their
 * manifest; for those, a manifest with an explicit OpenNLP-Version entry
 * and a beam size of 10 is injected so that version-dependent code paths
 * elsewhere behave consistently.
 *
 * @param in the stream to read the model from; wrapped in an
 *           {@code UncloseableInputStream}, presumably to keep the model
 *           loader from closing the caller's stream
 * @return the loaded (and possibly rebuilt) model
 * @throws IOException if reading the model fails
 */
public POSModel create(InputStream in) throws IOException {
    POSModel posModel = new POSModel(new UncloseableInputStream(in));
    // The 1.6.x models write the non-default beam size into the model itself.
    // In 1.5.x the parser configured the beam size when the model was loaded,
    // this is not possible anymore with the new APIs
    Version version = posModel.getVersion();
    if (version.getMajor() == 1 && version.getMinor() == 5) {
        if (posModel.getManifestProperty(BeamSearch.BEAM_SIZE_PARAMETER) == null) {
            Map<String, String> manifestInfoEntries = new HashMap<>();
            // The version in the model must be correct or otherwise version
            // dependent code branches in other places fail
            manifestInfoEntries.put("OpenNLP-Version", "1.5.0");
            // NOTE(review): 10 looks like the beam size the 1.5.x parser used
            // when loading models - confirm against the 1.5.x parser sources.
            posModel = new POSModel(posModel.getLanguage(), posModel.getPosModel(), 10,
                    manifestInfoEntries, posModel.getFactory());
        }
    }
    return posModel;
}
/**
 * Initializes the current instance with the provided model.
 *
 * @param model the model package providing the tagger model and its factory
 */
public POSTaggerME(POSModel model) {
    POSTaggerFactory factory = model.getFactory();

    // Use the beam size recorded in the model manifest when present,
    // otherwise fall back to the built-in default.
    String configuredBeamSize = model.getManifestProperty(BeamSearch.BEAM_SIZE_PARAMETER);
    int beamSize = (configuredBeamSize != null)
            ? Integer.parseInt(configuredBeamSize)
            : POSTaggerME.DEFAULT_BEAM_SIZE;

    modelPackage = model;
    contextGen = factory.getPOSContextGenerator(beamSize);
    tagDictionary = factory.getTagDictionary();
    size = beamSize;
    sequenceValidator = factory.getSequenceValidator();

    // Prefer a ready-made sequence model; otherwise wrap the plain event
    // model in a beam search of the configured width.
    if (model.getPosSequenceModel() == null) {
        this.model = new opennlp.tools.ml.BeamSearch<>(beamSize, model.getPosModel(), 0);
    } else {
        this.model = model.getPosSequenceModel();
    }
}
@Test
public void testPOSTaggerWithDefaultFactory() throws IOException {
    POSDictionary posDict = POSDictionary.create(POSDictionaryTest.class
        .getResourceAsStream("TagDictionaryCaseSensitive.xml"));

    POSModel posModel = trainPOSModel(new POSTaggerFactory(null, null, posDict));

    // The freshly trained model must expose the default factory components.
    assertDefaultFactoryComponents(posModel.getFactory());

    // ... and they must survive a serialization round trip.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    posModel.serialize(out);
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    POSModel fromSerialized = new POSModel(in);
    assertDefaultFactoryComponents(fromSerialized.getFactory());
}

// Checks the tag dictionary, context generator, and sequence validator of
// a default-configured factory.
private static void assertDefaultFactoryComponents(POSTaggerFactory factory) {
    Assert.assertTrue(factory.getTagDictionary() instanceof POSDictionary);
    Assert.assertTrue(factory.getPOSContextGenerator() != null);
    Assert.assertTrue(factory.getSequenceValidator() instanceof DefaultPOSSequenceValidator);
}
/**
 * Builds a {@link POSTaggerME} from the serialized model offered by the
 * surrounding model provider, and registers the model's tagset description.
 * <p>
 * The {@code tag::}/{@code end::} markers are documentation-include
 * anchors; do not move or remove them.
 *
 * @param aStream stream containing the serialized {@link POSModel}
 * @return a tagger instance backed by the loaded model
 * @throws Exception if the model cannot be read
 */
@Override
protected POSTaggerME produceResource(InputStream aStream) throws Exception {
    // Load the POS tagger model from the location the model provider offers
    POSModel model = new POSModel(aStream);
    // end::model-provider-decl[]

    // Extract tagset information from the model
    OpenNlpTagsetDescriptionProvider tsdp = new OpenNlpTagsetDescriptionProvider(
            getResourceMetaData().getProperty("pos.tagset"), POS.class, model.getPosModel());
    // Optional pattern used to split multi-part tags before registering them.
    if (getResourceMetaData().containsKey("pos.tagset.tagSplitPattern")) {
        tsdp.setTagSplitPattern(getResourceMetaData().getProperty(
                "pos.tagset.tagSplitPattern"));
    }
    addTagset(tsdp);

    if (printTagSet) {
        getContext().getLogger().log(INFO, tsdp.toString());
    }

    // tag::model-provider-decl[]
    // Create a new POS tagger instance from the loaded model
    return new POSTaggerME(model);
}
};
/**
 * Writes the given {@link POSModel} to the provided output stream by
 * delegating to the model's own serialization.
 *
 * @param artifact the model to serialize
 * @param out the stream to write the model to
 * @throws IOException if writing fails
 */
public void serialize(POSModel artifact, OutputStream out) throws IOException {
    artifact.serialize(out);
}
}
private POSTagger getPOSTagger(String language) { String modelName = languageConfig.getParameter(language,MODEL_NAME_PARAM); try { POSModel model; if(modelName == null){ //use the default model = openNLP.getPartOfSpeechModel(language); } else { model = openNLP.getModel(POSModel.class, modelName, null); } if(model != null) { log.debug("POS Tagger Model {} for lanugage '{}' version: {}", new Object[]{model.getClass().getSimpleName(), model.getLanguage(), model.getVersion() != null ? model.getVersion() : "undefined"}); return new POSTaggerME(model); } } catch (Exception e) { log.warn("Unable to load POS model for language '"+language+"'!",e); } log.debug("POS tagging Model for Language '{}' not available.", language); return null; }
/**
 * Returns all outcomes of the underlying event model for the token at
 * {@code index}, ordered from most to least probable.
 *
 * @param words  the tokens of the sentence
 * @param tags   the tags assigned so far (consumed by the context generator)
 * @param index  index of the token to evaluate
 * @param tprobs if non-null, filled with the probability of each returned
 *               tag, parallel to the returned array
 * @return the model outcomes sorted by descending probability
 * @throws UnsupportedOperationException if the model package does not
 *         contain an event model
 */
public String[] getOrderedTags(List<String> words, List<String> tags, int index, double[] tprobs) {
    // fixed typo in the exception message: "classifcation" -> "classification"
    if (modelPackage.getPosModel() == null) {
        throw new UnsupportedOperationException("This method can only be called if the "
            + "classification model is an event model!");
    }

    MaxentModel posModel = modelPackage.getPosModel();

    double[] probs = posModel.eval(contextGen.getContext(index,
        words.toArray(new String[words.size()]),
        tags.toArray(new String[tags.size()]), null));

    // Repeated selection: take the highest remaining probability, record
    // its outcome, then zero it out so the next pass finds the runner-up.
    String[] orderedTags = new String[probs.length];
    for (int i = 0; i < probs.length; i++) {
        int max = 0;
        for (int ti = 1; ti < probs.length; ti++) {
            if (probs[ti] > probs[max]) {
                max = ti;
            }
        }
        orderedTags[i] = posModel.getOutcome(max);
        if (tprobs != null) {
            tprobs[i] = probs[max];
        }
        probs[max] = 0;
    }
    return orderedTags;
}
/**
 * Retrieves the ngram dictionary.
 *
 * @return ngram dictionary or {@code null} if not used
 */
public Dictionary getNgramDictionary() {
    return getFactory() != null ? getFactory().getDictionary() : null;
}
@Test
public void testPOSTaggerWithCustomFactory() throws IOException {
    DummyPOSDictionary posDict = new DummyPOSDictionary(
        POSDictionary.create(POSDictionaryTest.class
            .getResourceAsStream("TagDictionaryCaseSensitive.xml")));

    POSModel posModel = trainPOSModel(new DummyPOSTaggerFactory(posDict));

    // The freshly trained model must expose the dummy factory components.
    assertDummyFactoryComponents(posModel.getFactory());

    // ... and they must survive a serialization round trip.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    posModel.serialize(out);
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    POSModel fromSerialized = new POSModel(in);
    assertDummyFactoryComponents(fromSerialized.getFactory());
}

// Checks the tag dictionary, context generator, and sequence validator of
// a factory configured with the dummy test implementations.
private static void assertDummyFactoryComponents(POSTaggerFactory factory) {
    Assert.assertTrue(factory.getTagDictionary() instanceof DummyPOSDictionary);
    Assert.assertTrue(factory.getPOSContextGenerator() instanceof DummyPOSContextGenerator);
    Assert.assertTrue(factory.getSequenceValidator() instanceof DummyPOSSequenceValidator);
}
/**
 * Initializes the current instance with the provided model.
 *
 * @param model the model package providing the tagger model and its factory
 */
public POSTaggerME(POSModel model) {
    POSTaggerFactory factory = model.getFactory();

    // Use the beam size recorded in the model manifest when present,
    // otherwise fall back to the built-in default.
    String configuredBeamSize = model.getManifestProperty(BeamSearch.BEAM_SIZE_PARAMETER);
    int beamSize = (configuredBeamSize != null)
            ? Integer.parseInt(configuredBeamSize)
            : POSTaggerME.DEFAULT_BEAM_SIZE;

    modelPackage = model;
    contextGen = factory.getPOSContextGenerator(beamSize);
    tagDictionary = factory.getTagDictionary();
    size = beamSize;
    sequenceValidator = factory.getSequenceValidator();

    // Prefer a ready-made sequence model; otherwise wrap the plain event
    // model in a beam search of the configured width.
    if (model.getPosSequenceModel() == null) {
        this.model = new opennlp.tools.ml.BeamSearch<>(beamSize, model.getPosModel(), 0);
    } else {
        this.model = model.getPosSequenceModel();
    }
}
/**
 * Writes the given {@link POSModel} to the provided output stream by
 * delegating to the model's own serialization.
 *
 * @param artifact the model to serialize
 * @param out the stream to write the model to
 * @throws IOException if writing fails
 */
public void serialize(POSModel artifact, OutputStream out) throws IOException {
    artifact.serialize(out);
}
}
/**
 * Returns all outcomes of the underlying event model for the token at
 * {@code index}, ordered from most to least probable.
 *
 * @param words  the tokens of the sentence
 * @param tags   the tags assigned so far (consumed by the context generator)
 * @param index  index of the token to evaluate
 * @param tprobs if non-null, filled with the probability of each returned
 *               tag, parallel to the returned array
 * @return the model outcomes sorted by descending probability
 * @throws UnsupportedOperationException if the model package does not
 *         contain an event model
 */
public String[] getOrderedTags(List<String> words, List<String> tags, int index, double[] tprobs) {
    // fixed typo in the exception message: "classifcation" -> "classification"
    if (modelPackage.getPosModel() == null) {
        throw new UnsupportedOperationException("This method can only be called if the "
            + "classification model is an event model!");
    }

    MaxentModel posModel = modelPackage.getPosModel();

    double[] probs = posModel.eval(contextGen.getContext(index,
        words.toArray(new String[words.size()]),
        tags.toArray(new String[tags.size()]), null));

    // Repeated selection: take the highest remaining probability, record
    // its outcome, then zero it out so the next pass finds the runner-up.
    String[] orderedTags = new String[probs.length];
    for (int i = 0; i < probs.length; i++) {
        int max = 0;
        for (int ti = 1; ti < probs.length; ti++) {
            if (probs[ti] > probs[max]) {
                max = ti;
            }
        }
        orderedTags[i] = posModel.getOutcome(max);
        if (tprobs != null) {
            tprobs[i] = probs[max];
        }
        probs[max] = 0;
    }
    return orderedTags;
}
/**
 * Retrieves the ngram dictionary.
 *
 * @return ngram dictionary or {@code null} if not used
 */
public Dictionary getNgramDictionary() {
    return getFactory() != null ? getFactory().getDictionary() : null;
}
public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
/**
 * Creates a {@link POSModel} from the given input stream.
 * <p>
 * Models written by OpenNLP 1.5.x do not carry a beam size in their
 * manifest; for those, a manifest with an explicit OpenNLP-Version entry
 * and a beam size of 10 is injected so that version-dependent code paths
 * elsewhere behave consistently.
 *
 * @param in the stream to read the model from; wrapped in an
 *           {@code UncloseableInputStream}, presumably to keep the model
 *           loader from closing the caller's stream
 * @return the loaded (and possibly rebuilt) model
 * @throws IOException if reading the model fails
 */
public POSModel create(InputStream in) throws IOException {
    POSModel posModel = new POSModel(new UncloseableInputStream(in));
    // The 1.6.x models write the non-default beam size into the model itself.
    // In 1.5.x the parser configured the beam size when the model was loaded,
    // this is not possible anymore with the new APIs
    Version version = posModel.getVersion();
    if (version.getMajor() == 1 && version.getMinor() == 5) {
        if (posModel.getManifestProperty(BeamSearch.BEAM_SIZE_PARAMETER) == null) {
            Map<String, String> manifestInfoEntries = new HashMap<>();
            // The version in the model must be correct or otherwise version
            // dependent code branches in other places fail
            manifestInfoEntries.put("OpenNLP-Version", "1.5.0");
            // NOTE(review): 10 looks like the beam size the 1.5.x parser used
            // when loading models - confirm against the 1.5.x parser sources.
            posModel = new POSModel(posModel.getLanguage(), posModel.getPosModel(), 10,
                    manifestInfoEntries, posModel.getFactory());
        }
    }
    return posModel;
}
@Test public void testPOSModelSerializationPerceptron() throws IOException { POSModel posModel = POSTaggerMETest.trainPOSModel(ModelType.PERCEPTRON); ByteArrayOutputStream out = new ByteArrayOutputStream(); try { posModel.serialize(out); } finally { out.close(); } POSModel recreatedPosModel = new POSModel(new ByteArrayInputStream(out.toByteArray())); // TODO: add equals to pos model } }
/**
 * Initializes the current instance with the provided model.
 *
 * @param model the model package providing the tagger model and its factory
 */
public POSTaggerME(POSModel model) {
    POSTaggerFactory factory = model.getFactory();

    // Use the beam size recorded in the model manifest when present,
    // otherwise fall back to the built-in default.
    String configuredBeamSize = model.getManifestProperty(BeamSearch.BEAM_SIZE_PARAMETER);
    int beamSize = (configuredBeamSize != null)
            ? Integer.parseInt(configuredBeamSize)
            : POSTaggerME.DEFAULT_BEAM_SIZE;

    modelPackage = model;
    contextGen = factory.getPOSContextGenerator(beamSize);
    tagDictionary = factory.getTagDictionary();
    size = beamSize;
    sequenceValidator = factory.getSequenceValidator();

    // Prefer a ready-made sequence model; otherwise wrap the plain event
    // model in a beam search of the configured width.
    if (model.getPosSequenceModel() == null) {
        this.model = new opennlp.tools.ml.BeamSearch<>(beamSize, model.getPosModel(), 0);
    } else {
        this.model = model.getPosSequenceModel();
    }
}
// Persist the trained POS model as "pos-model.bin" in the resources folder.
// NOTE(review): fragment - resourcesFolder and posModel are defined outside
// this view; POSModel.serialize(File) writes the model to disk.
File posModelFile = new File(resourcesFolder.toFile(),"pos-model.bin");
posModel.serialize(posModelFile);