public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
/**
 * Initializes a new instance, wrapping the given model in a
 * maximum-entropy POS tagger ({@link POSTaggerME}).
 *
 * @param aPosModel a POSTagger model; must not be {@code null}.
 */
public POSTaggerNameFeatureGenerator(POSModel aPosModel) {
    this.posTagger = new POSTaggerME(aPosModel);
}
new opennlp.tools.postag.POSTaggerME(model), missclassifiedListener, reportListener);
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}.
 *
 * @param model the trained parser model providing build/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}
private void evalPosModel(POSModel model, BigInteger expectedHash) throws Exception { // break the input stream into sentences // The input stream is tokenized and can be processed here directly MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM); POSTagger tagger = new POSTaggerME(model); try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) { LeipzigTestSample line; while ((line = lines.read()) != null) { String[] tags = tagger.tag(line.getText()); for (String tag : tags) { digest.update(tag.getBytes(StandardCharsets.UTF_8)); } } } Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest())); }
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}. This variant
 * additionally wires in the model's attach model.
 *
 * @param model the trained parser model providing build/attach/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}
/**
 * Initializes the current instance with the given context.
 * <p>
 * Note: Do all initialization in this method, do not use the constructor.
 * <p>
 * Resolves the POS model from the UIMA resource manager and builds the
 * tagger from it; resource lookup failures are wrapped in
 * {@link ResourceInitializationException}.
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    this.context = context;
    this.logger = context.getLogger();
    if (this.logger.isLoggable(Level.INFO)) {
        this.logger.log(Level.INFO, "Initializing the OpenNLP " + "Part of Speech annotator.");
    }
    POSModel model;
    try {
        // The model is provided as a shared UIMA resource object.
        POSModelResource modelResource = (POSModelResource) context
            .getResourceObject(UimaUtil.MODEL_PARAMETER);
        model = modelResource.getModel();
    } catch (ResourceAccessException e) {
        throw new ResourceInitializationException(e);
    }
    Integer beamSize = AnnotatorUtil.getOptionalIntegerParameter(context,
        UimaUtil.BEAM_SIZE_PARAMETER);
    if (beamSize == null) {
        beamSize = POSTaggerME.DEFAULT_BEAM_SIZE;
    }
    // NOTE(review): beamSize is resolved above but never passed to the
    // POSTaggerME constructor, so the BEAM_SIZE_PARAMETER has no effect here.
    // Presumably the beam size should be applied via the model/factory or a
    // beam-aware constructor in this OpenNLP version — confirm and fix, or
    // remove the parameter lookup.
    this.posTagger = new POSTaggerME(model);
}
/**
 * Evaluates the given POS model against a CoNLL-X formatted gold-standard
 * file and asserts the measured word accuracy.
 *
 * @param model the POS model under evaluation
 * @param testData CoNLL-X test file with gold tags
 * @param expectedAccuracy expected word accuracy (compared within 1e-4)
 * @throws IOException if the test data cannot be read
 */
private void eval(POSModel model, File testData, double expectedAccuracy) throws IOException {
    ObjectStream<POSSample> goldSamples = new ConllXPOSSampleStream(
        new MarkableFileInputStreamFactory(testData), StandardCharsets.UTF_8);
    POSEvaluator posEvaluator = new POSEvaluator(new POSTaggerME(model));
    posEvaluator.evaluate(goldSamples);
    Assert.assertEquals(expectedAccuracy, posEvaluator.getWordAccuracy(), 0.0001);
}
/**
 * Re-tags the sequence's source sentence with a fresh tagger built around
 * {@code model}, then regenerates the training events for that sentence from
 * the new predictions.
 * <p>
 * NOTE(review): {@code sequence} arrives as a raw type and is cast unchecked
 * to {@code Sequence<POSSample>} — presumably callers always supply POS
 * sequences; confirm against call sites. {@code pcg} is a field defined
 * elsewhere in this class (a context generator, by its usage here).
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
    Sequence<POSSample> pss = sequence;
    // Wrap the raw maxent model in a throwaway POSModel/POSTaggerME so it can
    // be applied to the sentence; "x-unspecified" marks the language unknown.
    POSTagger tagger = new POSTaggerME(new POSModel("x-unspecified", model, null, new POSTaggerFactory()));
    String[] sentence = pss.getSource().getSentence();
    // "AddictionalContext" is the (misspelled) upstream API name for the
    // per-token additional context array.
    Object[] ac = pss.getSource().getAddictionalContext();
    String[] tags = tagger.tag(pss.getSource().getSentence());
    Event[] events = new Event[sentence.length];
    POSSampleEventStream.generateEvents(sentence, tags, ac, pcg)
        .toArray(events);
    return events;
}
/**
 * Tags each line-wise test sample with the perceptron POS model, chunks the
 * tagged sentence, folds all predicted chunk labels into a digest, and
 * asserts the digest matches the pinned reference value.
 */
@Test
public void evalChunkerModel() throws Exception {
    MessageDigest hasher = MessageDigest.getInstance(HASH_ALGORITHM);
    POSTagger posTagger = new POSTaggerME(new POSModel(
        new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin")));
    Chunker chunkerMe = new ChunkerME(new ChunkerModel(
        new File(getOpennlpDataDir(), "models-sf/en-chunker.bin")));
    try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
        for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
            String[] tokens = sample.getText();
            POSSample tagged = new POSSample(tokens, posTagger.tag(tokens));
            // Fold every predicted chunk label into the running digest.
            for (String chunkLabel : chunkerMe.chunk(tagged.getSentence(), tagged.getTags())) {
                hasher.update(chunkLabel.getBytes(StandardCharsets.UTF_8));
            }
        }
    }
    Assert.assertEquals(new BigInteger("226003515785585284478071030961407561943"),
        new BigInteger(1, hasher.digest()));
}
POSTaggerME tagger = new POSTaggerME(model);
params, this.factory); POSEvaluator evaluator = new POSEvaluator(new POSTaggerME(model), listeners);
/**
 * Loads the bundled English maxent POS model ({@code en-pos-maxent.bin})
 * from the classpath and initializes the tagger.
 * <p>
 * Fixes two defects in the original: the model stream was never closed, and
 * a missing resource produced an NPE inside {@code POSModel} instead of a
 * descriptive error.
 *
 * @throws IOException if the model resource is missing or cannot be read
 */
public OpenNlpTokenExtraction() throws IOException {
    try (InputStream modelIn =
            this.getClass().getClassLoader().getResourceAsStream("en-pos-maxent.bin")) {
        if (modelIn == null) {
            // Fail fast with a clear message instead of an NPE during model parsing.
            throw new IOException("Classpath resource 'en-pos-maxent.bin' not found");
        }
        POSModel model = new POSModel(modelIn);
        posTagger = new POSTaggerME(model);
    }
}
/**
 * Constructs the POSModelResource by reading a serialized {@link POSModel}
 * from the given input stream and building a ready-to-use tagger from it.
 * <p>
 * The caller retains ownership of {@code in} and is responsible for closing it.
 *
 * @param in the input stream containing a serialized POS model
 * @throws IOException if the stream cannot be read or is not a valid model
 */
public POSModelResource(InputStream in) throws IOException {
    posModel = new POSModel(in);
    posTagger = new POSTaggerME(posModel);
}
/**
 * Trains a maxent POS model and verifies the predicted tag sequence for a
 * fixed sample sentence, token by token.
 */
@Test
public void testPOSTagger() throws IOException {
    POSTagger tagger = new POSTaggerME(trainPOSModel(ModelType.MAXENT));

    String[] sentence = {"The", "driver", "got", "badly", "injured", "."};
    String[] expected = {"DT", "NN", "VBD", "RB", "VBN", "."};

    String[] predicted = tagger.tag(sentence);

    Assert.assertEquals(expected.length, predicted.length);
    for (int i = 0; i < expected.length; i++) {
        Assert.assertEquals(expected[i], predicted[i]);
    }
}
/**
 * Creates a tagger by loading and validating a POS model from the local
 * filesystem.
 *
 * @param modelPath path to the directory containing the model files.
 * @param modelManifest manifest identifying the POS model to load.
 * @param validator validator applied to the model before loading.
 * @throws ModelLoaderException if the model cannot be validated or loaded.
 */
public DefaultPartsOfSpeechTagger(String modelPath, StandardModelManifest modelManifest, ModelValidator validator) throws ModelLoaderException {
    LocalModelLoader<POSModel> posModelLoader = new LocalModelLoader<POSModel>(validator, modelPath);
    POSModel model = posModelLoader.getModel(modelManifest, POSModel.class);
    tagger = new POSTaggerME(model);
}
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}.
 *
 * @param model the trained parser model providing build/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}. This variant
 * additionally wires in the model's attach model.
 *
 * @param model the trained parser model providing build/attach/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}
/**
 * Creates a parser from a pre-trained {@link ParserModel}.
 * <p>
 * Delegates to the full constructor, instantiating the tagger and chunker
 * components from the sub-models packaged inside {@code model}. This variant
 * additionally wires in the model's attach model.
 *
 * @param model the trained parser model providing build/attach/check models,
 *     tagger model, chunker model, and head rules.
 * @param beamSize the beam size used during parse search.
 * @param advancePercentage the probability mass threshold for advancing parses.
 */
public Parser(ParserModel model, int beamSize, double advancePercentage) {
    this(model.getBuildModel(), model.getAttachModel(), model.getCheckModel(),
        new POSTaggerME(model.getParserTaggerModel()),
        new ChunkerME(model.getParserChunkerModel()),
        model.getHeadRules(), beamSize, advancePercentage);
}