/**
 * Builds a name finder backed by an OpenNLP NER model that is loaded through the
 * class loader of this class.
 * <p>
 * When the model resource cannot be found, or reading it raises an {@link IOException},
 * the problem is logged and the {@code available} flag is not set; no exception
 * escapes the constructor for those two cases.
 *
 * @param nameType the entity type recognised by the given NER model
 * @param nerModelPath path to the NER model, resolved via the class loader
 */
public OpenNLPNameFinder(String nameType, String nerModelPath) {
  this.nameType = nameType;
  this.nameTypes = Collections.singleton(nameType);

  InputStream nerModelStream = getClass().getClassLoader().getResourceAsStream(nerModelPath);
  try {
    if (nerModelStream == null) {
      LOG.warn("Couldn't find model from {} using class loader", nerModelPath);
    } else {
      this.nameFinder = new NameFinderME(new TokenNameFinderModel(nerModelStream));
      this.available = true;
    }
  } catch (IOException e) {
    LOG.error(e.getMessage(), e);
  } finally {
    // closeQuietly is handed the stream even when the lookup returned null
    IOUtils.closeQuietly(nerModelStream);
  }
  LOG.info("{} NER : Available for service ? {}", nameType, available);
}
try { TokenNameFinderModel model = new TokenNameFinderModel(modelUrl); this.nameFinder = new NameFinderME(model); } catch (Exception e) { LOG.warn("Named Entity Extractor setup failed: {}", e.getMessage(), e);
/**
 * Initializes this instance: runs the superclass initialization, fetches the model
 * resource registered under {@link UimaUtil#MODEL_PARAMETER} and creates the name
 * finder from the model it provides.
 * <p>
 * Note: Do all initialization in this method, do not use the constructor.
 *
 * @throws ResourceInitializationException if the model resource cannot be accessed
 */
public void initialize() throws ResourceInitializationException {
  super.initialize();

  final TokenNameFinderModel model;
  try {
    Object resource = context.getResourceObject(UimaUtil.MODEL_PARAMETER);
    model = ((TokenNameFinderModelResource) resource).getModel();
  } catch (ResourceAccessException e) {
    // surface resource lookup failures as initialization failures, keeping the cause
    throw new ResourceInitializationException(e);
  }

  mNameFinder = new NameFinderME(model);
}
// Build a one-element finder array: a NameFinderME over the model read from the file
// named by the last element of args (presumably the command-line arguments - confirm).
nameFinders = new TokenNameFinder[] {new NameFinderME( new TokenNameFinderModel(new File(args[args.length - 1])))};
/**
 * Produces the events for the sentence behind the given sequence, using tags derived
 * from the names that a finder built on the supplied model detects in that sentence.
 *
 * @param sequence the sequence whose source is a {@code NameSample}
 * @param model the model wrapped into a temporary name finder model
 * @return an array with one slot per sentence token, filled from the generated events
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  TokenNameFinderModel wrappedModel = new TokenNameFinderModel(
      "x-unspecified", model, Collections.emptyMap(), null);
  TokenNameFinder finder = new NameFinderME(wrappedModel);

  // raw Sequence is cast here; the source is expected to be a NameSample
  String[] tokens = ((Sequence<NameSample>) sequence).getSource().getSentence();
  String[] outcomes = seqCodec.encode(finder.find(tokens), tokens.length);

  Event[] result = new Event[tokens.length];
  NameFinderEventStream.generateEvents(tokens, outcomes, pcg).toArray(result);
  return result;
}
new NameFinderME(model), listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()]));
/**
 * Runs a name finder built from the model over every line of the line-wise test stream
 * and asserts that a digest over all detected spans (type, start and end of each, in
 * detection order) equals the expected hash.
 *
 * @param model the name finder model under evaluation
 * @param expectedHash the expected digest, interpreted as a non-negative integer
 * @throws Exception if the digest algorithm is unavailable or reading the stream fails
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash) throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
  TokenNameFinder nameFinder = new NameFinderME(model);

  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    for (LeipzigTestSample sample = lines.read(); sample != null; sample = lines.read()) {
      for (Span span : nameFinder.find(sample.getText())) {
        // type, start and end are concatenated without separators before hashing
        String fingerprint = span.getType() + span.getStart() + span.getEnd();
        digest.update(fingerprint.getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  BigInteger actualHash = new BigInteger(1, digest.digest());
  Assert.assertEquals(expectedHash, actualHash);
}
// Fill slot i of the finder array with a new NameFinderME built from model.
nameFinders[i] = new NameFinderME(model);
/**
 * Evaluates a name finder built from the given model against test data read through a
 * {@code Conll02NameSampleStream} and asserts the resulting F-measure.
 * <p>
 * The sample stream is opened in a try-with-resources block so that it is closed
 * whether evaluation succeeds, throws, or the assertion fails.
 *
 * @param model the name finder model under evaluation
 * @param testData the file holding the test samples
 * @param lang the language passed to the sample stream
 * @param types the entity type flags passed to the sample stream
 * @param expectedFMeasure the F-measure the evaluation must reach, within 0.0001
 * @throws IOException if reading or closing the sample stream fails
 */
private void eval(TokenNameFinderModel model, File testData, LANGUAGE lang,
    int types, double expectedFMeasure) throws IOException {
  try (ObjectStream<NameSample> samples = new Conll02NameSampleStream(
      lang, new MarkableFileInputStreamFactory(testData), types)) {
    TokenNameFinderEvaluator evaluator = new TokenNameFinderEvaluator(new NameFinderME(model));
    evaluator.evaluate(samples);

    Assert.assertEquals(expectedFMeasure, evaluator.getFMeasure().getFMeasure(), 0.0001);
  }
}
new NameFinderME(model), listeners);
@Test public void testOnlyWithNamesTypeOverride() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]); Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]); Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNames() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, DEFAULT), names1[0]); Assert.assertEquals(new Span(2, 4, DEFAULT), names1[1]); Assert.assertEquals(new Span(4, 6, DEFAULT), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/**
 * Loads the model for the given classifier and creates the name finder that uses it.
 * <p>
 * The previous version declared a local stream that was never assigned and a
 * {@code finally} block that could therefore never close anything; both were dead
 * code and have been removed.
 *
 * @param classifier the classifier whose model is loaded via
 *     {@code NamedEntityRecognizer.loadClassifierModel}
 * @throws IOException kept from the original signature; presumably raised when loading
 *     the model fails (confirm against {@code loadClassifierModel})
 */
public void init(NamedEntityRecognizer.OpenNLPClassifier classifier) throws IOException {
  this.model = NamedEntityRecognizer.loadClassifierModel(classifier);
  this.finder = new NameFinderME(model);
}
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithEntitiesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT"); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = "NATO United States Barack Obama".split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNamesWithTypes.train. * The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNamesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, "person"), names1[0]); Assert.assertEquals(new Span(2, 4, "person"), names1[1]); Assert.assertEquals(new Span(4, 6, "person"), names1[2]); Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); TokenNameFinder nameFinder = new NameFinderME(nameFinderModel);
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel);
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel);
/**
 * Produces the events for the sentence behind the given sequence, using tags derived
 * from the names that a finder built on the supplied model detects in that sentence.
 *
 * @param sequence the sequence whose source is a {@code NameSample}
 * @param model the model wrapped into a temporary name finder model
 * @return an array with one slot per sentence token, filled from the generated events
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  TokenNameFinderModel wrappedModel = new TokenNameFinderModel(
      "x-unspecified", model, Collections.emptyMap(), null);
  TokenNameFinder finder = new NameFinderME(wrappedModel);

  // raw Sequence is cast here; the source is expected to be a NameSample
  String[] tokens = ((Sequence<NameSample>) sequence).getSource().getSentence();
  String[] outcomes = seqCodec.encode(finder.find(tokens), tokens.length);

  Event[] result = new Event[tokens.length];
  NameFinderEventStream.generateEvents(tokens, outcomes, pcg).toArray(result);
  return result;
}
/**
 * Produces the events for the sentence behind the given sequence, using tags derived
 * from the names that a finder built on the supplied model detects in that sentence.
 *
 * @param sequence the sequence whose source is a {@code NameSample}
 * @param model the model wrapped into a temporary name finder model
 * @return an array with one slot per sentence token, filled from the generated events
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  TokenNameFinderModel wrappedModel = new TokenNameFinderModel(
      "x-unspecified", model, Collections.emptyMap(), null);
  TokenNameFinder finder = new NameFinderME(wrappedModel);

  // raw Sequence is cast here; the source is expected to be a NameSample
  String[] tokens = ((Sequence<NameSample>) sequence).getSource().getSentence();
  String[] outcomes = seqCodec.encode(finder.find(tokens), tokens.length);

  Event[] result = new Event[tokens.length];
  NameFinderEventStream.generateEvents(tokens, outcomes, pcg).toArray(result);
  return result;
}