/**
 * Creates a model by delegating to the full constructor, supplying
 * {@link NameFinderME#DEFAULT_BEAM_SIZE}, a new {@link BioCodec} and a new
 * {@link TokenNameFinderFactory} for the arguments this overload does not take.
 *
 * @param languageCode forwarded unchanged to the delegated constructor
 * @param nameFinderModel forwarded unchanged to the delegated constructor
 * @param generatorDescriptor forwarded unchanged to the delegated constructor
 * @param resources forwarded unchanged to the delegated constructor
 * @param manifestInfoEntries forwarded unchanged to the delegated constructor
 */
public TokenNameFinderModel(
    String languageCode,
    MaxentModel nameFinderModel,
    byte[] generatorDescriptor,
    Map<String, Object> resources,
    Map<String, String> manifestInfoEntries) {
  this(
      languageCode,
      nameFinderModel,
      NameFinderME.DEFAULT_BEAM_SIZE,
      generatorDescriptor,
      resources,
      manifestInfoEntries,
      new BioCodec(),
      new TokenNameFinderFactory());
}
/**
 * Creates a {@link TokenNameFinderFactory} in its default configuration:
 * the sequence codec is set to a new {@link BioCodec}.
 */
public TokenNameFinderFactory() {
  seqCodec = new BioCodec();
}
/**
 * Returns the sequence codec reported by the factory of this object.
 *
 * @return the result of {@code getFactory().getSequenceCodec()}
 */
public SequenceCodec<String> getSequenceCodec() {
  SequenceCodec<String> codec = getFactory().getSequenceCodec();
  return codec;
}
/**
 * Regenerates the events of a sequence using a name finder backed by the
 * given model.
 * <p>
 * The model is wrapped in a {@link TokenNameFinderModel} created with
 * {@code "x-unspecified"}, an empty map and {@code null}. The sentence of the
 * sequence's source sample is tagged with a {@link NameFinderME} built on that
 * model, the result is encoded with {@code seqCodec}, and events are generated
 * from the sentence, the encoded tags and {@code pcg}.
 *
 * @param sequence the sequence whose source supplies the sentence; it is cast
 *     (unchecked) to {@code Sequence<NameSample>}
 * @param model the model handed to the temporary {@link TokenNameFinderModel}
 * @return an array allocated with the sentence length, into which the
 *     generated events are copied
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  TokenNameFinderModel wrappedModel = new TokenNameFinderModel(
      "x-unspecified", model, Collections.emptyMap(), null);
  TokenNameFinder nameFinder = new NameFinderME(wrappedModel);

  String[] tokens = ((Sequence<NameSample>) sequence).getSource().getSentence();
  String[] outcomes = seqCodec.encode(nameFinder.find(tokens), tokens.length);

  // Fill the pre-sized array from the generated events; the value returned
  // by toArray is deliberately not used, only the array allocated here.
  Event[] result = new Event[tokens.length];
  NameFinderEventStream.generateEvents(tokens, outcomes, pcg).toArray(result);
  return result;
}
/**
 * Loads a {@link TokenNameFinderModel} from the given stream.
 *
 * @param modelIn the stream passed to the {@link TokenNameFinderModel} constructor
 * @return the newly constructed model
 * @throws IOException declared for failures while constructing the model from the stream
 */
@Override
protected TokenNameFinderModel loadModel(InputStream modelIn) throws IOException {
  TokenNameFinderModel loaded = new TokenNameFinderModel(modelIn);
  return loaded;
}
public NameFinderME(TokenNameFinderModel model) { TokenNameFinderFactory factory = model.getFactory(); seqCodec = factory.createSequenceCodec(); sequenceValidator = seqCodec.createSequenceValidator(); this.model = model.getNameFinderSequenceModel(); contextGenerator = factory.createContextGenerator(); // TODO: We should deprecate this. And come up with a better solution! contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); }
/**
 * Checks that a {@link NameSampleTypeFilter} created with an empty type
 * array returns a sample without any names.
 */
@Test
public void testNoFilter() throws IOException {
  final String[] noTypes = new String[0];
  filter = new NameSampleTypeFilter(noTypes, sampleStream(text));

  NameSample sample = filter.read();
  int nameCount = sample.getNames().length;

  Assert.assertEquals(0, nameCount);
}
/**
 * Finds names in the given tokens by delegating to {@code mNameFinder}.
 *
 * @param cas not used by this implementation
 * @param tokens the tokens handed to {@code mNameFinder.find}
 * @return the spans returned by {@code mNameFinder.find(tokens)}
 */
protected Span[] find(CAS cas, String[] tokens) {
  Span[] names = mNameFinder.find(tokens);
  return names;
}
/**
 * Returns the sentence of the given sample, as provided by
 * {@code sample.getSentence()}.
 *
 * @param sample the sample to read the sentence from
 * @return the value of {@code sample.getSentence()}
 */
@Override protected String[] toSentence(NameSample sample) { return sample.getSentence(); } }
/**
 * Creates a {@link RegexNameFinder} configured from the given default
 * regexes, which are converted with {@code defaultsToMap}.
 *
 * @param defaults the OpenNLP default regexes; must not be {@code null}
 * @return a new {@link RegexNameFinder}
 * @throws NullPointerException if {@code defaults} is {@code null}
 */
public static synchronized RegexNameFinder getDefaultRegexNameFinders(
    DEFAULT_REGEX_NAME_FINDER... defaults) {
  DEFAULT_REGEX_NAME_FINDER[] checkedDefaults =
      Objects.requireNonNull(defaults, "defaults must not be null");
  return new RegexNameFinder(defaultsToMap(checkedDefaults));
}
/**
 * Creates a sequence stream by wrapping the feature generator in a new
 * {@link DefaultNameContextGenerator} and delegating to the constructor that
 * accepts a context generator.
 *
 * @param psi forwarded unchanged to the delegated constructor
 * @param featureGen the feature generator wrapped in a {@link DefaultNameContextGenerator}
 * @param useOutcomes forwarded unchanged to the delegated constructor
 * @throws IOException declared by the delegated constructor
 */
public NameSampleSequenceStream(
    ObjectStream<NameSample> psi,
    AdaptiveFeatureGenerator featureGen,
    boolean useOutcomes) throws IOException {
  this(
      psi,
      new DefaultNameContextGenerator(featureGen),
      useOutcomes);
}
/**
 * Discards all adaptive data gathered during earlier calls to one of the
 * find methods, by delegating to the context generator.
 * <p>
 * This method is typically called at the end of a document.
 */
public void clearAdaptiveData() {
  this.contextGenerator.clearAdaptiveData();
}
/**
 * Finds spans directly in raw text, so the caller does not have to tokenize
 * first. The returned spans carry character indices rather than word
 * indices.
 *
 * @param text the text handed to {@code getAnnotations}
 * @return the spans returned by {@code getAnnotations(text)}
 */
public Span[] find(String text) {
  Span[] annotations = getAnnotations(text);
  return annotations;
}
/**
 * Creates a new {@link BilouNameFinderSequenceValidator}.
 *
 * @return a freshly constructed validator instance
 */
@Override
public SequenceValidator<String> createSequenceValidator() {
  SequenceValidator<String> validator = new BilouNameFinderSequenceValidator();
  return validator;
}
/**
 * Creates a factory and initializes it by passing all three arguments to
 * {@code init}.
 *
 * @param featureGeneratorBytes forwarded unchanged to {@code init}
 * @param resources forwarded unchanged to {@code init}
 * @param seqCodec forwarded unchanged to {@code init}
 */
public TokenNameFinderFactory(
    byte[] featureGeneratorBytes,
    final Map<String, Object> resources,
    SequenceCodec<String> seqCodec) {
  this.init(featureGeneratorBytes, resources, seqCodec);
}
/**
 * Creates a sample by calling {@code createSimpleNameSample(false)}.
 *
 * @return the sample returned by {@code createSimpleNameSample(false)}
 */
public static NameSample createPredSample() { return createSimpleNameSample(false); } }
/**
 * Loads a {@link TokenNameFinderModel} from the given stream.
 *
 * @param in the stream passed to the {@link TokenNameFinderModel} constructor
 * @return the newly constructed model
 * @throws IOException declared for failures while constructing the model from the stream
 */
@Override
protected TokenNameFinderModel loadModel(InputStream in) throws IOException {
  TokenNameFinderModel loaded = new TokenNameFinderModel(in);
  return loaded;
}
/**
 * Creates a cross validator by delegating to the constructor that takes an
 * explicit codec, supplying a new {@link BioCodec} for it.
 *
 * @param languageCode forwarded unchanged to the delegated constructor
 * @param type forwarded unchanged to the delegated constructor
 * @param trainParams forwarded unchanged to the delegated constructor
 * @param featureGeneratorBytes forwarded unchanged to the delegated constructor
 * @param resources forwarded unchanged to the delegated constructor
 * @param listeners forwarded unchanged to the delegated constructor
 */
public TokenNameFinderCrossValidator(
    String languageCode,
    String type,
    TrainingParameters trainParams,
    byte[] featureGeneratorBytes,
    Map<String, Object> resources,
    TokenNameFinderEvaluationMonitor... listeners) {
  this(
      languageCode,
      type,
      trainParams,
      featureGeneratorBytes,
      resources,
      new BioCodec(),
      listeners);
}
/**
 * Creates a sequence stream by wrapping the feature generator in a new
 * {@link DefaultNameContextGenerator} and delegating with {@code true} as
 * the third argument.
 *
 * @param psi forwarded unchanged to the delegated constructor
 * @param featureGen the feature generator wrapped in a {@link DefaultNameContextGenerator}
 * @throws IOException declared by the delegated constructor
 */
public NameSampleSequenceStream(
    ObjectStream<NameSample> psi,
    AdaptiveFeatureGenerator featureGen) throws IOException {
  this(
      psi,
      new DefaultNameContextGenerator(featureGen),
      true);
}
/**
 * Creates a sequence stream by delegating to the constructor that takes an
 * explicit codec, supplying a new {@link BioCodec} for it.
 *
 * @param psi forwarded unchanged to the delegated constructor
 * @param pcg forwarded unchanged to the delegated constructor
 * @param useOutcomes forwarded unchanged to the delegated constructor
 * @throws IOException declared by the delegated constructor
 */
public NameSampleSequenceStream(
    ObjectStream<NameSample> psi,
    NameContextGenerator pcg,
    boolean useOutcomes) throws IOException {
  this(
      psi,
      pcg,
      useOutcomes,
      new BioCodec());
}