/**
 * Reads a {@link TokenNameFinderModel} from the given stream.
 *
 * @param modelIn stream handed to the {@code TokenNameFinderModel} constructor
 * @return the model constructed from {@code modelIn}
 * @throws IOException declared for failures raised while constructing the model
 */
@Override
protected TokenNameFinderModel loadModel(InputStream modelIn) throws IOException {
  TokenNameFinderModel loaded = new TokenNameFinderModel(modelIn);
  return loaded;
}
public NameFinderME(TokenNameFinderModel model) { TokenNameFinderFactory factory = model.getFactory(); seqCodec = factory.createSequenceCodec(); sequenceValidator = seqCodec.createSequenceValidator(); this.model = model.getNameFinderSequenceModel(); contextGenerator = factory.createContextGenerator(); // TODO: We should deprecate this. And come up with a better solution! contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); }
private void init(Object nameFinderModel, byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries, SequenceCodec<String> seqCodec) { Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); manifest.put(SEQUENCE_CODEC_CLASS_NAME_PARAMETER, seqCodec.getClass().getName()); artifactMap.put(MAXENT_MODEL_ENTRY_NAME, nameFinderModel); if (generatorDescriptor != null && generatorDescriptor.length > 0) artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, generatorDescriptor); if (resources != null) { // The resource map must not contain key which are already taken // like the name finder maxent model name if (resources.containsKey(MAXENT_MODEL_ENTRY_NAME) || resources.containsKey(GENERATOR_DESCRIPTOR_ENTRY_NAME)) { throw new IllegalArgumentException(); } // TODO: Add checks to not put resources where no serializer exists, // make that case fail here, should be done in the BaseModel artifactMap.putAll(resources); } checkArtifactMap(); }
@Override protected TokenNameFinder produceResource(InputStream aStream) throws Exception { TokenNameFinderModel model = new TokenNameFinderModel(aStream); if (printTagSet) { OpenNlpSequenceTagsetDescriptionProvider tsdp = new OpenNlpSequenceTagsetDescriptionProvider( null, NamedEntity.class, model.getNameFinderSequenceModel()); tsdp.setTagSplitPattern("-(?=[^-]*$)"); // FIXME addTagset(tsdp) getContext().getLogger().log(INFO, tsdp.toString()); } return new NameFinderME(model); } };
/**
 * Tells whether the sequence model of the given name finder model lists
 * {@code NameFinderME.OTHER} among its outcomes.
 *
 * @param nameFinderModel the model whose outcomes are inspected
 * @return {@code true} if at least one outcome equals {@code NameFinderME.OTHER}
 */
private boolean hasOtherAsOutcome(TokenNameFinderModel nameFinderModel) {
  String[] labels = nameFinderModel.getNameFinderSequenceModel().getOutcomes();

  boolean found = false;
  // Stop scanning as soon as the first match is seen.
  for (int i = 0; i < labels.length && !found; i++) {
    found = labels[i].equals(NameFinderME.OTHER);
  }
  return found;
}
/**
 * Registers the artifact serializers for this type: first whatever the
 * superclass registers, then the entries added by
 * {@code PooledGenericModelSerializer.register}.
 *
 * @param serializers the map that receives the serializer registrations
 */
@SuppressWarnings("rawtypes")
@Override
protected void createArtifactSerializers(
    Map<String,ArtifactSerializer> serializers) {
  // Superclass registrations go in first; the pooled serializer is registered afterwards.
  super.createArtifactSerializers(serializers);
  PooledGenericModelSerializer.register(serializers);
}
}
/**
 * Returns the sequence codec provided by this object's factory.
 *
 * @return the result of {@code getFactory().getSequenceCodec()}
 */
public SequenceCodec<String> getSequenceCodec() {
  return getFactory().getSequenceCodec();
}
/**
 * Registers the artifact serializers for this type: first whatever the
 * superclass registers, then the entries added by
 * {@code PooledGenericModelSerializer.register}.
 *
 * @param serializers the map that receives the serializer registrations
 */
@SuppressWarnings("rawtypes")
@Override
protected void createArtifactSerializers(
    Map<String,ArtifactSerializer> serializers) {
  // Superclass registrations go in first; the pooled serializer is registered afterwards.
  super.createArtifactSerializers(serializers);
  PooledGenericModelSerializer.register(serializers);
}
}
/**
 * Checks whether the outcomes of the given model are compatible with the
 * sequence codec created by this object's factory.
 *
 * @param model the model whose outcomes are collected and checked
 * @return the result of the codec's {@code areOutcomesCompatible} check
 */
private boolean isModelValid(MaxentModel model) {
  final int outcomeCount = model.getNumOutcomes();
  String[] labels = new String[outcomeCount];

  // Copy every outcome label, in index order, into a plain array for the codec.
  int idx = 0;
  while (idx < outcomeCount) {
    labels[idx] = model.getOutcome(idx);
    idx++;
  }

  return getFactory().createSequenceCodec().areOutcomesCompatible(labels);
}
/**
 * Reads a {@link TokenNameFinderModel} from the given stream.
 *
 * @param in stream handed to the {@code TokenNameFinderModel} constructor
 * @return the model constructed from {@code in}
 * @throws IOException declared for failures raised while constructing the model
 */
@Override
protected TokenNameFinderModel loadModel(InputStream in) throws IOException {
  TokenNameFinderModel loaded = new TokenNameFinderModel(in);
  return loaded;
}
public NameFinderME(TokenNameFinderModel model) { TokenNameFinderFactory factory = model.getFactory(); seqCodec = factory.createSequenceCodec(); sequenceValidator = seqCodec.createSequenceValidator(); this.model = model.getNameFinderSequenceModel(); contextGenerator = factory.createContextGenerator(); // TODO: We should deprecate this. And come up with a better solution! contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); }
// Starts out as an empty, mutable map of named resources; the code that fills it
// is outside this excerpt.
Map<String, Object> resources = new HashMap<String, Object>();
// Serializer map as returned by TokenNameFinderModel.createArtifactSerializers().
// The value type is the raw ArtifactSerializer, hence the "rawtypes" suppression.
@SuppressWarnings("rawtypes") Map<String, ArtifactSerializer> artifactSerializers = TokenNameFinderModel.createArtifactSerializers();
/**
 * Returns the sequence codec provided by this object's factory.
 *
 * @return the result of {@code getFactory().getSequenceCodec()}
 */
public SequenceCodec<String> getSequenceCodec() {
  return getFactory().getSequenceCodec();
}
private void init(Object nameFinderModel, byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries, SequenceCodec<String> seqCodec) { Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); manifest.put(SEQUENCE_CODEC_CLASS_NAME_PARAMETER, seqCodec.getClass().getName()); artifactMap.put(MAXENT_MODEL_ENTRY_NAME, nameFinderModel); if (generatorDescriptor != null && generatorDescriptor.length > 0) artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, generatorDescriptor); if (resources != null) { // The resource map must not contain key which are already taken // like the name finder maxent model name if (resources.containsKey(MAXENT_MODEL_ENTRY_NAME) || resources.containsKey(GENERATOR_DESCRIPTOR_ENTRY_NAME)) { throw new IllegalArgumentException(); } // TODO: Add checks to not put resources where no serializer exists, // make that case fail here, should be done in the BaseModel artifactMap.putAll(resources); } checkArtifactMap(); }
/**
 * Creates an OpenNLP name finder.
 *
 * <p>The model is looked up as a class-loader resource. The finder is marked
 * available only when the resource is found and the model loads without an
 * {@link IOException}; a missing resource is logged as a warning and a load
 * failure as an error, and neither is rethrown.
 *
 * @param nameType the entity type recognised by the given NER model
 * @param nerModelPath path to the NER model, resolved through the class loader
 */
public OpenNLPNameFinder(String nameType, String nerModelPath) {
  this.nameTypes = Collections.singleton(nameType);
  this.nameType = nameType;
  InputStream nerModelStream = getClass().getClassLoader().getResourceAsStream(nerModelPath);
  try {
    if (nerModelStream != null) {
      TokenNameFinderModel model = new TokenNameFinderModel(nerModelStream);
      this.nameFinder = new NameFinderME(model);
      this.available = true;
    } else {
      LOG.warn("Couldn't find model from {} using class loader", nerModelPath);
    }
  } catch (IOException e) {
    // Say which model failed; the exception goes last so the logger records its stack trace.
    LOG.error("Failed to load NER model from {}", nerModelPath, e);
  } finally {
    IOUtils.closeQuietly(nerModelStream);
  }
  LOG.info("{} NER : Available for service ? {}", nameType, available);
}
public NameFinderME(TokenNameFinderModel model) { TokenNameFinderFactory factory = model.getFactory(); seqCodec = factory.createSequenceCodec(); sequenceValidator = seqCodec.createSequenceValidator(); this.model = model.getNameFinderSequenceModel(); contextGenerator = factory.createContextGenerator(); // TODO: We should deprecate this. And come up with a better solution! contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); }
/**
 * Returns the sequence codec provided by this object's factory.
 *
 * @return the result of {@code getFactory().getSequenceCodec()}
 */
public SequenceCodec<String> getSequenceCodec() {
  return getFactory().getSequenceCodec();
}
private void init(Object nameFinderModel, byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries, SequenceCodec<String> seqCodec) { Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY); manifest.put(SEQUENCE_CODEC_CLASS_NAME_PARAMETER, seqCodec.getClass().getName()); artifactMap.put(MAXENT_MODEL_ENTRY_NAME, nameFinderModel); if (generatorDescriptor != null && generatorDescriptor.length > 0) artifactMap.put(GENERATOR_DESCRIPTOR_ENTRY_NAME, generatorDescriptor); if (resources != null) { // The resource map must not contain key which are already taken // like the name finder maxent model name if (resources.containsKey(MAXENT_MODEL_ENTRY_NAME) || resources.containsKey(GENERATOR_DESCRIPTOR_ENTRY_NAME)) { throw new IllegalArgumentException(); } // TODO: Add checks to not put resources where no serializer exists, // make that case fail here, should be done in the BaseModel artifactMap.putAll(resources); } checkArtifactMap(); }
/**
 * Tags the sentence of the given sequence with a name finder built from
 * {@code model} and turns the encoded tags into events.
 *
 * <p>The returned array has one slot per token of the sentence and is filled
 * from the events generated by {@code NameFinderEventStream.generateEvents}
 * with the context generator {@code pcg}.
 *
 * @param sequence the sequence whose source {@code NameSample} supplies the sentence
 * @param model the model wrapped in a {@code TokenNameFinderModel} for tagging
 * @return the event array created for the sentence
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  TokenNameFinderModel wrappedModel =
      new TokenNameFinderModel("x-unspecified", model, Collections.emptyMap(), null);
  TokenNameFinder tagger = new NameFinderME(wrappedModel);

  NameSample sample = ((Sequence<NameSample>) sequence).getSource();
  String[] tokens = sample.getSentence();

  String[] encodedTags = seqCodec.encode(tagger.find(tokens), tokens.length);

  Event[] result = new Event[tokens.length];
  NameFinderEventStream.generateEvents(tokens, encodedTags, pcg).toArray(result);
  return result;
}
/**
 * Checks whether the outcomes of the given model are compatible with the
 * sequence codec created by this object's factory.
 *
 * @param model the model whose outcomes are collected and checked
 * @return the result of the codec's {@code areOutcomesCompatible} check
 */
private boolean isModelValid(MaxentModel model) {
  final int outcomeCount = model.getNumOutcomes();
  String[] labels = new String[outcomeCount];

  // Copy every outcome label, in index order, into a plain array for the codec.
  int idx = 0;
  while (idx < outcomeCount) {
    labels[idx] = model.getOutcome(idx);
    idx++;
  }

  return getFactory().createSequenceCodec().areOutcomesCompatible(labels);
}