public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
/**
 * Returns the n-gram dictionary provided by this instance's factory.
 *
 * @return the dictionary reported by the factory, or {@code null} when
 *         {@code getFactory()} returns {@code null}
 */
public Dictionary getNgramDictionary() {
    return (getFactory() != null) ? getFactory().getDictionary() : null;
}
/**
 * Builds a {@link POSModel} from the given stream.
 *
 * @param in the stream handed to the {@code POSModel} constructor
 * @return the newly constructed model
 * @throws IOException if constructing the model from the stream fails
 */
@Override
protected POSModel loadModel(InputStream in) throws IOException {
    final POSModel loaded = new POSModel(in);
    return loaded;
}
}
/**
 * Re-tags the sentence behind {@code sequence} with a tagger built from
 * {@code model} and returns the events generated from the new tags.
 *
 * <p>A fresh {@link POSTaggerME} is created on every call from a
 * {@link POSModel} wrapping {@code model}. The sentence and additional
 * context are taken from the sequence's source sample.
 *
 * @param sequence the sequence whose source sample is re-tagged; it is used as
 *        a {@code Sequence<POSSample>} (unchecked conversion from the raw type)
 * @param model the model used to build the temporary tagger
 * @return the generated events as an array
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
    Sequence<POSSample> pss = sequence;
    POSTagger tagger = new POSTaggerME(
        new POSModel("x-unspecified", model, null, new POSTaggerFactory()));

    String[] sentence = pss.getSource().getSentence();
    Object[] ac = pss.getSource().getAddictionalContext();
    String[] tags = tagger.tag(sentence);

    // Return what toArray hands back: when the list does not fit into the
    // supplied array, toArray allocates a new one and leaves the argument
    // untouched, so returning the argument could yield an array of nulls.
    return POSSampleEventStream.generateEvents(sentence, tags, ac, pcg)
        .toArray(new Event[sentence.length]);
}
// Run the tagger over the tokens and keep its result in partsOfSpeech.
// NOTE(review): tagger, tokens and partsOfSpeech are declared outside this excerpt.
partsOfSpeech = tagger.tag(tokens);
/**
 * Turns one sample into an iterator over its generated events.
 *
 * <p>The sample's sentence, tags and additional context are passed to
 * {@code generateEvents} together with this instance's context generator
 * {@code cg}.
 *
 * @param sample the sample to generate events for
 * @return an iterator over the events produced by {@code generateEvents}
 */
@Override
protected Iterator<Event> createEvents(POSSample sample) {
    final String[] tokens = sample.getSentence();
    final String[] posTags = sample.getTags();
    final Object[] additionalContext = sample.getAddictionalContext();
    return generateEvents(tokens, posTags, additionalContext, cg).iterator();
}
/**
 * Creates the sequence validator for this instance.
 *
 * @return a new {@link DefaultPOSSequenceValidator} built from the value of
 *         {@code getTagDictionary()}
 */
public SequenceValidator<String> getSequenceValidator() {
    final SequenceValidator<String> validator =
        new DefaultPOSSequenceValidator(getTagDictionary());
    return validator;
}
/**
 * Creates a {@link DummyPOSDictionary} wrapping the dictionary that
 * {@code POSDictionary.create} builds from the given stream.
 *
 * @param uncloseableInputStream the stream passed to {@code POSDictionary.create}
 * @return a new {@code DummyPOSDictionary} around the created dictionary
 * @throws IOException if {@code POSDictionary.create} fails
 */
public static DummyPOSDictionary create(UncloseableInputStream uncloseableInputStream)
        throws IOException {
    final POSDictionary delegate = POSDictionary.create(uncloseableInputStream);
    return new DummyPOSDictionary(delegate);
}
/**
 * Extracts the sentence tokens from a sample.
 *
 * @param sample the sample to read
 * @return the value of {@code sample.getSentence()}
 */
@Override
protected String[] toSentence(POSSample sample) {
    final String[] tokens = sample.getSentence();
    return tokens;
}
}
/**
 * Creates a fresh {@link POSDictionary}, stores it in this instance's
 * {@code posDictionary} field and returns it.
 *
 * <p>NOTE(review): the dictionary is constructed with {@code true}; presumably
 * this selects case-sensitive lookup — confirm against {@code POSDictionary}.
 *
 * @return the newly created dictionary
 */
public TagDictionary createEmptyTagDictionary() {
    final POSDictionary emptyDictionary = new POSDictionary(true);
    this.posDictionary = emptyDictionary;
    return emptyDictionary;
}
}
/**
 * Writes the given model to the output stream by delegating to
 * {@code artifact.serialize(out)}.
 *
 * @param artifact the model to write
 * @param out the stream the model is written to
 * @throws IOException if the model's own {@code serialize} call fails
 */
public void serialize(POSModel artifact, OutputStream out) throws IOException {
    artifact.serialize(out);
}
}
/**
 * Builds the gold-standard sample by parsing a fixed, tagged sentence in
 * {@code token_TAG} notation.
 *
 * @return the sample produced by {@code POSSample.parse}
 * @throws InvalidFormatException if {@code POSSample.parse} rejects the sentence
 */
public static POSSample createGoldSample() throws InvalidFormatException {
    final String taggedSentence =
        "the_DT stories_NNS about_IN well-heeled_JJ communities_NNS and_CC developers_NNS";
    return POSSample.parse(taggedSentence);
}
/**
 * Initializes this instance with the given dictionaries.
 *
 * <p>The n-gram dictionary is forwarded to the superclass, which receives
 * {@code null} in place of the tag dictionary; {@code posDictionary} is kept
 * in this class's own {@code dict} field instead.
 * NOTE(review): presumably this keeps the superclass from managing the tag
 * dictionary itself — confirm against the superclass.
 *
 * @param ngramDictionary the n-gram dictionary passed on to {@code super.init}
 * @param posDictionary the tag dictionary stored in {@code dict}
 */
@Override
protected void init(Dictionary ngramDictionary, TagDictionary posDictionary) {
    // The superclass gets null rather than posDictionary.
    super.init(ngramDictionary, null);
    this.dict = posDictionary;
}
/**
 * Initializes the current instance with the given samples and a
 * {@link DefaultPOSContextGenerator} constructed with a {@code null} argument.
 *
 * @param samples the stream of POS samples, passed on unchanged to the
 *        two-argument constructor
 */
public POSSampleEventStream(ObjectStream<POSSample> samples) {
    this(samples, new DefaultPOSContextGenerator(null));
}
/**
 * Generates events for a tagged sentence without additional context.
 *
 * <p>Convenience overload: delegates to the four-argument
 * {@code generateEvents} with {@code null} as the additional context.
 *
 * @param sentence the sentence tokens
 * @param tags the tags passed along with the tokens
 * @param cg the context generator to use
 * @return the events returned by the four-argument overload
 */
public static List<Event> generateEvents(String[] sentence, String[] tags,
        POSContextGenerator cg) {
    final List<Event> events = generateEvents(sentence, tags, null, cg);
    return events;
}
}
/**
 * Creates a factory from explicit feature-generator bytes, resources and a
 * tag dictionary.
 *
 * @param featureGeneratorBytes the feature generator descriptor bytes; when
 *        {@code null}, the result of {@code loadDefaultFeatureGeneratorBytes()}
 *        is used instead
 * @param resources the resources map stored in this factory
 * @param posDictionary the tag dictionary stored in this factory
 */
public POSTaggerFactory(byte[] featureGeneratorBytes, final Map<String, Object> resources,
        TagDictionary posDictionary) {
    this.featureGeneratorBytes = (featureGeneratorBytes != null)
        ? featureGeneratorBytes
        : loadDefaultFeatureGeneratorBytes();
    this.resources = resources;
    this.posDictionary = posDictionary;
}
/**
 * Writes the given dictionary to the output stream by delegating to
 * {@code artifact.serialize(out)}.
 *
 * @param artifact the dictionary to write
 * @param out the stream the dictionary is written to
 * @throws IOException if the dictionary's own {@code serialize} call fails
 */
public void serialize(DummyPOSDictionary artifact, OutputStream out) throws IOException {
    artifact.serialize(out);
}
}
public void startup() throws Exception { super.startup(); InputStream modelStream = MaryProperties.needStream(propertyPrefix + "model"); InputStream posMapperStream = MaryProperties.getStream(propertyPrefix + "posMap"); tagger = new POSTaggerME(new POSModel(modelStream)); modelStream.close(); if (posMapperStream != null) { posMapper = new HashMap<String, String>(); BufferedReader br = new BufferedReader(new InputStreamReader(posMapperStream, "UTF-8")); String line; while ((line = br.readLine()) != null) { // skip comments and empty lines if (line.startsWith("#") || line.trim().equals("")) continue; // Entry format: POS GPOS, i.e. two space-separated entries per line StringTokenizer st = new StringTokenizer(line); String pos = st.nextToken(); String gpos = st.nextToken(); posMapper.put(pos, gpos); } posMapperStream.close(); } }
// Run the tagger over the tokens and keep its result in partsOfSpeech.
// NOTE(review): tagger, tokens and partsOfSpeech are declared outside this excerpt.
partsOfSpeech = tagger.tag(tokens);
/**
 * Builds a {@link POSModel} from the given stream.
 *
 * @param modelIn the stream handed to the {@code POSModel} constructor
 * @return the newly constructed model
 * @throws IOException if constructing the model from the stream fails
 */
@Override
protected POSModel loadModel(InputStream modelIn) throws IOException {
    final POSModel loaded = new POSModel(modelIn);
    return loaded;
}
}