// Look up the token in WordNet: if a usable POS tag is present, restrict the
// lookup to that part of speech; otherwise try every WordNet part of speech.
if (t.getPos() != null && POS.getPartOfSpeech(t.getPos().charAt(0)) != null) {
    POS pos = POS.getPartOfSpeech(t.getPos().charAt(0));
    List<String> stems = stemmer.findStems(t.getCoveredText(), pos);
    if (!stems.isEmpty()) {
        IIndexWord wnWord = dict.getIndexWord(stems.get(0), pos);
        if (wnWord != null) {
            WordnetDictTerm wdt = new WordnetDictTerm(jCas, t.getBegin(), t.getEnd());
            wdt.setDictCanon(stems.get(0));
            wdt.setEntityId(wnWord.getID().toString());
            wdt.addToIndexes(); // register the annotation in the CAS
        }
    }
} else {
    for (POS pos : POS.values()) {
        List<String> stems = stemmer.findStems(t.getCoveredText(), pos);
        if (!stems.isEmpty()) {
            IIndexWord wnWord = dict.getIndexWord(stems.get(0), pos);
            if (wnWord != null) {
                WordnetDictTerm wdt = new WordnetDictTerm(jCas, t.getBegin(), t.getEnd());
                wdt.setDictCanon(stems.get(0));
                wdt.setEntityId(wnWord.getID().toString());
                wdt.addToIndexes(); // register the annotation in the CAS
                LOG.trace("WordnetDictTerm >" + t.getCoveredText() + "< ["
                        + t.getBegin() + ":" + t.getEnd() + "] from doc "
                        + getHeaderDocId(jCas));
            }
        }
    }
}
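// The lookup above presupposes an open JWI IDictionary and a WordnetStemmer
// built on it. A minimal setup sketch, assuming JWI's standard API and a
// placeholder dictionary path (typically done once in initialize()):
import java.io.File;
import java.io.IOException;

import edu.mit.jwi.Dictionary;
import edu.mit.jwi.IDictionary;
import edu.mit.jwi.morph.WordnetStemmer;

public class WordnetSetup {
    public static void main(String[] args) throws IOException {
        // Point this at a local WordNet 'dict' directory (placeholder path)
        IDictionary dict = new Dictionary(new File("/path/to/wordnet/dict"));
        dict.open();
        // The stemmer uses the dictionary's morphological exception lists
        WordnetStemmer stemmer = new WordnetStemmer(dict);
        // A null POS returns stems over all parts of speech, e.g. [neuron]
        System.out.println(stemmer.findStems("neurons", null));
    }
}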
public FeatureStructure createFS(int addr, CASImpl cas) {
    if (Token_Type.this.useExistingInstance) {
        // Return eq fs instance if already created
        FeatureStructure fs = Token_Type.this.jcas.getJfsFromCaddr(addr);
        if (null == fs) {
            fs = new Token(addr, Token_Type.this);
            Token_Type.this.jcas.putJfsFromCaddr(addr, fs);
            return fs;
        }
        return fs;
    } else
        return new Token(addr, Token_Type.this);
}
};
/**
 * This process(JCas) method cycles through all annotations in the CAS. For
 * those that are identified as tokens by the {@link AnnotationDataExtractor}
 * implementation being used, an attempt is made to extract part-of-speech
 * information. The covered text for each token is then lemmatized with the
 * {@link BioLemmatizer}, using the part-of-speech information if it was
 * available.
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    for (Token t : JCasUtil.select(jCas, Token.class)) {
        String pos = BlueCasUtil.getSinglePosTag(t);
        String lemma = lemmatize(t.getCoveredText(), pos);
        if (lemma != null)
            t.setLemmaStr(lemma);
    }
}
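// The lemmatize(String, String) helper is not part of this excerpt. A plausible
// sketch on top of the BioLemmatizer API (hedged reconstruction assuming a
// 'bioLemmatizer' field created in initialize(); not the project's actual code):
import java.util.Collection;

import edu.ucdenver.ccp.nlp.biolemmatizer.LemmataEntry;
import edu.ucdenver.ccp.nlp.biolemmatizer.LemmataEntry.Lemma;

private String lemmatize(String text, String pos) {
    // lemmatizeByLexiconAndRules() tolerates a null/empty POS tag and then
    // proposes lemmata for every candidate part of speech
    LemmataEntry entry = bioLemmatizer.lemmatizeByLexiconAndRules(text, pos);
    Collection<Lemma> lemmas = entry.getLemmas();
    return lemmas.isEmpty() ? null : lemmas.iterator().next().getLemma();
}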
if (a instanceof Token) {
    final Token token = (Token) a;
    if (prevToken != null && prevToken.getEnd() < token.getBegin()) {
        // Gap between the previous token and this one: add boundary states
        // and a transition bridging the gap
        states.put(prevToken.getEnd(), new State(prevToken.getEnd()));
        states.put(token.getBegin(), new State(token.getBegin()));
        transitions.put(prevToken.getEnd(),
                new Transition(0, prevToken.getEnd(), token.getBegin(), null));
    }
    prevToken = token; // remember the token for the next gap check
}
// Check whether the current token starts inside one of the BR annotations
for (int i = 0; i < allBrs.length; i++) {
    if (allBrs[i] != null && token.getEnd() > allBrs[i].getBegin()) {
        coveringBr = allBrs[i];
        allBrs[i] = null; // consume this BR so it is not matched twice
        // Advance the token iterator until the covering BR has been spanned
        while (!endOfBR && tokenIt.hasNext()) {
            Token nextT = tokenIt.next();
            if (nextT.getEnd() >= coveringBr.getEnd())
                endOfBR = true;
        }
    }
}
if (coveringBr != null) {
    // Token belongs to a brain-region (BR) mention
    feats[FORM] = token.getCoveredText();
    feats[LEMMA] = token.getLemmaStr(); // FIXME ensure lemma is set
    feats[POS] = token.getPos();
    feats[ENTITY_TYPE] = BR_LABEL;
} else {
    // Token outside any BR mention
    feats[FORM] = token.getCoveredText();
    feats[LEMMA] = token.getLemmaStr(); // FIXME ensure lemma is set
    feats[POS] = token.getPos();
    feats[ENTITY_TYPE] = Word.OTHER_LABEL;
    feats[LABEL] = Word.OTHER_LABEL;
}
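// The FORM, LEMMA, POS, ENTITY_TYPE and LABEL indices above address columns of
// a CoNLL-style feature row; their concrete values are not in this excerpt.
// A purely illustrative layout (hypothetical constants, not the project's):
static final int FORM = 0;
static final int LEMMA = 1;
static final int POS = 2;
static final int ENTITY_TYPE = 3;
static final int LABEL = 4;
// one feats row per token, later written out tab-separated
String[] feats = new String[5];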
// Wrap the UIMA token's covered text as a Mallet token and attach features
cc.mallet.types.Token malletToken = new cc.mallet.types.Token(
        t.getCoveredText());
data.add(malletToken);
malletToken.setFeatureValue(PROPERTY_POS + t.getPos(), 1.0);
if (t.getLemmaStr() != null && t.getLemmaStr().length() > 1)
    malletToken.setFeatureValue(PROPERTY_LEMMA + t.getLemmaStr(), 1.0);
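// In Mallet, feature-bearing tokens like these are usually collected into a
// TokenSequence and wrapped in an Instance, so 'data' above would be that
// sequence. A sketch under that assumption (the instance name is illustrative):
import cc.mallet.types.Instance;
import cc.mallet.types.TokenSequence;

TokenSequence data = new TokenSequence();
// ... add one cc.mallet.types.Token per UIMA token, as shown above ...
Instance inst = new Instance(data, null /* target */, "doc-1" /* name */, null /* source */);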
return "Token[" + t.getCoveredText() + "]";
normalized = ((Token) a).getLemmaStr();
if (!caseSensitive && normalized != null) {
    normalized = normalized.toLowerCase();
}