@Override public void process(JCas jcas) throws AnalysisEngineProcessException { // Create a dummy IdentifiedAnnotation in the type system // If the BaseToken Part Of Speech is a Noun Collection<BaseToken> tokens = JCasUtil.select(jcas, BaseToken.class); for (BaseToken token : tokens) { if (saveAnnotation && token.getPartOfSpeech() != null && token.getPartOfSpeech().startsWith("N")) { IdentifiedAnnotation ann = new IdentifiedAnnotation(jcas); ann.setBegin(token.getBegin()); ann.setEnd(token.getEnd()); ann.addToIndexes(); if (printAnnotation) { LOG.info("Token:" + token.getCoveredText() + " POS:" + token.getPartOfSpeech()); } } } }
/**
 * Reflectively creates a token of the named class, falling back to a plain
 * {@link BaseToken} when that is not possible.
 *
 * @param jCas      the CAS passed to the token constructor
 * @param className fully-qualified name of the wanted token class
 * @return an instance of {@code className} if it names a {@link BaseToken} type with a
 *         {@code (JCas)} constructor that could be invoked; otherwise a new {@link BaseToken}
 */
static private BaseToken createBaseToken( final JCas jCas, final String className ) {
   try {
      final Class<?> clazz = Class.forName( className );
      if ( BaseToken.class.isAssignableFrom( clazz ) ) {
         final Constructor<?> constructor = clazz.getConstructor( JCas.class );
         return (BaseToken)constructor.newInstance( jCas );
      }
      LOGGER.error( "Cannot create a ctakes base token for class " + className );
   } catch ( ClassNotFoundException | NoSuchMethodException | InstantiationException
         | IllegalAccessException | InvocationTargetException multE ) {
      // Pass the exception along so the cause of the reflective failure is not lost.
      LOGGER.error( "Cannot determine ctakes base token type for class " + className, multE );
   }
   // Best-effort fallback: an untyped base token.
   return new BaseToken( jCas );
}
/**
 * Creates a token in the given JCas and sets its begin and end offsets.
 * After the offsets are set, {@code readObject()} is invoked; its effect is not
 * visible in this part of the file.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public BaseToken(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  readObject();
}
public void buildTokenBoundaryMap() { tokenBeginEndTreeSet = new TreeSet<Integer>(); AnnotationIndex<Annotation> annotationIndex = jcas.getAnnotationIndex(BaseToken.type); for (Annotation current : annotationIndex) { BaseToken bt = (BaseToken)current; // filter out NewlineToken if (!(bt instanceof NewlineToken)) { int begin = bt.getBegin(); int end = bt.getEnd(); tokenBeginEndTreeSet.add(begin); tokenBeginEndTreeSet.add(end); } } }
newGoldToken = new WordToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(ContractionToken.class.getName())) newGoldToken = new ContractionToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(NewlineToken.class.getName())) newGoldToken = new NewlineToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(NumToken.class.getName())) newGoldToken = new NumToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(PunctuationToken.class.getName())) newGoldToken = new PunctuationToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(SymbolToken.class.getName())) newGoldToken = new SymbolToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(BaseToken.class.getName())) newGoldToken = new BaseToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else newGoldToken = new BaseToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); newGoldToken.setPartOfSpeech(oldSystemToken.getPartOfSpeech()); newGoldToken.setTokenNumber(oldSystemToken.getTokenNumber()); newGoldToken.addToIndexes();
/**
 * Scans the tokens of the CAS for cue phrases of one or two tokens.
 * Each token's lower-cased text, and each pair of adjacent tokens joined by a
 * single space, is looked up in {@code cueWords}; every hit is passed to
 * {@code addCuePhrase} with the matching span.
 *
 * @param jCas the CAS whose tokens are scanned
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@SuppressWarnings("null")
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  // Iterate over a copy of the selected tokens rather than the selection itself.
  final ArrayList<BaseToken> snapshot = new ArrayList<BaseToken>(JCasUtil.select(jCas, BaseToken.class));

  BaseToken previousToken = null;
  String previousKey = null;
  for (final BaseToken current : snapshot) {
    final String currentKey = current.getCoveredText().toLowerCase();

    // Single-token cue.
    if (cueWords.containsKey(currentKey)) {
      addCuePhrase(jCas, currentKey, current.getBegin(), current.getEnd());
    }

    // Two-token cue: only possible once a previous token has been seen.
    if (previousToken != null) {
      final String pairKey = previousKey + " " + currentKey;
      if (cueWords.containsKey(pairKey)) {
        addCuePhrase(jCas, pairKey, previousToken.getBegin(), current.getEnd());
      }
    }

    previousToken = current;
    previousKey = currentKey;
  }
}
/** * @param baseToken some token * @return a part of speech text representation if the basetoken is a word token, else "" */ static private String getTokenPos( final BaseToken baseToken ) { if ( !(baseToken instanceof WordToken) ) { return ""; } // We are only interested in tokens that are -words- final String tokenPos = baseToken.getPartOfSpeech(); if ( tokenPos == null ) { return ""; } return tokenPos; }
tokenTree.addChild(new SimpleTree(token.getPartOfSpeech())); if(token.getEnd() <= arg1.getEnd()){ arg1Tree.addChild(tokenTree); }else if(token.getBegin() >= arg2.getBegin()){ arg2Tree.addChild(tokenTree); }else{
if(coveredTokens.size() == 1 && coveredTokens.get(0).getPartOfSpeech() != null && coveredTokens.get(0).getPartOfSpeech().startsWith("PRP") && !markable.getCoveredText().toLowerCase().equals("it")){ toRemove.add(markable); }else if(coveredTokens.size() > 0 && (coveredTokens.get(0).getCoveredText().startsWith("Mr.") || coveredTokens.get(0).getCoveredText().startsWith("Dr.") || coveredTokens.get(0).getCoveredText().startsWith("Mrs.") || coveredTokens.get(0).getCoveredText().startsWith("Ms.") || coveredTokens.get(0).getCoveredText().startsWith("Miss"))){ toRemove.add(markable); }else if(markable.getCoveredText().toLowerCase().endsWith("patient") || markable.getCoveredText().toLowerCase().equals("pt")){
public CharacterTokenAdapter(BaseToken bta) { super(bta); iv_char = bta.getCoveredText().charAt(0); }
BaseToken baseToken = new BaseToken(jCas, wordStart, wordEnd); if (!loadWordsOnly) { String tag = token.substring(split + 1); baseToken.setPartOfSpeech(tag); baseToken.setTokenNumber(wordNumber++); baseToken.addToIndexes();
@Override public synchronized void process( JCas jCas ) throws AnalysisEngineProcessException { LOGGER.info( "Dependency parser starting with thread:" + Thread.currentThread().getName() ); for ( Sentence sentence : JCasUtil.select( jCas, Sentence.class ) ) { List<BaseToken> printableTokens = new ArrayList<>(); for ( BaseToken token : JCasUtil.selectCovered( jCas, BaseToken.class, sentence ) ) { if ( token instanceof NewlineToken ) continue; printableTokens.add( token ); } if ( printableTokens.isEmpty() ) { // If there are no printable tokens then #convert fails continue; } DEPTree tree = new DEPTree(); // Convert CAS data into structures usable by ClearNLP for ( int i = 0; i < printableTokens.size(); i++ ) { BaseToken token = printableTokens.get( i ); String lemma = useLemmatizer ? lemmatizer.getLemma( token.getCoveredText(), token.getPartOfSpeech() ) : token.getNormalizedForm(); DEPNode node = new DEPNode( i + 1, token.getCoveredText(), lemma, token.getPartOfSpeech(), new DEPFeat() ); tree.add( node ); } // Run parser and convert output back to CAS friendly data types synchronized(LOCK){ parser.process( tree ); ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert( jCas, tree, sentence, printableTokens ); DependencyUtility.addToIndexes( jCas, nodes ); } } LOGGER.info( "Dependency parser ending with thread:" + Thread.currentThread().getName() ); }
words[i] = printableTokens.get(i).getCoveredText(); BaseToken token = printableTokens.get(i); String posTag = wordTagList[i]; token.setPartOfSpeech(posTag);
BaseToken.class.cast(bta).addToIndexes(); }else{ throw new AnalysisEngineProcessException("Token returned cannot be cast as BaseToken", new Object[]{bta}); if (bta.getBegin()>=rangeBegin && bta.getBegin()<rangeEnd) { bta.setTokenNumber(tokenCount); tokenCount++;
/**
 * Internal - constructor used by generator.
 * Delegates to the superclass constructor with the same arguments and then invokes
 * {@code readObject()}, whose effect is not visible in this part of the file.
 *
 * @generated
 * @param addr low level Feature Structure reference
 * @param type the type of this Feature Structure
 */
public BaseToken(int addr, TOP_Type type) {
  super(addr, type);
  readObject();
}
/** * A utility method that tokenizes a range of text. */ protected void annotateRange(JCas jcas, int beginPos, int endPos) throws AnalysisEngineProcessException { String text = jcas.getDocumentText().substring(beginPos, endPos); List<Token> tokens = null; try { tokens = tokenizer.tokenizeAndSort(text); } catch (Exception e) { throw new AnalysisEngineProcessException(e); } Iterator<Token> tokenItr = tokens.iterator(); while (tokenItr.hasNext()) { Token token = tokenItr.next(); // convert token into JCas object BaseToken bta = TokenConverter.convert(token, jcas, beginPos); bta.setTokenNumber(tokenCount); // add JCas object to CAS index bta.addToIndexes(); tokenCount++; } } }
/**
 * Builds a {@link BaseToken} from a FHIR {@link Basic} resource.
 * The resource's codings are scanned for a token class name (system
 * {@code CODING_TYPE_SYSTEM}) and a part of speech (system {@code CODING_PART_OF_SPEECH}).
 * A token of the named class is created via {@code createBaseToken}, its part of
 * speech is set, and its text span is filled in by {@code addTextSpan}.
 *
 * @param jCas     the CAS in which the token is created
 * @param resource the FHIR resource describing the token
 * @return the created token
 */
public BaseToken parseResource( final JCas jCas, final Basic resource ) {
   String className = "";
   String pos = "";
   final CodeableConcept codeableConcept = resource.getCode();
   for ( Coding coding : codeableConcept.getCoding() ) {
      final String system = coding.getSystem();
      // Constant-first comparison: a coding without a system must not throw.
      if ( CODING_TYPE_SYSTEM.equals( system ) ) {
         final String code = coding.getCode();
         if ( code != null ) {
            // A null class name would make Class.forName throw; keep the "" default instead.
            className = code;
         }
      } else if ( CODING_PART_OF_SPEECH.equals( system ) ) {
         pos = coding.getCode();
      }
   }
   final BaseToken baseToken = createBaseToken( jCas, className );
   baseToken.setPartOfSpeech( pos );
   addTextSpan( baseToken, resource, LOGGER );
   return baseToken;
}
public void buildTokenBoundaryMap() { tokenBeginEndTreeSet = new TreeSet<Integer>(); AnnotationIndex<Annotation> annotationIndex = jcas.getAnnotationIndex(BaseToken.type); for (Annotation current : annotationIndex) { BaseToken bt = (BaseToken)current; // filter out NewlineToken if (!(bt instanceof NewlineToken)) { int begin = bt.getBegin(); int end = bt.getEnd(); tokenBeginEndTreeSet.add(begin); tokenBeginEndTreeSet.add(end); } } }
newGoldToken = new WordToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(ContractionToken.class.getName())) newGoldToken = new ContractionToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(NewlineToken.class.getName())) newGoldToken = new NewlineToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(NumToken.class.getName())) newGoldToken = new NumToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(PunctuationToken.class.getName())) newGoldToken = new PunctuationToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(SymbolToken.class.getName())) newGoldToken = new SymbolToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(BaseToken.class.getName())) newGoldToken = new BaseToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else newGoldToken = new BaseToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); newGoldToken.setPartOfSpeech(oldSystemToken.getPartOfSpeech()); newGoldToken.setTokenNumber(oldSystemToken.getTokenNumber()); newGoldToken.addToIndexes();
/**
 * Scans the tokens of the CAS for cue phrases of one or two tokens.
 * Each token's lower-cased text, and each pair of adjacent tokens joined by a
 * single space, is looked up in {@code cueWords}; every hit is passed to
 * {@code addCuePhrase} with the matching span.
 *
 * @param jCas the CAS whose tokens are scanned
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@SuppressWarnings("null")
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  // Iterate over a copy of the selected tokens rather than the selection itself.
  final ArrayList<BaseToken> snapshot = new ArrayList<BaseToken>(JCasUtil.select(jCas, BaseToken.class));

  BaseToken previousToken = null;
  String previousKey = null;
  for (final BaseToken current : snapshot) {
    final String currentKey = current.getCoveredText().toLowerCase();

    // Single-token cue.
    if (cueWords.containsKey(currentKey)) {
      addCuePhrase(jCas, currentKey, current.getBegin(), current.getEnd());
    }

    // Two-token cue: only possible once a previous token has been seen.
    if (previousToken != null) {
      final String pairKey = previousKey + " " + currentKey;
      if (cueWords.containsKey(pairKey)) {
        addCuePhrase(jCas, pairKey, previousToken.getBegin(), current.getEnd());
      }
    }

    previousToken = current;
    previousKey = currentKey;
  }
}