private static NodeInfo buildNodeInfo(JCas jcas, Token tokenAnno, int serial) throws CasTreeConverterException, UnsupportedPosTagStringException { String word = tokenAnno.getCoveredText(); String lemma = tokenAnno.getLemma().getValue(); String pos = tokenAnno.getPos().getPosValue(); // We rely on the fact the NamedEntity enum values have the same names as the ones // specified in the DKPro mapping (e.g. PERSON, ORGANIZATION) eu.excitementproject.eop.common.representation.parse.representation.basic.NamedEntity namedEntity=null; List<NamedEntity> namedEntities = JCasUtil.selectCovered(NamedEntity.class, tokenAnno); switch (namedEntities.size()) { case 0: break; // if no NER - ignore and move on case 1: namedEntity = eu.excitementproject.eop.common.representation.parse.representation.basic.NamedEntity.valueOf(namedEntities.get(0).getValue()); break; default: throw new CasTreeConverterException(String.format("Got %d NamedEntity annotations for token %s", namedEntities.size(), tokenAnno)); } return new DefaultNodeInfo(word, lemma, serial, namedEntity, new DefaultSyntacticInfo(new PennPartOfSpeech(pos))); }
/**
 * Converts a part-of-speech string into a {@link SyntacticInfo} object.
 * <p>
 * The string is resolved in this order:
 * <ol>
 *   <li>if it equals {@link WildcardPartOfSpeech#WILDCARD_POS_STR} (compared as given, before any
 *       case conversion), the wildcard part of speech is used;</li>
 *   <li>otherwise it is upper-cased, and if the result is one of
 *       {@link BySimplerCanonicalPartOfSpeech#SIMPLER_CANONICAL_POS_TAG_STRINGS} a
 *       {@link BySimplerCanonicalPartOfSpeech} is built from it;</li>
 *   <li>otherwise a {@link PennPartOfSpeech} is built from the upper-cased string.</li>
 * </ol>
 *
 * @param partOfSpeech string representation of the part of speech; may be {@code null}
 * @return a {@link DefaultSyntacticInfo} wrapping the resolved part of speech, or {@code null}
 *         when {@code partOfSpeech} is {@code null}
 * @throws CompilationException if the part-of-speech class chosen for the string rejects it
 */
public static SyntacticInfo stringToSyntacticInfo(String partOfSpeech) throws CompilationException {
    if (partOfSpeech == null) {
        return null;
    }

    PartOfSpeech pos;
    if (partOfSpeech.equals(WildcardPartOfSpeech.WILDCARD_POS_STR)) {
        pos = WildcardPartOfSpeech.getWildcardPOS();
    } else {
        // Locale.ROOT keeps the conversion independent of the JVM default locale; with a
        // Turkish default locale a plain toUpperCase() maps 'i' to a dotted capital I,
        // so a tag such as "in" would no longer match "IN".
        final String upperCasedPos = partOfSpeech.toUpperCase(java.util.Locale.ROOT);
        if (BySimplerCanonicalPartOfSpeech.SIMPLER_CANONICAL_POS_TAG_STRINGS.contains(upperCasedPos)) {
            try {
                pos = new BySimplerCanonicalPartOfSpeech(upperCasedPos);
            } catch (UnsupportedPosTagStringException e) {
                throw new CompilationException("Error reading this part of speech: " + upperCasedPos, e);
            }
        } else {
            try {
                pos = new PennPartOfSpeech(upperCasedPos);
            } catch (UnsupportedPosTagStringException e) {
                throw new CompilationException("Error reading this part of speech: " + upperCasedPos + ". It's probably neigher a canonical POS nor a Penn POS, and should be conformed to one of them.", e);
            }
        }
    }
    return new DefaultSyntacticInfo(pos);
}
/**
 * Builds the generated tree for this construction.
 * <p>
 * After resetting the original-to-generated map and the affected-node set and copying the
 * entities, {@code entity1} is attached as a child of {@code entity2}. Two fixed nodes are then
 * added under {@code entity2}: a "be" node (POS VBZ, relation "cop") and a "." node
 * (relation "punct"). {@code entity2} becomes the generated tree, and the two new nodes plus
 * both entities are recorded as affected nodes.
 *
 * @throws UnsupportedPosTagStringException if a {@link PennPartOfSpeech} cannot be created
 */
public void construct() throws UnsupportedPosTagStringException {
    bidiMapOriginalToGenerated = new SimpleBidirectionalMap<ExtendedNode, ExtendedNode>();
    affectedNodes = new LinkedHashSet<ExtendedNode>();

    copyEntities();
    entity2.addChild(entity1);

    // The "be" node, attached to entity2 with the "cop" relation.
    DefaultNodeInfo copulaNodeInfo = new DefaultNodeInfo(
            "be", "be", 0, null,
            new DefaultSyntacticInfo(new PennPartOfSpeech(PennPosTag.VBZ)));
    DefaultEdgeInfo copulaEdgeInfo = new DefaultEdgeInfo(new DependencyRelation("cop", null));
    DefaultInfo copulaInfo = new DefaultInfo("IS_A_be", copulaNodeInfo, copulaEdgeInfo);
    ExtendedNode beNode = new ExtendedNode(
            new ExtendedInfo(copulaInfo, AdditionalInformationServices.emptyInformation()));
    entity2.addChild(beNode);

    // The "." node, attached to entity2 with the "punct" relation.
    DefaultNodeInfo periodNodeInfo = new DefaultNodeInfo(
            ".", ".", 0, null,
            new DefaultSyntacticInfo(new PennPartOfSpeech(".")));
    DefaultEdgeInfo periodEdgeInfo = new DefaultEdgeInfo(new DependencyRelation("punct", null));
    DefaultInfo periodInfo = new DefaultInfo("IS_A_punct", periodNodeInfo, periodEdgeInfo);
    ExtendedNode punctNode = new ExtendedNode(
            new ExtendedInfo(periodInfo, AdditionalInformationServices.emptyInformation()));
    entity2.addChild(punctNode);

    generatedTree = entity2;

    // Insertion order is kept by the LinkedHashSet: new nodes first, then the entities.
    affectedNodes.add(beNode);
    affectedNodes.add(punctNode);
    affectedNodes.add(entity1);
    affectedNodes.add(entity2);
}
// Create the construction node: its info is identified by counterString and combines a
// DefaultNodeInfo (word, lemma, serial, no named entity -- the fourth argument is null --
// and a Penn part of speech parsed from pos) with the given edgeInfo.
node = new BasicConstructionNode(new DefaultInfo(counterString,new DefaultNodeInfo(word, lemma, serial, null, new DefaultSyntacticInfo(new PennPartOfSpeech(pos))),edgeInfo));
taggedWord.setTag("NNP"); posTaggedTokens.add(new PosTaggedToken(taggedWord.word(), new PennPartOfSpeech(taggedWord.tag()))); } catch (UnsupportedPosTagStringException e) { throw new PosTaggerException("MaxentTagger pos tag set don't comply with the Penn pos tag set. What is this supposed to be? "
// Build a Penn part of speech from the token annotation's POS value, pair it with the
// token text, and append the resulting PosTaggedToken to taggedTokens.
PennPartOfSpeech partOfSpeech = new PennPartOfSpeech(tokenAnno.getPos().getPosValue()); PosTaggedToken taggedToken = new PosTaggedToken(tokenText, partOfSpeech); taggedTokens.add(taggedToken);