/**
 * Gathers the lemmas of all lemma-bearing nodes of a tree, plus the words
 * that {@code StringUtil.stringToWords} extracts from each such lemma.
 *
 * @param tree root of the tree whose nodes are inspected
 * @return an insertion-ordered set holding every lemma and every word
 *         derived from it; nodes without a lemma contribute nothing
 */
protected static Set<String> wordsInTree(ExtendedNode tree) {
    Set<String> words = new LinkedHashSet<String>();
    for (ExtendedNode current : AbstractNodeUtils.treeToLinkedHashSet(tree)) {
        if (!InfoObservations.infoHasLemma(current.getInfo())) {
            continue;
        }
        String nodeLemma = InfoGetFields.getLemma(current.getInfo());
        // Keep the full lemma as well as its constituent words.
        words.add(nodeLemma);
        words.addAll(StringUtil.stringToWords(nodeLemma));
    }
    return words;
}
/**
 * Tells whether the given info describes a content word.
 * <p>
 * The answer is {@code false} when the info has no lemma or when its lemma
 * is listed in {@code NON_CONTENT_WORDS}. Verbs are delegated to
 * {@link #infoIsContentVerb(Info)}; any other word qualifies when its
 * simplified part-of-speech is in {@code CONTENT_CANONICAL_PART_OF_SPEECH}.
 * A missing part-of-speech object is treated as
 * {@code SimplerCanonicalPosTag.OTHER}.
 *
 * @param info the node information to examine
 * @return {@code true} if the info is considered a content word
 */
public static boolean infoIsContentWord(Info info) {
    if (!infoHasLemma(info)) {
        return false;
    }
    String lemma = InfoGetFields.getLemma(info);
    if (NON_CONTENT_WORDS.contains(lemma)) {
        return false;
    }

    PartOfSpeech pos = InfoGetFields.getPartOfSpeechObject(info);
    SimplerCanonicalPosTag simplePos = (pos != null)
            ? simplerPos(pos.getCanonicalPosTag())
            : SimplerCanonicalPosTag.OTHER;

    if (SimplerCanonicalPosTag.VERB.equals(simplePos)) {
        // Verbs have their own, stricter content test.
        return infoIsContentVerb(info);
    }
    return CONTENT_CANONICAL_PART_OF_SPEECH.contains(simplePos);
}
/**
 * Convenience wrapper: applies {@code InfoObservations.infoIsContentWord}
 * to the info carried by the given node.
 *
 * @param node the node to test
 * @return whatever {@code InfoObservations.infoIsContentWord} reports for
 *         the node's info
 */
private boolean nodeIsContentWord(ExtendedNode node) {
    return InfoObservations.infoIsContentWord(
            node.getInfo());
}
featureVector.putAll(originalFeatureVector); if (InfoObservations.infoHasLemma(insertSpec.getHypothesisNodeToInsert().getInfo())) if (InfoObservations.insertOnlyLexModOfMultiWord(insertSpec.getHypothesisNodeToInsert().getInfo(), insertSpec.getTextNodeToBeParent().getInfo())) boolean isNamedEntity = InfoObservations.infoIsNamedEntity(insertSpec.getHypothesisNodeToInsert().getInfo()); if (isNamedEntity) insertSpec.addDescription("Named Entity"); boolean contentVerb = InfoObservations.infoIsContentVerb(insertSpec.getHypothesisNodeToInsert().getInfo()); boolean contentWord = InfoObservations.infoIsContentWord(insertSpec.getHypothesisNodeToInsert().getInfo());
if (InfoObservations.infoIsContentVerb(node.getInfo())) if (InfoObservations.infoIsContentVerb(node.getInfo())) if (InfoObservations.infoIsContentVerb(path.getTo().getInfo())) ret = true;
if (InfoObservations.infoIsContentVerb(path.getLeastCommonAncestor().getInfo()))
/**
 * Tells whether the given info carries a lemma and a part-of-speech whose
 * simplified canonical tag is {@code SimplerCanonicalPosTag.VERB}.
 *
 * @param info the node information to examine
 * @return {@code true} only for a lemma-bearing info tagged as a verb;
 *         {@code false} when there is no lemma or no part-of-speech object
 */
private static boolean isVerb(Info info) {
    if (!InfoObservations.infoHasLemma(info)) {
        return false;
    }
    PartOfSpeech pos = InfoGetFields.getPartOfSpeechObject(info);
    // A lemma does not guarantee a part-of-speech object; without this
    // guard a null would be dereferenced below.
    if (pos == null) {
        return false;
    }
    return simplerPos(pos.getCanonicalPosTag()) == SimplerCanonicalPosTag.VERB;
}
/**
 * Collects the lower-cased lemmas of the content-word nodes among
 * {@code nodes}, skipping excluded nodes and stop words.
 *
 * @param nodes   the nodes to scan
 * @param exclude nodes to ignore, as decided by {@code setContains}
 * @return an insertion-ordered set of lower-cased lemmas that are not in
 *         {@code stopWords}
 */
private Set<String> contentLemmasOfNodes_lowerCase(Iterable<S> nodes, Set<S> exclude) {
    Set<String> lemmas = new LinkedHashSet<>();
    for (S candidate : nodes) {
        if (setContains(exclude, candidate)) {
            continue;
        }
        if (!InfoObservations.infoIsContentWord(candidate.getInfo())) {
            continue;
        }
        String lowered = InfoGetFields.getLemma(candidate.getInfo()).toLowerCase();
        if (!stopWords.contains(lowered)) {
            lemmas.add(lowered);
        }
    }
    return lemmas;
}
/**
 * Tells whether the inserted child is attached by the
 * {@code MINIPAR_LEX_MOD_RELATION} relation and its lemma equals
 * (ignoring case) one of the words of the parent's lemma.
 *
 * @param insertedChildInfo info of the node being inserted
 * @param parentInfo        info of the node that becomes its parent
 * @return {@code true} if the child has the lex-mod relation, has a lemma,
 *         and that lemma matches one of the words produced by
 *         {@code StringUtil.stringToWords} for the parent's lemma
 */
public static boolean insertOnlyLexModOfMultiWord(Info insertedChildInfo, Info parentInfo) {
    if (!InfoGetFields.getRelation(insertedChildInfo).equals(MINIPAR_LEX_MOD_RELATION)) {
        return false;
    }
    if (!infoHasLemma(insertedChildInfo)) {
        return false;
    }

    String lemmaOfParent = InfoGetFields.getLemma(parentInfo);
    List<String> wordsOfParent = StringUtil.stringToWords(lemmaOfParent);
    String lemmaOfChild = InfoGetFields.getLemma(insertedChildInfo);

    for (String word : wordsOfParent) {
        if (word.equalsIgnoreCase(lemmaOfChild)) {
            return true;
        }
    }
    return false;
}
(InfoObservations.infoIsContentWord(node.getInfo())) && (InfoObservations.infoIsContentWord(corefNode.getInfo())) )
/**
 * Rebuilds {@code specs} by pairing every text node in
 * {@code textNodesToMultiWord} with every hypothesis node in
 * {@code hypothesisNodesToMultiWord}.
 * <p>
 * A pair yields a {@code SubstituteNodeSpecificationMultiWord} when the
 * hypothesis node has a lemma, the two lemmas differ, and the word lists
 * mapped to the two nodes share at least one word.
 */
private void findUsingMaps() {
    specs = new LinkedHashSet<SubstituteNodeSpecificationMultiWord>();
    for (ExtendedNode textNode : textNodesToMultiWord.keySet()) {
        for (ExtendedNode hypothesisNode : hypothesisNodesToMultiWord.keySet()) {
            if (!InfoObservations.infoHasLemma(hypothesisNode.getInfo())) {
                continue;
            }
            String textLemma = InfoGetFields.getLemma(textNode.getInfo());
            String hypothesisLemma = InfoGetFields.getLemma(hypothesisNode.getInfo());
            if (textLemma.equals(hypothesisLemma)) {
                // Identical lemmas: nothing to substitute.
                continue;
            }

            List<String> wordsOfText = textNodesToMultiWord.get(textNode);
            List<String> wordsOfHypothesis = hypothesisNodesToMultiWord.get(hypothesisNode);
            Collection<String> shared =
                    Utils.intersect(wordsOfText, wordsOfHypothesis, new LinkedList<String>());
            if (shared.isEmpty()) {
                continue;
            }

            specs.add(new SubstituteNodeSpecificationMultiWord(
                    textNode,
                    hypothesisNode.getInfo().getNodeInfo(),
                    textNode.getInfo().getAdditionalNodeInformation(),
                    wordsOfText,
                    wordsOfHypothesis));
        }
    }
}
if (InfoObservations.infoHasLemma(hypothesisNode.getInfo()))
/**
 * Resets {@code lemmas} and fills it with one {@code LemmaAndPos} for each
 * lemma-bearing node of the tree held by {@code treeAndParentMap}.
 *
 * @throws TeEngineMlException declared by this method; presumably raised
 *         while building a {@code LemmaAndPos} (confirm against that class)
 */
private void constructSetOfLemmas() throws TeEngineMlException {
    lemmas = new LinkedHashSet<LemmaAndPos>();
    for (ExtendedNode treeNode : AbstractNodeUtils.treeToSet(treeAndParentMap.getTree())) {
        if (!InfoObservations.infoHasLemma(treeNode.getInfo())) {
            continue;
        }
        String nodeLemma = InfoGetFields.getLemma(treeNode.getInfo());
        PartOfSpeech nodePos = InfoGetFields.getPartOfSpeechObject(treeNode.getInfo());
        lemmas.add(new LemmaAndPos(nodeLemma, nodePos));
    }
}
/**
 * Collects lemma/part-of-speech pairs for all lemma-bearing nodes of a tree.
 * <p>
 * Each such node contributes a pair for its full lemma and, in addition,
 * one pair per word returned by {@code StringUtil.stringToWords} for that
 * lemma; all of these pairs carry the node's part-of-speech.
 *
 * @param tree root of the tree to scan
 * @return an insertion-ordered set of the collected pairs
 * @throws TeEngineMlException declared by this method; presumably raised
 *         while building a {@code LemmaAndPos} (confirm against that class)
 */
public static Set<LemmaAndPos> lemmasAndPosesInTree(ExtendedNode tree) throws TeEngineMlException {
    Set<LemmaAndPos> pairs = new LinkedHashSet<LemmaAndPos>();
    for (ExtendedNode current : AbstractNodeUtils.treeToLinkedHashSet(tree)) {
        if (!InfoObservations.infoHasLemma(current.getInfo())) {
            continue;
        }
        String fullLemma = InfoGetFields.getLemma(current.getInfo());
        PartOfSpeech nodePos = InfoGetFields.getPartOfSpeechObject(current.getInfo());

        pairs.add(new LemmaAndPos(fullLemma, nodePos));
        // Also register every single word of the lemma under the same POS.
        for (String word : StringUtil.stringToWords(fullLemma)) {
            pairs.add(new LemmaAndPos(word, nodePos));
        }
    }
    return pairs;
}
/**
 * Tells whether the given info describes a content verb.
 * <p>
 * The info must have a lemma, its part-of-speech must simplify to
 * {@code SimplerCanonicalPosTag.VERB}, and the lemma must appear
 * (case-insensitively) in neither {@code NON_CONTENT_VERBS} nor
 * {@code NON_CONTENT_VERBS_VERBS_ONLY}.
 *
 * @param info the node information to examine
 * @return {@code true} if the info is a content verb; {@code false}
 *         otherwise, including when no part-of-speech object is available
 */
public static boolean infoIsContentVerb(Info info) {
    if (!infoHasLemma(info)) {
        return false;
    }
    PartOfSpeech posObject = InfoGetFields.getPartOfSpeechObject(info);
    // infoIsContentWord treats a null part-of-speech as possible; guard here
    // too, because this method is also called directly by other code.
    if (posObject == null) {
        return false;
    }
    if (simplerPos(posObject.getCanonicalPosTag()) != SimplerCanonicalPosTag.VERB) {
        return false;
    }
    String lemma = InfoGetFields.getLemma(info);
    return !StringUtil.setContainsIgnoreCase(NON_CONTENT_VERBS, lemma)
            && !StringUtil.setContainsIgnoreCase(NON_CONTENT_VERBS_VERBS_ONLY, lemma);
}
private void findSpecs() throws TeEngineMlException, RuleBaseException { this.specs = new LinkedHashSet<RuleSubstituteNodeSpecification<T>>(); for (ExtendedNode node : TreeIterator.iterableTree(treeAndParentMap.getTree())) { if (isRelevantNode(node)) { if (InfoObservations.infoHasLemma(node.getInfo())) { LemmaAndPos lemmaAndPos = new LemmaAndPos(InfoGetFields.getLemma(node.getInfo()), InfoGetFields.getPartOfSpeechObject(node.getInfo())); ImmutableSet<T> rules = getRulesForLemmaAndPos(lemmaAndPos); for (T rule : rules) { if (isRelevantRule(rule)) { NodeInfo newNodeInfo = new DefaultNodeInfo(rule.getRhsLemma(), rule.getRhsLemma(), node.getInfo().getNodeInfo().getSerial(), node.getInfo().getNodeInfo().getNamedEntityAnnotation(), new DefaultSyntacticInfo(rule.getRhsPos())); RuleSubstituteNodeSpecification<T> spec = createSpec(node, newNodeInfo,ruleBaseName, rule); // new RuleSubstituteNodeSpecification<T>(node, newNodeInfo, node.getInfo().getAdditionalNodeInformation(), rule.getConfidence(), ruleBaseName, rule); addAdditionalDescription(spec,rule); specs.add(spec); } } } } } }