/**
 * Renders this item as {@code lemma:POS}.
 * <p>
 * A {@code null} part-of-speech is printed as {@code "*"} rather than causing a
 * {@link NullPointerException}, so the method stays safe to call from log
 * statements and debuggers. A {@code null} lemma is printed as {@code "null"}
 * (standard {@link StringBuilder#append(Object)} behaviour).
 *
 * @return the lemma and the part-of-speech name, joined by a colon
 */
@Override
public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(lemma);
    sb.append(":");
    // pos may legitimately be absent; never dereference it blindly here.
    sb.append(pos == null ? "*" : pos.name());
    return sb.toString();
}
/**
 * Tells whether the given part-of-speech is a noun.
 *
 * @param pos the part-of-speech to inspect; may be {@code null}
 * @return {@code true} only when {@code pos} is non-null and its canonical tag
 *         equals {@link CanonicalPosTag#N}
 */
public static boolean isANoun(PartOfSpeech pos) {
    if (pos == null) {
        return false;
    }
    return pos.getCanonicalPosTag().equals(CanonicalPosTag.N);
}
/**
 * Computes a hash code from {@code lemma} and {@code pos} using the usual
 * 31-based polynomial; a {@code null} field contributes 0.
 *
 * @return the combined hash of lemma and part-of-speech
 */
@Override
public int hashCode() {
    int lemmaHash = 0;
    if (lemma != null) {
        lemmaHash = lemma.hashCode();
    }

    int posHash = 0;
    if (pos != null) {
        posHash = pos.hashCode();
    }

    // Same arithmetic as seeding with 1 and folding each field in with multiplier 31.
    return 31 * (31 + lemmaHash) + posHash;
}
public SetBasedPOSFilter(ConfigurationParams params) { try { this.relevantItems = new HashSet<CanonicalPosTag>(); for (String relevantPOS : params.getStringArray(Configuration.RELEVANT_POS_LIST)) this.relevantItems.add(CanonicalPosTag.valueOf(relevantPOS)); } catch (ConfigurationException e) { this.relevantItems = null; } }
/**
 * Stores a canonical-only copy of the given part-of-speech as the unspecified
 * tag, then forwards the original object to {@code setPosTag}.
 *
 * @param unspecifiedPosTag the part-of-speech whose canonical tag is recorded
 * @throws UnsupportedPosTagStringException if the canonical tag string is
 *         rejected by {@code ByCanonicalPartOfSpeech}
 */
public void setUnspecifiedPosTag(PartOfSpeech unspecifiedPosTag) throws UnsupportedPosTagStringException {
    final String canonicalTag = unspecifiedPosTag.getCanonicalPosTag().toString();
    this.unspecifiedPosTag = new ByCanonicalPartOfSpeech(canonicalTag);
    this.setPosTag(unspecifiedPosTag);
}
/**
 * Restores {@code lemma} and {@code pos} from a serialized key of the form
 * {@code lemma + DELIMITER + posName}.
 * <p>
 * The part-of-speech token {@code "*"} stands for "no part-of-speech" and is
 * decoded to {@code null}; any other token is resolved with
 * {@link CanonicalPosTag#valueOf(String)}.
 *
 * @param key the serialized key to decode
 * @throws UndefinedKeyException if the key does not split into exactly two tokens
 * @throws IllegalArgumentException if the part-of-speech token is neither
 *         {@code "*"} nor the name of a {@code CanonicalPosTag} constant
 */
@Override
public void fromKey(String key) throws UndefinedKeyException {
    // String.split takes a regular expression; DELIMITER is a literal separator,
    // so quote it to keep regex metacharacters from being interpreted.
    String[] toks = key.split(java.util.regex.Pattern.quote(DELIMITER));
    if (toks.length != 2) {
        throw new UndefinedKeyException("Cannot decode key " + key + " to a LemmaPos object, since it contains one or more serialization delimiters");
    }
    this.lemma = toks[0];
    // "*" is the placeholder for an absent part-of-speech, not an enum constant name.
    this.pos = "*".equals(toks[1]) ? null : CanonicalPosTag.valueOf(toks[1]);
}
// Set pos to the string form of CanonicalPosTag.OTHER.
// NOTE(review): the enclosing method is not visible here; presumably a fallback for an unrecognized tag — confirm.
pos = CanonicalPosTag.OTHER.toString();
/**
 * Serializes this item to a single key: the lemma, the delimiter, and then the
 * part-of-speech name, or {@code "*"} when no part-of-speech is set.
 *
 * @return the serialized key
 * @throws UndefinedKeyException if the lemma itself contains the delimiter
 */
@Override
public String toKey() throws UndefinedKeyException {
    if (lemma.contains(DELIMITER)) {
        throw new UndefinedKeyException("Cannot encode lemma " + lemma + ", since it contains one or more serialization delimiters");
    }

    final String posPart;
    if (pos != null) {
        posPart = pos.name();
    } else {
        posPart = "*";
    }
    return lemma + DELIMITER + posPart;
}
/**
 * Serializes this item to every key it can match.
 * <p>
 * With a concrete part-of-speech the result holds the single key
 * {@code lemma + DELIMITER + pos.name()}. With a {@code null} part-of-speech
 * one key is produced for each entry of {@code relevantPos}.
 *
 * @return the set of serialized keys
 * @throws UndefinedKeyException if the lemma contains the delimiter, since such
 *         a key could not be split back unambiguously
 */
@Override
public Set<String> toKeys() throws UndefinedKeyException {
    // Reject any lemma that contains the delimiter, not only one that equals it.
    if (lemma.contains(DELIMITER)) {
        throw new UndefinedKeyException("Cannot encode lemma " + lemma + ", since it contains one or more serialization delimiters");
    }

    Set<String> ret = new HashSet<String>();
    if (pos == null) {
        for (CanonicalPosTag relpos : relevantPos) {
            ret.add(lemma + DELIMITER + relpos);
        }
    } else {
        ret.add(lemma + DELIMITER + pos.name());
    }
    return ret;
}
public LexicalIDM(LexicalSentenceProcessor sentPoc) throws UnsupportedPosTagStringException { m_nounPOS = new ByCanonicalPartOfSpeech(CanonicalPosTag.N.name()); m_lexicalSentenceProcessor = sentPoc; }
@Override public List<LexicalRule<? extends EmptyRuleInfo>> getRules(String leftLemma, PartOfSpeech leftPos, String rightLemma, PartOfSpeech rightPos) throws LexicalResourceException { List<LexicalRule<? extends EmptyRuleInfo>> ret = new ArrayList<LexicalRule<? extends EmptyRuleInfo>>(); //If it's not a noun, we ignore it... if ((leftPos !=null) && (!(leftPos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) { return ret; } //If it's not a noun, we ignore it... if ((rightPos !=null) && (!(rightPos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) { return ret; } try { for (String lemma2 : leftRules.get(leftLemma)) { if (lemma2.equals(rightLemma)) ret.add(new LexicalRule<EmptyRuleInfo>(leftLemma, NOUN, rightLemma, NOUN, null, GEO_RESOURCE_NAME, EMPTY_INFO)); } return ret; } catch (Exception e) { throw new LexicalResourceException("Exception while trying to get Rules For Both sides",e); } }
public ParenthesesExtractor(Lemmatizer lemmatizer) { m_lemmatizer=lemmatizer; m_logger = org.apache.log4j.Logger.getLogger(ParenthesesExtractor.class.getName()); try { m_lemmatizer.init(); m_nounPOS = new eu.excitementproject.eop.common.representation.partofspeech.ByCanonicalPartOfSpeech(eu.excitementproject.eop.common.representation.partofspeech.CanonicalPosTag.N.name()); } catch (UnsupportedPosTagStringException e) { m_logger.warn("should never happend because we only create CanonicalPosTag.NOUN . Exception:",e); } catch (LemmatizerException e) { m_logger.fatal("failed to initialize lemmatizer in ParenthesesExtractor",e); } }
public List<LexicalRule<? extends EmptyRuleInfo>> getRules(String lemma1, PartOfSpeech pos, RedisBasedStringListBasicMap rules) throws LexicalResourceException { List<LexicalRule<? extends EmptyRuleInfo>> ret = new ArrayList<LexicalRule<? extends EmptyRuleInfo>>(); //If it's not a noun, we ignore it... if ((pos !=null) && (!(pos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) { return ret; } //get all rules try { for (String lemma2 : rules.get(lemma1)) { ret.add(new LexicalRule<EmptyRuleInfo>(lemma1, NOUN, lemma2, NOUN, null, GEO_RESOURCE_NAME, EMPTY_INFO)); } return ret; } catch (Exception e) { throw new LexicalResourceException("Exception while trying to get rules",e); } }
public SyntacticIDM(SyntacticUtils utils) throws UnsupportedPosTagStringException, ParserRunException{ super(); m_logger = org.apache.log4j.Logger.getLogger(SyntacticIDM.class.getName()); m_treeParser = utils.getParserInstance(); m_utils = utils; m_nounPOS = new ByCanonicalPartOfSpeech(CanonicalPosTag.N.name()); }
public RedirectExtractor(Lemmatizer lemmatizer) { m_lemmatizer=lemmatizer; m_logger = org.apache.log4j.Logger.getLogger(RedirectExtractor.class.getName()); try { m_lemmatizer.init(); m_nounPOS = new eu.excitementproject.eop.common.representation.partofspeech.ByCanonicalPartOfSpeech(eu.excitementproject.eop.common.representation.partofspeech.CanonicalPosTag.N.name()); } catch (UnsupportedPosTagStringException e) { m_logger.warn("should never happend because we only create CanonicalPosTag.NOUN . Exception:",e); } catch (LemmatizerException e) { m_logger.fatal("failed to initialize lemmatizer in RedirectExtractor",e); } } @Override
public List<LexicalRule<? extends BaseRuleInfo>> getRules(String leftLemma, PartOfSpeech leftPos, String rightLemma, PartOfSpeech rightPos) throws LexicalResourceException { List<RuleData> rulesData; //If it's not a noun, we ignore it... if ((leftPos !=null) && (!(leftPos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) { return new LinkedList<LexicalRule<? extends BaseRuleInfo>>(); } //If it's not a noun, we ignore it... if ((rightPos !=null) && (!(rightPos.getCanonicalPosTag().equals(CanonicalPosTag.N)))) { return new LinkedList<LexicalRule<? extends BaseRuleInfo>>(); } //get all rules try { rulesData = this.m_retrivalTool.getRulesForBothSides(leftLemma, rightLemma, this.m_classifier.getClassifierId()); } catch (Exception e) { throw new LexicalResourceException("Exception while trying to get Rules For Both sides",e); } return makeLexicalRules(rulesData); }
/**
 * Creates the extractor around the given lemmatizer.
 * <p>
 * The lemmatizer is initialized and the noun part-of-speech object is created.
 * Failures are reported through the class logger rather than rethrown; if
 * lemmatizer initialization fails, the noun part-of-speech is not assigned by
 * this constructor.
 *
 * @param lemmatizer the lemmatizer to initialize and keep
 */
public CategoryExtractor(Lemmatizer lemmatizer) {
    m_lemmatizer = lemmatizer;
    m_logger = org.apache.log4j.Logger.getLogger(CategoryExtractor.class.getName());
    try {
        m_lemmatizer.init();
        m_nounPOS = new eu.excitementproject.eop.common.representation.partofspeech.ByCanonicalPartOfSpeech(eu.excitementproject.eop.common.representation.partofspeech.CanonicalPosTag.N.name());
    } catch (UnsupportedPosTagStringException e) {
        // Route the failure through the logger instead of printing to stderr,
        // so it ends up in the same place as the lemmatizer failure below.
        m_logger.warn("unexpected failure while creating the noun part-of-speech in CategoryExtractor", e);
    } catch (LemmatizerException e) {
        m_logger.fatal("failed to initialize lemmatizer in CategoryExtractor", e);
    }
}