/**
 * Removes the given patterns from the pattern stores kept for {@code label}.
 *
 * <p>Three stores are updated, in this order: the counter returned by
 * {@code learnedPatterns.get(label)}, every counter held in
 * {@code learnedPatternsEachIter.get(label)}, and, only when {@code wordsPatExtracted}
 * contains {@code label}, every inner counter of {@code wordsPatExtracted.get(label)}.
 *
 * @param label the label whose stores are updated
 * @param pats the patterns to remove
 */
private void removeLearnedPatterns(String label, Collection<E> pats) {
  Counters.removeKeys(this.learnedPatterns.get(label), pats);

  for (Counter<E> patternsOfIteration : this.learnedPatternsEachIter.get(label).values()) {
    Counters.removeKeys(patternsOfIteration, pats);
  }

  // Nothing has been recorded in wordsPatExtracted for this label: done.
  if (!wordsPatExtracted.containsKey(label)) {
    return;
  }

  for (Entry<CandidatePhrase, ClassicCounter<E>> phraseEntry : this.wordsPatExtracted.get(label).entrySet()) {
    ClassicCounter<E> patternsOfPhrase = phraseEntry.getValue();
    Counters.removeKeys(patternsOfPhrase, pats);
  }
}
/**
 * Applies {@link Counters#divideInPlace} with {@code divisor} to every inner counter of the
 * given two-dimensional counter, then has the counter recompute its total.
 *
 * @param counter the two-dimensional counter to modify in place
 * @param divisor the value passed as divisor for each inner counter
 */
public static <A, B> void divideInPlace(TwoDimensionalCounter<A, B> counter, double divisor) {
  for (Entry<A, ClassicCounter<B>> row : counter.entrySet()) {
    ClassicCounter<B> rowCounts = row.getValue();
    Counters.divideInPlace(rowCounts, divisor);
  }
  // The inner counters were changed directly, so the cached total is refreshed explicitly.
  counter.recomputeTotal();
}
/**
 * Collects the patterns that fail the minimum-support thresholds.
 *
 * <p>A pattern is collected when its inner counter in {@code patternsandWords4Label} has fewer
 * than {@code constVars.minPosPhraseSupportForPat} entries, or when its inner counter in
 * {@code unLabeledPatternsandWords4Label} has fewer than
 * {@code constVars.minUnlabPhraseSupportForPat} entries. Neither input counter is modified;
 * the number of patterns collected by each check is logged at debug level.
 *
 * @param patternsandWords4Label pattern to phrase counts checked against the positive threshold
 * @param unLabeledPatternsandWords4Label pattern to phrase counts checked against the unlabeled threshold
 * @return the set of patterns that failed at least one of the two checks
 */
private Set<E> enforceMinSupportRequirements(TwoDimensionalCounter<E, CandidatePhrase> patternsandWords4Label,
    TwoDimensionalCounter<E, CandidatePhrase> unLabeledPatternsandWords4Label) {
  Set<E> rejected = new HashSet<>();

  // First check: support among the positive phrases.
  for (Entry<E, ClassicCounter<CandidatePhrase>> positiveEntry : patternsandWords4Label.entrySet()) {
    int positiveSupport = positiveEntry.getValue().size();
    if (positiveSupport < constVars.minPosPhraseSupportForPat) {
      rejected.add(positiveEntry.getKey());
    }
  }
  int rejectedByPositiveCheck = rejected.size();
  Redwood.log(Redwood.DBG, "Removing " + rejectedByPositiveCheck + " patterns that do not meet minPosPhraseSupportForPat requirement of >= "
      + constVars.minPosPhraseSupportForPat);

  // Second check: support among the unlabeled phrases.
  for (Entry<E, ClassicCounter<CandidatePhrase>> unlabeledEntry : unLabeledPatternsandWords4Label.entrySet()) {
    int unlabeledSupport = unlabeledEntry.getValue().size();
    if (unlabeledSupport < constVars.minUnlabPhraseSupportForPat) {
      rejected.add(unlabeledEntry.getKey());
    }
  }
  // Only patterns newly added by the second check are counted here.
  int rejectedByUnlabeledCheck = rejected.size() - rejectedByPositiveCheck;
  Redwood.log(Redwood.DBG, "Removing " + rejectedByUnlabeledCheck + " patterns that do not meet minUnlabPhraseSupportForPat requirement of >= "
      + constVars.minUnlabPhraseSupportForPat);

  return rejected;
}
/**
 * Scores each phrase in {@code terms} with a classifier obtained from {@code learnClassifier}.
 *
 * <p>Phrases whose score is NaN or infinite are left out of the result and reported in the
 * debug log. The {@code alreadyIdentifiedWords} argument is not read by this method.
 *
 * @param label the label being scored
 * @param terms phrases to score, each with its own pattern counter
 * @param wordsPatExtracted passed through to {@code learnClassifier}
 * @param allSelectedPatterns passed to both {@code learnClassifier} and the per-phrase scorer
 * @param alreadyIdentifiedWords unused here
 * @param forLearningPatterns passed to both {@code learnClassifier} and the per-phrase scorer
 * @return a counter mapping each phrase with a finite score to that score
 * @throws IOException declared by this method; not thrown directly in this body
 * @throws ClassNotFoundException declared by this method; not thrown directly in this body
 */
@Override
public Counter<CandidatePhrase> scorePhrases(String label, TwoDimensionalCounter<CandidatePhrase, E> terms,
    TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, Counter<E> allSelectedPatterns,
    Set<CandidatePhrase> alreadyIdentifiedWords, boolean forLearningPatterns) throws IOException, ClassNotFoundException {
  getAllLabeledWordsCluster();

  Counter<CandidatePhrase> scores = new ClassicCounter<>();
  edu.stanford.nlp.classify.Classifier classifier = learnClassifier(label, forLearningPatterns, wordsPatExtracted, allSelectedPatterns);

  for (Entry<CandidatePhrase, ClassicCounter<E>> termEntry : terms.entrySet()) {
    CandidatePhrase phrase = termEntry.getKey();
    double score = this.scoreUsingClassifer(classifier, phrase, label, forLearningPatterns, termEntry.getValue(), allSelectedPatterns);

    // Non-finite scores are skipped so they never reach the returned counter.
    if (Double.isNaN(score) || Double.isInfinite(score)) {
      Redwood.log(Redwood.DBG, "Ignoring " + phrase + " because score is " + score);
      continue;
    }
    scores.setCount(phrase, score);
  }

  return scores;
}
/**
 * Discards every (first key, second key) entry whose count is strictly less than
 * {@code countThreshold}, and reports which entries were discarded.
 *
 * @param counter The counter to prune; it is modified by this call.
 * @param countThreshold
 *          The minimum count for an entry to be kept. Entries (strictly) less
 *          than this threshold are discarded.
 * @return The set of discarded entries, as (first key, second key) pairs.
 */
public static <E1, E2> Set<Pair<E1, E2>> retainAbove(
    TwoDimensionalCounter<E1, E2> counter, double countThreshold) {
  // Pass 1: only collect the pairs to drop, so the counter is not changed while it is iterated.
  Set<Pair<E1, E2>> removed = new HashSet<>();
  for (Entry<E1, ClassicCounter<E2>> outer : counter.entrySet()) {
    E1 firstKey = outer.getKey();
    for (E2 secondKey : outer.getValue().keySet()) {
      double count = counter.getCount(firstKey, secondKey);
      if (count < countThreshold) {
        removed.add(new Pair<>(firstKey, secondKey));
      }
    }
  }

  // Pass 2: perform the removals.
  for (Pair<E1, E2> doomed : removed) {
    counter.remove(doomed.first(), doomed.second());
  }
  return removed;
}
Counter<CandidatePhrase> maxPatWeightTerms = new ClassicCounter<>(); Map<CandidatePhrase, E> wordMaxPat = new HashMap<>(); for (Entry<CandidatePhrase, ClassicCounter<E>> en : terms.entrySet()) { Counter<E> weights = new ClassicCounter<>(); for (E k : en.getValue().keySet())
for (Entry<E, ClassicCounter<CandidatePhrase>> en : patternsandWords4Label.entrySet()) { posWords.addAll(en.getKey(), en.getValue().keySet()); for (Entry<E, ClassicCounter<CandidatePhrase>> en : negPatternsandWords4Label.entrySet()) { negWords.addAll(en.getKey(), en.getValue().keySet()); for (Entry<E, ClassicCounter<CandidatePhrase>> en : unLabeledPatternsandWords4Label.entrySet()) { unlabWords.addAll(en.getKey(), en.getValue().keySet());
for (Entry<CandidatePhrase, ClassicCounter<E>> en : terms.entrySet()) { double score = getPatTFIDFScore(en.getKey(), en.getValue(), allSelectedPatterns); tfidfScores.setCount(en.getKey(), score);
for (Map.Entry<E, ClassicCounter<CandidatePhrase>> en: positivePatternsAndWords.entrySet()) {
Counter<E> logpos_i = new ClassicCounter<>(); for (Entry<E, ClassicCounter<CandidatePhrase>> en : patternsandWords4Label .entrySet()) { logpos_i.setCount(en.getKey(), Math.log(en.getValue().size()));
/**
 * Removes the given patterns from the pattern stores kept for {@code label}: the counter
 * returned by {@code learnedPatterns.get(label)} and, only when {@code wordsPatExtracted}
 * contains {@code label}, every inner counter of {@code wordsPatExtracted.get(label)}.
 *
 * @param label the label whose stores are updated
 * @param pats the patterns to remove
 */
void removeLearnedPatterns(String label, Collection<SurfacePattern> pats) {
  Counters.removeKeys(this.learnedPatterns.get(label), pats);

  // Nothing has been recorded in wordsPatExtracted for this label: done.
  if (!wordsPatExtracted.containsKey(label)) {
    return;
  }

  for (Entry<String, ClassicCounter<SurfacePattern>> phraseEntry : this.wordsPatExtracted.get(label).entrySet()) {
    ClassicCounter<SurfacePattern> patternsOfPhrase = phraseEntry.getValue();
    Counters.removeKeys(patternsOfPhrase, pats);
  }
}
/**
 * Removes a single pattern from the pattern stores kept for {@code label}: the counter
 * returned by {@code learnedPatterns.get(label)} and, only when {@code wordsPatExtracted}
 * contains {@code label}, every inner counter of {@code wordsPatExtracted.get(label)}.
 *
 * @param label the label whose stores are updated
 * @param p the pattern to remove
 */
void removeLearnedPattern(String label, SurfacePattern p) {
  this.learnedPatterns.get(label).remove(p);

  // Nothing has been recorded in wordsPatExtracted for this label: done.
  if (!wordsPatExtracted.containsKey(label)) {
    return;
  }

  for (Entry<String, ClassicCounter<SurfacePattern>> phraseEntry : this.wordsPatExtracted.get(label).entrySet()) {
    ClassicCounter<SurfacePattern> patternsOfPhrase = phraseEntry.getValue();
    patternsOfPhrase.remove(p);
  }
}
/**
 * Removes the given patterns from the pattern stores kept for {@code label}.
 *
 * <p>Three stores are updated, in this order: the counter returned by
 * {@code learnedPatterns.get(label)}, every counter held in
 * {@code learnedPatternsEachIter.get(label)}, and, only when {@code wordsPatExtracted}
 * contains {@code label}, every inner counter of {@code wordsPatExtracted.get(label)}.
 *
 * @param label the label whose stores are updated
 * @param pats the patterns to remove
 */
private void removeLearnedPatterns(String label, Collection<E> pats) {
  Counters.removeKeys(this.learnedPatterns.get(label), pats);

  for (Counter<E> patternsOfIteration : this.learnedPatternsEachIter.get(label).values()) {
    Counters.removeKeys(patternsOfIteration, pats);
  }

  // Nothing has been recorded in wordsPatExtracted for this label: done.
  if (!wordsPatExtracted.containsKey(label)) {
    return;
  }

  for (Entry<CandidatePhrase, ClassicCounter<E>> phraseEntry : this.wordsPatExtracted.get(label).entrySet()) {
    ClassicCounter<E> patternsOfPhrase = phraseEntry.getValue();
    Counters.removeKeys(patternsOfPhrase, pats);
  }
}
/**
 * Applies {@link Counters#divideInPlace} with {@code divisor} to every inner counter of the
 * given two-dimensional counter, then has the counter recompute its total.
 *
 * @param counter the two-dimensional counter to modify in place
 * @param divisor the value passed as divisor for each inner counter
 */
public static <A, B> void divideInPlace(TwoDimensionalCounter<A, B> counter, double divisor) {
  for (Entry<A, ClassicCounter<B>> row : counter.entrySet()) {
    ClassicCounter<B> rowCounts = row.getValue();
    Counters.divideInPlace(rowCounts, divisor);
  }
  // The inner counters were changed directly, so the cached total is refreshed explicitly.
  counter.recomputeTotal();
}
/**
 * Applies {@link Counters#divideInPlace} with {@code divisor} to every inner counter of the
 * given two-dimensional counter, then has the counter recompute its total.
 *
 * @param counter the two-dimensional counter to modify in place
 * @param divisor the value passed as divisor for each inner counter
 */
public static <A, B> void divideInPlace(TwoDimensionalCounter<A, B> counter, double divisor) {
  for (Entry<A, ClassicCounter<B>> row : counter.entrySet()) {
    ClassicCounter<B> rowCounts = row.getValue();
    Counters.divideInPlace(rowCounts, divisor);
  }
  // The inner counters were changed directly, so the cached total is refreshed explicitly.
  counter.recomputeTotal();
}
/**
 * Applies {@link Counters#divideInPlace} with {@code divisor} to every inner counter of the
 * given two-dimensional counter, then has the counter recompute its total.
 *
 * @param counter the two-dimensional counter to modify in place
 * @param divisor the value passed as divisor for each inner counter
 */
public static <A, B> void divideInPlace(TwoDimensionalCounter<A, B> counter, double divisor) {
  for (Entry<A, ClassicCounter<B>> row : counter.entrySet()) {
    ClassicCounter<B> rowCounts = row.getValue();
    Counters.divideInPlace(rowCounts, divisor);
  }
  // The inner counters were changed directly, so the cached total is refreshed explicitly.
  counter.recomputeTotal();
}
/**
 * Collects the patterns that fail the minimum-support thresholds.
 *
 * <p>A pattern is collected when its inner counter in {@code patternsandWords4Label} has fewer
 * than {@code constVars.minPosPhraseSupportForPat} entries, or when its inner counter in
 * {@code unLabeledPatternsandWords4Label} has fewer than
 * {@code constVars.minUnlabPhraseSupportForPat} entries. Neither input counter is modified;
 * the number of patterns collected by each check is logged at debug level.
 *
 * @param patternsandWords4Label pattern to phrase counts checked against the positive threshold
 * @param unLabeledPatternsandWords4Label pattern to phrase counts checked against the unlabeled threshold
 * @return the set of patterns that failed at least one of the two checks
 */
private Set<E> enforceMinSupportRequirements(TwoDimensionalCounter<E, CandidatePhrase> patternsandWords4Label,
    TwoDimensionalCounter<E, CandidatePhrase> unLabeledPatternsandWords4Label) {
  Set<E> rejected = new HashSet<>();

  // First check: support among the positive phrases.
  for (Entry<E, ClassicCounter<CandidatePhrase>> positiveEntry : patternsandWords4Label.entrySet()) {
    int positiveSupport = positiveEntry.getValue().size();
    if (positiveSupport < constVars.minPosPhraseSupportForPat) {
      rejected.add(positiveEntry.getKey());
    }
  }
  int rejectedByPositiveCheck = rejected.size();
  Redwood.log(Redwood.DBG, "Removing " + rejectedByPositiveCheck + " patterns that do not meet minPosPhraseSupportForPat requirement of >= "
      + constVars.minPosPhraseSupportForPat);

  // Second check: support among the unlabeled phrases.
  for (Entry<E, ClassicCounter<CandidatePhrase>> unlabeledEntry : unLabeledPatternsandWords4Label.entrySet()) {
    int unlabeledSupport = unlabeledEntry.getValue().size();
    if (unlabeledSupport < constVars.minUnlabPhraseSupportForPat) {
      rejected.add(unlabeledEntry.getKey());
    }
  }
  // Only patterns newly added by the second check are counted here.
  int rejectedByUnlabeledCheck = rejected.size() - rejectedByPositiveCheck;
  Redwood.log(Redwood.DBG, "Removing " + rejectedByUnlabeledCheck + " patterns that do not meet minUnlabPhraseSupportForPat requirement of >= "
      + constVars.minUnlabPhraseSupportForPat);

  return rejected;
}
/**
 * Collects the surface patterns that fail the minimum-support thresholds.
 *
 * <p>A pattern is collected when its inner counter in {@code patternsandWords4Label} has fewer
 * than {@code constVars.minPosPhraseSupportForPat} entries, or when its inner counter in
 * {@code unLabeledPatternsandWords4Label} has fewer than
 * {@code constVars.minUnlabPhraseSupportForPat} entries. Neither input counter is modified;
 * the number of patterns collected by each check is logged at debug level.
 *
 * @param patternsandWords4Label pattern to phrase counts checked against the positive threshold
 * @param unLabeledPatternsandWords4Label pattern to phrase counts checked against the unlabeled threshold
 * @return the set of patterns that failed at least one of the two checks
 */
private Set<SurfacePattern> enforceMinSupportRequirements(TwoDimensionalCounter<SurfacePattern, String> patternsandWords4Label,
    TwoDimensionalCounter<SurfacePattern, String> unLabeledPatternsandWords4Label) {
  Set<SurfacePattern> rejected = new HashSet<SurfacePattern>();

  // First check: support among the positive phrases.
  for (Entry<SurfacePattern, ClassicCounter<String>> positiveEntry : patternsandWords4Label.entrySet()) {
    int positiveSupport = positiveEntry.getValue().size();
    if (positiveSupport < constVars.minPosPhraseSupportForPat) {
      rejected.add(positiveEntry.getKey());
    }
  }
  int rejectedByPositiveCheck = rejected.size();
  Redwood.log(Redwood.DBG, "Removing " + rejectedByPositiveCheck + " patterns that do not meet minPosPhraseSupportForPat requirement of >= "
      + constVars.minPosPhraseSupportForPat);

  // Second check: support among the unlabeled phrases.
  for (Entry<SurfacePattern, ClassicCounter<String>> unlabeledEntry : unLabeledPatternsandWords4Label.entrySet()) {
    int unlabeledSupport = unlabeledEntry.getValue().size();
    if (unlabeledSupport < constVars.minUnlabPhraseSupportForPat) {
      rejected.add(unlabeledEntry.getKey());
    }
  }
  // Only patterns newly added by the second check are counted here.
  int rejectedByUnlabeledCheck = rejected.size() - rejectedByPositiveCheck;
  Redwood.log(Redwood.DBG, "Removing " + rejectedByUnlabeledCheck + " patterns that do not meet minUnlabPhraseSupportForPat requirement of >= "
      + constVars.minUnlabPhraseSupportForPat);

  return rejected;
}