public List<String> annotateMulticlass(List<Datum<String, String>> testDatums) {
  List<String> predictedLabels = new ArrayList<>();
  for (Datum<String, String> testDatum : testDatums) {
    String label = classOf(testDatum, null);
    Counter<String> probs = probabilityOf(testDatum);
    double prob = probs.getCount(label);

    // Only build the (potentially expensive) justification string when FINE logging is enabled.
    if (logger.isLoggable(Level.FINE)) {
      StringWriter sw = new StringWriter();
      PrintWriter pw = new PrintWriter(sw);
      justificationOf(testDatum, pw, label);
      logger.fine("JUSTIFICATION for label GOLD:" + testDatum.label() + " SYS:" + label +
          " (prob:" + prob + "):\n" + sw.toString() + "\nJustification done.");
    }

    predictedLabels.add(label);
    if (! testDatum.label().equals(label)) {
      logger.info("Classification: found different type " + label + " for relation: " + testDatum);
    } else {
      logger.info("Classification: found similar type " + label + " for relation: " + testDatum);
    }
  }
  return predictedLabels;
}
@Override
public void add(Datum<L, F> d) {
  add(d.asFeatures(), d.label());
}
public void add(Datum<L, F> d, float weight) {
  add(d.asFeatures(), d.label(), weight);
}
public <L,F> PrecisionRecallStats(Classifier<L,F> classifier, Dataset<L,F> data, L positiveClass) {
  for (int i = 0; i < data.size(); ++i) {
    Datum<L,F> d = data.getDatum(i);
    L guess = classifier.classOf(d);
    L label = d.label();
    boolean guessPositive = guess.equals(positiveClass);
    boolean isPositive = label.equals(positiveClass);
    if (isPositive && guessPositive) tpCount++;
    if (isPositive && !guessPositive) fnCount++;
    if (!isPositive && guessPositive) fpCount++;
  }
}
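// Minimal, self-contained sketch (not CoreNLP API) of what the tp/fp/fn tallies
// collected above translate into; the counts here are made up for illustration.
public class PrecisionRecallSketch {
  public static void main(String[] args) {
    int tpCount = 8, fpCount = 2, fnCount = 4;
    double precision = tpCount / (double) (tpCount + fpCount); // 8/10 = 0.80
    double recall = tpCount / (double) (tpCount + fnCount);    // 8/12 ≈ 0.67
    double f1 = 2 * precision * recall / (precision + recall); // ≈ 0.73
    System.out.printf("P=%.2f R=%.2f F1=%.2f%n", precision, recall, f1);
  }
}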
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d, Map<L,L2> labelMapping, L2 defaultLabel) {
  // TODO: How to copy datum?
  L2 newLabel = labelMapping.get(d.label());
  if (newLabel == null) {
    newLabel = defaultLabel;
  }

  if (d instanceof RVFDatum) {
    return new RVFDatum<>(((RVFDatum<L, F>) d).asFeaturesCounter(), newLabel);
  } else {
    return new BasicDatum<>(d.asFeatures(), newLabel);
  }
}
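// Hedged usage sketch for mapDatum above: collapsing fine-grained labels into a
// coarser scheme, with a fallback for unmapped labels. Assumes the static method is
// in scope and that edu.stanford.nlp.ling.BasicDatum/Datum and java.util.* are imported;
// the feature strings and label names are illustrative only.
Map<String, String> mapping = new HashMap<>();
mapping.put("ORG", "ENTITY");
mapping.put("PER", "ENTITY");
Datum<String, String> fineGrained = new BasicDatum<>(Arrays.asList("w=Google", "cap=true"), "ORG");
Datum<String, String> coarse = mapDatum(fineGrained, mapping, "OTHER"); // label becomes "ENTITY"
Datum<String, String> unmapped = new BasicDatum<>(Arrays.asList("w=blue"), "COLOR");
Datum<String, String> fallback = mapDatum(unmapped, mapping, "OTHER");  // label becomes "OTHER"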
public double probabilityOf(Datum<L, F> example) {
  if (example instanceof RVFDatum<?,?>) {
    return probabilityOfRVFDatum((RVFDatum<L,F>) example);
  }
  return probabilityOf(example.asFeatures(), example.label());
}
int labelID = labeledDataset.labelIndex.indexOf(datum.label());
for (F feature : datum.asFeatures()) {
  if (geFeatureMap.containsKey(feature)) {
@Override // If you edit me, also take care of WeightedRVFDataset
public void add(Datum<L, F> d) {
  if (d instanceof RVFDatum<?, ?>) {
    addLabel(d.label());
    addFeatures(((RVFDatum<L, F>) d).asFeaturesCounter());
    size++;
  } else {
    addLabel(d.label());
    addFeatures(Counters.asCounter(d.asFeatures()));
    size++;
  }
}
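// Hedged usage sketch for add(Datum) above, assuming an RVF-style dataset class such as
// CoreNLP's edu.stanford.nlp.classify.RVFDataset; feature names and values are illustrative.
// RVF datums keep their real-valued counter as-is; plain datums have their features counted.
RVFDataset<String, String> rvfDataset = new RVFDataset<>();
Counter<String> feats = new ClassicCounter<>();
feats.incrementCount("length", 3.0);
feats.incrementCount("hasDigit", 1.0);
rvfDataset.add(new RVFDatum<>(feats, "POSITIVE"));                        // RVF branch
rvfDataset.add(new BasicDatum<>(Arrays.asList("w=x", "w=x"), "NEGATIVE")); // non-RVF branch: counts become 2.0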
/**
 * Constructs a new RVFDatum taking the data from a Datum. <i>Implementation
 * note:</i> This constructor allocates its own counter over features, but is
 * only guaranteed correct if the label and feature names are immutable.
 *
 * @param m The Datum to copy.
 */
public RVFDatum(Datum<L, F> m) {
  this.features = new ClassicCounter<>();
  for (F key : m.asFeatures()) {
    features.incrementCount(key, 1.0);
  }
  setLabel(m.label());
}
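// Hedged example for the copy constructor above: repeated features in a BasicDatum
// become counts in the resulting RVFDatum. Assumes edu.stanford.nlp.ling.BasicDatum,
// edu.stanford.nlp.ling.RVFDatum, and java.util.Arrays are in scope; data is illustrative.
Datum<String, String> basic =
    new BasicDatum<>(Arrays.asList("word=the", "word=the", "suffix=e"), "DET");
RVFDatum<String, String> rvf = new RVFDatum<>(basic);
// rvf.asFeaturesCounter().getCount("word=the") == 2.0, rvf.label().equals("DET")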
public void add(Datum<L, F> d, String src, String id) {
  if (d instanceof RVFDatum<?, ?>) {
    addLabel(d.label());
    addFeatures(((RVFDatum<L, F>) d).asFeaturesCounter());
    addSourceAndId(src, id);
    size++;
  } else {
    addLabel(d.label());
    addFeatures(Counters.asCounter(d.asFeatures()));
    addSourceAndId(src, id);
    size++;
  }
}
Counter<L> scores = classifier.logProbabilityOf(d);
L guess = Counters.argmax(scores);
L correctLab = d.label();
double guessScore = scores.getCount(guess);
double correctScore = scores.getCount(correctLab);
public <F> double score(ProbabilisticClassifier<L,F> classifier, GeneralDataset<L,F> data) {
  ArrayList<Pair<Double, Integer>> dataScores = new ArrayList<>();
  for (int i = 0; i < data.size(); i++) {
    Datum<L,F> d = data.getRVFDatum(i);
    Counter<L> scores = classifier.logProbabilityOf(d);
    int labelD = d.label().equals(posLabel) ? 1 : 0;
    dataScores.add(new Pair<>(Math.exp(scores.getCount(posLabel)), labelD));
  }
  PRCurve prc = new PRCurve(dataScores);

  confWeightedAccuracy = prc.cwa();
  accuracy = prc.accuracy();
  optAccuracy = prc.optimalAccuracy();
  optConfWeightedAccuracy = prc.optimalCwa();
  logLikelihood = prc.logLikelihood();
  accrecall = prc.cwaArray();
  optaccrecall = prc.optimalCwaArray();
  return accuracy;
}
/**
 * Returns the log conditional likelihood of the given dataset. Note that, as
 * implemented below, the accumulated value is the negative log conditional
 * likelihood plus a quadratic prior penalty (i.e., the regularized objective).
 *
 * @return The (negative) log conditional likelihood of the given dataset.
 */
public double loglikelihood(List<IN> lineInfos) {
  double cll = 0.0;

  for (int i = 0; i < lineInfos.size(); i++) {
    Datum<String, String> d = makeDatum(lineInfos, i, featureFactories);
    Counter<String> c = classifier.logProbabilityOf(d);

    double total = Double.NEGATIVE_INFINITY;
    for (String s : c.keySet()) {
      total = SloppyMath.logAdd(total, c.getCount(s));
    }
    cll -= c.getCount(d.label()) - total;
  }

  // quadratic prior
  // HN: TODO: add other priors
  if (classifier instanceof LinearClassifier) {
    double sigmaSq = flags.sigma * flags.sigma;
    LinearClassifier<String, String> lc = (LinearClassifier<String, String>) classifier;
    for (String feature : lc.features()) {
      for (String classLabel : classIndex) {
        double w = lc.weight(feature, classLabel);
        cll += w * w / 2.0 / sigmaSq;
      }
    }
  }
  return cll;
}
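// Minimal sketch of the normalization used above: `total` is the log-sum-exp of the
// per-label scores, so score(gold) - total is the normalized log probability of the gold
// label. Uses CoreNLP's SloppyMath.logAdd (edu.stanford.nlp.math); scores are made up.
double total = Double.NEGATIVE_INFINITY;
double[] labelScores = {-1.2, -0.3, -2.5};   // illustrative unnormalized log scores
for (double s : labelScores) {
  total = SloppyMath.logAdd(total, s);       // running log(sum(exp(scores)))
}
double goldLogProb = labelScores[1] - total; // each datum contributes -goldLogProb to cll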
Collection<F> features = datum.asFeatures();
for (F feature : features) {
  int i = indexOf(featureIndex.indexOf(feature), labelIndex.indexOf(datum.label()));
  if (dataWeights == null) {
    derivativeNumerator[i] -= 1;
    int label = this.labelIndex.indexOf(datum.label());
    double dV = sums[label] - total;
    value -= dV;
public Classifier<L, F> trainClassifier(Iterable<Datum<L, F>> dataIterable) {
  Minimizer<DiffFunction> minimizer = getMinimizer();
  Index<F> featureIndex = Generics.newIndex();
  Index<L> labelIndex = Generics.newIndex();
  for (Datum<L, F> d : dataIterable) {
    labelIndex.add(d.label());
    featureIndex.addAll(d.asFeatures()); // If there are duplicates, it doesn't add them again.
  }
  logger.info(String.format("Training linear classifier with %d features and %d labels",
      featureIndex.size(), labelIndex.size()));

  LogConditionalObjectiveFunction<L, F> objective =
      new LogConditionalObjectiveFunction<>(dataIterable, logPrior, featureIndex, labelIndex);
  // [cdm 2014] Commented out next line. Why not use the logPrior set up previously and used at creation???
  // objective.setPrior(new LogPrior(LogPrior.LogPriorType.QUADRATIC));

  double[] initial = objective.initial();
  double[] weights = minimizer.minimize(objective, TOL, initial);
  return new LinearClassifier<>(objective.to2D(weights), featureIndex, labelIndex);
}
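// Hedged usage sketch for trainClassifier above, assuming it is the CoreNLP
// LinearClassifierFactory method (edu.stanford.nlp.classify); the feature strings and
// labels are toy data. Needs edu.stanford.nlp.ling.BasicDatum and java.util.* imports.
List<Datum<String, String>> train = new ArrayList<>();
train.add(new BasicDatum<>(Arrays.asList("fuzzy", "barks"), "dog"));
train.add(new BasicDatum<>(Arrays.asList("fuzzy", "meows"), "cat"));
train.add(new BasicDatum<>(Arrays.asList("purrs", "meows"), "cat"));
LinearClassifierFactory<String, String> factory = new LinearClassifierFactory<>();
Classifier<String, String> clf = factory.trainClassifier(train);
String guess = clf.classOf(new BasicDatum<>(Arrays.asList("meows"), (String) null)); // "cat"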
for (Datum<String, String> datum : dataset) {
  String guess = classifier.classOf(datum);
  trainAccuracy.predict(Collections.singleton(guess), Collections.singleton(datum.label()));
/**
 * Method to convert features from counts to L1-normalized TF-IDF based features.
 *
 * @param datum a datum with a collection of features.
 * @param featureDocCounts a counter of doc-counts for each feature.
 * @return RVFDatum with L1-normalized tf-idf features.
 */
public RVFDatum<L,F> getL1NormalizedTFIDFDatum(Datum<L,F> datum, Counter<F> featureDocCounts) {
  Counter<F> tfidfFeatures = new ClassicCounter<>();
  for (F feature : datum.asFeatures()) {
    if (featureDocCounts.containsKey(feature))
      tfidfFeatures.incrementCount(feature, 1.0);
  }

  double l1norm = 0;
  for (F feature : tfidfFeatures.keySet()) {
    double idf = Math.log(((double) (this.size() + 1)) / (featureDocCounts.getCount(feature) + 0.5));
    double tf = tfidfFeatures.getCount(feature);
    tfidfFeatures.setCount(feature, tf * idf);
    l1norm += tf * idf;
  }
  for (F feature : tfidfFeatures.keySet()) {
    double tfidf = tfidfFeatures.getCount(feature);
    tfidfFeatures.setCount(feature, tfidf / l1norm);
  }
  return new RVFDatum<>(tfidfFeatures, datum.label());
}
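// Worked example of the idf weighting used above (a sketch with made-up numbers):
// for a dataset of size N, idf = log((N + 1) / (docCount + 0.5)); the tf*idf weights
// are then divided by their L1 norm so the resulting feature vector sums to 1.
int datasetSize = 100;          // this.size() in the method above
double docCount = 10.0;         // number of documents containing the feature
double idf = Math.log((datasetSize + 1) / (docCount + 0.5)); // log(101 / 10.5) ≈ 2.26
double tf = 2.0;                // raw count of the feature in this datum
double tfidf = tf * idf;        // ≈ 4.53 before L1 normalization across all features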