protected static Datum<String,String> makeStopLights(String ns, String ew) { List<String> features = new ArrayList<>(); // Create the north-south light feature features.add("NS=" + ns); // Create the east-west light feature features.add("EW=" + ew); // Create the label String label = (ns.equals(ew) ? BROKEN : WORKING); return new BasicDatum<>(features, label); }
/**
 * Renders the weights of this classifier as a String.
 *
 * <p>Writes a header line and then the output of {@code justificationOf}
 * for a datum built from {@code features()} with a null label.
 *
 * @return the text collected from the header and {@code justificationOf}
 */
public String toAllWeightsString() {
  StringWriter buffer = new StringWriter();
  PrintWriter out = new PrintWriter(buffer);
  out.println("Linear classifier with the following weights");
  // An unlabeled datum built from features().
  Datum<L, F> everyFeature = new BasicDatum<>(features(), (L) null);
  justificationOf(everyFeature, out);
  return buffer.toString();
}
public static Datum<String, String> svmLightLineToDatum(String l) { line1++; l = l.replaceAll("#.*", ""); // remove any trailing comments String[] line = l.split("\\s+"); Collection<String> features = new ArrayList<>(); for (int i = 1; i < line.length; i++) { String[] f = line[i].split(":"); if (f.length != 2) { logger.info("Dataset error: line " + line1); } int val = (int) Double.parseDouble(f[1]); for (int j = 0; j < val; j++) { features.add(f[0]); } } features.add(String.valueOf(Integer.MAX_VALUE)); // a constant feature for a class Datum<String, String> d = new BasicDatum<>(features, line[0]); return d; }
/**
 * Prints every feature in the classifier together with the weight it
 * assigns to each class, with feature names in sorted order.
 *
 * <p>Output goes to an auto-flushing {@code PrintWriter} wrapped around
 * {@code System.err}.
 */
public void dumpSorted() {
  // An unlabeled datum built from features().
  Datum<L, F> everyFeature = new BasicDatum<>(features(), (L) null);
  PrintWriter stderr = new PrintWriter(System.err, true);
  justificationOf(everyFeature, stderr, true);
}
/**
 * Prints every feature in the classifier together with the weight it
 * assigns to each class. Output goes to stderr.
 */
public void dump() {
  // An unlabeled datum built from features().
  Datum<L, F> everyFeature = new BasicDatum<>(features(), (L) null);
  justificationOf(everyFeature);
}
/**
 * Flags this object as a singleton when the predictor gives it a low score.
 *
 * <p>A datum labeled {@code "1"} is built from {@code getSingletonFeatures(dict)}
 * and scored with {@code predictor.probabilityOf}. If the result is below 0.2,
 * {@code isSingleton} is set to true. The flag is never reset to false here.
 *
 * @param predictor classifier used to score the singleton features
 * @param dict dictionaries passed on to {@code getSingletonFeatures}
 */
private void setSingleton(LogisticClassifier<String, String> predictor, Dictionaries dict){
  double corefProbability = predictor.probabilityOf(
      new BasicDatum<>(getSingletonFeatures(dict), "1"));
  if (corefProbability < 0.2) {
    this.isSingleton = true;
  }
}
/**
 * Flags this object as a singleton when the predictor gives it a low score.
 *
 * <p>A datum labeled {@code "1"} is built from {@code getSingletonFeatures(dict)}
 * and scored with {@code predictor.probabilityOf}. If the result is below 0.2,
 * {@code isSingleton} is set to true. The flag is never reset to false here.
 *
 * @param predictor classifier used to score the singleton features
 * @param dict dictionaries passed on to {@code getSingletonFeatures}
 */
private void setSingleton(LogisticClassifier<String, String> predictor, Dictionaries dict){
  double corefProbability = predictor.probabilityOf(
      new BasicDatum<>(getSingletonFeatures(dict), "1"));
  if (corefProbability < 0.2) {
    this.isSingleton = true;
  }
}
/**
 * Rebuilds the datum stored at the given position.
 *
 * <p>The feature ids in {@code data[index]} are turned back into feature
 * objects through {@code featureIndex}, and the label id in
 * {@code labels[index]} into a label through {@code labelIndex}.
 *
 * @param index position of the datum
 * @return a new {@link BasicDatum} with the features and label at that position
 */
@Override
public Datum<L, F> getDatum(int index) {
  return new BasicDatum<>(
      featureIndex.objects(data[index]),
      labelIndex.get(labels[index]));
}
/**
 * Prints every feature in the classifier together with the weight it
 * assigns to each class.
 *
 * @param pw the PrintWriter that receives the output
 */
public void dump(PrintWriter pw) {
  // An unlabeled datum built from features().
  Datum<L, F> everyFeature = new BasicDatum<>(features(), (L) null);
  justificationOf(everyFeature, pw);
}
/** * Takes a String[] of elements and translates them into a Datum of features. * If real-valued features are used, this method accesses makeRVFDatumFromLine * and returns an RVFDatum; otherwise, categorical features are used. * * @param strings The elements that features are made from (the columns of a TSV/CSV file) * @return A Datum (may be an RVFDatum; never null) */ public Datum<String,String> makeDatumFromStrings(String[] strings) { if (globalFlags.usesRealValues) { return makeRVFDatumFromStrings(strings); } if (globalFlags.featureFormat) { Collection<String> theFeatures = new ArrayList<>(); for (int i = 0; i < strings.length; i++) { if (i != globalFlags.goldAnswerColumn) { if (globalFlags.significantColumnId) { theFeatures.add(String.format("%d:%s", i, strings[i])); } else { theFeatures.add(strings[i]); } } } return new BasicDatum<>(theFeatures, strings[globalFlags.goldAnswerColumn]); } else { //logger.info("Read in " + strings); return makeDatum(strings); } }
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d, Map<L,L2> labelMapping, L2 defaultLabel) { // TODO: How to copy datum? L2 newLabel = labelMapping.get(d.label()); if (newLabel == null) { newLabel = defaultLabel; } if (d instanceof RVFDatum) { return new RVFDatum<>(((RVFDatum<L, F>) d).asFeaturesCounter(), newLabel); } else { return new BasicDatum<>(d.asFeatures(), newLabel); } }
/** Builds a single Datum from the data list, centered on position loc.
 *
 *  For every clique of every feature factory, the clique features at
 *  {@code loc} are computed over a padded view of {@code info}, passed
 *  through {@code addOtherClasses}, and collected. The collected features
 *  are handed to {@code printFeatures}, and the label is the
 *  {@code AnswerAnnotation} of the item at {@code loc}.
 *
 *  @param info A List of IN objects
 *  @param loc The position in the info list to focus feature creation on
 *  @param featureFactories The factories that construct features out of the item
 *  @return A Datum (BasicDatum) representing this data instance
 */
public Datum<String, String> makeDatum(List<IN> info, int loc, List<FeatureFactory<IN>> featureFactories) {
  PaddedList<IN> paddedInfo = new PaddedList<>(info, pad);
  Collection<String> collected = new ArrayList<>();

  for (FeatureFactory<IN> factory : featureFactories) {
    for (Clique clique : factory.getCliques()) {
      Collection<String> cliqueFeats = factory.getCliqueFeatures(paddedInfo, loc, clique);
      collected.addAll(addOtherClasses(cliqueFeats, paddedInfo, loc, clique));
    }
  }

  printFeatures(paddedInfo.get(loc), collected);
  CoreLabel focus = info.get(loc);
  return new BasicDatum<>(collected, focus.get(CoreAnnotations.AnswerAnnotation.class));
}
/** * Extracts all the features from a certain input datum. * * @param strs The data String[] to extract features from * @return The constructed Datum */ private Datum<String,String> makeDatum(String[] strs) { String goldAnswer = globalFlags.goldAnswerColumn < strs.length ? strs[globalFlags.goldAnswerColumn]: ""; List<String> theFeatures = new ArrayList<>(); Collection<String> globalFeatures = Generics.newHashSet(); if (globalFlags.useClassFeature) { globalFeatures.add("CLASS"); } addAllInterningAndPrefixing(theFeatures, globalFeatures, ""); for (int i = 0; i < flags.length; i++) { Collection<String> featuresC = Generics.newHashSet();//important that this is a hash set to prevent same feature from being added multiple times makeDatum(strs[i], flags[i], featuresC, goldAnswer); addAllInterningAndPrefixing(theFeatures, featuresC, i + "-"); } if (globalFlags.printFeatures != null) { printFeatures(strs, theFeatures); } //System.out.println("Features are: " + theFeatures); return new BasicDatum<>(theFeatures, goldAnswer); }
if(head == null) continue; ArrayList<String> feats = mention.getSingletonFeatures(dict); dataset.add(new BasicDatum<>(feats, "1")); dataset.add(new BasicDatum<>( predicted_men.getSingletonFeatures(dict), "0"));
if(head == null) continue; ArrayList<String> feats = mention.getSingletonFeatures(dict); dataset.add(new BasicDatum<>(feats, "1")); dataset.add(new BasicDatum<>( predicted_men.getSingletonFeatures(dict), "0"));
/**
 * Makes sure {@code logProbs} holds the tag scores for the given word.
 *
 * <p>Nothing happens when {@code word} equals {@code lastWord}. Otherwise
 * {@code lastWord} is updated and {@code logProbs} is recomputed:
 * <ul>
 *   <li>If the word has an entry in {@code functionWordTags}, every tag whose
 *       basic category equals that entry gets 0 and every other tag gets
 *       negative infinity.</li>
 *   <li>Otherwise the scorer's log probabilities for the word's features are
 *       used. When {@code subtractTagScore} is set, the log of
 *       {@code tagDist.probabilityOf(tag)} is subtracted from each tag.</li>
 * </ul>
 *
 * <p>NOTE(review): the early return checks only {@code word}, so calling this
 * again for the same word with a different {@code subtractTagScore} does not
 * recompute the scores. Confirm that callers never rely on that.
 *
 * @param word index of the word in {@code wordIndex}
 * @param subtractTagScore whether to subtract the log tag probability from each tag's score
 */
private void ensureProbs(int word, boolean subtractTagScore) {
  if (word == lastWord) {
    return;
  }
  lastWord = word;

  if (functionWordTags.containsKey(wordIndex.get(word))) {
    // Function word: only tags of the listed category are allowed.
    logProbs = new ClassicCounter<>();
    String trueTag = functionWordTags.get(wordIndex.get(word));
    for (String tag : tagIndex.objectsList()) {
      boolean matches = ctlp.basicCategory(tag).equals(trueTag);
      logProbs.setCount(tag, matches ? 0 : Double.NEGATIVE_INFINITY);
    }
    return;
  }

  Datum datum = new BasicDatum(featExtractor.makeFeatures(wordIndex.get(word)));
  logProbs = scorer.logProbabilityOf(datum);
  if ( ! subtractTagScore) {
    return;
  }
  for (String tag : logProbs.keySet()) {
    logProbs.incrementCount(tag, -Math.log(tagDist.probabilityOf(tag)));
  }
}
for (int i = testMin; i < testMax; i++) { score -= c.logProbabilityOf(new BasicDatum<>(featureIndex.objects(data[i]))).getCount(labelIndex.get(labels[i]));
while(iter.hasNext()){ RVFDatum<String, ScorePhraseMeasures> inst = iter.next(); newdataset.add(new BasicDatum<>(inst.asFeatures(), inst.label()));
else feat = this.getFeatures(label, word, patternsThatExtractedPat, allSelectedPatterns); BasicDatum<String, ScorePhraseMeasures> d = new BasicDatum<>(feat.keySet(), Boolean.FALSE.toString()); Counter<String> sc = ((MultinomialLogisticClassifier)classifier).probabilityOf(d); score = sc.getCount(Boolean.TRUE.toString());
count = 1; data.add(new BasicDatum(featExtractor.makeFeatures(word.word()), word.tag()), count);