edu.stanford.nlp.ling.Datum.asFeatures java code examples

/**
 * Compares this Datum with another object on the basis of features alone.
 * Labels are not compared (whether they should be is an open question
 * carried over from the original comment).
 *
 * @param o the object to compare against
 * @return {@code true} if {@code o} is a {@code Datum} whose
 *         {@code asFeatures()} equals this datum's features;
 *         {@code false} otherwise, including when {@code o} is not a Datum
 */
@SuppressWarnings("unchecked")
@Override
public boolean equals(Object o) {
  if (o instanceof Datum) {
    Datum<LabelType, FeatureType> other = (Datum<LabelType, FeatureType>) o;
    return features.equals(other.asFeatures());
  }
  return false;
}

/**
 * Tests feature-level equality with another Datum. Labels are ignored
 * (whether they should be compared is an open question).
 * (CDM Feb 2012: this also does not respect the contract for equals
 * correctly, since it can give one-way equality with other kinds of Datum.)
 *
 * @param o The object to test equality with
 * @return {@code true} if {@code o} is a Datum whose {@code asFeatures()}
 *         equals this datum's features; {@code false} otherwise
 */
@Override
public boolean equals(Object o) {
  return (o instanceof Datum)
      && features.equals(((Datum<?, ?>) o).asFeatures());
}

/**
 * Classifies a datum. An {@code RVFDatum} is routed to
 * {@code classOfRVFDatum}; any other datum is classified from its
 * feature collection.
 *
 * @param datum the datum to classify
 * @return the class chosen for {@code datum}
 */
@Override
public L classOf(Datum<L, F> datum) {
  if (!(datum instanceof RVFDatum<?,?>)) {
    return classOf(datum.asFeatures());
  }
  return classOfRVFDatum((RVFDatum<L,F>) datum);
}

/**
 * Adds a datum by delegating to the overload that takes a feature
 * collection and a label.
 *
 * @param d the datum whose {@code asFeatures()} and {@code label()} are passed on
 */
@Override
public void add(Datum<L, F> d) {
 add(d.asFeatures(), d.label());
}

/**
 * Adds a datum together with a weight by delegating to the overload that
 * takes a feature collection, a label and a weight.
 *
 * @param d the datum whose {@code asFeatures()} and {@code label()} are passed on
 * @param weight the weight forwarded unchanged to the delegate
 */
public void add(Datum<L, F> d, float weight) {
 add(d.asFeatures(), d.label(), weight);
}

/**
 * Builds a new datum carrying the same features as {@code d} but with its
 * label translated through {@code labelMapping}.
 *
 * @param d the datum to relabel
 * @param labelMapping lookup table from old labels to new labels
 * @param defaultLabel label used when the lookup yields {@code null}
 *        (key absent, or key mapped to {@code null})
 * @return an {@code RVFDatum} over {@code d}'s feature counter if {@code d}
 *         is an {@code RVFDatum}, otherwise a {@code BasicDatum} over
 *         {@code d}'s feature collection
 */
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d, Map<L,L2> labelMapping, L2 defaultLabel) {
  // TODO (from original): how should the datum itself be copied?
  // The feature counter / collection is handed to the new datum as-is.
  L2 mapped = labelMapping.get(d.label());
  L2 newLabel = (mapped != null) ? mapped : defaultLabel;
  if (!(d instanceof RVFDatum)) {
    return new BasicDatum<>(d.asFeatures(), newLabel);
  }
  return new RVFDatum<>(((RVFDatum<L, F>) d).asFeaturesCounter(), newLabel);
}

/**
 * Computes the probability of an example under its own label.
 * An {@code RVFDatum} is routed to {@code probabilityOfRVFDatum}; any other
 * datum is scored from its feature collection and label.
 *
 * @param example the datum to evaluate
 * @return the probability returned by the delegate that handles
 *         {@code example}'s kind
 */
public double probabilityOf(Datum<L, F> example) {
  if (!(example instanceof RVFDatum<?,?>)) {
    return probabilityOf(example.asFeatures(), example.label());
  }
  return probabilityOfRVFDatum((RVFDatum<L,F>) example);
}

Datum<L,F> datum = labeledDataset.getDatum(i);
int labelID = labeledDataset.labelIndex.indexOf(datum.label());
for(F feature : datum.asFeatures()){
 if(geFeatureMap.containsKey(feature)){
  int geFnum = geFeatureMap.get(feature);
for (F feature : datum.asFeatures()) {
 if (geFeatureMap.containsKey(feature)) {
  int geFnum = geFeatureMap.get(feature);

/**
 * Returns the score of the Datum for the specified label.
 * The Datum's own (true) label is ignored.
 *
 * @param example the datum to score; an {@code RVFDatum} is delegated to
 *        {@code scoreOfRVFDatum}
 * @param label the label to score the datum against
 * @return the sum of {@code weight(f, label)} over the datum's features,
 *         plus the threshold stored for that label
 */
public double scoreOf(Datum<L, F> example, L label) {
  if (example instanceof RVFDatum<?, ?>) {
    return scoreOfRVFDatum((RVFDatum<L,F>) example, label);
  }
  final int labelId = labelIndex.indexOf(label);
  double total = 0.0;
  for (F feature : example.asFeatures()) {
    total += weight(feature, labelId);
  }
  // The per-label threshold is added last, after all feature weights.
  return total + thresholds[labelId];
}

/**
 * Creates an RVFDatum from the contents of another Datum. Each element of
 * the source datum's feature collection adds 1.0 to that feature's count,
 * and the source datum's label is copied over.
 * <i>Implementation note:</i> a fresh counter is allocated for the features,
 * but the copy is only guaranteed correct if the label and feature names
 * are immutable.
 *
 * @param m The Datum to copy.
 */
public RVFDatum(Datum<L, F> m) {
  this.features = new ClassicCounter<>();
  for (F feature : m.asFeatures()) {
    this.features.incrementCount(feature, 1.0);
  }
  setLabel(m.label());
}

/**
 * Computes a per-label value for the given example from the sigmoid
 * outputs of the model.
 *
 * @param example the datum to evaluate; an {@code RVFDatum} contributes the
 *        values of its feature counter, any other datum contributes 1.0 for
 *        every element of its feature collection
 * @return a counter mapping each label in {@code labelIndex} to the
 *         corresponding entry returned by {@code LogisticUtils.calculateSigmoids}
 */
@Override
public Counter<L> probabilityOf(Datum<L, F> example) {
  // Feature indices for the example, looked up against featureIndex.
  int[] activeIndices = LogisticUtils.indicesOf(example.asFeatures(), featureIndex);

  // Feature values: taken from the counter for real-valued data, all ones otherwise.
  double[] activeValues;
  if (!(example instanceof RVFDatum<?, ?>)) {
    activeValues = new double[example.asFeatures().size()];
    Arrays.fill(activeValues, 1.0);
  } else {
    Collection<Double> counterValues =
        ((RVFDatum<?, ?>) example).asFeaturesCounter().values();
    activeValues = LogisticUtils.convertToArray(counterValues);
  }

  // One sigmoid output per class, stored under that class's label.
  double[] sigmoids = LogisticUtils.calculateSigmoids(weights, activeIndices, activeValues);
  Counter<L> perLabel = new ClassicCounter<>();
  int classCount = labelIndex.size();
  for (int classId = 0; classId < classCount; classId++) {
    perLabel.incrementCount(labelIndex.get(classId), sigmoids[classId]);
  }
  return perLabel;
}

/**
 * Returns the scores for both classes.
 *
 * @param datum the datum to score; an {@code RVFDatum} is delegated to
 *        {@code scoresOfRVFDatum}
 * @return a counter in which {@code classes[1]} holds the score of the
 *         datum's features and {@code classes[0]} holds its negation
 */
@Override
public Counter<L> scoresOf(Datum<L, F> datum) {
  if (datum instanceof RVFDatum<?,?>) {
    return scoresOfRVFDatum((RVFDatum<L,F>) datum);
  }
  double score = scoreOf(datum.asFeatures());
  Counter<L> scores = new ClassicCounter<>();
  scores.setCount(classes[0], -score);
  scores.setCount(classes[1], score);
  return scores;
}

/**
 * Builds a counter whose keys are the labels of the classifier and whose
 * values are the scores (unnormalized log probabilities) of each class.
 *
 * @param example the datum to score; an {@code RVFDatum} is delegated to
 *        {@code scoresOfRVFDatum}
 * @return a counter with one entry per label returned by {@code labels()}
 */
@Override
public Counter<L> scoresOf(Datum<L, F> example) {
  if (example instanceof RVFDatum<?, ?>) {
    return scoresOfRVFDatum((RVFDatum<L,F>) example);
  }

  // Translate features to indices. Features that featureIndex does not
  // know (negative index) are dropped without any logging.
  Collection<F> feats = example.asFeatures();
  int[] indexed = new int[feats.size()];
  int known = 0;
  for (F feature : feats) {
    int position = featureIndex.indexOf(feature);
    if (position >= 0) {
      indexed[known++] = position;
    }
  }

  // Trim to the indices actually found. The copy is kept under the
  // System.class lock exactly as in the original code.
  int[] activeFeatures = new int[known];
  synchronized (System.class) {
    System.arraycopy(indexed, 0, activeFeatures, 0, known);
  }

  Counter<L> scores = new ClassicCounter<>();
  for (L label : labels()) {
    scores.setCount(label, scoreOf(activeFeatures, label));
  }
  return scores;
}

Datum<String, String> d = makeDatum(doc, i, featureFactories);
Collection<String> newFeats = new ArrayList<>();
for (String f : d.asFeatures()) {
 if ( ! origFeatIndex.contains(f)) {
  newFeats.add(f);

/**
 * Accumulates, for every feature of {@code datum} that is present in the
 * labeled dataset's feature index, one term per ordered class pair
 * {@code (c, cPrime)} into {@code feature2classPairDerivatives}.
 * The term is {@code -p[c] * (1 - p[c]) * value} when {@code cPrime == c}
 * and {@code p[c] * p[cPrime] * value} otherwise, where {@code value} is
 * {@code valueOfFeature(feature, datum)}.
 *
 * @param datum the datum whose features are visited
 * @param probs per-class values indexed by class id
 * @param feature2classPairDerivatives counter keyed by
 *        (feature id, c, cPrime) that receives the increments
 */
private void updateDerivative(Datum<L,F> datum, double[] probs,Counter<Triple<Integer,Integer,Integer>> feature2classPairDerivatives){
  for (F feature : datum.asFeatures()) {
    int fID = labeledDataset.featureIndex.indexOf(feature);
    if (fID < 0) {
      // Feature not in the index: it contributes nothing.
      continue;
    }
    for (int c = 0; c < numClasses; c++) {
      for (int cPrime = 0; cPrime < numClasses; cPrime++) {
        feature2classPairDerivatives.incrementCount(
            new Triple<>(fID, c, cPrime),
            (cPrime == c)
                ? - probs[c]*(1-probs[c])*valueOfFeature(feature,datum)
                : probs[c]*probs[cPrime]*valueOfFeature(feature,datum));
      }
    }
  }
}

/**
 * Appends a datum to this dataset: records its label, then its features,
 * then increments {@code size}. An {@code RVFDatum} contributes its feature
 * counter directly; any other datum has its feature collection converted
 * with {@code Counters.asCounter}.
 *
 * @param d the datum to add
 */
@Override
// If you edit me, also take care of WeightedRVFDataset
public void add(Datum<L, F> d) {
  addLabel(d.label());
  if (d instanceof RVFDatum<?, ?>) {
    addFeatures(((RVFDatum<L, F>) d).asFeaturesCounter());
  } else {
    addFeatures(Counters.asCounter(d.asFeatures()));
  }
  size++;
}

 Collection<F> features = datum.asFeatures();
 for (F feature : features) {
  int i = indexOf(featureIndex.indexOf(feature), labelIndex.indexOf(datum.label()));
Collection<F> features = datum.asFeatures();
for (F feature : features) {
 for (int c = 0; c < numClasses; c++) {

/**
 * Appends a datum along with its source and id: records the label, then the
 * features, then the source/id pair, and finally increments {@code size}.
 * An {@code RVFDatum} contributes its feature counter directly; any other
 * datum has its feature collection converted with {@code Counters.asCounter}.
 *
 * @param d the datum to add
 * @param src the source passed to {@code addSourceAndId}
 * @param id the id passed to {@code addSourceAndId}
 */
public void add(Datum<L, F> d, String src, String id) {
  addLabel(d.label());
  if (d instanceof RVFDatum<?, ?>) {
    addFeatures(((RVFDatum<L, F>) d).asFeaturesCounter());
  } else {
    addFeatures(Counters.asCounter(d.asFeatures()));
  }
  addSourceAndId(src, id);
  size++;
}

/**
 * Trains a linear classifier from an iterable of data. The data are
 * traversed once here to build the feature and label indices, and the same
 * iterable is then handed to the objective function.
 *
 * @param dataIterable the training data
 * @return a {@code LinearClassifier} built from the minimized weights and
 *         the indices collected from {@code dataIterable}
 */
public Classifier<L, F> trainClassifier(Iterable<Datum<L, F>> dataIterable) {
  Minimizer<DiffFunction> minimizer = getMinimizer();

  // Collect every label and feature seen in the data. Per the original
  // comment, adding a duplicate to the index does not add it again.
  Index<F> featureIndex = Generics.newIndex();
  Index<L> labelIndex = Generics.newIndex();
  for (Datum<L, F> datum : dataIterable) {
    labelIndex.add(datum.label());
    featureIndex.addAll(datum.asFeatures());
  }
  logger.info(String.format("Training linear classifier with %d features and %d labels", featureIndex.size(), labelIndex.size()));

  // [cdm 2014] The objective uses the logPrior configured on this factory;
  // it is intentionally not overridden with a new quadratic prior here.
  LogConditionalObjectiveFunction<L, F> objective =
      new LogConditionalObjectiveFunction<>(dataIterable, logPrior, featureIndex, labelIndex);
  double[] optimized = minimizer.minimize(objective, TOL, objective.initial());
  return new LinearClassifier<>(objective.to2D(optimized), featureIndex, labelIndex);
}

/**
 * Converts a datum's features from counts to normalized tf-idf weights.
 * Only features present in {@code featureDocCounts} are kept. The term
 * frequency of a feature is the number of times it occurs in the datum's
 * feature collection, and its idf is
 * {@code log((size() + 1) / (docCount + 0.5))}. Each tf-idf value is then
 * divided by the plain sum of all tf-idf values (no absolute value is taken).
 *
 * @param datum with a collection of features.
 * @param featureDocCounts a counter of doc-count for each feature.
 * @return RVFDatum with the normalized tf-idf features and {@code datum}'s label.
 */
public RVFDatum<L,F> getL1NormalizedTFIDFDatum(Datum<L,F> datum, Counter<F> featureDocCounts) {
  // Term frequencies, restricted to features with a known document count.
  Counter<F> weights = new ClassicCounter<>();
  for (F feature : datum.asFeatures()) {
    if (featureDocCounts.containsKey(feature)) {
      weights.incrementCount(feature, 1.0);
    }
  }

  // Replace each tf by tf*idf and accumulate the normalizer.
  double l1norm = 0;
  for (F feature : weights.keySet()) {
    double idf = Math.log(((double)(this.size()+1))/(featureDocCounts.getCount(feature)+0.5));
    double tfidf = weights.getCount(feature) * idf;
    weights.setCount(feature, tfidf);
    l1norm += tfidf;
  }

  // Scale every weight by the accumulated sum.
  for (F feature : weights.keySet()) {
    weights.setCount(feature, weights.getCount(feature) / l1norm);
  }
  return new RVFDatum<>(weights, datum.label());
}

Popular methods of Datum

label

Popular in Java

Creating JSON documents from java classes using gson
getSystemService (Context)
setRequestProperty (URLConnection)
requestLocationUpdates (LocationManager)
Proxy (java.net)
This class represents proxy server settings. A created instance of Proxy stores a type and an addres
Timer (java.util)
Timers schedule one-shot or recurring TimerTask for execution. Prefer java.util.concurrent.Scheduled
Pattern (java.util.regex)
Patterns are compiled regular expressions. In many cases, convenience methods such as String#matches
Point (java.awt)
A point representing a location in (x,y) coordinate space, specified in integer precision.
Loader (org.hibernate.loader)
Abstract superclass of object loading (and querying) strategies. This class implements useful common
Scheduler (org.quartz)
This is the main interface of a Quartz Scheduler. A Scheduler maintains a registry of org.quartz.Job
Top plugins for WebStorm

How to use the asFeatures method in edu.stanford.nlp.ling.Datum

Best Java code snippets using edu.stanford.nlp.ling.Datum.asFeatures (Showing top 20 results out of 315)

How to use the asFeatures method in edu.stanford.nlp.ling.Datum