/**
 * Smoke test for rank ordering: builds a ranked vector over the four
 * features "a", "b", "c" and "d" and prints the vector size followed by
 * the value stored at each rank.
 */
public void testSetRankOrder ()
{
    Alphabet alphabet = new Alphabet ();
    int[] featureIndices = new int[] {
        alphabet.lookupIndex ("a"),
        alphabet.lookupIndex ("b"),
        alphabet.lookupIndex ("c"),
        alphabet.lookupIndex ("d")
    };
    double[] featureValues = new double[] {3.0, 1.0, 2.0, 6.0};
    RankedFeatureVector ranked =
        new RankedFeatureVector (alphabet, featureIndices, featureValues);

    System.out.println ("vector size ="+ranked.numLocations());

    // Walk the ranks in order and report the value at each one.
    int rank = 0;
    while (rank < ranked.numLocations()) {
        System.out.println ("Rank="+rank+" value="+ranked.getValueAtRank(rank));
        rank++;
    }
}
/**
 * Writes every entry of a ranked feature vector to a UTF-8 text file, one
 * line per rank in the form {@code object,value} with the value printed to
 * five decimal places.
 *
 * @param rfv        the ranked vector to dump, walked from rank 0 upwards
 * @param outputFile the file to (over)write
 * @throws RuntimeException wrapping the {@link IOException} if the file
 *         cannot be opened or a write error is detected
 */
public static void writeRankedToFile(RankedFeatureVector rfv, File outputFile) {
    try (PrintWriter writer = new PrintWriter(Files.newWriter(outputFile, Charsets.UTF_8))) {
        // Ranks are bounded by numLocations(), the same bound the other
        // rank loops use; singleSize() is not a rank count.
        final int numRanks = rfv.numLocations();
        for (int rank = 0; rank < numRanks; rank++) {
            Object objectAtRank = rfv.getObjectAtRank(rank);
            double valueAtRank = rfv.getValueAtRank(rank);
            writer.println(String.format("%s,%.5f", objectAtRank.toString(), valueAtRank));
        }
        // PrintWriter never throws on write failures; surface them explicitly
        // instead of silently leaving a truncated file behind.
        if (writer.checkError()) {
            throw new IOException("Failed writing ranked features to " + outputFile);
        }
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Creates a selection containing the top-ranked features of a ranked vector.
 *
 * @param rsv         ranked vector whose alphabet becomes this selection's
 *                    dictionary and whose best ranks are selected
 * @param numFeatures maximum number of features to select; fewer are
 *                    selected when the alphabet or the vector is smaller
 */
public FeatureSelection (RankedFeatureVector rsv, int numFeatures)
{
    this.dictionary = rsv.getAlphabet();
    this.selectedFeatures = new BitSet (dictionary.size());
    // Bound by the vector's own location count as well as the alphabet
    // size: a vector holding fewer entries than the alphabet has no rank
    // beyond numLocations()-1 to ask for.
    int availableRanks = Math.min (dictionary.size(), rsv.numLocations());
    int numSelections = Math.min (numFeatures, availableRanks);
    for (int rank = 0; rank < numSelections; rank++) {
        selectedFeatures.set (rsv.getIndexAtRank (rank));
    }
}
public int getMaxValuedIndex () { if (rankOrder == null) { setRankOrder (0); } return getIndexAtRank(0); // was return rankOrder[0]; }
/**
 * Prints the {@code num} lowest-ranked entries as space-terminated
 * {@code object:value} pairs, from rank {@code length-num} up to the last rank.
 *
 * @param out destination writer
 * @param num number of trailing ranks to print; values larger than the
 *            number of locations are clamped so that every entry is printed
 */
public void printLowerK (final PrintWriter out, final int num)
{
    int length = numLocations();
    // Clamp instead of asserting: with assertions disabled an oversized
    // num produced a negative start rank, and num == length is harmless.
    int count = Math.min (num, length);
    for (int rank = length - count; rank < length; rank++) {
        int idx = getIndexAtRank (rank);
        double val = getValueAtRank (rank);
        Object obj = dictionary.lookupObject (idx);
        out.print (obj+":"+val + " ");
    }
}
/**
 * Returns the value of the best-ranked feature that is contained in the
 * given feature selection.
 *
 * @param fs the selection to restrict to; {@code null} means no restriction
 *           and yields {@code getMaxValue()}
 * @return the value at the first rank whose feature index is in {@code fs}
 * @throws IllegalArgumentException if no feature of this vector is in {@code fs}
 */
public double getMaxValueIn (final FeatureSelection fs)
{
    if (fs == null) {
        return getMaxValue();
    }
    // Walk ranks best-first and test the FEATURE INDEX at each rank
    // (the previous code passed the rank itself to fs.contains and then
    // read rankOrder at a position that had not been ordered yet).
    final int numRanks = numLocations();
    for (int rank = 0; rank < numRanks; rank++) {
        if (fs.contains (getIndexAtRank (rank))) {
            return getValueAtRank (rank);
        }
    }
    // Previously this case ran off the end of the rank array.
    throw new IllegalArgumentException (
        "No feature of this vector is contained in the given FeatureSelection");
}
RankedFeatureVector rfv = new RankedFeatureVector (alphabet, probs); for (int ri = 0; ri < numWords; ri++) { int fi = rfv.getIndexAtRank(ri); pout.println (" <term weight=\""+probs[fi]+"\" count=\""+this.getCountFeatureTopic(fi,ti)+"\">"+alphabet.lookupObject(fi)+ "</term>"); if (ri < 20) // consider top 20 individual words as candidate titles double countssum = MatrixOps.sum (counts); Alphabet alph = new Alphabet(keys); rfv = new RankedFeatureVector (alph, counts); int max = rfv.numLocations() < numWords ? rfv.numLocations() : numWords; int fi = rfv.getIndexAtRank(ri); pout.println (" <phrase weight=\""+counts[fi]/countssum+"\" count=\""+values[fi]+"\">"+alph.lookupObject(fi)+ "</phrase>"); rfv = new RankedFeatureVector (titles.getAlphabet(), titles); int numTitles = 10; for (int ri = 0; ri < numTitles && ri < rfv.numLocations(); ri++) { if (titlesStringBuffer.indexOf(rfv.getObjectAtRank(ri).toString()) == -1) { titlesStringBuffer.append (rfv.getObjectAtRank(ri)); if (ri < numTitles-1) titlesStringBuffer.append (", ");
RankedFeatureVector gg = ranker.newRankedFeatureVector (ilist); logger.info ("Rank values before this round of conjunction-building"); int n = Math.min (200, gg.numLocations()); for (int i = 0; i < n; i++) logger.info ("Rank="+i+' '+Double.toString(gg.getValueAtRank(i)) + ' ' + gg.getObjectAtRank(i).toString()); int maxBeam = Math.max (beam1, beam2); logger.info ("Using minBeam="+minBeam+" maxBeam="+maxBeam); int max = maxBeam < gg.numLocations() ? maxBeam : gg.numLocations(); for (int b = 0; b < max; b++) { if (gg.getValueAtRank(b) == 0) break; int index = gg.getIndexAtRank(b); fsMax.add (index); if (b < minBeam) for (int i = 0; i < 200 && i < gg2.numLocations(); i++) logger.info ("Conjunction Rank="+i+' '+Double.toString(gg2.getValueAtRank(i)) + ' ' + gg2.getObjectAtRank(i).toString()); int origVSize = origV.size(); nextfeatures: for (int i = 0; i < gg2.numLocations(); i++) { double gain = gg2.getValueAtRank (i); if (gain < minGain) { if (gg2.getIndexAtRank(i) >= origVSize) { String s = (String) gg2.getObjectAtRank(i);
/**
 * Builds one feature selection per label from the per-label rankings and
 * installs them on the instance list, clearing any global selection.
 *
 * For each label, when {@code numFeatures} is non-negative the top
 * {@code numFeatures} ranks (or all ranks, if fewer) are selected;
 * otherwise ranks are taken best-first while their value stays above
 * {@code minThreshold}.
 *
 * @param ilist the instances to rank and to receive the selections
 */
public void selectFeaturesForPerLabel (InstanceList ilist)
{
    RankedFeatureVector[] rankings = perLabelRanker.newRankedFeatureVectors (ilist);
    int numClasses = rankings.length;
    FeatureSelection[] fs = new FeatureSelection[numClasses];
    for (int i = 0; i < numClasses; i++) {
        fs[i] = new FeatureSelection (ilist.getDataAlphabet());
        RankedFeatureVector ranking = rankings[i];
        // Ranks are bounded by numLocations(), the bound used by the other
        // rank loops; singleSize() is not a rank count.
        int numRanks = ranking.numLocations();
        int nf = Math.min (numFeatures, numRanks);
        if (nf >= 0) {
            for (int j = 0; j < nf; j++)
                fs[i].add (ranking.getIndexAtRank(j));
        } else {
            // Negative numFeatures: select by value threshold instead of by count.
            for (int j = 0; j < numRanks; j++) {
                if (ranking.getValueAtRank(j) > minThreshold)
                    fs[i].add (ranking.getIndexAtRank(j));
                else
                    break; // values only get smaller at later ranks
            }
        }
    }
    ilist.setFeatureSelection (null);
    ilist.setPerLabelFeatureSelection (fs);
}
/**
 * Builds a sparse table factor that keeps only the highest-ranked entries
 * of {@code ptl}: entries are added best-first, accumulating their log
 * values with {@code Maths.sumLogProb}, until the accumulated log mass
 * exceeds {@code log(alpha)}.
 *
 * @param ptl   the factor to truncate
 * @param alpha the mass threshold, compared in log space
 * @return a new factor over the same variables holding the retained entries
 */
public static TableFactor retainMass (DiscreteFactor ptl, double alpha)
{
    // Copy (index, log value) pairs out of the factor.
    final int numEntries = ptl.numLocations ();
    int[] idxs = new int [numEntries];
    double[] vals = new double [numEntries];
    for (int loc = 0; loc < idxs.length; loc++) {
        idxs[loc] = ptl.indexAtLocation (loc);
        vals[loc] = ptl.logValue (loc);
    }
    RankedFeatureVector ranked = new RankedFeatureVector (new Alphabet(), idxs, vals);

    final double logAlpha = Math.log (alpha);
    double mass = Double.NEGATIVE_INFINITY;
    TIntArrayList keptIdxs = new TIntArrayList ();
    TDoubleArrayList keptVals = new TDoubleArrayList ();

    // Take entries best-first; stop right after the one that pushes the
    // accumulated mass past the threshold.
    boolean enoughMass = false;
    int rank = 0;
    while (!enoughMass && rank < ranked.numLocations ()) {
        int idx = ranked.getIndexAtRank (rank);
        double val = ranked.value (idx);
        mass = Maths.sumLogProb (mass, val);
        keptIdxs.add (idx);
        keptVals.add (val);
        enoughMass = mass > logAlpha;
        rank++;
    }

    int[] szs = computeSizes (ptl);
    SparseMatrixn retained =
        new SparseMatrixn (szs, keptIdxs.toNativeArray (), keptVals.toNativeArray ());
    TableFactor result = new TableFactor (computeVars (ptl));
    result.setValues (retained);
    return result;
}
if (transitionWeights.numLocations () == 0) continue; RankedFeatureVector rfv = new RankedFeatureVector (inputAlphabet, transitionWeights); for (int m = 0; m < rfv.numLocations (); m++) { double v = rfv.getValueAtRank (m); int index = rfv.getIndexAtRank (m); Object feature = inputAlphabet.lookupObject (index); if (v != 0) {
/**
 * Prints, for every label, the top-ranked features together with their
 * values to standard output.
 *
 * @param numToPrint maximum number of features to print per label; capped
 *                   at the size of the data alphabet
 */
public void printWords (int numToPrint)
{
    final Alphabet dataAlphabet = instancePipe.getDataAlphabet();
    final int featureCount = dataAlphabet.size();
    final int labelCount = instancePipe.getTargetAlphabet().size();
    final int rowsPerLabel = Math.min (numToPrint, featureCount);
    // One scratch buffer, zeroed and refilled for each label.
    final double[] probs = new double[featureCount];

    int li = 0;
    while (li < labelCount) {
        Arrays.fill (probs, 0.0);
        p[li].addProbabilities (probs);
        RankedFeatureVector ranked = new RankedFeatureVector (dataAlphabet, probs);
        System.out.println ("\nFeature probabilities "+instancePipe.getTargetAlphabet().lookupObject(li));
        for (int rank = 0; rank < rowsPerLabel; rank++) {
            System.out.println (ranked.getObjectAtRank(rank)+" "+ranked.getValueAtRank(rank));
        }
        li++;
    }
}
public void printExtremeFeatures (PrintWriter out,int num) { final Alphabet dict = getAlphabet(); final LabelAlphabet labelDict = getLabelAlphabet(); int numFeatures = dict.size() + 1; int numLabels = labelDict.size(); // Include the feature weights according to each label RankedFeatureVector rfv; double[] weights = new double[numFeatures-1]; // do not deal with the default feature for (int li = 0; li < numLabels; li++) { out.print ("FEATURES FOR CLASS "+labelDict.lookupObject (li) + " "); for (int i = 0; i < defaultFeatureIndex; i++) { Object name = dict.lookupObject (i); double weight = parameters [li*numFeatures + i]; weights[i] = weight; } rfv = new RankedFeatureVector(dict,weights); rfv.printTopK(out,num); out.print (" <default> "+parameters [li*numFeatures + defaultFeatureIndex] + " "); rfv.printLowerK(out, num); out.println(); } }
public int getMaxValuedIndexIn (final FeatureSelection fs) { if (fs == null) { return getMaxValuedIndex(); } assert (fs.getAlphabet() == dictionary); // xxx Make this more efficient! I'm pretty sure that Java BitSet's can do this more efficiently int i = 0; while (!fs.contains(rankOrder[i])) { setRankOrder (i); i++; } //System.out.println ("RankedFeatureVector.getMaxValuedIndexIn feature=" //+dictionary.lookupObject(rankOrder[i])); return getIndexAtRank(i); // was return rankOrder[i] }
public void printRank (PrintWriter out) { final Alphabet dict = getAlphabet(); final LabelAlphabet labelDict = getLabelAlphabet(); int numFeatures = dict.size() + 1; int numLabels = labelDict.size(); // Include the feature weights according to each label RankedFeatureVector rfv; double[] weights = new double[numFeatures-1]; // do not deal with the default feature for (int li = 0; li < numLabels; li++) { out.print ("FEATURES FOR CLASS "+labelDict.lookupObject (li) + " "); for (int i = 0; i < defaultFeatureIndex; i++) { double weight = parameters [li*numFeatures + i]; weights[i] = weight; } rfv = new RankedFeatureVector(dict,weights); rfv.printByRank(out); out.println (" <default> "+parameters [li*numFeatures + defaultFeatureIndex] + " "); } }
RankedFeatureVector rfv = new RankedFeatureVector (crf.inputAlphabet, input.getIndices (), absVals); for (int rank = 0; rank < absVals.length; rank++) { int fidx = rfv.getIndexAtRank (rank); Object fname = crf.inputAlphabet.lookupObject (input.indexAtLocation (fidx)); if (absVals[fidx] < cutoff) break; // Break looping over features
/**
 * Ranks features by the add-one smoothed ratio of gradient gain to
 * gradient loss, {@code (gain + 1) / (loss + 1)}.
 *
 * @param ilist          instance list supplying the data alphabet
 * @param numFeatures    number of leading entries to read from both arrays
 * @param gradientgains  per-feature gains
 * @param gradientlosses per-feature losses
 * @return a ranked vector over the data alphabet holding the ratios
 */
private static RankedFeatureVector makeRatioVector(InstanceList ilist, int numFeatures,
        double[] gradientgains, double[] gradientlosses) {
    final double[] smoothedRatios = new double[numFeatures];
    int feature = 0;
    while (feature < numFeatures) {
        smoothedRatios[feature] =
            (gradientgains[feature] + 1.0) / (gradientlosses[feature] + 1.0);
        feature++;
    }
    return new RankedFeatureVector(ilist.getDataAlphabet(), smoothedRatios);
}
for (int i = 0; i < rfv.singleSize(); i++) { double ratio = rfv.value(i); if (ratio > opts.trimFeaturesByGradientGain) { newDict.lookupIndex(alpha.lookupObject(i), true);
/**
 * Checks the bi-normal separation ranking on a four-feature binary data
 * set: two positive and two negative instances, with expected values and
 * feature indices pinned at selected ranks.
 */
public void testBiNormalSeparation() {
    final int numFeatures = 4;
    BinaryTestData data = new BinaryTestData(numFeatures);
    // Positive instances.
    data.addInstance(new int[] {0, 1}, true);
    data.addInstance(new int[] {0, 2}, true);
    // Negative instances.
    data.addInstance(new int[] {2, 3}, false);
    data.addInstance(new int[] {3}, false);

    InstanceList instances = data.getInstanceList();
    BiNormalSeparation.Factory factory = new BiNormalSeparation.Factory();
    RankedFeatureVector ranked = factory.newRankedFeatureVector(instances);

    final double tolerance = 0.005;
    assertEquals(6.58, ranked.getValueAtRank(0), tolerance);
    assertEquals(3.29, ranked.getValueAtRank(2), tolerance);
    assertEquals(0, ranked.getValueAtRank(3), 0);
    assertEquals(6.58, ranked.getValueAtRank(1), tolerance);
    assertEquals(2, ranked.getIndexAtRank(3));
    assertEquals(1, ranked.getIndexAtRank(2));
}
} // closes the enclosing class, whose header lies outside this excerpt
/**
 * Returns the dictionary entry of the top-ranked feature.
 *
 * @return the object the dictionary maps to {@code getMaxValuedIndex()}
 */
public Object getMaxValuedObject ()
{
    final int bestIndex = getMaxValuedIndex();
    return dictionary.lookupObject (bestIndex);
}