/**
 * Calculates the confidence in the tagging of a {@link Segment} as the product of the
 * gamma probabilities of the predicted state at every position of the segment.
 *
 * @param segment the segment to score; its predicted sequence, input, start and end are read
 * @param cachedLattice a lattice to reuse, or <code>null</code> to build a new
 *        {@link SumLatticeDefault} from the model and the segment's input
 * @return 0-1 confidence value. higher = more confident. Returns 0.0 when a predicted
 *         label has no corresponding state.
 */
public double estimateConfidenceFor (Segment segment, SumLatticeDefault cachedLattice) {
    Sequence predSequence = segment.getPredicted ();
    Sequence input = segment.getInput ();
    SumLatticeDefault lattice = (cachedLattice == null) ? new SumLatticeDefault (model, input) : cachedLattice;
    double confidence = 1;
    for (int i = segment.getStart(); i <= segment.getEnd(); i++) {
        int stateIndex = stateIndexOfString ((String) predSequence.get (i));
        if (stateIndex == -1) {
            // Unknown label: there is no state to look up, so report zero confidence
            // instead of asking the model for state -1.
            return 0.0;
        }
        // The lattice is queried at i+1 for sequence position i.
        confidence *= lattice.getGammaProbability (i + 1, model.getState (stateIndex));
    }
    return confidence;
}
/**
 * Compares this segment with another object.
 *
 * Two segments are equal when they have the same start, end and correctness flag, their
 * inputs have the same size, and their predicted and true labels agree at every position
 * from start to end inclusive.
 *
 * @param o the object to compare with; may be <code>null</code> or of another type
 * @return <code>true</code> if <code>o</code> is a Segment matching this one as described
 *         above, <code>false</code> otherwise (including for <code>null</code> and
 *         non-Segment arguments)
 */
public boolean equals (Object o) {
    if (this == o)
        return true;
    // Guard the cast: equals must return false, not throw, for null or foreign types.
    if (!(o instanceof Segment))
        return false;
    Segment s = (Segment) o;
    if (start == s.getStart() && end == s.getEnd() && correct == s.correct()
            && input.size() == s.getInput().size()) {
        for (int i = start; i <= end; i++) {
            if (!pred.get (i).equals (s.getPredicted (i)) || !truth.get (i).equals (s.getTruth (i)))
                return false;
        }
        return true;
    }
    return false;
}
}
public ConfidenceEvaluator (Segment[] segments, boolean sorted) { this.confidences = new Vector (); for (int i=0; i < segments.length; i++) { confidences.add (new EntityConfidence (segments[i].getConfidence(), segments[i].correct(), segments[i].getInput(), segments[i].getStart(), segments[i].getEnd())); } if (!sorted) Collections.sort (confidences, new ConfidenceComparator()); this.nBins = DEFAULT_NUM_BINS; this.numCorrect = getNumCorrectEntities (); }
// NOTE(review): this line is an excerpt of statements from a larger method, not a complete
// definition; braces are unbalanced and the enclosing declarations are not visible here.
// Visible steps: (1) log every entry of orderedSegments at FINE level; (2) when findIncorrect
// is set, scan orderedSegments in order and take the first one whose correct() is false as
// leastConfidentSegment; (3) if findIncorrect is set but the chosen segment is still correct,
// log a warning and record null in leastConfidentSegments; (4) for each index j inside the
// chosen segment, copy the true label into sequence[j] and count it in numCorrectedTokens;
// (5) when the segment endsPrematurely(), also copy the true label for the token right after
// the segment end; (6) the trailing call takes bestOutputSequence() of an object built from
// the first segment's input and segmentCorrectedOutput (its constructor is cut off here).
logger.fine ("Ordered Segments:\n"); for (int j=0; j < orderedSegments.length; j++) { logger.fine (orderedSegments[j].toString()); logger.fine ("Ordered Segments:\n"); for (int j=0; j < orderedSegments.length; j++) { logger.fine (orderedSegments[j].toString()); if (findIncorrect) { for (int j=0; j < orderedSegments.length; j++) { if (!orderedSegments[j].correct()) { leastConfidentSegment = orderedSegments[j]; break; if (findIncorrect && leastConfidentSegment.correct()) { logger.warning ("cannot find incorrect segment, probably because error is in background state\n"); this.leastConfidentSegments.add (null); if (leastConfidentSegment.indexInSegment (j)) { sequence[j] = (String)truth.get (j); numCorrectedTokens++; if (leastConfidentSegment.endsPrematurely ()) { sequence[leastConfidentSegment.getEnd()+1] = (String)truth.get (leastConfidentSegment.getEnd()+1); numCorrectedTokens++; orderedSegments[0].getInput (), segmentCorrectedOutput).bestOutputSequence();
// NOTE(review): this line is an excerpt of statements from a larger method, not a complete
// definition; the body of the "si == -1" check and several closing braces are not visible.
// Visible steps: (1) reset every entry of constraints to 0; (2) for each position of
// requiredSegment (start to end inclusive), look up the state index of the label in
// constrainedSequence; -1 means the label has no state; (3) if index end+2 is still inside
// constraints, look up the state for the segment's in-tag, throw IllegalArgumentException
// when it does not exist, and store -(statei + 1) at constraints[end + 2]; (4) log the
// segment and the constrained sequence at FINE level.
// Presumably a negative constraint entry means "must not be in this state" at that
// position - TODO confirm against the constrained-lattice documentation.
for (int c = 0; c < constraints.length; c++) constraints[c] = 0; for (int i=requiredSegment.getStart (); i <= requiredSegment.getEnd(); i++) { int si = t.stateIndexOfString ((String)constrainedSequence.get (i)); if (si == -1) if (requiredSegment.getEnd() + 2 < constraints.length) { // if String endTag = requiredSegment.getInTag().toString(); int statei = t.stateIndexOfString (endTag); if (statei == -1) throw new IllegalArgumentException ("Could not find state " + endTag + ". Check that state labels match startTags and InTags."); constraints[requiredSegment.getEnd() + 2] = - (statei + 1); logger.fine ("Segment:\n" + requiredSegment.sequenceToString () + "\nconstrainedSequence:\n" + constrainedSequence + "\nConstraints:\n");
// NOTE(review): this line is an excerpt of statements from a larger method, not a complete
// definition; the loops opened here are not closed within the visible text.
// Visible steps: (1) rank the segments of instance by confidence and take the first entry of
// the returned array as leastConfidentSegment; (2) log the true sequence and every ranked
// segment at FINE level; (3) count the incorrect segments between the true and predicted
// sequences via eval; (4) build a label array the size of the true sequence that uses the
// true label for positions inside [getStart(), getEnd()] of the chosen segment and the
// predicted label everywhere else.
// Element 0 is dereferenced without a check, so this assumes at least one segment was
// found - verify against rankSegmentsByConfidence.
orderedSegments = confidenceEstimator.rankSegmentsByConfidence (instance, startTags, continueTags); Segment leastConfidentSegment = orderedSegments[0]; logger.fine ("Ordered Segments:\nTrue sequence: " + leastConfidentSegment.getTruth()); for (int j=0; j < orderedSegments.length; j++) { logger.fine (orderedSegments[j].toString()); Sequence truth = leastConfidentSegment.getTruth(); Sequence predicted = leastConfidentSegment.getPredicted(); int numIncorrect = eval.numIncorrectSegments (truth, predicted); String[] sequence = new String[truth.size()]; for (int j=0; j < truth.size(); j++) { if (j <= leastConfidentSegment.getEnd() && j >= leastConfidentSegment.getStart()) sequence[j] = (String)truth.get (j); else sequence[j] = (String) predicted.get (j);
/** Calculates the confidence in the tagging of a {@link Segment}. @return 0-1 confidence value. higher = more confident. */ public double estimateConfidenceFor (Segment segment, SumLatticeDefault cachedLattice) { Sequence predSequence = segment.getPredicted (); Sequence input = segment.getInput (); SumLatticeDefault lattice = (cachedLattice == null) ? new SumLatticeDefault (model, input) : cachedLattice; // constrained lattice SumLatticeDefault constrainedLattice = new SumLatticeConstrained (model, input, null, segment, predSequence); double latticeWeight = lattice.getTotalWeight (); double constrainedLatticeWeight = constrainedLattice.getTotalWeight (); double confidence = Math.exp (latticeWeight - constrainedLatticeWeight); //System.err.println ("confidence: " + confidence); return confidence; }
/**
 * Ranks all {@link Segment}s in this {@link InstanceList} by confidence estimate.
 *
 * Each segment produced by a {@link SegmentIterator} is scored with
 * <code>estimateConfidenceFor</code>, the score is stored on the segment, and the
 * segments are sorted with <code>Collections.sort</code> (the segments' natural ordering).
 *
 * @param ilist list of segmentation instances
 * @param startTags represent the labels for the start states (B-) of all segments
 * @param continueTags represent the labels for the continue state (I-) of all segments
 * @return array of {@link Segment}s ordered by non-decreasing confidence scores, as
 *         calculated by <code>estimateConfidenceFor</code>; an empty array when no
 *         segments are found
 */
public Segment[] rankSegmentsByConfidence (InstanceList ilist, Object[] startTags, Object[] continueTags) {
    ArrayList segmentList = new ArrayList ();
    SegmentIterator iter = new SegmentIterator (this.model, ilist, startTags, continueTags);
    if (this.segmentConfidences == null)
        segmentConfidences = new java.util.Vector ();
    while (iter.hasNext ()) {
        Segment segment = (Segment) iter.nextSegment ();
        double confidence = estimateConfidenceFor (segment);
        segment.setConfidence (confidence);
        logger.fine ("confidence=" + segment.getConfidence() + " for segment\n"
                     + segment.sequenceToString() + "\n");
        segmentList.add (segment);
    }
    Collections.sort (segmentList);
    // Size the target array from the list: a fixed one-element array would come back
    // holding a single null when the list is empty.
    return (Segment[]) segmentList.toArray (new Segment[segmentList.size ()]);
}
// Convert the two offset entries to primitive ints. Integer.valueOf replaces the
// deprecated Integer constructor and accepts the same argument types.
final int start = Integer.valueOf(offset[0]).intValue();
final int stop = Integer.valueOf(offset[1]).intValue();
// The same output sequence is passed for two sequence arguments and entLabel for both
// tag arguments (presumably truth/predicted and start/in tag - confirm against the
// Segment constructor).
final Segment seg = new Segment(input, output, output, start, stop, entLabel, entLabel);
/**
 * Takes the next element from the underlying iterator, treats it as a {@link Segment},
 * and wraps it in a new {@link Instance}.
 *
 * @return an Instance built from the segment and the segment's true sequence; the two
 *         remaining constructor arguments are <code>null</code>
 */
public Instance next () {
    final Object element = subIterator.next();
    final Segment current = (Segment) element;
    return new Instance (current, current.getTruth (), null, null);
}
// NOTE(review): this line is an excerpt of statements from a larger method, not a complete
// definition; braces are unbalanced and the enclosing declarations are not visible here.
// Visible steps: (1) log every entry of orderedSegments at FINE level; (2) when findIncorrect
// is set, scan orderedSegments in order and take the first one whose correct() is false as
// leastConfidentSegment; (3) if findIncorrect is set but the chosen segment is still correct,
// log a warning and record null in leastConfidentSegments; (4) for each index j inside the
// chosen segment, copy the true label into sequence[j] and count it in numCorrectedTokens;
// (5) when the segment endsPrematurely(), also copy the true label for the token right after
// the segment end; (6) the trailing call takes bestOutputSequence() of an object built from
// the first segment's input and segmentCorrectedOutput (its constructor is cut off here).
logger.fine ("Ordered Segments:\n"); for (int j=0; j < orderedSegments.length; j++) { logger.fine (orderedSegments[j].toString()); logger.fine ("Ordered Segments:\n"); for (int j=0; j < orderedSegments.length; j++) { logger.fine (orderedSegments[j].toString()); if (findIncorrect) { for (int j=0; j < orderedSegments.length; j++) { if (!orderedSegments[j].correct()) { leastConfidentSegment = orderedSegments[j]; break; if (findIncorrect && leastConfidentSegment.correct()) { logger.warning ("cannot find incorrect segment, probably because error is in background state\n"); this.leastConfidentSegments.add (null); if (leastConfidentSegment.indexInSegment (j)) { sequence[j] = (String)truth.get (j); numCorrectedTokens++; if (leastConfidentSegment.endsPrematurely ()) { sequence[leastConfidentSegment.getEnd()+1] = (String)truth.get (leastConfidentSegment.getEnd()+1); numCorrectedTokens++; orderedSegments[0].getInput (), segmentCorrectedOutput).bestOutputSequence();
// NOTE(review): this line is an excerpt of statements from a larger method, not a complete
// definition; the body of the "si == -1" check and several closing braces are not visible.
// Visible steps: (1) reset every entry of constraints to 0; (2) for each position of
// requiredSegment (start to end inclusive), look up the state index of the label in
// constrainedSequence; -1 means the label has no state; (3) if index end+2 is still inside
// constraints, look up the state for the segment's in-tag, throw IllegalArgumentException
// when it does not exist, and store -(statei + 1) at constraints[end + 2]; (4) log the
// segment and the constrained sequence at FINE level.
// Presumably a negative constraint entry means "must not be in this state" at that
// position - TODO confirm against the constrained-lattice documentation.
for (int c = 0; c < constraints.length; c++) constraints[c] = 0; for (int i=requiredSegment.getStart (); i <= requiredSegment.getEnd(); i++) { int si = t.stateIndexOfString ((String)constrainedSequence.get (i)); if (si == -1) if (requiredSegment.getEnd() + 2 < constraints.length) { // if String endTag = requiredSegment.getInTag().toString(); int statei = t.stateIndexOfString (endTag); if (statei == -1) throw new IllegalArgumentException ("Could not find state " + endTag + ". Check that state labels match startTags and InTags."); constraints[requiredSegment.getEnd() + 2] = - (statei + 1); logger.fine ("Segment:\n" + requiredSegment.sequenceToString () + "\nconstrainedSequence:\n" + constrainedSequence + "\nConstraints:\n");
// NOTE(review): this line is an excerpt of statements from a larger method, not a complete
// definition; the loops opened here are not closed within the visible text.
// Visible steps: (1) rank the segments of instance by confidence and take the first entry of
// the returned array as leastConfidentSegment; (2) log the true sequence and every ranked
// segment at FINE level; (3) count the incorrect segments between the true and predicted
// sequences via eval; (4) build a label array the size of the true sequence that uses the
// true label for positions inside [getStart(), getEnd()] of the chosen segment and the
// predicted label everywhere else.
// Element 0 is dereferenced without a check, so this assumes at least one segment was
// found - verify against rankSegmentsByConfidence.
orderedSegments = confidenceEstimator.rankSegmentsByConfidence (instance, startTags, continueTags); Segment leastConfidentSegment = orderedSegments[0]; logger.fine ("Ordered Segments:\nTrue sequence: " + leastConfidentSegment.getTruth()); for (int j=0; j < orderedSegments.length; j++) { logger.fine (orderedSegments[j].toString()); Sequence truth = leastConfidentSegment.getTruth(); Sequence predicted = leastConfidentSegment.getPredicted(); int numIncorrect = eval.numIncorrectSegments (truth, predicted); String[] sequence = new String[truth.size()]; for (int j=0; j < truth.size(); j++) { if (j <= leastConfidentSegment.getEnd() && j >= leastConfidentSegment.getStart()) sequence[j] = (String)truth.get (j); else sequence[j] = (String) predicted.get (j);
/** Calculates the confidence in the tagging of a {@link Segment}. @return 0-1 confidence value. higher = more confident. */ public double estimateConfidenceFor (Segment segment, SumLatticeDefault cachedLattice) { Sequence predSequence = segment.getPredicted (); Sequence input = segment.getInput (); SumLatticeDefault lattice = (cachedLattice == null) ? new SumLatticeDefault (model, input) : cachedLattice; // constrained lattice SumLatticeDefault constrainedLattice = new SumLatticeConstrained (model, input, null, segment, predSequence); double latticeWeight = lattice.getTotalWeight (); double constrainedLatticeWeight = constrainedLattice.getTotalWeight (); double confidence = Math.exp (latticeWeight - constrainedLatticeWeight); //System.err.println ("confidence: " + confidence); return confidence; }
/**
 * Ranks all {@link Segment}s in this {@link InstanceList} by confidence estimate.
 *
 * Each segment produced by a {@link SegmentIterator} is scored with
 * <code>estimateConfidenceFor</code>, the score is stored on the segment, and the
 * segments are sorted with <code>Collections.sort</code> (the segments' natural ordering).
 *
 * @param ilist list of segmentation instances
 * @param startTags represent the labels for the start states (B-) of all segments
 * @param continueTags represent the labels for the continue state (I-) of all segments
 * @return array of {@link Segment}s ordered by non-decreasing confidence scores, as
 *         calculated by <code>estimateConfidenceFor</code>; an empty array when no
 *         segments are found
 */
public Segment[] rankSegmentsByConfidence (InstanceList ilist, Object[] startTags, Object[] continueTags) {
    ArrayList segmentList = new ArrayList ();
    SegmentIterator iter = new SegmentIterator (this.model, ilist, startTags, continueTags);
    if (this.segmentConfidences == null)
        segmentConfidences = new java.util.Vector ();
    while (iter.hasNext ()) {
        Segment segment = (Segment) iter.nextSegment ();
        double confidence = estimateConfidenceFor (segment);
        segment.setConfidence (confidence);
        logger.fine ("confidence=" + segment.getConfidence() + " for segment\n"
                     + segment.sequenceToString() + "\n");
        segmentList.add (segment);
    }
    Collections.sort (segmentList);
    // Size the target array from the list: a fixed one-element array would come back
    // holding a single null when the list is empty.
    return (Segment[]) segmentList.toArray (new Segment[segmentList.size ()]);
}
// Convert the two offset entries to primitive ints. Integer.valueOf replaces the
// deprecated Integer constructor and accepts the same argument types.
final int start = Integer.valueOf(offset[0]).intValue();
final int stop = Integer.valueOf(offset[1]).intValue();
// The same output sequence is passed for two sequence arguments and entLabel for both
// tag arguments (presumably truth/predicted and start/in tag - confirm against the
// Segment constructor).
final Segment seg = new Segment(input, output, output, start, stop, entLabel, entLabel);
/**
 * Takes the next element from the underlying iterator, treats it as a {@link Segment},
 * and wraps it in a new {@link Instance}.
 *
 * @return an Instance built from the segment and the segment's true sequence; the two
 *         remaining constructor arguments are <code>null</code>
 */
public Instance next () {
    final Object element = subIterator.next();
    final Segment current = (Segment) element;
    return new Instance (current, current.getTruth (), null, null);
}
/** Calculates the confidence in the tagging of a {@link Segment}. @return 0-1 confidence value. higher = more confident. */ public double estimateConfidenceFor (Segment segment, SumLatticeDefault cachedLattice) { Sequence predSequence = segment.getPredicted (); Sequence input = segment.getInput (); SumLatticeDefault lattice = (cachedLattice==null) ? new SumLatticeDefault (model, input) : cachedLattice; double confidence = 0; for (int i=segment.getStart(); i <= segment.getEnd(); i++) { int stateIndex = stateIndexOfString((String)predSequence.get(i)); if (stateIndex == -1) // Unknown label. return 0.0; confidence += lattice.getGammaProbability(i+1, model.getState(stateIndex)); } return confidence/(double)segment.size(); }