/**
 * Appends one labeled span to {@code labeled}.
 *
 * The span is obtained from {@code input.subspan (tagStart.start, end)} and
 * carries the label stored in {@code tagStart}. It is flagged as background
 * when that label is the very same object as {@code backgroundTag}
 * (reference comparison, not {@code equals}).
 *
 * @param labeled       collection that receives the new span
 * @param input         tokenization the span is cut from
 * @param tagStart      supplies the start position and the label of the span
 * @param end           end position handed to {@code subspan}
 * @param backgroundTag label that marks a span as background
 */
private void addLabeledSpan(LabeledSpans labeled, Tokenization input, TagStart tagStart, int end, Label backgroundTag)
{
  Span covered = input.subspan(tagStart.start, end);
  Label tag = tagStart.label;
  boolean background = (tag == backgroundTag);
  LabeledSpan entry = new LabeledSpan(covered, tag, background);
  labeled.add(entry);
}
/**
 * Renders this span as
 * {@code <label> [span <startIdx>..<endIdx> confidence=<confidence>]}.
 *
 * @return the label text followed by the index range and the confidence
 */
public String toString()
{
  String labelText = label.toString();
  String range = " [span " + getStartIdx() + ".." + getEndIdx();
  String tail = " confidence=" + confidence + "]";
  return labelText + range + tail;
}
/**
 * Intersects this labeled span with another one.
 *
 * The wrapped spans of both operands are intersected; the result keeps
 * this object's label, background flag and confidence.
 *
 * @param r the other span; it is cast to {@code LabeledSpan}, so any other
 *          implementation causes a {@code ClassCastException}
 * @return a new {@code LabeledSpan} over the intersected region
 */
public Span intersection(Span r)
{
  LabeledSpan that = (LabeledSpan) r;
  Span mine = getSpan();
  Span theirs = that.getSpan();
  Span overlap = mine.intersection(theirs);
  return new LabeledSpan(overlap, label, isBackground, confidence);
}
// Walks childSpans in order. Each child's start index is converted to an
// offset relative to `start`. In the branch taken when that offset lies past
// `current`, the parent span's text between `current` and the child start is
// appended to childElts as a Text, followed by the child's own text; in the
// other branch the child's label entry name is read and `current` is moved
// to the child's end offset (again relative to `start`).
// NOTE(review): this excerpt is brace-unbalanced and the enclosing method is
// not visible here -- confirm the if/else pairing against the full file.
for (int i = 0; i < childSpans.size(); i++) { LabeledSpan childSpan = (LabeledSpan) childSpans.get (i); Label childLabel = childSpan.getLabel(); int childStart = childSpan.getStartIdx () - start; if (childStart > current) { childElts.add (new Text (span.getText().substring (current, childStart))); childElts.add (new Text (childSpan.getText())); } else { String name = childLabel.getEntry ().toString(); current = childSpan.getEndIdx () - start;
// Close the HTML head, open the body, and print the two legends. The class
// legend is driven by the label alphabet of the first extracted span.
// NOTE(review): getLabeledSpan (0) presumably needs at least one extracted
// span -- confirm how empty extractions are handled by the caller.
out.println ("</HEAD><BODY>");
outputClassLegend (out, docExtr.getExtractedSpans ().getLabeledSpan (0).getLabel ().getLabelAlphabet ());
outputRightWrongLegend (out);

// Second member of the i-th span pair; used below as the true (target) span.
LabeledSpan trueSpan = spans.get (i, 1);
Label predLabel = predSpan.getLabel ();
Label trueLabel = trueSpan.getLabel ();
boolean predNonBgrnd = !predSpan.isBackground ();
boolean trueNonBgrnd = !trueSpan.isBackground ();
// Background only when both the predicted and the true span are background.
boolean isBackground = !predNonBgrnd && !trueNonBgrnd;

if (spanClass != null) {
  out.print ("<SPAN CLASS=\""+spanClass+"\">");
}

String text = predSpan.getSpan ().getText ();
// Escape '<' so document text cannot be parsed as markup in the generated
// page. The replacement used to be a literal "<", which made this a no-op.
text = text.replaceAll ("<", "&lt;");
// Start a new paragraph at every line break of the source text.
text = text.replaceAll ("\n", "\n<P>");
/**
 * Builds a record from a list of labeled spans.
 *
 * Background spans are ignored. Every other span is filed under its label:
 * the first span seen for a label creates the {@code Field}, later spans
 * with the same label are added to that field as further fillers.
 *
 * @param name  name stored for this record
 * @param spans labeled spans to group into fields
 */
public Record(String name, LabeledSpans spans)
{
  this.name = name;
  fieldMap = new THashMap();
  for (int idx = 0; idx < spans.size(); idx++) {
    LabeledSpan candidate = spans.getLabeledSpan(idx);
    if (candidate.isBackground()) {
      continue;
    }
    Label key = candidate.getLabel();
    Field existing = (Field) fieldMap.get(key);
    if (existing != null) {
      existing.addFiller(candidate);
    } else {
      fieldMap.put(key, new Field(candidate));
    }
  }
}
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }
/**
 * Aligns the predicted and target spans of a document.
 *
 * Both span lists are walked in parallel. For every visited pair, two
 * intersections are recorded: the predicted span intersected with the
 * target span, and the target span intersected with the predicted span.
 * After each pair, whichever span ends first is advanced; both advance
 * when they end at the same index.
 * NOTE(review): this presumably relies on both lists being ordered by
 * position -- confirm against the producers of these lists.
 *
 * @param docExtr extraction providing the target and the extracted spans
 * @return the two equally long, pairwise aligned span lists
 */
private static DualLabeledSpans intersectSpans(DocumentExtraction docExtr)
{
  LabeledSpans trueSpans = docExtr.getTargetSpans();
  LabeledSpans predSpans = docExtr.getExtractedSpans();

  // Both result lists are created over the predicted spans' document.
  LabeledSpans retPredSpans = new LabeledSpans(predSpans.getDocument());
  LabeledSpans retTrueSpans = new LabeledSpans(predSpans.getDocument());

  int p = 0;
  int t = 0;
  while (p < predSpans.size() && t < trueSpans.size()) {
    LabeledSpan pred = predSpans.getLabeledSpan(p);
    LabeledSpan gold = trueSpans.getLabeledSpan(t);

    LabeledSpan predPiece = (LabeledSpan) pred.intersection(gold);
    LabeledSpan goldPiece = (LabeledSpan) gold.intersection(pred);
    retPredSpans.add(predPiece);
    retTrueSpans.add(goldPiece);

    int predEnd = pred.getEndIdx();
    int goldEnd = gold.getEndIdx();
    if (predEnd <= goldEnd) {
      p++;
    }
    if (goldEnd <= predEnd) {
      t++;
    }
  }

  assert (retPredSpans.size() == retTrueSpans.size());
  return new DualLabeledSpans(retPredSpans, retTrueSpans);
}
// A predicted value is correct only when a true field exists and that field
// accepts the j-th predicted value under the given comparator.
boolean correct = false;
if (trueField != null) {
  correct = trueField.isValue(predField.value(j), comparator);
}
// Record the span's confidence together with the verdict and the span text.
entityConfidences.add(
    new ConfidenceEvaluator.EntityConfidence(span.getConfidence(), correct, span.getText()));
if (correct) {
  numCorrValues++;
}
/**
 * Creates a field from its first filler.
 *
 * The field is named after the span's label, and the span itself is added
 * as a filler.
 *
 * @param span the span that supplies both the field name and the first filler
 */
public Field(LabeledSpan span)
{
  this.name = span.getLabel();
  this.addFiller(span);
}
// Scans orderedByStart backwards from index i-1 down to 0. For each
// candidate parent for which parent.isSubspan (child) holds, the list stored
// for that parent in `children` is looked up, with a separate branch for the
// case where no list is stored yet.
// NOTE(review): the branch bodies and closing braces are not visible in this
// excerpt -- confirm against the full file.
for (int j = i-1; j >= 0; j--) { LabeledSpan parent = (LabeledSpan) orderedByStart.get (j); if (parent.isSubspan (child)) { List childList = (List) children.get (parent); if (childList == null) {
// Walks childSpans in order. Each child's start index is converted to an
// offset relative to `start`. In the branch taken when that offset lies past
// `current`, the parent span's text between `current` and the child start is
// appended to childElts as a Text, followed by the child's own text; in the
// other branch the child's label entry name is read and `current` is moved
// to the child's end offset (again relative to `start`).
// NOTE(review): this excerpt is brace-unbalanced and the enclosing method is
// not visible here -- confirm the if/else pairing against the full file.
for (int i = 0; i < childSpans.size(); i++) { LabeledSpan childSpan = (LabeledSpan) childSpans.get (i); Label childLabel = childSpan.getLabel(); int childStart = childSpan.getStartIdx () - start; if (childStart > current) { childElts.add (new Text (span.getText().substring (current, childStart))); childElts.add (new Text (childSpan.getText())); } else { String name = childLabel.getEntry ().toString(); current = childSpan.getEndIdx () - start;
// Close the HTML head, open the body, and print the two legends. The class
// legend is driven by the label alphabet of the first extracted span.
// NOTE(review): getLabeledSpan (0) presumably needs at least one extracted
// span -- confirm how empty extractions are handled by the caller.
out.println ("</HEAD><BODY>");
outputClassLegend (out, docExtr.getExtractedSpans ().getLabeledSpan (0).getLabel ().getLabelAlphabet ());
outputRightWrongLegend (out);

// Second member of the i-th span pair; used below as the true (target) span.
LabeledSpan trueSpan = spans.get (i, 1);
Label predLabel = predSpan.getLabel ();
Label trueLabel = trueSpan.getLabel ();
boolean predNonBgrnd = !predSpan.isBackground ();
boolean trueNonBgrnd = !trueSpan.isBackground ();
// Background only when both the predicted and the true span are background.
boolean isBackground = !predNonBgrnd && !trueNonBgrnd;

if (spanClass != null) {
  out.print ("<SPAN CLASS=\""+spanClass+"\">");
}

String text = predSpan.getSpan ().getText ();
// Escape '<' so document text cannot be parsed as markup in the generated
// page. The replacement used to be a literal "<", which made this a no-op.
text = text.replaceAll ("<", "&lt;");
// Start a new paragraph at every line break of the source text.
text = text.replaceAll ("\n", "\n<P>");
/**
 * Intersects this labeled span with another one.
 *
 * The wrapped spans of both operands are intersected; the result keeps
 * this object's label, background flag and confidence.
 *
 * @param r the other span; it is cast to {@code LabeledSpan}, so any other
 *          implementation causes a {@code ClassCastException}
 * @return a new {@code LabeledSpan} over the intersected region
 */
public Span intersection(Span r)
{
  LabeledSpan that = (LabeledSpan) r;
  Span mine = getSpan();
  Span theirs = that.getSpan();
  Span overlap = mine.intersection(theirs);
  return new LabeledSpan(overlap, label, isBackground, confidence);
}
/**
 * Builds a record from a list of labeled spans.
 *
 * Background spans are ignored. Every other span is filed under its label:
 * the first span seen for a label creates the {@code Field}, later spans
 * with the same label are added to that field as further fillers.
 *
 * @param name  name stored for this record
 * @param spans labeled spans to group into fields
 */
public Record(String name, LabeledSpans spans)
{
  this.name = name;
  fieldMap = new THashMap();
  for (int idx = 0; idx < spans.size(); idx++) {
    LabeledSpan candidate = spans.getLabeledSpan(idx);
    if (candidate.isBackground()) {
      continue;
    }
    Label key = candidate.getLabel();
    Field existing = (Field) fieldMap.get(key);
    if (existing != null) {
      existing.addFiller(candidate);
    } else {
      fieldMap.put(key, new Field(candidate));
    }
  }
}
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }
/**
 * Aligns the predicted and target spans of a document.
 *
 * Both span lists are walked in parallel. For every visited pair, two
 * intersections are recorded: the predicted span intersected with the
 * target span, and the target span intersected with the predicted span.
 * After each pair, whichever span ends first is advanced; both advance
 * when they end at the same index.
 * NOTE(review): this presumably relies on both lists being ordered by
 * position -- confirm against the producers of these lists.
 *
 * @param docExtr extraction providing the target and the extracted spans
 * @return the two equally long, pairwise aligned span lists
 */
private static DualLabeledSpans intersectSpans(DocumentExtraction docExtr)
{
  LabeledSpans trueSpans = docExtr.getTargetSpans();
  LabeledSpans predSpans = docExtr.getExtractedSpans();

  // Both result lists are created over the predicted spans' document.
  LabeledSpans retPredSpans = new LabeledSpans(predSpans.getDocument());
  LabeledSpans retTrueSpans = new LabeledSpans(predSpans.getDocument());

  int p = 0;
  int t = 0;
  while (p < predSpans.size() && t < trueSpans.size()) {
    LabeledSpan pred = predSpans.getLabeledSpan(p);
    LabeledSpan gold = trueSpans.getLabeledSpan(t);

    LabeledSpan predPiece = (LabeledSpan) pred.intersection(gold);
    LabeledSpan goldPiece = (LabeledSpan) gold.intersection(pred);
    retPredSpans.add(predPiece);
    retTrueSpans.add(goldPiece);

    int predEnd = pred.getEndIdx();
    int goldEnd = gold.getEndIdx();
    if (predEnd <= goldEnd) {
      p++;
    }
    if (goldEnd <= predEnd) {
      t++;
    }
  }

  assert (retPredSpans.size() == retTrueSpans.size());
  return new DualLabeledSpans(retPredSpans, retTrueSpans);
}
// A predicted value is correct only when a true field exists and that field
// accepts the j-th predicted value under the given comparator.
boolean correct = false;
if (trueField != null) {
  correct = trueField.isValue(predField.value(j), comparator);
}
// Record the span's confidence together with the verdict and the span text.
entityConfidences.add(
    new ConfidenceEvaluator.EntityConfidence(span.getConfidence(), correct, span.getText()));
if (correct) {
  numCorrValues++;
}
/**
 * Returns the label of the element at the given position.
 *
 * @param i index passed to {@code get}; the element found there is cast to
 *          {@code LabeledSpan}
 * @return that element's label
 */
public Label getLabel(int i)
{
  return ((LabeledSpan) get(i)).getLabel();
}