/**
 * Creates a fresh {@link LabeledSpans} for {@code document} and fills it by
 * delegating to {@code addSpansFromTags}.
 *
 * @param dict          label alphabet, forwarded to {@code addSpansFromTags}
 * @param document      object the new span collection is constructed over
 * @param backgroundTag label forwarded as the background tag
 * @param input         tokenization forwarded to {@code addSpansFromTags}
 * @param seq           sequence forwarded to {@code addSpansFromTags}
 * @return the newly created span collection after it has been populated
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                                           Tokenization input, Sequence seq)
{
    final LabeledSpans result = new LabeledSpans (document);
    addSpansFromTags (result, input, seq, dict, backgroundTag);
    return result;
}
/**
 * Returns whatever {@code get(i)} yields, cast to {@link Span}.
 *
 * @param i index passed straight through to {@code get}
 * @return the element at {@code i} as a {@code Span}
 */
public Span getSpan (int i)
{
    final Object element = get (i);
    return (Span) element;
}
/**
 * Looks up a labeled span: {@code i} selects the entry of {@code ls},
 * {@code t} selects the labeled span inside that entry.
 *
 * @param t index handed to {@code getLabeledSpan}
 * @param i index into the {@code ls} array
 * @return the labeled span found at that position
 */
LabeledSpan get (int t, int i)
{
    final LabeledSpan found = ls[i].getLabeledSpan (t);
    return found;
}
} // closes an enclosing scope that was opened above this line
/**
 * Builds two parallel span lists by pairwise intersecting the extracted
 * (predicted) spans and the target (true) spans of one document extraction.
 *
 * Both lists are walked in lockstep. At each step the current predicted span
 * is intersected with the current true span and vice versa, and both results
 * are appended. The cursor of whichever span ends first is then advanced; on
 * a tie both cursors advance. The walk stops as soon as either list is
 * exhausted.
 *
 * @param docExtr extraction supplying the target and extracted spans
 * @return a {@link DualLabeledSpans} holding the predicted-side pieces first
 *         and the true-side pieces second; both are created over the
 *         extracted spans' document
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
    int p = 0;
    int t = 0;

    final LabeledSpans target = docExtr.getTargetSpans ();
    final LabeledSpans extracted = docExtr.getExtractedSpans ();
    final LabeledSpans extractedPieces = new LabeledSpans (extracted.getDocument ());
    final LabeledSpans targetPieces = new LabeledSpans (extracted.getDocument ());

    while (p < extracted.size () && t < target.size ()) {
        final LabeledSpan pred = extracted.getLabeledSpan (p);
        final LabeledSpan gold = target.getLabeledSpan (t);

        // Compute both intersections before touching either result list.
        final LabeledSpan predPiece = (LabeledSpan) pred.intersection (gold);
        final LabeledSpan goldPiece = (LabeledSpan) gold.intersection (pred);
        extractedPieces.add (predPiece);
        targetPieces.add (goldPiece);

        // Advance past whichever span finishes first (both when they tie).
        final boolean predFinished = pred.getEndIdx () <= gold.getEndIdx ();
        if (predFinished) {
            p++;
        }
        final boolean goldFinished = gold.getEndIdx () <= pred.getEndIdx ();
        if (goldFinished) {
            t++;
        }
    }

    assert extractedPieces.size () == targetPieces.size ();
    return new DualLabeledSpans (extractedPieces, targetPieces);
}
/**
 * Creates a record called {@code name} whose fields are built from the
 * non-background spans in {@code spans}, grouped by label.
 *
 * The first span seen for a label creates a new {@code Field}; every later
 * span with that label is attached to the existing field via
 * {@code addFiller}.
 *
 * @param name  name stored on the record
 * @param spans labeled spans to group into fields
 */
public Record (String name, LabeledSpans spans)
{
    this.name = name;
    fieldMap = new THashMap ();

    for (int idx = 0; idx < spans.size (); idx++) {
        final LabeledSpan current = spans.getLabeledSpan (idx);
        if (current.isBackground ()) {
            continue;
        }

        final Label label = current.getLabel ();
        final Field existing = (Field) fieldMap.get (label);
        if (existing != null) {
            existing.addFiller (current);
            continue;
        }
        fieldMap.put (label, new Field (current));
    }
}
/**
 * Appends one labeled span covering {@code tagStart.start} up to {@code end}
 * of {@code input}, labeled with {@code tagStart.label}.
 *
 * The span is flagged as background when its label is the very same object
 * as {@code backgroundTag} (identity comparison).
 *
 * @param labeled       collection receiving the new span
 * @param input         tokenization the subspan is taken from
 * @param tagStart      supplies the start position and the label
 * @param end           end position passed to {@code subspan}
 * @param backgroundTag label that marks background spans
 */
private void addLabeledSpan (LabeledSpans labeled, Tokenization input, TagStart tagStart, int end,
                             Label backgroundTag)
{
    final Span covered = input.subspan (tagStart.start, end);
    final Label tag = tagStart.label;
    final boolean isBackground = (tag == backgroundTag);
    labeled.add (new LabeledSpan (covered, tag, isBackground));
}
/**
 * Reports how many entries {@code extractedSpans} currently holds.
 *
 * @return the value of {@code extractedSpans.size()}
 */
public int size ()
{
    final int count = extractedSpans.size ();
    return count;
}
/**
 * Builds two parallel span lists by pairwise intersecting the extracted
 * (predicted) spans and the target (true) spans of one document extraction.
 *
 * Both lists are walked in lockstep. At each step the current predicted span
 * is intersected with the current true span and vice versa, and both results
 * are appended. The cursor of whichever span ends first is then advanced; on
 * a tie both cursors advance. The walk stops as soon as either list is
 * exhausted.
 *
 * @param docExtr extraction supplying the target and extracted spans
 * @return a {@link DualLabeledSpans} holding the predicted-side pieces first
 *         and the true-side pieces second; both are created over the
 *         extracted spans' document
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
    int p = 0;
    int t = 0;

    final LabeledSpans target = docExtr.getTargetSpans ();
    final LabeledSpans extracted = docExtr.getExtractedSpans ();
    final LabeledSpans extractedPieces = new LabeledSpans (extracted.getDocument ());
    final LabeledSpans targetPieces = new LabeledSpans (extracted.getDocument ());

    while (p < extracted.size () && t < target.size ()) {
        final LabeledSpan pred = extracted.getLabeledSpan (p);
        final LabeledSpan gold = target.getLabeledSpan (t);

        // Compute both intersections before touching either result list.
        final LabeledSpan predPiece = (LabeledSpan) pred.intersection (gold);
        final LabeledSpan goldPiece = (LabeledSpan) gold.intersection (pred);
        extractedPieces.add (predPiece);
        targetPieces.add (goldPiece);

        // Advance past whichever span finishes first (both when they tie).
        final boolean predFinished = pred.getEndIdx () <= gold.getEndIdx ();
        if (predFinished) {
            p++;
        }
        final boolean goldFinished = gold.getEndIdx () <= pred.getEndIdx ();
        if (goldFinished) {
            t++;
        }
    }

    assert extractedPieces.size () == targetPieces.size ();
    return new DualLabeledSpans (extractedPieces, targetPieces);
}
/**
 * Creates a record called {@code name} whose fields are built from the
 * non-background spans in {@code spans}, grouped by label.
 *
 * The first span seen for a label creates a new {@code Field}; every later
 * span with that label is attached to the existing field via
 * {@code addFiller}.
 *
 * @param name  name stored on the record
 * @param spans labeled spans to group into fields
 */
public Record (String name, LabeledSpans spans)
{
    this.name = name;
    fieldMap = new THashMap ();

    for (int idx = 0; idx < spans.size (); idx++) {
        final LabeledSpan current = spans.getLabeledSpan (idx);
        if (current.isBackground ()) {
            continue;
        }

        final Label label = current.getLabel ();
        final Field existing = (Field) fieldMap.get (label);
        if (existing != null) {
            existing.addFiller (current);
            continue;
        }
        fieldMap.put (label, new Field (current));
    }
}
/**
 * Appends one labeled span covering {@code tagStart.start} up to {@code end}
 * of {@code input}, labeled with {@code tagStart.label}.
 *
 * The span is flagged as background when its label is the very same object
 * as {@code backgroundTag} (identity comparison).
 *
 * @param labeled       collection receiving the new span
 * @param input         tokenization the subspan is taken from
 * @param tagStart      supplies the start position and the label
 * @param end           end position passed to {@code subspan}
 * @param backgroundTag label that marks background spans
 */
private void addLabeledSpan (LabeledSpans labeled, Tokenization input, TagStart tagStart, int end,
                             Label backgroundTag)
{
    final Span covered = input.subspan (tagStart.start, end);
    final Label tag = tagStart.label;
    final boolean isBackground = (tag == backgroundTag);
    labeled.add (new LabeledSpan (covered, tag, isBackground));
}
/**
 * Reports how many entries {@code extractedSpans} currently holds.
 *
 * @return the value of {@code extractedSpans.size()}
 */
public int size ()
{
    final int count = extractedSpans.size ();
    return count;
}
/**
 * Builds two parallel span lists by pairwise intersecting the extracted
 * (predicted) spans and the target (true) spans of one document extraction.
 *
 * Both lists are walked in lockstep. At each step the current predicted span
 * is intersected with the current true span and vice versa, and both results
 * are appended. The cursor of whichever span ends first is then advanced; on
 * a tie both cursors advance. The walk stops as soon as either list is
 * exhausted.
 *
 * @param docExtr extraction supplying the target and extracted spans
 * @return a {@link DualLabeledSpans} holding the predicted-side pieces first
 *         and the true-side pieces second; both are created over the
 *         extracted spans' document
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
    int p = 0;
    int t = 0;

    final LabeledSpans target = docExtr.getTargetSpans ();
    final LabeledSpans extracted = docExtr.getExtractedSpans ();
    final LabeledSpans extractedPieces = new LabeledSpans (extracted.getDocument ());
    final LabeledSpans targetPieces = new LabeledSpans (extracted.getDocument ());

    while (p < extracted.size () && t < target.size ()) {
        final LabeledSpan pred = extracted.getLabeledSpan (p);
        final LabeledSpan gold = target.getLabeledSpan (t);

        // Compute both intersections before touching either result list.
        final LabeledSpan predPiece = (LabeledSpan) pred.intersection (gold);
        final LabeledSpan goldPiece = (LabeledSpan) gold.intersection (pred);
        extractedPieces.add (predPiece);
        targetPieces.add (goldPiece);

        // Advance past whichever span finishes first (both when they tie).
        final boolean predFinished = pred.getEndIdx () <= gold.getEndIdx ();
        if (predFinished) {
            p++;
        }
        final boolean goldFinished = gold.getEndIdx () <= pred.getEndIdx ();
        if (goldFinished) {
            t++;
        }
    }

    assert extractedPieces.size () == targetPieces.size ();
    return new DualLabeledSpans (extractedPieces, targetPieces);
}
/**
 * Creates a record called {@code name} whose fields are built from the
 * non-background spans in {@code spans}, grouped by label.
 *
 * The first span seen for a label creates a new {@code Field}; every later
 * span with that label is attached to the existing field via
 * {@code addFiller}.
 *
 * @param name  name stored on the record
 * @param spans labeled spans to group into fields
 */
public Record (String name, LabeledSpans spans)
{
    this.name = name;
    fieldMap = new THashMap ();

    for (int idx = 0; idx < spans.size (); idx++) {
        final LabeledSpan current = spans.getLabeledSpan (idx);
        if (current.isBackground ()) {
            continue;
        }

        final Label label = current.getLabel ();
        final Field existing = (Field) fieldMap.get (label);
        if (existing != null) {
            existing.addFiller (current);
            continue;
        }
        fieldMap.put (label, new Field (current));
    }
}
/**
 * Appends one labeled span covering {@code tagStart.start} up to {@code end}
 * of {@code input}, labeled with {@code tagStart.label}.
 *
 * The span is flagged as background when its label is the very same object
 * as {@code backgroundTag} (identity comparison).
 *
 * @param labeled       collection receiving the new span
 * @param input         tokenization the subspan is taken from
 * @param tagStart      supplies the start position and the label
 * @param end           end position passed to {@code subspan}
 * @param backgroundTag label that marks background spans
 */
private void addLabeledSpan (LabeledSpans labeled, Tokenization input, TagStart tagStart, int end,
                             Label backgroundTag)
{
    final Span covered = input.subspan (tagStart.start, end);
    final Label tag = tagStart.label;
    final boolean isBackground = (tag == backgroundTag);
    labeled.add (new LabeledSpan (covered, tag, isBackground));
}
/**
 * Returns whatever {@code get(i)} yields, cast to {@link Span}.
 *
 * @param i index passed straight through to {@code get}
 * @return the element at {@code i} as a {@code Span}
 */
public Span getSpan (int i)
{
    final Object element = get (i);
    return (Span) element;
}
/**
 * Creates a fresh {@link LabeledSpans} for {@code document} and fills it by
 * delegating to {@code addSpansFromTags}.
 *
 * @param dict          label alphabet, forwarded to {@code addSpansFromTags}
 * @param document      object the new span collection is constructed over
 * @param backgroundTag label forwarded as the background tag
 * @param input         tokenization forwarded to {@code addSpansFromTags}
 * @param seq           sequence forwarded to {@code addSpansFromTags}
 * @return the newly created span collection after it has been populated
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                                           Tokenization input, Sequence seq)
{
    final LabeledSpans result = new LabeledSpans (document);
    addSpansFromTags (result, input, seq, dict, backgroundTag);
    return result;
}
/**
 * Reports how many entries {@code extractedSpans} currently holds.
 *
 * @return the value of {@code extractedSpans.size()}
 */
public int size ()
{
    final int count = extractedSpans.size ();
    return count;
}
/**
 * Looks up a labeled span: {@code i} selects the entry of {@code ls},
 * {@code t} selects the labeled span inside that entry.
 *
 * @param t index handed to {@code getLabeledSpan}
 * @param i index into the {@code ls} array
 * @return the labeled span found at that position
 */
LabeledSpan get (int t, int i)
{
    final LabeledSpan found = ls[i].getLabeledSpan (t);
    return found;
}
} // closes an enclosing scope that was opened above this line
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }
/**
 * Fills the gap between {@code docidx} and the start of {@code span} with a
 * background span, if there is one.
 *
 * When {@code docidx} is strictly less than the span's start index, a
 * {@code StringSpan} over {@code [docidx, start)} of the span's document is
 * added to {@code labeled} with the {@code background} label and the
 * background flag set. Otherwise nothing is added.
 *
 * @param labeled    collection that receives the background span
 * @param span       span whose start index bounds the gap
 * @param docidx     position where the gap would begin
 * @param background label given to the filler span
 */
private void addBackgroundIfNecessary (LabeledSpans labeled, StringSpan span, int docidx, Label background)
{
    final int spanStart = span.getStartIdx ();
    if (docidx >= spanStart) {
        return;
    }

    final Span gap = new StringSpan ((CharSequence) span.getDocument (), docidx, spanStart);
    labeled.add (new LabeledSpan (gap, background, true));
}