/**
 * Wraps the given tokenization and label sequence in a
 * {@link DocumentExtraction} named "Extraction", hands that extraction to
 * {@code confidenceEstimator.estimateConfidence}, and returns the spans
 * the extraction exposes afterwards.
 *
 * @param dict          label alphabet passed to the extraction
 * @param document      not referenced by this method
 * @param backgroundTag label whose {@code toString()} is passed to the extraction as the background tag
 * @param input         tokenization the spans are built over
 * @param seq           label sequence passed to the extraction
 * @return the extraction's extracted spans, after confidence estimation has run
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                                           Tokenization input, Sequence seq)
{
  final String backgroundName = backgroundTag.toString();
  final DocumentExtraction annotated =
    new DocumentExtraction("Extraction", dict, input, seq, null, backgroundName);

  // The estimator is handed the extraction before its spans are read back out.
  confidenceEstimator.estimateConfidence(annotated);

  final LabeledSpans scoredSpans = annotated.getExtractedSpans();
  return scoredSpans;
}
/**
 * Wraps the given tokenization and label sequence in a
 * {@link DocumentExtraction} named "Extraction", hands that extraction to
 * {@code confidenceEstimator.estimateConfidence}, and returns the spans
 * the extraction exposes afterwards.
 *
 * @param dict          label alphabet passed to the extraction
 * @param document      not referenced by this method
 * @param backgroundTag label whose {@code toString()} is passed to the extraction as the background tag
 * @param input         tokenization the spans are built over
 * @param seq           label sequence passed to the extraction
 * @return the extraction's extracted spans, after confidence estimation has run
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                                           Tokenization input, Sequence seq)
{
  final String backgroundName = backgroundTag.toString();
  final DocumentExtraction annotated =
    new DocumentExtraction("Extraction", dict, input, seq, null, backgroundName);

  // The estimator is handed the extraction before its spans are read back out.
  confidenceEstimator.estimateConfidence(annotated);

  final LabeledSpans scoredSpans = annotated.getExtractedSpans();
  return scoredSpans;
}
/**
 * Registers one document's extraction: appends it to {@code byDocs}, adds a
 * {@link Record} of its extracted spans to {@code records}, and, when the
 * extraction carries target spans, adds a second record named
 * {@code "TRUE:" + name} to {@code trueRecords}.
 *
 * @param docseq the extraction to register
 */
public void addDocumentExtraction (DocumentExtraction docseq)
{
  byDocs.add (docseq);

  Record predictedRecord = new Record (docseq.getName (), docseq.getExtractedSpans ());
  records.add (predictedRecord);

  // Target spans may be absent; in that case no "TRUE:" record is created.
  if (docseq.getTargetSpans () == null) {
    return;
  }

  Record targetRecord = new Record ("TRUE:"+docseq.getName (), docseq.getTargetSpans ());
  trueRecords.add (targetRecord);
}
/**
 * Registers one document's extraction: appends it to {@code byDocs}, adds a
 * {@link Record} of its extracted spans to {@code records}, and, when the
 * extraction carries target spans, adds a second record named
 * {@code "TRUE:" + name} to {@code trueRecords}.
 *
 * @param docseq the extraction to register
 */
public void addDocumentExtraction (DocumentExtraction docseq)
{
  byDocs.add (docseq);

  Record predictedRecord = new Record (docseq.getName (), docseq.getExtractedSpans ());
  records.add (predictedRecord);

  // Target spans may be absent; in that case no "TRUE:" record is created.
  if (docseq.getTargetSpans () == null) {
    return;
  }

  Record targetRecord = new Record ("TRUE:"+docseq.getName (), docseq.getTargetSpans ());
  trueRecords.add (targetRecord);
}
/**
 * Wraps the given tokenization and label sequence in a
 * {@link DocumentExtraction} named "Extraction", hands that extraction to
 * {@code confidenceEstimator.estimateConfidence}, and returns the spans
 * the extraction exposes afterwards.
 *
 * @param dict          label alphabet passed to the extraction
 * @param document      not referenced by this method
 * @param backgroundTag label whose {@code toString()} is passed to the extraction as the background tag
 * @param input         tokenization the spans are built over
 * @param seq           label sequence passed to the extraction
 * @return the extraction's extracted spans, after confidence estimation has run
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                                           Tokenization input, Sequence seq)
{
  final String backgroundName = backgroundTag.toString();
  final DocumentExtraction annotated =
    new DocumentExtraction("Extraction", dict, input, seq, null, backgroundName);

  // The estimator is handed the extraction before its spans are read back out.
  confidenceEstimator.estimateConfidence(annotated);

  final LabeledSpans scoredSpans = annotated.getExtractedSpans();
  return scoredSpans;
}
/**
 * Registers one document's extraction: appends it to {@code byDocs}, adds a
 * {@link Record} of its extracted spans to {@code records}, and, when the
 * extraction carries target spans, adds a second record named
 * {@code "TRUE:" + name} to {@code trueRecords}.
 *
 * @param docseq the extraction to register
 */
public void addDocumentExtraction (DocumentExtraction docseq)
{
  byDocs.add (docseq);

  Record predictedRecord = new Record (docseq.getName (), docseq.getExtractedSpans ());
  records.add (predictedRecord);

  // Target spans may be absent; in that case no "TRUE:" record is created.
  if (docseq.getTargetSpans () == null) {
    return;
  }

  Record targetRecord = new Record ("TRUE:"+docseq.getName (), docseq.getTargetSpans ());
  trueRecords.add (targetRecord);
}
// Closes the HTML head and opens the body, then writes the class legend (using the label alphabet of the first extracted span) and the right/wrong legend. NOTE(review): getLabeledSpan (0) presumably fails when there are no extracted spans -- confirm.
out.println ("</HEAD><BODY>"); outputClassLegend (out, docExtr.getExtractedSpans ().getLabeledSpan (0).getLabel ().getLabelAlphabet ()); outputRightWrongLegend (out);
// Closes the HTML head and opens the body, then writes the class legend (using the label alphabet of the first extracted span) and the right/wrong legend. NOTE(review): getLabeledSpan (0) presumably fails when there are no extracted spans -- confirm.
out.println ("</HEAD><BODY>"); outputClassLegend (out, docExtr.getExtractedSpans ().getLabeledSpan (0).getLabel ().getLabelAlphabet ()); outputRightWrongLegend (out);
// Closes the HTML head and opens the body, then writes the class legend (using the label alphabet of the first extracted span) and the right/wrong legend. NOTE(review): getLabeledSpan (0) presumably fails when there are no extracted spans -- confirm.
out.println ("</HEAD><BODY>"); outputClassLegend (out, docExtr.getExtractedSpans ().getLabeledSpan (0).getLabel ().getLabelAlphabet ()); outputRightWrongLegend (out);
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }
/**
 * Walks the extraction's predicted and target span lists in step and
 * produces two equally long lists of pairwise intersections.
 *
 * <p>At every step the current predicted span is intersected with the
 * current target span (and vice versa); whichever span ends first is
 * advanced, and both are advanced when they end at the same index. The
 * walk stops as soon as either list is exhausted.
 *
 * @param docExtr extraction supplying both span lists
 * @return the intersected predicted spans paired with the intersected target spans
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
  LabeledSpans gold = docExtr.getTargetSpans ();
  LabeledSpans guessed = docExtr.getExtractedSpans ();

  // Both result lists are created over the predicted spans' document.
  LabeledSpans clippedGuessed = new LabeledSpans (guessed.getDocument ());
  LabeledSpans clippedGold = new LabeledSpans (guessed.getDocument ());

  int guessPos = 0;
  int goldPos = 0;
  while (guessPos < guessed.size() && goldPos < gold.size ()) {
    LabeledSpan guessSpan = guessed.getLabeledSpan (guessPos);
    LabeledSpan goldSpan = gold.getLabeledSpan (goldPos);

    LabeledSpan guessPiece = (LabeledSpan) guessSpan.intersection (goldSpan);
    LabeledSpan goldPiece = (LabeledSpan) goldSpan.intersection (guessSpan);
    clippedGuessed.add (guessPiece);
    clippedGold.add (goldPiece);

    // Decide both advances from the spans fetched above, before either index moves.
    boolean guessEndsNoLater = guessSpan.getEndIdx () <= goldSpan.getEndIdx ();
    boolean goldEndsNoLater = goldSpan.getEndIdx () <= guessSpan.getEndIdx ();
    if (guessEndsNoLater) {
      guessPos++;
    }
    if (goldEndsNoLater) {
      goldPos++;
    }
  }

  assert (clippedGuessed.size() == clippedGold.size());
  return new DualLabeledSpans (clippedGuessed, clippedGold);
}
/**
 * Walks the extraction's predicted and target span lists in step and
 * produces two equally long lists of pairwise intersections.
 *
 * <p>At every step the current predicted span is intersected with the
 * current target span (and vice versa); whichever span ends first is
 * advanced, and both are advanced when they end at the same index. The
 * walk stops as soon as either list is exhausted.
 *
 * @param docExtr extraction supplying both span lists
 * @return the intersected predicted spans paired with the intersected target spans
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
  LabeledSpans gold = docExtr.getTargetSpans ();
  LabeledSpans guessed = docExtr.getExtractedSpans ();

  // Both result lists are created over the predicted spans' document.
  LabeledSpans clippedGuessed = new LabeledSpans (guessed.getDocument ());
  LabeledSpans clippedGold = new LabeledSpans (guessed.getDocument ());

  int guessPos = 0;
  int goldPos = 0;
  while (guessPos < guessed.size() && goldPos < gold.size ()) {
    LabeledSpan guessSpan = guessed.getLabeledSpan (guessPos);
    LabeledSpan goldSpan = gold.getLabeledSpan (goldPos);

    LabeledSpan guessPiece = (LabeledSpan) guessSpan.intersection (goldSpan);
    LabeledSpan goldPiece = (LabeledSpan) goldSpan.intersection (guessSpan);
    clippedGuessed.add (guessPiece);
    clippedGold.add (goldPiece);

    // Decide both advances from the spans fetched above, before either index moves.
    boolean guessEndsNoLater = guessSpan.getEndIdx () <= goldSpan.getEndIdx ();
    boolean goldEndsNoLater = goldSpan.getEndIdx () <= guessSpan.getEndIdx ();
    if (guessEndsNoLater) {
      guessPos++;
    }
    if (goldEndsNoLater) {
      goldPos++;
    }
  }

  assert (clippedGuessed.size() == clippedGold.size());
  return new DualLabeledSpans (clippedGuessed, clippedGold);
}
/**
 * Walks the extraction's predicted and target span lists in step and
 * produces two equally long lists of pairwise intersections.
 *
 * <p>At every step the current predicted span is intersected with the
 * current target span (and vice versa); whichever span ends first is
 * advanced, and both are advanced when they end at the same index. The
 * walk stops as soon as either list is exhausted.
 *
 * @param docExtr extraction supplying both span lists
 * @return the intersected predicted spans paired with the intersected target spans
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
  LabeledSpans gold = docExtr.getTargetSpans ();
  LabeledSpans guessed = docExtr.getExtractedSpans ();

  // Both result lists are created over the predicted spans' document.
  LabeledSpans clippedGuessed = new LabeledSpans (guessed.getDocument ());
  LabeledSpans clippedGold = new LabeledSpans (guessed.getDocument ());

  int guessPos = 0;
  int goldPos = 0;
  while (guessPos < guessed.size() && goldPos < gold.size ()) {
    LabeledSpan guessSpan = guessed.getLabeledSpan (guessPos);
    LabeledSpan goldSpan = gold.getLabeledSpan (goldPos);

    LabeledSpan guessPiece = (LabeledSpan) guessSpan.intersection (goldSpan);
    LabeledSpan goldPiece = (LabeledSpan) goldSpan.intersection (guessSpan);
    clippedGuessed.add (guessPiece);
    clippedGold.add (goldPiece);

    // Decide both advances from the spans fetched above, before either index moves.
    boolean guessEndsNoLater = guessSpan.getEndIdx () <= goldSpan.getEndIdx ();
    boolean goldEndsNoLater = goldSpan.getEndIdx () <= guessSpan.getEndIdx ();
    if (guessEndsNoLater) {
      guessPos++;
    }
    if (goldEndsNoLater) {
      goldPos++;
    }
  }

  assert (clippedGuessed.size() == clippedGold.size());
  return new DualLabeledSpans (clippedGuessed, clippedGold);
}