private static void writeDualExtractions (PrintWriter out, Extraction e1, CRFExtractor extor1, Extraction e2, CRFExtractor extor2, int start, int end, boolean showLattice) { writeHeader (out); for (int i = start; i < end; i++) { DocumentExtraction doc1 = e1.getDocumentExtraction (i); DocumentExtraction doc2 = e2.getDocumentExtraction (i); String desc = doc1.getName(); String doc1Str = ((CharSequence) doc1.getDocument ()).toString(); String doc2Str = ((CharSequence) doc2.getDocument ()).toString(); if (!doc1Str.equals (doc2Str)) { System.err.println ("Skipping document "+i+": Extractions don't match"); continue; } Sequence targ1 = doc1.getPredictedLabels (); Sequence targ2 = doc2.getPredictedLabels (); if (!predictionsMatch (targ1, targ2)) { ExtorInfo info1 = infoForDoc (doc1Str, "CRF1::"+desc, "C1I"+i, doc1, extor1, showLattice); ExtorInfo info2 = infoForDoc (doc1Str, "CRF2::"+desc, "C2I"+i, doc2, extor2, showLattice); if (!showLattice) { // add links from errors.html --> lattice.html info1.link = info2.link = computeLatticeFname (i); } dualLattice2html (out, desc, info1, info2); } } writeFooter (out); }
private static void writeDualExtractions (PrintWriter out, Extraction e1, CRFExtractor extor1, Extraction e2, CRFExtractor extor2, int start, int end, boolean showLattice) { writeHeader (out); for (int i = start; i < end; i++) { DocumentExtraction doc1 = e1.getDocumentExtraction (i); DocumentExtraction doc2 = e2.getDocumentExtraction (i); String desc = doc1.getName(); String doc1Str = ((CharSequence) doc1.getDocument ()).toString(); String doc2Str = ((CharSequence) doc2.getDocument ()).toString(); if (!doc1Str.equals (doc2Str)) { System.err.println ("Skipping document "+i+": Extractions don't match"); continue; } Sequence targ1 = doc1.getPredictedLabels (); Sequence targ2 = doc2.getPredictedLabels (); if (!predictionsMatch (targ1, targ2)) { ExtorInfo info1 = infoForDoc (doc1Str, "CRF1::"+desc, "C1I"+i, doc1, extor1, showLattice); ExtorInfo info2 = infoForDoc (doc1Str, "CRF2::"+desc, "C2I"+i, doc2, extor2, showLattice); if (!showLattice) { // add links from errors.html --> lattice.html info1.link = info2.link = computeLatticeFname (i); } dualLattice2html (out, desc, info1, info2); } } writeFooter (out); }
private static void writeDualExtractions (PrintWriter out, Extraction e1, CRFExtractor extor1, Extraction e2, CRFExtractor extor2, int start, int end, boolean showLattice) { writeHeader (out); for (int i = start; i < end; i++) { DocumentExtraction doc1 = e1.getDocumentExtraction (i); DocumentExtraction doc2 = e2.getDocumentExtraction (i); String desc = doc1.getName(); String doc1Str = ((CharSequence) doc1.getDocument ()).toString(); String doc2Str = ((CharSequence) doc2.getDocument ()).toString(); if (!doc1Str.equals (doc2Str)) { System.err.println ("Skipping document "+i+": Extractions don't match"); continue; } Sequence targ1 = doc1.getPredictedLabels (); Sequence targ2 = doc2.getPredictedLabels (); if (!predictionsMatch (targ1, targ2)) { ExtorInfo info1 = infoForDoc (doc1Str, "CRF1::"+desc, "C1I"+i, doc1, extor1, showLattice); ExtorInfo info2 = infoForDoc (doc1Str, "CRF2::"+desc, "C2I"+i, doc2, extor2, showLattice); if (!showLattice) { // add links from errors.html --> lattice.html info1.link = info2.link = computeLatticeFname (i); } dualLattice2html (out, desc, info1, info2); } } writeFooter (out); }
private static ExtorInfo infoForDoc (String doc, String desc, String idx, DocumentExtraction docextr, CRFExtractor extor, boolean showLattice) { // Instance c2 = new Instance (doc, null, null, null, extor.getTokenizationPipe ()); // TokenSequence input = (TokenSequence) c2.getData (); TokenSequence input = (TokenSequence) docextr.getInput (); LabelSequence target = docextr.getTarget (); Sequence predicted = docextr.getPredictedLabels (); ExtorInfo info = new ExtorInfo (input, predicted, target, desc, idx); if (showLattice == true) { CRF crf = extor.getCrf(); // xxx perhaps the next two lines could be a transducer method??? Instance carrier = extor.getFeaturePipe().pipe(new Instance (input, null, null, null)); info.fvs = (FeatureVectorSequence) carrier.getData (); info.lattice = new MaxLatticeDefault (crf, (Sequence) carrier.getData(), null); info.bestStates = info.lattice.bestOutputSequence(); } return info; }
private static ExtorInfo infoForDoc (String doc, String desc, String idx, DocumentExtraction docextr, CRFExtractor extor, boolean showLattice) { // Instance c2 = new Instance (doc, null, null, null, extor.getTokenizationPipe ()); // TokenSequence input = (TokenSequence) c2.getData (); TokenSequence input = (TokenSequence) docextr.getInput (); LabelSequence target = docextr.getTarget (); Sequence predicted = docextr.getPredictedLabels (); ExtorInfo info = new ExtorInfo (input, predicted, target, desc, idx); if (showLattice == true) { CRF crf = extor.getCrf(); // xxx perhaps the next two lines could be a transducer method??? Instance carrier = extor.getFeaturePipe().pipe(new Instance (input, null, null, null)); info.fvs = (FeatureVectorSequence) carrier.getData (); info.lattice = new MaxLatticeDefault (crf, (Sequence) carrier.getData(), null); info.bestStates = info.lattice.bestOutputSequence(); } return info; }
private static ExtorInfo infoForDoc (String doc, String desc, String idx, DocumentExtraction docextr, CRFExtractor extor, boolean showLattice) { // Instance c2 = new Instance (doc, null, null, null, extor.getTokenizationPipe ()); // TokenSequence input = (TokenSequence) c2.getData (); TokenSequence input = (TokenSequence) docextr.getInput (); LabelSequence target = docextr.getTarget (); Sequence predicted = docextr.getPredictedLabels (); ExtorInfo info = new ExtorInfo (input, predicted, target, desc, idx); if (showLattice == true) { CRF crf = extor.getCrf(); // xxx perhaps the next two lines could be a transducer method??? Instance carrier = extor.getFeaturePipe().pipe(new Instance (input, null, null, null)); info.fvs = (FeatureVectorSequence) carrier.getData (); info.lattice = new MaxLatticeDefault (crf, (Sequence) carrier.getData(), null); info.bestStates = info.lattice.bestOutputSequence(); } return info; }
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }
public void estimateConfidence (DocumentExtraction documentExtraction) { Tokenization input = documentExtraction.getInput(); // WARNING: input Tokenization will likely already have many // features appended from the last time it was passed through a // featurePipe. To avoid a redundant calculation of features, the // caller may want to set this.featurePipe = // TokenSequence2FeatureVectorSequence Instance carrier = this.featurePipe.pipe(new Instance(input, null, null, null)); Sequence pipedInput = (Sequence) carrier.getData(); Sequence prediction = documentExtraction.getPredictedLabels(); LabeledSpans labeledSpans = documentExtraction.getExtractedSpans(); SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedInput); for (int i=0; i < labeledSpans.size(); i++) { LabeledSpan span = labeledSpans.getLabeledSpan(i); if (span.isBackground()) continue; int[] segmentBoundaries = getSegmentBoundaries(input, span); Segment segment = new Segment(pipedInput, prediction, prediction, segmentBoundaries[0], segmentBoundaries[1], null, null); span.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice)); } }