cc.mallet.extract.DocumentExtraction.getExtractedSpans Java code examples

/**
 * Builds the labeled spans for one tokenized input and runs the confidence estimator over them.
 *
 * <p>A {@code DocumentExtraction} named "Extraction" is created from {@code dict},
 * {@code input}, {@code seq} and the string form of {@code backgroundTag}; the fifth
 * constructor argument is passed as {@code null}. The extraction is handed to
 * {@code confidenceEstimator.estimateConfidence} before its extracted spans are returned.
 *
 * @param dict label alphabet forwarded to the extraction
 * @param document not referenced by this method
 * @param backgroundTag label whose {@code toString()} value is forwarded to the extraction
 * @param input tokenization forwarded to the extraction
 * @param seq sequence forwarded to the extraction (presumably the predicted labels; confirm)
 * @return the result of {@code getExtractedSpans()} on the newly built extraction
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                      Tokenization input, Sequence seq)
{
 DocumentExtraction result =
  new DocumentExtraction("Extraction", dict, input, seq, null, backgroundTag.toString());

 // Estimation happens before the spans are handed back to the caller.
 confidenceEstimator.estimateConfidence(result);

 LabeledSpans extracted = result.getExtractedSpans();
 return extracted;
}

/**
 * Builds the labeled spans for one tokenized input and runs the confidence estimator over them.
 *
 * <p>A {@code DocumentExtraction} named "Extraction" is created from {@code dict},
 * {@code input}, {@code seq} and the string form of {@code backgroundTag}; the fifth
 * constructor argument is passed as {@code null}. The extraction is handed to
 * {@code confidenceEstimator.estimateConfidence} before its extracted spans are returned.
 *
 * @param dict label alphabet forwarded to the extraction
 * @param document not referenced by this method
 * @param backgroundTag label whose {@code toString()} value is forwarded to the extraction
 * @param input tokenization forwarded to the extraction
 * @param seq sequence forwarded to the extraction (presumably the predicted labels; confirm)
 * @return the result of {@code getExtractedSpans()} on the newly built extraction
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                      Tokenization input, Sequence seq)
{
 DocumentExtraction result =
  new DocumentExtraction("Extraction", dict, input, seq, null, backgroundTag.toString());

 // Estimation happens before the spans are handed back to the caller.
 confidenceEstimator.estimateConfidence(result);

 LabeledSpans extracted = result.getExtractedSpans();
 return extracted;
}

/**
 * Registers one document extraction with this object.
 *
 * <p>The extraction itself is appended to {@code byDocs}, and a {@code Record} built from its
 * name and extracted spans is appended to {@code records}. When the extraction reports
 * non-null target spans, a second {@code Record} whose name is prefixed with "TRUE:" is
 * appended to {@code trueRecords}.
 *
 * @param docseq the extraction to register
 */
public void addDocumentExtraction (DocumentExtraction docseq)
{
 byDocs.add (docseq);

 Record predicted = new Record (docseq.getName (), docseq.getExtractedSpans ());
 records.add (predicted);

 // Nothing more to record when there are no target spans.
 if (docseq.getTargetSpans () == null) {
  return;
 }

 Record truth = new Record ("TRUE:"+docseq.getName (), docseq.getTargetSpans ());
 trueRecords.add (truth);
}

/**
 * Registers one document extraction with this object.
 *
 * <p>The extraction itself is appended to {@code byDocs}, and a {@code Record} built from its
 * name and extracted spans is appended to {@code records}. When the extraction reports
 * non-null target spans, a second {@code Record} whose name is prefixed with "TRUE:" is
 * appended to {@code trueRecords}.
 *
 * @param docseq the extraction to register
 */
public void addDocumentExtraction (DocumentExtraction docseq)
{
 byDocs.add (docseq);

 Record predicted = new Record (docseq.getName (), docseq.getExtractedSpans ());
 records.add (predicted);

 // Nothing more to record when there are no target spans.
 if (docseq.getTargetSpans () == null) {
  return;
 }

 Record truth = new Record ("TRUE:"+docseq.getName (), docseq.getTargetSpans ());
 trueRecords.add (truth);
}

/**
 * Builds the labeled spans for one tokenized input and runs the confidence estimator over them.
 *
 * <p>A {@code DocumentExtraction} named "Extraction" is created from {@code dict},
 * {@code input}, {@code seq} and the string form of {@code backgroundTag}; the fifth
 * constructor argument is passed as {@code null}. The extraction is handed to
 * {@code confidenceEstimator.estimateConfidence} before its extracted spans are returned.
 *
 * @param dict label alphabet forwarded to the extraction
 * @param document not referenced by this method
 * @param backgroundTag label whose {@code toString()} value is forwarded to the extraction
 * @param input tokenization forwarded to the extraction
 * @param seq sequence forwarded to the extraction (presumably the predicted labels; confirm)
 * @return the result of {@code getExtractedSpans()} on the newly built extraction
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                      Tokenization input, Sequence seq)
{
 DocumentExtraction result =
  new DocumentExtraction("Extraction", dict, input, seq, null, backgroundTag.toString());

 // Estimation happens before the spans are handed back to the caller.
 confidenceEstimator.estimateConfidence(result);

 LabeledSpans extracted = result.getExtractedSpans();
 return extracted;
}

/**
 * Registers one document extraction with this object.
 *
 * <p>The extraction itself is appended to {@code byDocs}, and a {@code Record} built from its
 * name and extracted spans is appended to {@code records}. When the extraction reports
 * non-null target spans, a second {@code Record} whose name is prefixed with "TRUE:" is
 * appended to {@code trueRecords}.
 *
 * @param docseq the extraction to register
 */
public void addDocumentExtraction (DocumentExtraction docseq)
{
 byDocs.add (docseq);

 Record predicted = new Record (docseq.getName (), docseq.getExtractedSpans ());
 records.add (predicted);

 // Nothing more to record when there are no target spans.
 if (docseq.getTargetSpans () == null) {
  return;
 }

 Record truth = new Record ("TRUE:"+docseq.getName (), docseq.getTargetSpans ());
 trueRecords.add (truth);
}

// Close the HTML head, open the body, then write the two legends to `out`.
// The class legend is driven by the label alphabet of the extracted span at index 0.
// NOTE(review): presumably fails when the document has no extracted spans -- confirm.
out.println ("</HEAD><BODY>");
outputClassLegend (out, docExtr.getExtractedSpans ().getLabeledSpan (0).getLabel ().getLabelAlphabet ());
outputRightWrongLegend (out);

// Close the HTML head, open the body, then write the two legends to `out`.
// The class legend is driven by the label alphabet of the extracted span at index 0.
// NOTE(review): presumably fails when the document has no extracted spans -- confirm.
out.println ("</HEAD><BODY>");
outputClassLegend (out, docExtr.getExtractedSpans ().getLabeledSpan (0).getLabel ().getLabelAlphabet ());
outputRightWrongLegend (out);

// Close the HTML head, open the body, then write the two legends to `out`.
// The class legend is driven by the label alphabet of the extracted span at index 0.
// NOTE(review): presumably fails when the document has no extracted spans -- confirm.
out.println ("</HEAD><BODY>");
outputClassLegend (out, docExtr.getExtractedSpans ().getLabeledSpan (0).getLabel ().getLabelAlphabet ());
outputRightWrongLegend (out);

/**
 * Sets a confidence value on every non-background extracted span of a document.
 *
 * <p>The document's input tokenization is passed through {@code featurePipe}, a
 * {@code SumLatticeDefault} is built over the piped data using the wrapped estimator's
 * transducer, and each non-background span is converted into a {@code Segment} that the
 * wrapped estimator scores. Background spans are left untouched.
 *
 * @param documentExtraction extraction whose spans receive their confidence via
 *        {@code setConfidence}
 */
public void estimateConfidence (DocumentExtraction documentExtraction) {
 Tokenization tokens = documentExtraction.getInput();

 // WARNING: the input Tokenization has probably already had features
 // appended by an earlier pass through a featurePipe. Callers who want
 // to avoid computing the features a second time may prefer to set
 // this.featurePipe = TokenSequence2FeatureVectorSequence.
 Instance pipedInstance = this.featurePipe.pipe(new Instance(tokens, null, null, null));
 Sequence pipedData = (Sequence) pipedInstance.getData();

 Sequence predicted = documentExtraction.getPredictedLabels();
 LabeledSpans spans = documentExtraction.getExtractedSpans();

 // One lattice is built up front and shared by every segment scored below.
 SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedData);

 for (int idx = 0; idx < spans.size(); idx++) {
  LabeledSpan current = spans.getLabeledSpan(idx);
  if (!current.isBackground()) {
   int[] bounds = getSegmentBoundaries(tokens, current);
   int boundsFirst = bounds[0];
   int boundsSecond = bounds[1];
   // The predicted sequence is supplied for both sequence arguments of Segment.
   Segment segment = new Segment(pipedData, predicted, predicted,
                  boundsFirst, boundsSecond,
                  null, null);
   current.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice));
  }
 }
}

/**
 * Sets a confidence value on every non-background extracted span of a document.
 *
 * <p>The document's input tokenization is passed through {@code featurePipe}, a
 * {@code SumLatticeDefault} is built over the piped data using the wrapped estimator's
 * transducer, and each non-background span is converted into a {@code Segment} that the
 * wrapped estimator scores. Background spans are left untouched.
 *
 * @param documentExtraction extraction whose spans receive their confidence via
 *        {@code setConfidence}
 */
public void estimateConfidence (DocumentExtraction documentExtraction) {
 Tokenization tokens = documentExtraction.getInput();

 // WARNING: the input Tokenization has probably already had features
 // appended by an earlier pass through a featurePipe. Callers who want
 // to avoid computing the features a second time may prefer to set
 // this.featurePipe = TokenSequence2FeatureVectorSequence.
 Instance pipedInstance = this.featurePipe.pipe(new Instance(tokens, null, null, null));
 Sequence pipedData = (Sequence) pipedInstance.getData();

 Sequence predicted = documentExtraction.getPredictedLabels();
 LabeledSpans spans = documentExtraction.getExtractedSpans();

 // One lattice is built up front and shared by every segment scored below.
 SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedData);

 for (int idx = 0; idx < spans.size(); idx++) {
  LabeledSpan current = spans.getLabeledSpan(idx);
  if (!current.isBackground()) {
   int[] bounds = getSegmentBoundaries(tokens, current);
   int boundsFirst = bounds[0];
   int boundsSecond = bounds[1];
   // The predicted sequence is supplied for both sequence arguments of Segment.
   Segment segment = new Segment(pipedData, predicted, predicted,
                  boundsFirst, boundsSecond,
                  null, null);
   current.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice));
  }
 }
}

/**
 * Sets a confidence value on every non-background extracted span of a document.
 *
 * <p>The document's input tokenization is passed through {@code featurePipe}, a
 * {@code SumLatticeDefault} is built over the piped data using the wrapped estimator's
 * transducer, and each non-background span is converted into a {@code Segment} that the
 * wrapped estimator scores. Background spans are left untouched.
 *
 * @param documentExtraction extraction whose spans receive their confidence via
 *        {@code setConfidence}
 */
public void estimateConfidence (DocumentExtraction documentExtraction) {
 Tokenization tokens = documentExtraction.getInput();

 // WARNING: the input Tokenization has probably already had features
 // appended by an earlier pass through a featurePipe. Callers who want
 // to avoid computing the features a second time may prefer to set
 // this.featurePipe = TokenSequence2FeatureVectorSequence.
 Instance pipedInstance = this.featurePipe.pipe(new Instance(tokens, null, null, null));
 Sequence pipedData = (Sequence) pipedInstance.getData();

 Sequence predicted = documentExtraction.getPredictedLabels();
 LabeledSpans spans = documentExtraction.getExtractedSpans();

 // One lattice is built up front and shared by every segment scored below.
 SumLatticeDefault lattice = new SumLatticeDefault (this.confidenceEstimator.getTransducer(), pipedData);

 for (int idx = 0; idx < spans.size(); idx++) {
  LabeledSpan current = spans.getLabeledSpan(idx);
  if (!current.isBackground()) {
   int[] bounds = getSegmentBoundaries(tokens, current);
   int boundsFirst = bounds[0];
   int boundsSecond = bounds[1];
   // The predicted sequence is supplied for both sequence arguments of Segment.
   Segment segment = new Segment(pipedData, predicted, predicted,
                  boundsFirst, boundsSecond,
                  null, null);
   current.setConfidence(confidenceEstimator.estimateConfidenceFor(segment, lattice));
  }
 }
}

/**
 * Walks the target and extracted spans of a document in parallel and pairs up their
 * pairwise intersections.
 *
 * <p>On every step the current predicted span and the current target span are intersected
 * with each other (once in each direction) and the two results are appended to the two
 * output lists. The cursor of whichever span has the smaller end index then advances; when
 * the end indices are equal both cursors advance. The walk stops as soon as either list is
 * exhausted.
 *
 * <p>NOTE(review): the target spans are used without a null check, and the walk presumably
 * relies on both lists being ordered by position -- confirm against callers.
 *
 * @param docExtr extraction supplying both the target and the extracted spans
 * @return a {@code DualLabeledSpans} wrapping the predicted-side and target-side lists,
 *         both created over the extracted spans' document
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
 LabeledSpans targets = docExtr.getTargetSpans ();
 LabeledSpans predictions = docExtr.getExtractedSpans ();

 LabeledSpans predictedOut = new LabeledSpans (predictions.getDocument ());
 LabeledSpans targetOut = new LabeledSpans (predictions.getDocument ());

 int p = 0;
 int t = 0;
 while (p < predictions.size() && t < targets.size ()) {
  LabeledSpan predicted = predictions.getLabeledSpan (p);
  LabeledSpan target = targets.getLabeledSpan (t);

  LabeledSpan predictedPiece = (LabeledSpan) predicted.intersection (target);
  LabeledSpan targetPiece = (LabeledSpan) target.intersection (predicted);
  predictedOut.add (predictedPiece);
  targetOut.add (targetPiece);

  // Decide both advances from the current pair before moving either cursor.
  boolean advancePredicted = predicted.getEndIdx () <= target.getEndIdx ();
  boolean advanceTarget = target.getEndIdx () <= predicted.getEndIdx ();
  if (advancePredicted) {
   p++;
  }
  if (advanceTarget) {
   t++;
  }
 }

 assert (predictedOut.size() == targetOut.size());
 return new DualLabeledSpans (predictedOut, targetOut);
}

/**
 * Walks the target and extracted spans of a document in parallel and pairs up their
 * pairwise intersections.
 *
 * <p>On every step the current predicted span and the current target span are intersected
 * with each other (once in each direction) and the two results are appended to the two
 * output lists. The cursor of whichever span has the smaller end index then advances; when
 * the end indices are equal both cursors advance. The walk stops as soon as either list is
 * exhausted.
 *
 * <p>NOTE(review): the target spans are used without a null check, and the walk presumably
 * relies on both lists being ordered by position -- confirm against callers.
 *
 * @param docExtr extraction supplying both the target and the extracted spans
 * @return a {@code DualLabeledSpans} wrapping the predicted-side and target-side lists,
 *         both created over the extracted spans' document
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
 LabeledSpans targets = docExtr.getTargetSpans ();
 LabeledSpans predictions = docExtr.getExtractedSpans ();

 LabeledSpans predictedOut = new LabeledSpans (predictions.getDocument ());
 LabeledSpans targetOut = new LabeledSpans (predictions.getDocument ());

 int p = 0;
 int t = 0;
 while (p < predictions.size() && t < targets.size ()) {
  LabeledSpan predicted = predictions.getLabeledSpan (p);
  LabeledSpan target = targets.getLabeledSpan (t);

  LabeledSpan predictedPiece = (LabeledSpan) predicted.intersection (target);
  LabeledSpan targetPiece = (LabeledSpan) target.intersection (predicted);
  predictedOut.add (predictedPiece);
  targetOut.add (targetPiece);

  // Decide both advances from the current pair before moving either cursor.
  boolean advancePredicted = predicted.getEndIdx () <= target.getEndIdx ();
  boolean advanceTarget = target.getEndIdx () <= predicted.getEndIdx ();
  if (advancePredicted) {
   p++;
  }
  if (advanceTarget) {
   t++;
  }
 }

 assert (predictedOut.size() == targetOut.size());
 return new DualLabeledSpans (predictedOut, targetOut);
}

/**
 * Walks the target and extracted spans of a document in parallel and pairs up their
 * pairwise intersections.
 *
 * <p>On every step the current predicted span and the current target span are intersected
 * with each other (once in each direction) and the two results are appended to the two
 * output lists. The cursor of whichever span has the smaller end index then advances; when
 * the end indices are equal both cursors advance. The walk stops as soon as either list is
 * exhausted.
 *
 * <p>NOTE(review): the target spans are used without a null check, and the walk presumably
 * relies on both lists being ordered by position -- confirm against callers.
 *
 * @param docExtr extraction supplying both the target and the extracted spans
 * @return a {@code DualLabeledSpans} wrapping the predicted-side and target-side lists,
 *         both created over the extracted spans' document
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
 LabeledSpans targets = docExtr.getTargetSpans ();
 LabeledSpans predictions = docExtr.getExtractedSpans ();

 LabeledSpans predictedOut = new LabeledSpans (predictions.getDocument ());
 LabeledSpans targetOut = new LabeledSpans (predictions.getDocument ());

 int p = 0;
 int t = 0;
 while (p < predictions.size() && t < targets.size ()) {
  LabeledSpan predicted = predictions.getLabeledSpan (p);
  LabeledSpan target = targets.getLabeledSpan (t);

  LabeledSpan predictedPiece = (LabeledSpan) predicted.intersection (target);
  LabeledSpan targetPiece = (LabeledSpan) target.intersection (predicted);
  predictedOut.add (predictedPiece);
  targetOut.add (targetPiece);

  // Decide both advances from the current pair before moving either cursor.
  boolean advancePredicted = predicted.getEndIdx () <= target.getEndIdx ();
  boolean advanceTarget = target.getEndIdx () <= predicted.getEndIdx ();
  if (advancePredicted) {
   p++;
  }
  if (advanceTarget) {
   t++;
  }
 }

 assert (predictedOut.size() == targetOut.size());
 return new DualLabeledSpans (predictedOut, targetOut);
}

Popular in Java

Making http post requests using okhttp
getResourceAsStream (ClassLoader)
getApplicationContext (Context)
setRequestProperty (URLConnection)
FileOutputStream (java.io)
An output stream that writes bytes to a file. If the output file exists, it can be replaced or appended to.
Thread (java.lang)
A thread is a thread of execution in a program. The Java Virtual Machine allows an application to have multiple threads of execution running concurrently.
BigDecimal (java.math)
An immutable arbitrary-precision signed decimal. A value is represented by an arbitrary-precision "unscaled value" and a signed 32-bit "scale".
ResultSet (java.sql)
An interface for an object which represents a database table entry, returned as the result of the query to the database.
Calendar (java.util)
Calendar is an abstract base class for converting between a Date object and a set of integer fields
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in the Public Domain.
Top PhpStorm plugins

How to use the getExtractedSpans method in cc.mallet.extract.DocumentExtraction

Best Java code snippets using cc.mallet.extract.DocumentExtraction.getExtractedSpans (Showing top 15 results out of 315)

How to use
getExtractedSpans
method
in
cc.mallet.extract.DocumentExtraction