/**
 * Convenience constructor that uses a BIO tokenization filter.
 * Forwards every argument unchanged to the seven-argument constructor and
 * supplies a newly created {@link BIOTokenizationFilter} as the last argument.
 *
 * @param name       forwarded unchanged
 * @param dict       label alphabet, forwarded unchanged
 * @param input      tokenization, forwarded unchanged
 * @param predicted  predicted sequence, forwarded unchanged
 * @param target     target sequence, forwarded unchanged
 * @param background background string, forwarded unchanged
 */
public DocumentExtraction (String name,
                           LabelAlphabet dict,
                           Tokenization input,
                           Sequence predicted,
                           Sequence target,
                           String background)
{
  this (name, dict, input, predicted, target, background, new BIOTokenizationFilter ());
}
/**
 * Splits the tag sequence into runs of consecutive tags and adds one
 * {@link LabeledSpan} per run to {@code labeled}.
 *
 * A run starts at some tag and is extended over the following tags until one
 * of them is a begin tag or does not match the run's first tag according to
 * {@code tagsMatch}. For each run a span is created from the run's start
 * index and the index at which the scan stopped, and
 * {@code addBackgroundIfNecessary} is called with the end index of the
 * previous span (0 for the first run) before the run's own span is added.
 *
 * @param labeled       collection that receives the labeled spans
 * @param input         tokenization handed to {@code createSpan}
 * @param tags          sequence whose elements' string forms are looked up in {@code dict}
 * @param dict          alphabet used to resolve tag names to labels
 * @param backgroundTag label that marks a span as background when the span's
 *                      label is this very object
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags,
                               LabelAlphabet dict, Label backgroundTag)
{
  int runStart = 0;
  int prevSpanEnd = 0;

  while (runStart < tags.size ()) {
    Label runTag = dict.lookupLabel (tags.get (runStart).toString ());

    // Advance until a tag that starts a new run (or the end of the sequence).
    int runEnd = runStart + 1;
    for (; runEnd < tags.size (); runEnd++) {
      Label candidate = dict.lookupLabel (tags.get (runEnd).toString ());
      boolean continuesRun = !isBeginTag (candidate) && tagsMatch (runTag, candidate);
      if (!continuesRun) {
        break;
      }
    }

    Span span = createSpan (input, runStart, runEnd);
    StringSpan textSpan = (StringSpan) span;
    addBackgroundIfNecessary (labeled, textSpan, prevSpanEnd, backgroundTag);
    prevSpanEnd = textSpan.getEndIdx ();

    // Begin/inside tags are reduced via trimTag before the span is labeled.
    Label spanLabel = (isBeginTag (runTag) || isInsideTag (runTag))
                      ? trimTag (dict, runTag)
                      : runTag;
    // Identity comparison is intentional: it mirrors how background is detected here.
    boolean isBackground = (spanLabel == backgroundTag);
    labeled.add (new LabeledSpan (span, spanLabel, isBackground));

    runStart = runEnd;
  }
}
/**
 * Decides whether two tags carry the same name once their two-character
 * prefix is removed.
 *
 * The first tag loses its first two characters when it is a begin tag or an
 * inside tag; the second tag loses them only when it is an inside tag.
 *
 * @param tag1 first tag; its entry is cast to {@code String}
 * @param tag2 second tag; its entry is cast to {@code String}
 * @return {@code true} if the resulting names are equal
 */
private boolean tagsMatch (Label tag1, Label tag2)
{
  String firstName = (String) tag1.getEntry ();
  String secondName = (String) tag2.getEntry ();

  boolean firstHasPrefix = isBeginTag (tag1) || isInsideTag (tag1);
  String firstBase = firstHasPrefix ? firstName.substring (2) : firstName;
  String secondBase = isInsideTag (tag2) ? secondName.substring (2) : secondName;

  return firstBase.equals (secondBase);
}
/**
 * Builds the labeled spans for a document from a tag sequence.
 * Creates a new {@link LabeledSpans} for {@code document}, fills it by
 * calling {@code addSpansFromTags}, and returns it.
 *
 * @param dict          alphabet passed on to {@code addSpansFromTags}
 * @param document      object the new {@code LabeledSpans} is constructed with
 * @param backgroundTag background label passed on to {@code addSpansFromTags}
 * @param input         tokenization passed on to {@code addSpansFromTags}
 * @param seq           tag sequence passed on to {@code addSpansFromTags}
 * @return the newly created and populated {@code LabeledSpans}
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                                           Tokenization input, Sequence seq)
{
  LabeledSpans result = new LabeledSpans (document);
  addSpansFromTags (result, input, seq, dict, backgroundTag);
  return result;
}
/**
 * Splits the tag sequence into runs of consecutive tags and adds one
 * {@link LabeledSpan} per run to {@code labeled}.
 *
 * A run starts at some tag and is extended over the following tags until one
 * of them is a begin tag or does not match the run's first tag according to
 * {@code tagsMatch}. For each run a span is created from the run's start
 * index and the index at which the scan stopped, and
 * {@code addBackgroundIfNecessary} is called with the end index of the
 * previous span (0 for the first run) before the run's own span is added.
 *
 * @param labeled       collection that receives the labeled spans
 * @param input         tokenization handed to {@code createSpan}
 * @param tags          sequence whose elements' string forms are looked up in {@code dict}
 * @param dict          alphabet used to resolve tag names to labels
 * @param backgroundTag label that marks a span as background when the span's
 *                      label is this very object
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags,
                               LabelAlphabet dict, Label backgroundTag)
{
  int runStart = 0;
  int prevSpanEnd = 0;

  while (runStart < tags.size ()) {
    Label runTag = dict.lookupLabel (tags.get (runStart).toString ());

    // Advance until a tag that starts a new run (or the end of the sequence).
    int runEnd = runStart + 1;
    for (; runEnd < tags.size (); runEnd++) {
      Label candidate = dict.lookupLabel (tags.get (runEnd).toString ());
      boolean continuesRun = !isBeginTag (candidate) && tagsMatch (runTag, candidate);
      if (!continuesRun) {
        break;
      }
    }

    Span span = createSpan (input, runStart, runEnd);
    StringSpan textSpan = (StringSpan) span;
    addBackgroundIfNecessary (labeled, textSpan, prevSpanEnd, backgroundTag);
    prevSpanEnd = textSpan.getEndIdx ();

    // Begin/inside tags are reduced via trimTag before the span is labeled.
    Label spanLabel = (isBeginTag (runTag) || isInsideTag (runTag))
                      ? trimTag (dict, runTag)
                      : runTag;
    // Identity comparison is intentional: it mirrors how background is detected here.
    boolean isBackground = (spanLabel == backgroundTag);
    labeled.add (new LabeledSpan (span, spanLabel, isBackground));

    runStart = runEnd;
  }
}
/**
 * Decides whether two tags carry the same name once their two-character
 * prefix is removed.
 *
 * The first tag loses its first two characters when it is a begin tag or an
 * inside tag; the second tag loses them only when it is an inside tag.
 *
 * @param tag1 first tag; its entry is cast to {@code String}
 * @param tag2 second tag; its entry is cast to {@code String}
 * @return {@code true} if the resulting names are equal
 */
private boolean tagsMatch (Label tag1, Label tag2)
{
  String firstName = (String) tag1.getEntry ();
  String secondName = (String) tag2.getEntry ();

  boolean firstHasPrefix = isBeginTag (tag1) || isInsideTag (tag1);
  String firstBase = firstHasPrefix ? firstName.substring (2) : firstName;
  String secondBase = isInsideTag (tag2) ? secondName.substring (2) : secondName;

  return firstBase.equals (secondBase);
}
/**
 * Builds the labeled spans for a document from a tag sequence.
 * Creates a new {@link LabeledSpans} for {@code document}, fills it by
 * calling {@code addSpansFromTags}, and returns it.
 *
 * @param dict          alphabet passed on to {@code addSpansFromTags}
 * @param document      object the new {@code LabeledSpans} is constructed with
 * @param backgroundTag background label passed on to {@code addSpansFromTags}
 * @param input         tokenization passed on to {@code addSpansFromTags}
 * @param seq           tag sequence passed on to {@code addSpansFromTags}
 * @return the newly created and populated {@code LabeledSpans}
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                                           Tokenization input, Sequence seq)
{
  LabeledSpans result = new LabeledSpans (document);
  addSpansFromTags (result, input, seq, dict, backgroundTag);
  return result;
}
/**
 * Splits the tag sequence into runs of consecutive tags and adds one
 * {@link LabeledSpan} per run to {@code labeled}.
 *
 * A run starts at some tag and is extended over the following tags until one
 * of them is a begin tag or does not match the run's first tag according to
 * {@code tagsMatch}. For each run a span is created from the run's start
 * index and the index at which the scan stopped, and
 * {@code addBackgroundIfNecessary} is called with the end index of the
 * previous span (0 for the first run) before the run's own span is added.
 *
 * @param labeled       collection that receives the labeled spans
 * @param input         tokenization handed to {@code createSpan}
 * @param tags          sequence whose elements' string forms are looked up in {@code dict}
 * @param dict          alphabet used to resolve tag names to labels
 * @param backgroundTag label that marks a span as background when the span's
 *                      label is this very object
 */
private void addSpansFromTags (LabeledSpans labeled, Tokenization input, Sequence tags,
                               LabelAlphabet dict, Label backgroundTag)
{
  int runStart = 0;
  int prevSpanEnd = 0;

  while (runStart < tags.size ()) {
    Label runTag = dict.lookupLabel (tags.get (runStart).toString ());

    // Advance until a tag that starts a new run (or the end of the sequence).
    int runEnd = runStart + 1;
    for (; runEnd < tags.size (); runEnd++) {
      Label candidate = dict.lookupLabel (tags.get (runEnd).toString ());
      boolean continuesRun = !isBeginTag (candidate) && tagsMatch (runTag, candidate);
      if (!continuesRun) {
        break;
      }
    }

    Span span = createSpan (input, runStart, runEnd);
    StringSpan textSpan = (StringSpan) span;
    addBackgroundIfNecessary (labeled, textSpan, prevSpanEnd, backgroundTag);
    prevSpanEnd = textSpan.getEndIdx ();

    // Begin/inside tags are reduced via trimTag before the span is labeled.
    Label spanLabel = (isBeginTag (runTag) || isInsideTag (runTag))
                      ? trimTag (dict, runTag)
                      : runTag;
    // Identity comparison is intentional: it mirrors how background is detected here.
    boolean isBackground = (spanLabel == backgroundTag);
    labeled.add (new LabeledSpan (span, spanLabel, isBackground));

    runStart = runEnd;
  }
}
/**
 * Creates an extractor from a model and two pipes.
 * Stores the three arguments in the fields of the same name and sets the
 * {@code filter} field to a newly created {@link BIOTokenizationFilter}.
 *
 * @param acrf        value stored in the {@code acrf} field
 * @param tokPipe     value stored in the {@code tokPipe} field
 * @param featurePipe value stored in the {@code featurePipe} field
 */
public ACRFExtractor (ACRF acrf, Pipe tokPipe, Pipe featurePipe)
{
  this.acrf = acrf;
  this.tokPipe = tokPipe;
  this.featurePipe = featurePipe;
  // No filter argument is taken here; a BIO filter is always installed.
  this.filter = new BIOTokenizationFilter ();
}
/**
 * Decides whether two tags carry the same name once their two-character
 * prefix is removed.
 *
 * The first tag loses its first two characters when it is a begin tag or an
 * inside tag; the second tag loses them only when it is an inside tag.
 *
 * @param tag1 first tag; its entry is cast to {@code String}
 * @param tag2 second tag; its entry is cast to {@code String}
 * @return {@code true} if the resulting names are equal
 */
private boolean tagsMatch (Label tag1, Label tag2)
{
  String firstName = (String) tag1.getEntry ();
  String secondName = (String) tag2.getEntry ();

  boolean firstHasPrefix = isBeginTag (tag1) || isInsideTag (tag1);
  String firstBase = firstHasPrefix ? firstName.substring (2) : firstName;
  String secondBase = isInsideTag (tag2) ? secondName.substring (2) : secondName;

  return firstBase.equals (secondBase);
}
/**
 * Builds the labeled spans for a document from a tag sequence.
 * Creates a new {@link LabeledSpans} for {@code document}, fills it by
 * calling {@code addSpansFromTags}, and returns it.
 *
 * @param dict          alphabet passed on to {@code addSpansFromTags}
 * @param document      object the new {@code LabeledSpans} is constructed with
 * @param backgroundTag background label passed on to {@code addSpansFromTags}
 * @param input         tokenization passed on to {@code addSpansFromTags}
 * @param seq           tag sequence passed on to {@code addSpansFromTags}
 * @return the newly created and populated {@code LabeledSpans}
 */
public LabeledSpans constructLabeledSpans (LabelAlphabet dict, Object document, Label backgroundTag,
                                           Tokenization input, Sequence seq)
{
  LabeledSpans result = new LabeledSpans (document);
  addSpansFromTags (result, input, seq, dict, backgroundTag);
  return result;
}
/**
 * Convenience constructor for an extraction without a target sequence.
 * Forwards its arguments to the seven-argument constructor, passing
 * {@code null} in the target position and a newly created
 * {@link BIOTokenizationFilter} as the last argument.
 *
 * @param name       forwarded unchanged
 * @param dict       label alphabet, forwarded unchanged
 * @param input      tokenization, forwarded unchanged
 * @param predicted  predicted sequence, forwarded unchanged
 * @param background background string, forwarded unchanged
 */
public DocumentExtraction (String name,
                           LabelAlphabet dict,
                           Tokenization input,
                           Sequence predicted,
                           String background)
{
  this (name, dict, input, predicted, null, background, new BIOTokenizationFilter ());
}
/**
 * Convenience constructor that uses a BIO tokenization filter.
 * Forwards every argument unchanged to the seven-argument constructor and
 * supplies a newly created {@link BIOTokenizationFilter} as the last argument.
 *
 * @param name       forwarded unchanged
 * @param dict       label alphabet, forwarded unchanged
 * @param input      tokenization, forwarded unchanged
 * @param predicted  predicted sequence, forwarded unchanged
 * @param target     target sequence, forwarded unchanged
 * @param background background string, forwarded unchanged
 */
public DocumentExtraction (String name,
                           LabelAlphabet dict,
                           Tokenization input,
                           Sequence predicted,
                           Sequence target,
                           String background)
{
  this (name, dict, input, predicted, target, background, new BIOTokenizationFilter ());
}
/**
 * Creates an extractor from a model and two pipes.
 * Stores the three arguments in the fields of the same name and sets the
 * {@code filter} field to a newly created {@link BIOTokenizationFilter}.
 *
 * @param acrf        value stored in the {@code acrf} field
 * @param tokPipe     value stored in the {@code tokPipe} field
 * @param featurePipe value stored in the {@code featurePipe} field
 */
public ACRFExtractor (ACRF acrf, Pipe tokPipe, Pipe featurePipe)
{
  this.acrf = acrf;
  this.tokPipe = tokPipe;
  this.featurePipe = featurePipe;
  // No filter argument is taken here; a BIO filter is always installed.
  this.filter = new BIOTokenizationFilter ();
}
/**
 * Creates an extractor from a model and two pipes.
 * Stores the three arguments in the fields of the same name and sets the
 * {@code filter} field to a newly created {@link BIOTokenizationFilter}.
 *
 * @param acrf        value stored in the {@code acrf} field
 * @param tokPipe     value stored in the {@code tokPipe} field
 * @param featurePipe value stored in the {@code featurePipe} field
 */
public ACRFExtractor (ACRF acrf, Pipe tokPipe, Pipe featurePipe)
{
  this.acrf = acrf;
  this.tokPipe = tokPipe;
  this.featurePipe = featurePipe;
  // No filter argument is taken here; a BIO filter is always installed.
  this.filter = new BIOTokenizationFilter ();
}
/**
 * Convenience constructor that uses a BIO tokenization filter.
 * Forwards every argument unchanged to the seven-argument constructor and
 * supplies a newly created {@link BIOTokenizationFilter} as the last argument.
 *
 * @param name       forwarded unchanged
 * @param dict       label alphabet, forwarded unchanged
 * @param input      tokenization, forwarded unchanged
 * @param predicted  predicted sequence, forwarded unchanged
 * @param target     target sequence, forwarded unchanged
 * @param background background string, forwarded unchanged
 */
public DocumentExtraction (String name,
                           LabelAlphabet dict,
                           Tokenization input,
                           Sequence predicted,
                           Sequence target,
                           String background)
{
  this (name, dict, input, predicted, target, background, new BIOTokenizationFilter ());
}
/**
 * Convenience constructor that uses a BIO tokenization filter.
 * Forwards both arguments to the three-argument constructor and supplies a
 * newly created {@link BIOTokenizationFilter} as the third argument.
 *
 * @param crf     model, forwarded unchanged
 * @param tokpipe pipe, forwarded unchanged
 */
public CRFExtractor (CRF crf, Pipe tokpipe)
{
  this (crf, tokpipe, new BIOTokenizationFilter ());
}
/**
 * Convenience constructor for an extraction without a target sequence.
 * Forwards its arguments to the seven-argument constructor, passing
 * {@code null} in the target position and a newly created
 * {@link BIOTokenizationFilter} as the last argument.
 *
 * @param name       forwarded unchanged
 * @param dict       label alphabet, forwarded unchanged
 * @param input      tokenization, forwarded unchanged
 * @param predicted  predicted sequence, forwarded unchanged
 * @param background background string, forwarded unchanged
 */
public DocumentExtraction (String name,
                           LabelAlphabet dict,
                           Tokenization input,
                           Sequence predicted,
                           String background)
{
  this (name, dict, input, predicted, null, background, new BIOTokenizationFilter ());
}
/**
 * Convenience constructor for an extraction without a target sequence.
 * Forwards its arguments to the seven-argument constructor, passing
 * {@code null} in the target position and a newly created
 * {@link BIOTokenizationFilter} as the last argument.
 *
 * @param name       forwarded unchanged
 * @param dict       label alphabet, forwarded unchanged
 * @param input      tokenization, forwarded unchanged
 * @param predicted  predicted sequence, forwarded unchanged
 * @param background background string, forwarded unchanged
 */
public DocumentExtraction (String name,
                           LabelAlphabet dict,
                           Tokenization input,
                           Sequence predicted,
                           String background)
{
  this (name, dict, input, predicted, null, background, new BIOTokenizationFilter ());
}
/**
 * Convenience constructor that uses a BIO tokenization filter.
 * Forwards both arguments to the three-argument constructor and supplies a
 * newly created {@link BIOTokenizationFilter} as the third argument.
 *
 * @param crf     model, forwarded unchanged
 * @param tokpipe pipe, forwarded unchanged
 */
public CRFExtractor (CRF crf, Pipe tokpipe)
{
  this (crf, tokpipe, new BIOTokenizationFilter ());
}