/**
 * Maps the character range of a LabeledSpan onto token positions of a Tokenization.
 * A token belongs to the segment when its starting character offset lies between
 * the span's start index and the span's end index minus one (both inclusive).
 *
 * @param tokens      tokenization whose token spans are scanned in order
 * @param labeledSpan span whose start/end indices delimit the character range
 * @return array of size two: position of the first matching token, then position
 *         of the last matching token (inclusive)
 * @throws IllegalArgumentException if no token starts inside the character range
 */
private int[] getSegmentBoundaries (Tokenization tokens, LabeledSpan labeledSpan)
{
  final int spanFirstChar = labeledSpan.getStartIdx();
  final int spanLastChar = labeledSpan.getEndIdx() - 1;
  int firstToken = -1;
  int lastToken = -1;

  for (int t = 0; t < tokens.size(); t++) {
    final int tokenStart = tokens.getSpan(t).getStartIdx();
    // Skip tokens that begin outside the span's character range.
    if (tokenStart < spanFirstChar || tokenStart > spanLastChar)
      continue;
    if (firstToken == -1)
      firstToken = t;
    // Every matching token pushes the inclusive end forward.
    lastToken = t;
  }

  if (firstToken == -1 || lastToken == -1)
    throw new IllegalArgumentException("Unable to find segment boundaries from span " + labeledSpan);

  return new int[]{firstToken, lastToken};
}
} // unmatched in the original line: closes an enclosing scope opened outside this chunk
/**
 * Maps the character range of a LabeledSpan onto token positions of a Tokenization.
 * A token belongs to the segment when its starting character offset lies between
 * the span's start index and the span's end index minus one (both inclusive).
 *
 * @param tokens      tokenization whose token spans are scanned in order
 * @param labeledSpan span whose start/end indices delimit the character range
 * @return array of size two: position of the first matching token, then position
 *         of the last matching token (inclusive)
 * @throws IllegalArgumentException if no token starts inside the character range
 */
private int[] getSegmentBoundaries (Tokenization tokens, LabeledSpan labeledSpan)
{
  final int spanFirstChar = labeledSpan.getStartIdx();
  final int spanLastChar = labeledSpan.getEndIdx() - 1;
  int firstToken = -1;
  int lastToken = -1;

  for (int t = 0; t < tokens.size(); t++) {
    final int tokenStart = tokens.getSpan(t).getStartIdx();
    // Skip tokens that begin outside the span's character range.
    if (tokenStart < spanFirstChar || tokenStart > spanLastChar)
      continue;
    if (firstToken == -1)
      firstToken = t;
    // Every matching token pushes the inclusive end forward.
    lastToken = t;
  }

  if (firstToken == -1 || lastToken == -1)
    throw new IllegalArgumentException("Unable to find segment boundaries from span " + labeledSpan);

  return new int[]{firstToken, lastToken};
}
} // unmatched in the original line: closes an enclosing scope opened outside this chunk
/**
 * Maps the character range of a LabeledSpan onto token positions of a Tokenization.
 * A token belongs to the segment when its starting character offset lies between
 * the span's start index and the span's end index minus one (both inclusive).
 *
 * @param tokens      tokenization whose token spans are scanned in order
 * @param labeledSpan span whose start/end indices delimit the character range
 * @return array of size two: position of the first matching token, then position
 *         of the last matching token (inclusive)
 * @throws IllegalArgumentException if no token starts inside the character range
 */
private int[] getSegmentBoundaries (Tokenization tokens, LabeledSpan labeledSpan)
{
  final int spanFirstChar = labeledSpan.getStartIdx();
  final int spanLastChar = labeledSpan.getEndIdx() - 1;
  int firstToken = -1;
  int lastToken = -1;

  for (int t = 0; t < tokens.size(); t++) {
    final int tokenStart = tokens.getSpan(t).getStartIdx();
    // Skip tokens that begin outside the span's character range.
    if (tokenStart < spanFirstChar || tokenStart > spanLastChar)
      continue;
    if (firstToken == -1)
      firstToken = t;
    // Every matching token pushes the inclusive end forward.
    lastToken = t;
  }

  if (firstToken == -1 || lastToken == -1)
    throw new IllegalArgumentException("Unable to find segment boundaries from span " + labeledSpan);

  return new int[]{firstToken, lastToken};
}
} // unmatched in the original line: closes an enclosing scope opened outside this chunk
/**
 * Creates an extraction for one tokenized document from a predicted label
 * sequence and, optionally, a target label sequence.
 *
 * @param name       stored as this extraction's name
 * @param dict       label alphabet used to look up the background label and
 *                   handed to the filter when building spans
 * @param input      tokenization; its document is stored, and (when assertions
 *                   are enabled) its size must equal that of {@code predicted}
 * @param predicted  predicted sequence, stored and converted into extracted spans
 * @param target     target sequence, may be null; when non-null it is converted
 *                   into target spans, and it is additionally stored only if it
 *                   is a LabelSequence
 * @param background name of the background label, resolved through {@code dict}
 * @param filter     builds the labeled spans from a sequence
 */
public DocumentExtraction (String name, LabelAlphabet dict, Tokenization input,
                           Sequence predicted, Sequence target,
                           String background, TokenizationFilter filter)
{
  this.document = input.getDocument ();
  assert input.size() == predicted.size();
  this.backgroundTag = dict.lookupLabel (background);

  // Plain bookkeeping: keep the caller's arguments as-is.
  this.name = name;
  this.input = input;
  this.predictedLabels = predicted;

  this.extractedSpans =
    filter.constructLabeledSpans (dict, this.document, this.backgroundTag, input, predicted);

  // Without a target sequence there is nothing further to record.
  if (target == null) {
    return;
  }

  // The target field is only filled in for LabelSequence instances;
  // any other Sequence still contributes target spans below.
  if (target instanceof LabelSequence) {
    this.target = (LabelSequence) target;
  }
  this.targetSpans =
    filter.constructLabeledSpans (dict, this.document, this.backgroundTag, input, target);
}
/**
 * Creates an extraction for one tokenized document from a predicted label
 * sequence and, optionally, a target label sequence.
 *
 * @param name       stored as this extraction's name
 * @param dict       label alphabet used to look up the background label and
 *                   handed to the filter when building spans
 * @param input      tokenization; its document is stored, and (when assertions
 *                   are enabled) its size must equal that of {@code predicted}
 * @param predicted  predicted sequence, stored and converted into extracted spans
 * @param target     target sequence, may be null; when non-null it is converted
 *                   into target spans, and it is additionally stored only if it
 *                   is a LabelSequence
 * @param background name of the background label, resolved through {@code dict}
 * @param filter     builds the labeled spans from a sequence
 */
public DocumentExtraction (String name, LabelAlphabet dict, Tokenization input,
                           Sequence predicted, Sequence target,
                           String background, TokenizationFilter filter)
{
  this.document = input.getDocument ();
  assert input.size() == predicted.size();
  this.backgroundTag = dict.lookupLabel (background);

  // Plain bookkeeping: keep the caller's arguments as-is.
  this.name = name;
  this.input = input;
  this.predictedLabels = predicted;

  this.extractedSpans =
    filter.constructLabeledSpans (dict, this.document, this.backgroundTag, input, predicted);

  // Without a target sequence there is nothing further to record.
  if (target == null) {
    return;
  }

  // The target field is only filled in for LabelSequence instances;
  // any other Sequence still contributes target spans below.
  if (target instanceof LabelSequence) {
    this.target = (LabelSequence) target;
  }
  this.targetSpans =
    filter.constructLabeledSpans (dict, this.document, this.backgroundTag, input, target);
}
/**
 * Creates an extraction for one tokenized document from a predicted label
 * sequence and, optionally, a target label sequence.
 *
 * @param name       stored as this extraction's name
 * @param dict       label alphabet used to look up the background label and
 *                   handed to the filter when building spans
 * @param input      tokenization; its document is stored, and (when assertions
 *                   are enabled) its size must equal that of {@code predicted}
 * @param predicted  predicted sequence, stored and converted into extracted spans
 * @param target     target sequence, may be null; when non-null it is converted
 *                   into target spans, and it is additionally stored only if it
 *                   is a LabelSequence
 * @param background name of the background label, resolved through {@code dict}
 * @param filter     builds the labeled spans from a sequence
 */
public DocumentExtraction (String name, LabelAlphabet dict, Tokenization input,
                           Sequence predicted, Sequence target,
                           String background, TokenizationFilter filter)
{
  this.document = input.getDocument ();
  assert input.size() == predicted.size();
  this.backgroundTag = dict.lookupLabel (background);

  // Plain bookkeeping: keep the caller's arguments as-is.
  this.name = name;
  this.input = input;
  this.predictedLabels = predicted;

  this.extractedSpans =
    filter.constructLabeledSpans (dict, this.document, this.backgroundTag, input, predicted);

  // Without a target sequence there is nothing further to record.
  if (target == null) {
    return;
  }

  // The target field is only filled in for LabelSequence instances;
  // any other Sequence still contributes target spans below.
  if (target instanceof LabelSequence) {
    this.target = (LabelSequence) target;
  }
  this.targetSpans =
    filter.constructLabeledSpans (dict, this.document, this.backgroundTag, input, target);
}