/**
 * Renders this span as text: the label's string form, followed by
 * " [span START..END confidence=CONF]" built from getStartIdx(),
 * getEndIdx() and the confidence field.
 */
public String toString ()
{
  StringBuilder text = new StringBuilder ();
  // Appended in the same left-to-right order as a plain concatenation.
  text.append (label.toString ());
  text.append (" [span ").append (getStartIdx ());
  text.append ("..").append (getEndIdx ());
  text.append (" confidence=").append (confidence);
  text.append ("]");
  return text.toString ();
}
/**
 * Renders this span as text: the label's string form, followed by
 * " [span START..END confidence=CONF]" built from getStartIdx(),
 * getEndIdx() and the confidence field.
 */
public String toString ()
{
  StringBuilder text = new StringBuilder ();
  // Appended in the same left-to-right order as a plain concatenation.
  text.append (label.toString ());
  text.append (" [span ").append (getStartIdx ());
  text.append ("..").append (getEndIdx ());
  text.append (" confidence=").append (confidence);
  text.append ("]");
  return text.toString ();
}
/**
 * Renders this span as text: the label's string form, followed by
 * " [span START..END confidence=CONF]" built from getStartIdx(),
 * getEndIdx() and the confidence field.
 */
public String toString ()
{
  StringBuilder text = new StringBuilder ();
  // Appended in the same left-to-right order as a plain concatenation.
  text.append (label.toString ());
  text.append (" [span ").append (getStartIdx ());
  text.append ("..").append (getEndIdx ());
  text.append (" confidence=").append (confidence);
  text.append ("]");
  return text.toString ();
}
/**
 * Maps the index range of a LabeledSpan onto token positions of a Tokenization.
 * A token is selected when the start index of its own span lies between
 * the labeled span's start index and its end index minus one, both inclusive.
 *
 * @param tokens      tokenization whose token spans are scanned in order
 * @param labeledSpan span whose start/end indices delimit the range
 *                    (presumably character offsets -- confirm against callers)
 * @return array of size two: position of the first selected token and
 *         position of the last selected token, inclusive
 * @throws IllegalArgumentException if no token starts inside the range
 */
private int[] getSegmentBoundaries (Tokenization tokens, LabeledSpan labeledSpan)
{
  final int rangeStart = labeledSpan.getStartIdx();
  final int rangeLast = labeledSpan.getEndIdx() - 1;

  int firstToken = -1;
  int lastToken = -1;

  for (int pos = 0; pos < tokens.size(); pos++) {
    final int tokenStart = tokens.getSpan(pos).getStartIdx();
    if (tokenStart < rangeStart || tokenStart > rangeLast) {
      continue;  // this token starts outside the labeled span
    }
    if (firstToken == -1) {
      firstToken = pos;
    }
    // Every selected token moves the upper bound forward.
    lastToken = pos;
  }

  if (firstToken == -1 || lastToken == -1) {
    throw new IllegalArgumentException("Unable to find segment boundaries from span " + labeledSpan);
  }
  return new int[]{firstToken, lastToken};
}
}
/**
 * Maps the index range of a LabeledSpan onto token positions of a Tokenization.
 * A token is selected when the start index of its own span lies between
 * the labeled span's start index and its end index minus one, both inclusive.
 *
 * @param tokens      tokenization whose token spans are scanned in order
 * @param labeledSpan span whose start/end indices delimit the range
 *                    (presumably character offsets -- confirm against callers)
 * @return array of size two: position of the first selected token and
 *         position of the last selected token, inclusive
 * @throws IllegalArgumentException if no token starts inside the range
 */
private int[] getSegmentBoundaries (Tokenization tokens, LabeledSpan labeledSpan)
{
  final int rangeStart = labeledSpan.getStartIdx();
  final int rangeLast = labeledSpan.getEndIdx() - 1;

  int firstToken = -1;
  int lastToken = -1;

  for (int pos = 0; pos < tokens.size(); pos++) {
    final int tokenStart = tokens.getSpan(pos).getStartIdx();
    if (tokenStart < rangeStart || tokenStart > rangeLast) {
      continue;  // this token starts outside the labeled span
    }
    if (firstToken == -1) {
      firstToken = pos;
    }
    // Every selected token moves the upper bound forward.
    lastToken = pos;
  }

  if (firstToken == -1 || lastToken == -1) {
    throw new IllegalArgumentException("Unable to find segment boundaries from span " + labeledSpan);
  }
  return new int[]{firstToken, lastToken};
}
}
/**
 * Maps the index range of a LabeledSpan onto token positions of a Tokenization.
 * A token is selected when the start index of its own span lies between
 * the labeled span's start index and its end index minus one, both inclusive.
 *
 * @param tokens      tokenization whose token spans are scanned in order
 * @param labeledSpan span whose start/end indices delimit the range
 *                    (presumably character offsets -- confirm against callers)
 * @return array of size two: position of the first selected token and
 *         position of the last selected token, inclusive
 * @throws IllegalArgumentException if no token starts inside the range
 */
private int[] getSegmentBoundaries (Tokenization tokens, LabeledSpan labeledSpan)
{
  final int rangeStart = labeledSpan.getStartIdx();
  final int rangeLast = labeledSpan.getEndIdx() - 1;

  int firstToken = -1;
  int lastToken = -1;

  for (int pos = 0; pos < tokens.size(); pos++) {
    final int tokenStart = tokens.getSpan(pos).getStartIdx();
    if (tokenStart < rangeStart || tokenStart > rangeLast) {
      continue;  // this token starts outside the labeled span
    }
    if (firstToken == -1) {
      firstToken = pos;
    }
    // Every selected token moves the upper bound forward.
    lastToken = pos;
  }

  if (firstToken == -1 || lastToken == -1) {
    throw new IllegalArgumentException("Unable to find segment boundaries from span " + labeledSpan);
  }
  return new int[]{firstToken, lastToken};
}
}
current = childSpan.getEndIdx () - start;
current = childSpan.getEndIdx () - start;
current = childSpan.getEndIdx () - start;
/**
 * Walks the extracted (predicted) and target (true) span lists of a
 * DocumentExtraction side by side. At each step the intersection of the
 * current pair is recorded once from each side; then whichever span ends
 * first is left behind (both, when they end at the same index). The walk
 * stops as soon as either list is exhausted.
 *
 * @param docExtr extraction supplying the target and extracted spans
 * @return the two lists of pairwise intersections, predicted first
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
  LabeledSpans trueSpans = docExtr.getTargetSpans ();
  LabeledSpans predSpans = docExtr.getExtractedSpans ();

  // NOTE(review): both result lists are built on the predicted spans'
  // document; confirm the target side is meant to share it.
  LabeledSpans predOut = new LabeledSpans (predSpans.getDocument ());
  LabeledSpans trueOut = new LabeledSpans (predSpans.getDocument ());

  int predPos = 0;
  int truePos = 0;
  while (predPos < predSpans.size () && truePos < trueSpans.size ()) {
    LabeledSpan predSpan = predSpans.getLabeledSpan (predPos);
    LabeledSpan trueSpan = trueSpans.getLabeledSpan (truePos);

    // Compute both intersections before storing either of them.
    LabeledSpan predPiece = (LabeledSpan) predSpan.intersection (trueSpan);
    LabeledSpan truePiece = (LabeledSpan) trueSpan.intersection (predSpan);
    predOut.add (predPiece);
    trueOut.add (truePiece);

    int predEnd = predSpan.getEndIdx ();
    int trueEnd = trueSpan.getEndIdx ();
    if (predEnd <= trueEnd) {
      predPos++;
    }
    if (trueEnd <= predEnd) {
      truePos++;
    }
  }

  assert (predOut.size () == trueOut.size ());
  return new DualLabeledSpans (predOut, trueOut);
}
/**
 * Walks the extracted (predicted) and target (true) span lists of a
 * DocumentExtraction side by side. At each step the intersection of the
 * current pair is recorded once from each side; then whichever span ends
 * first is left behind (both, when they end at the same index). The walk
 * stops as soon as either list is exhausted.
 *
 * @param docExtr extraction supplying the target and extracted spans
 * @return the two lists of pairwise intersections, predicted first
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
  LabeledSpans trueSpans = docExtr.getTargetSpans ();
  LabeledSpans predSpans = docExtr.getExtractedSpans ();

  // NOTE(review): both result lists are built on the predicted spans'
  // document; confirm the target side is meant to share it.
  LabeledSpans predOut = new LabeledSpans (predSpans.getDocument ());
  LabeledSpans trueOut = new LabeledSpans (predSpans.getDocument ());

  int predPos = 0;
  int truePos = 0;
  while (predPos < predSpans.size () && truePos < trueSpans.size ()) {
    LabeledSpan predSpan = predSpans.getLabeledSpan (predPos);
    LabeledSpan trueSpan = trueSpans.getLabeledSpan (truePos);

    // Compute both intersections before storing either of them.
    LabeledSpan predPiece = (LabeledSpan) predSpan.intersection (trueSpan);
    LabeledSpan truePiece = (LabeledSpan) trueSpan.intersection (predSpan);
    predOut.add (predPiece);
    trueOut.add (truePiece);

    int predEnd = predSpan.getEndIdx ();
    int trueEnd = trueSpan.getEndIdx ();
    if (predEnd <= trueEnd) {
      predPos++;
    }
    if (trueEnd <= predEnd) {
      truePos++;
    }
  }

  assert (predOut.size () == trueOut.size ());
  return new DualLabeledSpans (predOut, trueOut);
}
/**
 * Walks the extracted (predicted) and target (true) span lists of a
 * DocumentExtraction side by side. At each step the intersection of the
 * current pair is recorded once from each side; then whichever span ends
 * first is left behind (both, when they end at the same index). The walk
 * stops as soon as either list is exhausted.
 *
 * @param docExtr extraction supplying the target and extracted spans
 * @return the two lists of pairwise intersections, predicted first
 */
private static DualLabeledSpans intersectSpans (DocumentExtraction docExtr)
{
  LabeledSpans trueSpans = docExtr.getTargetSpans ();
  LabeledSpans predSpans = docExtr.getExtractedSpans ();

  // NOTE(review): both result lists are built on the predicted spans'
  // document; confirm the target side is meant to share it.
  LabeledSpans predOut = new LabeledSpans (predSpans.getDocument ());
  LabeledSpans trueOut = new LabeledSpans (predSpans.getDocument ());

  int predPos = 0;
  int truePos = 0;
  while (predPos < predSpans.size () && truePos < trueSpans.size ()) {
    LabeledSpan predSpan = predSpans.getLabeledSpan (predPos);
    LabeledSpan trueSpan = trueSpans.getLabeledSpan (truePos);

    // Compute both intersections before storing either of them.
    LabeledSpan predPiece = (LabeledSpan) predSpan.intersection (trueSpan);
    LabeledSpan truePiece = (LabeledSpan) trueSpan.intersection (predSpan);
    predOut.add (predPiece);
    trueOut.add (truePiece);

    int predEnd = predSpan.getEndIdx ();
    int trueEnd = trueSpan.getEndIdx ();
    if (predEnd <= trueEnd) {
      predPos++;
    }
    if (trueEnd <= predEnd) {
      truePos++;
    }
  }

  assert (predOut.size () == trueOut.size ());
  return new DualLabeledSpans (predOut, trueOut);
}