/**
 * Returns the overlap of this span and {@code r} as a new StringSpan over
 * the same document.  Assumes {@code r} is a StringSpan.
 * NOTE(review): if the spans do not overlap the result has start &gt; end —
 * callers presumably check for that; confirm the Span contract.
 */
public Span intersection (Span r)
{
  StringSpan that = (StringSpan) r;
  int lo = (start > that.start) ? start : that.start;   // later of the two starts
  int hi = (end < that.end) ? end : that.end;           // earlier of the two ends
  return new StringSpan (document, lo, hi);
}
/**
 * Returns the overlap of this span and {@code r} as a new StringSpan over
 * the same document.  Assumes {@code r} is a StringSpan.
 * NOTE(review): if the spans do not overlap the result has start &gt; end —
 * callers presumably check for that; confirm the Span contract.
 */
public Span intersection (Span r)
{
  StringSpan that = (StringSpan) r;
  int lo = (start > that.start) ? start : that.start;   // later of the two starts
  int hi = (end < that.end) ? end : that.end;           // earlier of the two ends
  return new StringSpan (document, lo, hi);
}
/**
 * Returns the overlap of this span and {@code r} as a new StringSpan over
 * the same document.  Assumes {@code r} is a StringSpan.
 * NOTE(review): if the spans do not overlap the result has start &gt; end —
 * callers presumably check for that; confirm the Span contract.
 */
public Span intersection (Span r)
{
  StringSpan that = (StringSpan) r;
  int lo = (start > that.start) ? start : that.start;   // later of the two starts
  int hi = (end < that.end) ? end : that.end;           // earlier of the two ends
  return new StringSpan (document, lo, hi);
}
/**
 * If there is an uncovered gap between document position {@code docidx}
 * and the start of {@code span}, appends a span labeled {@code background}
 * covering exactly that gap.  Does nothing when {@code docidx} already
 * reaches (or passes) the span's start.
 */
private void addBackgroundIfNecessary (LabeledSpans labeled, StringSpan span, int docidx, Label background)
{
  int spanStart = span.getStartIdx ();
  if (docidx >= spanStart) return;   // no gap to fill
  Span gap = new StringSpan ((CharSequence) span.getDocument (), docidx, spanStart);
  labeled.add (new LabeledSpan (gap, background, true));
}
/**
 * Returns a span over the underlying document covering tokens
 * {@code firstToken} (inclusive) through {@code lastToken} (exclusive).
 * If {@code lastToken} exceeds the number of tokens, the span extends to
 * the end of the document.
 */
public Span subspan (int firstToken, int lastToken)
{
  StringSpan first = (StringSpan) get (firstToken);
  int begin = first.getStartIdx ();
  int finish;
  if (lastToken <= size ()) {
    StringSpan last = (StringSpan) get (lastToken - 1);
    finish = last.getEndIdx ();
  } else {
    // past the last token: run to the end of the document
    finish = document.length ();
  }
  return new StringSpan (document, begin, finish);
}
/**
 * Returns a span over the underlying document covering tokens
 * {@code firstToken} (inclusive) through {@code lastToken} (exclusive).
 * If {@code lastToken} exceeds the number of tokens, the span extends to
 * the end of the document.
 */
public Span subspan (int firstToken, int lastToken)
{
  StringSpan first = (StringSpan) get (firstToken);
  int begin = first.getStartIdx ();
  int finish;
  if (lastToken <= size ()) {
    StringSpan last = (StringSpan) get (lastToken - 1);
    finish = last.getEndIdx ();
  } else {
    // past the last token: run to the end of the document
    finish = document.length ();
  }
  return new StringSpan (document, begin, finish);
}
/**
 * If there is an uncovered gap between document position {@code docidx}
 * and the start of {@code span}, appends a span labeled {@code background}
 * covering exactly that gap.  Does nothing when {@code docidx} already
 * reaches (or passes) the span's start.
 */
private void addBackgroundIfNecessary (LabeledSpans labeled, StringSpan span, int docidx, Label background)
{
  int spanStart = span.getStartIdx ();
  if (docidx >= spanStart) return;   // no gap to fill
  Span gap = new StringSpan ((CharSequence) span.getDocument (), docidx, spanStart);
  labeled.add (new LabeledSpan (gap, background, true));
}
/**
 * If there is an uncovered gap between document position {@code docidx}
 * and the start of {@code span}, appends a span labeled {@code background}
 * covering exactly that gap.  Does nothing when {@code docidx} already
 * reaches (or passes) the span's start.
 */
private void addBackgroundIfNecessary (LabeledSpans labeled, StringSpan span, int docidx, Label background)
{
  int spanStart = span.getStartIdx ();
  if (docidx >= spanStart) return;   // no gap to fill
  Span gap = new StringSpan ((CharSequence) span.getDocument (), docidx, spanStart);
  labeled.add (new LabeledSpan (gap, background, true));
}
/**
 * If there is an uncovered gap between document position {@code docidx}
 * and the start of {@code span}, appends a span labeled {@code background}
 * covering exactly that gap.  Does nothing when {@code docidx} already
 * reaches (or passes) the span's start.
 */
private void addBackgroundIfNecessary (LabeledSpans labeled, StringSpan span, int docidx, Label background)
{
  int spanStart = span.getStartIdx ();
  if (docidx >= spanStart) return;   // no gap to fill
  Span gap = new StringSpan ((CharSequence) span.getDocument (), docidx, spanStart);
  labeled.add (new LabeledSpan (gap, background, true));
}
/**
 * If there is an uncovered gap between document position {@code docidx}
 * and the start of {@code span}, appends a span labeled {@code background}
 * covering exactly that gap.  Does nothing when {@code docidx} already
 * reaches (or passes) the span's start.
 */
private void addBackgroundIfNecessary (LabeledSpans labeled, StringSpan span, int docidx, Label background)
{
  int spanStart = span.getStartIdx ();
  if (docidx >= spanStart) return;   // no gap to fill
  Span gap = new StringSpan ((CharSequence) span.getDocument (), docidx, spanStart);
  labeled.add (new LabeledSpan (gap, background, true));
}
/**
 * If there is an uncovered gap between document position {@code docidx}
 * and the start of {@code span}, appends a span labeled {@code background}
 * covering exactly that gap.  Does nothing when {@code docidx} already
 * reaches (or passes) the span's start.
 */
private void addBackgroundIfNecessary (LabeledSpans labeled, StringSpan span, int docidx, Label background)
{
  int spanStart = span.getStartIdx ();
  if (docidx >= spanStart) return;   // no gap to fill
  Span gap = new StringSpan ((CharSequence) span.getDocument (), docidx, spanStart);
  labeled.add (new LabeledSpan (gap, background, true));
}
/**
 * Returns a span over the underlying document covering tokens
 * {@code firstToken} (inclusive) through {@code lastToken} (exclusive).
 * If {@code lastToken} exceeds the number of tokens, the span extends to
 * the end of the document.
 */
public Span subspan (int firstToken, int lastToken)
{
  StringSpan first = (StringSpan) get (firstToken);
  int begin = first.getStartIdx ();
  int finish;
  if (lastToken <= size ()) {
    StringSpan last = (StringSpan) get (lastToken - 1);
    finish = last.getEndIdx ();
  } else {
    // past the last token: run to the end of the document
    finish = document.length ();
  }
  return new StringSpan (document, begin, finish);
}
public Instance pipe (Instance carrier) { Object data = carrier.getData (); if (data instanceof Tokenization) { // we're done } else if (data instanceof TokenSequence) { StringBuffer buf = new StringBuffer (); TokenSequence ts = (TokenSequence) data; StringTokenization spans = new StringTokenization (buf); // I can use a StringBuffer as the doc! Awesome! for (int i = 0; i < ts.size(); i++) { Token token = ts.get(i); int start = buf.length (); buf.append (token.getText()); int end = buf.length(); StringSpan span = new StringSpan (buf, start, end); span.setFeatures (token.getFeatures ()); span.setProperties (token.getProperties ()); spans.add (span); buf.append (" "); } carrier.setData (spans); } else { throw new IllegalArgumentException ("Can't convert "+data+" to Tokenization."); } return carrier; }
public Instance pipe (Instance carrier) { Object data = carrier.getData (); if (data instanceof Tokenization) { // we're done } else if (data instanceof TokenSequence) { StringBuffer buf = new StringBuffer (); TokenSequence ts = (TokenSequence) data; StringTokenization spans = new StringTokenization (buf); // I can use a StringBuffer as the doc! Awesome! for (int i = 0; i < ts.size(); i++) { Token token = ts.get(i); int start = buf.length (); buf.append (token.getText()); int end = buf.length(); StringSpan span = new StringSpan (buf, start, end); span.setFeatures (token.getFeatures ()); span.setProperties (token.getProperties ()); spans.add (span); buf.append (" "); } carrier.setData (spans); } else { throw new IllegalArgumentException ("Can't convert "+data+" to Tokenization."); } return carrier; }
/**
 * Creates a tokenization of the given string.  One span is added for
 * each match produced by the given lexer, using the lexer's reported
 * start and end offsets.
 */
public StringTokenization (CharSequence string, CharSequenceLexer lexer)
{
  super ();
  document = string;
  lexer.setCharSequence (string);
  for (; lexer.hasNext (); ) {
    lexer.next ();
    add (new StringSpan (string, lexer.getStartOffset (), lexer.getEndOffset ()));
  }
}
/**
 * Creates a tokenization of the given string.  One span is added for
 * each match produced by the given lexer, using the lexer's reported
 * start and end offsets.
 */
public StringTokenization (CharSequence string, CharSequenceLexer lexer)
{
  super ();
  document = string;
  lexer.setCharSequence (string);
  for (; lexer.hasNext (); ) {
    lexer.next ();
    add (new StringSpan (string, lexer.getStartOffset (), lexer.getEndOffset ()));
  }
}
/**
 * Creates a tokenization of the given string.  One span is added for
 * each match produced by the given lexer, using the lexer's reported
 * start and end offsets.
 */
public StringTokenization (CharSequence string, CharSequenceLexer lexer)
{
  super ();
  document = string;
  lexer.setCharSequence (string);
  for (; lexer.hasNext (); ) {
    lexer.next ();
    add (new StringSpan (string, lexer.getStartOffset (), lexer.getEndOffset ()));
  }
}
/**
 * Tokenizes the CharSequence in the instance's data field with this
 * pipe's lexer, replacing the data with a StringTokenization containing
 * one StringSpan per lexer match.
 */
public Instance pipe (Instance carrier)
{
  CharSequence doc = (CharSequence) carrier.getData ();
  lexer.setCharSequence (doc);
  TokenSequence tokens = new StringTokenization (doc);
  for (; lexer.hasNext (); ) {
    lexer.next ();
    tokens.add (new StringSpan (doc, lexer.getStartOffset (), lexer.getEndOffset ()));
  }
  carrier.setData (tokens);
  return carrier;
}
/**
 * Tokenizes the CharSequence in the instance's data field with this
 * pipe's lexer, replacing the data with a StringTokenization containing
 * one StringSpan per lexer match.
 */
public Instance pipe (Instance carrier)
{
  CharSequence doc = (CharSequence) carrier.getData ();
  lexer.setCharSequence (doc);
  TokenSequence tokens = new StringTokenization (doc);
  for (; lexer.hasNext (); ) {
    lexer.next ();
    tokens.add (new StringSpan (doc, lexer.getStartOffset (), lexer.getEndOffset ()));
  }
  carrier.setData (tokens);
  return carrier;
}
/**
 * Tokenizes the CharSequence in the instance's data field with this
 * pipe's lexer, replacing the data with a StringTokenization containing
 * one StringSpan per lexer match.
 */
public Instance pipe (Instance carrier)
{
  CharSequence doc = (CharSequence) carrier.getData ();
  lexer.setCharSequence (doc);
  TokenSequence tokens = new StringTokenization (doc);
  for (; lexer.hasNext (); ) {
    lexer.next ();
    tokens.add (new StringSpan (doc, lexer.getStartOffset (), lexer.getEndOffset ()));
  }
  carrier.setData (tokens);
  return carrier;
}