/**
 * Converts an array of {@link Span}s to an array of {@link String}s by
 * taking, for every span, the text it covers in {@code s}.
 *
 * @param spans the spans to convert; the result holds one entry per span,
 *              in the same order
 * @param s the text the spans are applied to
 * @return the strings
 */
public static String[] spansToStrings(Span[] spans, CharSequence s) {
  String[] covered = new String[spans.length];
  int next = 0;
  for (Span span : spans) {
    covered[next++] = span.getCoveredText(s).toString();
  }
  return covered;
}
/**
 * Renders the sample with one sentence per line. Carriage returns and line
 * feeds inside a sentence are written as the markers {@code <CR>} and
 * {@code <LF>}, so every sentence occupies exactly one output line.
 *
 * @return the sentences, each followed by a line feed
 */
@Override
public String toString() {
  StringBuilder out = new StringBuilder();
  for (Span sentence : sentences) {
    String sentenceText = sentence.getCoveredText(document).toString();
    String singleLine = sentenceText.replace("\r", "<CR>").replace("\n", "<LF>");
    out.append(singleLine).append("\n");
  }
  return out.toString();
}
/**
 * Builds a {@link RelationAnnotation} from the tokens of one line: the id
 * and type are taken verbatim from their tokens, the two argument tokens
 * are passed through {@code parseArg}.
 *
 * @param tokens the spans of the individual fields within {@code line}
 * @param line the line the spans refer to
 * @return the relation annotation described by the line
 * @throws IOException declared by the overridden method
 */
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  String id = tokens[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString();
  String type = tokens[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString();
  return new RelationAnnotation(
      id,
      type,
      parseArg(tokens[ARG1_OFFSET].getCoveredText(line).toString()),
      parseArg(tokens[ARG2_OFFSET].getCoveredText(line).toString()));
}
}
/**
 * Builds an {@link AttributeAnnotation} from a line with three or four
 * fields. The fourth field, when present, is the attribute value;
 * otherwise the value is {@code null}.
 *
 * @param values the spans of the individual fields within {@code line}
 * @param line the line the spans refer to
 * @return the attribute annotation described by the line
 * @throws IOException an {@code InvalidFormatException} is thrown when the
 *         line does not have 3 or 4 fields
 */
@Override
BratAnnotation parse(Span[] values, CharSequence line) throws IOException {
  int fieldCount = values.length;
  if (fieldCount != 3 && fieldCount != 4) {
    throw new InvalidFormatException("Line must have 3 or 4 fields");
  }

  String value = fieldCount == 4
      ? values[VALUE_OFFSET].getCoveredText(line).toString()
      : null;

  String id = values[ID_OFFSET].getCoveredText(line).toString();
  String type = values[TYPE_OFFSET].getCoveredText(line).toString();
  String attachedTo = values[ATTACHED_TO_OFFSET].getCoveredText(line).toString();
  return new AttributeAnnotation(id, type, attachedTo, value);
}
}
/**
 * Auxiliary method to print span errors. Writes the covered text of every
 * false positive and every false negative to {@code printStream}, one per
 * line, each group enclosed in braces.
 *
 * @param falsePositives
 *          false positives span
 * @param falseNegatives
 *          false negative span
 * @param doc
 *          the document text
 */
private void printErrors(List<Span> falsePositives, List<Span> falseNegatives, String doc) {
  printStream.println("False positives: {");
  printCoveredTexts(falsePositives, doc);
  printStream.println("} False negatives: {");
  printCoveredTexts(falseNegatives, doc);
  printStream.println("}\n");
}

/**
 * Prints the text each span covers in {@code doc}, one span per line.
 *
 * @param spans the spans to print
 * @param doc the document text
 */
private void printCoveredTexts(List<Span> spans, String doc) {
  for (Span span : spans) {
    printStream.println(span.getCoveredText(doc));
  }
}
/**
 * Builds an {@link AnnotatorNoteAnnotation} from the tokens of one line.
 * The note text is everything from the start of the token at
 * {@code START_VALUE_OFFSET} to the end of the last token, so whitespace
 * between those tokens is part of the note.
 *
 * @param tokens the spans of the individual fields within {@code line}
 * @param line the line the spans refer to
 * @return the annotator note described by the line
 * @throws IOException declared by the overridden method
 */
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  int noteStart = tokens[START_VALUE_OFFSET].getStart();
  int noteEnd = tokens[tokens.length - 1].getEnd();
  Span noteSpan = new Span(noteStart, noteEnd);

  String id = tokens[ID_OFFSET].getCoveredText(line).toString();
  String attachedTo = tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString();
  String note = noteSpan.getCoveredText(line).toString();
  return new AnnotatorNoteAnnotation(id, attachedTo, note);
}
}
/**
 * Builds an {@link EventAnnotation} from the tokens of one line. The type
 * token must have the form {@code type:trigger}; every token after it must
 * have the form {@code argument:value}.
 *
 * @param tokens the spans of the individual fields within {@code line}
 * @param line the line the spans refer to
 * @return the event annotation described by the line
 * @throws IOException an {@code InvalidFormatException} is thrown when the
 *         type token or an argument token does not split into exactly two
 *         parts at {@code ':'}
 */
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  String[] typeAndTrigger = tokens[TYPE_OFFSET].getCoveredText(line).toString().split(":");
  if (typeAndTrigger.length != 2) {
    throw new InvalidFormatException(String.format(
        "Failed to parse [%s], type part must be in the format type:trigger", line));
  }

  Map<String, String> arguments = new HashMap<>();
  for (int tokenIndex = TYPE_OFFSET + 1; tokenIndex < tokens.length; tokenIndex++) {
    String argumentText = tokens[tokenIndex].getCoveredText(line).toString();
    String[] nameAndValue = argumentText.split(":");
    if (nameAndValue.length != 2) {
      throw new InvalidFormatException(String.format(
          "Failed to parse [%s], argument parts must be in form argument:value", line));
    }
    arguments.put(nameAndValue[0], nameAndValue[1]);
  }

  // The id is read only after all format checks have passed.
  String id = tokens[ID_OFFSET].getCoveredText(line).toString();
  return new EventAnnotation(id, typeAndTrigger[0], typeAndTrigger[1], arguments);
}
}
/**
 * Detect sentences in a String.
 *
 * @param s The string to be processed.
 *
 * @return A string array containing individual sentences as elements; the
 *         array is empty when no sentence span is found.
 */
public String[] sentDetect(String s) {
  Span[] sentenceSpans = sentPosDetect(s);
  // With zero spans this allocates an empty array and the loop is skipped.
  String[] sentences = new String[sentenceSpans.length];
  int next = 0;
  for (Span sentenceSpan : sentenceSpans) {
    sentences[next++] = sentenceSpan.getCoveredText(s).toString();
  }
  return sentences;
}
@Override public String toString() { StringBuilder sentence = new StringBuilder(); int lastEndIndex = -1; for (Span token : tokenSpans) { if (lastEndIndex != -1) { // If there are no chars between last token // and this token insert the separator chars // otherwise insert a space String separator; if (lastEndIndex == token.getStart()) separator = separatorChars; else separator = " "; sentence.append(separator); } sentence.append(token.getCoveredText(text)); lastEndIndex = token.getEnd(); } return sentence.toString(); }
public String read() throws IOException { TokenSample tokenSample = samples.read(); if (tokenSample != null) { StringBuilder whitespaceSeparatedTokenString = new StringBuilder(); for (Span token : tokenSample.getTokenSpans()) { whitespaceSeparatedTokenString.append( token.getCoveredText(tokenSample.getText())); whitespaceSeparatedTokenString.append(' '); } // Shorten string by one to get rid of last space if (whitespaceSeparatedTokenString.length() > 0) { whitespaceSeparatedTokenString.setLength( whitespaceSeparatedTokenString.length() - 1 ); } return whitespaceSeparatedTokenString.toString(); } return null; } }
// NOTE(review): this line is a non-contiguous excerpt of a larger routine. The declarations of
// i, endOffset, firstTextTokenIndex and fragments, and the closing brace of the `if` below,
// are outside this view -- confirm against the full method before editing.
// What is visible: type, begin index and id are read from the covered text of entries in
// `values`; a value containing ';' is split at ';' and its first part parsed as an end offset,
// and Span(beginIndex, endOffset, type) fragments are collected in `fragments`.
String type = values[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString(); int beginIndex = parseInt(values[BEGIN_OFFSET].getCoveredText(line).toString()); if (values[i].getCoveredText(line).toString().contains(";")) { String[] parts = values[i].getCoveredText(line).toString().split(";"); endOffset = parseInt(parts[0]); fragments.add(new Span(beginIndex, endOffset, type)); endOffset = parseInt(values[i].getCoveredText(line).toString()); firstTextTokenIndex = i + 1; fragments.add(new Span(beginIndex, endOffset, type)); String id = values[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString();
// Text covered by whiteSpaceTokenSpan within sampleString, materialized as a String.
String whitespaceToken = whiteSpaceTokenSpan.getCoveredText(sampleString).toString();
/**
 * Creates the training events for one sample. For every sentence the
 * candidate positions reported by {@code scanner} are visited in order;
 * the last candidate of a sentence yields a
 * {@link SentenceDetectorME#SPLIT} event, all earlier ones a
 * {@link SentenceDetectorME#NO_SPLIT} event.
 *
 * @param sample the sample to create events for
 * @return an iterator over the created events
 */
@Override
protected Iterator<Event> createEvents(SentenceSample sample) {
  Collection<Event> events = new ArrayList<>();

  for (Span sentenceSpan : sample.getSentences()) {
    String sentenceText = sentenceSpan.getCoveredText(sample.getDocument()).toString();

    Iterator<Integer> positions = scanner.getPositions(sentenceText).iterator();
    while (positions.hasNext()) {
      int candidate = positions.next();
      String outcome = positions.hasNext()
          ? SentenceDetectorME.NO_SPLIT
          : SentenceDetectorME.SPLIT;

      // Candidates are relative to the sentence; the context is computed
      // on the whole document, hence the shift by the sentence start.
      events.add(new Event(outcome,
          cg.getContext(sample.getDocument(), sentenceSpan.getStart() + candidate)));
    }
  }

  return events.iterator();
}
}
/**
 * Parses the space-joined tokens of the reference parse and, when a
 * prediction is produced, updates {@code fmeasure} with the constituency
 * spans of reference and prediction.
 *
 * @param reference the reference parse
 * @return the first predicted parse, or {@code null} when the parser
 *         returned no prediction
 */
@Override
protected final Parse processSample(final Parse reference) {
  List<String> tokenTexts = new ArrayList<>();
  for (Parse tokenNode : reference.getTokenNodes()) {
    tokenTexts.add(tokenNode.getSpan().getCoveredText(reference.getText()).toString());
  }

  String sentence = String.join(" ", tokenTexts);
  Parse[] predictions = ParserTool.parseLine(sentence, parser, 1);
  if (predictions.length == 0) {
    return null;
  }

  Parse best = predictions[0];
  fmeasure.updateScores(getConstituencySpans(reference), getConstituencySpans(best));
  return best;
}
// Take the text tokens[i] covers in sentence, pass it through escape(...),
// and store the result at the same index of tokenList.
String tokenString = tokens[i].getCoveredText(sentence).toString(); String escapedToken = escape(tokenString); tokenList[i] = escapedToken;
/**
 * Checks that trimming a span which covers the whole text leaves only the
 * part without surrounding whitespace.
 */
@Test
public void testTrim() {
  final String text = " 12 34 ";
  final Span wholeText = new Span(0, text.length());
  Assert.assertEquals("12 34", wholeText.trim(text).getCoveredText(text));
}
/**
 * Checks that trimming a span which covers only whitespace results in a
 * span with empty covered text.
 */
@Test
public void testTrimWhitespaceSpan() {
  final String text = " ";
  final Span wholeText = new Span(0, text.length());
  Assert.assertEquals("", wholeText.trim(text).getCoveredText(text));
}
/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by a whitespace.
 *
 * @throws IOException if reading from or closing the sample stream fails
 */
@Test
public void testParsingWhitespaceSeparatedTokens() throws IOException {
  String sampleTokens = "Slave to the wage";

  // try-with-resources closes the stream even when an assertion fails.
  try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
      ObjectStreamUtils.createObjectStream(sampleTokens))) {

    TokenSample tokenSample = sampleTokenStream.read();
    Span[] tokenSpans = tokenSample.getTokenSpans();

    Assert.assertEquals(4, tokenSpans.length);
    Assert.assertEquals("Slave", tokenSpans[0].getCoveredText(sampleTokens));
    Assert.assertEquals("to", tokenSpans[1].getCoveredText(sampleTokens));
    Assert.assertEquals("the", tokenSpans[2].getCoveredText(sampleTokens));
    Assert.assertEquals("wage", tokenSpans[3].getCoveredText(sampleTokens));
  }
}
/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by whitespace and by the split chars.
 *
 * @throws IOException if reading from or closing the sample stream fails
 */
@Test
public void testParsingWhitespaceAndSeparatedString() throws IOException {
  String sampleTokens = "a b<SPLIT>c d<SPLIT>e";
  String[] expectedTokens = {"a", "b", "c", "d", "e"};

  try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
      ObjectStreamUtils.createObjectStream(sampleTokens))) {

    TokenSample tokenSample = sampleTokenStream.read();
    Span[] tokenSpans = tokenSample.getTokenSpans();

    Assert.assertEquals(5, tokenSpans.length);
    // The spans index into the sample text, not into the raw input string.
    for (int i = 0; i < expectedTokens.length; i++) {
      Assert.assertEquals(expectedTokens[i],
          tokenSpans[i].getCoveredText(tokenSample.getText()));
    }
  }
}
}
/**
 * Tests if the {@link TokenSample} correctly tokenizes tokens which
 * are separated by the split chars. Checks both the covered text and the
 * exact span of every token.
 *
 * @throws IOException if reading from or closing the sample stream fails
 */
@Test
public void testParsingSeparatedString() throws IOException {
  String sampleTokens = "a<SPLIT>b<SPLIT>c<SPLIT>d";

  // try-with-resources closes the stream even when an assertion fails.
  try (ObjectStream<TokenSample> sampleTokenStream = new TokenSampleStream(
      ObjectStreamUtils.createObjectStream(sampleTokens))) {

    TokenSample tokenSample = sampleTokenStream.read();
    Span[] tokenSpans = tokenSample.getTokenSpans();

    Assert.assertEquals(4, tokenSpans.length);

    Assert.assertEquals("a", tokenSpans[0].getCoveredText(tokenSample.getText()));
    Assert.assertEquals(new Span(0,1), tokenSpans[0]);

    Assert.assertEquals("b", tokenSpans[1].getCoveredText(tokenSample.getText()));
    Assert.assertEquals(new Span(1,2), tokenSpans[1]);

    Assert.assertEquals("c", tokenSpans[2].getCoveredText(tokenSample.getText()));
    Assert.assertEquals(new Span(2,3), tokenSpans[2]);

    Assert.assertEquals("d", tokenSpans[3].getCoveredText(tokenSample.getText()));
    Assert.assertEquals(new Span(3,4), tokenSpans[3]);
  }
}