/**
 * Builds a {@link SpanLabelView} named {@code viewName} over {@code ta} and adds one span
 * label per entry of {@code chunkLabels}.
 *
 * <p>The three lists are read in parallel: for index {@code i}, {@code chunkStart.get(i)} and
 * {@code chunkEnd.get(i)} are passed as the span bounds and {@code chunkLabels.get(i)} as the
 * label. The view is created with generator name "GoldStandard". The returned view is not
 * attached to {@code ta} by this method.
 *
 * @param chunkLabels labels to add; its size drives the iteration
 * @param chunkStart span start for each label (must have at least as many entries)
 * @param chunkEnd span end for each label (must have at least as many entries)
 * @param ta the text annotation the view is built over
 * @param viewName name given to the new view
 * @return the populated view
 */
protected SpanLabelView makeSpanLabeledView(List<String> chunkLabels, List<Integer> chunkStart,
        List<Integer> chunkEnd, TextAnnotation ta, String viewName) {
    SpanLabelView labelledSpans = new SpanLabelView(viewName, "GoldStandard", ta, 1.0);
    int idx = 0;
    while (idx < chunkLabels.size()) {
        // Read start, end, label in that order, matching the argument order of addSpanLabel.
        int spanStart = chunkStart.get(idx);
        int spanEnd = chunkEnd.get(idx);
        String spanLabel = chunkLabels.get(idx);
        labelledSpans.addSpanLabel(spanStart, spanEnd, spanLabel, 1.0);
        idx++;
    }
    return labelledSpans;
}
/**
 * Builds a {@link SpanLabelView} named {@code viewName} over {@code ta} and adds one span
 * label per entry of {@code chunkLabels}.
 *
 * <p>The three lists are read in parallel: for index {@code i}, {@code chunkStart.get(i)} and
 * {@code chunkEnd.get(i)} are passed as the span bounds and {@code chunkLabels.get(i)} as the
 * label. The view is created with generator name "GoldStandard". The returned view is not
 * attached to {@code ta} by this method.
 *
 * @param chunkLabels labels to add; its size drives the iteration
 * @param chunkStart span start for each label (must have at least as many entries)
 * @param chunkEnd span end for each label (must have at least as many entries)
 * @param ta the text annotation the view is built over
 * @param viewName name given to the new view
 * @return the populated view
 */
protected SpanLabelView makeSpanLabeledView(List<String> chunkLabels, List<Integer> chunkStart,
        List<Integer> chunkEnd, TextAnnotation ta, String viewName) {
    SpanLabelView labelledSpans = new SpanLabelView(viewName, "GoldStandard", ta, 1.0);
    int idx = 0;
    while (idx < chunkLabels.size()) {
        // Read start, end, label in that order, matching the argument order of addSpanLabel.
        int spanStart = chunkStart.get(idx);
        int spanEnd = chunkEnd.get(idx);
        String spanLabel = chunkLabels.get(idx);
        labelledSpans.addSpanLabel(spanStart, spanEnd, spanLabel, 1.0);
        idx++;
    }
    return labelledSpans;
}
/**
 * Decodes a BIO-style label sequence into spans and attaches them to {@code ta} as a
 * {@link SpanLabelView} registered under {@code viewName}.
 *
 * <p>A label starting with "B" opens a span at its index. The span's label is the opening
 * label with its first two characters removed (e.g. the "B-" prefix). An open span is closed,
 * with an exclusive end index, by whichever comes first: a label starting with "O", the next
 * label starting with "B", or the end of the list. Any other label leaves the open span
 * untouched.
 *
 * @param ta the text annotation that receives the view
 * @param labels one label per position (presumably one per token; confirm against callers)
 */
protected void addGoldBIOView(TextAnnotation ta, List<String> labels) {
    SpanLabelView lightVerbView = new SpanLabelView(viewName, ta);
    int startSpan = -1;
    String prevLabel = null;
    for (int i = 0; i < labels.size(); i++) {
        String label = labels.get(i);
        boolean opensSpan = label.startsWith("B");
        boolean outside = label.startsWith("O");
        // Both "O" and a fresh "B" end the span currently open; previously a "B" directly
        // after another span silently discarded the earlier one.
        if ((opensSpan || outside) && startSpan != -1) {
            lightVerbView.addSpanLabel(startSpan, i, prevLabel.substring(2), 1.0);
            startSpan = -1;
        }
        if (opensSpan) {
            startSpan = i;
            prevLabel = label;
        }
    }
    // A span that runs to the last position has no trailing "O" to close it; flush it here.
    if (startSpan != -1) {
        lightVerbView.addSpanLabel(startSpan, labels.size(), prevLabel.substring(2), 1.0);
    }
    ta.addView(viewName, lightVerbView);
}
/**
 * Decodes a BIO-style label sequence into spans and attaches them to {@code ta} as a
 * {@link SpanLabelView} registered under {@code viewName}.
 *
 * <p>A label starting with "B" opens a span at its index. The span's label is the opening
 * label with its first two characters removed (e.g. the "B-" prefix). An open span is closed,
 * with an exclusive end index, by whichever comes first: a label starting with "O", the next
 * label starting with "B", or the end of the list. Any other label leaves the open span
 * untouched.
 *
 * @param ta the text annotation that receives the view
 * @param labels one label per position (presumably one per token; confirm against callers)
 */
protected void addGoldBIOView(TextAnnotation ta, List<String> labels) {
    SpanLabelView lightVerbView = new SpanLabelView(viewName, ta);
    int startSpan = -1;
    String prevLabel = null;
    for (int i = 0; i < labels.size(); i++) {
        String label = labels.get(i);
        boolean opensSpan = label.startsWith("B");
        boolean outside = label.startsWith("O");
        // Both "O" and a fresh "B" end the span currently open; previously a "B" directly
        // after another span silently discarded the earlier one.
        if ((opensSpan || outside) && startSpan != -1) {
            lightVerbView.addSpanLabel(startSpan, i, prevLabel.substring(2), 1.0);
            startSpan = -1;
        }
        if (opensSpan) {
            startSpan = i;
            prevLabel = label;
        }
    }
    // A span that runs to the last position has no trailing "O" to close it; flush it here.
    if (startSpan != -1) {
        lightVerbView.addSpanLabel(startSpan, labels.size(), prevLabel.substring(2), 1.0);
    }
    ta.addView(viewName, lightVerbView);
}
/**
 * Decodes a BIO-style label sequence into spans and attaches them to {@code ta} as a
 * {@link SpanLabelView} registered under {@code viewName}.
 *
 * <p>A label starting with "B" opens a span at its index. The span's label is the opening
 * label with its first two characters removed (e.g. the "B-" prefix). An open span is closed,
 * with an exclusive end index, by whichever comes first: a label starting with "O", the next
 * label starting with "B", or the end of the list. Any other label leaves the open span
 * untouched.
 *
 * @param ta the text annotation that receives the view
 * @param labels one label per position (presumably one per token; confirm against callers)
 */
protected void addGoldBIOView(TextAnnotation ta, List<String> labels) {
    SpanLabelView lightVerbView = new SpanLabelView(viewName, ta);
    int startSpan = -1;
    String prevLabel = null;
    for (int i = 0; i < labels.size(); i++) {
        String label = labels.get(i);
        boolean opensSpan = label.startsWith("B");
        boolean outside = label.startsWith("O");
        // Both "O" and a fresh "B" end the span currently open; previously a "B" directly
        // after another span silently discarded the earlier one.
        if ((opensSpan || outside) && startSpan != -1) {
            lightVerbView.addSpanLabel(startSpan, i, prevLabel.substring(2), 1.0);
            startSpan = -1;
        }
        if (opensSpan) {
            startSpan = i;
            prevLabel = label;
        }
    }
    // A span that runs to the last position has no trailing "O" to close it; flush it here.
    if (startSpan != -1) {
        lightVerbView.addSpanLabel(startSpan, labels.size(), prevLabel.substring(2), 1.0);
    }
    ta.addView(viewName, lightVerbView);
}
/**
 * Adds a single-span view to {@code ta} whose labels come from the classifier.
 *
 * <p>The span runs from the start of the first constituent of the TOKENS view to the end of
 * the last one. The tokens in that span are joined with single spaces and trimmed, the
 * resulting string is handed to {@code embedding.getVector}, and every label ID returned by
 * {@code classifier.getFlatPredictions(docVector, topK)} is added over the same span. The view
 * is registered under {@code getViewName()}.
 *
 * @param ta the text annotation to annotate; its TOKENS view must contain at least one
 *        constituent, otherwise the list lookups below throw
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    SpanLabelView datalessView = new SpanLabelView(getViewName(), getClassName(), ta, 1d, true);

    List<Constituent> tokenConstituents = ta.getView(ViewNames.TOKENS).getConstituents();
    Constituent firstToken = tokenConstituents.get(0);
    Constituent lastToken = tokenConstituents.get(tokenConstituents.size() - 1);
    int textStart = firstToken.getSpan().getFirst();
    int textEnd = lastToken.getSpan().getSecond();

    // Rebuild the covered text as space-separated tokens; trim() removes the trailing space.
    StringBuilder joined = new StringBuilder();
    for (String token : ta.getTokensInSpan(textStart, textEnd)) {
        joined.append(token).append(" ");
    }
    String documentText = joined.toString().trim();

    SparseVector<Integer> docVector = embedding.getVector(documentText);
    for (String labelID : classifier.getFlatPredictions(docVector, topK)) {
        datalessView.addSpanLabel(textStart, textEnd, labelID, 1d);
    }

    ta.addView(getViewName(), datalessView);
}
}
/**
 * Adds a single-span view to {@code ta} whose labels come from the classifier.
 *
 * <p>The span runs from the start of the first constituent of the TOKENS view to the end of
 * the last one. The tokens in that span are joined with single spaces and trimmed, the
 * resulting string is handed to {@code embedding.getVector}, and every label ID returned by
 * {@code classifier.getFlatPredictions(docVector, topK)} is added over the same span. The view
 * is registered under {@code getViewName()}.
 *
 * @param ta the text annotation to annotate; its TOKENS view must contain at least one
 *        constituent, otherwise the list lookups below throw
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    SpanLabelView datalessView = new SpanLabelView(getViewName(), getClassName(), ta, 1d, true);

    List<Constituent> tokenConstituents = ta.getView(ViewNames.TOKENS).getConstituents();
    Constituent firstToken = tokenConstituents.get(0);
    Constituent lastToken = tokenConstituents.get(tokenConstituents.size() - 1);
    int textStart = firstToken.getSpan().getFirst();
    int textEnd = lastToken.getSpan().getSecond();

    // Rebuild the covered text as space-separated tokens; trim() removes the trailing space.
    StringBuilder joined = new StringBuilder();
    for (String token : ta.getTokensInSpan(textStart, textEnd)) {
        joined.append(token).append(" ");
    }
    String documentText = joined.toString().trim();

    SparseVector<Integer> docVector = embedding.getVector(documentText);
    for (String labelID : classifier.getFlatPredictions(docVector, topK)) {
        datalessView.addSpanLabel(textStart, textEnd, labelID, 1d);
    }

    ta.addView(getViewName(), datalessView);
}
}
/**
 * Adds the {@code ViewNames.QUANTITIES} view to {@code ta}.
 *
 * <p>Spans are obtained from {@code getSpans(ta.getTokenizedText(), true, ta)}. For each one,
 * its {@code start} and {@code end} character offsets are mapped to token ids with
 * {@code ta.getTokenIdFromCharacterOffset}, and a span label carrying
 * {@code span.object.toString()} is added between those token ids.
 *
 * <p>NOTE(review): the end token id is passed straight through as the span end; confirm that
 * {@code span.end} maps to the index the view expects.
 *
 * @param ta the text annotation to annotate; asserted (only when assertions are enabled) to
 *        already have a SENTENCE view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    assert (ta.hasView(ViewNames.SENTENCE));

    SpanLabelView quantities =
            new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);

    for (QuantSpan quantity : getSpans(ta.getTokenizedText(), true, ta)) {
        quantities.addSpanLabel(
                ta.getTokenIdFromCharacterOffset(quantity.start),
                ta.getTokenIdFromCharacterOffset(quantity.end),
                quantity.object.toString(),
                1d);
    }

    ta.addView(ViewNames.QUANTITIES, quantities);
}
/**
 * Adds the {@code ViewNames.QUANTITIES} view to {@code ta}.
 *
 * <p>Spans are obtained from {@code getSpans(ta.getTokenizedText(), true, ta)}. For each one,
 * its {@code start} and {@code end} character offsets are mapped to token ids with
 * {@code ta.getTokenIdFromCharacterOffset}, and a span label carrying
 * {@code span.object.toString()} is added between those token ids.
 *
 * <p>NOTE(review): the end token id is passed straight through as the span end; confirm that
 * {@code span.end} maps to the index the view expects.
 *
 * @param ta the text annotation to annotate; asserted (only when assertions are enabled) to
 *        already have a SENTENCE view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    assert (ta.hasView(ViewNames.SENTENCE));

    SpanLabelView quantities =
            new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);

    for (QuantSpan quantity : getSpans(ta.getTokenizedText(), true, ta)) {
        quantities.addSpanLabel(
                ta.getTokenIdFromCharacterOffset(quantity.start),
                ta.getTokenIdFromCharacterOffset(quantity.end),
                quantity.object.toString(),
                1d);
    }

    ta.addView(ViewNames.QUANTITIES, quantities);
}
@Override public void addView(TextAnnotation ta) { lazyLoadClusters(); SpanLabelView view = new SpanLabelView(getViewName(), "BrownClusters", ta, 1.0, true); Map<String, List<IntPair>> m = getMatchingSpans(ta); for (Entry<String, List<IntPair>> entry : m.entrySet()) { String label = entry.getKey(); Set<IntPair> added = new LinkedHashSet<>(); for (IntPair p : entry.getValue()) { // don't add nested constituents of the same type boolean foundContainer = false; for (IntPair p1 : added) { if (p1 == p) continue; if (p1.getFirst() <= p.getFirst() && p1.getSecond() >= p.getSecond()) { foundContainer = true; break; } } if (!foundContainer) { view.addSpanLabel(p.getFirst(), p.getSecond(), label, 1.0); added.add(p); } } } ta.addView(getViewName(), view); }
@Override public void addView(TextAnnotation ta) { lazyLoadClusters(); SpanLabelView view = new SpanLabelView(getViewName(), "BrownClusters", ta, 1.0, true); Map<String, List<IntPair>> m = getMatchingSpans(ta); for (Entry<String, List<IntPair>> entry : m.entrySet()) { String label = entry.getKey(); Set<IntPair> added = new LinkedHashSet<>(); for (IntPair p : entry.getValue()) { // don't add nested constituents of the same type boolean foundContainer = false; for (IntPair p1 : added) { if (p1 == p) continue; if (p1.getFirst() <= p.getFirst() && p1.getSecond() >= p.getSecond()) { foundContainer = true; break; } } if (!foundContainer) { view.addSpanLabel(p.getFirst(), p.getSecond(), label, 1.0); added.add(p); } } } ta.addView(getViewName(), view); }
/**
 * Helper that creates a {@link SpanLabelView} from a stream of labelled spans and attaches it
 * to {@code ta}.
 *
 * <p>Each stream element pairs an {@code IntPair} (whose first and second components are
 * passed as the span bounds) with the label string. The stream is consumed by this call.
 *
 * @param spans the labelled spans to add
 * @param ta the text annotation that receives the view
 * @param viewName name of the view to create and register
 * @param allowOverlapping forwarded to the {@code SpanLabelView} constructor
 * @return the value of {@code count()} on the populated view
 */
private static int createSpanLabelView(
        Stream<Pair<IntPair, String>> spans,
        TextAnnotation ta,
        String viewName,
        boolean allowOverlapping) {
    final SpanLabelView labelView =
            new SpanLabelView(viewName, "GoldStandard", ta, 1.0, allowOverlapping);

    spans.forEach(labelled -> {
        IntPair bounds = labelled.getFirst();
        labelView.addSpanLabel(bounds.getFirst(), bounds.getSecond(), labelled.getSecond(), 1.0);
    });

    ta.addView(viewName, labelView);
    return labelView.count();
}
/**
 * Helper that creates a {@link SpanLabelView} from a stream of labelled spans and attaches it
 * to {@code ta}.
 *
 * <p>Each stream element pairs an {@code IntPair} (whose first and second components are
 * passed as the span bounds) with the label string. The stream is consumed by this call.
 *
 * @param spans the labelled spans to add
 * @param ta the text annotation that receives the view
 * @param viewName name of the view to create and register
 * @param allowOverlapping forwarded to the {@code SpanLabelView} constructor
 * @return the value of {@code count()} on the populated view
 */
private static int createSpanLabelView(
        Stream<Pair<IntPair, String>> spans,
        TextAnnotation ta,
        String viewName,
        boolean allowOverlapping) {
    final SpanLabelView labelView =
            new SpanLabelView(viewName, "GoldStandard", ta, 1.0, allowOverlapping);

    spans.forEach(labelled -> {
        IntPair bounds = labelled.getFirst();
        labelView.addSpanLabel(bounds.getFirst(), bounds.getSecond(), labelled.getSecond(), 1.0);
    });

    ta.addView(viewName, labelView);
    return labelView.count();
}
/**
 * Creates a three-token fixture ("I do .") carrying POS, NER, SHALLOW_PARSE and LEMMA views.
 *
 * <p>POS tags are PRP / VBP / ".", the NER view is left empty, the shallow parse has an NP
 * over [0, 1) and a VP over [1, 2), and the lemmas are "i" / "do" / ".".
 *
 * @return the populated text annotation
 */
protected TextAnnotation initializeDummySentenceVerb() {
    final String[] words = {"I", "do", "."};
    final String[] posTags = {"PRP", "VBP", "."};
    final String[] lemmas = {"i", "do", "."};

    List<String[]> sentences = new ArrayList<>();
    sentences.add(words);
    TextAnnotation ta =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens("", "", sentences);

    // Part-of-speech tags, one per token.
    TokenLabelView posView = new TokenLabelView(ViewNames.POS, "Test", ta, 1.0);
    for (int tokenId = 0; tokenId < posTags.length; tokenId++) {
        posView.addTokenLabel(tokenId, posTags[tokenId], 1d);
    }
    ta.addView(ViewNames.POS, posView);

    // NER view is present but has no entries.
    ta.addView(ViewNames.NER, new SpanLabelView(ViewNames.NER, "test", ta, 1d));

    // Shallow parse: one chunk per non-punctuation token.
    SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "test", ta, 1d);
    chunkView.addSpanLabel(0, 1, "NP", 1d);
    chunkView.addSpanLabel(1, 2, "VP", 1d);
    ta.addView(ViewNames.SHALLOW_PARSE, chunkView);

    // Lemmas, one per token.
    TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, "test", ta, 1d);
    for (int tokenId = 0; tokenId < lemmas.length; tokenId++) {
        lemmaView.addTokenLabel(tokenId, lemmas[tokenId], 1d);
    }
    ta.addView(ViewNames.LEMMA, lemmaView);

    return ta;
}
/**
 * Creates a three-token fixture ("I do .") carrying POS, NER, SHALLOW_PARSE and LEMMA views.
 *
 * <p>POS tags are PRP / VBP / ".", the NER view is left empty, the shallow parse has an NP
 * over [0, 1) and a VP over [1, 2), and the lemmas are "i" / "do" / ".".
 *
 * @return the populated text annotation
 */
protected TextAnnotation initializeDummySentenceVerb() {
    final String[] words = {"I", "do", "."};
    final String[] posTags = {"PRP", "VBP", "."};
    final String[] lemmas = {"i", "do", "."};

    List<String[]> sentences = new ArrayList<>();
    sentences.add(words);
    TextAnnotation ta =
            BasicTextAnnotationBuilder.createTextAnnotationFromTokens("", "", sentences);

    // Part-of-speech tags, one per token.
    TokenLabelView posView = new TokenLabelView(ViewNames.POS, "Test", ta, 1.0);
    for (int tokenId = 0; tokenId < posTags.length; tokenId++) {
        posView.addTokenLabel(tokenId, posTags[tokenId], 1d);
    }
    ta.addView(ViewNames.POS, posView);

    // NER view is present but has no entries.
    ta.addView(ViewNames.NER, new SpanLabelView(ViewNames.NER, "test", ta, 1d));

    // Shallow parse: one chunk per non-punctuation token.
    SpanLabelView chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, "test", ta, 1d);
    chunkView.addSpanLabel(0, 1, "NP", 1d);
    chunkView.addSpanLabel(1, 2, "VP", 1d);
    ta.addView(ViewNames.SHALLOW_PARSE, chunkView);

    // Lemmas, one per token.
    TokenLabelView lemmaView = new TokenLabelView(ViewNames.LEMMA, "test", ta, 1d);
    for (int tokenId = 0; tokenId < lemmas.length; tokenId++) {
        lemmaView.addTokenLabel(tokenId, lemmas[tokenId], 1d);
    }
    ta.addView(ViewNames.LEMMA, lemmaView);

    return ta;
}
public TextAnnotation(String corpusId, String id, String text, IntPair[] characterOffsets, String[] tokens, int[] sentenceEndPositions) { super(); // if the string is non-empty, the position of the last element should equal to the number of tokens if (tokens.length > 0 && sentenceEndPositions[sentenceEndPositions.length - 1] != tokens.length) throw new IllegalArgumentException("Invalid sentence boundary. " + "Last element should be the number of tokens"); this.corpusId = corpusId; this.id = id; this.text = text; this.symtab = new SymbolTable(); this.setTokens(tokens, characterOffsets); SpanLabelView view = new SpanLabelView(ViewNames.SENTENCE, "UserSpecified", this, 1d); int start = 0; for (int s : sentenceEndPositions) { view.addSpanLabel(start, s, ViewNames.SENTENCE, 1d); start = s; } this.addView(ViewNames.SENTENCE, view); // Add a TOKENS view in order to access tokens the same way as everything else in the // sentence TokenLabelView tokenLabelView = new TokenLabelView(ViewNames.TOKENS, "UserSpecified", this, 1d); for (int i = 0; i < tokens.length; i++) { tokenLabelView.addConstituent(new Constituent("", ViewNames.TOKENS, this, i, i + 1)); } this.addView(ViewNames.TOKENS, tokenLabelView); }
public TextAnnotation(String corpusId, String id, String text, IntPair[] characterOffsets, String[] tokens, int[] sentenceEndPositions) { super(); // if the string is non-empty, the position of the last element should equal to the number of tokens if (tokens.length > 0 && sentenceEndPositions[sentenceEndPositions.length - 1] != tokens.length) throw new IllegalArgumentException("Invalid sentence boundary. " + "Last element should be the number of tokens"); this.corpusId = corpusId; this.id = id; this.text = text; this.symtab = new SymbolTable(); this.setTokens(tokens, characterOffsets); SpanLabelView view = new SpanLabelView(ViewNames.SENTENCE, "UserSpecified", this, 1d); int start = 0; for (int s : sentenceEndPositions) { view.addSpanLabel(start, s, ViewNames.SENTENCE, 1d); start = s; } this.addView(ViewNames.SENTENCE, view); // Add a TOKENS view in order to access tokens the same way as everything else in the // sentence TokenLabelView tokenLabelView = new TokenLabelView(ViewNames.TOKENS, "UserSpecified", this, 1d); for (int i = 0; i < tokens.length; i++) { tokenLabelView.addConstituent(new Constituent("", ViewNames.TOKENS, this, i, i + 1)); } this.addView(ViewNames.TOKENS, tokenLabelView); }
@Override public void addView(TextAnnotation ta) { SpanLabelView view = new SpanLabelView(getViewName(), "From " + parseViewName, ta, 1.0, true); TreeView parse = (TreeView) ta.getView(parseViewName); Set<IntPair> set = new LinkedHashSet<>(); for (Constituent c : parse) { if (TreeView.isLeaf(c)) continue; if (ParseTreeProperties.isPreTerminal(c)) continue; String label = c.getLabel(); label = ParseUtils.stripFunctionTags(label); label = ParseUtils.stripIndexReferences(label); // This is the definition used in // Introduction to the CoNLL-2001 Shared Task: // Clause Identification if (label.startsWith("S") && !label.equals("S1")) { int start = c.getStartSpan(); int end = c.getEndSpan(); if (start >= 0 && end > start) { set.add(new IntPair(start, end)); } } } for (IntPair span : set) { view.addSpanLabel(span.getFirst(), span.getSecond(), "S", 1.0); } ta.addView(getViewName(), view); }
@Override public void addView(TextAnnotation ta) { SpanLabelView view = new SpanLabelView(getViewName(), "From " + parseViewName, ta, 1.0, true); TreeView parse = (TreeView) ta.getView(parseViewName); Set<IntPair> set = new LinkedHashSet<>(); for (Constituent c : parse) { if (TreeView.isLeaf(c)) continue; if (ParseTreeProperties.isPreTerminal(c)) continue; String label = c.getLabel(); label = ParseUtils.stripFunctionTags(label); label = ParseUtils.stripIndexReferences(label); // This is the definition used in // Introduction to the CoNLL-2001 Shared Task: // Clause Identification if (label.startsWith("S") && !label.equals("S1")) { int start = c.getStartSpan(); int end = c.getEndSpan(); if (start >= 0 && end > start) { set.add(new IntPair(start, end)); } } } for (IntPair span : set) { view.addSpanLabel(span.getFirst(), span.getSecond(), "S", 1.0); } ta.addView(getViewName(), view); }
view.addSpanLabel(start, s, ViewNames.SENTENCE, 1d); start = s;