Refine search
/**
 * Applies a predicted speaker to the given quote, if both a speaker name and a
 * resolution method were supplied.
 *
 * @param quote the quote annotation to update
 * @param speakerAndMethod pair of (speaker name, method label); either half may be null
 * @return true when the quote was updated, false when either half of the pair was null
 */
public boolean updatePredictions(CoreMap quote, Pair<String, String> speakerAndMethod) {
  String speaker = speakerAndMethod.first;
  String method = speakerAndMethod.second;
  if (speaker == null || method == null) {
    return false;
  }
  quote.set(QuoteAttributionAnnotator.SpeakerAnnotation.class,
      characterMap.get(speaker).get(0).name);
  quote.set(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class,
      "Baseline Top" + method);
  return true;
}
private CoreMap makeTimexMap(TimexNode node, List<CoreLabel> tokens, CoreMap sentence) { CoreMap timexMap = new ArrayCoreMap(); timexMap.set(TimeAnnotations.TimexAnnotation.class, node.timex); timexMap.set(CoreAnnotations.TextAnnotation.class, node.contents); timexMap.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, beginOffset(tokens.get(0))); timexMap.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, endOffset(tokens.get(tokens.size()-1))); timexMap.set(CoreAnnotations.TokenBeginAnnotation.class, tokens.get(0).index()); timexMap.set(CoreAnnotations.TokenEndAnnotation.class, tokens.get(tokens.size()-1).index()); timexMap.set(CoreAnnotations.TokensAnnotation.class, tokens); if (sentence.get(TimeAnnotations.TimexAnnotations.class) == null) { sentence.set(TimeAnnotations.TimexAnnotations.class, new ArrayList<>()); } sentence.get(TimeAnnotations.TimexAnnotations.class).add(timexMap); // update NER for tokens for (CoreLabel token : tokens) { token.set(CoreAnnotations.NamedEntityTagAnnotation.class, "DATE"); token.set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, node.timex.value()); token.set(TimeAnnotations.TimexAnnotation.class, node.timex); } return timexMap; }
// NOTE(review): fragment — the enclosing method's signature and the for-loop's
// closing brace are not visible in this view; the sentence-level statements that
// follow tokens.add(token) presumably belong OUTSIDE the per-word loop — confirm
// against the full source before editing.
// Builds one CoreLabel per word (capacity hint 12), setting word/value/lemma and
// character offsets that assume words are separated by exactly one space
// (beginChar advances by length + 1). Afterwards the sentence receives basic,
// collapsed, and CC-processed dependency graphs from `tree`, an alternative
// graph from `maltTree`, doc id / sentence index defaults, and token span
// metadata; finally an Annotation document is created over the gloss.
for (int i = 0; i < words.size(); ++i) { CoreLabel token = new CoreLabel(12); token.setWord(words.get(i)); token.setValue(words.get(i)); token.setBeginPosition(beginChar); token.setEndPosition(beginChar + words.get(i).length()); beginChar += words.get(i).length() + 1; token.setLemma(lemmas.get(i)); token.set(CoreAnnotations.TokenBeginAnnotation.class, i); token.set(CoreAnnotations.TokenEndAnnotation.class, i + 1); tokens.add(token); sentence.set(CoreAnnotations.TokensAnnotation.class, tokens); SemanticGraph graph = tree.apply(tokens); sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, graph); sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, graph); sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, graph); SemanticGraph maltGraph = maltTree.apply(tokens); sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, maltGraph); sentence.set(CoreAnnotations.DocIDAnnotation.class, docid.orElse("???")); sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex.orElse(-1)); sentence.set(CoreAnnotations.TextAnnotation.class, gloss); sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0); sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokens.size()); Annotation doc = new Annotation(gloss); doc.set(CoreAnnotations.TokensAnnotation.class, tokens);
public void annotateSentence(CoreMap sentence) { // this stores all relation mentions generated by this extractor List<RelationMention> relations = new ArrayList<>(); // extractAllRelations creates new objects for every predicted relation for (RelationMention rel : extractAllRelations(sentence)) { // add all relations. potentially useful for a joint model // if (! RelationMention.isUnrelatedLabel(rel.getType())) relations.add(rel); } // caution: this removes the old list of relation mentions! for (RelationMention r: relations) { if (! r.getType().equals(RelationMention.UNRELATED)) { logger.fine("Found positive relation in annotateSentence: " + r); } } sentence.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, relations); }
/**
 * Fallback sieve: assigns the top (majority) speaker to every quote that still
 * has no speaker annotation.
 *
 * @param doc the annotated document whose quotes are examined
 */
public void doMentionToSpeaker(Annotation doc) {
  for (CoreMap quote : doc.get(CoreAnnotations.QuotationsAnnotation.class)) {
    if (quote.get(QuoteAttributionAnnotator.SpeakerAnnotation.class) != null) {
      continue; // already attributed by an earlier sieve
    }
    quote.set(QuoteAttributionAnnotator.SpeakerAnnotation.class,
        characterMap.get(Counters.toSortedList(topSpeakerList).get(0)).get(0).name);
    quote.set(QuoteAttributionAnnotator.SpeakerSieveAnnotation.class,
        "majority speaker baseline");
  }
}

}
/**
 * Loads auxiliary (unsupervised) documents for dropout training and, when
 * feature discovery is enabled, returns them concatenated with the supervised
 * documents.
 *
 * <p>Every token of the unsupervised data is labeled with the background symbol
 * for both its answer and gold-answer annotations, since that data carries no
 * labels.
 *
 * @param docs the supervised training documents
 * @param readerAndWriter reader used to parse the unsupervised dropout file
 * @return the supervised documents, with the unsupervised ones appended when
 *         {@code flags.doFeatureDiscovery} is set and unsupervised data exists
 */
@Override
protected Collection<List<IN>> loadAuxiliaryData(Collection<List<IN>> docs,
                                                 DocumentReaderAndWriter<IN> readerAndWriter) {
  if (flags.unsupDropoutFile != null) {
    log.info("Reading unsupervised dropout data from file: " + flags.unsupDropoutFile);
    Timing timer = new Timing();
    timer.start();
    unsupDocs = new ArrayList<>();
    ObjectBank<List<IN>> unsupObjBank = makeObjectBankFromFile(flags.unsupDropoutFile, readerAndWriter);
    for (List<IN> doc : unsupObjBank) {
      // Unsupervised data is unlabeled: mark every token as background.
      for (IN tok : doc) {
        tok.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
        tok.set(CoreAnnotations.GoldAnswerAnnotation.class, flags.backgroundSymbol);
      }
      unsupDocs.add(doc);
    }
    long elapsedMs = timer.stop();
    // Fixed doubled colon in the original message ("Time to read: : ").
    log.info("Time to read: " + Timing.toSecondsString(elapsedMs) + " seconds");
  }
  if (unsupDocs != null && flags.doFeatureDiscovery) {
    // Presize to avoid intermediate growth while concatenating both lists.
    List<List<IN>> totalDocs = new ArrayList<>(docs.size() + unsupDocs.size());
    totalDocs.addAll(docs);
    totalDocs.addAll(unsupDocs);
    return totalDocs;
  } else {
    return docs;
  }
}
/**
 * Appends the given entity mentions to the sentence's entity-mention list,
 * creating and attaching the list first if the sentence has none yet.
 *
 * @param sentence the sentence to receive the mentions
 * @param args the entity mentions to add
 */
public static void addEntityMentions(CoreMap sentence, Collection<EntityMention> args) {
  List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
  if (mentions == null) {
    mentions = new ArrayList<>();
    sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, mentions);
  }
  mentions.addAll(args);
}
/** * Concatenates separate words of a date or other numeric quantity into one node (e.g., 3 November -> 3_November) * Tag is CD or NNP, and other words are added to the remove list */ private static <E extends CoreMap> void concatenateNumericString(List<E> words, List<E> toRemove) { if (words.size() <= 1) return; boolean first = true; StringBuilder newText = new StringBuilder(); E foundEntity = null; for (E word : words) { if (foundEntity == null && (word.get(CoreAnnotations.PartOfSpeechAnnotation.class).equals("CD") || word.get(CoreAnnotations.PartOfSpeechAnnotation.class).equals("NNP"))) { foundEntity = word; } if (first) { first = false; } else { newText.append('_'); } newText.append(word.get(CoreAnnotations.TextAnnotation.class)); } if (foundEntity == null) { foundEntity = words.get(0);//if we didn't find one with the appropriate tag, just take the first one } toRemove.addAll(words); toRemove.remove(foundEntity); foundEntity.set(CoreAnnotations.PartOfSpeechAnnotation.class, "CD"); // cdm 2008: is this actually good for dates?? String collapsed = newText.toString(); foundEntity.set(CoreAnnotations.TextAnnotation.class, collapsed); foundEntity.set(CoreAnnotations.OriginalTextAnnotation.class, collapsed); }
/**
 * Adds a single relation mention to the sentence's relation-mention list,
 * creating and attaching the list first if the sentence has none yet.
 *
 * @param sentence the sentence to receive the mention
 * @param arg the relation mention to add
 */
public static void addRelationMention(CoreMap sentence, RelationMention arg) {
  List<RelationMention> mentions = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
  if (mentions == null) {
    mentions = new ArrayList<>();
    sentence.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, mentions);
  }
  mentions.add(arg);
}
// NOTE(review): garbled fragment — `chunk` is declared twice, braces do not
// balance, and `tokenEnd`, `prevTagType`, `chunks`, and the chunk-key params are
// not declared within this view. Lines appear to be missing from this extract;
// restore from the full source before editing.
// Intent (presumably): walk the tokens, detect label-tag-type boundaries, and
// when a chunk closes, materialize it via ChunkAnnotationUtils.getAnnotatedChunk,
// stamp it with the previous tag type, and collect it; the second
// getAnnotatedChunk call looks like the end-of-list flush for a still-open
// chunk — TODO confirm.
int tokenBegin = -1; for (int i = 0; i < tokens.size(); i++) { CoreLabel token = tokens.get(i); String label = (String) token.get(labelKey); LabelTagType curTagType = getTagType(label); CoreLabel prev = null; if (i > 0) { prev = tokens.get(i-1); CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokenEnd, totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey); chunk.set(labelKey, prevTagType.type); chunks.add(chunk); tokenBegin = -1; CoreMap chunk = ChunkAnnotationUtils.getAnnotatedChunk(tokens, tokenBegin, tokens.size(), totalTokensOffset, tokenChunkKey, textKey, tokenLabelKey); chunk.set(labelKey, prevTagType.type); chunks.add(chunk);
/**
 * Draws one label sequence from the sampler and returns it as a fresh token
 * list whose answer annotations carry the sampled class labels. The input
 * tokens themselves are not modified.
 */
@Override
public List<IN> drawSample() {
  int[] sampled = sampler.bestSequence(model);
  List<IN> result = new ArrayList<>();
  int pos = 0;
  for (IN original : input) {
    IN copy = tokenFactory.makeToken(original);
    copy.set(CoreAnnotations.AnswerAnnotation.class, classIndex.get(sampled[pos++]));
    result.add(copy);
  }
  return result;
}
};
// NOTE(review): garbled fragment — braces do not balance and `sentTokenBegin` /
// `sentTokenEnd` are never assigned within this view; lines appear to be missing
// from this extract. Restore from the full source before editing.
// Intent (presumably): align each sentence's first token against the document
// token list by identity (==, not equals), advancing index i; returns false when
// a sentence's first token cannot be found, otherwise records the sentence's
// token span — TODO confirm where sentTokenBegin/sentTokenEnd are computed.
List<CoreLabel> docTokens = docAnnotation.get(CoreAnnotations.TokensAnnotation.class); List<CoreMap> sentences = docAnnotation.get(CoreAnnotations.SentencesAnnotation.class); int i = 0; CoreLabel curDocToken = docTokens.get(0); for (CoreMap sentence:sentences) { List<CoreLabel> sentTokens = sentence.get(CoreAnnotations.TokensAnnotation.class); CoreLabel sentTokenFirst = sentTokens.get(0); while (curDocToken != sentTokenFirst) { i++; if (i >= docTokens.size()) { return false; } curDocToken = docTokens.get(i); sentence.set(CoreAnnotations.TokenBeginAnnotation.class, sentTokenBegin); sentence.set(CoreAnnotations.TokenEndAnnotation.class, sentTokenEnd);
/**
 * Annotates a CoreMap representing a chunk with basic chunk information:
 * <ul>
 *   <li>CharacterOffsetBeginAnnotation - begin offset of the first token in the chunk</li>
 *   <li>CharacterOffsetEndAnnotation - end offset of the last token in the chunk</li>
 *   <li>TokensAnnotation - list of tokens in this chunk</li>
 *   <li>TokenBeginAnnotation - tokenStartIndex + totalTokenOffset</li>
 *   <li>TokenEndAnnotation - tokenEndIndex + totalTokenOffset</li>
 * </ul>
 *
 * @param chunk CoreMap to be annotated
 * @param tokens list of tokens to look for chunks
 * @param tokenStartIndex index (relative to the current token list) at which this chunk starts
 * @param tokenEndIndex index (relative to the current token list) at which this chunk ends (exclusive)
 * @param totalTokenOffset offset converting local indices to indices in the original token list
 */
public static void annotateChunk(CoreMap chunk, List<CoreLabel> tokens,
                                 int tokenStartIndex, int tokenEndIndex, int totalTokenOffset) {
  List<CoreLabel> chunkTokens = new ArrayList<>(tokens.subList(tokenStartIndex, tokenEndIndex));
  CoreLabel firstToken = chunkTokens.get(0);
  CoreLabel lastToken = chunkTokens.get(chunkTokens.size() - 1);
  chunk.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class,
      firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
  chunk.set(CoreAnnotations.CharacterOffsetEndAnnotation.class,
      lastToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
  chunk.set(CoreAnnotations.TokensAnnotation.class, chunkTokens);
  chunk.set(CoreAnnotations.TokenBeginAnnotation.class, tokenStartIndex + totalTokenOffset);
  chunk.set(CoreAnnotations.TokenEndAnnotation.class, tokenEndIndex + totalTokenOffset);
}
/**
 * Appends the given relation mentions to the sentence's relation-mention list,
 * creating and attaching the list first if the sentence has none yet.
 *
 * @param sentence the sentence to receive the mentions
 * @param args the relation mentions to add
 */
public static void addRelationMentions(CoreMap sentence, Collection<RelationMention> args) {
  List<RelationMention> mentions = sentence.get(MachineReadingAnnotations.RelationMentionsAnnotation.class);
  if (mentions == null) {
    mentions = new ArrayList<>();
    sentence.set(MachineReadingAnnotations.RelationMentionsAnnotation.class, mentions);
  }
  mentions.addAll(args);
}
// NOTE(review): fragment — the enclosing method's signature is not visible and
// the for-loop's closing brace is missing from this view; the final TokensAnnotation
// set presumably belongs AFTER the loop — confirm against the full source.
// Intent: merge chunks [chunkIndexStart, chunkIndexEnd) into newChunk, copying
// the span's character/token boundaries from the first and last chunk and
// concatenating all member chunks' tokens.
CoreMap firstChunk = chunkList.get(chunkIndexStart); CoreMap lastChunk = chunkList.get(chunkIndexEnd-1); int firstCharOffset = firstChunk.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class); int lastCharOffset = lastChunk.get(CoreAnnotations.CharacterOffsetEndAnnotation.class); int firstTokenIndex = firstChunk.get(CoreAnnotations.TokenBeginAnnotation.class); int lastTokenIndex = lastChunk.get(CoreAnnotations.TokenEndAnnotation.class); newChunk.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, firstCharOffset); newChunk.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, lastCharOffset); newChunk.set(CoreAnnotations.TokenBeginAnnotation.class, firstTokenIndex); newChunk.set(CoreAnnotations.TokenEndAnnotation.class, lastTokenIndex); List<CoreLabel> tokens = new ArrayList<>(lastTokenIndex - firstTokenIndex); for (int i = chunkIndexStart; i < chunkIndexEnd; i++) { CoreMap chunk = chunkList.get(i); tokens.addAll(chunk.get(CoreAnnotations.TokensAnnotation.class)); newChunk.set(CoreAnnotations.TokensAnnotation.class, tokens);
/**
 * Appends the given sentences to the dataset's sentence list, creating and
 * attaching the list first if the dataset has none yet.
 *
 * @param dataset the dataset CoreMap to update
 * @param sentences the sentences to append
 */
public static void addSentences(CoreMap dataset, List<CoreMap> sentences) {
  List<CoreMap> sents = dataset.get(CoreAnnotations.SentencesAnnotation.class);
  if (sents == null) {
    sents = new ArrayList<>();
    dataset.set(CoreAnnotations.SentencesAnnotation.class, sents);
  }
  // addAll replaces the original element-by-element copy loop (same behavior).
  sents.addAll(sentences);
}
// NOTE(review): garbled fragment — braces do not balance, and `token`, `index`,
// `deps`, `bf`, `extraDeps`, `uposLabels`, and `tree` are not declared within
// this view; lines appear to be missing from this extract. Restore from the full
// source before editing.
// Intent (presumably): map grammatical-structure leaf indices to 1-based
// positions, build CoreLabel tokens (word from the leaf, tag from its parent,
// coarse tag from the UPOS labels), then emit dependencies in CoNLL-X format,
// routing "extra" dependencies to a separate list — TODO confirm.
List<Tree> gsLeaves = gs.root().getLeaves(); for (int i = 0; i < gsLeaves.size(); i++) { TreeGraphNode leaf = (TreeGraphNode) gsLeaves.get(i); indexToPos.put(leaf.label().index(), i + 1); token.setWord(leaf.value()); token.setTag(leaf.parent(tree).value()); token.set(CoreAnnotations.CoarseTagAnnotation.class, uposLabels.get(index - 1).value()); tokens.add(token); sentence.set(CoreAnnotations.TokensAnnotation.class, tokens); bf.append(dependenciesToCoNLLXString(deps, sentence)); } else { for (TypedDependency dep : deps) { if (dep.extra()) { extraDeps.add(dep); } else { bf.append(toStringIndex(dep, indexToPos));