edu.stanford.nlp.pipeline.CoreDocument Java code examples

// Usage walkthrough for the CoreDocument accessors; `text` is declared outside this excerpt.
// NOTE(review): presumably the document must be annotated by a pipeline before these accessors return data — confirm.
CoreDocument document = new CoreDocument(text);
// token at index 10 of the document-wide token list
CoreLabel token = document.tokens().get(10);
System.out.println("Example: token");
System.out.println(token);
// raw text of the first sentence
String sentenceText = document.sentences().get(0).text();
System.out.println("Example: sentence");
System.out.println(sentenceText);
// second sentence; `sentence` is not used again within this excerpt
CoreSentence sentence = document.sentences().get(1);
// NOTE(review): the result of this call is discarded, while `relations` printed below is never
// declared in this excerpt — the assignment line appears to have been dropped.
  document.sentences().get(4).relations();
System.out.println("Example: relation");
System.out.println(relations.get(0));
// second entity mention of the fourth sentence
CoreEntityMention originalEntityMention = document.sentences().get(3).entityMentions().get(1);
System.out.println("Example: original entity mention");
System.out.println(originalEntityMention);
// document-level coref chains, keyed by Integer
Map<Integer, CorefChain> corefChains = document.corefChains();
System.out.println("Example: coref chains for document");
System.out.println(corefChains);
// first quote of the document-wide quote list
List<CoreQuote> quotes = document.quotes();
CoreQuote quote = quotes.get(0);
// NOTE(review): only the heading is printed here; the excerpt ends before `quote` itself is printed.
System.out.println("Example: quote");

/** Render this document as the string form of its underlying annotation. */
@Override
public String toString() {
 final String rendered = annotation().toString();
 return rendered;
}

/** Finish building the wrapper views once a pipeline has annotated the document. **/
public void wrapAnnotations() {
 // without a sentences annotation there is nothing to wrap
 if (this.annotationDocument.get(CoreAnnotations.SentencesAnnotation.class) == null) {
  return;
 }
 wrapSentences();
 // the document-wide entity mention list is built only when the first sentence carries mentions
 boolean firstSentenceHasMentions =
   !sentences.isEmpty() && sentences.get(0).entityMentions() != null;
 if (firstSentenceHasMentions) {
  buildDocumentEntityMentionsList();
 }
 // the document-wide quote list is built only when quotes can be gathered from the annotation
 if (QuoteAnnotator.gatherQuotes(this.annotationDocument) != null) {
  buildDocumentQuotesList();
 }
}

int lastSentenceIndex = this.quoteCoreMap.get(CoreAnnotations.SentenceEndAnnotation.class);
for (int currSentIndex = firstSentenceIndex ; currSentIndex <= lastSentenceIndex ; currSentIndex++) {
 this.sentences.add(this.document.sentences().get(currSentIndex));
 for (int speakerTokenIndex = firstSpeakerTokenIndex ;
    speakerTokenIndex <= lastSpeakerTokenIndex ; speakerTokenIndex++) {
  this.speakerTokens.get().add(this.document.tokens().get(speakerTokenIndex));
     speakerTokens.get().size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
 this.speakerCharOffsets = Optional.of(new Pair<>(speakerCharOffsetBegin, speakerCharOffsetEnd));
 for (CoreEntityMention candidateEntityMention : this.document.entityMentions()) {
  Pair<Integer,Integer> entityMentionOffsets = candidateEntityMention.charOffsets();
  if (entityMentionOffsets.equals(this.speakerCharOffsets.get())) {
 for (int canonicalSpeakerTokenIndex = firstCanonicalSpeakerTokenIndex ;
    canonicalSpeakerTokenIndex <= lastCanonicalSpeakerTokenIndex ; canonicalSpeakerTokenIndex++) {
  this.canonicalSpeakerTokens.get().add(this.document.tokens().get(canonicalSpeakerTokenIndex));
     canonicalSpeakerTokens.get().size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
 this.canonicalSpeakerCharOffsets = Optional.of(new Pair<>(canonicalSpeakerCharOffsetBegin, canonicalSpeakerCharOffsetEnd));
 for (CoreEntityMention candidateEntityMention : this.document.entityMentions()) {
  Pair<Integer,Integer> entityMentionOffsets = candidateEntityMention.charOffsets();
  if (entityMentionOffsets.equals(this.canonicalSpeakerCharOffsets.get())) {

/**
 * Wrap an existing annotation and immediately build the wrapper views via
 * {@code wrapAnnotations()}.
 *
 * @param annotation the annotation to wrap; must not be null
 * @throws NullPointerException if {@code annotation} is null
 */
public CoreDocument(Annotation annotation) {
 // fail fast with a named NPE instead of an anonymous one raised later inside wrapAnnotations()
 this.annotationDocument = java.util.Objects.requireNonNull(annotation, "annotation");
 wrapAnnotations();
}

/**
 * Return the canonical entity mention for this entity mention.
 *
 * @return the entry of the enclosing document's entity-mention list at the index stored under
 *         {@code CanonicalEntityMentionIndexAnnotation}; empty when no index is stored (or when
 *         the list entry at that index is null)
 */
public Optional<CoreEntityMention> canonicalEntityMention() {
 CoreDocument myDocument = sentence.document();
 Optional<Integer> canonicalEntityMentionIndex =
   Optional.ofNullable(coreMap().get(CoreAnnotations.CanonicalEntityMentionIndexAnnotation.class));
 // reuse the document fetched above instead of calling sentence.document() a second time;
 // the lambda keeps the list lookup lazy, so it runs only when an index is present
 return canonicalEntityMentionIndex.map(index -> myDocument.entityMentions().get(index));
}

/**
 * Read in a CoreDocument from this input stream.
 *
 * @param is The input stream to read a CoreDocument's annotation from
 * @return A pair with the CoreDocument and the input stream handed back by {@code read(is)}
 * @throws IOException propagated from {@code read(is)}
 * @throws ClassNotFoundException propagated from {@code read(is)}
 * @throws ClassCastException propagated from {@code read(is)}
 */
public Pair<CoreDocument, InputStream> readCoreDocument(InputStream is)
  throws IOException, ClassNotFoundException, ClassCastException {
 Pair<Annotation, InputStream> readPair = read(is);
 CoreDocument readCoreDocument = new CoreDocument(readPair.first());
 // Return the stream reported by read(), not the argument: per the AnnotationSerializer.read
 // contract the returned stream may differ from the one passed in (e.g. a wrapping stream)
 // and is the one to use for subsequent reads.
 return new Pair<>(readCoreDocument, readPair.second());
}

int lastSentenceIndex = this.quoteCoreMap.get(CoreAnnotations.SentenceEndAnnotation.class);
for (int currSentIndex = firstSentenceIndex ; currSentIndex <= lastSentenceIndex ; currSentIndex++) {
 this.sentences.add(this.document.sentences().get(currSentIndex));
 for (int speakerTokenIndex = firstSpeakerTokenIndex ;
    speakerTokenIndex <= lastSpeakerTokenIndex ; speakerTokenIndex++) {
  this.speakerTokens.get().add(this.document.tokens().get(speakerTokenIndex));
     speakerTokens.get().size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
 this.speakerCharOffsets = Optional.of(new Pair<>(speakerCharOffsetBegin, speakerCharOffsetEnd));
 for (CoreEntityMention candidateEntityMention : this.document.entityMentions()) {
  Pair<Integer,Integer> entityMentionOffsets = candidateEntityMention.charOffsets();
  if (entityMentionOffsets.equals(this.speakerCharOffsets.get())) {
 for (int canonicalSpeakerTokenIndex = firstCanonicalSpeakerTokenIndex ;
    canonicalSpeakerTokenIndex <= lastCanonicalSpeakerTokenIndex ; canonicalSpeakerTokenIndex++) {
  this.canonicalSpeakerTokens.get().add(this.document.tokens().get(canonicalSpeakerTokenIndex));
     canonicalSpeakerTokens.get().size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
 this.canonicalSpeakerCharOffsets = Optional.of(new Pair<>(canonicalSpeakerCharOffsetBegin, canonicalSpeakerCharOffsetEnd));
 for (CoreEntityMention candidateEntityMention : this.document.entityMentions()) {
  Pair<Integer,Integer> entityMentionOffsets = candidateEntityMention.charOffsets();
  if (entityMentionOffsets.equals(this.canonicalSpeakerCharOffsets.get())) {

/** Annotate a CoreDocument wrapper in two steps: its annotation first, then its wrapper views. **/
public void annotate(CoreDocument document) {
 // step 1: run annotation on the Annotation held by the wrapper
 annotate(document.annotationDocument);
 // step 2: let the wrapper rebuild its sentence and entity mention views
 document.wrapAnnotations();
}

/**
 * Return the canonical entity mention for this entity mention.
 *
 * @return the entry of the enclosing document's entity-mention list at the index stored under
 *         {@code CanonicalEntityMentionIndexAnnotation}; empty when no index is stored (or when
 *         the list entry at that index is null)
 */
public Optional<CoreEntityMention> canonicalEntityMention() {
 CoreDocument myDocument = sentence.document();
 Optional<Integer> canonicalEntityMentionIndex =
   Optional.ofNullable(coreMap().get(CoreAnnotations.CanonicalEntityMentionIndexAnnotation.class));
 // reuse the document fetched above instead of calling sentence.document() a second time;
 // the lambda keeps the list lookup lazy, so it runs only when an index is present
 return canonicalEntityMentionIndex.map(index -> myDocument.entityMentions().get(index));
}

/**
 * Read in a CoreDocument from this input stream.
 *
 * @param is The input stream to read a CoreDocument's annotation from
 * @return A pair with the CoreDocument and the input stream handed back by {@code read(is)}
 * @throws IOException propagated from {@code read(is)}
 * @throws ClassNotFoundException propagated from {@code read(is)}
 * @throws ClassCastException propagated from {@code read(is)}
 */
public Pair<CoreDocument, InputStream> readCoreDocument(InputStream is)
  throws IOException, ClassNotFoundException, ClassCastException {
 Pair<Annotation, InputStream> readPair = read(is);
 CoreDocument readCoreDocument = new CoreDocument(readPair.first());
 // Return the stream reported by read(), not the argument: per the AnnotationSerializer.read
 // contract the returned stream may differ from the one passed in (e.g. a wrapping stream)
 // and is the one to use for subsequent reads.
 return new Pair<>(readCoreDocument, readPair.second());
}

/** Finish building the wrapper views once a pipeline has annotated the document. **/
public void wrapAnnotations() {
 // without a sentences annotation there is nothing to wrap
 if (this.annotationDocument.get(CoreAnnotations.SentencesAnnotation.class) == null) {
  return;
 }
 wrapSentences();
 // the document-wide entity mention list is built only when the first sentence carries mentions
 boolean firstSentenceHasMentions =
   !sentences.isEmpty() && sentences.get(0).entityMentions() != null;
 if (firstSentenceHasMentions) {
  buildDocumentEntityMentionsList();
 }
 // the document-wide quote list is built only when quotes can be gathered from the annotation
 if (QuoteAnnotator.gatherQuotes(this.annotationDocument) != null) {
  buildDocumentQuotesList();
 }
}

/**
 * Serialize a CoreDocument by appending its wrapped annotation to an output stream.
 *
 * @param document The CoreDocument whose internal annotation is written
 * @param os The output stream to append to
 * @return The output stream handed back by {@code write}, which should be closed
 * @throws IOException propagated from {@code write}
 */
public OutputStream writeCoreDocument(CoreDocument document, OutputStream os) throws IOException {
 // only the annotation held by the wrapper is passed on to write()
 return write(document.annotation(), os);
}

/**
 * Wrap an existing annotation and immediately build the wrapper views via
 * {@code wrapAnnotations()}.
 *
 * @param annotation the annotation to wrap; must not be null
 * @throws NullPointerException if {@code annotation} is null
 */
public CoreDocument(Annotation annotation) {
 // fail fast with a named NPE instead of an anonymous one raised later inside wrapAnnotations()
 this.annotationDocument = java.util.Objects.requireNonNull(annotation, "annotation");
 wrapAnnotations();
}

/** Render this document as the string form of its underlying annotation. */
@Override
public String toString() {
 final String rendered = annotation().toString();
 return rendered;
}

/** Annotate a CoreDocument wrapper in two steps: its annotation first, then its wrapper views. **/
public void annotate(CoreDocument document) {
 // step 1: run annotation on the Annotation held by the wrapper
 annotate(document.annotationDocument);
 // step 2: let the wrapper rebuild its sentence and entity mention views
 document.wrapAnnotations();
}

/**
 * Serialize a CoreDocument by appending its wrapped annotation to an output stream.
 *
 * @param document The CoreDocument whose internal annotation is written
 * @param os The output stream to append to
 * @return The output stream handed back by {@code write}, which should be closed
 * @throws IOException propagated from {@code write}
 */
public OutputStream writeCoreDocument(CoreDocument document, OutputStream os) throws IOException {
 // only the annotation held by the wrapper is passed on to write()
 return write(document.annotation(), os);
}

Javadoc

Wrapper around an annotation representing a document. Adds some helpful methods.

Most used methods

<init>
annotation
provide access to the underlying annotation if needed
buildDocumentEntityMentionsList
build a list of all entity mentions in the document from the sentences
buildDocumentQuotesList
corefChains
coref info
entityMentions
the list of entity mentions in this document
quotes
the list of quotes in this document
sentences
the list of sentences in this document
tokens
return the full token list for this doc
wrapAnnotations
complete the wrapping process post annotation by a pipeline
wrapSentences
create a list of CoreSentence objects based on the Annotation's sentences

wrapSentences

Popular in Java

Making http post requests using okhttp
getSharedPreferences (Context)
notifyDataSetChanged (ArrayAdapter)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
ByteBuffer (java.nio)
A buffer for bytes. A byte buffer can be created in either one of the following ways: * #allocate
KeyStore (java.security)
KeyStore is responsible for maintaining cryptographic keys and their owners. The type of the syste
Stream (java.util.stream)
A sequence of elements supporting sequential and parallel aggregate operations. The following exampl
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
IOUtils (org.apache.commons.io)
General IO stream manipulation utilities. This class provides static utility methods for input/outpu
Component (java.awt)
A component is an object having a graphical representation that can be displayed on the screen and t
Top plugins for WebStorm

How to use CoreDocument in edu.stanford.nlp.pipeline

Best Java code snippets using edu.stanford.nlp.pipeline.CoreDocument (Showing top 17 results out of 315)

How to use
CoreDocument
in
edu.stanford.nlp.pipeline