/**
 * Splits this document into one KAFDocument per sentence.
 *
 * Each produced document shares this document's language, version and raw text,
 * and receives copies (by reference, via addExistingAnnotation) of every
 * sentence-level annotation belonging to that sentence.
 *
 * @return the per-sentence documents, ordered by paragraph and sentence number
 */
public List<KAFDocument> splitInSentences() {
    final List<KAFDocument> result = new ArrayList<KAFDocument>();
    final Integer totalParagraphs = this.getNumParagraphs();
    for (Integer para = 1; para <= totalParagraphs; para++) {
        for (Integer sentNum : this.getSentsByParagraph(para)) {
            final KAFDocument sentenceDoc = new KAFDocument(this.getLang(), this.getVersion());
            sentenceDoc.setRawText(this.getRawText());
            for (AnnotationType annType : highLevelAnnotationTypes) {
                // Only sentence-level layers are carried over into the split documents.
                if (!isSentenceLevelAnnotationType(annType)) {
                    continue;
                }
                final Layer targetLayer = highLevelAnnotationType2Layer.get(annType);
                final List<Annotation> toCopy;
                if (isMultiLayerAnnotationType(annType)) {
                    // Multi-layer types are partitioned by group: gather every group's slice.
                    toCopy = new ArrayList<Annotation>();
                    for (String groupId : annotationContainer.getGroupIDs(annType)) {
                        toCopy.addAll(this.getBySent(annType, groupId, sentNum));
                    }
                } else {
                    toCopy = this.getBySent(annType, sentNum);
                }
                for (Annotation annotation : toCopy) {
                    sentenceDoc.addExistingAnnotation(annotation, targetLayer, annType);
                }
            }
            result.add(sentenceDoc);
        }
    }
    return result;
}
// Fragment (view truncated — enclosing method not fully visible): merges the
// linguistic-processor header entries of another document ('doc') into this one,
// registering each processor that is not already present, and copies doc's
// annotations (WFs, terms, deps, chunks, entities) into this document via the
// copiedWFs/copiedTerms id-remapping maps.
// NOTE(review): the copy loops for WFs, terms, deps, chunks and entities appear
// to be NESTED inside one another AND inside the per-processor loop — presumably
// they should be sequential sibling loops after the LP merge; as written each
// inner collection would be re-copied once per element of every outer loop.
// TODO confirm against the complete method in the original file before changing.
Map<String, List<LinguisticProcessor>> lps = doc.getLinguisticProcessors(); for (Map.Entry<String, List<LinguisticProcessor>> entry : lps.entrySet()) { String layer = entry.getKey(); List<LinguisticProcessor> lpList = entry.getValue(); for (LinguisticProcessor lp : lpList) { if (!this.linguisticProcessorExists(layer, lp.name, lp.version)) { this.addLinguisticProcessor(layer, lp.name, lp.timestamp, lp.version); for (WF wf : doc.getWFs()) { WF wfCopy = new WF(wf, this.annotationContainer); this.insertWF(wfCopy); copiedWFs.put(wf.getId(), wfCopy); for (Term term : doc.getTerms()) { Term termCopy = new Term(term, copiedWFs); this.insertTerm(termCopy); copiedTerms.put(term.getId(), termCopy); for (Dep dep : doc.getDeps()) { Dep depCopy = new Dep(dep, copiedTerms); this.insertDep(depCopy); for (Chunk chunk : doc.getChunks()) { Chunk chunkCopy = new Chunk(chunk, copiedTerms); this.insertChunk(chunkCopy); for (Entity entity : doc.getEntities()) { Entity entityCopy = new Entity(entity, copiedTerms);
// Fragment (garbled extraction — NOT valid Java as shown): several non-contiguous
// snippets appear fused onto one line: (1) collecting entity/timex spans into a
// 'markables' map keyed by the span's head term; (2) a dependency-walk loop that
// stops when a term has no governing Dep; (3) pieces of a ternary expression
// building a dependency-path regex ("(COORD CONJ?)* NAME" etc.) used with
// getTermsByDepAncestors to expand a head term into a full span sorted by offset.
// NOTE(review): the bare 'break;' statements and orphan ':' tokens indicate lines
// were dropped between these snippets — restore from the original file; do not
// attempt to compile or edit this fragment as-is.
for (final Entity entity : document.getEntitiesByTerm(term)) { markables.put(document.getTermsHead(entity.getTerms()), entity.getTerms()); for (final Timex3 timex : document.getTimeExsByWF(wf)) { final List<Term> span = document.getTermsByWFs(timex.getSpan().getTargets()); markables.put(document.getTermsHead(span), span); break; final Dep dep = document.getDepToTerm(t); if (dep == null) { break; final Term head = document.getTermsHead(terms); : "(COORD CONJ?)* NAME" : includeModifiers ? "((NAME|NMOD|AMOD|TMP) .*)?" : "NAME"; terms.addAll(document.getTermsByDepAncestors(Collections.singleton(head), regex)); return KAFDocument.newTermSpan(Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms), head);
/**
 * Two KAFDocuments are equal when they agree on language, NAF version,
 * the NAF header, and every annotation layer.
 *
 * NOTE(review): ensure a consistent {@code hashCode} override exists elsewhere
 * in this class — it is not visible in this chunk.
 */
@Override
public boolean equals(Object o) {
    // Reflexive shortcut: a document always equals itself.
    if (this == o) {
        return true;
    }
    if (!(o instanceof KAFDocument)) {
        return false;
    }
    final KAFDocument other = (KAFDocument) o;
    // Language and version must both match.
    final boolean sameLangAndVersion =
            this.getLang().equals(other.getLang())
            && this.getVersion().equals(other.getVersion());
    if (!sameLangAndVersion) {
        return false;
    }
    // NAF header comparison is delegated to headerEquals.
    if (!this.headerEquals(other)) {
        return false;
    }
    // Finally, every annotation layer must be equal.
    return Utils.areEquals(this.annotationContainer, other.annotationContainer);
}
/**
 * Builds a constituency-tree terminal node covering a single term.
 *
 * NOTE(review): the {@code token} parameter is unused in this body; it is kept
 * to preserve the existing signature for callers — confirm whether it can be
 * dropped file-wide.
 *
 * @param token unused (see note above)
 * @param term  the term the terminal node spans
 * @param kaf   the document that owns the new span and terminal
 * @return the freshly created terminal
 */
private static Terminal createTerminal(String token, Term term, KAFDocument kaf) {
    final Span<Term> termSpan = kaf.newTermSpan();
    termSpan.addTarget(term);
    return kaf.newTerminal(termSpan);
}
/**
 * Converts an LKAnnotationEntity's referred-entity list into a term span.
 *
 * Each referred entity's {@code localURI} is parsed as a 1-based positional
 * index into the document's term list (assumes term order matches those
 * indices — TODO confirm this invariant holds for all LK inputs).
 *
 * @param entity   the LK entity whose references are resolved; may have a null
 *                 {@code referred} list, in which case the span is empty
 * @param document the document whose terms are indexed
 * @return a span over the resolved terms (empty when nothing is referred)
 * @throws NumberFormatException if a localURI is not a valid integer
 */
private static Span<Term> getSpanFromEntity(LKAnnotationEntity entity, KAFDocument document) {
    Span<Term> returnSpan = KAFDocument.newTermSpan();
    if (entity.referred != null) {
        // Hoisted out of the loop: getTerms() is loop-invariant, and calling it
        // once avoids rebuilding/retraversing the term list per reference.
        List<Term> terms = document.getTerms();
        for (LKAnnotationEntity referredEntity : entity.referred) {
            int termID = Integer.parseInt(referredEntity.localURI);
            // localURI is 1-based; the term list is 0-based.
            returnSpan.addTarget(terms.get(termID - 1));
        }
    }
    return returnSpan;
}
// Fragment (view truncated — enclosing parse method not fully visible): builds a
// KAFDocument from a parsed NAF XML tree. Reads lang/version off the root element,
// registers linguistic processors (with optional timestamps), fills the fileDesc
// and public header sections from optional attributes, captures the raw text, and
// creates WFs (this variant requires sent/offset/length attributes) and term spans
// from <span> child elements.
// NOTE(review): dangling open braces here belong to code beyond this view.
String lang = getAttribute("lang", rootElem, Namespace.XML_NAMESPACE); String kafVersion = getAttribute("version", rootElem); KAFDocument kaf = new KAFDocument(lang, kafVersion); for (Element lpElem : lpElems) { String name = getAttribute("name", lpElem); KAFDocument.LinguisticProcessor newLp = kaf.addLinguisticProcessor(layer, name); String timestamp = getOptAttribute("timestamp", lpElem); if (timestamp != null) { KAFDocument.FileDesc fd = kaf.createFileDesc(); String author = getOptAttribute("author", fileDescElem); if (author != null) { KAFDocument.Public pub = kaf.createPublic(); String publicId = getOptAttribute("publicId", publicElem); if (publicId != null) { kaf.setRawText(elem.getText()); rootChildrenElems.remove(elem); String wForm = wfElem.getText(); String wSent = getAttribute("sent", wfElem); WF newWf = kaf.newWF(wid, Integer.valueOf(wOffset), Integer.valueOf(wLength), wForm, Integer.valueOf(wSent)); String wPara = getOptAttribute("para", wfElem); if (wPara != null) { Span<Term> span = kaf.newTermSpan(); List<Element> targetElems = spanElem.getChildren();
// Fragment (view truncated): near-duplicate of the other NAF parsing fragment in
// this file, differing in the WF construction — this variant calls
// kaf.newWF(wid, wForm, sent) WITHOUT offset/length, and additionally handles a
// "text" element branch and mark/term span children.
// NOTE(review): two parallel parser variants like this usually correspond to
// different NAF versions (e.g. legacy KAF vs current NAF readers) — presumably
// intentional duplication across reader classes; verify before deduplicating.
String lang = getAttribute("lang", rootElem, Namespace.XML_NAMESPACE); String kafVersion = getAttribute("version", rootElem); KAFDocument kaf = new KAFDocument(lang, kafVersion); for (Element lpElem : lpElems) { String name = getAttribute("name", lpElem); LinguisticProcessor newLp = kaf.addLinguisticProcessor(layer, name); String timestamp = getOptAttribute("timestamp", lpElem); if (timestamp != null) { KAFDocument.FileDesc fd = kaf.createFileDesc(); String author = getOptAttribute("author", fileDescElem); if (author != null) { KAFDocument.Public pub = kaf.createPublic(); String publicId = getOptAttribute("publicId", publicElem); if (publicId != null) { kaf.setRawText(elem.getText()); } else if (elem.getName().equals("text")) { List<Element> wfElems = elem.getChildren(); WF newWf = kaf.newWF(wid, wForm, Integer.valueOf(wSent)); String wPara = getOptAttribute("para", wfElem); if (wPara != null) { Span<Term> span = kaf.newTermSpan(); for (Element marksTermElem : marksTermElems) { String termId = getAttribute("id", marksTermElem);
// Fragment (view truncated): emits RDF metadata triples for the wrapped NAF
// document — fileDesc title/author, document language, raw text, NAF version,
// public identifier, and one KS.LAYER triple per linguistic-processor layer.
// NOTE(review): this.document.getPublic() is dereferenced without a null check,
// unlike getFileDesc()/getLang()/getRawText() above it — presumably the header
// is guaranteed to have a <public> section at this point; confirm, else NPE.
if (this.document.getFileDesc() != null) { final FileDesc fd = this.document.getFileDesc(); emitMeta(docURI, DCTERMS.TITLE, fd.title); emitMeta(docURI, DCTERMS.CREATOR, fd.author); if (this.document.getLang() != null) { emitMeta(docURI, DCTERMS.LANGUAGE, ModelUtil.languageCodeToURI(this.document.getLang())); if (this.document.getRawText() != null) { final String rawText = this.document.getRawText(); final StringBuilder builder = new StringBuilder(); boolean addSpace = false; emitMeta(nafURI, KS.VERSION, this.document.getVersion()); emitMeta(nafURI, DCTERMS.IDENTIFIER, this.document.getPublic().publicId); .getLinguisticProcessors().entrySet()) { emitMeta(nafURI, KS.LAYER, FACTORY.createURI(KS.NAMESPACE, "layer_" + entry.getKey()));
/**
 * Splits this document into one KAFDocument per paragraph.
 *
 * Each produced document shares this document's language, version and raw text,
 * and receives (via addExistingAnnotation) every paragraph-level annotation
 * belonging to that paragraph.
 *
 * @return the per-paragraph documents, in paragraph order
 */
public List<KAFDocument> splitInParagraphs() {
    final List<KAFDocument> result = new ArrayList<KAFDocument>();
    final Integer totalParagraphs = this.getNumParagraphs();
    for (Integer para = 1; para <= totalParagraphs; para++) {
        final KAFDocument paragraphDoc = new KAFDocument(this.getLang(), this.getVersion());
        paragraphDoc.setRawText(this.getRawText());
        for (AnnotationType annType : highLevelAnnotationTypes) {
            // Only paragraph-level layers are carried over into the split documents.
            if (!isParagraphLevelAnnotationType(annType)) {
                continue;
            }
            final Layer targetLayer = highLevelAnnotationType2Layer.get(annType);
            final List<Annotation> toCopy;
            if (isMultiLayerAnnotationType(annType)) {
                // Multi-layer types are partitioned by group: gather every group's slice.
                toCopy = new ArrayList<Annotation>();
                for (String groupId : annotationContainer.getGroupIDs(annType)) {
                    toCopy.addAll(this.getByPara(annType, groupId, para));
                }
            } else {
                toCopy = this.getByPara(annType, para);
            }
            for (Annotation annotation : toCopy) {
                paragraphDoc.addExistingAnnotation(annotation, targetLayer, annType);
            }
        }
        result.add(paragraphDoc);
    }
    return result;
}
// Fragment (view truncated): loads a NAF file, unescapes its raw text, and — unless
// opinions already exist and forceOpinion is false — builds a new Opinion from an
// MPQA record: holder span from the agent annotation, target span from the target
// annotation (each only when non-empty), expression span with the writerAttitude
// polarity, then saves the document back in place.
// NOTE(review): StringEscapeUtils.unescapeHtml is the deprecated commons-lang 2.x
// name (3.x uses unescapeHtml4) — presumably pinned to the older dependency; verify.
KAFDocument document = KAFDocument.createFromFile(file); text = document.getRawText(); text = StringEscapeUtils.unescapeHtml(text); List<Term> terms = document.getTerms(); List<Opinion> opinions = document.getOpinions(); if (opinions.size() > 0 && !forceOpinion) { LOGGER.info("Opinions already present, skipping..."); Opinion opinion = document.newOpinion(); opinion.setLabel(label); sourceSpan.addAll(eu.fbk.dkm.pikes.resources.mpqa.CorpusAnnotator.getSpan(terms, agent.getSpan())); if (sourceSpan.size() > 0) { Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(KAFDocument.newTermSpan(sourceSpan)); String attitude = agent.getValue("writerAttitude"); if (attitude != null) { targetSpan.addAll(eu.fbk.dkm.pikes.resources.mpqa.CorpusAnnotator.getSpan(terms, target.getSpan())); if (targetSpan.size() > 0) { Opinion.OpinionTarget opinionTarget = opinion.createOpinionTarget(KAFDocument.newTermSpan(targetSpan)); String attitude = target.getValue("writerAttitude"); if (attitude != null) { opinion.createOpinionExpression(KAFDocument.newTermSpan(attitudeSpan)); opinion.getOpinionExpression().setPolarity(record.getValue(attribute)); document.save(file.getAbsolutePath());
// Fragment (view truncated): transfers gold opinions from a source NAF document to
// a parallel target NAF ('nafDoc'). Indexes the target's terms by id, builds a
// WF-id -> term-id converter by swapping the 'w' prefix for 't', checks whether
// gold-vua-opinion labels already exist, then re-creates each opinion in the target
// with expression and holder spans remapped through idConverter/nafTerms.
// NOTE(review): nafTerms.get(...) may return null if an id fails to map — confirm
// addTarget(null) is rejected upstream or ids are guaranteed aligned between files.
File file = fileIterator.next(); String fileBaseName = FilenameUtils.removeExtension(file.getName()); KAFDocument document = KAFDocument.createFromFile(file); KAFDocument nafDoc = KAFDocument.createFromFile(nafFile); HashMap<String, Term> nafTerms = new HashMap<>(); for (Term term : nafDoc.getTerms()) { nafTerms.put(term.getId(), term); for (WF wf : document.getWFs()) { String id = wf.getId(); id = id.replace('w', 't'); for (Opinion opinion : document.getOpinions()) { if ("gold-vua-opinion".equals(opinion.getLabel())) { hasGoldOpinions = true; for (Opinion opinion : document.getOpinions()) { Opinion newOpinion = nafDoc.newOpinion(); newOpinion.setLabel("gold-vua-opinion"); termSpan = KAFDocument.newTermSpan(); for (Term term : opinion.getOpinionExpression().getTerms()) { termSpan.addTarget(nafTerms.get(idConverter.get(term.getId()))); termSpan = KAFDocument.newTermSpan(); for (Term term : opinion.getOpinionHolder().getTerms()) { termSpan.addTarget(nafTerms.get(idConverter.get(term.getId())));
// Fragment (view truncated): serializes a KAFDocument to a JDOM "NAF" root element —
// lang/version attributes, nafHeader content, optional fileDesc and public sections,
// then one linguisticProcessors element per layer.
// NOTE(review): 'Map.Entry entry' in the final loop is a RAW type while the map is
// Map<String, List<LinguisticProcessor>> — parameterize as
// Map.Entry<String, List<LinguisticProcessor>> when the full method is edited.
AnnotationContainer annotationContainer = kaf.getAnnotationContainer(); Element root = new Element("NAF"); root.setAttribute("lang", kaf.getLang(), Namespace.XML_NAMESPACE); root.setAttribute("version", kaf.getVersion()); root.addContent(kafHeaderElem); KAFDocument.FileDesc fd = kaf.getFileDesc(); if (fd != null) { Element fdElem = new Element("fileDesc"); KAFDocument.Public pub = kaf.getPublic(); if (pub != null) { Element pubElem = new Element("public"); Map<String, List<LinguisticProcessor>> lps = kaf.getLinguisticProcessors(); for (Map.Entry entry : lps.entrySet()) { Element lpsElem = new Element("linguisticProcessors");
// Fragment (view truncated): loads a NAF file, checks for pre-existing gold opinions,
// groups annotation property maps by the document's public URI, then materializes
// each as a new Opinion — holder/target/expression spans built via getSpan, polarity
// from "sentiment", strength from "intensity" — and saves the file in place.
// NOTE(review): document.getPublic() is dereferenced without a null check — assumes
// every processed NAF has a <public> header section; confirm against the corpus.
KAFDocument document = KAFDocument.createFromFile(file); List<Opinion> opinions = document.getOpinions(); boolean hasGoldOpinions = false; for (Opinion opinion : opinions) { List<Term> terms = document.getTerms(); String documentID = document.getPublic().uri; HashSet<HashMap<String, String>> map = opinionsByDocument.get(documentID); if (map == null) { attitudeSpan.addAll(getSpan(terms, properties.get("expression"))); Opinion opinion = document.newOpinion(); opinion.setLabel(GOLD_LABEL + "-" + properties.get("type")); LOGGER.debug("Adding opinion {}", properties.get("sentence")); opinion.createOpinionHolder(KAFDocument.newTermSpan(sourceSpan)); opinion.createOpinionTarget(KAFDocument.newTermSpan(targetSpan)); opinion.createOpinionExpression(KAFDocument.newTermSpan(attitudeSpan)); opinion.getOpinionExpression().setPolarity(properties.get("sentiment")); opinion.getOpinionExpression().setStrength(properties.get("intensity")); document.save(file.getAbsolutePath());
/**
 * Joins several NAF documents into a single one.
 *
 * The joined document takes its language, version and raw text from the first
 * document in the list, then receives (via addExistingAnnotation) every
 * high-level annotation of every part, layer by layer.
 *
 * @param nafs the documents to join; must contain at least one element
 * @return the joined document
 * @throws IllegalArgumentException if {@code nafs} is null or empty
 */
public static KAFDocument join(List<KAFDocument> nafs) {
    // Robustness fix: fail with a clear message instead of a raw
    // IndexOutOfBoundsException / NullPointerException on nafs.get(0).
    if (nafs == null || nafs.isEmpty()) {
        throw new IllegalArgumentException("Cannot join an empty list of NAF documents");
    }
    KAFDocument firstNaf = nafs.get(0);
    // Consistency fix: use firstNaf for the version too (was nafs.get(0).getVersion(),
    // mixing the two access styles for the same element).
    KAFDocument joinedNaf = new KAFDocument(firstNaf.getLang(), firstNaf.getVersion());
    joinedNaf.setRawText(firstNaf.getRawText());
    for (KAFDocument nafPart : nafs) {
        for (AnnotationType type : highLevelAnnotationTypes) {
            Layer layer = highLevelAnnotationType2Layer.get(type);
            List<Annotation> annotations = new ArrayList<Annotation>();
            if (isMultiLayerAnnotationType(type)) {
                // Multi-layer types are partitioned by group: gather every group's slice.
                for (String groupId : nafPart.annotationContainer.getGroupIDs(type)) {
                    annotations.addAll(nafPart.getAnnotations(type, groupId));
                }
            } else {
                annotations = nafPart.getAnnotations(type);
            }
            for (Annotation ann : annotations) {
                joinedNaf.addExistingAnnotation(ann, layer, type);
            }
        }
    }
    return joinedNaf;
}
// Fragment (view truncated — method body incomplete): adds SRL predicates for terms
// that have none yet. Skips terms already covered by a predicate or a timex; picks
// the smallest entity containing the term and requires the term to be that entity's
// head; when exactly one roleset matches, attaches a PropBank or NomBank external
// ref and creates a single-term predicate span headed by the term.
// NOTE(review): the PropBank branch calls rolesets.get(0).getID() while the NomBank
// branch calls getId() — presumably the two roleset types expose differently-cased
// accessors; confirm this is not a typo that fails to compile on one branch.
private void applySRLPredicateAddition(final KAFDocument document) { for (final Term term : document.getTerms()) { || !document.getPredicatesByTerm(term).isEmpty() || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty()) { continue; for (final Entity e : document.getEntitiesByTerm(term)) { if (entity == null || e.getTerms().size() < entity.getTerms().size()) { entity = e; if (entity != null && term != document.getTermsHead(entity.getTerms())) { continue; if (rolesets.size() == 1) { final String rolesetID = rolesets.get(0).getID(); ref = document.newExternalRef(NAFUtils.RESOURCE_PROPBANK, rolesetID); if (rolesets.size() == 1) { final String rolesetID = rolesets.get(0).getId(); ref = document.newExternalRef(NAFUtils.RESOURCE_NOMBANK, rolesetID); final Predicate predicate = document.newPredicate(KAFDocument.newTermSpan( Collections.singletonList(term), term)); predicate.addExternalRef(ref);
// Fragment (view truncated): reads a NAF document from a stream, indexes its terms
// by character offset, builds single-term spans into termsList, creates an
// event-gold coreference set with a cluster id, and saves the result to outFileName
// (creating parent directories first).
// NOTE(review): the BufferedReader wrapping 'reader' is not visibly closed in this
// fragment — confirm a try-with-resources or close() exists in the full method.
BufferedReader in = new BufferedReader(reader); KAFDocument nafDocument = KAFDocument.createFromStream(in); for (Term term : nafDocument.getTerms()) { termsHashMap.put(term.getOffset(), term); continue; Span<Term> termSpan = KAFDocument.newTermSpan(); termSpan.addTarget(term); termsList.add(termSpan); Coref coref = nafDocument.newCoref(termsList); coref.setCluster(clusterId); coref.setType("event-gold"); File outputFile = new File(outFileName); Files.createParentDirs(outputFile); nafDocument.save(outputFile);
// Fragment (view truncated): the main processing sweep over the document —
// timexes, entities and predicates are each processed inside try blocks (handlers
// cut off in this view); attribute terms are processed only when their governing
// term is not itself an attribute term; non-"event" corefs are handled; finally,
// for each predicate role, argument heads are expanded from the role head via the
// dependency-path regex "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*".
// NOTE(review): catch clauses are not visible here — confirm exceptions are logged,
// not silently swallowed, in the full method.
for (final Timex3 timex : this.document.getTimeExs()) { try { processTimex(timex); for (final Entity entity : this.document.getEntities()) { try { processEntity(entity); for (final Predicate predicate : this.document.getPredicates()) { try { processPredicate(predicate); for (final Term term : this.document.getTerms()) { if (isAttributeTerm(term)) { final Dep dep = this.document.getDepToTerm(term); if (dep == null || !isAttributeTerm(dep.getFrom())) { processAttribute(term); for (final Coref coref : this.document.getCorefs()) { if (!"event".equalsIgnoreCase(coref.getType())) { try { for (final Predicate predicate : this.document.getPredicates()) { for (final Role role : predicate.getRoles()) { final Term roleHead = NAFUtils.extractHead(this.document, role.getSpan()); if (roleHead != null) { for (final Term argHead : this.document.getTermsByDepAncestors( Collections.singleton(roleHead), "SUB? (COORD CONJ?)*" + " (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*")) {
// Fragment (view truncated): builds the template model for rendering a document —
// one map per sentence (id + lazily-rendered markup via Callable), character
// begin/end bounds computed from the sentence's terms, plus lazy document-level
// entries: title and mentions from the public URI, metadata properties, and the
// raw NAF serialization. The Callables defer the (expensive) rendering until the
// template actually requests each value.
// NOTE(review): doc.getPublic() is dereferenced twice without a null check —
// assumes the header always has a <public> section; confirm.
for (int i = 1; i <= doc.getNumSentences(); ++i) { final int sentenceID = i; final Map<String, Object> sm = Maps.newHashMap(); sm.put("id", i); sm.put("markup", (Callable<String>) () -> { return renderText(new StringBuilder(), doc, doc.getTermsBySent(sentenceID), model) .toString(); }); int begin = Integer.MAX_VALUE; int end = Integer.MIN_VALUE; for (final Term term : doc.getSentenceTerms(sentenceID)) { begin = Math.min(begin, NAFUtils.getBegin(term)); end = Math.max(end, NAFUtils.getEnd(term)); documentModel.put("title", doc.getPublic().uri); documentModel.put("sentences", sentencesModel); documentModel.put("metadata", (Callable<String>) () -> { return renderProperties(new StringBuilder(), model, // new URIImpl(doc.getPublic().uri), true).toString(); }); documentModel.put("mentions", (Callable<String>) () -> { }); documentModel.put("naf", (Callable<String>) () -> { return doc.toString(); });
// Fragment (view truncated): extracts annotations restricted to a set of enabled
// sentences (this.sentenceIDs indexed by sentence number) — timexes, entity spans,
// predicates (labelled loop 'outer' for multi-level breaks cut off here) and
// factualities are filtered by their first target's sentence; coreference chains
// reachable from a head term contribute additional heads, and modifier terms are
// expanded from ann.head via MODIFIER_REGEX, excluding forbiddenTerms.
// NOTE(review): timex.getSpan() is null-checked but entity/predicate spans are not —
// presumably those are guaranteed non-null by the NAF model; confirm.
for (final Timex3 timex : this.document.getTimeExs()) { if (timex.getSpan() == null || this.sentenceIDs[timex.getSpan().getFirstTarget().getSent()]) { for (final Entity entity : this.document.getEntities()) { for (final Span<Term> span : entity.getSpans()) { if (this.sentenceIDs[span.getFirstTarget().getSent()]) { outer: for (final Predicate predicate : this.document.getPredicates()) { if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) { for (final Coref coref : this.document.getCorefsByTerm(a1Head)) { final Set<Term> corefHeads = Sets.newHashSet(); for (final Span<Term> span : coref.getSpans()) { for (final Factuality factuality : this.document.getFactualities()) { if (this.sentenceIDs[factuality.getWord().getSent()]) { try { if (uri != null) { final Set<Term> forbiddenTerms = Sets.newHashSet(); final List<Coref> corefs = this.document.getCorefsByTerm(ann.head); for (final Coref coref : corefs) { final List<Term> heads = Lists.newArrayList(); for (final Term term : this.document.getTermsByDepAncestors( Collections.singleton(ann.head), MODIFIER_REGEX)) { if (!forbiddenTerms.contains(term)) {