/**
 * Splits this document into one KAFDocument per sentence.
 *
 * Each produced document shares this document's language, version and raw text,
 * and receives copies (by reference, via addExistingAnnotation) of every
 * sentence-level annotation belonging to that sentence.
 *
 * @return the per-sentence documents, ordered by paragraph and sentence number
 */
public List<KAFDocument> splitInSentences() {
    final List<KAFDocument> result = new ArrayList<KAFDocument>();
    final Integer totalParagraphs = this.getNumParagraphs();
    for (Integer para = 1; para <= totalParagraphs; para++) {
        for (Integer sentNum : this.getSentsByParagraph(para)) {
            final KAFDocument sentenceDoc = new KAFDocument(this.getLang(), this.getVersion());
            sentenceDoc.setRawText(this.getRawText());
            for (AnnotationType annType : highLevelAnnotationTypes) {
                // Only sentence-level layers are carried over into the split documents.
                if (!isSentenceLevelAnnotationType(annType)) {
                    continue;
                }
                final Layer targetLayer = highLevelAnnotationType2Layer.get(annType);
                final List<Annotation> toCopy;
                if (isMultiLayerAnnotationType(annType)) {
                    // Multi-layer types are partitioned by group: gather every group's slice.
                    toCopy = new ArrayList<Annotation>();
                    for (String groupId : annotationContainer.getGroupIDs(annType)) {
                        toCopy.addAll(this.getBySent(annType, groupId, sentNum));
                    }
                } else {
                    toCopy = this.getBySent(annType, sentNum);
                }
                for (Annotation annotation : toCopy) {
                    sentenceDoc.addExistingAnnotation(annotation, targetLayer, annType);
                }
            }
            result.add(sentenceDoc);
        }
    }
    return result;
}
// Fragment (view truncated — enclosing method not fully visible): merges the
// linguistic-processor header entries of another document ('doc') into this one,
// registering each processor that is not already present, and copies doc's
// annotations (WFs, terms, deps, chunks, entities) into this document via the
// copiedWFs/copiedTerms id-remapping maps.
// NOTE(review): the copy loops for WFs, terms, deps, chunks and entities appear
// to be NESTED inside one another AND inside the per-processor loop — presumably
// they should be sequential sibling loops after the LP merge; as written each
// inner collection would be re-copied once per element of every outer loop.
// TODO confirm against the complete method in the original file before changing.
Map<String, List<LinguisticProcessor>> lps = doc.getLinguisticProcessors(); for (Map.Entry<String, List<LinguisticProcessor>> entry : lps.entrySet()) { String layer = entry.getKey(); List<LinguisticProcessor> lpList = entry.getValue(); for (LinguisticProcessor lp : lpList) { if (!this.linguisticProcessorExists(layer, lp.name, lp.version)) { this.addLinguisticProcessor(layer, lp.name, lp.timestamp, lp.version); for (WF wf : doc.getWFs()) { WF wfCopy = new WF(wf, this.annotationContainer); this.insertWF(wfCopy); copiedWFs.put(wf.getId(), wfCopy); for (Term term : doc.getTerms()) { Term termCopy = new Term(term, copiedWFs); this.insertTerm(termCopy); copiedTerms.put(term.getId(), termCopy); for (Dep dep : doc.getDeps()) { Dep depCopy = new Dep(dep, copiedTerms); this.insertDep(depCopy); for (Chunk chunk : doc.getChunks()) { Chunk chunkCopy = new Chunk(chunk, copiedTerms); this.insertChunk(chunkCopy); for (Entity entity : doc.getEntities()) { Entity entityCopy = new Entity(entity, copiedTerms);
// Fragment (garbled extraction — NOT valid Java as shown): several non-contiguous
// snippets appear fused onto one line: (1) collecting entity/timex spans into a
// 'markables' map keyed by the span's head term; (2) a dependency-walk loop that
// stops when a term has no governing Dep; (3) pieces of a ternary expression
// building a dependency-path regex ("(COORD CONJ?)* NAME" etc.) used with
// getTermsByDepAncestors to expand a head term into a full span sorted by offset.
// NOTE(review): the bare 'break;' statements and orphan ':' tokens indicate lines
// were dropped between these snippets — restore from the original file; do not
// attempt to compile or edit this fragment as-is.
for (final Entity entity : document.getEntitiesByTerm(term)) { markables.put(document.getTermsHead(entity.getTerms()), entity.getTerms()); for (final Timex3 timex : document.getTimeExsByWF(wf)) { final List<Term> span = document.getTermsByWFs(timex.getSpan().getTargets()); markables.put(document.getTermsHead(span), span); break; final Dep dep = document.getDepToTerm(t); if (dep == null) { break; final Term head = document.getTermsHead(terms); : "(COORD CONJ?)* NAME" : includeModifiers ? "((NAME|NMOD|AMOD|TMP) .*)?" : "NAME"; terms.addAll(document.getTermsByDepAncestors(Collections.singleton(head), regex)); return KAFDocument.newTermSpan(Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(terms), head);
/**
 * Two KAFDocuments are equal when they agree on language, NAF version,
 * the NAF header, and every annotation layer.
 *
 * NOTE(review): ensure a consistent {@code hashCode} override exists elsewhere
 * in this class — it is not visible in this chunk.
 */
@Override
public boolean equals(Object o) {
    // Reflexive shortcut: a document always equals itself.
    if (this == o) {
        return true;
    }
    if (!(o instanceof KAFDocument)) {
        return false;
    }
    final KAFDocument other = (KAFDocument) o;
    // Language and version must both match.
    final boolean sameLangAndVersion =
            this.getLang().equals(other.getLang())
            && this.getVersion().equals(other.getVersion());
    if (!sameLangAndVersion) {
        return false;
    }
    // NAF header comparison is delegated to headerEquals.
    if (!this.headerEquals(other)) {
        return false;
    }
    // Finally, every annotation layer must be equal.
    return Utils.areEquals(this.annotationContainer, other.annotationContainer);
}
/**
 * Builds a constituency-tree terminal node covering a single term.
 *
 * NOTE(review): the {@code token} parameter is unused in this body; it is kept
 * to preserve the existing signature for callers — confirm whether it can be
 * dropped file-wide.
 *
 * @param token unused (see note above)
 * @param term  the term the terminal node spans
 * @param kaf   the document that owns the new span and terminal
 * @return the freshly created terminal
 */
private static Terminal createTerminal(String token, Term term, KAFDocument kaf) {
    final Span<Term> termSpan = kaf.newTermSpan();
    termSpan.addTarget(term);
    return kaf.newTerminal(termSpan);
}
/**
 * Converts an LKAnnotationEntity's referred-entity list into a term span.
 *
 * Each referred entity's {@code localURI} is parsed as a 1-based positional
 * index into the document's term list (assumes term order matches those
 * indices — TODO confirm this invariant holds for all LK inputs).
 *
 * @param entity   the LK entity whose references are resolved; may have a null
 *                 {@code referred} list, in which case the span is empty
 * @param document the document whose terms are indexed
 * @return a span over the resolved terms (empty when nothing is referred)
 * @throws NumberFormatException if a localURI is not a valid integer
 */
private static Span<Term> getSpanFromEntity(LKAnnotationEntity entity, KAFDocument document) {
    Span<Term> returnSpan = KAFDocument.newTermSpan();
    if (entity.referred != null) {
        // Hoisted out of the loop: getTerms() is loop-invariant, and calling it
        // once avoids rebuilding/retraversing the term list per reference.
        List<Term> terms = document.getTerms();
        for (LKAnnotationEntity referredEntity : entity.referred) {
            int termID = Integer.parseInt(referredEntity.localURI);
            // localURI is 1-based; the term list is 0-based.
            returnSpan.addTarget(terms.get(termID - 1));
        }
    }
    return returnSpan;
}
// Fragment (view truncated — enclosing parse method not fully visible): builds a
// KAFDocument from a parsed NAF XML tree. Reads lang/version off the root element,
// registers linguistic processors (with optional timestamps), fills the fileDesc
// and public header sections from optional attributes, captures the raw text, and
// creates WFs (this variant requires sent/offset/length attributes) and term spans
// from <span> child elements.
// NOTE(review): dangling open braces here belong to code beyond this view.
String lang = getAttribute("lang", rootElem, Namespace.XML_NAMESPACE); String kafVersion = getAttribute("version", rootElem); KAFDocument kaf = new KAFDocument(lang, kafVersion); for (Element lpElem : lpElems) { String name = getAttribute("name", lpElem); KAFDocument.LinguisticProcessor newLp = kaf.addLinguisticProcessor(layer, name); String timestamp = getOptAttribute("timestamp", lpElem); if (timestamp != null) { KAFDocument.FileDesc fd = kaf.createFileDesc(); String author = getOptAttribute("author", fileDescElem); if (author != null) { KAFDocument.Public pub = kaf.createPublic(); String publicId = getOptAttribute("publicId", publicElem); if (publicId != null) { kaf.setRawText(elem.getText()); rootChildrenElems.remove(elem); String wForm = wfElem.getText(); String wSent = getAttribute("sent", wfElem); WF newWf = kaf.newWF(wid, Integer.valueOf(wOffset), Integer.valueOf(wLength), wForm, Integer.valueOf(wSent)); String wPara = getOptAttribute("para", wfElem); if (wPara != null) { Span<Term> span = kaf.newTermSpan(); List<Element> targetElems = spanElem.getChildren();
// Fragment (view truncated): near-duplicate of the other NAF parsing fragment in
// this file, differing in the WF construction — this variant calls
// kaf.newWF(wid, wForm, sent) WITHOUT offset/length, and additionally handles a
// "text" element branch and mark/term span children.
// NOTE(review): two parallel parser variants like this usually correspond to
// different NAF versions (e.g. legacy KAF vs current NAF readers) — presumably
// intentional duplication across reader classes; verify before deduplicating.
String lang = getAttribute("lang", rootElem, Namespace.XML_NAMESPACE); String kafVersion = getAttribute("version", rootElem); KAFDocument kaf = new KAFDocument(lang, kafVersion); for (Element lpElem : lpElems) { String name = getAttribute("name", lpElem); LinguisticProcessor newLp = kaf.addLinguisticProcessor(layer, name); String timestamp = getOptAttribute("timestamp", lpElem); if (timestamp != null) { KAFDocument.FileDesc fd = kaf.createFileDesc(); String author = getOptAttribute("author", fileDescElem); if (author != null) { KAFDocument.Public pub = kaf.createPublic(); String publicId = getOptAttribute("publicId", publicElem); if (publicId != null) { kaf.setRawText(elem.getText()); } else if (elem.getName().equals("text")) { List<Element> wfElems = elem.getChildren(); WF newWf = kaf.newWF(wid, wForm, Integer.valueOf(wSent)); String wPara = getOptAttribute("para", wfElem); if (wPara != null) { Span<Term> span = kaf.newTermSpan(); for (Element marksTermElem : marksTermElems) { String termId = getAttribute("id", marksTermElem);
// Fragment (view truncated): emits RDF metadata triples for the wrapped NAF
// document — fileDesc title/author, document language, raw text, NAF version,
// public identifier, and one KS.LAYER triple per linguistic-processor layer.
// NOTE(review): this.document.getPublic() is dereferenced without a null check,
// unlike getFileDesc()/getLang()/getRawText() above it — presumably the header
// is guaranteed to have a <public> section at this point; confirm, else NPE.
if (this.document.getFileDesc() != null) { final FileDesc fd = this.document.getFileDesc(); emitMeta(docURI, DCTERMS.TITLE, fd.title); emitMeta(docURI, DCTERMS.CREATOR, fd.author); if (this.document.getLang() != null) { emitMeta(docURI, DCTERMS.LANGUAGE, ModelUtil.languageCodeToURI(this.document.getLang())); if (this.document.getRawText() != null) { final String rawText = this.document.getRawText(); final StringBuilder builder = new StringBuilder(); boolean addSpace = false; emitMeta(nafURI, KS.VERSION, this.document.getVersion()); emitMeta(nafURI, DCTERMS.IDENTIFIER, this.document.getPublic().publicId); .getLinguisticProcessors().entrySet()) { emitMeta(nafURI, KS.LAYER, FACTORY.createURI(KS.NAMESPACE, "layer_" + entry.getKey()));
/**
 * Splits this document into one KAFDocument per paragraph.
 *
 * Each produced document shares this document's language, version and raw text,
 * and receives (via addExistingAnnotation) every paragraph-level annotation
 * belonging to that paragraph.
 *
 * @return the per-paragraph documents, in paragraph order
 */
public List<KAFDocument> splitInParagraphs() {
    final List<KAFDocument> result = new ArrayList<KAFDocument>();
    final Integer totalParagraphs = this.getNumParagraphs();
    for (Integer para = 1; para <= totalParagraphs; para++) {
        final KAFDocument paragraphDoc = new KAFDocument(this.getLang(), this.getVersion());
        paragraphDoc.setRawText(this.getRawText());
        for (AnnotationType annType : highLevelAnnotationTypes) {
            // Only paragraph-level layers are carried over into the split documents.
            if (!isParagraphLevelAnnotationType(annType)) {
                continue;
            }
            final Layer targetLayer = highLevelAnnotationType2Layer.get(annType);
            final List<Annotation> toCopy;
            if (isMultiLayerAnnotationType(annType)) {
                // Multi-layer types are partitioned by group: gather every group's slice.
                toCopy = new ArrayList<Annotation>();
                for (String groupId : annotationContainer.getGroupIDs(annType)) {
                    toCopy.addAll(this.getByPara(annType, groupId, para));
                }
            } else {
                toCopy = this.getByPara(annType, para);
            }
            for (Annotation annotation : toCopy) {
                paragraphDoc.addExistingAnnotation(annotation, targetLayer, annType);
            }
        }
        result.add(paragraphDoc);
    }
    return result;
}
// Fragment (view truncated): loads a NAF file, unescapes its raw text, and — unless
// opinions already exist and forceOpinion is false — builds a new Opinion from an
// MPQA record: holder span from the agent annotation, target span from the target
// annotation (each only when non-empty), expression span with the writerAttitude
// polarity, then saves the document back in place.
// NOTE(review): StringEscapeUtils.unescapeHtml is the deprecated commons-lang 2.x
// name (3.x uses unescapeHtml4) — presumably pinned to the older dependency; verify.
KAFDocument document = KAFDocument.createFromFile(file); text = document.getRawText(); text = StringEscapeUtils.unescapeHtml(text); List<Term> terms = document.getTerms(); List<Opinion> opinions = document.getOpinions(); if (opinions.size() > 0 && !forceOpinion) { LOGGER.info("Opinions already present, skipping..."); Opinion opinion = document.newOpinion(); opinion.setLabel(label); sourceSpan.addAll(eu.fbk.dkm.pikes.resources.mpqa.CorpusAnnotator.getSpan(terms, agent.getSpan())); if (sourceSpan.size() > 0) { Opinion.OpinionHolder opinionHolder = opinion.createOpinionHolder(KAFDocument.newTermSpan(sourceSpan)); String attitude = agent.getValue("writerAttitude"); if (attitude != null) { targetSpan.addAll(eu.fbk.dkm.pikes.resources.mpqa.CorpusAnnotator.getSpan(terms, target.getSpan())); if (targetSpan.size() > 0) { Opinion.OpinionTarget opinionTarget = opinion.createOpinionTarget(KAFDocument.newTermSpan(targetSpan)); String attitude = target.getValue("writerAttitude"); if (attitude != null) { opinion.createOpinionExpression(KAFDocument.newTermSpan(attitudeSpan)); opinion.getOpinionExpression().setPolarity(record.getValue(attribute)); document.save(file.getAbsolutePath());
// Fragment (view truncated): transfers gold opinions from a source NAF document to
// a parallel target NAF ('nafDoc'). Indexes the target's terms by id, builds a
// WF-id -> term-id converter by swapping the 'w' prefix for 't', checks whether
// gold-vua-opinion labels already exist, then re-creates each opinion in the target
// with expression and holder spans remapped through idConverter/nafTerms.
// NOTE(review): nafTerms.get(...) may return null if an id fails to map — confirm
// addTarget(null) is rejected upstream or ids are guaranteed aligned between files.
File file = fileIterator.next(); String fileBaseName = FilenameUtils.removeExtension(file.getName()); KAFDocument document = KAFDocument.createFromFile(file); KAFDocument nafDoc = KAFDocument.createFromFile(nafFile); HashMap<String, Term> nafTerms = new HashMap<>(); for (Term term : nafDoc.getTerms()) { nafTerms.put(term.getId(), term); for (WF wf : document.getWFs()) { String id = wf.getId(); id = id.replace('w', 't'); for (Opinion opinion : document.getOpinions()) { if ("gold-vua-opinion".equals(opinion.getLabel())) { hasGoldOpinions = true; for (Opinion opinion : document.getOpinions()) { Opinion newOpinion = nafDoc.newOpinion(); newOpinion.setLabel("gold-vua-opinion"); termSpan = KAFDocument.newTermSpan(); for (Term term : opinion.getOpinionExpression().getTerms()) { termSpan.addTarget(nafTerms.get(idConverter.get(term.getId()))); termSpan = KAFDocument.newTermSpan(); for (Term term : opinion.getOpinionHolder().getTerms()) { termSpan.addTarget(nafTerms.get(idConverter.get(term.getId())));
// Fragment (view truncated): serializes a KAFDocument to a JDOM "NAF" root element —
// lang/version attributes, nafHeader content, optional fileDesc and public sections,
// then one linguisticProcessors element per layer.
// NOTE(review): 'Map.Entry entry' in the final loop is a RAW type while the map is
// Map<String, List<LinguisticProcessor>> — parameterize as
// Map.Entry<String, List<LinguisticProcessor>> when the full method is edited.
AnnotationContainer annotationContainer = kaf.getAnnotationContainer(); Element root = new Element("NAF"); root.setAttribute("lang", kaf.getLang(), Namespace.XML_NAMESPACE); root.setAttribute("version", kaf.getVersion()); root.addContent(kafHeaderElem); KAFDocument.FileDesc fd = kaf.getFileDesc(); if (fd != null) { Element fdElem = new Element("fileDesc"); KAFDocument.Public pub = kaf.getPublic(); if (pub != null) { Element pubElem = new Element("public"); Map<String, List<LinguisticProcessor>> lps = kaf.getLinguisticProcessors(); for (Map.Entry entry : lps.entrySet()) { Element lpsElem = new Element("linguisticProcessors");
// Fragment (view truncated): loads a NAF file, checks for pre-existing gold opinions,
// groups annotation property maps by the document's public URI, then materializes
// each as a new Opinion — holder/target/expression spans built via getSpan, polarity
// from "sentiment", strength from "intensity" — and saves the file in place.
// NOTE(review): document.getPublic() is dereferenced without a null check — assumes
// every processed NAF has a <public> header section; confirm against the corpus.
KAFDocument document = KAFDocument.createFromFile(file); List<Opinion> opinions = document.getOpinions(); boolean hasGoldOpinions = false; for (Opinion opinion : opinions) { List<Term> terms = document.getTerms(); String documentID = document.getPublic().uri; HashSet<HashMap<String, String>> map = opinionsByDocument.get(documentID); if (map == null) { attitudeSpan.addAll(getSpan(terms, properties.get("expression"))); Opinion opinion = document.newOpinion(); opinion.setLabel(GOLD_LABEL + "-" + properties.get("type")); LOGGER.debug("Adding opinion {}", properties.get("sentence")); opinion.createOpinionHolder(KAFDocument.newTermSpan(sourceSpan)); opinion.createOpinionTarget(KAFDocument.newTermSpan(targetSpan)); opinion.createOpinionExpression(KAFDocument.newTermSpan(attitudeSpan)); opinion.getOpinionExpression().setPolarity(properties.get("sentiment")); opinion.getOpinionExpression().setStrength(properties.get("intensity")); document.save(file.getAbsolutePath());
/**
 * Joins several NAF documents into a single one.
 *
 * The joined document takes its language, version and raw text from the first
 * document in the list, then receives (via addExistingAnnotation) every
 * high-level annotation of every part, layer by layer.
 *
 * @param nafs the documents to join; must contain at least one element
 * @return the joined document
 * @throws IllegalArgumentException if {@code nafs} is null or empty
 */
public static KAFDocument join(List<KAFDocument> nafs) {
    // Robustness fix: fail with a clear message instead of a raw
    // IndexOutOfBoundsException / NullPointerException on nafs.get(0).
    if (nafs == null || nafs.isEmpty()) {
        throw new IllegalArgumentException("Cannot join an empty list of NAF documents");
    }
    KAFDocument firstNaf = nafs.get(0);
    // Consistency fix: use firstNaf for the version too (was nafs.get(0).getVersion(),
    // mixing the two access styles for the same element).
    KAFDocument joinedNaf = new KAFDocument(firstNaf.getLang(), firstNaf.getVersion());
    joinedNaf.setRawText(firstNaf.getRawText());
    for (KAFDocument nafPart : nafs) {
        for (AnnotationType type : highLevelAnnotationTypes) {
            Layer layer = highLevelAnnotationType2Layer.get(type);
            List<Annotation> annotations = new ArrayList<Annotation>();
            if (isMultiLayerAnnotationType(type)) {
                // Multi-layer types are partitioned by group: gather every group's slice.
                for (String groupId : nafPart.annotationContainer.getGroupIDs(type)) {
                    annotations.addAll(nafPart.getAnnotations(type, groupId));
                }
            } else {
                annotations = nafPart.getAnnotations(type);
            }
            for (Annotation ann : annotations) {
                joinedNaf.addExistingAnnotation(ann, layer, type);
            }
        }
    }
    return joinedNaf;
}
// Fragment (view truncated — method body incomplete): adds SRL predicates for terms
// that have none yet. Skips terms already covered by a predicate or a timex; picks
// the smallest entity containing the term and requires the term to be that entity's
// head; when exactly one roleset matches, attaches a PropBank or NomBank external
// ref and creates a single-term predicate span headed by the term.
// NOTE(review): the PropBank branch calls rolesets.get(0).getID() while the NomBank
// branch calls getId() — presumably the two roleset types expose differently-cased
// accessors; confirm this is not a typo that fails to compile on one branch.
private void applySRLPredicateAddition(final KAFDocument document) { for (final Term term : document.getTerms()) { || !document.getPredicatesByTerm(term).isEmpty() || !document.getTimeExsByWF(term.getWFs().get(0)).isEmpty()) { continue; for (final Entity e : document.getEntitiesByTerm(term)) { if (entity == null || e.getTerms().size() < entity.getTerms().size()) { entity = e; if (entity != null && term != document.getTermsHead(entity.getTerms())) { continue; if (rolesets.size() == 1) { final String rolesetID = rolesets.get(0).getID(); ref = document.newExternalRef(NAFUtils.RESOURCE_PROPBANK, rolesetID); if (rolesets.size() == 1) { final String rolesetID = rolesets.get(0).getId(); ref = document.newExternalRef(NAFUtils.RESOURCE_NOMBANK, rolesetID); final Predicate predicate = document.newPredicate(KAFDocument.newTermSpan( Collections.singletonList(term), term)); predicate.addExternalRef(ref);
// Fragment (view truncated): reads a NAF document from a stream, indexes its terms
// by character offset, builds single-term spans into termsList, creates an
// event-gold coreference set with a cluster id, and saves the result to outFileName
// (creating parent directories first).
// NOTE(review): the BufferedReader wrapping 'reader' is not visibly closed in this
// fragment — confirm a try-with-resources or close() exists in the full method.
BufferedReader in = new BufferedReader(reader); KAFDocument nafDocument = KAFDocument.createFromStream(in); for (Term term : nafDocument.getTerms()) { termsHashMap.put(term.getOffset(), term); continue; Span<Term> termSpan = KAFDocument.newTermSpan(); termSpan.addTarget(term); termsList.add(termSpan); Coref coref = nafDocument.newCoref(termsList); coref.setCluster(clusterId); coref.setType("event-gold"); File outputFile = new File(outFileName); Files.createParentDirs(outputFile); nafDocument.save(outputFile);
// Fragment (view truncated): the main processing sweep over the document —
// timexes, entities and predicates are each processed inside try blocks (handlers
// cut off in this view); attribute terms are processed only when their governing
// term is not itself an attribute term; non-"event" corefs are handled; finally,
// for each predicate role, argument heads are expanded from the role head via the
// dependency-path regex "SUB? (COORD CONJ?)* (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*".
// NOTE(review): catch clauses are not visible here — confirm exceptions are logged,
// not silently swallowed, in the full method.
for (final Timex3 timex : this.document.getTimeExs()) { try { processTimex(timex); for (final Entity entity : this.document.getEntities()) { try { processEntity(entity); for (final Predicate predicate : this.document.getPredicates()) { try { processPredicate(predicate); for (final Term term : this.document.getTerms()) { if (isAttributeTerm(term)) { final Dep dep = this.document.getDepToTerm(term); if (dep == null || !isAttributeTerm(dep.getFrom())) { processAttribute(term); for (final Coref coref : this.document.getCorefs()) { if (!"event".equalsIgnoreCase(coref.getType())) { try { for (final Predicate predicate : this.document.getPredicates()) { for (final Role role : predicate.getRoles()) { final Term roleHead = NAFUtils.extractHead(this.document, role.getSpan()); if (roleHead != null) { for (final Term argHead : this.document.getTermsByDepAncestors( Collections.singleton(roleHead), "SUB? (COORD CONJ?)*" + " (PMOD (COORD CONJ?)*)? ((VC OPRD?)|(IM OPRD?))*")) {
// Fragment (view truncated): builds the template model for rendering a document —
// one map per sentence (id + lazily-rendered markup via Callable), character
// begin/end bounds computed from the sentence's terms, plus lazy document-level
// entries: title and mentions from the public URI, metadata properties, and the
// raw NAF serialization. The Callables defer the (expensive) rendering until the
// template actually requests each value.
// NOTE(review): doc.getPublic() is dereferenced twice without a null check —
// assumes the header always has a <public> section; confirm.
for (int i = 1; i <= doc.getNumSentences(); ++i) { final int sentenceID = i; final Map<String, Object> sm = Maps.newHashMap(); sm.put("id", i); sm.put("markup", (Callable<String>) () -> { return renderText(new StringBuilder(), doc, doc.getTermsBySent(sentenceID), model) .toString(); }); int begin = Integer.MAX_VALUE; int end = Integer.MIN_VALUE; for (final Term term : doc.getSentenceTerms(sentenceID)) { begin = Math.min(begin, NAFUtils.getBegin(term)); end = Math.max(end, NAFUtils.getEnd(term)); documentModel.put("title", doc.getPublic().uri); documentModel.put("sentences", sentencesModel); documentModel.put("metadata", (Callable<String>) () -> { return renderProperties(new StringBuilder(), model, // new URIImpl(doc.getPublic().uri), true).toString(); }); documentModel.put("mentions", (Callable<String>) () -> { }); documentModel.put("naf", (Callable<String>) () -> { return doc.toString(); });
// Fragment (view truncated): extracts annotations restricted to a set of enabled
// sentences (this.sentenceIDs indexed by sentence number) — timexes, entity spans,
// predicates (labelled loop 'outer' for multi-level breaks cut off here) and
// factualities are filtered by their first target's sentence; coreference chains
// reachable from a head term contribute additional heads, and modifier terms are
// expanded from ann.head via MODIFIER_REGEX, excluding forbiddenTerms.
// NOTE(review): timex.getSpan() is null-checked but entity/predicate spans are not —
// presumably those are guaranteed non-null by the NAF model; confirm.
for (final Timex3 timex : this.document.getTimeExs()) { if (timex.getSpan() == null || this.sentenceIDs[timex.getSpan().getFirstTarget().getSent()]) { for (final Entity entity : this.document.getEntities()) { for (final Span<Term> span : entity.getSpans()) { if (this.sentenceIDs[span.getFirstTarget().getSent()]) { outer: for (final Predicate predicate : this.document.getPredicates()) { if (this.sentenceIDs[predicate.getSpan().getFirstTarget().getSent()]) { for (final Coref coref : this.document.getCorefsByTerm(a1Head)) { final Set<Term> corefHeads = Sets.newHashSet(); for (final Span<Term> span : coref.getSpans()) { for (final Factuality factuality : this.document.getFactualities()) { if (this.sentenceIDs[factuality.getWord().getSent()]) { try { if (uri != null) { final Set<Term> forbiddenTerms = Sets.newHashSet(); final List<Coref> corefs = this.document.getCorefsByTerm(ann.head); for (final Coref coref : corefs) { final List<Term> heads = Lists.newArrayList(); for (final Term term : this.document.getTermsByDepAncestors( Collections.singleton(ann.head), MODIFIER_REGEX)) { if (!forbiddenTerms.contains(term)) {