/**
 * Builds a KAFDocument that holds no annotations yet.
 *
 * @param lang    language value stored on the document as given
 * @param version version value stored on the document as given
 */
public KAFDocument(String lang, String version) {
    this.lang = lang;
    this.version = version;
    // Fresh, empty collaborators: linguistic-processor map, id manager and annotation container.
    this.lps = new LinkedHashMap<String, List<LinguisticProcessor>>();
    this.idManager = new IdManager();
    this.annotationContainer = new AnnotationContainer();
}
/**
 * Adds a tree to the container.
 * Delegates to the two-argument overload, passing {@code null} as the second argument.
 *
 * @param tree the tree to add
 */
void add(Tree tree) {
    this.add(tree, null);
}
/**
 * Returns the raw text held by the annotation container.
 *
 * @return the value of {@code annotationContainer.getRawText()}
 */
public String getRawText() {
    final String rawText = this.annotationContainer.getRawText();
    return rawText;
}
/**
 * Registers an annotation under both its layer and its annotation type, then indexes it.
 *
 * @param ann      the annotation to register
 * @param layer    key used when registering in {@code this.layers}
 * @param type     key used when registering in {@code this.annotations} and when indexing
 * @param position forwarded unchanged to {@code Helper.addAnnotation}
 */
void add(Annotation ann, Layer layer, AnnotationType type, Integer position) {
    // Resolve the group id once so both registries are guaranteed to use the same value.
    final String groupID = getGroupID(ann);
    Helper.addAnnotation(ann, layer, groupID, position, this.layers);
    Helper.addAnnotation(ann, type, groupID, position, this.annotations);
    /* Index */
    this.indexAnnotation(ann, type);
}
String rawText = annotationContainer.getRawText(); if (rawText.length() > 0) { Element rawElem = new Element("raw"); List<WF> text = annotationContainer.getText(); if (text.size() > 0) { Element textElem = new Element("text"); List<Term> terms = annotationContainer.getTerms(); if (terms.size() > 0) { Element termsElem = new Element("terms"); List<String> markSources = annotationContainer.getMarkSources(); for (String source : markSources) { List<Mark> marks = annotationContainer.getMarks(source); if (marks.size() > 0) { Element marksElem = new Element("markables"); List<Dep> deps = annotationContainer.getDeps(); if (deps.size() > 0) { Element depsElem = new Element("deps"); List<Chunk> chunks = annotationContainer.getChunks(); if (chunks.size() > 0) { Element chunksElem = new Element("chunks"); List<Entity> entities = annotationContainer.getEntities(); if (entities.size() > 0) { Element entitiesElem = new Element("entities");
/**
 * Inserts a term into the term list at the given index and updates the term indexes.
 *
 * @param term  the term to insert
 * @param index list position at which the term is inserted
 */
void add(Term term, int index) {
    this.terms.add(index, term);

    // Index the term under the id of every word form it reports.
    for (WF wordForm : term.getWFs()) {
        indexAnnotation(term, wordForm.getId(), this.termsIndexedByWF);
    }

    // Terms flagged as components are not indexed by sentence.
    if (term.isComponent()) {
        return;
    }
    indexBySent(term, term.getSent(), this.termsIndexedBySent);
}
String rawText = annotationContainer.getRawText(); if (rawText.length() > 0) { Element rawElem = new Element("raw"); List<WF> text = (List<WF>)(List<?>)annotationContainer.getLayer(Layer.TEXT); if (text.size() > 0) { Element textElem = new Element("text"); List<Term> terms = (List<Term>)(List<?>)annotationContainer.getLayer(Layer.TERMS); if (terms.size() > 0) { Element termsElem = new Element("terms"); List<Mark> marks = (List<Mark>)(List<?>)annotationContainer.getLayer(Layer.MARKABLES); if (marks.size() > 0) { Element marksElem = new Element("markables"); List<Dep> deps = (List<Dep>)(List<?>)annotationContainer.getLayer(Layer.DEPS); if (deps.size() > 0) { Element depsElem = new Element("deps"); List<Chunk> chunks = (List<Chunk>)(List<?>)annotationContainer.getLayer(Layer.CHUNKS); if (chunks.size() > 0) { Element chunksElem = new Element("chunks"); List<Entity> entities = (List<Entity>)(List<?>)annotationContainer.getLayer(Layer.ENTITIES); if (entities.size() > 0) { Element entitiesElem = new Element("entities"); List<Coref> corefs = (List<Coref>)(List<?>)annotationContainer.getLayer(Layer.COREFERENCES);
/**
 * Adds a factuality to the container and indexes it by the ids of its word forms.
 * When the factuality reports no word-form list ({@code null}), it is stored but not indexed.
 *
 * @param factuality the factuality to add
 */
void add(Factuality factuality) {
    this.factualities.add(factuality);

    /* Index by word forms */
    if (factuality.getWFs() == null) {
        return;
    }
    for (WF wordForm : factuality.getWFs()) {
        indexAnnotation(factuality, wordForm.getId(), this.factsIndexedByWF);
    }
}
/**
 * Returns every sentence of the document; each sentence is a list of WFs.
 *
 * @return the value of {@code annotationContainer.getSentences()}
 */
public List<List<WF>> getSentences() {
    final List<List<WF>> sentences = this.annotationContainer.getSentences();
    return sentences;
}
/**
 * Creates a compound term out of the given terms.
 * The compound's span collects the span targets of every component, its id is the next
 * {@code AnnotationType.MW} id, and it is created through {@code newTerm} with the position
 * that the container reports for the first component in {@code Layer.TERMS}. Each component
 * is then attached to the compound and removed from the terms layer.
 *
 * @param terms the component terms; must contain at least one element
 * @param lemma lemma assigned to the compound
 * @return the newly created compound term
 * @throws IllegalArgumentException if {@code terms} is empty
 */
public Term newCompound(List<Term> terms, String lemma) {
    // Validate before touching the id manager: otherwise an empty list would consume an
    // MW id and only then fail on terms.get(0) with an opaque IndexOutOfBoundsException.
    if (terms.isEmpty()) {
        throw new IllegalArgumentException("A compound requires at least one component term");
    }
    Span<WF> span = new Span<WF>();
    for (Term term : terms) {
        span.addTargets(term.getSpan().getTargets());
    }
    String newId = idManager.getNextId(AnnotationType.MW);
    // The position must be looked up before the components are removed below.
    Term compound = newTerm(newId, span, annotationContainer.getPosition(Layer.TERMS, terms.get(0)));
    compound.setLemma(lemma);
    for (Term term : terms) {
        compound.addComponent(term);
        term.setCompound(compound);
        this.annotationContainer.remove(term, Layer.TERMS, AnnotationType.TERM);
    }
    return compound;
}
/**
 * Creates a compound term out of the given terms.
 * The compound's span collects the span targets of every component, its id comes from
 * {@code idManager.mws}, and it is created through {@code newTerm} with the position that
 * the container reports for the first component. Each component is then attached to the
 * compound and removed from the container.
 *
 * @param terms the component terms; must contain at least one element
 * @param lemma lemma assigned to the compound
 * @return the newly created compound term
 * @throws IllegalArgumentException if {@code terms} is empty
 */
public Term newCompound(List<Term> terms, String lemma) {
    // Validate before touching the id manager: otherwise an empty list would consume an
    // id and only then fail on terms.get(0) with an opaque IndexOutOfBoundsException.
    if (terms.isEmpty()) {
        throw new IllegalArgumentException("A compound requires at least one component term");
    }
    Span<WF> span = new Span<WF>();
    for (Term term : terms) {
        span.addTargets(term.getSpan().getTargets());
    }
    String newId = idManager.mws.getNext();
    // The position must be looked up before the components are removed below.
    Term compound = newTerm(newId, span, annotationContainer.termPosition(terms.get(0)));
    compound.setLemma(lemma);
    for (Term term : terms) {
        compound.addComponent(term);
        term.setCompound(compound);
        this.annotationContainer.remove(term);
    }
    return compound;
}
/**
 * Sets the raw text by handing it to the annotation container.
 *
 * @param rawText the raw text to store
 */
public void setRawText(String rawText) {
    this.annotationContainer.setRawText(rawText);
}
/**
 * Removes a layer by delegating to the annotation container.
 *
 * @param layer the layer passed to {@code annotationContainer.removeLayer}
 */
public void removeLayer(Layer layer) {
    annotationContainer.removeLayer(layer);
}
/**
 * Returns the unknown layers reported by the annotation container.
 *
 * @return the value of {@code annotationContainer.getUnknownLayers()}
 */
public Set<Element> getUnknownLayers() {
    final Set<Element> unknownLayers = this.annotationContainer.getUnknownLayers();
    return unknownLayers;
}
/**
 * Removes an annotation using {@code DEFAULT_GROUP} as the group.
 * Delegates to the four-argument overload.
 *
 * @param ann   the annotation to remove
 * @param layer the layer forwarded to the overload
 * @param type  the annotation type forwarded to the overload
 */
void remove(Annotation ann, Layer layer, AnnotationType type) {
    remove(ann, layer, type, DEFAULT_GROUP);
}
/**
 * Adds an annotation to the sentence and/or paragraph indexes.
 * <ul>
 *   <li>A {@code SentenceLevelAnnotation} is added to {@code sentIndex}; if it also has a
 *       positive paragraph number it is added to {@code paraIndex}, and the first time a
 *       sentence is seen it is linked to that paragraph via {@code addSentToPara}.</li>
 *   <li>Any other {@code ParagraphLevelAnnotation} is added to {@code paraIndex} when its
 *       paragraph number is positive.</li>
 *   <li>Other annotations are not indexed here.</li>
 * </ul>
 * A {@code null} paragraph number is treated like a non-positive one (no paragraph
 * indexing) instead of failing on auto-unboxing.
 *
 * @param ann  the annotation to index
 * @param type the annotation type used as index key
 */
private void indexAnnotationParaSent(Annotation ann, AnnotationType type) {
    String groupID = getGroupID(ann);
    if (ann instanceof SentenceLevelAnnotation) {
        Integer sent = ((SentenceLevelAnnotation) ann).getSent();
        // NOTE(review): this cast assumes sentence-level annotations are also
        // paragraph-level ones; confirm against the interface hierarchy.
        Integer para = ((ParagraphLevelAnnotation) ann).getPara();
        Helper.addToIndex(ann, type, groupID, sent, this.sentIndex);
        // Null check avoids an unboxing NullPointerException after sentIndex was already updated.
        if (para != null && para > 0) {
            Helper.addToIndex(ann, type, groupID, para, this.paraIndex);
            // Link each sentence to its paragraph only once.
            if (!indexedSents.contains(sent)) {
                this.addSentToPara(sent, para);
                indexedSents.add(sent);
            }
        }
    } else if (ann instanceof ParagraphLevelAnnotation) {
        Integer para = ((ParagraphLevelAnnotation) ann).getPara();
        if (para != null && para > 0) {
            Helper.addToIndex(ann, type, groupID, para, this.paraIndex);
        }
    }
}
/**
 * Adds a named entity to the container, indexes it by the ids of its terms and, when
 * possible, by sentence.
 * The sentence is taken from the first target of the entity's first span. If the entity
 * has no spans, or its first span has no targets, sentence indexing is skipped rather
 * than throwing after the entity has already been stored and term-indexed.
 *
 * @param entity the entity to add
 */
void add(Entity entity) {
    entities.add(entity);
    /* Index by terms */
    for (Term term : entity.getTerms()) {
        indexAnnotation(entity, term.getId(), entitiesIndexedByTerm);
    }
    /* Index by sentence, only when a first target exists */
    if (entity.getSpans().size() > 0 && entity.getSpans().get(0).getTargets().size() > 0) {
        this.indexBySent(entity, entity.getSpans().get(0).getTargets().get(0).getSent(), this.entitiesIndexedBySent);
    }
}
/** * Adds a coreference to the container */ void add(Coref coref) { coreferences.add(coref); /* Index by terms */ for (Span<Term> span : coref.getSpans()) { for (Term term : span.getTargets()) { indexAnnotation(coref, term.getId(), corefsIndexedByTerm); } } //this.indexBySent(coref, coref.getSpans().get(0).getTargets().get(0).getSent(), this.corefsIndexedBySent); }
/**
 * Returns the annotations of the given type grouped into sentences, using
 * {@code DEFAULT_GROUP} as the group. Delegates to the two-argument overload.
 *
 * @param type the annotation type forwarded to the overload
 * @return the overload's result
 */
List<List<Annotation>> getSentences(AnnotationType type) {
    final List<List<Annotation>> sentences = getSentences(type, DEFAULT_GROUP);
    return sentences;
}
/**
 * Stores the raw text by handing it to the annotation container.
 *
 * @param rawText the raw text to store
 */
public void setRawText(String rawText) {
    this.annotationContainer.setRawText(rawText);
}