public static QuadModel getSubModel(final QuadModel model, final Iterable<? extends Resource> mentionIDs) { final QuadModel result = QuadModel.create(); final Set<Resource> nodes = Sets.newHashSet(); result.addAll(model.filter(mentionID, null, null)); for (final Statement triple : model.filter(null, null, mentionID)) { result.add(triple); if (triple.getPredicate().equals(KS.EXPRESSED_BY)) { final Resource factID = triple.getSubject(); result.addAll(model.filter(factID, null, null)); for (final Statement factTriple : model.filter(null, null, null, factID)) { result.add(factTriple); final Resource factSubj = factTriple.getSubject(); final URI factPred = factTriple.getPredicate(); while (!queue.isEmpty()) { final Resource node = queue.remove(0); for (final Statement triple : model.filter(node, null, null)) { if (triple.getContext() != null) { final Resource context = triple.getContext(); if (model.filter(context, KS.EXPRESSED_BY, null).isEmpty()) { result.add(triple); if (triple.getObject() instanceof Resource) { final Resource obj = (Resource) triple.getObject();
/**
 * Selects the mentions of a model whose character span falls inside a given range.
 *
 * <p>
 * Every subject typed as {@code KS.MENTION} is considered. A mention is kept only when it
 * has both a {@code NIF.BEGIN_INDEX} and a {@code NIF.END_INDEX} literal, its begin index
 * is not smaller than {@code beginIndex} and its end index is not greater than
 * {@code endIndex}. Mentions lacking either index are skipped.
 * </p>
 *
 * @param model
 *            the model to scan for mentions
 * @param beginIndex
 *            the lowest begin index accepted (inclusive)
 * @param endIndex
 *            the highest end index accepted (inclusive)
 * @return an immutable set with the IDs of the selected mentions, in the order they were
 *         encountered
 */
public static Set<Resource> getMentions(final QuadModel model, final int beginIndex,
        final int endIndex) {
    final ImmutableSet.Builder<Resource> selected = ImmutableSet.builder();
    for (final Resource candidate : model.filter(null, RDF.TYPE, KS.MENTION).subjects()) {
        // Both literals are looked up before any check, one lookup per index property
        final Literal startLiteral = model.filter(candidate, NIF.BEGIN_INDEX, null)
                .objectLiteral();
        final Literal stopLiteral = model.filter(candidate, NIF.END_INDEX, null)
                .objectLiteral();
        if (startLiteral == null || startLiteral.intValue() < beginIndex) {
            continue; // no begin index, or the mention starts before the range
        }
        if (stopLiteral == null || stopLiteral.intValue() > endIndex) {
            continue; // no end index, or the mention ends after the range
        }
        selected.add(candidate);
    }
    return selected.build();
}
for (final URI pred : this.valueComparator.sortedCopy(model.filter(node, null, null) .predicates())) { if (excludedProperties.contains(pred)) { continue; final List<Resource> nested = Lists.newArrayList(); String separator = ""; for (final Value obj : this.valueComparator.sortedCopy(model.filter(node, pred, null) .objects())) { if (obj instanceof Literal || model.filter((Resource) obj, null, null).isEmpty()) { out.append(separator); renderObject(out, obj, model);
final Set<Resource> expandedNodes) throws IOException { boolean notEmpty = false; for (final URI pred : this.valueComparator.sortedCopy(model.filter(node, null, null) .predicates())) { if (this.ignoredProperties.contains(pred)) { continue; for (final Value object : this.valueComparator.sortedCopy(model.filter(node, pred, null).objects())) { if (!excludedNodes.contains(object)) { out.append(Strings.repeat(" ", indent));
renderObject(out, mentionID, model); out.append("</td><td>"); out.append(model.filter(mentionID, NIF.ANCHOR_OF, null).objectString()); out.append("</td><td>"); renderObject(out, model.filter(mentionID, RDF.TYPE, null).objects(), model); out.append("</td><td>"); final QuadModel mentionModel = QuadModel.create(); for (final Statement statement : model.filter(mentionID, null, null)) { final URI pred = statement.getPredicate(); if (!NIF.BEGIN_INDEX.equals(pred) && !NIF.END_INDEX.equals(pred) && !NIF.ANCHOR_OF.equals(pred) && !RDF.TYPE.equals(pred) && !KS.MENTION_OF.equals(pred)) { mentionModel.add(statement); if (!mentionModel.isEmpty()) { renderProperties(out, mentionModel, mentionID, false); model.filter(mentionID, KS.DENOTES, null).objects(), model.filter(mentionID, KS.IMPLIES, null).objects()), model); out.append("</td><td><ol>"); for (final Value factID : model.filter(mentionID, KS.EXPRESSES, null).objects()) { for (final Statement statement : model.filter(null, null, null, (Resource) factID)) { out.append("<li>"); renderObject(out, statement.getSubject(), model);
for (final Value value : Iterables.concat(model.subjects(), model.objects())) { if (value instanceof URI && this.nodeNamespaces.contains(((URI) value).getNamespace())) { for (final Statement stmt : model.filter(null, RDF.TYPE, null)) { if (stmt.getObject() instanceof Resource && this.nodeTypes.contains(stmt.getObject())) { final Set<Value> types = model.filter(node, RDF.TYPE, null).objects(); out.append(id).append(" ["); out.append("label=<<table border=\"0\" cellborder=\"0\" cellpadding=\"0\" bgcolor=\""); final List<URI> properties = this.valueComparator.sortedCopy(model.filter(sourceNode, null, targetNode).predicates()); for (final Value sourceType : model.filter(sourceNode, RDF.TYPE, null).objects()) { if (sourceType instanceof URI) { keys.add(new URIImpl(sourceType.stringValue() + "-from")); for (final Value targetType : model.filter(targetNode, RDF.TYPE, null).objects()) { if (targetType instanceof URI) { keys.add(new URIImpl(targetType.stringValue() + "-to"));
/**
 * Emits a mention for a relation spanning the supplied terms.
 *
 * <p>
 * The terms are copied into a list ordered by {@code Term.OFFSET_COMPARATOR} and handed to
 * {@code emitNIF}, whose result is used as the mention URI. The mention is then linked to
 * the document through {@code KS.MENTION_OF} and assigned the given {@code rdf:type}.
 * </p>
 *
 * @param extent
 *            the terms covered by the mention, in any order
 * @param type
 *            the type to assert for the mention
 * @return the URI returned by {@code emitNIF} for the sorted terms
 */
private URI emitRelationMention(final Iterable<Term> extent, final URI type) {
    final URI relationMention = emitNIF(
            Ordering.from(Term.OFFSET_COMPARATOR).sortedCopy(extent));

    this.model.add(relationMention, KS.MENTION_OF, this.documentURI);
    this.model.add(relationMention, RDF.TYPE, type);

    return relationMention;
}
for (final Statement stmt : model.filter(null, KS.DENOTES, null)) { final Resource instance = (Resource) stmt.getObject(); final String color = select(colorMap, model.filter(instance, RDF.TYPE, null).objects(), null); if (stmt.getSubject() instanceof URI && color != null) { final URI mentionURI = (URI) stmt.getSubject();
public Extractor(final String baseURI, final RDFHandler handler, final KAFDocument document, final boolean[] sentenceIDs) { this.baseURI = baseURI; this.handler = handler; this.statements = QuadModel.create(); this.mintedURIs = HashBiMap.create(); this.document = document; this.documentURI = FACTORY.createURI(Util.cleanIRI(document.getPublic().uri)); this.sentenceIDs = sentenceIDs; final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } this.documentText = builder.toString(); this.annotations = Maps.newHashMap(); }
@SuppressWarnings("deprecation") Extraction(final QuadModel model, final KAFDocument document) { // Reconstruct the document text using term offsets to avoid alignment issues final StringBuilder builder = new StringBuilder(); for (final WF word : document.getWFs()) { final int offset = word.getOffset(); if (builder.length() > offset) { builder.setLength(offset); } else { while (builder.length() < offset) { builder.append(" "); } } builder.append(word.getForm()); } // Initialize the object this.model = model; this.document = document; this.vf = model.getValueFactory(); this.documentText = builder.toString(); this.documentURI = this.vf.createURI(Util.cleanIRI(document.getPublic().uri)); this.mentions = Maps.newHashMap(); }
/**
 * Runs {@code processor} over the model and adds the statements it emits back to the model.
 *
 * <p>
 * The model is wrapped as an RDF source and fed to the processor; emitted statements are
 * first gathered in a list and added to the model only once the emission has returned.
 * </p>
 *
 * @param model
 *            the model to read from and to extend with the emitted statements
 * @throws Exception
 *             propagated from the wrapped calls
 */
@Override
public void map(final QuadModel model) throws Exception {
    final List<Statement> emitted = new ArrayList<>();
    processor.wrap(RDFSources.wrap(model)).emit(RDFHandlers.wrap(emitted), 1);
    model.addAll(emitted);
}
/**
 * Adds one statement {@code <subject, property, value>} for every value extracted from
 * {@code objects}.
 *
 * <p>
 * Nothing is added when either the subject or the property is {@code null}. Values are
 * obtained through {@code extract}, which receives the generator's type map when the
 * property is {@code RDF.TYPE} and {@code null} otherwise.
 * </p>
 *
 * @param subject
 *            the statement subject, possibly null
 * @param property
 *            the statement predicate, possibly null
 * @param objects
 *            the object(s) handed to {@code extract}, possibly null
 */
private void emitMeta(@Nullable final URI subject, @Nullable final URI property,
        @Nullable final Object objects) {

    if (subject == null || property == null) {
        return;
    }

    final boolean typeAssertion = RDF.TYPE.equals(property);
    for (final Value value : extract(Value.class, objects,
            typeAssertion ? RDFGenerator.this.typeMap : null)) {
        this.statements.add(FACTORY.createStatement(subject, property, value));
    }
}
out.append("</td><td>"); String separator = ""; for (final Resource mentionID : model.filter(statement.getContext(), KS.EXPRESSES, null).subjects()) { final String extent = model.filter(mentionID, NIF.ANCHOR_OF, null) .objectLiteral().stringValue(); out.append(separator); renderObject(out, mentionID, model);
private void emitFact(@Nullable final URI subject, @Nullable final URI property, @Nullable final Object objects, @Nullable final URI mention, @Nullable final Object confidence) { if (subject != null && property != null) { for (final Value object : extract(Value.class, objects, RDF.TYPE.equals(property) ? RDFGenerator.this.typeMap : null)) { final URI factURI = hash(subject, property, object); this.statements.add(FACTORY .createStatement(subject, property, object, factURI)); if (mention != null) { this.statements.add(FACTORY.createStatement(factURI, KS.EXPRESSED_BY, mention)); } if (confidence instanceof Number) { final double confidenceValue = ((Number) confidence).doubleValue(); if (confidenceValue != 0.0) { // this.statements.add(FACTORY.createStatement(factURI, KS.CONFIDENCE, // FACTORY.createLiteral(confidenceValue))); } } } } }
this.model.add(uri, RDF.TYPE, KS.COMPOUND_STRING); for (final URI componentURI : componentURIs) { this.model.add(uri, KS.COMPONENT_SUB_STRING, componentURI); this.model.add(uri, NIF.BEGIN_INDEX, this.vf.createLiteral(begin)); this.model.add(uri, NIF.END_INDEX, this.vf.createLiteral(offset)); this.model.add(uri, NIF.ANCHOR_OF, this.vf.createLiteral(anchor));
this.model.add(mention.uri, KS.MENTION_OF, this.documentURI); this.model.add(mention.uri, KS.LEMMA, this.vf.createLiteral(head.getLemma())); final URI uri = this.vf.createURI("http://www.newsreader-project.eu/sst/", sst.substring(sst.lastIndexOf('-') + 1)); this.model.add(mention.uri, KS.SST, uri); final URI uri = this.vf.createURI("http://wordnet-rdf.princeton.edu/wn30/", synsetRef.getReference()); this.model.add(mention.uri, KS.SYNSET, uri); this.model.add(mention.uri, KS.PLURAL, this.vf.createLiteral(true)); this.model.add(mention.uri, RDF.TYPE, KS.INSTANCE_MENTION); if ((typeMask & InstanceMention.TIME) != 0) { this.model.add(mention.uri, RDF.TYPE, KS.TIME_MENTION); this.model.add(mention.uri, RDF.TYPE, KS.NAME_MENTION); this.model.add(mention.uri, RDF.TYPE, KS.FRAME_MENTION); this.model.add(mention.uri, RDF.TYPE, KS.ATTRIBUTE_MENTION);
this.model.add(docURI, RDF.TYPE, KS.RESOURCE); this.model.add(docURI, RDF.TYPE, KS.TEXT); this.model.add(docURI, DCTERMS.TITLE, this.vf.createLiteral(fd.title)); this.model.add(docURI, DCTERMS.CREATOR, this.vf.createLiteral(fd.author)); this.model.add(docURI, DCTERMS.CREATED, this.vf.createLiteral(fd.creationtime)); this.model.add(docURI, DCTERMS.LANGUAGE, ModelUtil.languageCodeToURI(this.document.getLang())); this.model.add(docURI, KS.TEXT_HASH, this.vf.createLiteral(Hash.murmur3(builder.toString()).toString())); this.model.add(docURI, KS.ANNOTATED_WITH, nafURI); this.model.add(nafURI, KS.ANNOTATION_OF, docURI); this.model.add(nafURI, RDF.TYPE, KS.RESOURCE); this.model.add(nafURI, RDF.TYPE, KS.NAF); this.model.add(nafURI, KS.VERSION, this.vf.createLiteral(this.document.getVersion())); this.model.add(nafURI, DCTERMS.IDENTIFIER, this.vf.createLiteral(this.document.getPublic().publicId)); for (final Map.Entry<String, List<LinguisticProcessor>> entry : this.document .getLinguisticProcessors().entrySet()) { this.model.add(nafURI, KS.LAYER, this.vf.createURI(KS.NAMESPACE, "layer_" + entry.getKey())); for (final LinguisticProcessor lp : entry.getValue()) { this.model.add(nafURI, DCTERMS.CREATOR, lpURI);