/**
 * Decides whether this engine is able to process the given content item.
 *
 * @param ci the content item to inspect
 * @return {@code CANNOT_ENHANCE} when the item has no metadata, otherwise
 *         {@code ENHANCE_SYNCHRONOUS}
 * @throws EngineException declared by the overridden method
 */
@Override
public int canEnhance(ContentItem ci) throws EngineException {
    /*
     * Being a post-processing engine, the Refactor can enhance only content items that
     * were previously enhanced by other enhancement engines.
     */
    if (ci.getMetadata() == null) {
        return CANNOT_ENHANCE;
    }
    return ENHANCE_SYNCHRONOUS;
}
public String getRdfMetadata(String mediatype) throws UnsupportedEncodingException { if(enhancementException == null){ ByteArrayOutputStream out = new ByteArrayOutputStream(); serializer.serialize(out, contentItem.getMetadata(), mediatype); return out.toString("utf-8"); } else {//in case of an exception print the stacktrace StringWriter writer = new StringWriter(); enhancementException.printStackTrace(new PrintWriter(writer)); return writer.toString(); } }
public String getRdfMetadata(String mediatype) throws UnsupportedEncodingException { if(enhancementException == null){ ByteArrayOutputStream out = new ByteArrayOutputStream(); serializer.serialize(out, contentItem.getMetadata(), mediatype); return out.toString("utf-8"); } else {//in case of an exception print the stacktrace StringWriter writer = new StringWriter(); enhancementException.printStackTrace(new PrintWriter(writer)); return writer.toString(); } }
/**
 * Adds the returned DBpedia Spotlight surface forms to the content item's
 * metadata. A text annotation is created for every surface form; when a
 * surface form name was already seen, a {@code DC_RELATION} triple is added
 * from the first annotation created for that name to the new one.
 *
 * @param occs
 *            the surface forms to add
 * @param ci
 *            the content item whose metadata is extended
 * @param content
 *            the content handed on to the text annotation factory
 * @param lang
 *            the language handed on to the text annotation factory
 */
protected void createEnhancements(Collection<SurfaceForm> occs, ContentItem ci, String content, Language lang) {
    Graph metadata = ci.getMetadata();
    // remembers, per surface form name, the first annotation created for it
    HashMap<String, IRI> firstAnnotationByName = new HashMap<String, IRI>();
    for (SurfaceForm surfaceForm : occs) {
        IRI annotation = SpotlightEngineUtils.createTextEnhancement(
                surfaceForm, this, ci, content, lang);
        if (!firstAnnotationByName.containsKey(surfaceForm.name)) {
            firstAnnotationByName.put(surfaceForm.name, annotation);
            continue;
        }
        IRI firstAnnotation = firstAnnotationByName.get(surfaceForm.name);
        metadata.add(new TripleImpl(firstAnnotation, DC_RELATION, annotation));
    }
}
/** @inheritDoc */ public ImmutableGraph getGraph(EnhancementJobManager jobManager, ContentItemFactory ciFactory) throws EnhancementException { if(graph == null) { ContentItem ci; try { ci = ciFactory.createContentItem(new StringSource(inputText)); } catch (IOException e) { throw new IllegalStateException("Unable to create a ContentItem" + "using '"+ciFactory.getClass().getSimpleName()+"'!",e); } if(chain == null){ jobManager.enhanceContent(ci); } else { //parsing null as chain does not work! jobManager.enhanceContent(ci,chain); } graph = ci.getMetadata().getImmutableGraph(); } return graph; } }
/**
 * Adds the title field and the results of an LDPath program to the Solr document.
 * <p>
 * The title is the {@code TITLE_URI} part of the content item, or the item's URI
 * when that part does not exist. The program is executed with the URIs of all
 * {@code ENHANCER_ENTITY_REFERENCE} objects found in the item's metadata as
 * contexts; each result entry becomes a field of the document.
 *
 * @param ci the content item being indexed
 * @param doc the Solr document to populate
 * @param ldProgramName the name of the LDPath program to execute
 */
private void addSolrSpecificFields(ContentItem ci, SolrInputDocument doc, String ldProgramName) {
    String title;
    try {
        title = ci.getPart(TITLE_URI, String.class);
    } catch (NoSuchPartException e) {
        // no explicit title part: fall back to the URI of the content item
        title = ci.getUri().getUnicodeString();
    }
    doc.addField(SolrFieldName.TITLE.toString(), title);

    try {
        Set<String> contexts = new HashSet<String>();
        for (Iterator<Triple> references = ci.getMetadata().filter(null,
                Properties.ENHANCER_ENTITY_REFERENCE, null); references.hasNext();) {
            Resource referenced = references.next().getObject();
            if (referenced instanceof UriRef) {
                contexts.add(((UriRef) referenced).getUnicodeString());
            }
        }
        Map<String,Collection<?>> programResults =
                semanticIndexManager.executeProgram(ldProgramName, contexts, ci);
        for (Entry<String,Collection<?>> field : programResults.entrySet()) {
            doc.addField(field.getKey(), field.getValue());
        }
    } catch (LDPathException e) {
        log.error("Cannot execute the ldPathProgram on ContentItem's metadata", e);
    }
}
/**
 * Creates a new instance with the types enhancer:Enhancement and
 * enhancer:EntityAnnotation in the metadata graph of the content item,
 * along with the default properties (dc:creator and dc:created), and returns
 * its IRI so that engines can add further information to it.
 * <p>
 * Delegates to the overload that takes the metadata graph, the engine and the
 * IRI of the content item.
 *
 * @param ci the ContentItem being under analysis
 * @param engine the Engine performing the analysis
 * @return the URI of the new enhancement instance
 */
public static IRI createEntityEnhancement(ContentItem ci, EnhancementEngine engine) {
    IRI contentItemId = new IRI(ci.getUri().getUnicodeString());
    return createEntityEnhancement(ci.getMetadata(), engine, contentItemId);
}
/**
/**
 * Creates a new instance with the types enhancer:Enhancement and
 * enhancer:TopicAnnotation in the metadata graph of the content item,
 * along with the default properties (dc:creator and dc:created), and returns
 * its IRI so that engines can add further information to it.
 * <p>
 * Delegates to the overload that takes the metadata graph, the engine and the
 * IRI of the content item.
 *
 * @param ci the ContentItem being under analysis
 * @param engine the Engine performing the analysis
 * @return the URI of the new enhancement instance
 */
public static IRI createTopicEnhancement(ContentItem ci, EnhancementEngine engine) {
    IRI contentItemId = new IRI(ci.getUri().getUnicodeString());
    return createTopicEnhancement(ci.getMetadata(), engine, contentItemId);
}
/**
/**
 * Creates a new instance with the types enhancer:Enhancement and
 * enhancer:EntityAnnotation in the metadata graph of the content item,
 * along with the default properties (dc:creator and dc:created), and returns
 * its IRI so that engines can add further information to it.
 * <p>
 * Delegates to the overload that takes the metadata graph, the engine and the
 * IRI of the content item.
 *
 * @param ci the ContentItem being under analysis
 * @param engine the Engine performing the analysis
 * @return the URI of the new enhancement instance
 */
public static IRI createEntityEnhancement(ContentItem ci, EnhancementEngine engine) {
    IRI contentItemId = new IRI(ci.getUri().getUnicodeString());
    return createEntityEnhancement(ci.getMetadata(), engine, contentItemId);
}
/**
/**
 * Creates a new instance with the types enhancer:Enhancement and
 * enhancer:TopicAnnotation in the metadata graph of the content item,
 * along with the default properties (dc:creator and dc:created), and returns
 * its IRI so that engines can add further information to it.
 * <p>
 * Delegates to the overload that takes the metadata graph, the engine and the
 * IRI of the content item.
 *
 * @param ci the ContentItem being under analysis
 * @param engine the Engine performing the analysis
 * @return the URI of the new enhancement instance
 */
public static IRI createTopicEnhancement(ContentItem ci, EnhancementEngine engine) {
    IRI contentItemId = new IRI(ci.getUri().getUnicodeString());
    return createTopicEnhancement(ci.getMetadata(), engine, contentItemId);
}
/**
/**
 * Creates a new instance with the types enhancer:Enhancement and
 * enhancer:TextAnnotation in the metadata graph of the content item,
 * along with the default properties (dc:creator and dc:created), and returns
 * its IRI so that engines can add further information to it.
 * <p>
 * Delegates to the overload that takes the metadata graph, the engine and the
 * IRI of the content item.
 *
 * @param ci the ContentItem being under analysis
 * @param engine the Engine performing the analysis
 *
 * @return the URI of the new enhancement instance
 */
public static IRI createTextEnhancement(ContentItem ci, EnhancementEngine engine) {
    IRI contentItemId = new IRI(ci.getUri().getUnicodeString());
    return createTextEnhancement(ci.getMetadata(), engine, contentItemId);
}
/**
/** * Enhances the parsed ContentItem * @param ci the content item to enhance * @param reqProp the request properties or <code>null</code> if none * @throws EnhancementException */ protected void enhance(ContentItem ci, Map<String,Object> reqProp) throws EnhancementException { if (jobManager != null) { jobManager.enhanceContent(ci, getChain()); } Graph graph = ci.getMetadata(); Boolean includeExecutionMetadata = RequestPropertiesHelper.isIncludeExecutionMetadata(reqProp); if (includeExecutionMetadata != null && includeExecutionMetadata.booleanValue()) { try { graph.addAll(ci.getPart(ExecutionMetadata.CHAIN_EXECUTION, Graph.class)); } catch (NoSuchPartException e) { // no executionMetadata available } } }
/**
 * Creates a new instance with the types enhancer:Enhancement and
 * enhancer:TextAnnotation in the metadata graph of the content item,
 * along with the default properties (dc:creator and dc:created), and returns
 * its IRI so that engines can add further information to it.
 * <p>
 * Delegates to the overload that takes the metadata graph, the engine and the
 * IRI of the content item.
 *
 * @param ci the ContentItem being under analysis
 * @param engine the Engine performing the analysis
 *
 * @return the URI of the new enhancement instance
 */
public static IRI createTextEnhancement(ContentItem ci, EnhancementEngine engine) {
    IRI contentItemId = new IRI(ci.getUri().getUnicodeString());
    return createTextEnhancement(ci.getMetadata(), engine, contentItemId);
}
/**
/** * Enhances the parsed ContentItem * @param ci the content item to enhance * @param reqProp the request properties or <code>null</code> if none * @throws EnhancementException */ protected void enhance(ContentItem ci, Map<String,Object> reqProp) throws EnhancementException { if (jobManager != null) { jobManager.enhanceContent(ci, getChain()); } Graph graph = ci.getMetadata(); Boolean includeExecutionMetadata = RequestPropertiesHelper.isIncludeExecutionMetadata(reqProp); if (includeExecutionMetadata != null && includeExecutionMetadata.booleanValue()) { try { graph.addAll(ci.getPart(ExecutionMetadata.CHAIN_EXECUTION, Graph.class)); } catch (NoSuchPartException e) { // no executionMetadata available } } }
private void updateEnhancementGraph(ContentItem ci) throws StoreException { MGraph enhancementGraph = getEnhancementGraph(); // Delete old enhancements which belong to this content item from the // global enhancements graph. removeEnhancements(ci.getUri().getUnicodeString()); // Add new enhancements of this content item to the global enhancements // graph. enhancementGraph.addAll(ci.getMetadata()); }
/**
 * Checks, for three {@code createContentItem} variants that accept metadata,
 * that the {@link ContentItem#getMetadata()} graph of the created item has
 * the same size as the metadata that was passed in.
 */
@Test
public void testParsedMetadata() throws IOException {
    final String failureMessage = "The created ContentItem MUST contain parsed metadata";

    ContentItem plainItem = contentItemFactory.createContentItem(TEST_CR, METADATA);
    assertNotNull(plainItem);
    assertEquals(failureMessage, METADATA.size(), plainItem.getMetadata().size());

    ContentItem itemWithId = contentItemFactory.createContentItem(ID, TEST_CS, METADATA);
    assertNotNull(itemWithId);
    assertEquals(failureMessage, METADATA.size(), itemWithId.getMetadata().size());

    ContentItem itemWithPrefix = contentItemFactory.createContentItem(PREFIX, TEST_CS, METADATA);
    assertNotNull(itemWithPrefix);
    assertEquals(failureMessage, METADATA.size(), itemWithPrefix.getMetadata().size());
}
/**
 * Checks, for three {@code createContentItem} variants that accept metadata,
 * that the {@link ContentItem#getMetadata()} graph of the created item has
 * the same size as the metadata that was passed in.
 */
@Test
public void testParsedMetadata() throws IOException {
    final String failureMessage = "The created ContentItem MUST contain parsed metadata";

    ContentItem plainItem = contentItemFactory.createContentItem(TEST_CR, METADATA);
    assertNotNull(plainItem);
    assertEquals(failureMessage, METADATA.size(), plainItem.getMetadata().size());

    ContentItem itemWithId = contentItemFactory.createContentItem(ID, TEST_CS, METADATA);
    assertNotNull(itemWithId);
    assertEquals(failureMessage, METADATA.size(), itemWithId.getMetadata().size());

    ContentItem itemWithPrefix = contentItemFactory.createContentItem(PREFIX, TEST_CS, METADATA);
    assertNotNull(itemWithPrefix);
    assertEquals(failureMessage, METADATA.size(), itemWithPrefix.getMetadata().size());
}
/**
 * Adds the title field and, when the content item has metadata, the semantic
 * fields and annotated entity field names to the Solr document.
 * <p>
 * The title is the {@code TITLE_URI} part of the content item, or the item's
 * URI when that part does not exist.
 *
 * @param ci the content item being indexed
 * @param doc the Solr document to populate
 */
private void addSolrSpecificFields(ContentItem ci, SolrInputDocument doc) {
    String title;
    try {
        title = ci.getPart(TITLE_URI, String.class);
    } catch (NoSuchPartException e) {
        // no explicit title part: fall back to the URI of the content item
        title = ci.getUri().getUnicodeString();
    }
    doc.addField(SolrFieldName.TITLE.toString(), title);

    if (ci.getMetadata() == null) {
        log.debug("There are no enhancements for the content item {}", ci.getUri().getUnicodeString());
        return;
    }
    addSemanticFields(ci, doc);
    addAnnotatedEntityFieldNames(ci, doc);
}
/** * Creates a fise:TextAnnotation for the explicitly parsed Content-Language * header. The confidence of this annotation is set <code>1.0</code> (see * <a href="https://issues.apache.org/jira/browse/STANBOL-1417">STANBOL-1417</a>). * @param ci the {@link ContentItem} to the the language annotation * @param lang the parsed language */ private void createParsedLanguageAnnotation(ContentItem ci, String lang){ Graph m = ci.getMetadata(); IRI la = new IRI("urn:enhancement-"+ EnhancementEngineHelper.randomUUID()); //add the fise:Enhancement information m.add(new TripleImpl(la, RDF_TYPE, ENHANCER_ENHANCEMENT)); m.add(new TripleImpl(la, RDF_TYPE, ENHANCER_TEXTANNOTATION)); m.add(new TripleImpl(la, ENHANCER_EXTRACTED_FROM, ci.getUri())); m.add(new TripleImpl(la, DC_CREATED, lf.createTypedLiteral(new Date()))); m.add(new TripleImpl(la, DC_CREATOR, lf.createTypedLiteral("Content-Language Header of the request"))); //add fise:TextAnnotation information as expected by a Language annotation. m.add(new TripleImpl(la, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM)); m.add(new TripleImpl(la, DC_LANGUAGE, new PlainLiteralImpl(lang))); //we set the confidence to 1.0^^xsd:double m.add(new TripleImpl(la, ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(1.0f)))); } /**
/** * Creates a fise:TextAnnotation for the explicitly parsed Content-Language * header. The confidence of this annotation is set <code>1.0</code> (see * <a href="https://issues.apache.org/jira/browse/STANBOL-1417">STANBOL-1417</a>). * @param ci the {@link ContentItem} to the the language annotation * @param lang the parsed language */ private void createParsedLanguageAnnotation(ContentItem ci, String lang){ Graph m = ci.getMetadata(); IRI la = new IRI("urn:enhancement-"+ EnhancementEngineHelper.randomUUID()); //add the fise:Enhancement information m.add(new TripleImpl(la, RDF_TYPE, ENHANCER_ENHANCEMENT)); m.add(new TripleImpl(la, RDF_TYPE, ENHANCER_TEXTANNOTATION)); m.add(new TripleImpl(la, ENHANCER_EXTRACTED_FROM, ci.getUri())); m.add(new TripleImpl(la, DC_CREATED, lf.createTypedLiteral(new Date()))); m.add(new TripleImpl(la, DC_CREATOR, lf.createTypedLiteral("Content-Language Header of the request"))); //add fise:TextAnnotation information as expected by a Language annotation. m.add(new TripleImpl(la, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM)); m.add(new TripleImpl(la, DC_LANGUAGE, new PlainLiteralImpl(lang))); //we set the confidence to 1.0^^xsd:double m.add(new TripleImpl(la, ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(1.0f)))); } /**