/**
 * Returns the unique id of the document held by the given JCas.
 *
 * <p>When {@code contentHashAsId} is true the id is a hash of the document content; otherwise it
 * is a hash of the source URI.
 *
 * @param jCas the JCas whose document is being identified
 * @return the unique id of the document
 */
private String getUniqueId(JCas jCas) {
  final String uniqueId =
      ConsumerUtils.getExternalId(getDocumentAnnotation(jCas), contentHashAsId);
  return uniqueId;
}
}
/**
 * Returns the unique id of the document held by the given JCas.
 *
 * <p>The id is produced by {@code ConsumerUtils.getExternalId} from the document annotation, with
 * {@code contentHashAsId} passed through to select how the id is derived.
 *
 * @param jCas the JCas whose document is being identified
 * @return the unique id of the document
 */
protected String getUniqueId(JCas jCas) {
  final String uniqueId =
      ConsumerUtils.getExternalId(getDocumentAnnotation(jCas), contentHashAsId);
  return uniqueId;
}
/**
 * Returns the unique id of the document held by the given JCas.
 *
 * <p>When {@code contentHashAsId} is true the id is a hash of the document content; otherwise it
 * is a hash of the source URI.
 *
 * @param jCas the JCas whose document is being identified
 * @return the unique id of the document
 */
private String getUniqueId(JCas jCas) {
  final String uniqueId =
      ConsumerUtils.getExternalId(getDocumentAnnotation(jCas), contentHashAsId);
  return uniqueId;
}
}
/**
 * Returns the unique id of the document held by the given JCas.
 *
 * <p>The id is produced by {@code ConsumerUtils.getExternalId} from the document annotation, with
 * {@code contentHashAsId} passed through to select how the id is derived.
 *
 * @param jCas the JCas whose document is being identified
 * @return the unique id of the document
 */
protected String getUniqueId(JCas jCas) {
  final String uniqueId =
      ConsumerUtils.getExternalId(getDocumentAnnotation(jCas), contentHashAsId);
  return uniqueId;
}
/**
 * Returns the unique id of the document held by the given JCas.
 *
 * <p>The id is produced by {@code ConsumerUtils.getExternalId} from the document annotation, with
 * {@code contentHashAsId} passed through to select how the id is derived.
 *
 * @param jCas the JCas whose document is being identified
 * @return the unique id of the document
 */
private String getUniqueId(JCas jCas) {
  final String uniqueId =
      ConsumerUtils.getExternalId(getDocumentAnnotation(jCas), contentHashAsId);
  return uniqueId;
}
/**
 * Returns the external id of the document held by the given JCas.
 *
 * <p>The document annotation is looked up through {@code UimaSupport}, and the configured
 * {@code options.isContentHashAsId()} flag is passed to {@code ConsumerUtils.getExternalId}.
 *
 * @param jCas the JCas whose document is being identified
 * @return the document id
 */
private String getDocumentId(JCas jCas) {
  final String documentId =
      ConsumerUtils.getExternalId(
          UimaSupport.getDocumentAnnotation(jCas), options.isContentHashAsId());
  return documentId;
}
/**
 * Returns the external id of the document held by the given JCas.
 *
 * <p>The document annotation is looked up through {@code UimaSupport}, and the configured
 * {@code options.isContentHashAsId()} flag is passed to {@code ConsumerUtils.getExternalId}.
 *
 * @param jCas the JCas whose document is being identified
 * @return the document id
 */
private String getDocumentId(JCas jCas) {
  final String documentId =
      ConsumerUtils.getExternalId(
          UimaSupport.getDocumentAnnotation(jCas), options.isContentHashAsId());
  return documentId;
}
return new Document() .append(FIELD_DOCUMENT_ID, documentId) .append(fields.getExternalId(), ConsumerUtils.getExternalId(e.getValue())) .append(FIELD_LINKING, referenceTarget.getLinking()) .append(
@Override protected void doProcess(JCas jCas) throws AnalysisEngineProcessException { DocumentAnnotation da = UimaSupport.getDocumentAnnotation(jCas); String docId = ConsumerUtils.getExternalId(da, contentHashAsId);
@Override protected void doProcess(JCas jCas) throws AnalysisEngineProcessException { DocumentAnnotation da = UimaSupport.getDocumentAnnotation(jCas); String docId = ConsumerUtils.getExternalId(da, contentHashAsId);
/**
 * Inserts a row for the document held by the given JCas using the prepared insert statement and
 * returns the key reported for that insert.
 *
 * @param jCas the JCas whose document is to be inserted
 * @return the key obtained from the insert statement via {@code getKey}; never null
 * @throws SQLException propagated from the JDBC calls made on the statement
 * @throws BaleenException if no key is returned for the inserted document
 */
private Integer executeDocInsert(JCas jCas) throws SQLException, BaleenException {
  DocumentAnnotation annotation = getDocumentAnnotation(jCas);
  String externalId = ConsumerUtils.getExternalId(annotation, contentHashAsId);

  // Start from a clean statement so no value from a previous use leaks into this row.
  insertDocStatement.clearParameters();

  // Scalar columns, bound in positional order.
  insertDocStatement.setString(1, externalId);
  insertDocStatement.setString(2, annotation.getDocType());
  insertDocStatement.setString(3, annotation.getSourceUri());
  insertDocStatement.setString(4, jCas.getDocumentText());
  insertDocStatement.setString(5, jCas.getDocumentLanguage());
  insertDocStatement.setTimestamp(6, new Timestamp(annotation.getTimestamp()));
  insertDocStatement.setString(7, annotation.getDocumentClassification());

  // Array columns: caveats and releasability.
  insertDocStatement.setArray(
      8, createVarcharArray(postgresResource.getConnection(), annotation.getDocumentCaveats()));
  insertDocStatement.setArray(
      9,
      createVarcharArray(
          postgresResource.getConnection(), annotation.getDocumentReleasability()));

  insertDocStatement.executeUpdate();

  Integer generatedKey = getKey(insertDocStatement);
  if (generatedKey != null) {
    return generatedKey;
  }
  throw new BaleenException("No document key returned");
}
/**
 * Writes the document held by the given JCas to a file beneath {@code basePath}.
 *
 * <p>The file location is derived from the document's source URI; when that is null or empty the
 * external id of the document is used instead. A non-empty extension from {@code getExtension()}
 * is appended after a dot. If writing fails, the failure is logged as a warning and the file is
 * deleted; the exception is not rethrown.
 *
 * @param jCas the JCas to write out
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void doProcess(JCas jCas) throws AnalysisEngineProcessException {
  DocumentAnnotation annotation = getDocumentAnnotation(jCas);

  // Prefer the source URI, falling back to the external id when there is none.
  String sourceUri = annotation.getSourceUri();
  String target =
      Strings.isNullOrEmpty(sourceUri)
          ? ConsumerUtils.getExternalId(annotation, false)
          : sourceUri;

  String suffix = getExtension();
  if (!Strings.isNullOrEmpty(suffix)) {
    target = target + "." + suffix;
  }

  File outputFile = SourceUtils.urlToFile(basePath, target);

  try {
    getMonitor().debug("Writing {} to {}", target, outputFile.getAbsolutePath());
    writeToFile(jCas, outputFile);
  } catch (Exception e) {
    getMonitor().warn("Failed to write file {}, deleting", outputFile.getAbsolutePath(), e);

    // Remove whatever was written so a partial file is not left behind.
    boolean deleted = outputFile.delete();
    if (!deleted) {
      getMonitor().warn("Failed to delete file {}", outputFile.getAbsolutePath(), e);
    }
  }
}
/**
 * Checks that the external id for a set of entities is repeatable, matches a known value for the
 * two-person set, and differs from the id of a one-person set.
 */
@Test
public void testEntityExternalId() throws UIMAException, BaleenException {
  JCas jCas = JCasSingleton.getJCasInstance();
  jCas.setDocumentText("Hello World");

  Person fullName = new Person(jCas);
  fullName.setGender("female");
  fullName.setValue("Jane Doe");
  fullName.addToIndexes(jCas);

  Person initialled = new Person(jCas);
  initialled.setGender("female");
  initialled.setValue("J. Doe");
  initialled.addToIndexes(jCas);

  // Compute the pair id twice, from separately built sets, to check it is repeatable.
  String pairId = ConsumerUtils.getExternalId(ImmutableSet.of(fullName, initialled));
  String pairIdAgain = ConsumerUtils.getExternalId(ImmutableSet.of(fullName, initialled));
  String singleId = ConsumerUtils.getExternalId(ImmutableSet.of(fullName));

  assertEquals(pairId, pairIdAgain);
  assertEquals("d3c514ea1fb3367430959255917ee4de12468004897d683d60114b475d37264a", pairId);
  assertNotEquals(singleId, pairId);
}
}
@Override protected void doProcess(JCas jCas) throws AnalysisEngineProcessException { String documentId = ConsumerUtils.getExternalId(getDocumentAnnotation(jCas), contentHashAsId); // Delete any existing content in the database deleteAllContent(documentId); // Save try { saveEvents(documentId, jCas, textClass); } catch (MongoException | BsonSerializationException e) { getMonitor() .error( "Unable to persist relations to database - document {} will contain no relations", getDocumentAnnotation(jCas).getSourceUri(), e); } }
/**
 * Checks the document external id against known values, first with the content hash selected and
 * then, after a source URI is set, with the content hash not selected.
 */
@Test
public void testExternalId() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  jCas.setDocumentText("Hello World");

  DocumentAnnotation annotation = UimaSupport.getDocumentAnnotation(jCas);

  // Id with the content-hash flag set to true.
  String contentBasedId = ConsumerUtils.getExternalId(annotation, true);
  assertEquals(
      "a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e", contentBasedId);

  // Id with the content-hash flag set to false, once a source URI is present.
  annotation.setSourceUri("http://www.example.com/test.html");
  String sourceBasedId = ConsumerUtils.getExternalId(annotation, false);
  assertEquals(
      "b2e870534ee6fc1abc14feac22dcfd0b268460ac4205d9c3f68a000aab685f4f", sourceBasedId);
}
/**
 * Asserts that the given property map matches the document annotation of the given JCas.
 *
 * <p>Checks type, source, language, timestamp, classification and caveats against the
 * annotation; that no releasability entry is present; that the first published id is
 * {@code "12"} of type {@code "test"}; that the {@code "test"} metadata holds exactly
 * {@code "1"} and {@code "2"}; that no content is present; and that {@code "externalId"} equals
 * the document's external id.
 *
 * @param jCas the JCas supplying the expected document annotation values
 * @param variables the properties to verify
 */
@SuppressWarnings("unchecked")
private void assertMetadata(JCas jCas, Map<String, Object> variables) {
  DocumentAnnotation annotation = UimaSupport.getDocumentAnnotation(jCas);
  String expectedId = ConsumerUtils.getExternalId(annotation, false);

  // Values copied straight from the document annotation.
  assertEquals(annotation.getDocType(), variables.get(FIELD_DOCUMENT_TYPE));
  assertEquals(annotation.getSourceUri(), variables.get(FIELD_DOCUMENT_SOURCE));
  assertEquals(annotation.getLanguage(), variables.get(FIELD_DOCUMENT_LANGUAGE));
  assertEquals(new Date(annotation.getTimestamp()), variables.get(FIELD_DOCUMENT_TIMESTAMP));
  assertEquals(
      annotation.getDocumentClassification(), variables.get(FIELD_DOCUMENT_CLASSIFICATION));
  assertEquals(
      UimaTypesUtils.toList(annotation.getDocumentCaveats()),
      variables.get(FIELD_DOCUMENT_CAVEATS));
  assertFalse(variables.containsKey(FIELD_DOCUMENT_RELEASABILITY));

  // First published id.
  List<Map<String, String>> publishedIds =
      (List<Map<String, String>>) variables.get(FIELD_PUBLISHEDIDS);
  Map<String, String> firstPublishedId = publishedIds.get(0);
  assertEquals("12", firstPublishedId.get(FIELD_PUBLISHEDIDS_ID));
  assertEquals("test", firstPublishedId.get(FIELD_PUBLISHEDIDS_TYPE));

  // Metadata values stored under the "test" key.
  Map<String, Collection<Object>> metadata =
      (Map<String, Collection<Object>>) variables.get(FIELD_METADATA);
  assertTrue(metadata.get("test").contains("1"));
  assertTrue(metadata.get("test").contains("2"));
  assertEquals(2, metadata.get("test").size());

  assertNull(variables.get(FIELD_CONTENT));
  assertEquals(expectedId, variables.get("externalId"));
}
final BaleenRelation br = new BaleenRelation(); final String baleenExternalId = ConsumerUtils.getExternalId(relation); final String externalId = idGenerator.generateForExternalId(baleenExternalId);
/**
 * Processes a JCas containing two reference targets over the same two people, with the graph's
 * default vertex property cardinality set to {@code list}, and checks that a vertex with the
 * external id of those two people exists in the resulting graph.
 */
@Test
public void testDocumentGraphConsumerCanCopeWithSameReferenceTargetExternalId()
    throws AnalysisEngineProcessException, ResourceInitializationException, IOException,
        URISyntaxException {
  properties.setProperty(
      TinkerGraph.GREMLIN_TINKERGRAPH_DEFAULT_VERTEX_PROPERTY_CARDINALITY,
      VertexProperty.Cardinality.list.name());
  writeProperties();

  Person first = Annotations.createPerson(jCas, 0, 4, "test");
  Person second = Annotations.createPerson(jCas, 0, 4, "test");

  // Two reference targets are created over the same pair of people.
  Annotations.createReferenceTarget(jCas, first, second);
  Annotations.createReferenceTarget(jCas, first, second);

  String expectedVertexId = ConsumerUtils.getExternalId(ImmutableList.of(first, second));

  processJCas(
      DocumentGraphConsumer.PARAM_GRAPH_CONFIG,
      propertiesFile.getAbsolutePath(),
      DocumentGraphConsumer.PARAM_OUTPUT_RELATIONS_AS_LINKS,
      true,
      DocumentGraphConsumer.PARAM_OUTPUT_REFERENTS,
      true);

  Graph result = GraphFactory.open(propertiesFile.getAbsolutePath());
  boolean vertexPresent = result.traversal().V(expectedVertexId).hasNext();
  assertTrue(vertexPresent);
}
/**
 * Processes a document with two people joined by one reference target and checks that a single
 * document and a single entity record are stored, that the entity record lists both mentions,
 * and that it carries the reference target's linking value and the expected external id.
 */
@SuppressWarnings("unchecked")
@Test
public void testReferenceTargets() throws AnalysisEngineProcessException {
  jCas.setDocumentText("Bill went to London. William came back.");
  String link = "http://test";

  Person p = Annotations.createPerson(jCas, 0, 4, "Bill");
  Person q = Annotations.createPerson(jCas, 21, 28, NAME_2);
  ReferenceTarget referenceTarget = Annotations.createReferenceTarget(jCas, p, q);
  referenceTarget.setLinking(link);

  ae.process(jCas);

  assertEquals(1, documents.count());
  assertEquals(1, entities.count());

  Document a = entities.find().first();

  // Both people are expected under the single stored entity record.
  List<Object> storedEntities = (List<Object>) a.get(Mongo.FIELD_ENTITIES);
  assertEquals(2, storedEntities.size());

  assertEquals(link, a.getString(Mongo.FIELD_LINKING));
  assertEquals(
      ConsumerUtils.getExternalId(ImmutableList.of(p, q)), a.getString(fields.getExternalId()));
}
/**
 * Builds a document graph with the {@code withDocument} option enabled from a JCas populated by
 * {@code JCasTestGraphUtil}, then checks the per-label vertex and edge counts, the overall
 * totals, and the properties of the document vertex.
 */
@Test
public void testDocumentGraphWithDocument() throws UIMAException {
  DocumentGraphOptions graphOptions = DocumentGraphOptions.builder().withDocument(true).build();
  DocumentGraphFactory graphFactory = createfactory(graphOptions);

  JCas jCas = JCasFactory.createJCas();
  JCasTestGraphUtil.populateJcas(jCas);

  Graph graph = graphFactory.create(jCas);

  // Counts per label, in the same order as they have always been asserted.
  assertEquals(1, graph.traversal().V().hasLabel(DOCUMENT).count().next().intValue());
  assertEquals(10, graph.traversal().E().hasLabel(MENTION_IN).count().next().intValue());
  assertEquals(2, graph.traversal().V().hasLabel(RELATION).count().next().intValue());
  assertEquals(2, graph.traversal().E().hasLabel(SOURCE).count().next().intValue());
  assertEquals(2, graph.traversal().E().hasLabel(TARGET).count().next().intValue());
  assertEquals(3, graph.traversal().V().hasLabel(REFERENCE_TARGET).count().next().intValue());
  assertEquals(1, graph.traversal().V().hasLabel(EVENT).count().next().intValue());
  assertEquals(4, graph.traversal().V().hasLabel(MENTION).count().next().intValue());
  assertEquals(4, graph.traversal().E().hasLabel(MENTION_OF).count().next().intValue());
  assertEquals(0, graph.traversal().E().hasLabel(RELATION).count().next().intValue());
  assertEquals(2, graph.traversal().E().hasLabel(PARTICIPANT_IN).count().next().intValue());

  // Overall totals.
  assertEquals(11, IteratorUtils.count(graph.vertices()));
  assertEquals(20, IteratorUtils.count(graph.edges()));

  // Look up the document vertex by its external id and verify its properties.
  DocumentAnnotation annotation = UimaSupport.getDocumentAnnotation(jCas);
  String expectedDocumentId = ConsumerUtils.getExternalId(annotation, false);
  Vertex documentVertex = graph.traversal().V(expectedDocumentId).next();

  Map<String, Object> documentProperties = new HashMap<>();
  documentVertex
      .properties()
      .forEachRemaining(property -> documentProperties.put(property.key(), property.value()));

  assertMetadata(jCas, documentProperties);
}