/**
 * Performs one send/receive step: passes the value of {@code getIn()} to the sender's
 * {@code process} call, then passes the value of {@code getOut()} to the reader's
 * {@code getNext} call.
 *
 * @throws UIMAException if either delegated call raises it
 * @throws IOException if either delegated call raises it
 */
public void run() throws UIMAException, IOException {
  // Sending happens first; the reader is only asked for the next item afterwards.
  sender.process(getIn());

  reader.getNext(getOut());
}
/**
 * Checks the reader with {@code FolderReader.PARAM_RECURSIVE} set to {@code false}: a file placed
 * in a sub-directory is not the one returned, whereas files placed directly in the watched folder
 * (one present at start-up, one created afterwards) are returned in turn.
 *
 * <p>The reader is closed and every file created here is removed in {@code finally} blocks, so a
 * failing assertion does not leave files behind in {@code inputDir}.
 *
 * @throws Exception if the reader, the file system or the sleep fails
 */
@Test
public void testSubDirectoriesNonRecursive() throws Exception {
  File subdir = new File(inputDir, "subdir");
  File f1 = new File(subdir, TEXT1_FILE);
  File f2 = new File(inputDir, TEST2_FILE);
  File f3 = new File(inputDir, TEST3_FILE);

  try {
    subdir.mkdir();
    f1.createNewFile();
    f2.createNewFile();

    BaleenCollectionReader bcr =
        getCollectionReader(
            FolderReader.PARAM_FOLDERS,
            new String[] {inputDir.getPath()},
            FolderReader.PARAM_RECURSIVE,
            false);
    try {
      // Only the top-level file is expected; the file inside subdir must not be reported.
      assertTrue(bcr.hasNext());
      bcr.getNext(jCas.getCas());
      assertFilesEquals(f2.getPath(), getSource(jCas));

      jCas.reset();

      // A file created in the top-level folder while the reader is running is expected next.
      f3.createNewFile();
      Thread.sleep(TIMEOUT);

      assertTrue(bcr.hasNext());
      bcr.getNext(jCas.getCas());
      assertFilesEquals(f3.getPath(), getSource(jCas));
    } finally {
      bcr.close();
    }
  } finally {
    // Remove everything this test created; the sub-directory goes last so that it is empty.
    f1.delete();
    f2.delete();
    f3.delete();
    subdir.delete();
  }
}
/**
 * Reads the first document from the folder configured through {@code MimeReader.PARAM_FOLDER} and
 * checks that its text starts with the expected sentence.
 *
 * @throws Exception if the reader cannot be created, initialised or read
 */
@Test
public void testDocumentText() throws Exception {
  String folder = tmpDir.toAbsolutePath().toString();
  BaleenCollectionReader collectionReader = getCollectionReader(MimeReader.PARAM_FOLDER, folder);
  collectionReader.initialize();

  assertTrue(collectionReader.doHasNext());
  collectionReader.getNext(jCas.getCas());

  String text = jCas.getDocumentText();
  assertTrue(text.startsWith("I'm going to eat you!"));

  collectionReader.close();
}
}
/**
 * Reads the first document with {@code Re3dReader.PARAM_ENTITIES} and
 * {@code Re3dReader.PARAM_RANDOM_DATES} set to {@code true}, and checks that the document text
 * contains the expected sentence.
 *
 * @throws Exception if the reader cannot be created or read
 */
@Test
public void testDocumentText() throws Exception {
  String folder = tmpDir.toAbsolutePath().toString();
  BaleenCollectionReader collectionReader =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          folder,
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  assertTrue(collectionReader.doHasNext());
  collectionReader.getNext(jCas.getCas());

  String text = jCas.getDocumentText();
  assertTrue(text.contains("This is the text of the article."));

  collectionReader.close();
}
@Test public void testCreateFileDefaultDirectory() throws Exception { BaleenCollectionReader bcr = getCollectionReader(); assertTrue( bcr.doHasNext()); // There will be files in the current directory, so we can just check // that it's picked them up. bcr.getNext(jCas.getCas()); assertTrue(getSource(jCas).contains(System.getProperty("user.dir"))); bcr.close(); }
@Test public void testModifiedFile() throws Exception { BaleenCollectionReader bcr = getCollectionReader( FolderReader.PARAM_FOLDERS, new String[] {inputDir.getPath()}, FolderReader.PARAM_REPROCESS_ON_MODIFY, true); assertFalse(bcr.doHasNext()); File f = new File(inputDir, TEXT1_FILE); f.createNewFile(); // Wait for file to be written and change detected Thread.sleep(TIMEOUT); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); assertFilesEquals(f.getPath(), getSource(jCas)); jCas.reset(); // Modify file Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f))); writer.write("Test"); writer.close(); Thread.sleep(TIMEOUT); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); assertFilesEquals(f.getPath(), getSource(jCas)); assertEquals("Test", jCas.getDocumentText().trim()); assertFalse(bcr.doHasNext()); bcr.close(); }
@Test public void testCreateFile() throws Exception { BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[] {inputDir.getPath()}); assertFalse(bcr.doHasNext()); File f = new File(inputDir, TEXT1_FILE); f.createNewFile(); // Wait for file to be written and change detected Thread.sleep(TIMEOUT); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); assertFilesEquals(f.getPath(), getSource(jCas)); assertFalse(bcr.doHasNext()); bcr.close(); }
/**
 * Reads the first document and checks that at least one {@code DocumentAnnotation} has a source
 * URI equal to {@code SOURCE_URL}.
 *
 * @throws Exception if the reader cannot be created or read
 */
@Test
public void testDocumentMetadata() throws Exception {
  String folder = tmpDir.toAbsolutePath().toString();
  BaleenCollectionReader collectionReader =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          folder,
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  assertTrue(collectionReader.doHasNext());
  collectionReader.getNext(jCas.getCas());

  AnnotationIndex<DocumentAnnotation> documentAnnotations =
      jCas.getAnnotationIndex(DocumentAnnotation.class);
  boolean sourceUriFound =
      contains(documentAnnotations, d -> d.getSourceUri().equals(SOURCE_URL));
  assertTrue(sourceUriFound);

  collectionReader.close();
}
/**
 * Asserts that the reader has another document, loads it into the shared {@code jCas}, asserts
 * that its source is not null, and then resets {@code jCas} for the next use.
 *
 * @param bcr the reader to pull the next document from
 * @throws Exception if reading the next document fails
 */
private void assertNextSourceNotNull(BaleenCollectionReader bcr) throws Exception {
  boolean available = bcr.doHasNext();
  assertTrue(available);

  bcr.getNext(jCas.getCas());
  assertNotNull(getSource(jCas));

  // Leave the shared CAS empty for whoever calls next.
  jCas.reset();
}
/**
 * Creates a reader without parameters and checks that it yields exactly one document whose text
 * is not empty.
 *
 * @throws Exception if the reader cannot be created or read
 */
@Test
public void test() throws Exception {
  BaleenCollectionReader collectionReader = getCollectionReader();

  assertTrue(collectionReader.doHasNext());
  collectionReader.getNext(jCas.getCas());

  String text = jCas.getDocumentText();
  assertFalse(text.isEmpty());

  // Only a single document is expected.
  assertFalse(collectionReader.doHasNext());

  collectionReader.close();
}
}
/**
 * Reads the first document with entities and relationships enabled and asserts that the relation
 * index contains relations with the expected offsets, value, type, source entity and target
 * entity.
 *
 * <p>Every {@code contains(...)} result is passed to {@code assertTrue}; a bare call would
 * evaluate the predicate but could never fail the test.
 *
 * @throws Exception if the reader cannot be created or read
 */
@Test
public void testRelations() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          tmpDir.toAbsolutePath().toString(),
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RELATIONSHIPS,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  assertTrue(bcr.doHasNext());
  bcr.getNext(jCas.getCas());

  // Look up the two entities the relation is expected to link, failing with a clear message
  // (rather than a bare NoSuchElementException) when one of them is missing.
  AnnotationIndex<Entity> entityIndex = jCas.getAnnotationIndex(Entity.class);
  Entity source =
      Streams.stream(entityIndex)
          .filter(e -> e.getBegin() == 12)
          .findAny()
          .orElseThrow(() -> new AssertionError("No entity beginning at offset 12"));
  Entity target =
      Streams.stream(entityIndex)
          .filter(e -> e.getBegin() == 9)
          .findAny()
          .orElseThrow(() -> new AssertionError("No entity beginning at offset 9"));

  AnnotationIndex<Relation> relationIndex = jCas.getAnnotationIndex(Relation.class);
  assertTrue(contains(relationIndex, r -> r.getBegin() == 11));
  assertTrue(contains(relationIndex, r -> r.getEnd() == 12));
  assertTrue(contains(relationIndex, r -> r.getValue().equals("")));
  assertTrue(contains(relationIndex, r -> r.getRelationshipType().equals("HasAttrOf")));
  assertTrue(contains(relationIndex, r -> r.getSource().equals(source)));
  assertTrue(contains(relationIndex, r -> r.getTarget().equals(target)));

  bcr.close();
}
/**
 * Reads the single document from the folder configured through {@code MucReader.KEY_PATH} and
 * checks its source identifier and its text, which is expected to be the lower-cased article
 * body.
 *
 * @throws UIMAException if the reader cannot be created, initialised or read
 * @throws IOException if the reader cannot be read or closed
 */
@Test
public void test() throws UIMAException, IOException {
  String corpusPath = tmpDir.toAbsolutePath().toString();
  BaleenCollectionReader collectionReader = getCollectionReader(MucReader.KEY_PATH, corpusPath);
  collectionReader.initialize();

  assertTrue(collectionReader.doHasNext());
  collectionReader.getNext(jCas.getCas());

  assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas));

  // The expected text is the upper-case article body converted to lower case.
  String expectedText =
      ("THE ARCE BATTALION COMMAND HAS REPORTED THAT ABOUT 50 "
              + "PEASANTS OF VARIOUS AGES HAVE BEEN KIDNAPPED BY TERRORISTS OF THE "
              + "FARABUNDO MARTI NATIONAL LIBERATION FRONT IN SAN MIGUEL "
              + "DEPARTMENT. ACCORDING TO THAT GARRISON, THE MASS KIDNAPPING TOOK PLACE ON "
              + "30 DECEMBER IN SAN LUIS DE LA REINA. THE SOURCE ADDED THAT THE TERRORISTS "
              + "FORCED THE INDIVIDUALS, WHO WERE TAKEN TO AN UNKNOWN LOCATION, OUT OF "
              + "THEIR RESIDENCES, PRESUMABLY TO INCORPORATE THEM AGAINST THEIR WILL INTO "
              + "CLANDESTINE GROUPS.")
          .toLowerCase();
  assertEquals(expectedText, jCas.getDocumentText());

  assertFalse(collectionReader.doHasNext());

  collectionReader.close();
}
@Test public void test() throws IOException, UIMAException { BaleenCollectionReader bcr = getCollectionReader(ReutersReader.KEY_PATH, tmpDir.toAbsolutePath().toString()); bcr.initialize(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); // assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas)); assertEquals("Some example\ntext.", jCas.getDocumentText()); jCas.reset(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); // assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas)); assertEquals("Another example", jCas.getDocumentText()); assertFalse(bcr.doHasNext()); bcr.close(); } }
@Test public void testMetadata() throws Exception { BaleenCollectionReader bcr = getCollectionReader(MimeReader.PARAM_FOLDER, tmpDir.toAbsolutePath().toString()); bcr.initialize(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); AnnotationIndex<Metadata> annotationIndex = jCas.getAnnotationIndex(Metadata.class); // returns the X-From and not the From, these can differ assertEquals(X_FROM, get(annotationIndex, "email.X-From")); assertEquals(TO, get(annotationIndex, "email.X-To")); assertEquals(MESSAGE_ID, get(annotationIndex, "email.Message-ID")); bcr.close(); }
/**
 * Reads the first document with entities enabled and asserts that the entity index contains
 * entities with the expected values and offsets.
 *
 * <p>Every {@code contains(...)} result is passed to {@code assertTrue}; a bare call would
 * evaluate the predicate but could never fail the test.
 *
 * @throws Exception if the reader cannot be created or read
 */
@Test
public void testEntities() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          tmpDir.toAbsolutePath().toString(),
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  assertTrue(bcr.doHasNext());
  bcr.getNext(jCas.getCas());

  AnnotationIndex<Entity> annotationIndex = jCas.getAnnotationIndex(Entity.class);

  // Expectations for the entity with value "Location".
  assertTrue(contains(annotationIndex, e -> e.getValue().equals("Location")));
  assertTrue(contains(annotationIndex, e -> e.getBegin() == 100));
  assertTrue(contains(annotationIndex, e -> e.getEnd() == 118));

  // Expectations for the entity with value "Organisation Ltd".
  assertTrue(contains(annotationIndex, e -> e.getValue().equals("Organisation Ltd")));
  assertTrue(contains(annotationIndex, e -> e.getBegin() == 12));
  assertTrue(contains(annotationIndex, e -> e.getEnd() == 18));

  bcr.close();
}
// Fragment of a test body: the enclosing method header and the declarations of bcr, jCas and md
// are outside this view. It pulls four documents in sequence and checks the text of each.

// First document: expected text, plus exactly two Metadata annotations.
bcr.getNext(jCas.getCas());
assertEquals("This is the first line", jCas.getDocumentText());
assertEquals(2, JCasUtil.select(jCas, Metadata.class).size());

// Second document: expected text, plus exactly two Metadata annotations.
bcr.getNext(jCas.getCas());
assertEquals("This is the second line", jCas.getDocumentText());
assertEquals(2, JCasUtil.select(jCas, Metadata.class).size());

// Third document. NOTE(review): the expected text suggests some input lines are skipped by the
// reader configuration, which is not visible here - confirm against the test setup.
bcr.getNext(jCas.getCas());
assertEquals("This is the fourth line, but the third one we pick out", jCas.getDocumentText());
// Keep the Metadata annotation at index 1; how md is checked is outside this fragment.
md = JCasUtil.selectByIndex(jCas, Metadata.class, 1);

// Fourth document.
bcr.getNext(jCas.getCas());
assertEquals( "This is the sixth line, but the fourth and final one we pick out", jCas.getDocumentText());
/**
 * Checks that the reader returns the row present at start-up, reports no further rows, and then
 * returns a row inserted afterwards.
 *
 * <p>The reader is closed and {@code teardown()} is run in {@code finally} blocks, so a failing
 * assertion does not skip the clean-up that follows {@code setup()}.
 *
 * @throws Exception if the database helpers or the reader fail
 */
@Test
public void test() throws Exception {
  setup();
  try {
    BaleenCollectionReader bcr =
        getCollectionReader(
            SqlRowReader.PARAM_SQL_CONNECTION,
            "jdbc:h2:mem:test",
            SqlRowReader.PARAM_SQL_TABLE,
            "my_table",
            SqlRowReader.PARAM_SQL_IGNORE,
            "ignore_me",
            SqlRowReader.PARAM_TEXT_COLUMNS,
            new String[] {"text_column1", "text_column2"});
    try {
      // Row available after setup().
      assertTrue(bcr.hasNext());
      bcr.getNext(jCas);
      testJcas(jCas, 1);

      jCas.reset();

      assertFalse(bcr.hasNext());

      // A row added while the reader is open must be picked up as well.
      insert();

      assertTrue(bcr.hasNext());
      bcr.getNext(jCas);
      testJcas(jCas, 2);

      assertFalse(bcr.hasNext());
    } finally {
      bcr.close();
    }
  } finally {
    teardown();
  }
}
/**
 * Sends two e-mails, then reads them over POP3 with {@code EmailReader.PARAM_WAIT} set to 15,
 * sleeping 20 seconds between the first and the second read, and finally checks that no third
 * message is offered.
 *
 * @throws Exception if sending mail, the reader or the sleep fails
 */
@Test
public void testPopLongWait() throws Exception {
  final String recipient = "to@localhost.com";
  final String sender = "from@localhost.com";

  String firstSubject = GreenMailUtil.random();
  String firstBody = GreenMailUtil.random();
  String secondSubject = GreenMailUtil.random();
  String secondBody = GreenMailUtil.random();

  GreenMailUtil.sendTextEmailTest(recipient, sender, firstSubject, firstBody);
  GreenMailUtil.sendTextEmailTest(recipient, sender, secondSubject, secondBody);

  BaleenCollectionReader collectionReader =
      getCollectionReader(
          EmailReader.PARAM_PROTOCOL,
          "pop3",
          EmailReader.PARAM_WAIT,
          15,
          EmailReader.PARAM_SERVER,
          greenMail.getPop3().getBindTo(),
          EmailReader.PARAM_PORT,
          greenMail.getPop3().getPort(),
          EmailReader.PARAM_USER,
          recipient,
          EmailReader.PARAM_PASS,
          "password",
          EmailReader.PARAM_PROCESS,
          "content");
  collectionReader.initialize();

  // First message.
  assertTrue(collectionReader.doHasNext());
  collectionReader.getNext(jCas);
  jCas.reset();

  // Pause for longer than the configured wait value before asking for the second message.
  long pauseMillis = 20000;
  Thread.sleep(pauseMillis);

  // Second message.
  assertTrue(collectionReader.doHasNext());
  collectionReader.getNext(jCas);
  jCas.reset();

  assertFalse(collectionReader.doHasNext());

  collectionReader.close();
}