/**
 * Reports whether another document is available, as required by the UIMA collection reader
 * contract.
 *
 * <p>The check itself is delegated to {@code doHasNext()}; this method only brackets that call
 * with a "hasNext" function trace on the monitor. The trace is finished in a {@code finally}
 * block so that it is closed even when {@code doHasNext()} throws.
 *
 * <p>NOTE(review): earlier documentation described this method as polling until a document is
 * found in order to keep the pipeline alive. No such loop exists here, so any waiting would have
 * to live in {@code doHasNext()} or in the caller - confirm.
 *
 * @return the value returned by {@code doHasNext()}
 * @throws IOException if the underlying check fails with an I/O error
 * @throws CollectionException if the underlying check fails with a collection error
 */
@Override
public final boolean hasNext() throws IOException, CollectionException {
  monitor.startFunctionTrace("hasNext");
  try {
    return doHasNext();
  } finally {
    // Always close the trace, otherwise a failing doHasNext() leaves it open.
    monitor.finishFunctionTrace("hasNext");
  }
}
/**
 * Reports whether another document is available, as required by the UIMA collection reader
 * contract.
 *
 * <p>The check itself is delegated to {@code doHasNext()}; this method only brackets that call
 * with a "hasNext" function trace on the monitor. The trace is finished in a {@code finally}
 * block so that it is closed even when {@code doHasNext()} throws.
 *
 * <p>NOTE(review): earlier documentation described this method as polling until a document is
 * found in order to keep the pipeline alive. No such loop exists here, so any waiting would have
 * to live in {@code doHasNext()} or in the caller - confirm.
 *
 * @return the value returned by {@code doHasNext()}
 * @throws IOException if the underlying check fails with an I/O error
 * @throws CollectionException if the underlying check fails with a collection error
 */
@Override
public final boolean hasNext() throws IOException, CollectionException {
  monitor.startFunctionTrace("hasNext");
  try {
    return doHasNext();
  } finally {
    // Always close the trace, otherwise a failing doHasNext() leaves it open.
    monitor.finishFunctionTrace("hasNext");
  }
}
/**
 * Creating an empty sub-directory inside the input folder must not make the reader report a
 * document.
 */
@Test
public void testCreateDirectoryNotProcessed() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(FolderReader.PARAM_FOLDERS, new String[] {inputDir.getPath()});
  File folder = new File(inputDir, DIR);
  try {
    assertFalse(bcr.doHasNext());

    folder.mkdir();
    // Presumably TIMEOUT is long enough for the reader to notice the change - confirm.
    Thread.sleep(TIMEOUT);

    assertFalse(bcr.doHasNext());
  } finally {
    // Clean up even when an assertion fails, so neither the directory nor the reader is left
    // behind.
    folder.delete();
    bcr.close();
  }
}
/**
 * A sub-directory created after the reader has started must itself be picked up: a file created
 * inside it has to be offered as a document.
 */
@Test
public void testCreateDirectoryIsWatched() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(FolderReader.PARAM_FOLDERS, new String[] {inputDir.getPath()});
  File folder = new File(inputDir, DIR);
  File f11 = new File(folder, TEXT1_FILE);
  try {
    assertFalse(bcr.doHasNext());

    // The new directory alone must not produce a document.
    folder.mkdir();
    Thread.sleep(TIMEOUT);
    assertFalse(bcr.doHasNext());

    // A file inside the new directory must be reported.
    f11.createNewFile();
    Thread.sleep(TIMEOUT);
    assertNextSourceNotNull(bcr);
  } finally {
    // Clean up even when an assertion fails; the file goes first so the directory is empty.
    f11.delete();
    folder.delete();
    bcr.close();
  }
}
/**
 * With {@code PARAM_RECURSIVE} set to false, a file created inside a new sub-directory must not
 * be offered as a document.
 */
@Test
public void testCreateDirectoryIsNotWatchedIfNotRecursive() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(
          FolderReader.PARAM_RECURSIVE,
          false,
          FolderReader.PARAM_FOLDERS,
          new String[] {inputDir.getPath()});
  File folder = new File(inputDir, DIR);
  File f11 = new File(folder, TEXT1_FILE);
  try {
    assertFalse(bcr.doHasNext());

    folder.mkdir();
    Thread.sleep(TIMEOUT);
    assertFalse(bcr.doHasNext());

    // The nested file must stay invisible to a non-recursive reader.
    f11.createNewFile();
    Thread.sleep(TIMEOUT);
    assertFalse(bcr.doHasNext());
  } finally {
    // Clean up even when an assertion fails; the file goes first so the directory is empty.
    f11.delete();
    folder.delete();
    bcr.close();
  }
}
@Test public void testDeleteFile() throws Exception { BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[] {inputDir.getPath()}); assertFalse(bcr.doHasNext()); File f = new File(inputDir, TEXT1_FILE); f.createNewFile(); // Wait for file to be written and change detected Thread.sleep(TIMEOUT); f.delete(); // Wait for file to be written and change detected Thread.sleep(TIMEOUT); assertFalse(bcr.doHasNext()); bcr.close(); }
/** Files already present when the reader is created are each reported exactly once. */
@Test
public void testExistingFiles() throws Exception {
  // Create both input files before the reader exists.
  for (String fileName : new String[] {TEXT1_FILE, TEST2_FILE}) {
    new File(inputDir, fileName).createNewFile();
  }

  final String[] watchedFolders = {inputDir.getPath()};
  final BaleenCollectionReader bcr =
      getCollectionReader(FolderReader.PARAM_FOLDERS, watchedFolders);

  // One document per pre-existing file, then nothing more.
  for (int seen = 0; seen < 2; seen++) {
    assertNextSourceNotNull(bcr);
  }
  assertFalse(bcr.doHasNext());

  bcr.close();
}
/**
 * With the accepted pattern {@code .*[2-3].*}, the test expects exactly two of the three created
 * files to be reported.
 */
@Test
public void testFilenameFilter2() throws Exception {
  for (String fileName : new String[] {TEXT1_FILE, "test2.log", "test3.TXT"}) {
    new File(inputDir, fileName).createNewFile();
  }

  final String[] watchedFolders = {inputDir.getPath()};
  final String[] acceptedPatterns = {".*[2-3].*"};
  final BaleenCollectionReader bcr =
      getCollectionReader(
          FolderReader.PARAM_FOLDERS,
          watchedFolders,
          FolderReader.PARAM_ACCEPTED_PATTERNS,
          acceptedPatterns);

  // Two documents pass the filter, after which the reader is exhausted.
  for (int seen = 0; seen < 2; seen++) {
    assertNextSourceNotNull(bcr);
  }
  assertFalse(bcr.doHasNext());

  bcr.close();
}
@Test public void testCreateFile() throws Exception { BaleenCollectionReader bcr = getCollectionReader(FolderReader.PARAM_FOLDERS, new String[] {inputDir.getPath()}); assertFalse(bcr.doHasNext()); File f = new File(inputDir, TEXT1_FILE); f.createNewFile(); // Wait for file to be written and change detected Thread.sleep(TIMEOUT); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); assertFilesEquals(f.getPath(), getSource(jCas)); assertFalse(bcr.doHasNext()); bcr.close(); }
/**
 * With the accepted pattern {@code .*\.txt}, the test expects exactly two of the three created
 * files to be reported.
 */
@Test
public void testFilenameFilter1() throws Exception {
  for (String fileName : new String[] {TEXT1_FILE, "test2.log", "test3.TXT"}) {
    new File(inputDir, fileName).createNewFile();
  }

  final String[] watchedFolders = {inputDir.getPath()};
  final String[] acceptedPatterns = {".*\\.txt"};
  final BaleenCollectionReader bcr =
      getCollectionReader(
          FolderReader.PARAM_FOLDERS,
          watchedFolders,
          FolderReader.PARAM_ACCEPTED_PATTERNS,
          acceptedPatterns);

  // Two documents pass the filter, after which the reader is exhausted.
  for (int seen = 0; seen < 2; seen++) {
    assertNextSourceNotNull(bcr);
  }
  assertFalse(bcr.doHasNext());

  bcr.close();
}
/** The first document read from the temporary folder starts with the expected body text. */
@Test
public void testDocumentText() throws Exception {
  final String folder = tmpDir.toAbsolutePath().toString();
  final BaleenCollectionReader bcr = getCollectionReader(MimeReader.PARAM_FOLDER, folder);
  bcr.initialize();

  final boolean available = bcr.doHasNext();
  assertTrue(available);
  bcr.getNext(jCas.getCas());

  final String text = jCas.getDocumentText();
  assertTrue(text.startsWith("I'm going to eat you!"));

  bcr.close();
}
}
/** The first document read from the temporary folder contains the expected article text. */
@Test
public void testDocumentText() throws Exception {
  final String folder = tmpDir.toAbsolutePath().toString();
  final BaleenCollectionReader bcr =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          folder,
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  final boolean available = bcr.doHasNext();
  assertTrue(available);
  bcr.getNext(jCas.getCas());

  final String text = jCas.getDocumentText();
  assertTrue(text.contains("This is the text of the article."));

  bcr.close();
}
@Test public void testCreateFileDefaultDirectory() throws Exception { BaleenCollectionReader bcr = getCollectionReader(); assertTrue( bcr.doHasNext()); // There will be files in the current directory, so we can just check // that it's picked them up. bcr.getNext(jCas.getCas()); assertTrue(getSource(jCas).contains(System.getProperty("user.dir"))); bcr.close(); }
@Test public void test() throws IOException, UIMAException { BaleenCollectionReader bcr = getCollectionReader(ReutersReader.KEY_PATH, tmpDir.toAbsolutePath().toString()); bcr.initialize(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); // assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas)); assertEquals("Some example\ntext.", jCas.getDocumentText()); jCas.reset(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); // assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas)); assertEquals("Another example", jCas.getDocumentText()); assertFalse(bcr.doHasNext()); bcr.close(); } }
@Test public void testPopWait() throws Exception { BaleenCollectionReader bcr = getCollectionReader( EmailReader.PARAM_PROTOCOL, "pop3", EmailReader.PARAM_WAIT, 5, EmailReader.PARAM_SERVER, greenMail.getPop3().getBindTo(), EmailReader.PARAM_PORT, greenMail.getPop3().getPort(), EmailReader.PARAM_USER, "to@localhost.com", EmailReader.PARAM_PASS, "password", EmailReader.PARAM_PROCESS, "content"); bcr.initialize(); assertFalse(bcr.doHasNext()); GreenMailUtil.sendTextEmailTest( "to@localhost.com", "from@localhost.com", GreenMailUtil.random(), GreenMailUtil.random()); assertFalse(bcr.doHasNext()); // Should be a 5 second delay before it returns true Thread.sleep(5000); assertTrue(bcr.doHasNext()); bcr.close(); }
/** The first document carries a DocumentAnnotation whose source URI equals SOURCE_URL. */
@Test
public void testDocumentMetadata() throws Exception {
  final String folder = tmpDir.toAbsolutePath().toString();
  final BaleenCollectionReader bcr =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          folder,
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  final boolean available = bcr.doHasNext();
  assertTrue(available);
  bcr.getNext(jCas.getCas());

  final AnnotationIndex<DocumentAnnotation> documentAnnotations =
      jCas.getAnnotationIndex(DocumentAnnotation.class);
  assertTrue(contains(documentAnnotations, d -> d.getSourceUri().equals(SOURCE_URL)));

  bcr.close();
}
/**
 * Asserts that the reader has a document, reads it into the shared jCas, asserts that its source
 * is not null, and then resets the jCas for the next read.
 *
 * @param bcr the reader to pull the next document from
 * @throws Exception if checking for, or reading, the next document fails
 */
private void assertNextSourceNotNull(BaleenCollectionReader bcr) throws Exception {
  final boolean available = bcr.doHasNext();
  assertTrue(available);

  bcr.getNext(jCas.getCas());
  assertNotNull(getSource(jCas));

  // Leave the jCas empty so that the caller can read another document into it.
  jCas.reset();
}
@Test public void testMetadata() throws Exception { BaleenCollectionReader bcr = getCollectionReader(MimeReader.PARAM_FOLDER, tmpDir.toAbsolutePath().toString()); bcr.initialize(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); AnnotationIndex<Metadata> annotationIndex = jCas.getAnnotationIndex(Metadata.class); // returns the X-From and not the From, these can differ assertEquals(X_FROM, get(annotationIndex, "email.X-From")); assertEquals(TO, get(annotationIndex, "email.X-To")); assertEquals(MESSAGE_ID, get(annotationIndex, "email.Message-ID")); bcr.close(); }
/** With no message sent, the POP3 reader reports that nothing is available. */
@Test
public void testPopNoMessages() throws Exception {
  final int waitSeconds = 5;

  final BaleenCollectionReader bcr =
      getCollectionReader(
          EmailReader.PARAM_PROTOCOL,
          "pop3",
          EmailReader.PARAM_WAIT,
          waitSeconds,
          EmailReader.PARAM_SERVER,
          greenMail.getPop3().getBindTo(),
          EmailReader.PARAM_PORT,
          greenMail.getPop3().getPort(),
          EmailReader.PARAM_USER,
          "to@localhost.com",
          EmailReader.PARAM_PASS,
          "password");
  bcr.initialize();

  final boolean available = bcr.doHasNext();
  assertFalse(available);

  bcr.close();
}