/**
 * PUTs {@code testOCR.pdf} with an OCR-only PDF strategy plus OCR language and
 * file-size headers, then checks that the returned text contains the expected greeting.
 * Returns early, asserting nothing, when {@code hasTesseract} reports false.
 */
@Test
public void testOCRLanguageConfig() throws Exception {
    boolean tesseractAvailable = new TesseractOCRParser().hasTesseract(new TesseractOCRConfig());
    if (!tesseractAvailable) {
        return;
    }

    WebClient client = WebClient.create(endPoint + TIKA_PATH)
            .accept("text/plain")
            .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX + "OcrStrategy", "ocr_only")
            .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX + "Language", "eng+fra")
            .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX + "MinFileSizeToOcr", "10")
            .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX + "MaxFileSizeToOcr", "1000000000");
    Response response = client.put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));

    InputStream entity = (InputStream) response.getEntity();
    String responseMsg = getStringFromInputStream(entity);
    assertContains("Happy New Year 2003!", responseMsg);
}
/**
 * PUTs {@code test.doc} to the "all" endpoint requesting a zip archive and checks
 * that the text read back from the archive is non-null and contains "test".
 */
@Test
public void testText() throws Exception {
    WebClient client = WebClient.create(endPoint + ALL_PATH)
            .header(CONTENT_TYPE, APPLICATION_XML)
            .accept("application/zip");
    Response response = client.put(ClassLoader.getSystemResourceAsStream("test.doc"));

    InputStream archive = (InputStream) response.getEntity();
    String responseMsg = readArchiveText(archive);
    assertNotNull(responseMsg);
    assertTrue(responseMsg.contains("test"));
}
/**
 * Sends the PDF {@code averageCharTolerance} header with the value "2.0"
 * and expects the request to succeed with status 200.
 */
@Test
public void testFloatInHeader() {
    String headerName = TikaResource.X_TIKA_PDF_HEADER_PREFIX + "averageCharTolerance";
    WebClient client = WebClient.create(endPoint + TIKA_PATH)
            .type("application/pdf")
            .accept("text/plain")
            .header(headerName, "2.0");

    Response response = client.put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
    int status = response.getStatus();
    assertEquals(200, status);
}
/**
 * Sends the OCR {@code trustedPageSeparator} header (value: a single space)
 * and expects the server to answer with status 500.
 */
@Test
public void testTrustedMethodPrevention() {
    String headerName = TikaResource.X_TIKA_OCR_HEADER_PREFIX + "trustedPageSeparator";
    WebClient client = WebClient.create(endPoint + TIKA_PATH)
            .type("application/pdf")
            .accept("text/plain")
            .header(headerName, "\u0020");

    Response response = client.put(ClassLoader.getSystemResourceAsStream("testOCR.pdf"));
    int status = response.getStatus();
    assertEquals(500, status);
}
/**
 * For every entry in {@code PATHS}, PUTs the password-protected resource and expects
 * a non-null response with status {@code UNPROCESSEABLE} and an empty body.
 */
@Test
public void testEncrypted() throws Exception {
    // The Content-Disposition value is the same for every path.
    String disposition = "attachment; filename=" + TEST_PASSWORD_PROTECTED;

    for (String resourcePath : PATHS) {
        WebClient client = WebClient.create(endPoint + resourcePath)
                .accept("*/*")
                .header("Content-Disposition", disposition);
        Response response = client.put(ClassLoader.getSystemResourceAsStream(TEST_PASSWORD_PROTECTED));

        assertNotNull("null response: " + resourcePath, response);
        assertEquals("unprocessable: " + resourcePath, UNPROCESSEABLE, response.getStatus());

        InputStream entity = (InputStream) response.getEntity();
        String body = getStringFromInputStream(entity);
        assertEquals("should be empty: " + resourcePath, "", body);
    }
}
/**
 * For every entry in {@code PATHS}, PUTs the password-protected resource and expects
 * a non-null response with status {@code UNPROCESSEABLE} whose body names
 * {@code org.apache.tika.exception.EncryptedDocumentException}.
 */
@Test
public void testEncrypted() throws Exception {
    // The Content-Disposition value is the same for every path.
    String disposition = "attachment; filename=" + TEST_PASSWORD_PROTECTED;

    for (String resourcePath : PATHS) {
        WebClient client = WebClient.create(endPoint + resourcePath)
                .accept("*/*")
                .header("Content-Disposition", disposition);
        Response response = client.put(ClassLoader.getSystemResourceAsStream(TEST_PASSWORD_PROTECTED));

        assertNotNull("null response: " + resourcePath, response);
        assertEquals("unprocessable: " + resourcePath, UNPROCESSEABLE, response.getStatus());

        InputStream entity = (InputStream) response.getEntity();
        String body = getStringFromInputStream(entity);
        assertContains("org.apache.tika.exception.EncryptedDocumentException", body);
    }
}
@Test public void testDataIntegrityCheck() throws Exception { Response response = null; try { response = WebClient.create(endPoint + TIKA_PATH) .type("application/pdf") .accept("text/plain") .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX + "tesseractPath", "C://tmp//hello.bat\u0000") .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf")); assertEquals(400, response.getStatus()); } catch (ProcessingException e) { //can't tell why this intermittently happens. :( //started after the upgrade to 3.2.7 } response = WebClient.create(endPoint + TIKA_PATH) .type("application/pdf") .accept("text/plain") .header(TikaResource.X_TIKA_OCR_HEADER_PREFIX + "tesseractPath", "bogus path") .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf")); assertEquals(200, response.getStatus()); }
/**
 * PUTs {@code FOO_CSV} to the detect-stream endpoint, naming the file in the
 * Content-Disposition header, and expects the detected type "text/csv".
 */
@Test
public void testDetectCsvWithExt() throws Exception {
    // Built once and actually used for the request (previously computed but ignored).
    String url = endPoint + DETECT_STREAM_PATH;
    Response response = WebClient
            .create(url)
            .type("text/csv")
            .accept("*/*")
            .header("Content-Disposition", "attachment; filename=" + FOO_CSV)
            .put(ClassLoader.getSystemResourceAsStream(FOO_CSV));
    assertNotNull(response);

    String readMime = getStringFromInputStream((InputStream) response.getEntity());
    assertEquals("text/csv", readMime);
}
@Test public void testPDFOCRConfig() throws Exception { if (! new TesseractOCRParser().hasTesseract(new TesseractOCRConfig())) { return; } Response response = WebClient.create(endPoint + TIKA_PATH) .type("application/pdf") .accept("text/plain") .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"OcrStrategy", "no_ocr") .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf")); String responseMsg = getStringFromInputStream((InputStream) response .getEntity()); assertTrue(responseMsg.trim().equals("")); response = WebClient.create(endPoint + TIKA_PATH) .type("application/pdf") .accept("text/plain") .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"OcrStrategy", "ocr_only") .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf")); responseMsg = getStringFromInputStream((InputStream) response .getEntity()); assertContains("Happy New Year 2003!", responseMsg); //now try a bad value response = WebClient.create(endPoint + TIKA_PATH) .type("application/pdf") .accept("text/plain") .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX + "OcrStrategy", "non-sense-value") .put(ClassLoader.getSystemResourceAsStream("testOCR.pdf")); assertEquals(500, response.getStatus()); }
.type("application/pdf") .accept("text/plain") .header(TikaResource.X_TIKA_PDF_HEADER_PREFIX+"sortByPosition", "false") .put(ClassLoader.getSystemResourceAsStream("testPDFTwoTextBoxes.pdf")); responseMsg = getStringFromInputStream((InputStream) response
@Test public void testPasswordProtected() throws Exception { Response response = WebClient .create(endPoint + META_PATH) .type("application/vnd.ms-excel") .accept("application/json") .put(ClassLoader .getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED)); // Won't work, no password given assertEquals(500, response.getStatus()); // Try again, this time with the password response = WebClient .create(endPoint + META_PATH) .type("application/vnd.ms-excel") .accept("application/json") .header("Password", "password") .put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED)); // Will work assertEquals(200, response.getStatus()); // Check results Reader reader = new InputStreamReader((InputStream) response.getEntity(), UTF_8); List<Metadata> metadataList = JsonMetadataList.fromJson(reader); assertNotNull(metadataList.get(0).get(TikaCoreProperties.CREATOR)); assertEquals("pavel", metadataList.get(0).get(TikaCoreProperties.CREATOR)); }
@Test public void testDetectCsvNoExt() throws Exception { String url = endPoint + DETECT_STREAM_PATH; Response response = WebClient .create(endPoint + DETECT_STREAM_PATH) .type("text/csv") .accept("*/*") .header("Content-Disposition", "attachment; filename=" + CDEC_CSV_NO_EXT) .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); assertNotNull(response); String readMime = getStringFromInputStream((InputStream) response .getEntity()); assertEquals("text/plain", readMime); // now trick it by adding .csv to the end response = WebClient .create(endPoint + DETECT_STREAM_PATH) .type("text/csv") .accept("*/*") .header("Content-Disposition", "attachment; filename=" + CDEC_CSV_NO_EXT + ".csv") .put(ClassLoader.getSystemResourceAsStream(CDEC_CSV_NO_EXT)); assertNotNull(response); readMime = getStringFromInputStream((InputStream) response.getEntity()); assertEquals("text/csv", readMime); } }
.type("application/vnd.ms-excel") .accept("text/csv") .header("Password", "wrong password") .put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED)); .type("application/vnd.ms-excel") .accept("text/csv") .header("Password", "password") .put(ClassLoader.getSystemResourceAsStream(TikaResourceTest.TEST_PASSWORD_PROTECTED));
/**
 * Sets a header on {@code webClient}. When {@code HttpUtils.getHeaderDelegate}
 * returns a delegate for the value, the delegate's string form is sent;
 * otherwise the value is passed through unchanged.
 *
 * @param rd    runtime delegate handed to {@code HttpUtils.getHeaderDelegate}
 * @param name  header name
 * @param value header value
 */
private void doSetHeader(RuntimeDelegate rd, String name, Object value) {
    HeaderDelegate<Object> delegate = HttpUtils.getHeaderDelegate(rd, value);
    Object headerValue = (delegate == null) ? value : delegate.toString(value);
    webClient.header(name, headerValue);
}
@org.junit.Test public void testServiceWithFakeToken() throws Exception { URL busFile = OAuth2FiltersTest.class.getResource("client.xml"); // Now invoke on the service with the faked access token String address = "https://localhost:" + PORT + "/secured/bookstore/books"; WebClient client = WebClient.create(address, OAuth2TestUtils.setupProviders(), busFile.toString()); client.header("Authorization", "Bearer " + UUID.randomUUID().toString()); Response response = client.post(new Book("book", 123L)); assertNotEquals(response.getStatus(), 200); }
/**
 * GETs the book-headers resource with BOOK header values 1, 2 and 4 and expects
 * status 400 with the body "Constructor: Header value 3 is required".
 */
@Test
public void testGetBookByHeaderPerRequestConstructorFault() throws Exception {
    WebClient client = WebClient.create("http://localhost:" + PORT + "/bookstore2/bookheaders");
    Response reply = client
            .accept("application/xml")
            .header("BOOK", "1", "2", "4")
            .get();

    assertEquals(400, reply.getStatus());
    assertEquals("Constructor: Header value 3 is required", reply.readEntity(String.class));
}
/**
 * GETs book 123 as text/xml with the {@code fail-write} header set to "yes"
 * and expects status 500.
 */
@Test
public void testGetBook123Fail() throws Exception {
    String address = "http://localhost:" + PORT + "/bookstore/books/text/xml/123";
    Response reply = WebClient.create(address)
            .accept("text/xml")
            .header("fail-write", "yes")
            .get();

    assertEquals(500, reply.getStatus());
}
/**
 * GETs the injected book-headers resource with BOOK header values 1, 2 and 3
 * and expects a book whose id is 123.
 */
@Test
public void testGetBookByHeaderPerRequestInjected() throws Exception {
    WebClient client = WebClient.create("http://localhost:" + PORT + "/bookstore2/bookheaders/injected");
    Book found = client
            .accept("application/xml")
            .header("BOOK", "1", "2", "3")
            .get(Book.class);

    assertEquals(123L, found.getId());
}
/**
 * Invokes DELETE on the mistyped-books resource through a client configured with a
 * {@code ReplaceBodyFilter}, a "text/mistypedxml" Accept type and a THEMETHOD header
 * of "PUT", and expects the returned book to have id 561.
 */
@Test
public void testReplaceBookMistypedCTAndHttpVerb() throws Exception {
    WebClient client = WebClient.create(
            "http://localhost:" + PORT + "/bookstore/books2/mistyped",
            Collections.singletonList(new ReplaceBodyFilter()));
    client.accept("text/mistypedxml");
    client.type("text/xml");
    client.header("THEMETHOD", "PUT");

    Book replaced = client.invoke("DELETE", new Book("book", 555L), Book.class);
    assertEquals(561L, replaced.getId());
}

@Test
/**
 * POSTs "book" as text/plain with a {@code CustomHeader} of "custom" and expects
 * the body "book" back, with the response carrying the same CustomHeader value.
 */
@Test
public void testGetBookLowCaseHeader() throws Exception {
    WebClient client = WebClient.create("http://localhost:" + PORT + "/bookstore/booksecho3");
    client.type("text/plain");
    client.accept("text/plain");
    client.header("CustomHeader", "custom");

    String echoedName = client.post("book", String.class);
    assertEquals("book", echoedName);

    String echoedHeader = client.getResponse().getHeaderString("CustomHeader");
    assertEquals("custom", echoedHeader);
}

@Test