/**
 * Runs the conversion with CSV output enabled and a "[BASENAME]_[FILENUMBER]" prefix, expects two
 * CSV files, and spot-checks the row count and three rows of the first file.
 */
@Test
public void testCsvConversion() throws IOException {
    PdfToExcelParameters csvParams = getParams();
    csvParams.setOutputPrefix("[BASENAME]_[FILENUMBER]");
    csvParams.setCsvFormat(true);
    execute(csvParams);
    testContext.assertTaskCompleted();

    testContext.assertOutputSize(2)
            .assertOutputContainsFilenames("tabular-data_1.csv", "tabular-data_2.csv")
            .forRawOutput("tabular-data_1.csv", csvPath -> {
                // Rows expected at index 0 (header), 10 and 13 of the first CSV file.
                List<String> headerRow = Arrays.asList("OrderDate", "Region", "Rep", "Item", "Units", "Unit Cost", "Total");
                List<String> rowTen = Arrays.asList("6/8/15", "East", "Jones", "Binder", "60", "8.99", "539.40");
                List<String> rowThirteen = Arrays.asList("7/29/15", "East", "Parent", "Binder", "81", "19.99", "1,619.19");
                try {
                    List<List<String>> rows = parseCsv(csvPath.toFile());
                    assertThat(rows.size(), is(37));
                    assertThat(rows.get(0), is(headerRow));
                    assertThat(rows.get(10), is(rowTen));
                    assertThat(rows.get(13), is(rowThirteen));
                } catch (Exception e) {
                    // The callback cannot throw checked exceptions, so wrap and rethrow.
                    throw new RuntimeException(e);
                }
            });
}
/**
 * Runs the conversion with merging of tables spanning multiple pages enabled and verifies that the
 * single output workbook holds one sheet with the expected name, row count and sample rows.
 */
@Test
public void testMergedTables() throws IOException {
    PdfToExcelParameters params = getParams();
    params.setMergeTablesSpanningMultiplePages(true);
    execute(params);
    testContext.assertTaskCompleted();
    testContext.assertOutputSize(1).assertOutputContainsFilenames("tabular-data.xlsx").forEachRawOutput(p -> {
        // try-with-resources closes both the stream and the workbook, even when an assertion fails,
        // so the output file is not left open after the test.
        try (InputStream in = new FileInputStream(p.toFile()); Workbook wb = WorkbookFactory.create(in)) {
            assertThat(wb.getNumberOfSheets(), is(1));
            Sheet sheet = wb.getSheetAt(0);
            assertThat(sheet.getPhysicalNumberOfRows(), is(44));
            assertThat(sheet.getSheetName(), is("Table 1 (Pages 1, 2)"));
            assertThat(getDataRow(sheet, 0), is(Arrays.asList("OrderDate", "Region", "Rep", "Item", "Units", "Unit Cost", "Total")));
            assertThat(getDataRow(sheet, 10), is(Arrays.asList("6/8/15", "East", "Jones", "Binder", "60", "8.99", "539.40")));
            assertThat(getDataRow(sheet, 13), is(Arrays.asList("7/29/15", "East", "Parent", "Binder", "81", "19.99", "1,619.19")));
            assertThat(getDataRow(sheet, 37), is(Arrays.asList("9/10/16", "Central", "Gill", "Pencil", "7", "1.29", "9.03")));
        } catch (Exception e) {
            // The callback cannot throw checked exceptions, so wrap and rethrow.
            throw new RuntimeException(e);
        }
    });
}
/**
 * Runs the conversion with the default parameters and verifies that the single output workbook
 * holds two sheets, checking the name, row count and sample rows of the first one.
 */
@Test
public void testExcelConversion() throws IOException {
    PdfToExcelParameters params = getParams();
    execute(params);
    testContext.assertTaskCompleted();
    testContext.assertOutputSize(1).assertOutputContainsFilenames("tabular-data.xlsx").forEachRawOutput(p -> {
        // try-with-resources closes both the stream and the workbook, even when an assertion fails,
        // so the output file is not left open after the test.
        try (InputStream in = new FileInputStream(p.toFile()); Workbook wb = WorkbookFactory.create(in)) {
            assertThat(wb.getNumberOfSheets(), is(2));
            Sheet sheet = wb.getSheetAt(0);
            assertThat(sheet.getPhysicalNumberOfRows(), is(37));
            assertThat(sheet.getSheetName(), is("Table 1 (Page 1)"));
            assertThat(getDataRow(sheet, 0), is(Arrays.asList("OrderDate", "Region", "Rep", "Item", "Units", "Unit Cost", "Total")));
            assertThat(getDataRow(sheet, 10), is(Arrays.asList("6/8/15", "East", "Jones", "Binder", "60", "8.99", "539.40")));
            assertThat(getDataRow(sheet, 13), is(Arrays.asList("7/29/15", "East", "Parent", "Binder", "81", "19.99", "1,619.19")));
        } catch (Exception e) {
            // The callback cannot throw checked exceptions, so wrap and rethrow.
            throw new RuntimeException(e);
        }
    });
}
/**
 * Asserts that an output document with the given filename exists and has the expected number of
 * pages. This assert will work only for multiple output tasks.
 *
 * @param filename
 *            name of the output file, resolved against the output directory of this context
 * @param expected
 *            the expected number of pages
 * @return this context, so that further assertions can be chained
 * @throws IOException
 *             declared for the parsing and closing of the document
 * @see #assertPages(int)
 */
public TaskTestContext assertPages(String filename, int expected) throws IOException {
    assertOutputContainsFilenames(filename);
    File outputFile = new File(fileOutput, filename);
    try (PDDocument parsed = PDFParser.parse(SeekableSources.seekableSourceFrom(outputFile))) {
        int actualPages = parsed.getNumberOfPages();
        assertEquals(expected, actualPages);
    }
    return this;
}
/**
 * Extracts the payslip sample by outline at level 1 without touching the "include page after"
 * setting and expects the first employee document to have a single page.
 */
@Test
public void testIncludingPageAfterOff() throws IOException {
    ExtractByOutlineParameters outlineParams = setUpParameters(1, "pdf/payslip_with_bookmarks.pdf", null);
    testContext.directoryOutputTo(outlineParams);
    execute(outlineParams);

    testContext
            .assertOutputContainsFilenames("1_Employee One.pdf", "3_Employee Three.pdf", "2_Employee Two.pdf")
            .assertOutputSize(3)
            .assertPages("1_Employee One.pdf", 1);
}
/**
 * Extracts by outline at level 3 and expects the thirteen documents named after the bookmarks
 * found at that level.
 */
@Test
public void testSplitDeeperLevel() throws IOException {
    ExtractByOutlineParameters levelThreeParams = setUpParameters(3);
    testContext.directoryOutputTo(levelThreeParams);
    execute(levelThreeParams);

    testContext.assertOutputContainsFilenames(
            "1_Creating Assemblies.pdf",
            "2_Using Profiles.pdf",
            "3_Using Profiles by OS.pdf",
            "4_Setting Source Code Control System.pdf",
            "5_Versioning.pdf",
            "6_Using internal Repositories.pdf",
            "7_Installing Artifact in Remote Repository.pdf",
            "8_Install 3rdParty jar to Remote Repository.pdf",
            "9_Preparing Releases.pdf",
            "10_Performing Releases.pdf",
            "11_IntegrationTest with tomcat.pdf",
            "12_Online webdevelopment with Jetty plugin.pdf",
            "13_Online webdevelopment and automatic deployment with tomcat plugin.pdf");
    testContext.assertOutputSize(13);
}
/**
 * Extracts the payslip sample by outline at level 1 with "include page after" enabled and expects
 * the first employee document to have two pages.
 */
@Test
public void testIncludingPageAfterOn() throws IOException {
    ExtractByOutlineParameters outlineParams = setUpParameters(1, "pdf/payslip_with_bookmarks.pdf", null);
    outlineParams.setIncludePageAfter(true);
    testContext.directoryOutputTo(outlineParams);
    execute(outlineParams);

    testContext
            .assertOutputContainsFilenames("1_Employee One.pdf", "3_Employee Three.pdf", "2_Employee Two.pdf")
            .assertOutputSize(3)
            .assertPages("1_Employee One.pdf", 2);
}
/**
 * Adds the outline sample a second time under the name "file2.pdf" and extracts at level 2 with a
 * prefix that includes the base name, expecting outputs for both sources and 36 files in total.
 */
@Test
public void testBatchFilesWithConflictingOutputFiles() throws IOException {
    ExtractByOutlineParameters batchParams = setUpParameters(2);
    batchParams.addSource(customInput("pdf/extract_by_outline_sample.pdf", "file2.pdf"));
    batchParams.setOutputPrefix("[BASENAME]_[FILENUMBER]_[BOOKMARK_NAME_STRICT]");
    testContext.directoryOutputTo(batchParams);
    execute(batchParams);

    testContext
            .assertOutputContainsFilenames("file1_1_Invoking Maven.pdf", "file2_1_Invoking Maven.pdf")
            .assertOutputSize(36);
}
/**
 * Extracts by outline at level 2 with "include page after" enabled, expects the eighteen documents
 * named after the bookmarks and checks that document 17 has two pages.
 */
@Test
public void testSplitAtTopLevel() throws IOException {
    ExtractByOutlineParameters topLevelParams = setUpParameters(2);
    topLevelParams.setIncludePageAfter(true);
    testContext.directoryOutputTo(topLevelParams);
    execute(topLevelParams);

    testContext.assertOutputContainsFilenames(
            "1_Invoking Maven.pdf",
            "2_Creating a new Project jar.pdf",
            "3_Creating a new Project war.pdf",
            "4_Standard Project Structure.pdf",
            "5_Compiling.pdf",
            "6_Running Unit Tests Code Coverage.pdf",
            "7_Packaging jar war.pdf",
            "8_Installing Artifact in Local Repository.pdf",
            "9_Installing 3rdParty jar in local Repository.pdf",
            "10_Cleaning Up.pdf",
            "11_Creating Eclipse Project Structure.pdf",
            "12_Maven Project file pomxml.pdf",
            "13_Adding Dependencies.pdf",
            "14_Adding Developers.pdf",
            "15_Setting Compiler Version.pdf",
            "16_Assemblies and Profiles.pdf",
            "17_Versioning Repositories and Releases.pdf",
            "18_WebDevelopment.pdf");
    testContext.assertOutputSize(18);
    testContext.assertPages("17_Versioning Repositories and Releases.pdf", 2);
}
/**
 * Runs the task on the short and medium inputs together with page 2 added to the parameters and
 * expects four outputs, two per source.
 */
@Test
public void batchMode() throws IOException {
    setUpParameters();
    parameters.addSource(shortInput());
    parameters.addSource(mediumInput());
    parameters.addPage(2);
    execute(parameters);

    testContext.assertTaskCompleted();
    testContext
            .assertOutputSize(4)
            .assertOutputContainsFilenames(
                    "1_short-test-file.pdf",
                    "3_short-test-file.pdf",
                    "1_medium-test-file.pdf",
                    "3_medium-test-file.pdf");
}
}
/**
 * Runs the task on the short and medium inputs together using the EVEN_PAGES predefined set and
 * expects nineteen outputs: two from the short file and seventeen from the medium one.
 */
@Test
public void batchMode() throws IOException {
    setUpParameters(PredefinedSetOfPages.EVEN_PAGES);
    parameters.addSource(shortInput());
    parameters.addSource(mediumInput());
    testContext.directoryOutputTo(parameters);
    execute(parameters);

    testContext.assertTaskCompleted();
    testContext
            .assertOutputSize(19)
            .assertOutputContainsFilenames(
                    "1_short-test-file.pdf",
                    "3_short-test-file.pdf",
                    "1_medium-test-file.pdf",
                    "3_medium-test-file.pdf",
                    "5_medium-test-file.pdf",
                    "7_medium-test-file.pdf",
                    "9_medium-test-file.pdf",
                    "11_medium-test-file.pdf",
                    "13_medium-test-file.pdf",
                    "15_medium-test-file.pdf",
                    "17_medium-test-file.pdf",
                    "19_medium-test-file.pdf",
                    "21_medium-test-file.pdf",
                    "23_medium-test-file.pdf",
                    "25_medium-test-file.pdf",
                    "27_medium-test-file.pdf",
                    "29_medium-test-file.pdf",
                    "31_medium-test-file.pdf",
                    "33_medium-test-file.pdf");
}
/**
 * Sets the file count start to 10 with a "[FILENUMBER]-[BASENAME]" prefix and expects the two
 * sources to produce "10-a.pdf" and "11-b.pdf".
 */
@Test
public void testFileCounterStartFrom() throws Exception {
    SetHeaderFooterParameters headerFooterParams = basicNoSources();

    // The same PDF is added twice under two different names.
    headerFooterParams.addSource(customInput("pdf/test_file.pdf", "a.pdf"));
    headerFooterParams.addSource(customInput("pdf/test_file.pdf", "b.pdf"));

    headerFooterParams.setFileCountStartFrom(10);
    headerFooterParams.setPattern("Foo");
    headerFooterParams.setOutputPrefix("[FILENUMBER]-[BASENAME]");

    execute(headerFooterParams);

    testContext.assertTaskCompleted();
    testContext.assertOutputContainsFilenames("10-a.pdf", "11-b.pdf");
}
/**
 * Splits the medium and regular inputs by a size of 100000 using a "[FILENUMBER]-[BASENAME]"
 * prefix and expects five outputs: four from the medium file and one from the regular one.
 */
@Test
public void batchMode() throws IOException {
    parameters = new SplitBySizeParameters(100000);
    parameters.setCompress(true);
    parameters.setVersion(PdfVersion.VERSION_1_6);

    // Source order matters here: the expected names number the medium file's parts first.
    parameters.addSource(mediumInput());
    parameters.addSource(regularInput());

    parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
    parameters.setOutputPrefix("[FILENUMBER]-[BASENAME]");
    testContext.directoryOutputTo(parameters);
    execute(parameters);

    testContext.assertTaskCompleted();
    testContext
            .assertOutputSize(5)
            .assertOutputContainsFilenames(
                    "1-medium-test-file.pdf",
                    "2-medium-test-file.pdf",
                    "3-medium-test-file.pdf",
                    "4-medium-test-file.pdf",
                    "5-test-file.pdf");
}
/**
 * Scales by 0.6 with ScaleType.PAGE and verifies that the media box and crop box of the first
 * page of each output equal a 357 x 505.2 rectangle at the origin.
 */
@Test
public void testPageScale() throws IOException {
    parameters = new ScaleParameters(0.6);
    setUpParameters();
    parameters.setScaleType(ScaleType.PAGE);
    execute(parameters);

    testContext.assertTaskCompleted();
    testContext
            .assertOutputSize(2)
            .assertOutputContainsFilenames("1_test_file.pdf", "2_test_file.pdf")
            .forEachPdfOutput(document -> {
                // NOTE(review): 357 x 505.2 looks like a 595 x 842 page scaled by 0.6 - confirm against the input.
                PDRectangle scaledBox = new PDRectangle(0f, 0f, 357f, 505.2f);
                PDPage firstPage = document.getPage(0);
                assertEquals(scaledBox, firstPage.getMediaBox());
                assertEquals(scaledBox, firstPage.getCropBox());
            });
}
}