// Load the PDF stored at `filename` and fetch the page at index 0.
// NOTE(review): the local named `doc` holds a PDPage, not the document itself.
// NOTE(review): no close() of `document` is visible in this fragment — confirm the surrounding code closes it.
PDDocument document = PDDocument.load(new File(filename)); PDPage doc = document.getPage(0);
/**
 * Turns one page of a PDF document into a Form XObject that can then be drawn on a page of the
 * target document.
 * <p>
 * Consider calling {@link #wrapInSaveRestore(PDPage) wrapInSaveRestore(PDPage)} before the
 * Form XObject is invoked, so that the graphics state is reset.
 *
 * @param sourceDoc the PDF document holding the page to copy
 * @param pageNumber the page to copy; handed unchanged to {@code sourceDoc.getPage(int)}
 * @return a Form XObject carrying the content of the selected page
 * @throws IOException if an I/O error occurs
 */
public PDFormXObject importPageAsForm(PDDocument sourceDoc, int pageNumber) throws IOException
{
    // Resolve the page and delegate to the overload that takes the page object.
    return importPageAsForm(sourceDoc, sourceDoc.getPage(pageNumber));
}
/**
 * Draws the configured page onto the supplied graphics context by running a
 * DebugTextStripper over it.
 *
 * @param graphics the target the stripper is constructed with
 * @throws IOException if stripping the page fails
 */
public void renderTo(Graphics2D graphics) throws IOException
{
    new DebugTextStripper(graphics)
            .stripPage(document, document.getPage(pageIndex), pageIndex, scale);
}
}
/**
 * Demo entry point: opens the bundled sample PDF and runs a CustomGraphicsStreamEngine
 * over the page at index 0.
 *
 * @param args command line arguments (not used)
 * @throws IOException if the PDF cannot be loaded or processed
 */
public static void main(String[] args) throws IOException
{
    File samplePdf = new File("src/main/resources/org/apache/pdfbox/examples/rendering/",
            "custom-render-demo.pdf");

    // The document is closed automatically once the engine has finished.
    try (PDDocument document = PDDocument.load(samplePdf))
    {
        new CustomGraphicsStreamEngine(document.getPage(0)).run();
    }
}
/**
 * Walks every page of the source document and calls {@code processPage} for those whose
 * one-based position ({@code currentPageNumber + 1}) lies within
 * {@code [startPage, endPage]}.
 * <p>
 * The running page counter is advanced for each visited page; the walk stops once the
 * counter has moved beyond {@code endPage}.
 *
 * @throws IOException If an IO error occurs.
 */
private void processPages() throws IOException
{
    for (int index = 0; index < sourceDocument.getNumberOfPages(); index++)
    {
        PDPage current = sourceDocument.getPage(index);

        boolean outsideRange = currentPageNumber + 1 < startPage
                || currentPageNumber + 1 > endPage;
        if (!outsideRange)
        {
            processPage(current);
            currentPageNumber++;
            continue;
        }

        // Past the requested range: nothing further to process.
        if (currentPageNumber > endPage)
        {
            break;
        }
        currentPageNumber++;
    }
}
/**
 * Prints the text found inside a fixed rectangular area of the page at index 0 of a PDF.
 *
 * @param args The command line arguments; exactly one is expected, the path of the PDF.
 *
 * @throws IOException If there is an error parsing the document.
 */
public static void main( String[] args ) throws IOException
{
    // Anything other than exactly one argument: show usage and stop.
    if( args.length != 1 )
    {
        usage();
        return;
    }

    try (PDDocument pdf = PDDocument.load(new File(args[0])))
    {
        PDFTextStripperByArea areaStripper = new PDFTextStripperByArea();
        areaStripper.setSortByPosition( true );

        Rectangle area = new Rectangle( 10, 280, 275, 60 );
        areaStripper.addRegion( "class1", area );

        areaStripper.extractRegions( pdf.getPage(0) );

        System.out.println( "Text in the area:" + area );
        System.out.println( areaStripper.getTextForRegion( "class1" ) );
    }
}
// Render the page with index `page` at SCALE, then look up the same page object
// in the document to read its crop box.
image = pdfRenderer.renderImage(page, SCALE); PDPage pdPage = document.getPage(page); PDRectangle cropBox = pdPage.getCropBox();
/**
 * Pages of a document may differ in size. This method derives the page dimensions and
 * rotation from the media box of the requested page and resets the image size to 100 percent.
 *
 * @param document the document that contains the page
 * @param page The 1-based page number for which the page size should be calculated.
 * @throws IllegalArgumentException if the page argument is lower than 1.
 */
private void calculatePageSize(PDDocument document, int page)
{
    if (page < 1)
    {
        throw new IllegalArgumentException("First page of pdf is 1, not " + page);
    }

    // getPage is called with page - 1 because the argument is 1-based.
    PDPage targetPage = document.getPage(page - 1);
    PDRectangle box = targetPage.getMediaBox();

    pageHeight(box.getHeight());
    pageWidth = box.getWidth();
    imageSizeInPercents = 100;
    rotation = targetPage.getRotation() % 360;
}
/**
 * Builds a LayoutPage for every page of the given document.
 *
 * @param doc the document whose pages are converted
 * @return a map from page index (starting at 0) to the LayoutPage built for that page
 * @throws IOException if creating a combined content stream fails
 */
private Map<Integer,LayoutPage> getLayoutPages(PDDocument doc) throws IOException
{
    final int pageCount = doc.getNumberOfPages();
    final Map<Integer,LayoutPage> result = new HashMap<>(pageCount);

    for (int index = 0; index < pageCount; index++)
    {
        PDPage current = doc.getPage(index);
        COSBase pageContents = current.getCOSObject().getDictionaryObject(COSName.CONTENTS);

        // A page without resources gets a fresh, empty resources object.
        PDResources pageResources = current.getResources();
        if (pageResources == null)
        {
            pageResources = new PDResources();
        }

        LayoutPage layout = new LayoutPage(current.getMediaBox(),
                createCombinedContentStream(pageContents),
                pageResources.getCOSObject());
        result.put(index, layout);
    }
    return result;
}
/**
 * Loads a PDF that is expected to have exactly one page and returns the strings that a
 * StringExtractor collects while processing that page.
 * <p>
 * The document is opened in a try-with-resources statement so it is closed even when the
 * page-count assertion or the extraction fails.
 *
 * @param pdfFile the PDF file to read
 * @return the strings reported by the extractor
 * @throws IOException if the file cannot be loaded or processed
 */
private List<String> extractStrings(File pdfFile) throws IOException {
    try (PDDocument pdf = PDDocument.load(pdfFile)) {
        assertThat(pdf.getNumberOfPages()).isEqualTo(1);

        StringExtractor stringExtractor = new StringExtractor();
        stringExtractor.processPage(pdf.getPage(0));
        return stringExtractor.getStrings();
    }
}
// Fetch the page at index 0 of `doc`.
PDPage page = doc.getPage(0);
/**
 * Builds a LayoutPage from the page at index 0 of the given document.
 *
 * @param doc the document whose first page is converted
 * @return the LayoutPage made of that page's media box, combined content stream and resources
 * @throws IOException if creating the combined content stream fails
 */
private LayoutPage getLayoutPage(PDDocument doc) throws IOException
{
    final PDPage firstPage = doc.getPage(0);
    final COSBase pageContents = firstPage.getCOSObject().getDictionaryObject(COSName.CONTENTS);

    // A page without resources gets a fresh, empty resources object.
    PDResources pageResources = firstPage.getResources();
    if (pageResources == null)
    {
        pageResources = new PDResources();
    }

    return new LayoutPage(firstPage.getMediaBox(),
            createCombinedContentStream(pageContents),
            pageResources.getCOSObject());
}
/** * Renders a given page to an AWT Graphics2D instance. * * @param pageIndex the zero-based index of the page to be converted * @param graphics the Graphics2D on which to draw the page * @param scaleX the scale to draw the page at for the x-axis * @param scaleY the scale to draw the page at for the y-axis * @param destination controlling visibility of optional content groups * @throws IOException if the PDF cannot be read */ public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scaleX, float scaleY, RenderDestination destination) throws IOException { PDPage page = document.getPage(pageIndex); // TODO need width/wight calculations? should these be in PageDrawer? transform(graphics, page, scaleX, scaleY); PDRectangle cropBox = page.getCropBox(); graphics.clearRect(0, 0, (int) cropBox.getWidth(), (int) cropBox.getHeight()); // the end-user may provide a custom PageDrawer RenderingHints actualRenderingHints = renderingHints == null ? createDefaultRenderingHints(graphics) : renderingHints; PageDrawerParameters parameters = new PageDrawerParameters(this, page, subsamplingAllowed, destination, actualRenderingHints); PageDrawer drawer = createPageDrawer(parameters); drawer.drawPage(graphics, cropBox); }
// NOTE(review): -1 looks like a "no page requested" sentinel — confirm against the caller.
// For any other value, `page` is replaced by the page that doc.getPage(pageNumber) returns.
if (pageNumber != -1) page = doc.getPage(pageNumber);
/**
 * Extracts the rulings and text elements of one page and wraps them in a Page.
 * <p>
 * For pages rotated by 90 or 270 degrees the crop box width and height are swapped.
 *
 * @param pageNumber the 1-based number of the page to extract
 * @return the extracted page
 * @throws IOException if processing the page fails
 * @throws IndexOutOfBoundsException if pageNumber is below 1 or above the number of pages
 */
protected Page extractPage(Integer pageNumber) throws IOException
{
    boolean outOfRange = pageNumber > this.pdfDocument.getNumberOfPages() || pageNumber < 1;
    if (outOfRange)
    {
        throw new java.lang.IndexOutOfBoundsException(
                "Page number does not exist");
    }

    // getPage is called with pageNumber - 1 because the argument is 1-based.
    PDPage p = this.pdfDocument.getPage(pageNumber - 1);

    ObjectExtractorStreamEngine streamEngine = new ObjectExtractorStreamEngine(p);
    streamEngine.processPage(p);

    TextStripper textStripper = new TextStripper(this.pdfDocument, pageNumber);
    textStripper.process();
    Utils.sort(textStripper.textElements, Rectangle.ILL_DEFINED_ORDER);

    int pageRotation = p.getRotation();
    boolean sideways = Math.abs(pageRotation) == 90 || Math.abs(pageRotation) == 270;
    float w = sideways ? p.getCropBox().getHeight() : p.getCropBox().getWidth();
    float h = sideways ? p.getCropBox().getWidth() : p.getCropBox().getHeight();

    return new Page(0, 0, w, h, pageRotation, pageNumber, p,
            textStripper.textElements, streamEngine.rulings,
            textStripper.minCharWidth, textStripper.minCharHeight,
            textStripper.spatialIndex);
}
// Fetch the page selected by `pageNumber`; the value is passed to getPage unchanged.
// NOTE(review): confirm the caller supplies an index in the base getPage expects.
PDPage p = document.getPage(pageNumber);
// Narrow the dimensions of `humanRect` to float, fetch the crop box of the page at
// index 0, and create a default-constructed PDRectangle in `rect`.
float width = (float) humanRect.getWidth(); float height = (float) humanRect.getHeight(); PDPage page = doc.getPage(0); PDRectangle pageRect = page.getCropBox(); PDRectangle rect = new PDRectangle();
// `p` is used as p - 1 here, so it is presumably a 1-based page number — confirm.
// The page's current rotation is remembered in `rotation`, then the page rotation is set to 0.
PDPage page = document.getPage(p - 1); int rotation = page.getRotation(); page.setRotation(0);
// Import page i - 1 of the source document into `extractedDocument`, then set the
// imported page's crop box to the crop box read from the original page.
PDPage page = sourceDocument.getPage(i - 1); PDPage imported = extractedDocument.importPage(page); imported.setCropBox(page.getCropBox());
public PageFormat getPageFormat(int pageIndex) PDPage page = document.getPage(pageIndex); PDRectangle mediaBox = PDFPrintable.getRotatedMediaBox(page); PDRectangle cropBox = PDFPrintable.getRotatedCropBox(page);