/**
 * Interprets the content stream of {@code page} with a {@code CSImageExtractor}
 * that reports to the supplied handler.
 *
 * @param page         page whose content stream and resources are processed
 * @param imageHandler handler passed to the {@code CSImageExtractor}
 */
private void tryImageExtraction(PDPage page, IDocearPdfImageHandler imageHandler) {
    // The image extractor is installed as the interpreter's device; the first
    // constructor argument is intentionally left null, as before.
    CSDeviceBasedInterpreter imageInterpreter =
            new CSDeviceBasedInterpreter(null, new CSImageExtractor(imageHandler));
    imageInterpreter.process(page.getContentStream(), page.getResources());
}
private Integer getPageFromCOSArray(COSArray destination) { // DOCEAR: fallback if no entry was found if (destination == null) { return 1; } Iterator<?> it = destination.iterator(); while (it.hasNext()) { COSObject o = (COSObject) it.next(); if (o.isIndirect()) { // the page is indirect referenced o.dereference(); } PDPage page = getDocument().getPageTree().getFirstPage(); while (page != null) { if (page.cosGetObject().equals(o)) { return page.getNodeIndex() + 1; } page = page.getNextPage(); } } return null; }
Rectangle2D rect = page.getCropBox().toNormalizedRectangle(); int rotation = page.getRotate(); logger.debug("Page #{} rotation: {}°", id, rotation); gctx.fill(rect); CSContent content = page.getContentStream(); RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_OFF); renderer.process(content, page.getResources());
/**
 * Computes only the unique hash of the document and then closes it.
 *
 * <p>The first page is used unless its contents are empty, in which case the
 * following page is used instead. Text extraction is attempted first (it sets
 * {@code uniqueHash} as a side effect of {@code tryTextExtraction}); when it
 * yields no entries, the hash is taken from an image extraction pass.
 *
 * <p>Extraction is best effort: any exception raised while extracting is
 * ignored. {@code close()} is always invoked, even when there is no usable page.
 *
 * @throws IOException declared by the original signature; extraction failures
 *                     themselves are not propagated
 */
private void onlyHashExtraction() throws IOException {
    try {
        PDPage page = getPDDocument(getDocument()).getPageTree().getFirstPage();
        // A document without pages yields no first page; there is nothing to hash.
        if (page == null || !page.isPage()) {
            return;
        }
        try {
            // Skip an empty first page and use the following one instead.
            if (!page.cosGetContents().basicIterator().hasNext()) {
                page = page.getNextPage();
            }
            // The empty page may have been the only page.
            if (page == null) {
                return;
            }

            TreeMap<PdfTextEntity, StringBuilder> map = tryTextExtraction(page);
            if (map.isEmpty()) {
                // No text found: fall back to hashing the page's images.
                UniqueImageHashExtractor handler = new UniqueImageHashExtractor();
                tryImageExtraction(page, handler);
                uniqueHash = handler.getUniqueHash();
            }
        } catch (Exception ignored) {
            // Deliberately best effort: a failed extraction leaves uniqueHash as it is.
        }
    } finally {
        close();
    }
}
/**
 * Builds a page destination for the page that holds the given annotation.
 *
 * @param annotation annotation whose page is looked up via {@code PDAnnotationTools.getPage}
 * @return a {@code PageDestination} for that page's node index plus one
 */
public APDObjectDestination getDestination(PDAnnotation annotation) {
    // Node indices are turned into 1-based page numbers.
    int pageNumber = PDAnnotationTools.getPage(annotation).getNodeIndex() + 1;
    return new PageDestination(pageNumber);
}
try { PDPage page = getPDDocument(getDocument()).getPageTree().getFirstPage(); if (page.isPage()) { try { if(!page.cosGetContents().basicIterator().hasNext()) { page = page.getNextPage();
/**
 * Determines the 1-based page number a bookmark (outline item) points to.
 *
 * <p>The item's own destination is tried first. If it cannot be resolved to a
 * page, the item's action entry ({@code PDOutlineItem.DK_A}) is consulted,
 * provided it is a dictionary.
 *
 * @param item outline item to resolve; may be {@code null}
 * @return the page number, or {@code null} when {@code item} is {@code null} or
 *         neither the destination nor an action dictionary is available; for an
 *         action dictionary the result is whatever {@code getPageFromCOSArray}
 *         returns
 * @throws IOException      declared by the original signature
 * @throws COSLoadException declared by the original signature
 */
private Integer getBookmarkDestinationPage(PDOutlineItem item) throws IOException, COSLoadException {
    if (item == null) {
        return null;
    }

    if (item.getDestination() != null) {
        PDExplicitDestination destination = item.getDestination().getResolvedDestination(getDocument());
        if (destination != null) {
            PDPage page = destination.getPage(getDocument());
            // A destination may not resolve to a page; fall through to the action entry then.
            if (page != null) {
                return page.getNodeIndex() + 1;
            }
        }
    }

    // Only a dictionary can be handed on; anything else (including COSNull)
    // is treated as "no action" instead of failing on the cast.
    Object action = item.cosGetField(PDOutlineItem.DK_A);
    if (action instanceof COSDictionary) {
        COSArray destination = getCOSArrayFromDestination((COSDictionary) action);
        return getPageFromCOSArray(destination);
    }
    return null;
}
/**
 * Runs a formatted text extraction over a single page.
 *
 * <p>Side effect: {@code uniqueHash} is overwritten with the hash reported by
 * the extractor after the page has been processed.
 *
 * @param page page whose content stream and resources are interpreted
 * @return the map produced by the {@code CSFormatedTextExtractor}
 */
private TreeMap<PdfTextEntity, StringBuilder> tryTextExtraction(PDPage page) {
    CSFormatedTextExtractor textExtractor = new CSFormatedTextExtractor();

    // Start from an identity transform and let PDFGeometryTools adjust it for the page.
    AffineTransform deviceTx = new AffineTransform();
    PDFGeometryTools.adjustTransform(deviceTx, page);
    textExtractor.setDeviceTransform(deviceTx);

    new CSDeviceBasedInterpreter(null, textExtractor)
            .process(page.getContentStream(), page.getResources());

    TreeMap<PdfTextEntity, StringBuilder> extracted = textExtractor.getMap();
    uniqueHash = textExtractor.getHash();
    return extracted;
}
/**
 * Appends the text content of every page below {@code pageTree} to {@code sb}.
 *
 * <p>Kids that are not pages are cast to {@code PDPageTree} and processed
 * recursively. A {@code CSException} raised for a page is printed via
 * {@code printStackTrace()} and the traversal continues with the next kid.
 *
 * @param pageTree page tree node whose kids are visited in iteration order
 * @param sb       buffer receiving the extracted content
 */
private void extractText(PDPageTree pageTree, StringBuilder sb) {
    Iterator<?> kids = pageTree.getKids().iterator();
    while (kids.hasNext()) {
        PDPageNode child = (PDPageNode) kids.next();

        if (!child.isPage()) {
            // Intermediate node: descend into the subtree.
            extractText((PDPageTree) child, sb);
            continue;
        }

        try {
            CSTextExtractor textExtractor = new CSTextExtractor();
            PDPage leaf = (PDPage) child;

            AffineTransform deviceTx = new AffineTransform();
            PDFGeometryTools.adjustTransform(deviceTx, leaf);
            textExtractor.setDeviceTransform(deviceTx);

            new CSDeviceBasedInterpreter(null, textExtractor)
                    .process(leaf.getContentStream(), leaf.getResources());
            sb.append(textExtractor.getContent());
        } catch (CSException e) {
            e.printStackTrace();
        }
    }
}