Refine search
if (isDynamicXfa(srcCatalog.getAcroForm())) if (destCatalog.getOpenAction() == null) openAction = srcCatalog.getOpenAction(); pageIndexOpenActionDest = srcCatalog.getPages().indexOf(page); destCatalog.setOpenAction(openAction); COSArray destThreads = (COSArray) destCatalog.getCOSObject().getDictionaryObject(COSName.THREADS); COSArray srcThreads = (COSArray) cloner.cloneForNewDocument(destCatalog.getCOSObject().getDictionaryObject( COSName.THREADS)); if (destThreads == null) destCatalog.getCOSObject().setItem(COSName.THREADS, srcThreads); PDDocumentNameDictionary destNames = destCatalog.getNames(); PDDocumentNameDictionary srcNames = srcCatalog.getNames(); if (srcNames != null) destCatalog.getCOSObject().setItem(COSName.NAMES, cloner.cloneForNewDocument(srcNames)); PDDocumentNameDestinationDictionary destDests = destCatalog.getDests(); PDDocumentNameDestinationDictionary srcDests = srcCatalog.getDests(); if (srcDests != null) destCatalog.getCOSObject().setItem(COSName.DESTS, cloner.cloneForNewDocument(srcDests));
/**
 * Writes the page count, the entries of the document information dictionary and,
 * when the catalog carries one, the raw metadata stream to System.out.
 *
 * @param document The document whose metadata is printed.
 *
 * @throws IOException If reading data from the document fails.
 */
public void printMetadata( PDDocument document ) throws IOException
{
    PDDocumentInformation docInfo = document.getDocumentInformation();
    PDDocumentCatalog catalog = document.getDocumentCatalog();
    PDMetadata xmp = catalog.getMetadata();

    System.out.println( "Page Count=" + document.getNumberOfPages() );
    System.out.println( "Title=" + docInfo.getTitle() );
    System.out.println( "Author=" + docInfo.getAuthor() );
    System.out.println( "Subject=" + docInfo.getSubject() );
    System.out.println( "Keywords=" + docInfo.getKeywords() );
    System.out.println( "Creator=" + docInfo.getCreator() );
    System.out.println( "Producer=" + docInfo.getProducer() );
    System.out.println( "Creation Date=" + formatDate( docInfo.getCreationDate() ) );
    System.out.println( "Modification Date=" + formatDate( docInfo.getModificationDate() ) );
    System.out.println( "Trapped=" + docInfo.getTrapped() );

    // The metadata stream is optional; nothing more to print without it.
    if( xmp == null )
    {
        return;
    }
    // The stream bytes are decoded as ISO-8859-1 before printing.
    String rawMetadata = new String( xmp.toByteArray(), "ISO-8859-1" );
    System.out.println( "Metadata=" + rawMetadata );
}
import org.apache.pdfbox.exceptions.COSVisitorException; import org.apache.pdfbox.exceptions.CryptographyException; import org.apache.pdfbox.exceptions.InvalidPasswordException; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocumentCatalog; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import java.io.IOException; public class Main { public static void main(String[] argv) throws COSVisitorException, InvalidPasswordException, CryptographyException, IOException { PDDocument document = PDDocument.load("input.pdf"); if (document.isEncrypted()) { document.decrypt(""); } PDDocumentCatalog catalog = document.getDocumentCatalog(); for (Object pageObj : catalog.getAllPages()) { PDPage page = (PDPage) pageObj; PDResources resources = page.findResources(); resources.getImages().clear(); } document.save("strippedOfImages.pdf"); } }
/**
 * Registers an "ADBE" entry (BaseVersion 1.7, ExtensionLevel 5) in a new "Extensions"
 * dictionary on the catalog and sets the catalog version to 1.7, so that the use of DSS
 * is identified. Described in PAdES Part 4, Chapter 4.4.
 *
 * @param catalog to add Extensions into
 */
private void addExtensions(PDDocumentCatalog catalog)
{
    // Build the inner ADBE dictionary first ...
    COSDictionary adbe = new COSDictionary();
    adbe.setDirect(true);
    adbe.setName("BaseVersion", "1.7");
    adbe.setInt("ExtensionLevel", 5);

    // ... wrap it in the Extensions dictionary ...
    COSDictionary extensions = new COSDictionary();
    extensions.setDirect(true);
    extensions.setItem("ADBE", adbe);

    // ... and finally attach everything to the catalog.
    catalog.getCOSObject().setItem("Extensions", extensions);
    catalog.setVersion("1.7");
}
/**
 * Copies the /OCProperties of the source document into the target document so that
 * hidden layers can still be hidden after import. When the target has no OCProperties
 * yet, a clone of the source's is installed; otherwise the source's are merged into
 * the existing ones.
 *
 * @param srcDoc The source PDF document that contains the /OCProperties to be copied.
 * @throws IOException If an I/O error occurs.
 */
private void importOcProperties(PDDocument srcDoc) throws IOException
{
    PDOptionalContentProperties sourceProps = srcDoc.getDocumentCatalog().getOCProperties();
    if (sourceProps == null)
    {
        // Nothing to import.
        return;
    }

    PDDocumentCatalog targetCatalog = targetDoc.getDocumentCatalog();
    PDOptionalContentProperties targetProps = targetCatalog.getOCProperties();
    if (targetProps != null)
    {
        cloner.cloneMerge(sourceProps, targetProps);
        return;
    }

    COSDictionary clonedProps = (COSDictionary) cloner.cloneForNewDocument(sourceProps);
    targetCatalog.setOCProperties(new PDOptionalContentProperties(clonedProps));
}
}
PDAcroForm destAcroForm = destCatalog.getAcroForm(); PDAcroForm srcAcroForm = srcCatalog.getAcroForm(); destCatalog.getCOSObject().setItem(COSName.ACRO_FORM, cloner.cloneForNewDocument(srcAcroForm.getCOSObject()));
try (PDDocument document = PDDocument.load(new File(args[0]))) if (document.isEncrypted()) PDDocumentCatalog catalog = document.getDocumentCatalog(); PDDocumentInformation info = document.getDocumentInformation(); catalog.setMetadata( metadataStream );
memUsageSetting.getPartitionedCopy(sources.size()+1) : MemoryUsageSetting.setupMainMemoryOnly(); try (PDDocument destination = new PDDocument(partitionedMemSetting)) if (sourceObject instanceof File) sourceDoc = PDDocument.load((File) sourceObject, partitionedMemSetting); sourceDoc = PDDocument.load((InputStream) sourceObject, partitionedMemSetting); destination.getDocumentCatalog().setMetadata(destinationMetadata);
public static void main(String[] args) throws IOException { String formTemplate = "src/main/resources/org/apache/pdfbox/examples/interactive/form/FillFormField.pdf"; try (PDDocument pdfDocument = PDDocument.load(new File(formTemplate))) { // get the document catalog PDAcroForm acroForm = pdfDocument.getDocumentCatalog().getAcroForm(); // as there might not be an AcroForm entry a null check is necessary if (acroForm != null) { // Retrieve an individual field and set its value. PDTextField field = (PDTextField) acroForm.getField( "sampleField" ); field.setValue("Text Entry"); // If a field is nested within the form tree a fully qualified name // might be provided to access the field. field = (PDTextField) acroForm.getField( "fieldsContainer.nestedSampleField" ); field.setValue("Text Entry"); } // Save and close the filled out form. pdfDocument.save("target/FillFormField.pdf"); } }
int pageCount = getNumberOfPages(); if (pageCount == 0) PDPage page = getPage(startIndex); PDDocumentCatalog catalog = getDocumentCatalog(); PDAcroForm acroForm = catalog.getAcroForm(); catalog.getCOSObject().setNeedToBeUpdated(true); acroForm = new PDAcroForm(this); catalog.setAcroForm(acroForm); acroForm.getCOSObject().setNeedToBeUpdated(true); if (!(acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS) instanceof COSArray)) acroForm.getCOSObject().setItem(COSName.FIELDS, new COSArray()); List<PDAnnotation> annotations = page.getAnnotations(); page.setAnnotations(annotations); page.getCOSObject().setNeedToBeUpdated(true);
try (PDDocument document = PDDocument.load(new File(args[0]))) if( document.isEncrypted() ) if( document.getNumberOfPages() < 2 ) PDDocumentOutline bookmarks = document.getDocumentCatalog().getDocumentOutline(); if( bookmarks == null ) PDActionGoTo action = new PDActionGoTo(); action.setDestination(dest); document.getDocumentCatalog().setOpenAction(action); document.save( args[1] );
AccessPermission ap = document.getCurrentAccessPermission(); metadata.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY, Boolean.toString(ap.canExtractForAccessibility())); Boolean.toString(ap.canPrintDegraded())); if (document.getDocumentCatalog().getLanguage() != null) { metadata.set(TikaCoreProperties.LANGUAGE, document.getDocumentCatalog().getLanguage()); Document dom = loadDOM(document.getDocumentCatalog().getMetadata(), metadata, context); for (COSName key : info.getCOSObject().keySet()) { String name = key.getName(); if (!handledMetadata.contains(name)) { addMetadata(metadata, name, info.getCOSObject().getDictionaryObject(key)); addMetadata(metadata, PDF.PDF_DOC_INFO_CUSTOM_PREFIX + name, info.getCOSObject().getDictionaryObject(key)); COSDictionary root = document.getDocumentCatalog().getCOSObject(); COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions")); if (extensions != null) {
PDDocument doc = PDDocument.load(inputFile)) PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm(); PDRectangle rect = null; if (signature != null) rect = acroForm.getField(signatureFieldName).getWidgets().get(0).getRectangle(); if (doc.getVersion() >= 1.5f && accessPermissions == 0) if (acroForm != null && acroForm.getNeedAppearances()) if (acroForm.getFields().isEmpty()) acroForm.getCOSObject().removeItem(COSName.NEED_APPEARANCES);
try (PDDocument document = new PDDocument()) PDPage page1 = new PDPage(); PDPage page2 = new PDPage(); PDPage page3 = new PDPage(); document.addPage(page1); document.addPage(page2); document.addPage(page3); List<PDAnnotation> annotations = page1.getAnnotations(); PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm(); if (acroForm == null) acroForm = new PDAcroForm(document); document.getDocumentCatalog().setAcroForm(acroForm); PDResources dr = acroForm.getDefaultResources(); if (dr == null) acroForm.setDefaultResources(dr);
/**
 * Loads the PDF at the given path and feeds the content stream of every page that
 * has one to {@code processStream}, extracting the colors for the words specified
 * in the constructor. The document is closed afterwards in all cases.
 *
 * @param filename PDF document path
 * @throws IOException if the document cannot be loaded or processed
 */
public void parse (String filename) throws IOException
{
    PDDocument pdf = null;
    try
    {
        pdf = PDDocument.load(filename, false);
        List pageList = pdf.getDocumentCatalog().getAllPages();
        for( Object entry : pageList )
        {
            PDPage currentPdPage = (PDPage) entry;
            PDStream pageContents = currentPdPage.getContents();
            if( pageContents == null )
            {
                // Pages without a content stream have nothing to process.
                continue;
            }
            processStream( currentPdPage, currentPdPage.getResources(), pageContents.getStream() );
        }
    }
    finally
    {
        if( pdf != null )
        {
            pdf.close();
        }
    }
}
/** * Output a PDF with as many pages as there are interesting areas in the * input document */ @Override public PDDocument extract() throws IOException { PDDocument extractedDocument = new PDDocument(); extractedDocument.setDocumentInformation(sourceDocument.getDocumentInformation()); extractedDocument.getDocumentCatalog().setViewerPreferences( sourceDocument.getDocumentCatalog().getViewerPreferences()); @SuppressWarnings("unchecked") List<PDPage> pages = sourceDocument.getDocumentCatalog().getAllPages(); int pageCounter = 1; for (PDPage page : pages) { if (pageCounter >= startPage && pageCounter <= endPage) { List<PDRectangle> zoomedFragments = getFragments(page); for (PDRectangle fragment : zoomedFragments) { PDPage outputPage = extractedDocument.importPage(page); outputPage.setCropBox(fragment); outputPage.setMediaBox(page.getMediaBox()); outputPage.setResources(page.findResources()); outputPage.setRotation(page.findRotation()); // TODO: rotate the page in landscape mode is width > height } } pageCounter++; } return extractedDocument; }
/**
 * Loads the PDF at the given path and feeds every page's content stream to
 * {@code processStream} in order to extract the images. {@code currentPage} is
 * set to the zero-based index of the page before it is processed. Pages without
 * a content stream are skipped. The document is closed afterwards in all cases.
 *
 * @param filename PDF document path
 * @throws IOException if the document cannot be loaded or processed
 */
public void parse(String filename) throws IOException
{
    PDDocument document = null;
    try
    {
        document = PDDocument.load(filename, false);
        List allPages = document.getDocumentCatalog().getAllPages();
        for( int i=0; i<allPages.size(); i++ )
        {
            PDPage page = (PDPage)allPages.get( i );
            currentPage = i;
            // A page is not required to have a content stream; dereferencing a
            // missing one would fail with a NullPointerException.
            if( page.getContents() != null )
            {
                processStream( page, page.findResources(), page.getContents().getStream() );
            }
        }
    }
    finally
    {
        if( document != null )
        {
            document.close();
        }
    }
}
/**
 * Obtains a document from {@code source}, sanitizes its name dictionary, catalog,
 * document outline and COS objects, and writes the result to {@code outputStream}.
 * The document is closed when the method exits, whether it completes normally or
 * an exception is thrown.
 *
 * @param source the input the document is obtained from
 * @param outputStream the stream the sanitized document is saved to
 * @throws IOException if reading, saving or closing the document fails
 * @throws BleachException propagated from the sanitizing steps
 */
void sanitize(RandomAccessRead source, OutputStream outputStream) throws IOException, BleachException
{
    // try-with-resources guarantees the document is released even when a
    // sanitizing step or the save fails.
    try (PDDocument doc = getDocument(source))
    {
        final PDDocumentCatalog docCatalog = doc.getDocumentCatalog();

        sanitizeNamed(doc, docCatalog.getNames());

        PDDocumentCatalogBleach catalogBleach = new PDDocumentCatalogBleach(this);
        catalogBleach.sanitize(docCatalog);

        sanitizeDocumentOutline(doc.getDocumentCatalog().getDocumentOutline());
        cosObjectBleach.sanitizeObjects(doc.getDocument().getObjects());

        doc.save(outputStream);
    }
}
/**
 * Renders every page of the PDF file at the given path to an RGB image at 300 DPI
 * and runs OCR over all rendered pages.
 *
 * @param url path of the PDF file (passed to {@code new File(url)})
 * @return the text recognised by the OCR engine
 * @throws IOException if the document cannot be loaded, rendered or closed
 * @throws TesseractException if the OCR step fails
 */
private static String convertEncryptedPDFDocument(String url) throws IOException, TesseractException
{
    int imageDPIValue = 300;
    List<BufferedImage> imagesPages = new ArrayList<>();

    PDDocument document = PDDocument.loadNonSeq(new File(url), null);
    try
    {
        @SuppressWarnings("unchecked")
        List<PDPage> pdPages = document.getDocumentCatalog().getAllPages();
        for (PDPage pdPage : pdPages)
        {
            imagesPages.add(pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, imageDPIValue));
        }
    }
    finally
    {
        // Release the document even when rendering a page fails.
        document.close();
    }

    Tesseract tesseract = TessaractManager.getInstance().getTessaract();
    List<IIOImage> pagesToOCR = new ArrayList<>();
    for (BufferedImage image : imagesPages)
    {
        pagesToOCR.addAll(ImageIOHelper.getIIOImageList(image));
    }
    return tesseract.doOCR(pagesToOCR, null);
}
document = PDDocument.load(new File( pdfFile ), password); AccessPermission ap = document.getCurrentAccessPermission(); if( ! ap.canExtractContent() ) extractPages(startPage, Math.min(endPage, document.getNumberOfPages()), stripper, document, output, rotationMagic, alwaysNext); PDDocumentCatalog catalog = document.getDocumentCatalog(); PDDocumentNameDictionary names = catalog.getNames(); if (names != null)