org.apache.pdfbox.pdmodel.PDDocumentCatalog java code examples

Refine search

if (isDynamicXfa(srcCatalog.getAcroForm()))
if (destCatalog.getOpenAction() == null)
    openAction = srcCatalog.getOpenAction();
      pageIndexOpenActionDest = srcCatalog.getPages().indexOf(page);
  destCatalog.setOpenAction(openAction);
COSArray destThreads = (COSArray) destCatalog.getCOSObject().getDictionaryObject(COSName.THREADS);
COSArray srcThreads = (COSArray) cloner.cloneForNewDocument(destCatalog.getCOSObject().getDictionaryObject(
    COSName.THREADS));
if (destThreads == null)
  destCatalog.getCOSObject().setItem(COSName.THREADS, srcThreads);
PDDocumentNameDictionary destNames = destCatalog.getNames();
PDDocumentNameDictionary srcNames = srcCatalog.getNames();
if (srcNames != null)
    destCatalog.getCOSObject().setItem(COSName.NAMES, cloner.cloneForNewDocument(srcNames));
PDDocumentNameDestinationDictionary destDests = destCatalog.getDests();
PDDocumentNameDestinationDictionary srcDests = srcCatalog.getDests();
if (srcDests != null)
    destCatalog.getCOSObject().setItem(COSName.DESTS, cloner.cloneForNewDocument(srcDests));

/**
 * Writes the document's page count, the entries of its information
 * dictionary and, when present, the catalog's metadata stream to System.out.
 *
 * @param document The document whose metadata is printed.
 *
 * @throws IOException If the document data cannot be read.
 */
public void printMetadata( PDDocument document ) throws IOException
{
  PDDocumentInformation docInfo = document.getDocumentInformation();
  // Fetch the metadata stream up front; it is only printed at the very end.
  PDMetadata xmp = document.getDocumentCatalog().getMetadata();

  System.out.println( "Page Count=" + document.getNumberOfPages() );
  System.out.println( "Title=" + docInfo.getTitle() );
  System.out.println( "Author=" + docInfo.getAuthor() );
  System.out.println( "Subject=" + docInfo.getSubject() );
  System.out.println( "Keywords=" + docInfo.getKeywords() );
  System.out.println( "Creator=" + docInfo.getCreator() );
  System.out.println( "Producer=" + docInfo.getProducer() );
  System.out.println( "Creation Date=" + formatDate( docInfo.getCreationDate() ) );
  System.out.println( "Modification Date=" + formatDate( docInfo.getModificationDate() ) );
  System.out.println( "Trapped=" + docInfo.getTrapped() );

  if( xmp == null )
  {
    // No metadata stream in the catalog: nothing more to print.
    return;
  }
  // The raw stream bytes are decoded as ISO-8859-1 for display.
  System.out.println( "Metadata=" + new String( xmp.toByteArray(), "ISO-8859-1" ) );
}

 import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.exceptions.CryptographyException;
import org.apache.pdfbox.exceptions.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import java.io.IOException;

/**
 * Loads {@code input.pdf}, clears the image map of every page's resources and
 * saves the result as {@code strippedOfImages.pdf}.
 */
public class Main {
  /**
   * Runs the image-stripping example.
   *
   * @param argv unused
   * @throws COSVisitorException declared by the PDFBox save call
   * @throws InvalidPasswordException declared by the PDFBox decrypt call
   * @throws CryptographyException declared by the PDFBox decrypt call
   * @throws IOException if the document cannot be read, written or closed
   */
  public static void main(String[] argv) throws COSVisitorException, InvalidPasswordException, CryptographyException, IOException {
    PDDocument document = PDDocument.load("input.pdf");
    try {
      if (document.isEncrypted()) {
        // Try the empty user password.
        document.decrypt("");
      }

      PDDocumentCatalog catalog = document.getDocumentCatalog();
      for (Object pageObj : catalog.getAllPages()) {
        PDPage page = (PDPage) pageObj;
        PDResources resources = page.findResources();
        // NOTE(review): findResources() is expected to return null when no
        // resources are defined for the page or its parents - skip such pages.
        if (resources != null) {
          resources.getImages().clear();
        }
      }

      document.save("strippedOfImages.pdf");
    } finally {
      // Always release the document, including when decrypt/save fails.
      document.close();
    }
  }
}

/**
 * Writes an /Extensions dictionary with an /ADBE entry (BaseVersion 1.7,
 * ExtensionLevel 5) into the document catalog and sets the catalog version to
 * 1.7, so that the use of DSS is identified. Described in PAdES Part 4,
 * Chapter 4.4.
 *
 * @param catalog to add Extensions into
 */
private void addExtensions(PDDocumentCatalog catalog)
{
  // Both dictionaries are flagged as direct objects.
  COSDictionary extensionsDict = new COSDictionary();
  COSDictionary adbeDict = new COSDictionary();
  extensionsDict.setDirect(true);
  adbeDict.setDirect(true);

  // Attach top-down: catalog -> Extensions -> ADBE.
  COSDictionary catalogDict = catalog.getCOSObject();
  catalogDict.setItem("Extensions", extensionsDict);
  extensionsDict.setItem("ADBE", adbeDict);

  adbeDict.setName("BaseVersion", "1.7");
  adbeDict.setInt("ExtensionLevel", 5);
  catalog.setVersion("1.7");
}

  /**
   * Brings the /OCProperties of the source document into the target document so hidden layers
   * can still be hidden after import. When the target has no optional content properties yet,
   * a clone of the source's is installed; otherwise the source's are merged into the existing
   * ones.
   *
   * @param srcDoc The source PDF document that contains the /OCProperties to be copied.
   * @throws IOException If an I/O error occurs.
   */
  private void importOcProperties(PDDocument srcDoc) throws IOException
  {
    PDOptionalContentProperties sourceProps = srcDoc.getDocumentCatalog().getOCProperties();
    if (sourceProps == null)
    {
      // Nothing to import.
      return;
    }

    PDDocumentCatalog targetCatalog = targetDoc.getDocumentCatalog();
    PDOptionalContentProperties targetProps = targetCatalog.getOCProperties();
    if (targetProps != null)
    {
      // Target already has optional content properties: merge into them.
      cloner.cloneMerge(sourceProps, targetProps);
      return;
    }

    COSDictionary clonedProps = (COSDictionary) cloner.cloneForNewDocument(sourceProps);
    targetCatalog.setOCProperties(new PDOptionalContentProperties(clonedProps));
  }
}

PDAcroForm destAcroForm = destCatalog.getAcroForm();
PDAcroForm srcAcroForm = srcCatalog.getAcroForm();
  destCatalog.getCOSObject().setItem(COSName.ACRO_FORM,
      cloner.cloneForNewDocument(srcAcroForm.getCOSObject()));

try (PDDocument document = PDDocument.load(new File(args[0])))
  if (document.isEncrypted())
  PDDocumentCatalog catalog = document.getDocumentCatalog();
  PDDocumentInformation info = document.getDocumentInformation();
  catalog.setMetadata( metadataStream );

    memUsageSetting.getPartitionedCopy(sources.size()+1) :
    MemoryUsageSetting.setupMainMemoryOnly();
try (PDDocument destination = new PDDocument(partitionedMemSetting))
    if (sourceObject instanceof File)
      sourceDoc = PDDocument.load((File) sourceObject, partitionedMemSetting);
      sourceDoc = PDDocument.load((InputStream) sourceObject,
          partitionedMemSetting);
    destination.getDocumentCatalog().setMetadata(destinationMetadata);

/**
 * Loads the FillFormField.pdf template, writes "Text Entry" into two text
 * fields when the document has an AcroForm, and saves the result under
 * target/FillFormField.pdf.
 *
 * @param args unused
 * @throws IOException if the template cannot be loaded or the result cannot be saved
 */
public static void main(String[] args) throws IOException
{
  String formTemplate = "src/main/resources/org/apache/pdfbox/examples/interactive/form/FillFormField.pdf";

  try (PDDocument pdfDocument = PDDocument.load(new File(formTemplate)))
  {
    PDAcroForm form = pdfDocument.getDocumentCatalog().getAcroForm();

    // The catalog may have no AcroForm entry, in which case nothing is filled in.
    if (form != null)
    {
      // The second name is fully qualified because that field is nested
      // within the form tree.
      String[] fieldNames = { "sampleField", "fieldsContainer.nestedSampleField" };
      for (String fieldName : fieldNames)
      {
        PDTextField textField = (PDTextField) form.getField( fieldName );
        textField.setValue("Text Entry");
      }
    }

    // Save the form; try-with-resources closes the document afterwards.
    pdfDocument.save("target/FillFormField.pdf");
  }
}

int pageCount = getNumberOfPages();
if (pageCount == 0)
PDPage page = getPage(startIndex);
PDDocumentCatalog catalog = getDocumentCatalog();
PDAcroForm acroForm = catalog.getAcroForm();
catalog.getCOSObject().setNeedToBeUpdated(true);
  acroForm = new PDAcroForm(this);
  catalog.setAcroForm(acroForm);
  acroForm.getCOSObject().setNeedToBeUpdated(true);
if (!(acroForm.getCOSObject().getDictionaryObject(COSName.FIELDS) instanceof COSArray))
  acroForm.getCOSObject().setItem(COSName.FIELDS, new COSArray());
List<PDAnnotation> annotations = page.getAnnotations();
page.setAnnotations(annotations);
page.getCOSObject().setNeedToBeUpdated(true);

try (PDDocument document = PDDocument.load(new File(args[0])))
  if( document.isEncrypted() )
  if( document.getNumberOfPages() < 2 )
  PDDocumentOutline bookmarks = document.getDocumentCatalog().getDocumentOutline();
  if( bookmarks == null )
  PDActionGoTo action = new PDActionGoTo();
  action.setDestination(dest);
  document.getDocumentCatalog().setOpenAction(action);
  document.save( args[1] );

AccessPermission ap = document.getCurrentAccessPermission();
metadata.set(AccessPermissions.EXTRACT_FOR_ACCESSIBILITY,
    Boolean.toString(ap.canExtractForAccessibility()));
    Boolean.toString(ap.canPrintDegraded()));
if (document.getDocumentCatalog().getLanguage() != null) {
  metadata.set(TikaCoreProperties.LANGUAGE, document.getDocumentCatalog().getLanguage());
Document dom = loadDOM(document.getDocumentCatalog().getMetadata(), metadata, context);
for (COSName key : info.getCOSObject().keySet()) {
  String name = key.getName();
  if (!handledMetadata.contains(name)) {
    addMetadata(metadata, name, info.getCOSObject().getDictionaryObject(key));
    addMetadata(metadata, PDF.PDF_DOC_INFO_CUSTOM_PREFIX + name,
        info.getCOSObject().getDictionaryObject(key));
COSDictionary root = document.getDocumentCatalog().getCOSObject();
COSDictionary extensions = (COSDictionary) root.getDictionaryObject(COSName.getPDFName("Extensions"));
if (extensions != null) {

  PDDocument doc = PDDocument.load(inputFile))
PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();
PDRectangle rect = null;
  if (signature != null)
    rect = acroForm.getField(signatureFieldName).getWidgets().get(0).getRectangle();
if (doc.getVersion() >= 1.5f && accessPermissions == 0)
if (acroForm != null && acroForm.getNeedAppearances())
  if (acroForm.getFields().isEmpty())
    acroForm.getCOSObject().removeItem(COSName.NEED_APPEARANCES);

try (PDDocument document = new PDDocument())
  PDPage page1 = new PDPage();
  PDPage page2 = new PDPage();
  PDPage page3 = new PDPage();
  document.addPage(page1);
  document.addPage(page2);
  document.addPage(page3);
  List<PDAnnotation> annotations = page1.getAnnotations();
  PDAcroForm acroForm = document.getDocumentCatalog().getAcroForm();
  if (acroForm == null)
    acroForm = new PDAcroForm(document);
    document.getDocumentCatalog().setAcroForm(acroForm);
  PDResources dr = acroForm.getDefaultResources();
  if (dr == null)
    acroForm.setDefaultResources(dr);

/**
 * Loads the PDF at the given path and feeds the content stream of each page
 * that has one to {@code processStream}, extracting the colors for the words
 * specified in the constructor. The document is closed afterwards.
 *
 * @param filename PDF document path
 * @throws IOException if the document cannot be loaded, processed or closed
 */
public void parse (String filename) throws IOException {
  PDDocument document = null;
  try {
    document = PDDocument.load(filename, false);
    for (Object pageObject : document.getDocumentCatalog().getAllPages()) {
      PDPage page = (PDPage) pageObject;
      PDStream contents = page.getContents();
      if (contents == null) {
        // Page without a content stream: nothing to process.
        continue;
      }
      processStream( page, page.getResources(),
        page.getContents().getStream() );
    }
  } finally {
    if (document != null) {
      document.close();
    }
  }
}

/**
 * Builds a new PDF with one page per interesting area found in the input
 * document. Only source pages whose 1-based number lies within
 * {@code startPage}..{@code endPage} (inclusive) are considered; each fragment
 * reported by {@code getFragments} becomes an imported copy of its page with
 * the crop box set to that fragment.
 *
 * @return the newly created document
 * @throws IOException if a page cannot be imported
 */
@Override
public PDDocument extract() throws IOException {
  PDDocument result = new PDDocument();
  result.setDocumentInformation(sourceDocument.getDocumentInformation());
  result.getDocumentCatalog().setViewerPreferences(
      sourceDocument.getDocumentCatalog().getViewerPreferences());

  @SuppressWarnings("unchecked")
  List<PDPage> sourcePages = sourceDocument.getDocumentCatalog().getAllPages();
  for (int index = 0; index < sourcePages.size(); index++) {
    int pageNumber = index + 1;
    if (pageNumber < startPage || pageNumber > endPage) {
      continue;
    }
    PDPage sourcePage = sourcePages.get(index);
    for (PDRectangle area : getFragments(sourcePage)) {
      PDPage copy = result.importPage(sourcePage);
      copy.setCropBox(area);
      copy.setMediaBox(sourcePage.getMediaBox());
      copy.setResources(sourcePage.findResources());
      copy.setRotation(sourcePage.findRotation());
      // TODO: rotate the page in landscape mode is width > height
    }
  }
  return result;
}

/**
 * Parses a document extracting the images. Every page's index is recorded in
 * {@code currentPage} before its content stream is handed to
 * {@code processStream}; pages without a content stream are skipped.
 *
 * @param filename PDF document path
 * @throws IOException if the document cannot be loaded, processed or closed
 */
public void parse(String filename) throws IOException {
  PDDocument document = null;
  try {
    document = PDDocument.load(filename, false);
    List allPages = document.getDocumentCatalog().getAllPages();
    for( int i=0; i<allPages.size(); i++ ) {
      PDPage page = (PDPage)allPages.get( i );
      currentPage = i;
      PDStream contents = page.getContents();
      // A page may have no content stream; dereferencing it unchecked would
      // throw a NullPointerException and abort the whole document.
      if (contents != null) {
        processStream( page, page.findResources(), contents.getStream() );
      }
    }
  } finally {
    if (document != null) {
      document.close();
    }
  }
}

/**
 * Loads the document from {@code source}, sanitizes its names, catalog,
 * outline and COS objects, and writes the result to {@code outputStream}.
 * The document is closed even when a sanitizing step or the save fails.
 *
 * @param source input to read the PDF from
 * @param outputStream destination the sanitized document is saved to
 * @throws IOException if reading, saving or closing the document fails
 * @throws BleachException declared for failures raised by the sanitizing steps
 */
void sanitize(RandomAccessRead source, OutputStream outputStream)
  throws IOException, BleachException {
 // try-with-resources guarantees close() on every exit path.
 try (PDDocument doc = getDocument(source)) {
  final PDDocumentCatalog docCatalog = doc.getDocumentCatalog();
  sanitizeNamed(doc, docCatalog.getNames());
  PDDocumentCatalogBleach catalogBleach = new PDDocumentCatalogBleach(this);
  catalogBleach.sanitize(docCatalog);
  sanitizeDocumentOutline(doc.getDocumentCatalog().getDocumentOutline());
  cosObjectBleach.sanitizeObjects(doc.getDocument().getObjects());
  doc.save(outputStream);
 }
}

/**
 * Renders every page of the PDF at the given path to an RGB image at 300 DPI
 * and runs OCR over the rendered pages.
 *
 * @param url path of the PDF file to convert
 * @return the text recognized by the OCR engine
 * @throws IOException if the document cannot be loaded, rendered or closed
 * @throws TesseractException if the OCR step fails
 */
private static String convertEncryptedPDFDocument(String url) throws IOException, TesseractException{
  int imageDPIValue = 300;
  List<BufferedImage> imagesPages = new ArrayList<>();
  PDDocument document = PDDocument.loadNonSeq(new File(url), null);
  try {
    @SuppressWarnings("unchecked")
    List<PDPage> pdPages = document.getDocumentCatalog().getAllPages();
    for (PDPage pdPage : pdPages){
      imagesPages.add(pdPage.convertToImage(BufferedImage.TYPE_INT_RGB, imageDPIValue));
    }
  } finally {
    // Release the document even when rendering a page fails.
    document.close();
  }
  Tesseract tessaract = TessaractManager.getInstance().getTessaract();
  List<IIOImage> pagesToOCR = new ArrayList<>();
  for(BufferedImage image :imagesPages){
    List<IIOImage> content = ImageIOHelper.getIIOImageList(image);
    pagesToOCR.addAll(content);
  }
  return tessaract.doOCR(pagesToOCR, null);
}

document = PDDocument.load(new File( pdfFile ), password);
AccessPermission ap = document.getCurrentAccessPermission();
if( ! ap.canExtractContent() )
  extractPages(startPage, Math.min(endPage, document.getNumberOfPages()), 
         stripper, document, output, rotationMagic, alwaysNext);
PDDocumentCatalog catalog = document.getDocumentCatalog();
PDDocumentNameDictionary names = catalog.getNames();    
if (names != null)

Javadoc

The Document Catalog of a PDF.

Most used methods

getAcroForm
Get the documents AcroForm. This will return null if no AcroForm is part of the document.
getMetadata
Get the metadata that is part of the document catalog. This will return null if there is no metadata.
getCOSObject
Convert this standard java object to a COS object.
getDocumentOutline
Get the outline associated with this document or null if it does not exist.
getAllPages
getNames
getOpenAction
Get the Document Open Action for this object.
getPages
Returns all pages in the document, as a page tree.
getOCProperties
Get the optional content properties dictionary associated with this document.
setMetadata
Sets the metadata for this object. This can be null.
getLanguage
Returns the language for the document, or null.
getOutputIntents
Get the list of OutputIntents defined in the document.

Popular in Java

Start an intent from android
startActivity (Activity)
getSystemService (Context)
scheduleAtFixedRate (ScheduledExecutorService)
BufferedWriter (java.io)
Wraps an existing Writer and buffers the output. Expensive interaction with the underlying reader is
InputStreamReader (java.io)
A class for turning a byte stream into a character stream. Data read from the source input stream is
SocketTimeoutException (java.net)
This exception is thrown when a timeout expired on a socket read or accept operation.
MessageDigest (java.security)
Uses a one-way hash function to turn an arbitrary number of bytes into a fixed-length byte sequence.
SortedSet (java.util)
SortedSet is a Set which iterates over its elements in a sorted order. The order is determined eithe
Response (javax.ws.rs.core)
Defines the contract between a returned instance and the runtime when an application needs to provid
CodeWhisperer alternatives

How to use PDDocumentCatalog in org.apache.pdfbox.pdmodel

Best Java code snippets using org.apache.pdfbox.pdmodel.PDDocumentCatalog (Showing top 20 results out of 315)

Refine search

How to use
PDDocumentCatalog
in
org.apache.pdfbox.pdmodel