/** * Used when processing custom metadata entries, as PDFBox won't do * the conversion for us in the way it does for the standard ones */ private void addMetadata(Metadata metadata, String name, COSBase value) { if (value instanceof COSArray) { for (Object v : ((COSArray) value).toList()) { addMetadata(metadata, name, ((COSBase) v)); } } else if (value instanceof COSString) { addMetadata(metadata, name, ((COSString) value).getString()); } // Avoid calling COSDictionary#toString, since it can lead to infinite // recursion. See TIKA-1038 and PDFBOX-1835. else if (value != null && !(value instanceof COSDictionary)) { addMetadata(metadata, name, value.toString()); } }
addMetadata(metadata, property, pdfBoxBaseline); if (items == null) { if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) { addMetadata(metadata, property, pdfBoxBaseline); addMetadata(metadata, property, item); addMetadata(metadata, property, pdfBoxBaseline);
addMetadata(metadata, property, pdfBoxBaseline); continue; addMetadata(metadata, property, value); if (!property.isMultiValuePermitted()) { return; addMetadata(metadata, property, pdfBoxBaseline);
metadata.set(PagedText.N_PAGES, document.getNumberOfPages()); extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema); addMetadata(metadata, PDF.DOC_INFO_TITLE, info.getTitle()); extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema); addMetadata(metadata, PDF.DOC_INFO_CREATOR, info.getAuthor()); extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema); addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator()); addMetadata(metadata, PDF.DOC_INFO_CREATOR_TOOL, info.getCreator()); addMetadata(metadata, Office.KEYWORDS, info.getKeywords()); addMetadata(metadata, PDF.DOC_INFO_KEY_WORDS, info.getKeywords()); addMetadata(metadata, PDF.DOC_INFO_PRODUCER, info.getProducer()); extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema); addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject()); addMetadata(metadata, TikaCoreProperties.SUBJECT, info.getKeywords()); addMetadata(metadata, TikaCoreProperties.SUBJECT, info.getSubject()); addMetadata(metadata, OfficeOpenXMLCore.SUBJECT, info.getSubject()); addMetadata(metadata, PDF.DOC_INFO_TRAPPED, info.getTrapped()); Calendar created = info.getCreationDate(); addMetadata(metadata, PDF.DOC_INFO_CREATED, created); addMetadata(metadata, TikaCoreProperties.CREATED, created); Calendar modified = info.getModificationDate(); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, modified); String name = key.getName(); if (!handledMetadata.contains(name)) {
/** * Used when processing custom metadata entries, as PDFBox won't do * the conversion for us in the way it does for the standard ones */ private void addMetadata(Metadata metadata, String name, COSBase value) { if (value instanceof COSArray) { for (Object v : ((COSArray) value).toList()) { addMetadata(metadata, name, ((COSBase) v)); } } else if (value instanceof COSString) { addMetadata(metadata, name, ((COSString) value).getString()); } // Avoid calling COSDictionary#toString, since it can lead to infinite // recursion. See TIKA-1038 and PDFBOX-1835. else if (value != null && !(value instanceof COSDictionary)) { addMetadata(metadata, name, value.toString()); } }
/** * Used when processing custom metadata entries, as PDFBox won't do * the conversion for us in the way it does for the standard ones */ private void addMetadata(Metadata metadata, String name, COSBase value) { if (value instanceof COSArray) { for (Object v : ((COSArray) value).toList()) { addMetadata(metadata, name, ((COSBase) v)); } } else if (value instanceof COSString) { addMetadata(metadata, name, ((COSString) value).getString()); } // Avoid calling COSDictionary#toString, since it can lead to infinite // recursion. See TIKA-1038 and PDFBOX-1835. else if (value != null && !(value instanceof COSDictionary)) { addMetadata(metadata, name, value.toString()); } }
addMetadata(metadata, property, pdfBoxBaseline); if (items == null) { if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) { addMetadata(metadata, property, pdfBoxBaseline); addMetadata(metadata, property, item); addMetadata(metadata, property, pdfBoxBaseline);
addMetadata(metadata, property, pdfBoxBaseline); if (items == null) { if (pdfBoxBaseline != null && pdfBoxBaseline.length() > 0) { addMetadata(metadata, property, pdfBoxBaseline); addMetadata(metadata, property, item); addMetadata(metadata, property, pdfBoxBaseline);
addMetadata(metadata, property, pdfBoxBaseline); continue; addMetadata(metadata, property, value); if (!property.isMultiValuePermitted()) { return; addMetadata(metadata, property, pdfBoxBaseline);
addMetadata(metadata, property, pdfBoxBaseline); continue; addMetadata(metadata, property, value); if (!property.isMultiValuePermitted()) { return; addMetadata(metadata, property, pdfBoxBaseline);
metadata.set(PagedText.N_PAGES, document.getNumberOfPages()); extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema); addMetadata(metadata, PDF.DOC_INFO_TITLE, info.getTitle()); extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema); addMetadata(metadata, PDF.DOC_INFO_CREATOR, info.getAuthor()); extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema); addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator()); addMetadata(metadata, PDF.DOC_INFO_CREATOR_TOOL, info.getCreator()); addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords()); addMetadata(metadata, PDF.DOC_INFO_KEY_WORDS, info.getKeywords()); addMetadata(metadata, "producer", info.getProducer()); addMetadata(metadata, PDF.DOC_INFO_PRODUCER, info.getProducer()); extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema); addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject()); addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject()); addMetadata(metadata, "trapped", info.getTrapped()); addMetadata(metadata, PDF.DOC_INFO_TRAPPED, info.getTrapped()); addMetadata(metadata, "created", info.getCreationDate()); addMetadata(metadata, PDF.DOC_INFO_CREATED, info.getCreationDate()); addMetadata(metadata, TikaCoreProperties.CREATED, info.getCreationDate()); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, info.getModificationDate()); String name = key.getName();
metadata.set(PagedText.N_PAGES, document.getNumberOfPages()); extractMultilingualItems(metadata, TikaCoreProperties.TITLE, info.getTitle(), dcSchema); addMetadata(metadata, PDF.DOC_INFO_TITLE, info.getTitle()); extractDublinCoreListItems(metadata, TikaCoreProperties.CREATOR, info.getAuthor(), dcSchema); addMetadata(metadata, PDF.DOC_INFO_CREATOR, info.getAuthor()); extractDublinCoreListItems(metadata, TikaCoreProperties.CONTRIBUTOR, null, dcSchema); addMetadata(metadata, TikaCoreProperties.CREATOR_TOOL, info.getCreator()); addMetadata(metadata, PDF.DOC_INFO_CREATOR_TOOL, info.getCreator()); addMetadata(metadata, TikaCoreProperties.KEYWORDS, info.getKeywords()); addMetadata(metadata, PDF.DOC_INFO_KEY_WORDS, info.getKeywords()); addMetadata(metadata, "producer", info.getProducer()); addMetadata(metadata, PDF.DOC_INFO_PRODUCER, info.getProducer()); extractMultilingualItems(metadata, TikaCoreProperties.DESCRIPTION, null, dcSchema); addMetadata(metadata, PDF.DOC_INFO_SUBJECT, info.getSubject()); addMetadata(metadata, TikaCoreProperties.TRANSITION_SUBJECT_TO_OO_SUBJECT, info.getSubject()); addMetadata(metadata, "trapped", info.getTrapped()); addMetadata(metadata, PDF.DOC_INFO_TRAPPED, info.getTrapped()); addMetadata(metadata, DEPRECATED_CREATED, created); addMetadata(metadata, PDF.DOC_INFO_CREATED, created); addMetadata(metadata, TikaCoreProperties.CREATED, created); Calendar modified = info.getModificationDate(); addMetadata(metadata, Metadata.LAST_MODIFIED, modified); addMetadata(metadata, TikaCoreProperties.MODIFIED, modified); addMetadata(metadata, PDF.DOC_INFO_MODIFICATION_DATE, modified); String name = key.getName();