public void handle(Directory directory, Metadata metadata) throws MetadataException { //TODO: after upgrading metadataextractor, swap out //magic number with ExifDirectoryBase.TAG_PAGE_NUMBER if (directory.containsTag(297)) { int[] pageNums = directory.getIntArray(297); //pages can be in any order, take the max if (pageNums != null && pageNums.length > 1) { Integer curr = metadata.getInt(TIFF.EXIF_PAGE_COUNT); if (curr == null || curr < pageNums[1]) { metadata.set(TIFF.EXIF_PAGE_COUNT, pageNums[1]); } } } } }
Integer.toString(countMetadataValues(m))); Integer nPages = m.getInt(PagedText.N_PAGES); if (nPages != null) { data.put(Cols.NUM_PAGES, Integer.toString(nPages));
// Checks on Metadata.getInt(...): each assertion expects either null or the
// Integer 22 back from the lookup.
// NOTE(review): consecutive assertions expect different results for the same
// key (null, then 22), so the statements that modify `meta` between them are
// presumably not part of this excerpt -- confirm against the full test.

// IMAGE_WIDTH: expected to yield null here.
assertEquals(null, meta.getInt(Metadata.IMAGE_WIDTH));
// IMAGE_WIDTH: expected to yield 22 here.
assertEquals(22, meta.getInt(Metadata.IMAGE_WIDTH).intValue());
// IMAGE_WIDTH: expected to yield null again.
assertEquals(null, meta.getInt(Metadata.IMAGE_WIDTH));
// IMAGE_WIDTH: expected to yield 22 again.
assertEquals(22, meta.getInt(Metadata.IMAGE_WIDTH).intValue());
// BITS_PER_SAMPLE: expected to yield null.
assertEquals(null, meta.getInt(Metadata.BITS_PER_SAMPLE));
// TikaCoreProperties.CREATED: expected to yield null from getInt.
// NOTE(review): presumably because CREATED is not an integer property -- verify.
assertEquals(null, meta.getInt(TikaCoreProperties.CREATED));
TikaInputStream docxStream = TikaInputStream.get(new File("some-doc.docx")); TikaInputStream pdfStream = TikaInputStream.get(new File("some-doc.pdf")); ContentHandler handler = new DefaultContentHandler(); Metadata docxMeta = new Metadata(); Metadata pdfMeta = new Metadata(); ParseContext pc = new ParseContext(); Parser parser = TikaConfig.getDefaultConfig().getParser(); parser.parse(docxStream, handler, docxMeta, pc); parser.parse(pdfStream, handler, pdfMeta, pc); int docxParagraphCount = docxMeta.getInt(Office.PARAGRAPH_COUNT); int pdfParagraphCount = pdfMeta.getInt(Office.PARAGRAPH_COUNT);
public void handle(Directory directory, Metadata metadata) throws MetadataException { //TODO: after upgrading metadataextractor, swap out //magic number with ExifDirectoryBase.TAG_PAGE_NUMBER if (directory.containsTag(297)) { int[] pageNums = directory.getIntArray(297); //pages can be in any order, take the max if (pageNums != null && pageNums.length > 1) { Integer curr = metadata.getInt(TIFF.EXIF_PAGE_COUNT); if (curr == null || curr < pageNums[1]) { metadata.set(TIFF.EXIF_PAGE_COUNT, pageNums[1]); } } } } }