} else if (headerTag.equalsIgnoreCase("Date")) { try { Date date = parseDate(headerContent); metadata.set(TikaCoreProperties.CREATED, date); } catch (ParseException e) {
parseMailAttachments(handler, pstMail, mailMetadata, embeddedExtractor); parserMailItem(handler, pstMail, mailMetadata, embeddedExtractor); handler.startElement("div", createAttribute("class", "email-folder")); handler.element("h1", pstSubFolder.getDisplayName()); parseFolder(handler, pstSubFolder, embeddedExtractor); handler.endElement("div");
saveHeaderInMetadata(mailMetadata, item); getTrackingMetadata().put(mailItem++, mailMetadata);
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Use the delegate parser to parse the contained document EmbeddedDocumentExtractor embeddedExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context); metadata.set(Metadata.CONTENT_TYPE, MS_OUTLOOK_PST_MIMETYPE.toString()); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); TikaInputStream in = TikaInputStream.get(stream); PSTFile pstFile = null; try { pstFile = new PSTFile(in.getFile().getPath()); metadata.set(Metadata.CONTENT_LENGTH, valueOf(pstFile.getFileHandle().length())); boolean isValid = pstFile.getFileHandle().getFD().valid(); metadata.set("isValid", valueOf(isValid)); if (isValid) { parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor); } } catch (Exception e) { throw new TikaException(e.getMessage(), e); } finally { if (pstFile != null && pstFile.getFileHandle() != null) { try { pstFile.getFileHandle().close(); } catch (IOException e) { //swallow closing exception } } } xhtml.endDocument(); }
/**
 * Recursively writes the contents of a PST folder to the XHTML handler.
 *
 * <p>When the folder reports a content count above zero, each child message is
 * emitted as a {@code div} with class {@code embedded} and the message's internet
 * message id as its {@code id}, containing an {@code h1} with the subject followed
 * by the output of the mail-item and attachment helpers. Afterwards, every
 * sub-folder is emitted as a {@code div} with class {@code email-folder} holding an
 * {@code h1} with the folder's display name and the recursively parsed content.
 *
 * @param handler           XHTML handler receiving the generated elements
 * @param pstFolder         folder whose messages and sub-folders are written
 * @param embeddedExtractor extractor passed through to the mail and attachment helpers
 * @throws Exception if reading the PST or writing to the handler fails
 */
private void parseFolder(XHTMLContentHandler handler, PSTFolder pstFolder, EmbeddedDocumentExtractor embeddedExtractor)
        throws Exception {
    final boolean hasMessages = pstFolder.getContentCount() > 0;
    if (hasMessages) {
        // getNextChild() yields null once the folder's children are exhausted.
        for (PSTMessage message = (PSTMessage) pstFolder.getNextChild();
                message != null;
                message = (PSTMessage) pstFolder.getNextChild()) {
            AttributesImpl divAttrs = new AttributesImpl();
            divAttrs.addAttribute("", "class", "class", "CDATA", "embedded");
            divAttrs.addAttribute("", "id", "id", "CDATA", message.getInternetMessageId());

            handler.startElement("div", divAttrs);
            handler.element("h1", message.getSubject());
            parserMailItem(handler, message, embeddedExtractor);
            parseMailAttachments(handler, message, embeddedExtractor);
            handler.endElement("div");
        }
    }

    if (!pstFolder.hasSubfolders()) {
        return;
    }

    for (PSTFolder child : pstFolder.getSubFolders()) {
        handler.startElement("div", createAttribute("class", "email-folder"));
        handler.element("h1", child.getDisplayName());
        parseFolder(handler, child, embeddedExtractor);
        handler.endElement("div");
    }
}
Date d = MboxParser.parseDate(date); metadata.set(TikaCoreProperties.CREATED, d); metadata.set(TikaCoreProperties.MODIFIED, d);
saveHeaderInMetadata(mailMetadata, item); getTrackingMetadata().put(mailItem++, mailMetadata);
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Use the delegate parser to parse the contained document EmbeddedDocumentExtractor embeddedExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context); metadata.set(Metadata.CONTENT_TYPE, MS_OUTLOOK_PST_MIMETYPE.toString()); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); TikaInputStream in = TikaInputStream.get(stream); PSTFile pstFile = null; try { pstFile = new PSTFile(in.getFile().getPath()); metadata.set(Metadata.CONTENT_LENGTH, valueOf(pstFile.getFileHandle().length())); boolean isValid = pstFile.getFileHandle().getFD().valid(); metadata.set("isValid", valueOf(isValid)); if (isValid) { parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor); } } catch (Exception e) { throw new TikaException(e.getMessage(), e); } finally { if (pstFile != null && pstFile.getFileHandle() != null) { try { pstFile.getFileHandle().close(); } catch (IOException e) { //swallow closing exception } } } xhtml.endDocument(); }
parseMailAttachments(handler, pstMail, mailMetadata, embeddedExtractor); parserMailItem(handler, pstMail, mailMetadata, embeddedExtractor); handler.startElement("div", createAttribute("class", "email-folder")); handler.element("h1", pstSubFolder.getDisplayName()); parseFolder(handler, pstSubFolder, embeddedExtractor); handler.endElement("div");
} else if (headerTag.equalsIgnoreCase("Date")) { try { Date date = parseDate(headerContent); metadata.set(TikaCoreProperties.CREATED, date); } catch (ParseException e) {
saveHeaderInMetadata(mailMetadata, item); getTrackingMetadata().put(mailItem++, mailMetadata);
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { // Use the delegate parser to parse the contained document EmbeddedDocumentExtractor embeddedExtractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context); metadata.set(Metadata.CONTENT_TYPE, MS_OUTLOOK_PST_MIMETYPE.toString()); XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); xhtml.startDocument(); TikaInputStream in = TikaInputStream.get(stream); PSTFile pstFile = null; try { pstFile = new PSTFile(in.getFile().getPath()); metadata.set(Metadata.CONTENT_LENGTH, valueOf(pstFile.getFileHandle().length())); boolean isValid = pstFile.getFileHandle().getFD().valid(); metadata.set("isValid", valueOf(isValid)); if (isValid) { parseFolder(xhtml, pstFile.getRootFolder(), embeddedExtractor); } } catch (Exception e) { throw new TikaException(e.getMessage(), e); } finally { if (pstFile != null && pstFile.getFileHandle() != null) { try { pstFile.getFileHandle().close(); } catch (IOException e) { //swallow closing exception } } } xhtml.endDocument(); }
} else if (headerTag.equalsIgnoreCase("Date")) { try { Date date = parseDate(headerContent); metadata.set(TikaCoreProperties.CREATED, date); } catch (ParseException e) {
Date d = MboxParser.parseDate(date); metadata.set(TikaCoreProperties.CREATED, d); metadata.set(TikaCoreProperties.MODIFIED, d);
Date d = MboxParser.parseDate(date); metadata.set(TikaCoreProperties.CREATED, d); metadata.set(TikaCoreProperties.MODIFIED, d);
Date d = MboxParser.parseDate(date); metadata.set(TikaCoreProperties.CREATED, d); metadata.set(TikaCoreProperties.MODIFIED, d);