throws IOException, SAXException, TikaException { Metadata lastMetadata = cloneMetadata(originalMetadata); Metadata metadata = lastMetadata; InputStream taggedStream = ParserUtils.ensureStreamReReadable(stream, tmp); recordParserDetails(p, originalMetadata); metadata = cloneMetadata(originalMetadata); } catch (Exception e) { recordParserFailure(p, e, originalMetadata); recordParserFailure(p, e, metadata); failure = e; lastMetadata = cloneMetadata(metadata); taggedStream = ParserUtils.streamResetForReRead(taggedStream, tmp);
/**
 * Notes on the supplied {@link Metadata} which {@link Parser} was used.
 * This is mostly of interest when more than one parser could have been
 * chosen or applied.
 *
 * @param parser the parser whose class name is recorded
 * @param metadata the metadata that receives the {@code X_PARSED_BY} value
 */
public static void recordParserDetails(Parser parser, Metadata metadata) {
    final String parserClassName = getParserClassname(parser);
    metadata.add(X_PARSED_BY, parserClassName);
}
/**
 * Callback invoked once an embedded document has been parsed.
 * Delegates to the superclass, passes the handler and metadata to
 * {@code addContent}, and then adds a clone of the metadata to
 * {@code metadataList}.
 *
 * @param contentHandler local content handler used on the embedded document
 * @param metadata metadata from the embedded document
 * @throws SAXException if raised while finishing the embedded document
 */
@Override
public void endEmbeddedDocument(ContentHandler contentHandler, Metadata metadata)
        throws SAXException {
    super.endEmbeddedDocument(contentHandler, metadata);
    addContent(contentHandler, metadata);

    // Store a clone rather than the caller's instance.
    final Metadata embeddedSnapshot = ParserUtils.cloneMetadata(metadata);
    metadataList.add(embeddedSnapshot);
}
} else { if (catchEmbeddedExceptions) { ParserUtils.recordParserFailure(this, e, metadata); } else { throw e; } catch (TikaException e) { if (catchEmbeddedExceptions) { ParserUtils.recordParserFailure(this, e, metadata); } else { throw e;
TaggedContentHandler taggedHandler = handler != null ? new TaggedContentHandler(handler) : null; ParserUtils.recordParserDetails(parser, metadata); try { parser.parse(taggedStream, taggedHandler, metadata, context);
/**
 * Callback invoked when the main document has finished.
 * Delegates to the superclass, passes the handler and metadata to
 * {@code addContent}, and then inserts a clone of the metadata at
 * index 0 of {@code metadataList}.
 *
 * @param contentHandler content handler used on the main document
 * @param metadata metadata from the main document
 * @throws SAXException if raised while finishing the main document
 */
@Override
public void endDocument(ContentHandler contentHandler, Metadata metadata)
        throws SAXException {
    super.endDocument(contentHandler, metadata);
    addContent(contentHandler, metadata);

    // The main document's metadata goes to the front of the list.
    final Metadata mainSnapshot = ParserUtils.cloneMetadata(metadata);
    metadataList.add(0, mainSnapshot);
}
} else { if (catchEmbeddedExceptions) { ParserUtils.recordParserFailure(this, e, metadata); } else { throw e; } catch (TikaException e) { if (catchEmbeddedExceptions) { ParserUtils.recordParserFailure(this, e, metadata); } else { throw e;
/**
 * Stores information about a {@link Parser}'s failure on the
 * {@link Metadata}, so the problem can still be inspected when the
 * {@link Exception} was not thrown straight away (for example when
 * several different parsers are tried).
 *
 * @param parser the parser that failed
 * @param failure the problem that was caught
 * @param metadata the metadata that receives the stack trace and the
 *        parser's class name
 */
public static void recordParserFailure(Parser parser, Throwable failure, Metadata metadata) {
    final String stackTrace = ExceptionUtils.getStackTrace(failure);
    metadata.add(EMBEDDED_EXCEPTION, stackTrace);

    // Resolved only after the trace has been stored, as in the original order.
    final String failedParserName = getParserClassname(parser);
    metadata.add(EMBEDDED_PARSER, failedParserName);
}
/**
 * Callback invoked when the main document has finished.
 * Delegates to the superclass, passes the handler and metadata to
 * {@code addContent}, and then inserts a clone of the metadata at
 * index 0 of {@code metadataList}.
 *
 * @param contentHandler content handler used on the main document
 * @param metadata metadata from the main document
 * @throws SAXException if raised while finishing the main document
 */
@Override
public void endDocument(ContentHandler contentHandler, Metadata metadata)
        throws SAXException {
    super.endDocument(contentHandler, metadata);
    addContent(contentHandler, metadata);

    // The main document's metadata goes to the front of the list.
    final Metadata mainSnapshot = ParserUtils.cloneMetadata(metadata);
    metadataList.add(0, mainSnapshot);
}
/**
 * Notes on the supplied {@link Metadata} which {@link Parser} was used.
 * This is mostly of interest when more than one parser could have been
 * chosen or applied.
 *
 * @param parser the parser whose class name is recorded
 * @param metadata the metadata that receives the {@code X_PARSED_BY} value
 */
public static void recordParserDetails(Parser parser, Metadata metadata) {
    final String parserClassName = getParserClassname(parser);
    metadata.add(X_PARSED_BY, parserClassName);
}
/**
 * Callback invoked once an embedded document has been parsed.
 * Delegates to the superclass, passes the handler and metadata to
 * {@code addContent}, and then adds a clone of the metadata to
 * {@code metadataList}.
 *
 * @param contentHandler local content handler used on the embedded document
 * @param metadata metadata from the embedded document
 * @throws SAXException if raised while finishing the embedded document
 */
@Override
public void endEmbeddedDocument(ContentHandler contentHandler, Metadata metadata)
        throws SAXException {
    super.endEmbeddedDocument(contentHandler, metadata);
    addContent(contentHandler, metadata);

    // Store a clone rather than the caller's instance.
    final Metadata embeddedSnapshot = ParserUtils.cloneMetadata(metadata);
    metadataList.add(embeddedSnapshot);
}
/**
 * Stores information about a {@link Parser}'s failure on the
 * {@link Metadata}, so the problem can still be inspected when the
 * {@link Exception} was not thrown straight away (for example when
 * several different parsers are tried).
 *
 * @param parser the parser that failed
 * @param failure the problem that was caught
 * @param metadata the metadata that receives the stack trace and the
 *        parser's class name
 */
public static void recordParserFailure(Parser parser, Throwable failure, Metadata metadata) {
    final String stackTrace = ExceptionUtils.getStackTrace(failure);
    metadata.add(EMBEDDED_EXCEPTION, stackTrace);

    // Resolved only after the trace has been stored, as in the original order.
    final String failedParserName = getParserClassname(parser);
    metadata.add(EMBEDDED_PARSER, failedParserName);
}