/**
 * Creates the content handler that receives the parse events for {@code output}.
 *
 * <p>A transformer handler is obtained from {@code getTransformerHandler} with the
 * {@code "html"} method and the {@code encoding} / {@code prettyPrint} values visible
 * in the enclosing scope, and is then wrapped in an {@link ExpandedTitleContentHandler}.
 *
 * @param output   stream handed to {@code getTransformerHandler}
 * @param metadata not read by this implementation
 * @return the wrapping {@link ExpandedTitleContentHandler}
 * @throws Exception if {@code getTransformerHandler} fails
 */
@Override
protected ContentHandler getContentHandler(OutputStream output, Metadata metadata) throws Exception {
    final ContentHandler htmlSerializer = getTransformerHandler(output, "html", encoding, prettyPrint);
    return new ExpandedTitleContentHandler(htmlSerializer);
}
};
/**
 * Runs the parse and serializes its SAX events to {@code outputStream}.
 *
 * <p>The stream is wrapped in a UTF-8 writer and a {@link TransformerHandler} is
 * configured with the output method held in {@code format}, indentation enabled and
 * UTF-8 as the declared encoding. That handler is wrapped in an
 * {@link ExpandedTitleContentHandler} and passed to {@code parse(...)} together with
 * the parser, input stream, metadata and context captured from the enclosing scope.
 *
 * @param outputStream destination of the serialized document
 * @throws IOException             declared by the signature; propagated from {@code parse}
 * @throws WebApplicationException if the transformer handler cannot be created
 */
public void write(OutputStream outputStream) throws IOException, WebApplicationException {
    final Writer out = new OutputStreamWriter(outputStream, UTF_8);
    final ContentHandler sink;
    try {
        final SAXTransformerFactory saxFactory =
                (SAXTransformerFactory) SAXTransformerFactory.newInstance();
        final TransformerHandler serializer = saxFactory.newTransformerHandler();

        // Output settings: method, indentation, then declared encoding.
        serializer.getTransformer().setOutputProperty(OutputKeys.METHOD, format);
        serializer.getTransformer().setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.getTransformer().setOutputProperty(OutputKeys.ENCODING, UTF_8.name());

        serializer.setResult(new StreamResult(out));
        sink = new ExpandedTitleContentHandler(serializer);
    } catch (TransformerConfigurationException configurationFailure) {
        // Surface the configuration problem through the JAX-RS exception type.
        throw new WebApplicationException(configurationFailure);
    }

    parse(parser, LOG, info.getPath(), is, sink, metadata, context);
}
};
return new ExpandedTitleContentHandler(handler);
/**
 * Selects the content handler matching the output format requested by {@code configuration}.
 *
 * <ul>
 *   <li>{@code xml}: the handler returned by {@code getTransformerHandler(outputStream, "xml", true)}</li>
 *   <li>{@code text}: a {@link BodyContentHandler} writing through an
 *       {@link OutputStreamWriter} that uses {@code this.encoding}</li>
 *   <li>{@code textMain}: a {@link BoilerpipeContentHandler} writing through an
 *       {@link OutputStreamWriter} that uses {@code this.encoding}</li>
 *   <li>{@code html}: an {@link ExpandedTitleContentHandler} wrapping the handler returned by
 *       {@code getTransformerHandler(outputStream, "html", true)}</li>
 * </ul>
 *
 * @param configuration source of the requested {@link TikaParseOutputFormat}
 * @param outputStream  stream the returned handler writes to
 * @return the handler for the requested format
 * @throws TransformerConfigurationException declared by the signature; presumably raised by
 *         {@code getTransformerHandler} (confirm against that helper)
 * @throws UnsupportedEncodingException      declared by the signature; presumably raised when
 *         {@code this.encoding} is not a supported charset name (confirm the field's type)
 * @throws IllegalArgumentException          if the format is none of the four handled constants
 */
private ContentHandler getContentHandler(TikaConfiguration configuration, OutputStream outputStream)
        throws TransformerConfigurationException, UnsupportedEncodingException {
    final TikaParseOutputFormat outputFormat = configuration.getTikaParseOutputFormat();
    switch (outputFormat) {
        case xml:
            return getTransformerHandler(outputStream, "xml", true);
        case text:
            return new BodyContentHandler(new OutputStreamWriter(outputStream, this.encoding));
        case textMain:
            return new BoilerpipeContentHandler(new OutputStreamWriter(outputStream, this.encoding));
        case html:
            return new ExpandedTitleContentHandler(getTransformerHandler(outputStream, "html", true));
        default:
            // Report the value that was actually switched on (taken from the
            // 'configuration' argument), not a format read from some other object.
            throw new IllegalArgumentException(String.format("Unknown format %s", outputFormat));
    }
}
return new ExpandedTitleContentHandler(handler);
handler.getTransformer().setOutputProperty(OutputKeys.ENCODING, outputEncoding); handler.setResult(new StreamResult(writer)); content = new ExpandedTitleContentHandler(handler);
/**
 * Creates a reader that parses {@code input} to HTML and substitutes embed tokens in the output.
 *
 * <p>The superclass receives a handler factory which, for a given writer, builds an
 * {@link HTML5Serializer} wrapped in an {@link ExpandedTitleContentHandler}, itself wrapped
 * in a {@link SubstitutingContentHandler} constructed with {@code parent}, {@code open} and
 * {@code close}.
 *
 * <p>{@code replacer} is then set to a {@link TokenReplacingReader} over {@code reader}
 * (not declared in this constructor; presumably inherited, confirm against the superclass)
 * using the same {@code open} / {@code close} delimiters. Each token is looked up with
 * {@code parent.getEmbed(token)}: when no embed exists the resolver yields {@code null},
 * otherwise it yields the reader produced by {@code DataURIEncodingInputStream.createReader}
 * for the embed's path and metadata.
 *
 * @param parent   document whose embeds are resolved by token
 * @param open     opening delimiter passed to both the substituting handler and the replacer
 * @param close    closing delimiter passed to both the substituting handler and the replacer
 * @param parser   forwarded to the superclass
 * @param input    forwarded to the superclass
 * @param metadata forwarded to the superclass
 * @param context  forwarded to the superclass
 * @throws IOException declared by the signature
 */
public EmbeddingHTMLParsingReader(final TikaDocument parent, final String open, final String close,
        final Parser parser, final TikaInputStream input, final Metadata metadata,
        final ParseContext context) throws IOException {
    super(
            parser,
            input,
            metadata,
            context,
            (out) -> new SubstitutingContentHandler(
                    parent,
                    open,
                    close,
                    new ExpandedTitleContentHandler(new HTML5Serializer(out))));

    this.replacer = new TokenReplacingReader(
            (key) -> {
                final EmbeddedTikaDocument attachment = parent.getEmbed(key);
                return attachment == null
                        ? null
                        : DataURIEncodingInputStream.createReader(attachment.getPath(), attachment.getMetadata());
            },
            reader,
            open,
            close);
}
handler = (writer) -> new ExpandedTitleContentHandler(new HTML5Serializer(writer)); } else {