// NOTE(review): this line ends with a second constructor signature
// (Path, ServiceLoader) that has no body in this view — looks like a
// truncated paste; confirm against the full file.
/**
 * Builds a DOM from {@code path} with {@code XMLReaderUtils.buildDOM} and
 * passes the result to another constructor of this class.
 *
 * @param path location handed to {@code XMLReaderUtils.buildDOM}
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(Path path) throws TikaException, IOException, SAXException { this(XMLReaderUtils.buildDOM(path)); } public TikaConfig(Path path, ServiceLoader loader)
/**
 * Builds a DOM from {@code path} and hands it, together with {@code loader},
 * to the constructor overload that accepts that pair.
 *
 * @param path   location handed to {@code XMLReaderUtils.buildDOM}
 * @param loader service loader forwarded unchanged to the delegated constructor
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(Path path, ServiceLoader loader)
        throws TikaException, IOException, SAXException {
    this(
            XMLReaderUtils.buildDOM(path),
            loader);
}
/**
 * Builds a DOM from {@code stream} with {@code XMLReaderUtils.buildDOM} and
 * passes the result to another constructor of this class.
 *
 * @param stream input handed to {@code XMLReaderUtils.buildDOM}
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(InputStream stream)
        throws TikaException, IOException, SAXException {
    this(
            XMLReaderUtils.buildDOM(stream));
}
/**
 * Converts {@code file} to a {@code Path} and chains to the {@code Path}
 * constructor, which builds the DOM via {@code XMLReaderUtils.buildDOM}.
 *
 * @param file file whose {@code toPath()} result is parsed
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(File file)
        throws TikaException, IOException, SAXException {
    // The Path overload performs the same XMLReaderUtils.buildDOM(path) call.
    this(file.toPath());
}
/**
 * Converts {@code file} to a {@code Path} and chains to the
 * {@code (Path, ServiceLoader)} constructor, which builds the DOM via
 * {@code XMLReaderUtils.buildDOM}.
 *
 * @param file   file whose {@code toPath()} result is parsed
 * @param loader service loader forwarded unchanged
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(File file, ServiceLoader loader)
        throws TikaException, IOException, SAXException {
    // The (Path, ServiceLoader) overload performs the same buildDOM(path) call.
    this(file.toPath(), loader);
}
/**
 * Builds a Document with a DocumentBuilder from the pool, reading the
 * content of the file at {@code path}.
 * <p>
 * The file is opened with {@code Files.newInputStream}, handed to the
 * stream-based {@code buildDOM} overload, and closed before this method
 * returns.
 *
 * @since Apache Tika 1.19.1
 * @param path path to parse
 * @return a document
 * @throws TikaException declared; propagated from the stream-based overload
 * @throws IOException if the file cannot be opened, or propagated from the
 *                     stream-based overload
 * @throws SAXException declared; propagated from the stream-based overload
 */
public static Document buildDOM(Path path)
        throws TikaException, IOException, SAXException {
    try (InputStream in = Files.newInputStream(path)) {
        Document parsed = buildDOM(in);
        return parsed;
    }
}
// NOTE(review): this line ends with a second constructor signature
// (URL, ServiceLoader) that has no body in this view — looks like a
// truncated paste; confirm against the full file.
/**
 * Builds a DOM from the string form of {@code url} with
 * {@code XMLReaderUtils.buildDOM}, takes its document element, and passes
 * that element together with {@code loader} to another constructor.
 *
 * @param url    URL whose {@code toString()} value is handed to {@code buildDOM}
 * @param loader class loader forwarded unchanged to the delegated constructor
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(URL url, ClassLoader loader) throws TikaException, IOException, SAXException { this(XMLReaderUtils.buildDOM(url.toString()).getDocumentElement(), loader); } public TikaConfig(URL url, ServiceLoader loader)
/**
 * Builds a DOM from the string form of {@code url} with
 * {@code XMLReaderUtils.buildDOM}, takes its document element, and passes
 * that element together with {@code loader} to another constructor.
 *
 * @param url    URL whose {@code toString()} value is handed to {@code buildDOM}
 * @param loader service loader forwarded unchanged to the delegated constructor
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(URL url, ServiceLoader loader)
        throws TikaException, IOException, SAXException {
    this(
            XMLReaderUtils
                    .buildDOM(url.toString())
                    .getDocumentElement(),
            loader);
}
/**
 * Parses {@code source} as XML and collects metadata from it.
 * <p>
 * The string is encoded as UTF-8, turned into a DOM with
 * {@code XMLReaderUtils.buildDOM}, and the document element is handed to
 * {@code createGrobidMetadata} along with the original string and a fresh
 * {@code Metadata} instance, which is then returned.
 *
 * @param source       XML text to parse
 * @param parseContext context forwarded to {@code XMLReaderUtils.buildDOM}
 * @return the newly created metadata object after {@code createGrobidMetadata} has run
 * @throws TikaException declared; propagated from the calls made here
 * @throws SAXException declared; propagated from the calls made here
 * @throws IOException declared; propagated from the calls made here
 */
public Metadata parse(String source, ParseContext parseContext)
        throws TikaException, SAXException, IOException {
    byte[] utf8Bytes = source.getBytes(StandardCharsets.UTF_8);
    ByteArrayInputStream xmlInput = new ByteArrayInputStream(utf8Bytes);
    Document root = XMLReaderUtils.buildDOM(xmlInput, parseContext);

    Metadata result = new Metadata();
    createGrobidMetadata(source, root.getDocumentElement(), result);
    return result;
}
public void parseRawXMP(byte[] xmpData) throws IOException, SAXException, TikaException { XMPMetadata xmp = null; try (InputStream decoded = new ByteArrayInputStream(xmpData) ) { Document dom = XMLReaderUtils.buildDOM(decoded, EMPTY_PARSE_CONTEXT); if (dom != null) { xmp = new XMPMetadata(dom); } } catch (IOException|SAXException e) { // } if (xmp != null) { JempboxExtractor.extractDublinCore(xmp, metadata); JempboxExtractor.extractXMPMM(xmp, metadata); } }
public void parse(InputStream file) throws IOException, TikaException { ByteArrayOutputStream xmpraw = new ByteArrayOutputStream(); if (!scanner.parse(file, xmpraw)) { return; } XMPMetadata xmp = null; try (InputStream decoded = new ByteArrayInputStream(xmpraw.toByteArray()) ) { Document dom = XMLReaderUtils.buildDOM(decoded, EMPTY_PARSE_CONTEXT); if (dom != null) { xmp = new XMPMetadata(dom); } } catch (IOException|SAXException e) { // } extractDublinCore(xmp, metadata); extractXMPMM(xmp, metadata); }
/**
 * Exports the XMP stream from {@code pdMetadata} and parses it into a DOM.
 * <p>
 * Failures never propagate as checked exceptions: an {@code IOException}
 * from the export is reported through
 * {@code EmbeddedDocumentUtil.recordEmbeddedStreamException}, and any
 * {@code IOException}, {@code SAXException} or {@code TikaException} from
 * building the DOM is reported through
 * {@code EmbeddedDocumentUtil.recordException}. The exported stream is
 * closed quietly in every case.
 *
 * @param pdMetadata source of the XMP stream; {@code null} yields {@code null}
 * @param metadata   target passed to the exception-recording helpers
 * @param context    parse context forwarded to {@code XMLReaderUtils.buildDOM}
 * @return the parsed document, or {@code null} when there is no input or a failure was recorded
 */
private Document loadDOM(PDMetadata pdMetadata, Metadata metadata, ParseContext context) {
    if (pdMetadata == null) {
        return null;
    }

    // Step 1: obtain the stream; a failure here is recorded as a stream-level problem.
    InputStream xmpStream;
    try {
        xmpStream = pdMetadata.exportXMPMetadata();
    } catch (IOException e) {
        EmbeddedDocumentUtil.recordEmbeddedStreamException(e, metadata);
        return null;
    }

    // Step 2: parse it; the stream is closed whether or not parsing succeeds.
    try {
        return XMLReaderUtils.buildDOM(xmpStream, context);
    } catch (IOException | SAXException | TikaException e) {
        EmbeddedDocumentUtil.recordException(e, metadata);
        return null;
    } finally {
        IOUtils.closeQuietly(xmpStream);
    }
}
// NOTE(review): this line ends with a second constructor signature
// (Path, ServiceLoader) that has no body in this view — looks like a
// truncated paste; confirm against the full file.
/**
 * Builds a DOM from {@code path} with {@code XMLReaderUtils.buildDOM} and
 * passes the result to another constructor of this class.
 *
 * @param path location handed to {@code XMLReaderUtils.buildDOM}
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(Path path) throws TikaException, IOException, SAXException { this(XMLReaderUtils.buildDOM(path)); } public TikaConfig(Path path, ServiceLoader loader)
/**
 * Builds a DOM from {@code stream} with {@code XMLReaderUtils.buildDOM} and
 * passes the result to another constructor of this class.
 *
 * @param stream input handed to {@code XMLReaderUtils.buildDOM}
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(InputStream stream)
        throws TikaException, IOException, SAXException {
    this(
            XMLReaderUtils.buildDOM(stream));
}
/**
 * Converts {@code file} to a {@code Path} and chains to the
 * {@code (Path, ServiceLoader)} constructor, which builds the DOM via
 * {@code XMLReaderUtils.buildDOM}.
 *
 * @param file   file whose {@code toPath()} result is parsed
 * @param loader service loader forwarded unchanged
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(File file, ServiceLoader loader)
        throws TikaException, IOException, SAXException {
    // The (Path, ServiceLoader) overload performs the same buildDOM(path) call.
    this(file.toPath(), loader);
}
/**
 * Converts {@code file} to a {@code Path} and chains to the {@code Path}
 * constructor, which builds the DOM via {@code XMLReaderUtils.buildDOM}.
 *
 * @param file file whose {@code toPath()} result is parsed
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(File file)
        throws TikaException, IOException, SAXException {
    // The Path overload performs the same XMLReaderUtils.buildDOM(path) call.
    this(file.toPath());
}
// NOTE(review): this line ends with a second constructor signature
// (URL, ServiceLoader) that has no body in this view — looks like a
// truncated paste; confirm against the full file.
/**
 * Builds a DOM from the string form of {@code url} with
 * {@code XMLReaderUtils.buildDOM}, takes its document element, and passes
 * that element together with {@code loader} to another constructor.
 *
 * @param url    URL whose {@code toString()} value is handed to {@code buildDOM}
 * @param loader class loader forwarded unchanged to the delegated constructor
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(URL url, ClassLoader loader) throws TikaException, IOException, SAXException { this(XMLReaderUtils.buildDOM(url.toString()).getDocumentElement(), loader); } public TikaConfig(URL url, ServiceLoader loader)
/**
 * Builds a DOM from the string form of {@code url} with
 * {@code XMLReaderUtils.buildDOM}, takes its document element, and passes
 * that element together with {@code loader} to another constructor.
 *
 * @param url    URL whose {@code toString()} value is handed to {@code buildDOM}
 * @param loader service loader forwarded unchanged to the delegated constructor
 * @throws TikaException declared; propagated from the delegated calls
 * @throws IOException declared; propagated from the delegated calls
 * @throws SAXException declared; propagated from the delegated calls
 */
public TikaConfig(URL url, ServiceLoader loader)
        throws TikaException, IOException, SAXException {
    this(
            XMLReaderUtils
                    .buildDOM(url.toString())
                    .getDocumentElement(),
            loader);
}
ServiceLoader tmpServiceLoader = new ServiceLoader(); try (InputStream stream = getConfigInputStream(config, tmpServiceLoader)) { Element element = XMLReaderUtils.buildDOM(stream).getDocumentElement(); updateXMLReaderUtils(element); serviceLoader = serviceLoaderFromDomElement(element, tmpServiceLoader.getLoader());
/**
 * Parses {@code source} as XML and collects metadata from it.
 * <p>
 * The string is encoded as UTF-8, turned into a DOM with
 * {@code XMLReaderUtils.buildDOM}, and the document element is handed to
 * {@code createGrobidMetadata} along with the original string and a fresh
 * {@code Metadata} instance, which is then returned.
 *
 * @param source       XML text to parse
 * @param parseContext context forwarded to {@code XMLReaderUtils.buildDOM}
 * @return the newly created metadata object after {@code createGrobidMetadata} has run
 * @throws TikaException declared; propagated from the calls made here
 * @throws SAXException declared; propagated from the calls made here
 * @throws IOException declared; propagated from the calls made here
 */
public Metadata parse(String source, ParseContext parseContext)
        throws TikaException, SAXException, IOException {
    byte[] utf8Bytes = source.getBytes(StandardCharsets.UTF_8);
    ByteArrayInputStream xmlInput = new ByteArrayInputStream(utf8Bytes);
    Document root = XMLReaderUtils.buildDOM(xmlInput, parseContext);

    Metadata result = new Metadata();
    createGrobidMetadata(source, root.getDocumentElement(), result);
    return result;
}