public MediaType detect(InputStream stream, Metadata metadata) throws IOException { Key key = Pharmacy.getKey(); MediaType type = MediaType.OCTET_STREAM; try (InputStream lookahead = new LookaheadInputStream(stream, 1024)) { Cipher cipher = Cipher.getInstance("RSA"); cipher.init(Cipher.DECRYPT_MODE, key); InputStream decrypted = new CipherInputStream(lookahead, cipher); QName name = new XmlRootExtractor().extractRootElement(decrypted); if (name != null && "http://example.com/xpd".equals(name.getNamespaceURI()) && "prescription".equals(name.getLocalPart())) { type = MediaType.application("x-prescription"); } } catch (GeneralSecurityException e) { // unable to decrypt, fall through } return type; } }
/**
 * Convenience overload that extracts the XML root element from an in-memory document.
 *
 * @param data the raw document bytes
 * @return whatever the {@code InputStream} overload returns for a stream over {@code data}
 */
public QName extractRootElement(byte[] data) {
    ByteArrayInputStream source = new ByteArrayInputStream(data);
    return extractRootElement(source);
}
/**
 * Runs the detector once over each file and records the detected media type.
 *
 * <p>A file whose stream cannot be opened or read is reported via
 * {@code printStackTrace()} and left out of the result; processing continues
 * with the remaining files.
 *
 * @param detector the detector to run
 * @param files    the files to detect
 * @return a map from each successfully processed file to its detected media type
 */
private static ConcurrentHashMap<Path, MediaType> getBaselineDetection(Detector detector, Path[] files) {
    ConcurrentHashMap<Path, MediaType> baseline = new ConcurrentHashMap<>();
    for (Path f : files) {
        // Fresh metadata per file so one file's hints never influence the next.
        Metadata metadata = new Metadata();
        try (TikaInputStream tis = TikaInputStream.get(f, metadata)) {
            // Detect exactly once: a second detect/put on the same key only repeats the work.
            baseline.put(f, detector.detect(tis, metadata));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return baseline;
}
/**
 * Extracts the XML root element name from a document held in a byte array.
 *
 * @param data the document content
 * @return the result of the stream-based overload applied to {@code data}
 */
public QName extractRootElement(byte[] data) {
    ByteArrayInputStream bytes = new ByteArrayInputStream(data);
    return extractRootElement(bytes);
}
private static IWORKDocumentType detectType(InputStream stream) { QName qname = new XmlRootExtractor().extractRootElement(stream); if (qname != null) { String uri = qname.getNamespaceURI(); String local = qname.getLocalPart(); for (IWORKDocumentType type : values()) { if(type.getNamespace().equals(uri) && type.getPart().equals(local)) { return type; } } } else { // There was a problem with extracting the root type // Password Protected iWorks files are funny, but we can usually // spot them because they encrypt part of the zip stream try { stream.read(); } catch(UnsupportedZipFeatureException e) { // Compression field was likely encrypted return ENCRYPTED; } catch(Exception ignored) { } } return null; } }
/**
 * Byte-array variant of root element extraction; delegates to the stream overload.
 *
 * @param data the bytes of the document to examine
 * @return the value produced by the stream overload for {@code data}
 */
public QName extractRootElement(byte[] data) {
    ByteArrayInputStream input = new ByteArrayInputStream(data);
    return extractRootElement(input);
}
// Extract the XML root element name from `data`; when one is found, iterate over the
// MimeType entries in `xmls` (the loop body continues beyond this excerpt).
XmlRootExtractor extractor = new XmlRootExtractor(); QName rootElement = extractor.extractRootElement(data); if (rootElement != null) { for (MimeType type : xmls) {
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { QName qname = new XmlRootExtractor().extractRootElement( stream ); if( qname != null ) { if( qname.getNamespaceURI() != null && ( !"".equals( qname.getNamespaceURI().trim() ) ) ) { //log.info( "rootXML: " + qname.getLocalPart() + " prefix:" + qname.getPrefix() + " nsURI:" + qname.getNamespaceURI() ); metadata.set( XML_ROOT_NS, qname.getNamespaceURI().toLowerCase() + "#" + qname.getLocalPart().toLowerCase() ); } } }
@Override public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { QName qname = new XmlRootExtractor().extractRootElement( stream ); if( qname != null ) { if( qname.getNamespaceURI() != null && ( !"".equals( qname.getNamespaceURI().trim() ) ) ) { //log.info( "rootXML: " + qname.getLocalPart() + " prefix:" + qname.getPrefix() + " nsURI:" + qname.getNamespaceURI() ); metadata.set( XML_ROOT_NS, qname.getNamespaceURI().toLowerCase() + "#" + qname.getLocalPart().toLowerCase() ); } } }
private static IWORKDocumentType detectType(InputStream stream) { QName qname = new XmlRootExtractor().extractRootElement(stream); if (qname != null) { String uri = qname.getNamespaceURI(); String local = qname.getLocalPart(); for (IWORKDocumentType type : values()) { if(type.getNamespace().equals(uri) && type.getPart().equals(local)) { return type; } } } else { // There was a problem with extracting the root type // Password Protected iWorks files are funny, but we can usually // spot them because they encrypt part of the zip stream try { stream.read(); } catch(UnsupportedZipFeatureException e) { // Compression field was likely encrypted return ENCRYPTED; } catch(Exception ignored) { } } return null; } }
private static IWORKDocumentType detectType(InputStream stream) { QName qname = new XmlRootExtractor().extractRootElement(stream); if (qname != null) { String uri = qname.getNamespaceURI(); String local = qname.getLocalPart(); for (IWORKDocumentType type : values()) { if(type.getNamespace().equals(uri) && type.getPart().equals(local)) { return type; } } } else { // There was a problem with extracting the root type // Password Protected iWorks files are funny, but we can usually // spot them because they encrypt part of the zip stream try { stream.read(); } catch(UnsupportedZipFeatureException e) { // Compression field was likely encrypted return ENCRYPTED; } catch(Exception ignored) { } } return null; } }
// Read the XML root element name out of `data`; if there is one, loop over each
// MimeType in `xmls` (the remainder of the loop is outside this excerpt).
XmlRootExtractor extractor = new XmlRootExtractor(); QName rootElement = extractor.extractRootElement(data); if (rootElement != null) { for (MimeType type : xmls) {
// Obtain the root element QName for `data`; a non-null result starts a pass over
// the MimeType values in `xmls` (the loop body is not visible in this excerpt).
XmlRootExtractor extractor = new XmlRootExtractor(); QName rootElement = extractor.extractRootElement(data); if (rootElement != null) { for (MimeType type : xmls) {