/**
 * Builds an XML stream, opens it with a lenient {@code XmlStreamReader} using the
 * given HTTP content type, and asserts the encoding the reader reports.
 *
 * @param cT the HTTP Content-Type value handed to the reader
 * @param bomEnc first argument forwarded to {@code getXmlStream}
 * @param streamEnc third argument forwarded to {@code getXmlStream}
 * @param prologEnc fourth argument forwarded to {@code getXmlStream}; when {@code null}
 *        the {@code XML2} template is used instead of {@code XML3}
 * @param shouldbe the encoding the reader is expected to report
 * @throws Exception if building the stream or creating/closing the reader fails
 */
protected void _testHttpLenient(final String cT, final String bomEnc, final String streamEnc,
        final String prologEnc, final String shouldbe) throws Exception {
    final InputStream is = getXmlStream(bomEnc, prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
    final XmlStreamReader xmlReader = new XmlStreamReader(is, cT, true);
    try {
        // Expected value goes first so a failure message reads the right way round.
        assertEquals(shouldbe, xmlReader.getEncoding());
    } finally {
        // Release the reader even when the assertion above fails.
        xmlReader.close();
    }
}
final String cTMime = getContentTypeMime(httpContentType); final String cTEnc = getContentTypeEncoding(httpContentType); final boolean appXml = isAppXml(cTMime); final boolean textXml = isTextXml(cTMime); return calculateRawEncoding(bomEnc, xmlGuessEnc, xmlEnc); } else { return defaultEncoding == null ? US_ASCII : defaultEncoding;
/**
 * Determines the encoding of a stream that arrived with an HTTP content type.
 * <p>
 * The BOM encoding, the guessed XML encoding and the prolog encoding are gathered
 * first and then handed to {@code calculateHttpEncoding}. If that calculation
 * raises an {@code XmlStreamReaderException}, lenient mode falls back to
 * {@code doLenientDetection}; otherwise the exception is propagated.
 *
 * @param bom BOMInputStream to detect byte order marks
 * @param pis BOMInputStream to guess XML encoding
 * @param httpContentType The HTTP content type
 * @param lenient indicates if the charset encoding detection should be
 *        relaxed.
 * @return the encoding to be used
 * @throws IOException thrown if there is a problem reading the stream.
 */
private String doHttpStream(final BOMInputStream bom, final BOMInputStream pis, final String httpContentType,
        final boolean lenient) throws IOException {
    final String bomEncoding = bom.getBOMCharsetName();
    final String guessedEncoding = pis.getBOMCharsetName();
    final String prologEncoding = getXmlProlog(pis, guessedEncoding);
    try {
        return calculateHttpEncoding(httpContentType, bomEncoding, guessedEncoding, prologEncoding, lenient);
    } catch (final XmlStreamReaderException ex) {
        if (!lenient) {
            throw ex;
        }
        return doLenientDetection(httpContentType, ex);
    }
}
/**
 * Determines the encoding of a stream that has no HTTP content type.
 * <p>
 * The BOM encoding, the guessed XML encoding and the prolog encoding are gathered
 * first and then handed to {@code calculateRawEncoding}. If that calculation
 * raises an {@code XmlStreamReaderException}, lenient mode falls back to
 * {@code doLenientDetection} with a {@code null} content type; otherwise the
 * exception is propagated.
 *
 * @param bom BOMInputStream to detect byte order marks
 * @param pis BOMInputStream to guess XML encoding
 * @param lenient indicates if the charset encoding detection should be
 *        relaxed.
 * @return the encoding to be used
 * @throws IOException thrown if there is a problem reading the stream.
 */
private String doRawStream(final BOMInputStream bom, final BOMInputStream pis, final boolean lenient)
        throws IOException {
    final String bomEncoding = bom.getBOMCharsetName();
    final String guessedEncoding = pis.getBOMCharsetName();
    final String prologEncoding = getXmlProlog(pis, guessedEncoding);
    try {
        return calculateRawEncoding(bomEncoding, guessedEncoding, prologEncoding);
    } catch (final XmlStreamReaderException ex) {
        if (!lenient) {
            throw ex;
        }
        return doLenientDetection(null, ex);
    }
}
final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); if (conn instanceof HttpURLConnection || contentType != null) { this.encoding = doHttpStream(bom, pis, contentType, lenient); } else { this.encoding = doRawStream(bom, pis, lenient);
@Test public void testReadXmlWithBOMUtf32Be() throws Exception { Assume.assumeTrue("JVM and SAX need to support UTF_32BE for this", jvmAndSaxBothSupportCharset("UTF_32BE")); final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32BE\"?><X/>".getBytes("UTF_32BE"); parseXml(new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE)); // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader parseXml(new XmlStreamReader(createUtf32BeDataStream(data, true))); }
/**
 * Reads a UTF-8 document without a BOM through an {@code XmlStreamReader} built
 * from the raw stream only, then checks both the reported encoding and that the
 * decoded content equals the original XML text.
 *
 * @throws Exception if building or reading the stream fails
 */
@Test
public void testRawContent() throws Exception {
    final String encoding = "UTF-8";
    final String xml = getXML("no-bom", XML3, encoding, encoding);
    final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
    final XmlStreamReader xmlReader = new XmlStreamReader(is);
    try {
        // JUnit 4 order is (message, expected, actual).
        assertEquals("Check encoding", encoding, xmlReader.getEncoding());
        assertEquals("Check content", xml, IOUtils.toString(xmlReader));
    } finally {
        // The reader was previously never closed; release it even on assertion failure.
        xmlReader.close();
    }
}
/**
 * Asserts that creating a non-lenient {@code XmlStreamReader} over a stream built
 * with {@code getXmlStream("no-bom", XML3, encoding, encoding)} fails with an
 * {@code IOException} whose message contains {@code "Invalid encoding,"}.
 *
 * @param encoding the encoding passed to {@code getXmlStream} both as stream and prolog encoding
 * @throws Exception if building or closing the stream fails
 */
protected void _testRawNoBomInvalid(final String encoding) throws Exception {
    final InputStream is = getXmlStream("no-bom", XML3, encoding, encoding);
    try {
        new XmlStreamReader(is, false).close();
        fail("It should have failed");
    } catch (final IOException ex) {
        assertTrue(ex.getMessage().contains("Invalid encoding,"));
    } finally {
        // When the constructor throws, no reader exists to close the stream for us.
        is.close();
    }
}
/**
 * Runs {@code checkHttpEncoding} with the given inputs and requires it to throw an
 * {@code XmlStreamReaderException}. The exception's message must start with
 * {@code "Invalid encoding"} and end with {@code msgSuffix}, and its recorded
 * encodings and content-type parts must match the inputs. Any other exception
 * type fails the test.
 *
 * @param msgSuffix the text the exception message must end with
 * @param lenient lenient flag forwarded to {@code checkHttpEncoding}
 * @param httpContentType HTTP content type forwarded to {@code checkHttpEncoding}
 * @param bomEnc BOM encoding forwarded and expected back from the exception
 * @param xmlGuessEnc guessed XML encoding forwarded and expected back from the exception
 * @param xmlEnc XML encoding forwarded and expected back from the exception
 * @param defaultEncoding default encoding forwarded to {@code checkHttpEncoding}
 */
private void checkHttpError(final String msgSuffix, final boolean lenient, final String httpContentType,
        final String bomEnc, final String xmlGuessEnc, final String xmlEnc, final String defaultEncoding) {
    try {
        checkHttpEncoding("XmlStreamReaderException", lenient, httpContentType, bomEnc, xmlGuessEnc, xmlEnc,
                defaultEncoding);
        fail("Expected XmlStreamReaderException");
    } catch (final XmlStreamReaderException e) {
        final String message = e.getMessage();
        assertTrue("Msg Start: " + message, message.startsWith("Invalid encoding"));
        assertTrue("Msg End: " + message, message.endsWith(msgSuffix));

        assertEquals("bomEnc", bomEnc, e.getBomEncoding());
        assertEquals("xmlGuessEnc", xmlGuessEnc, e.getXmlGuessEncoding());
        assertEquals("xmlEnc", xmlEnc, e.getXmlEncoding());

        final String expectedCtEncoding = XmlStreamReader.getContentTypeEncoding(httpContentType);
        assertEquals("ContentTypeEncoding", expectedCtEncoding, e.getContentTypeEncoding());
        final String expectedCtMime = XmlStreamReader.getContentTypeMime(httpContentType);
        assertEquals("ContentTypeMime", expectedCtMime, e.getContentTypeMime());
    } catch (final Exception e) {
        fail("Expected XmlStreamReaderException, but threw " + e);
    }
}
/**
 * Resolves an encoding after strict detection has failed.
 * <p>
 * When the content type starts with {@code text/html}, that prefix is swapped for
 * {@code text/xml} and {@code calculateHttpEncoding} is retried in lenient mode;
 * if the retry fails too, its exception replaces the original one. The result is
 * then the first non-null value among the exception's XML encoding, the
 * exception's content-type encoding, and the configured default encoding
 * (falling back to {@code UTF_8} when no default is set).
 *
 * @param httpContentType content-type header to use for the resolution of
 *        the charset encoding.
 * @param ex The thrown exception
 * @return the encoding
 * @throws IOException thrown if there is a problem reading the stream.
 */
private String doLenientDetection(String httpContentType, XmlStreamReaderException ex) throws IOException {
    final String htmlMime = "text/html";
    if (httpContentType != null && httpContentType.startsWith(htmlMime)) {
        // Treat HTML as XML and give the regular HTTP calculation one more chance.
        httpContentType = "text/xml" + httpContentType.substring(htmlMime.length());
        try {
            return calculateHttpEncoding(httpContentType, ex.getBomEncoding(), ex.getXmlGuessEncoding(),
                    ex.getXmlEncoding(), true);
        } catch (final XmlStreamReaderException retryFailure) {
            ex = retryFailure;
        }
    }

    final String prologEncoding = ex.getXmlEncoding();
    if (prologEncoding != null) {
        return prologEncoding;
    }
    final String headerEncoding = ex.getContentTypeEncoding();
    if (headerEncoding != null) {
        return headerEncoding;
    }
    return defaultEncoding != null ? defaultEncoding : UTF_8;
}
final BOMInputStream bom = new BOMInputStream(new BufferedInputStream(is, BUFFER_SIZE), false, BOMS); final BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); this.encoding = doHttpStream(bom, pis, httpContentType, lenient); this.reader = new InputStreamReader(pis, encoding);
/**
 * Asserts that {@code XmlStreamReader.isAppXml} returns the expected result for a MIME type.
 *
 * @param expected the result {@code isAppXml} should return
 * @param mime the MIME type to classify; also shown in the failure message
 */
@SuppressWarnings("boxing")
private void checkAppXml(final boolean expected, final String mime) {
    final String label = "Mime=[" + mime + "]";
    final boolean actual = XmlStreamReader.isAppXml(mime);
    assertEquals(label, expected, actual);
}
/**
 * Asserts that {@code XmlStreamReader.isTextXml} returns the expected result for a MIME type.
 *
 * @param expected the result {@code isTextXml} should return
 * @param mime the MIME type to classify; also shown in the failure message
 */
@SuppressWarnings("boxing")
private void checkTextXml(final boolean expected, final String mime) {
    final String label = "Mime=[" + mime + "]";
    final boolean actual = XmlStreamReader.isTextXml(mime);
    assertEquals(label, expected, actual);
}
/**
 * Asserts the value {@code XmlStreamReader.getContentTypeMime} returns for a content type.
 *
 * @param expected the expected return value
 * @param httpContentType the HTTP content type to pass in; also shown in the failure message
 */
private void checkContentTypeMime(final String expected, final String httpContentType) {
    final String label = "ContentTypeMime=[" + httpContentType + "]";
    final String actual = XmlStreamReader.getContentTypeMime(httpContentType);
    assertEquals(label, expected, actual);
}
/**
 * Asserts the value {@code XmlStreamReader.getContentTypeEncoding} returns for a content type.
 *
 * @param expected the expected return value
 * @param httpContentType the HTTP content type to pass in; also shown in the failure message
 */
private void checkContentTypeEncoding(final String expected, final String httpContentType) {
    final String label = "ContentTypeEncoding=[" + httpContentType + "]";
    final String actual = XmlStreamReader.getContentTypeEncoding(httpContentType);
    assertEquals(label, expected, actual);
}
@Test public void testReadXmlWithBOMUtf32Le() throws Exception { Assume.assumeTrue("JVM and SAX need to support UTF_32LE for this", jvmAndSaxBothSupportCharset("UTF_32LE")); final byte[] data = "<?xml version=\"1.0\" encoding=\"UTF-32LE\"?><X/>".getBytes("UTF_32LE"); parseXml(new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE)); // XML parser does not know what to do with UTF-32, so we warp the input stream with a XmlStreamReader parseXml(new XmlStreamReader(createUtf32LeDataStream(data, true))); }
/**
 * Reads a UTF-8 document without a BOM through an {@code XmlStreamReader} built
 * with the two-argument (stream, string) constructor, then checks both the
 * reported encoding and that the decoded content equals the original XML text.
 *
 * @throws Exception if building or reading the stream fails
 */
@Test
public void testHttpContent() throws Exception {
    final String encoding = "UTF-8";
    final String xml = getXML("no-bom", XML3, encoding, encoding);
    final ByteArrayInputStream is = new ByteArrayInputStream(xml.getBytes(encoding));
    final XmlStreamReader xmlReader = new XmlStreamReader(is, encoding);
    try {
        // JUnit 4 order is (message, expected, actual).
        assertEquals("Check encoding", encoding, xmlReader.getEncoding());
        assertEquals("Check content", xml, IOUtils.toString(xmlReader));
    } finally {
        // The reader was previously never closed; release it even on assertion failure.
        xmlReader.close();
    }
}
/**
 * Works out the encoding for a stream that carries no HTTP content type.
 * <p>
 * Collects the BOM encoding, the guessed XML encoding and the prolog encoding and
 * passes them to {@code calculateRawEncoding}. An {@code XmlStreamReaderException}
 * from that call is rethrown in strict mode and routed to
 * {@code doLenientDetection} (with a {@code null} content type) in lenient mode.
 *
 * @param bom BOMInputStream to detect byte order marks
 * @param pis BOMInputStream to guess XML encoding
 * @param lenient indicates if the charset encoding detection should be
 *        relaxed.
 * @return the encoding to be used
 * @throws IOException thrown if there is a problem reading the stream.
 */
private String doRawStream(BOMInputStream bom, BOMInputStream pis, boolean lenient) throws IOException {
    String fromBom = bom.getBOMCharsetName();
    String fromGuess = pis.getBOMCharsetName();
    String fromProlog = getXmlProlog(pis, fromGuess);
    try {
        return calculateRawEncoding(fromBom, fromGuess, fromProlog);
    } catch (XmlStreamReaderException ex) {
        if (!lenient) {
            throw ex;
        }
        return doLenientDetection(null, ex);
    }
}
/**
 * Asserts that creating a non-lenient {@code XmlStreamReader} with the given HTTP
 * content type fails with an {@code IOException} whose message contains
 * {@code "Invalid encoding,"}.
 *
 * @param cT the HTTP Content-Type value handed to the reader
 * @param bomEnc first argument forwarded to {@code getXmlStream}
 * @param streamEnc third argument forwarded to {@code getXmlStream}
 * @param prologEnc fourth argument forwarded to {@code getXmlStream}; when {@code null}
 *        the {@code XML2} template is used instead of {@code XML3}
 * @throws Exception if building or closing the stream fails
 */
protected void _testHttpInvalid(final String cT, final String bomEnc, final String streamEnc,
        final String prologEnc) throws Exception {
    final InputStream is = getXmlStream(bomEnc, prologEnc == null ? XML2 : XML3, streamEnc, prologEnc);
    try {
        new XmlStreamReader(is, cT, false).close();
        fail("It should have failed for HTTP Content-type " + cT + ", BOM " + bomEnc + ", streamEnc " + streamEnc
                + " and prologEnc " + prologEnc);
    } catch (final IOException ex) {
        assertTrue(ex.getMessage().contains("Invalid encoding,"));
    } finally {
        // When the constructor throws, no reader exists to close the stream for us.
        is.close();
    }
}
BOMInputStream pis = new BOMInputStream(bom, true, XML_GUESS_BYTES); if (conn instanceof HttpURLConnection || contentType != null) { this.encoding = doHttpStream(bom, pis, contentType, lenient); } else { this.encoding = doRawStream(bom, pis, lenient);