/**
 * Reports whether a BOM was found at the start of the stream.
 * <p>
 * Delegates to {@code getBOM()} and treats a non-null result as "BOM present".
 *
 * @return {@code true} if {@code getBOM()} yields a BOM, {@code false} if it yields {@code null}
 * @throws IOException
 *             if an error reading the first bytes of the stream occurs
 */
public boolean hasBOM() throws IOException {
    final boolean bomDetected = null != getBOM();
    return bomDetected;
}
/**
 * Returns the next of the buffered first bytes of the stream, behaving like the single-byte
 * <code>read()</code> method.
 * <p>
 * {@code getBOM()} is invoked first; afterwards the next entry of {@code firstBytes} is handed out
 * while {@code fbIndex} is still below {@code fbLength}, advancing the index by one.
 *
 * @return the next buffered byte, or {@code EOF} once all buffered first bytes have been handed out
 * @throws IOException
 *             if an I/O error occurs
 */
private int readFirstBytes() throws IOException {
    getBOM();
    if (fbIndex >= fbLength) {
        // Nothing left in the first-bytes buffer.
        return EOF;
    }
    return firstBytes[fbIndex++];
}
/**
 * Returns the charset name of the detected BOM - {@link ByteOrderMark#getCharsetName()}.
 * <p>
 * {@code getBOM()} is called first, then the {@code byteOrderMark} field is consulted.
 *
 * @return the BOM charset name, or {@code null} if no BOM was found
 * @throws IOException
 *             if an error reading the first bytes of the stream occurs
 */
public String getBOMCharsetName() throws IOException {
    getBOM();
    if (byteOrderMark == null) {
        return null;
    }
    return byteOrderMark.getCharsetName();
}
/**
 * Indicates whether the stream contains the specified BOM.
 * <p>
 * The requested BOM must be one of the BOMs held in {@code boms}; otherwise the call is rejected
 * before the stream is inspected.
 *
 * @param bom
 *            The BOM to check for
 * @return true if the stream has the specified BOM, otherwise false if it does not
 * @throws IllegalArgumentException
 *             if the BOM is not one the stream is configured to detect
 * @throws IOException
 *             if an error reading the first bytes of the stream occurs
 */
public boolean hasBOM(final ByteOrderMark bom) throws IOException {
    if (!boms.contains(bom)) {
        // Message wording fixed: "configure" -> "configured".
        throw new IllegalArgumentException("Stream not configured to detect " + bom);
    }
    // Triggers BOM detection so that byteOrderMark reflects the stream's first bytes.
    getBOM();
    return byteOrderMark != null && byteOrderMark.equals(bom);
}
final ByteOrderMark bom = bomStream.getBOM(); final String defaultEncoding = StandardCharsets.UTF_8.name(); final String charsetName = bom == null ? defaultEncoding : bom.getCharsetName();
/**
 * Checks that a stream built from an empty byte array reports end-of-stream on the first read
 * and reports no BOM.
 *
 * @throws Exception
 *             if reading or closing the stream fails
 */
@Test
public void testReadEmpty() throws Exception {
    final byte[] data = new byte[] {};
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false))) {
        assertEquals(-1, in.read());
        assertFalse("hasBOM()", in.hasBOM());
        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertNull("getBOM", in.getBOM());
    }
}
/**
 * Checks that a two-byte stream expected to have no BOM is read back unchanged and that no BOM
 * is reported afterwards.
 *
 * @throws Exception
 *             if reading or closing the stream fails
 */
@Test
public void testReadSmall() throws Exception {
    final byte[] data = new byte[] { 'A', 'B' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false))) {
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals(-1, in.read());
        assertFalse("hasBOM()", in.hasBOM());
        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertNull("getBOM", in.getBOM());
    }
}
@Test public void testReadWithBOMUtf8() throws Exception { final byte[] data = "ABC".getBytes(StandardCharsets.UTF_8); final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), ByteOrderMark.UTF_8); assertEquals('A', in.read()); assertEquals('B', in.read()); assertEquals('C', in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8)); assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_16BE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-16BE } in.close(); }
/**
 * Checks that querying the BOM before reading does not disturb the data: the UTF-8 BOM is
 * expected to be reported first, then the payload bytes are expected from {@code read()}.
 *
 * @throws Exception
 *             if reading or closing the stream fails
 */
@Test
public void testGetBOMFirstThenRead() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true))) {
        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
        assertTrue("hasBOM()", in.hasBOM());
        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
    }
}
// Charset used when the file does not start with a BOM.
String defaultEncoding = "UTF-8";
InputStream inputStream = new FileInputStream(someFileWithPossibleUtf8Bom);
try {
    BOMInputStream bomStream = new BOMInputStream(inputStream);
    ByteOrderMark detectedBom = bomStream.getBOM();
    // Use the BOM's charset when one was detected, otherwise the default.
    String charsetName;
    if (detectedBom != null) {
        charsetName = detectedBom.getCharsetName();
    } else {
        charsetName = defaultEncoding;
    }
    InputStreamReader reader = new InputStreamReader(new BufferedInputStream(bomStream), charsetName);
    //use reader
} finally {
    // Closing the underlying file stream releases the file handle on every path.
    inputStream.close();
}
/**
 * Checks that a three-byte stream expected to have no BOM is read back unchanged and that no
 * BOM is reported afterwards.
 *
 * @throws Exception
 *             if reading or closing the stream fails
 */
@Test
public void testReadWithoutBOM() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, false))) {
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
        assertFalse("hasBOM()", in.hasBOM());
        assertFalse("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertNull("getBOM", in.getBOM());
    }
}
/**
 * Checks a stream configured for both UTF-16BE and UTF-8 BOMs: the payload is expected from
 * {@code read()}, UTF-8 is expected to be the detected BOM, and UTF-16BE is expected to be
 * reported as absent without an exception, since the stream was configured with it.
 *
 * @throws Exception
 *             if reading or closing the stream fails
 */
@Test
public void testReadWithMultipleBOM() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true),
            ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_8)) {
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
        assertTrue("hasBOM()", in.hasBOM());
        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertFalse("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE));
        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
    }
}
@Test public void testReadWithBOMUtf16Be() throws Exception { final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16BE); final BOMInputStream in = new BOMInputStream(createUtf16BeDataStream(data, true), ByteOrderMark.UTF_16BE); assertEquals(0, in.read()); assertEquals('A', in.read()); assertEquals(0, in.read()); assertEquals('B', in.read()); assertEquals(0, in.read()); assertEquals('C', in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-16BE)", in.hasBOM(ByteOrderMark.UTF_16BE)); assertEquals("getBOM", ByteOrderMark.UTF_16BE, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_16LE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-16LE } in.close(); }
@Test public void testReadWithBOMUtf16Le() throws Exception { final byte[] data = "ABC".getBytes(StandardCharsets.UTF_16LE); final BOMInputStream in = new BOMInputStream(createUtf16LeDataStream(data, true), ByteOrderMark.UTF_16LE); assertEquals('A', in.read()); assertEquals(0, in.read()); assertEquals('B', in.read()); assertEquals(0, in.read()); assertEquals('C', in.read()); assertEquals(0, in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-16LE)", in.hasBOM(ByteOrderMark.UTF_16LE)); assertEquals("getBOM", ByteOrderMark.UTF_16LE, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_16BE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-16BE } in.close(); }
/**
 * Checks a stream constructed with the boolean flag set to {@code true}: the BOM is queried
 * first, and {@code read()} is then expected to return the three UTF-8 BOM bytes
 * (0xEF 0xBB 0xBF) followed by the payload.
 *
 * @throws Exception
 *             if reading or closing the stream fails
 */
@Test
public void testGetBOMFirstThenReadInclude() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true)) {
        assertTrue("hasBOM()", in.hasBOM());
        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
        assertEquals(0xEF, in.read());
        assertEquals(0xBB, in.read());
        assertEquals(0xBF, in.read());
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
    }
}
/**
 * Checks a stream constructed with the boolean flag set to {@code true}: {@code read()} is
 * expected to return the three UTF-8 BOM bytes (0xEF 0xBB 0xBF) followed by the payload, and
 * the UTF-8 BOM is expected to be reported after reading.
 *
 * @throws Exception
 *             if reading or closing the stream fails
 */
@Test
public void testReadWithBOMInclude() throws Exception {
    final byte[] data = new byte[] { 'A', 'B', 'C' };
    // try-with-resources closes the stream even when an assertion fails.
    try (final BOMInputStream in = new BOMInputStream(createUtf8DataStream(data, true), true)) {
        assertEquals(0xEF, in.read());
        assertEquals(0xBB, in.read());
        assertEquals(0xBF, in.read());
        assertEquals('A', in.read());
        assertEquals('B', in.read());
        assertEquals('C', in.read());
        assertEquals(-1, in.read());
        assertTrue("hasBOM()", in.hasBOM());
        assertTrue("hasBOM(UTF-8)", in.hasBOM(ByteOrderMark.UTF_8));
        assertEquals("getBOM", ByteOrderMark.UTF_8, in.getBOM());
    }
}
@Test public void testReadWithBOMUtf32Le() throws Exception { Assume.assumeTrue(Charset.isSupported("UTF_32LE")); final byte[] data = "ABC".getBytes("UTF_32LE"); final BOMInputStream in = new BOMInputStream(createUtf32LeDataStream(data, true), ByteOrderMark.UTF_32LE); assertEquals('A', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('B', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('C', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-32LE)", in.hasBOM(ByteOrderMark.UTF_32LE)); assertEquals("getBOM", ByteOrderMark.UTF_32LE, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_32BE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-32BE } in.close(); }
@Test public void testReadWithBOMUtf32Be() throws Exception { Assume.assumeTrue(Charset.isSupported("UTF_32BE")); final byte[] data = "ABC".getBytes("UTF_32BE"); final BOMInputStream in = new BOMInputStream(createUtf32BeDataStream(data, true), ByteOrderMark.UTF_32BE); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('A', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('B', in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals(0, in.read()); assertEquals('C', in.read()); assertEquals(-1, in.read()); assertTrue("hasBOM()", in.hasBOM()); assertTrue("hasBOM(UTF-32BE)", in.hasBOM(ByteOrderMark.UTF_32BE)); assertEquals("getBOM", ByteOrderMark.UTF_32BE, in.getBOM()); try { in.hasBOM(ByteOrderMark.UTF_32LE); fail("Expected IllegalArgumentException"); } catch (final IllegalArgumentException e) { // expected - not configured for UTF-32LE } in.close(); }
/**
 * Hands out the buffered first bytes of the stream one at a time, behaving like the
 * single-byte <code>read()</code> method.
 * <p>
 * {@code getBOM()} is invoked first; the next entry of {@code firstBytes} is then returned
 * while {@code fbIndex} is below {@code fbLength}, and the index is advanced by one.
 *
 * @return the next buffered byte, or -1 once all buffered first bytes have been handed out
 * @throws IOException if an I/O error occurs
 */
private int readFirstBytes() throws IOException {
    getBOM();
    final boolean exhausted = fbIndex >= fbLength;
    if (exhausted) {
        return -1;
    }
    return firstBytes[fbIndex++];
}
/**
 * Gets the name of the charset associated with the detected BOM -
 * {@link ByteOrderMark#getCharsetName()}.
 * <p>
 * Runs {@code getBOM()} before inspecting the {@code byteOrderMark} field.
 *
 * @return the BOM charset name, or {@code null} if no BOM was found
 * @throws IOException
 *             if an error reading the first bytes of the stream occurs
 */
public String getBOMCharsetName() throws IOException {
    getBOM();
    if (byteOrderMark != null) {
        return byteOrderMark.getCharsetName();
    }
    return null;
}