return bytes.length; if (index > bytes.length) throw new UnicodeException("Terminator not found."); throw new UnicodeException("Invalid unicode."); throw new UnicodeException("Invalid code point."); } else if (c1 <= 0xEF) throw new UnicodeException("Invalid unicode."); throw new UnicodeException("Invalid code point."); int c3 = 0xff & bytes[index++]; if (c3 < 0x80 || c3 > 0xBF) throw new UnicodeException("Invalid code point."); } else if (c1 <= 0xF4) throw new UnicodeException("Invalid unicode."); throw new UnicodeException("Invalid code point."); int c3 = 0xff & bytes[index++]; if (c3 < 0x80 || c3 > 0xBF) throw new UnicodeException("Invalid code point."); int c4 = 0xff & bytes[index++]; if (c4 < 0x80 || c4 > 0xBF) throw new UnicodeException("Invalid code point."); } else throw new UnicodeException("Invalid code point.");
return bytes.length; if (index > bytes.length) throw new UnicodeException("Terminator not found."); throw new UnicodeException("Invalid unicode."); throw new UnicodeException("Invalid code point."); } else if (c1 <= 0xEF) throw new UnicodeException("Invalid unicode."); throw new UnicodeException("Invalid code point."); int c3 = 0xff & bytes[index++]; if (c3 < 0x80 || c3 > 0xBF) throw new UnicodeException("Invalid code point."); } else if (c1 <= 0xF4) throw new UnicodeException("Invalid unicode."); throw new UnicodeException("Invalid code point."); int c3 = 0xff & bytes[index++]; if (c3 < 0x80 || c3 > 0xBF) throw new UnicodeException("Invalid code point."); int c4 = 0xff & bytes[index++]; if (c4 < 0x80 || c4 > 0xBF) throw new UnicodeException("Invalid code point."); } else throw new UnicodeException("Invalid code point.");
public int findEnd(byte bytes[], int index, boolean includeTerminator) throws UnicodeException { // http://en.wikipedia.org/wiki/UTF-16/UCS-2 while (true) { if (index == bytes.length) return bytes.length; if (index > bytes.length - 1) throw new UnicodeException("Terminator not found."); int c1 = 0xff & bytes[index++]; int c2 = 0xff & bytes[index++]; int msb1 = byteOrder == BYTE_ORDER_BIG_ENDIAN ? c1 : c2; if (c1 == 0 && c2 == 0) { return includeTerminator ? index : index - 2; } else if (msb1 >= 0xD8) { if (index > bytes.length - 1) throw new UnicodeException("Terminator not found."); // second word. int c3 = 0xff & bytes[index++]; int c4 = 0xff & bytes[index++]; int msb2 = byteOrder == BYTE_ORDER_BIG_ENDIAN ? c3 : c4; if (msb2 < 0xDC) throw new UnicodeException("Invalid code point."); } } } }
public int findEnd(byte bytes[], int index, boolean includeTerminator) throws UnicodeException { // http://en.wikipedia.org/wiki/UTF-16/UCS-2 while (true) { if (index == bytes.length) return bytes.length; if (index > bytes.length - 1) throw new UnicodeException("Terminator not found."); int c1 = 0xff & bytes[index++]; int c2 = 0xff & bytes[index++]; int msb1 = byteOrder == BYTE_ORDER_BIG_ENDIAN ? c1 : c2; if (c1 == 0 && c2 == 0) { return includeTerminator ? index : index - 2; } else if (msb1 >= 0xD8) { if (index > bytes.length - 1) throw new UnicodeException("Terminator not found."); // second word. int c3 = 0xff & bytes[index++]; int c4 = 0xff & bytes[index++]; int msb2 = byteOrder == BYTE_ORDER_BIG_ENDIAN ? c3 : c4; if (msb2 < 0xDC) throw new UnicodeException("Invalid code point."); } } } }
public static UnicodeUtils getInstance(int charEncodingCode) throws UnicodeException { switch (charEncodingCode) { case CHAR_ENCODING_CODE_ISO_8859_1: return new UnicodeMetricsASCII(); case CHAR_ENCODING_CODE_UTF_8: // Debug.debug("CHAR_ENCODING_CODE_UTF_8"); return new UnicodeMetricsUTF8(); case CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_WITH_BOM: case CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_WITH_BOM: // Debug.debug("CHAR_ENCODING_CODE_UTF_16_WITH_BOM"); return new UnicodeMetricsUTF16WithBOM(); case CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_NO_BOM: return new UnicodeMetricsUTF16NoBOM(BYTE_ORDER_BIG_ENDIAN); case CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_NO_BOM: return new UnicodeMetricsUTF16NoBOM(BYTE_ORDER_LITTLE_ENDIAN); default: throw new UnicodeException("Unknown char encoding code: " + charEncodingCode); } }
public static UnicodeUtils getInstance(int charEncodingCode) throws UnicodeException { switch (charEncodingCode) { case CHAR_ENCODING_CODE_ISO_8859_1: return new UnicodeMetricsASCII(); case CHAR_ENCODING_CODE_UTF_8: // Debug.debug("CHAR_ENCODING_CODE_UTF_8"); return new UnicodeMetricsUTF8(); case CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_WITH_BOM: case CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_WITH_BOM: // Debug.debug("CHAR_ENCODING_CODE_UTF_16_WITH_BOM"); return new UnicodeMetricsUTF16WithBOM(); case CHAR_ENCODING_CODE_UTF_16_BIG_ENDIAN_NO_BOM: return new UnicodeMetricsUTF16NoBOM(BYTE_ORDER_BIG_ENDIAN); case CHAR_ENCODING_CODE_UTF_16_LITTLE_ENDIAN_NO_BOM: return new UnicodeMetricsUTF16NoBOM(BYTE_ORDER_LITTLE_ENDIAN); default: throw new UnicodeException("Unknown char encoding code: " + charEncodingCode); } }
public int findEnd(byte bytes[], int index, boolean includeTerminator) throws UnicodeException { // http://en.wikipedia.org/wiki/UTF-16/UCS-2 if (index >= bytes.length - 1) throw new UnicodeException("Missing BOM."); int c1 = 0xff & bytes[index++]; int c2 = 0xff & bytes[index++]; if (c1 == 0xFF && c2 == 0xFE) byteOrder = BYTE_ORDER_LITTLE_ENDIAN; else if (c1 == 0xFE && c2 == 0xFF) byteOrder = BYTE_ORDER_BIG_ENDIAN; else throw new UnicodeException("Invalid byte order mark."); return super.findEnd(bytes, index, includeTerminator); } }
public int findEnd(byte bytes[], int index, boolean includeTerminator) throws UnicodeException { // http://en.wikipedia.org/wiki/UTF-16/UCS-2 if (index >= bytes.length - 1) throw new UnicodeException("Missing BOM."); int c1 = 0xff & bytes[index++]; int c2 = 0xff & bytes[index++]; if (c1 == 0xFF && c2 == 0xFE) byteOrder = BYTE_ORDER_LITTLE_ENDIAN; else if (c1 == 0xFE && c2 == 0xFF) byteOrder = BYTE_ORDER_BIG_ENDIAN; else throw new UnicodeException("Invalid byte order mark."); return super.findEnd(bytes, index, includeTerminator); } }