/**
 * Reports whether this codepoint is a high (leading) surrogate.
 *
 * The value is range-checked before it is narrowed to a {@code char}. Without the check, a value
 * wider than 16 bits would be truncated by the cast and could be mistaken for a surrogate
 * (for example 0x1D800 narrows to 0xD800).
 *
 * @return true only when the value fits in a single char and that char is a high surrogate
 */
public boolean isHighSurrogate() {
    // A narrowing cast keeps only the low 16 bits, so anything outside the char range is rejected first.
    if (value < 0 || value > 0xFFFF) {
        return false;
    }
    return CharUtils.isHighSurrogate((char)value);
}
/**
 * Checks whether two chars, taken in this order, form a surrogate pair.
 *
 * @param high the candidate high surrogate
 * @param low the candidate low surrogate
 * @return true when {@code high} passes the high-surrogate test and {@code low} passes the
 *         low-surrogate test; {@code low} is not examined if {@code high} fails
 */
public static boolean isSurrogatePair(char high, char low) {
    if (!isHighSurrogate(high)) {
        return false;
    }
    return isLowSurrogate(low);
}
/**
 * Combines a high and a low surrogate into the supplementary codepoint they encode.
 *
 * @param high the high surrogate
 * @param low the low surrogate
 * @return a Codepoint holding the combined value
 * @throws IllegalArgumentException if {@code high} is not a high surrogate or {@code low} is not
 *         a low surrogate (the high surrogate is validated first)
 */
public static Codepoint toSupplementary(char high, char low) {
    if (!isHighSurrogate(high)) {
        throw new IllegalArgumentException("Invalid High Surrogate");
    }
    if (!isLowSurrogate(low)) {
        throw new IllegalArgumentException("Invalid Low Surrogate");
    }
    // Each surrogate contributes 10 bits; the result is rebased onto the first supplementary value.
    int leadBits = high - '\uD800';
    int trailBits = low - '\uDC00';
    return new Codepoint((leadBits << 10) + trailBits + 0x010000);
}
/**
 * Returns true if the char at the specified index is a high surrogate.
 *
 * @param index the position to inspect; must satisfy {@code 0 <= index < limit()}
 * @return the result of the high-surrogate test on the char at {@code index}
 * @throws ArrayIndexOutOfBoundsException if {@code index} is negative or not below {@code limit()}
 */
public boolean isHigh(int index) {
    // limit() is an exclusive bound (peekChars rejects pos >= limit() the same way),
    // so index == limit() must be refused instead of being passed on to get().
    if (index < 0 || index >= limit()) {
        throw new ArrayIndexOutOfBoundsException(index);
    }
    return CharUtils.isHighSurrogate(get(index));
}
/**
 * Encodes the characters of {@code s} that are selected by the filters.
 *
 * For each character (or surrogate pair), {@code check(c, filters)} is consulted with the first
 * char of the unit. When it returns true, the unit is converted to bytes in the {@code enc} charset
 * and handed to {@code encode(sb, bytes)}; otherwise the unit is copied through unchanged.
 *
 * A high surrogate is only combined with the following char when that char exists and is a low
 * surrogate. An unpaired high surrogate (including one at the very end of the input) is processed
 * on its own instead of reading past the end of the sequence or swallowing an unrelated char.
 *
 * @param s the text to encode; may be null
 * @param enc the charset name used to turn selected characters into bytes
 * @param filters the filters passed to {@code check}
 * @return the encoded text, or null if {@code s} is null
 * @throws UnsupportedEncodingException if {@code enc} is not a supported charset name
 */
public static String encode(CharSequence s, String enc, Filter... filters) throws UnsupportedEncodingException {
    if (s == null) {
        return null;
    }
    StringBuilder sb = new StringBuilder();
    for (int n = 0; n < s.length(); n++) {
        char c = s.charAt(n);
        // Only treat c as the start of a pair when a low surrogate really follows it.
        boolean paired = CharUtils.isHighSurrogate(c)
            && n + 1 < s.length()
            && Character.isLowSurrogate(s.charAt(n + 1));
        String unit = paired ? new String(new char[] {c, s.charAt(n + 1)}) : String.valueOf(c);
        if (check(c, filters)) {
            encode(sb, unit.getBytes(enc));
        } else {
            sb.append(unit);
        }
        if (paired) {
            n++; // the low surrogate has been consumed together with the high one
        }
    }
    return sb.toString();
}
/**
 * Replaces the character at index {@code i} with the codepoint {@code c}, widening the replaced
 * range to a whole surrogate pair when {@code i} points at either half of one.
 *
 * Only {@code StringBuilder} and {@code StringBuffer} arguments are modified in place. For any
 * other {@code CharSequence} the replacement is carried out on a temporary {@code StringBuilder}
 * copy that is not returned, so the caller's sequence is left untouched.
 *
 * @param s the sequence to modify
 * @param i the index of the char (or of one half of a surrogate pair) to replace
 * @param c the codepoint to write, converted to text with {@code toString(int)}
 */
public static void setChar(CharSequence s, int i, int c) {
    boolean editable = s instanceof StringBuilder || s instanceof StringBuffer;
    if (!editable) {
        setChar(new StringBuilder(s), i, c);
        return;
    }

    char target = s.charAt(i);
    boolean lead = isHighSurrogate(target);
    boolean trail = isLowSurrogate(target);

    // Default: replace exactly one char.
    int from = i;
    int to = i + 1;
    if (lead && (i + 1) < s.length() && isLowSurrogate(s.charAt(i + 1))) {
        // i is the first half of a pair: take the following low surrogate as well.
        to = i + 2;
    } else if (trail && i > 0 && isHighSurrogate(s.charAt(i - 1))) {
        // i is the second half of a pair: start from the preceding high surrogate.
        from = i - 1;
    }

    String replacement = toString(c);
    if (s instanceof StringBuffer) {
        ((StringBuffer)s).replace(from, to, replacement);
    } else {
        ((StringBuilder)s).replace(from, to, replacement);
    }
}
/**
 * Inserts the codepoint {@code c} at index {@code i}, moving the insertion point one char to the
 * left when {@code i} would otherwise fall between the two halves of a surrogate pair.
 *
 * Only {@code StringBuilder} and {@code StringBuffer} arguments are modified in place. For any
 * other {@code CharSequence} the insertion is carried out on a temporary {@code StringBuilder}
 * copy that is not returned, so the caller's sequence is left untouched.
 *
 * @param s the sequence to modify
 * @param i the index at which to insert
 * @param c the codepoint to insert, converted to text with {@code toString(int)}
 */
public static void insert(CharSequence s, int i, int c) {
    boolean editable = s instanceof StringBuilder || s instanceof StringBuffer;
    if (!editable) {
        insert(new StringBuilder(s), i, c);
        return;
    }

    int at = i;
    boolean interior = i > 0 && i < s.length();
    if (interior && isLowSurrogate(s.charAt(i)) && isHighSurrogate(s.charAt(i - 1))) {
        // Do not split an existing pair: insert in front of its high surrogate instead.
        at = i - 1;
    }

    String text = toString(c);
    if (s instanceof StringBuffer) {
        ((StringBuffer)s).insert(at, text);
    } else {
        ((StringBuilder)s).insert(at, text);
    }
}
/**
 * Encodes the readable characters of {@code chars} into {@code sb}.
 *
 * For each character (or surrogate pair), {@code check(c, filters)} is consulted with the first
 * char of the unit. When it returns true, the unit is converted to bytes in the {@code enc} charset
 * and handed to {@code encode(sb, bytes)}; otherwise the unit is appended unchanged.
 *
 * A high surrogate is only combined with the following char when that char exists in the buffer
 * and is a low surrogate. An unpaired high surrogate (including one that is the last char of the
 * buffer) is processed on its own instead of reading past the end of the buffer.
 *
 * @param sb the destination for the output
 * @param chars the characters to process; read with {@code charAt}, so its position is not moved here
 * @param enc the charset name used to turn selected characters into bytes
 * @param filters the filters passed to {@code check}
 * @throws IOException if the charset name is unsupported or the byte encoder fails
 */
private static void processChars(StringBuilder sb, CharBuffer chars, String enc, Filter... filters) throws IOException {
    for (int n = 0; n < chars.length(); n++) {
        char c = chars.charAt(n);
        // Only treat c as the start of a pair when a low surrogate really follows it.
        boolean paired = CharUtils.isHighSurrogate(c)
            && n + 1 < chars.length()
            && Character.isLowSurrogate(chars.charAt(n + 1));
        String unit = paired ? new String(new char[] {c, chars.charAt(n + 1)}) : String.valueOf(c);
        if (check(c, filters)) {
            encode(sb, unit.getBytes(enc));
        } else {
            sb.append(unit);
        }
        if (paired) {
            n++; // the low surrogate has been consumed together with the high one
        }
    }
}
/**
 * Returns the codepoint at the given location, automatically dealing with surrogate pairs.
 *
 * If {@code i} addresses a high surrogate that is followed by a low surrogate, or a low surrogate
 * that is preceded by a high surrogate, the combined supplementary codepoint is returned. In every
 * other case, including an unpaired surrogate at either end of the sequence, the single char at
 * {@code i} is returned as a codepoint.
 *
 * @param s the sequence to read from
 * @param i the index of the char to inspect
 * @return the codepoint located at {@code i}
 */
public static Codepoint codepointAt(CharSequence s, int i) {
    char c = s.charAt(i);
    if (c < 0xD800 || c > 0xDFFF) {
        return new Codepoint(c);
    }
    if (isHighSurrogate(c)) {
        // Look ahead only when a following char exists; a trailing high surrogate stands alone.
        if (i + 1 < s.length()) {
            char low = s.charAt(i + 1);
            if (isLowSurrogate(low)) {
                return toSupplementary(c, low);
            }
        }
    } else if (isLowSurrogate(c)) {
        if (i >= 1) {
            char high = s.charAt(i - 1);
            if (isHighSurrogate(high)) {
                return toSupplementary(high, c);
            }
        }
    }
    return new Codepoint(c);
}
/**
 * Returns the codepoint at the given location, automatically dealing with surrogate pairs.
 *
 * If {@code i} addresses a high surrogate that is followed by a low surrogate, or a low surrogate
 * that is preceded by a high surrogate, the combined supplementary codepoint is returned. In every
 * other case, including an unpaired surrogate at either end of the string, the single char at
 * {@code i} is returned as a codepoint.
 *
 * @param s the string to read from
 * @param i the index of the char to inspect
 * @return the codepoint located at {@code i}
 */
public static Codepoint codepointAt(String s, int i) {
    char c = s.charAt(i);
    if (c < 0xD800 || c > 0xDFFF) {
        return new Codepoint(c);
    }
    if (isHighSurrogate(c)) {
        // Look ahead only when a following char exists; a trailing high surrogate stands alone.
        if (i + 1 < s.length()) {
            char low = s.charAt(i + 1);
            if (isLowSurrogate(low)) {
                return toSupplementary(c, low);
            }
        }
    } else if (isLowSurrogate(c)) {
        if (i >= 1) {
            char high = s.charAt(i - 1);
            if (isHighSurrogate(high)) {
                return toSupplementary(high, c);
            }
        }
    }
    return new Codepoint(c);
}
/**
 * Tells whether the char at the current position is either half of a surrogate pair.
 * The char is read by index, using {@code get(position())}.
 *
 * @return false when {@code hasNext()} is false; otherwise true if the char at the current
 *         position is a high or a low surrogate
 */
private boolean isNextSurrogate() {
    if (hasNext()) {
        char upcoming = get(position());
        if (CharUtils.isHighSurrogate(upcoming)) {
            return true;
        }
        return CharUtils.isLowSurrogate(upcoming);
    }
    return false;
}
/**
 * True if all the characters in chars are within the set [low,high].
 *
 * The array is read codepoint by codepoint: a high surrogate immediately followed by a low
 * surrogate is combined into one supplementary codepoint, and every other char (including an
 * unpaired surrogate) is compared by its own value.
 *
 * @param chars the characters to test; an empty array yields true
 * @param low the smallest accepted codepoint value (inclusive)
 * @param high the largest accepted codepoint value (inclusive)
 * @return false as soon as one codepoint lies outside [low,high], true otherwise
 */
public static boolean inRange(char[] chars, int low, int high) {
    for (int i = 0; i < chars.length; i++) {
        char n = chars[i];
        int c = n;
        if (Character.isHighSurrogate(n) && i + 1 < chars.length && Character.isLowSurrogate(chars[i + 1])) {
            // Pre-increment: the low surrogate is the NEXT char, and it must be skipped afterwards.
            // java.lang.Character is used directly so no wrapper object is created per char.
            c = Character.toCodePoint(n, chars[++i]);
        }
        if (c < low || c > high) {
            return false;
        }
    }
    return true;
}
/**
 * Peek the specified chars in the iterator. If the codepoint is not supplemental, the char array will have a single
 * member. If the codepoint is supplemental, the char array will have two members, representing the high and low
 * surrogate chars, in that order.
 *
 * A high surrogate that is the last readable char, and a low surrogate at index 0, have no partner to look at and
 * are returned as a single-member array.
 *
 * @param pos the index to peek at
 * @return null if {@code pos} is negative or not below {@code limit()}; otherwise the one or two chars found
 * @throws InvalidCharacterException if a surrogate at {@code pos} has a neighbour that is not the matching half
 */
private char[] peekChars(int pos) throws InvalidCharacterException {
    if (pos < 0 || pos >= limit()) {
        return null;
    }
    char c1 = get(pos);
    // pos itself is already known to be below limit(); what matters is whether pos + 1 is readable too.
    if (CharUtils.isHighSurrogate(c1) && pos + 1 < limit()) {
        char c2 = get(pos + 1);
        if (CharUtils.isLowSurrogate(c2)) {
            return new char[] {c1, c2};
        }
        throw new InvalidCharacterException(c2);
    } else if (CharUtils.isLowSurrogate(c1) && pos > 0) {
        // pos > 0 (not pos > 1): a pair occupying indices 0 and 1 must be recognised as well.
        char c2 = get(pos - 1);
        if (CharUtils.isHighSurrogate(c2)) {
            return new char[] {c2, c1};
        }
        throw new InvalidCharacterException(c2);
    }
    return new char[] {c1};
}
/**
 * Return the next chars. If the codepoint is not supplemental, the char array will have a single member. If the
 * codepoint is supplemental, the char array will have two members, representing the high and low surrogate chars,
 * in that order.
 *
 * An unpaired surrogate with no neighbour to inspect (a high surrogate that is the last char, or a low surrogate
 * that is the first char) is returned as a single-member array. The char that was read is returned; it is never
 * dropped in favour of a further read.
 *
 * @return null when {@code hasNext()} is false; otherwise the one or two chars that were consumed or matched
 * @throws InvalidCharacterException if a surrogate has a neighbour that is not the matching half
 */
public char[] nextChars() throws InvalidCharacterException {
    if (!hasNext()) {
        return null;
    }
    if (!isNextSurrogate()) {
        return new char[] {get()};
    }
    // get() consumes one char, so from here on c1 sits at index position() - 1.
    char c1 = get();
    if (CharUtils.isHighSurrogate(c1) && position() < limit()) {
        char c2 = get();
        if (CharUtils.isLowSurrogate(c2)) {
            return new char[] {c1, c2};
        }
        throw new InvalidCharacterException(c2);
    } else if (CharUtils.isLowSurrogate(c1) && position() > 1) {
        // position() > 1 guarantees that index position() - 2 (the char before c1) exists.
        char c2 = get(position() - 2);
        if (CharUtils.isHighSurrogate(c2)) {
            // High surrogate first, matching the documented order and peekChars.
            return new char[] {c2, c1};
        }
        throw new InvalidCharacterException(c2);
    }
    // Unpaired surrogate without a neighbour: hand back the char already consumed.
    return new char[] {c1};
}