/**
 * Tests the single code point that ends at index {@code length} of {@code s} against
 * {@code set}.
 *
 * @param set the set to test membership in
 * @param s the text being examined
 * @param length the exclusive end index of the code point to examine
 * @return the number of code units making up that code point (1 or 2) when the set contains
 *         it, or that number negated (-1 or -2) when it does not
 */
static int spanOneBack(final UnicodeSet set, CharSequence s, int length) {
    final char last = s.charAt(length - 1);
    final boolean lastIsTrail = last >= 0xdc00 && last <= 0xdfff;
    if (lastIsTrail && length >= 2) {
        final char beforeLast = s.charAt(length - 2);
        if (com.ibm.icu.text.UTF16.isLeadSurrogate(beforeLast)) {
            // A well-formed pair: test the supplementary code point it encodes.
            final int codePoint = Character.toCodePoint(beforeLast, last);
            if (set.contains(codePoint)) {
                return 2;
            }
            return -2;
        }
    }
    // BMP character, or a trail surrogate with no lead in front of it.
    if (set.contains(last)) {
        return 1;
    }
    return -1;
}
/**
 * Returns a reversed copy of a UTF16 format Unicode string. Surrogate pairs are kept in
 * lead-then-trail order, instead of blindly reversing every character.
 * <p>
 * The <code>source</code> buffer itself is never modified; the reversed text is returned in a
 * newly allocated StringBuffer.
 * <p>
 * Examples:<br>
 * UTF16.reverse(new StringBuffer( "Supplementary characters \ud800\udc00\ud801\udc01"))<br>
 * returns "\ud801\udc01\ud800\udc00 sretcarahc yratnemelppuS".
 *
 * @param source The source StringBuffer that contains UTF16 format Unicode string to be reversed
 * @return a new StringBuffer holding the reversed UTF16 format Unicode string.
 * @stable ICU 2.6
 */
public static StringBuffer reverse(StringBuffer source) {
    // StringBuffer.reverse() treats every valid lead/trail pair as one unit and leaves
    // unpaired surrogates as ordinary code units. Working on a copy keeps source intact.
    return new StringBuffer(source).reverse();
}
if (isLeadSurrogate(ch) && ((result + 1) < limit) && isTrailSurrogate(source[result + 1])) { result++;
char ch = source.charAt(offset16); if (isSurrogate(ch)) { if (isLeadSurrogate(ch)) { if (++offset16 < source.length() && isTrailSurrogate(source.charAt(offset16))) { return LEAD_SURROGATE_BOUNDARY; if (offset16 >= 0 && isLeadSurrogate(source.charAt(offset16))) { return TRAIL_SURROGATE_BOUNDARY;
char ch = source.charAt(offset16); if (isSurrogate(ch)) { if (isLeadSurrogate(ch)) { if (++offset16 < source.length() && isTrailSurrogate(source.charAt(offset16))) { return LEAD_SURROGATE_BOUNDARY; if (offset16 >= 0 && isLeadSurrogate(source.charAt(offset16))) { return TRAIL_SURROGATE_BOUNDARY;
/** * Get the value associated with a pair of surrogates. * @param lead a lead surrogate * @param trail a trail surrogate */ public final int getSurrogateValue(char lead, char trail) { if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) { throw new IllegalArgumentException( "Argument characters do not form a supplementary character"); } // get fold position for the next trail surrogate int offset = getSurrogateOffset(lead, trail); // get the real data from the folded lead/trail units if (offset > 0) { return m_data_[offset]; } // return m_initialValue_ if there is an error return m_initialValue_; }
/** * Set a code point into a UTF16 position. Adjusts target according if we are replacing a * non-supplementary codepoint with a supplementary and vice versa. * * @param target Stringbuffer * @param offset16 UTF16 position to insert into * @param char32 Code point * @stable ICU 2.1 */ public static void setCharAt(StringBuffer target, int offset16, int char32) { int count = 1; char single = target.charAt(offset16); if (isSurrogate(single)) { // pairs of the surrogate with offset16 at the lead char found if (isLeadSurrogate(single) && (target.length() > offset16 + 1) && isTrailSurrogate(target.charAt(offset16 + 1))) { count++; } else { // pairs of the surrogate with offset16 at the trail char // found if (isTrailSurrogate(single) && (offset16 > 0) && isLeadSurrogate(target.charAt(offset16 - 1))) { offset16--; count++; } } } target.replace(offset16, offset16 + count, valueOf(char32)); }
int strLength = str.length(); if (!isTrailSurrogate(str.charAt(0)) && !isLeadSurrogate(str.charAt(strLength - 1))) { return source.lastIndexOf(str); if (result >= 0) { if (isLeadSurrogate(str.charAt(strLength - 1)) && (result < source.length() - 1) && isTrailSurrogate(source.charAt(result + strLength + 1))) { return lastIndexOf(source, str, result - 1); && isLeadSurrogate(source.charAt(result - 1))) { return lastIndexOf(source, str, result - 1);
private final CoderResult decodeTrail(ByteBuffer source, CharBuffer target, IntBuffer offsets, char lead) { if (!UTF16.isLeadSurrogate(lead)) {
/**
 * Converts a UTF-32 (code point) offset into the matching UTF-16 (code unit) offset. Used for
 * random access. See the {@link UTF16 class description} for notes on roundtripping.
 *
 * @param source The UTF-16 string
 * @param offset32 UTF-32 offset
 * @return UTF-16 offset
 * @exception IndexOutOfBoundsException If offset32 is negative, larger than the length of
 *                source, or larger than the number of code points in source.
 * @stable ICU 2.1
 */
public static int findOffsetFromCodePoint(String source, int offset32) {
    final int length = source.length();
    if (offset32 < 0 || offset32 > length) {
        throw new StringIndexOutOfBoundsException(offset32);
    }

    int offset16 = 0;
    int remaining = offset32;
    for (; offset16 < length && remaining > 0; remaining--) {
        final boolean startsPair = isLeadSurrogate(source.charAt(offset16))
                && ((offset16 + 1) < length)
                && isTrailSurrogate(source.charAt(offset16 + 1));
        // A well-formed pair is one code point occupying two code units.
        offset16 += startsPair ? 2 : 1;
    }

    if (remaining != 0) {
        // The text ran out before offset32 code points were consumed.
        throw new StringIndexOutOfBoundsException(offset32);
    }
    return offset16;
}
/**
 * Converts a UTF-32 (code point) offset into the matching UTF-16 (code unit) offset. Used for
 * random access. See the {@link UTF16 class description} for notes on roundtripping.
 *
 * @param source The UTF-16 string buffer
 * @param offset32 UTF-32 offset
 * @return UTF-16 offset
 * @exception IndexOutOfBoundsException If offset32 is negative, larger than the length of
 *                source, or larger than the number of code points in source.
 * @stable ICU 2.1
 */
public static int findOffsetFromCodePoint(StringBuffer source, int offset32) {
    final int limit = source.length();
    if (offset32 < 0 || offset32 > limit) {
        throw new StringIndexOutOfBoundsException(offset32);
    }

    int position = 0;
    int codePointsLeft = offset32;
    while (position < limit && codePointsLeft > 0) {
        final char unit = source.charAt(position);
        position++;
        // Swallow the trail half too when this unit opens a well-formed pair.
        if (isLeadSurrogate(unit) && position < limit
                && isTrailSurrogate(source.charAt(position))) {
            position++;
        }
        codePointsLeft--;
    }

    if (codePointsLeft != 0) {
        // The buffer ran out before offset32 code points were consumed.
        throw new StringIndexOutOfBoundsException(offset32);
    }
    return position;
}
hadLeadSurrogate = false; // count valid trail as zero } else { hadLeadSurrogate = isLeadSurrogate(ch);
hadLeadSurrogate = false; // count valid trail as zero } else { hadLeadSurrogate = isLeadSurrogate(ch);
/** * Returns the code point at index, and increments to the next code point (post-increment semantics). If index does * not point to a valid surrogate pair, the behavior is the same as <code>next()</code>. Otherwise the iterator is * incremented past the surrogate pair, and the code point represented by the pair is returned. * * @return the next codepoint in text, or DONE if the index is at the limit of the text. * @stable ICU 2.4 */ @Override public int nextCodePoint() { int ch1 = next(); if (UTF16.isLeadSurrogate((char) ch1)) { int ch2 = next(); if (UTF16.isTrailSurrogate((char) ch2)) { return Character.toCodePoint((char) ch1, (char) ch2); } else if (ch2 != DONE) { // unmatched surrogate so back out previous(); } } return ch1; }
/** * Retreat to the start of the previous code point in the text, and return it (pre-decrement semantics). If the * index is not preceeded by a valid surrogate pair, the behavior is the same as <code>previous()</code>. Otherwise * the iterator is decremented to the start of the surrogate pair, and the code point represented by the pair is * returned. * * @return the previous code point in the text, or DONE if the new index is before the start of the text. * @stable ICU 2.4 */ public int previousCodePoint() { int ch1 = previous(); if (UTF16.isTrailSurrogate((char) ch1)) { int ch2 = previous(); if (UTF16.isLeadSurrogate((char) ch2)) { return Character.toCodePoint((char) ch2, (char) ch1); } else if (ch2 != DONE) { // unmatched trail surrogate so back out next(); } } return ch1; }
/** * Returns the current codepoint * @return current codepoint */ @Override public int currentCodePoint(){ // cannot use charAt due to it different // behaviour when index is pointing at a // trail surrogate, check for surrogates int ch = current(); if(UTF16.isLeadSurrogate((char)ch)){ // advance the index to get the next code point next(); // due to post increment semantics current() after next() // actually returns the next char which is what we want int ch2 = current(); // current should never change the current index so back off previous(); if(UTF16.isTrailSurrogate((char)ch2)){ // we found a surrogate pair return Character.toCodePoint((char)ch, (char)ch2); } } return ch; }
/** * Returns the codepoint at the current index. If the current index is invalid, DONE is returned. If the current * index points to a lead surrogate, and there is a following trail surrogate, then the code point is returned. * Otherwise, the code unit at index is returned. Index is not changed. * * @return current codepoint * @stable ICU 2.4 */ public int currentCodePoint() { int ch = current(); if (UTF16.isLeadSurrogate((char) ch)) { // advance the index to get the // next code point next(); // due to post increment semantics // current() after next() actually // returns the char we want int ch2 = current(); // current should never change // the current index so back off previous(); if (UTF16.isTrailSurrogate((char) ch2)) { // we found a surrogate pair // return the codepoint return Character.toCodePoint((char) ch, (char) ch2); } } return ch; }
if (!UTF16.isLeadSurrogate(lead)) { fromUChar32 = lead; return CoderResult.malformedForLength(1);
/**
 * Returns the code point at the iterator's current position, combining a lead surrogate with
 * the trail surrogate that follows it. The iterator is stepped forward and back again while
 * peeking at the trail.
 *
 * @param ci the iterator to read from
 * @return the code point at the current position; the bare code unit when it is not part of
 *         a surrogate pair; or DONE32 when the iterator reports DONE and its index is at or
 *         past its end index
 */
public static int current32(CharacterIterator ci) {
    final char lead = ci.current();

    // Anything below the surrogate range is returned as-is.
    if (lead < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        return lead;
    }

    if (!UTF16.isLeadSurrogate(lead)) {
        // DONE only counts as end-of-text when the index is at or past the end index;
        // otherwise it is returned like any other code unit.
        if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return lead;
    }

    // Peek at the next code unit, then step back to where we started.
    final int trail = (int)ci.next();
    ci.previous();
    if (!UTF16.isTrailSurrogate((char)trail)) {
        return lead;
    }
    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
} // closes a scope that was opened before this method
/**
 * Moves the iterator back by one code point and returns that code point, combining a trail
 * surrogate with the lead surrogate in front of it.
 *
 * @param ci the iterator to move
 * @return the code point now at the iterator's position; the bare code unit when it is not
 *         part of a surrogate pair; or DONE32 when the iterator was already at or before its
 *         begin index
 */
public static int previous32(CharacterIterator ci) {
    if (ci.getIndex() <= ci.getBeginIndex()) {
        return DONE32;
    }

    final char trail = ci.previous();
    final boolean mayClosePair =
            UTF16.isTrailSurrogate(trail) && ci.getIndex() > ci.getBeginIndex();
    if (!mayClosePair) {
        return trail;
    }

    final char lead = ci.previous();
    if (UTF16.isLeadSurrogate(lead)) {
        return (((int)lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                + ((int)trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }

    // Unpaired trail: undo the look-behind so the iterator rests on the trail itself.
    ci.next();
    return trail;
}