/**
 * Returns the 32-bit code point found at the given position, looking only at
 * the first {@code length} code units of {@code buffer}.
 *
 * @param pos UTF-16 offset into the buffer
 * @return the value produced by {@code UTF16.charAt} for that offset
 */
@Override
public int char32At(int pos) {
    final int codePoint = UTF16.charAt(buffer, 0, length, pos);
    return codePoint;
}
/**
 * Reads the code point at {@code offset} in the supplied buffer by delegating
 * to {@code UTF16.charAt}.
 *
 * @param buffer the text to read from
 * @param offset UTF-16 offset into {@code buffer}
 * @return the code point reported by {@code UTF16.charAt}
 */
@Override
int codePointAt0(StringBuffer buffer, int offset) {
    final int codePoint = UTF16.charAt(buffer, offset);
    return codePoint;
}
/**
 * Returns the 32-bit code point located at the given 16-bit offset into the
 * text. The text is assumed to be stored as 16-bit code units with surrogate
 * pairs intermixed. When the offset designates either the leading or the
 * trailing code unit of a surrogate pair, the code point of the whole pair
 * is returned.
 * @param offset an integer between 0 and <code>length()</code>-1
 * inclusive
 * @return 32-bit code point of text at given offset
 * @stable ICU 2.0
 */
public int char32At(int offset) {
    final int codePoint = UTF16.charAt(buf, offset);
    return codePoint;
}
/**
 * Reads the code point at {@code offset} in the supplied buffer by delegating
 * to {@code UTF16.charAt}.
 *
 * @param buffer the text to read from
 * @param offset UTF-16 offset into {@code buffer}
 * @return the code point reported by {@code UTF16.charAt}
 */
@Override
int codePointAt0(StringBuffer buffer, int offset) {
    final int codePoint = UTF16.charAt(buffer, offset);
    return codePoint;
}
/**
 * Returns the code point at {@code pos}, treating {@code charSeq} as a
 * {@code String}.
 *
 * @param pos UTF-16 offset into the character sequence
 * @return the code point reported by {@code UTF16.charAt}
 * @throws ClassCastException if {@code charSeq} is not a {@code String}
 */
protected int utfCharAt(int pos) {
    final String source = (String) charSeq;
    return UTF16.charAt(source, pos);
}
/**
 * Returns the code point at {@code pos}, treating {@code charSeq} as a
 * {@code StringBuffer}.
 *
 * @param pos UTF-16 offset into the character sequence
 * @return the code point reported by {@code UTF16.charAt}
 * @throws ClassCastException if {@code charSeq} is not a {@code StringBuffer}
 */
protected int utfCharAt(int pos) {
    final StringBuffer source = (StringBuffer) charSeq;
    return UTF16.charAt(source, pos);
}
/**
 * Returns the 32-bit code point found at the given position, looking only at
 * the first {@code length} code units of {@code buffer}.
 *
 * @param pos UTF-16 offset into the buffer
 * @return the value produced by {@code UTF16.charAt} for that offset
 */
public int char32At(int pos) {
    final int codePoint = UTF16.charAt(buffer, 0, length, pos);
    return codePoint;
}
/**
 * Fetches the current 32-bit code point as-is: no escape parsing, no
 * variable parsing, and no whitespace skipping.
 * @return the current 32-bit code point, or {@code DONE} when reading from
 * {@code text} and the index is at or past its end
 */
private int _current() {
    if (buf == null) {
        // No side buffer is active: read straight from the text at the
        // position tracked by pos.
        final int index = pos.getIndex();
        if (index >= text.length()) {
            return DONE;
        }
        return UTF16.charAt(text, index);
    }
    // A side buffer is active: read from it at bufPos.
    return UTF16.charAt(buf, 0, buf.length, bufPos);
}
public ICU() { // force attempt to access ICU4J API UTF16.charAt(getClass().getName(), 1); }
public ICU() { // force attempt to access ICU4J API UTF16.charAt(getClass().getName(), 1); }
/** * Internal method. Returns 8-bit index value for this rule. * This is the low byte of the first character of the key, * unless the first character of the key is a set. If it's a * set, or otherwise can match multiple keys, the index value is -1. */ final int getIndexValue() { if (anteContextLength == pattern.length()) { // A pattern with just ante context {such as foo)>bar} can // match any key. return -1; } int c = UTF16.charAt(pattern, anteContextLength); return data.lookupMatcher(c) == null ? (c & 0xFF) : -1; }
/**
 * Implement UnicodeMatcher. An empty pattern matches every index value;
 * otherwise the decision is based on the first code point of the pattern,
 * either directly (low byte comparison) or through the matcher it maps to.
 */
@Override
public boolean matchesIndexValue(int v) {
    if (pattern.length() == 0) {
        return true;
    }
    final int first = UTF16.charAt(pattern, 0);
    final UnicodeMatcher matcher = data.lookupMatcher(first);
    if (matcher != null) {
        return matcher.matchesIndexValue(v);
    }
    return (first & 0xFF) == v;
}
/**
 * Advances past a run of zero or more bidi marks beginning at pos in text.
 *
 * @param text the text to scan
 * @param pos the UTF-16 offset at which scanning starts
 * @return the offset of the first code point that is not a bidi mark, or
 * the text length if the run extends to the end
 */
private static int skipBidiMarks(String text, int pos) {
    int index = pos;
    while (index < text.length()) {
        final int codePoint = UTF16.charAt(text, index);
        if (isBidiMark(codePoint)) {
            // Step by one or two code units depending on the code point.
            index += UTF16.getCharCount(codePoint);
        } else {
            return index;
        }
    }
    return index;
}
/**
 * Advances past a run of zero or more Pattern_White_Space characters
 * beginning at pos in text.
 *
 * @param text the text to scan
 * @param pos the UTF-16 offset at which scanning starts
 * @return the offset of the first code point that is not
 * Pattern_White_Space, or the text length if the run extends to the end
 */
private static int skipPatternWhiteSpace(String text, int pos) {
    int index = pos;
    while (index < text.length()) {
        final int codePoint = UTF16.charAt(text, index);
        if (PatternProps.isWhiteSpace(codePoint)) {
            // Step by one or two code units depending on the code point.
            index += UTF16.getCharCount(codePoint);
        } else {
            return index;
        }
    }
    return index;
}
/**
 * Advances past a run of zero or more isUWhiteSpace() characters beginning
 * at pos in text.
 *
 * @param text the text to scan
 * @param pos the UTF-16 offset at which scanning starts
 * @return the offset of the first code point for which
 * {@code UCharacter.isUWhiteSpace} is false, or the text length if the run
 * extends to the end
 */
private static int skipUWhiteSpace(String text, int pos) {
    int index = pos;
    while (index < text.length()) {
        final int codePoint = UTF16.charAt(text, index);
        if (UCharacter.isUWhiteSpace(codePoint)) {
            // Step by one or two code units depending on the code point.
            index += UTF16.getCharCount(codePoint);
        } else {
            return index;
        }
    }
    return index;
}
/**
 * Reports whether this half looks like valid input, that is, it contains
 * no functions or other special output-only elements. Every code point of
 * {@code text} must be accepted by {@code parser.parseData.isMatcher}.
 */
public boolean isValidInput(TransliteratorParser parser) {
    int index = 0;
    while (index < text.length()) {
        final int codePoint = UTF16.charAt(text, index);
        if (!parser.parseData.isMatcher(codePoint)) {
            return false;
        }
        index += UTF16.getCharCount(codePoint);
    }
    return true;
}
}
/**
 * Reports whether this half looks like valid output, that is, it contains
 * no quantifiers or other special input-only elements. Every code point of
 * {@code text} must be accepted by {@code parser.parseData.isReplacer}.
 */
public boolean isValidOutput(TransliteratorParser parser) {
    int index = 0;
    while (index < text.length()) {
        final int codePoint = UTF16.charAt(text, index);
        if (!parser.parseData.isReplacer(codePoint)) {
            return false;
        }
        index += UTF16.getCharCount(codePoint);
    }
    return true;
}
/**
 * Adds each of the characters in the given string to the set. Thus "ch" => {"c", "h"}
 * If this set already contains a particular character, adding it again has
 * no effect on that character.
 * @param s the source string
 * @return this object, for chaining
 * @stable ICU 2.0
 */
public final UnicodeSet addAll(CharSequence s) {
    checkFrozen();
    int index = 0;
    while (index < s.length()) {
        final int codePoint = UTF16.charAt(s, index);
        add_unchecked(codePoint, codePoint);
        // Step by one or two code units depending on the code point.
        index += UTF16.getCharCount(codePoint);
    }
    return this;
}
/**
 * Implementation of UnicodeMatcher API. Unions into the given set every
 * character that this object may match. Each code point of the pattern is
 * either added directly or, when it maps to a matcher, contributes that
 * matcher's own match set.
 * @param toUnionTo the set into which to union the source characters
 */
@Override
public void addMatchSetTo(UnicodeSet toUnionTo) {
    int index = 0;
    while (index < pattern.length()) {
        final int codePoint = UTF16.charAt(pattern, index);
        final UnicodeMatcher matcher = data.lookupMatcher(codePoint);
        if (matcher != null) {
            matcher.addMatchSetTo(toUnionTo);
        } else {
            toUnionTo.add(codePoint);
        }
        index += UTF16.getCharCount(codePoint);
    }
}
private int calcStatus(int current, int next) { if (current == BreakIterator.DONE || next == BreakIterator.DONE) return RuleBasedBreakIterator.WORD_NONE; int begin = start + current; int end = start + next; int codepoint; for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) { codepoint = UTF16.charAt(text, 0, end, begin); if (UCharacter.isDigit(codepoint)) return RuleBasedBreakIterator.WORD_NUMBER; else if (UCharacter.isLetter(codepoint)) { // TODO: try to separately specify ideographic, kana? // [currently all bundled as letter for this case] return RuleBasedBreakIterator.WORD_LETTER; } } return RuleBasedBreakIterator.WORD_NONE; }