// Walk the string one Unicode code point at a time. A code point may occupy
// one or two chars, so the offset advances by Character.charCount(...) rather
// than by a fixed step of one.
final int length = s.length();
int offset = 0;
while (offset < length) {
  final int codepoint = s.codePointAt(offset);
  // do something with the codepoint
  offset += Character.charCount(codepoint);
}
/**
 * Reports whether {@code token} is free of letters and digits.
 *
 * <p>The token is scanned code point by code point, so supplementary characters
 * (encoded as surrogate pairs) are classified as a single unit.
 *
 * @param token the text to inspect
 * @return {@code true} if no code point in {@code token} satisfies
 *     {@link Character#isLetterOrDigit(int)} (which includes the empty string);
 *     {@code false} as soon as one does
 */
public static boolean langIndependentPuncCheck(String token) {
  final int end = token.length();
  int index = 0;
  while (index < end) {
    final int current = token.codePointAt(index);
    if (Character.isLetterOrDigit(current)) {
      // A single letter or digit is enough to decide the answer.
      return false;
    }
    index += Character.charCount(current);
  }
  return true;
}
/**
 * Translates a count of code points into the matching char index within {@code s}.
 *
 * @param s the string to walk
 * @param codePointCount how many code points to step over from the start of {@code s}
 * @return the char index reached after {@code codePointCount} code points; {@code -1} if,
 *     before reaching that point, a code point is either the value of
 *     {@code Buffer.REPLACEMENT_CHARACTER} or an ISO control character other than
 *     {@code '\n'} and {@code '\r'}; or {@code s.length()} if the string ends first
 */
static int codePointIndexToCharIndex(String s, int codePointCount) {
  final int charLength = s.length();
  int charIndex = 0;
  int codePointsSeen = 0;
  while (charIndex < charLength) {
    if (codePointsSeen == codePointCount) {
      return charIndex;
    }
    final int codePoint = s.codePointAt(charIndex);
    // Line feed and carriage return are the only control characters tolerated.
    final boolean rejectedControl =
        Character.isISOControl(codePoint) && codePoint != '\n' && codePoint != '\r';
    if (rejectedControl || codePoint == Buffer.REPLACEMENT_CHARACTER) {
      return -1;
    }
    codePointsSeen++;
    charIndex += Character.charCount(codePoint);
  }
  return charLength;
}
// Iterate over a string by code point. The offset is moved past the current
// code point (one or two chars) before the code point is processed.
String str = "....";
final int strLen = str.length();
int offset = 0;
while (offset < strLen) {
  final int curChar = str.codePointAt(offset);
  offset += Character.charCount(curChar);
  // do something with curChar
}
/**
 * Finds where the leading whitespace of {@code value} ends.
 *
 * @param value the string to scan
 * @return the char index of the first code point for which
 *     {@link Character#isWhitespace(int)} is false, or {@code value.length()} when every
 *     code point is whitespace (including when {@code value} is empty)
 */
private int ltrimIndex( String value )
{
    final int end = value.length();
    int index = 0;
    while ( index < end )
    {
        final int current = value.codePointAt( index );
        if ( !Character.isWhitespace( current ) )
        {
            return index;
        }
        index += Character.charCount( current );
    }
    return index;
}
/**
 * Returns {@code s} with control characters and non-ASCII characters replaced with '?'.
 *
 * <p>Only code points strictly between U+001F and U+007F are kept. If every code point
 * already qualifies, the same {@code s} instance is returned and no buffer is allocated.
 */
private static String toHumanReadableAscii(String s) {
  final int length = s.length();
  int index = 0;
  int codePoint = 0;
  boolean offenderFound = false;

  // Phase 1: look for the first code point that has to be replaced.
  while (index < length) {
    codePoint = s.codePointAt(index);
    if (codePoint <= '\u001f' || codePoint >= '\u007f') {
      offenderFound = true;
      break;
    }
    index += Character.charCount(codePoint);
  }
  if (!offenderFound) {
    return s;
  }

  // Phase 2: copy the clean prefix verbatim, substitute the offender, then filter the rest.
  Buffer buffer = new Buffer();
  buffer.writeUtf8(s, 0, index);
  buffer.writeUtf8CodePoint('?');
  index += Character.charCount(codePoint);
  while (index < length) {
    codePoint = s.codePointAt(index);
    final boolean printable = codePoint > '\u001f' && codePoint < '\u007f';
    buffer.writeUtf8CodePoint(printable ? codePoint : '?');
    index += Character.charCount(codePoint);
  }
  return buffer.readUtf8();
}
@Override public int computeHash() { //NOTE that we are basing the hash code on code points instead of char[] values. if ( value.isEmpty() ) { return 0; } int h = 1, length = value.length(); for ( int offset = 0, codePoint; offset < length; offset += Character.charCount( codePoint ) ) { codePoint = value.codePointAt( offset ); h = 31 * h + codePoint; } return h; }
/**
 * Checks whether every code point of {@code data} passes the per-code-point
 * {@code isPrintable(int)} check.
 *
 * @param data the string to inspect
 * @return {@code false} at the first code point rejected by {@code isPrintable(int)};
 *     {@code true} otherwise, including for the empty string
 */
public static boolean isPrintable(final String data) {
  final int end = data.length();
  int index = 0;
  while (index < end) {
    final int current = data.codePointAt(index);
    if (isPrintable(current)) {
      index += Character.charCount(current);
    } else {
      return false;
    }
  }
  return true;
}
/**
 * Decodes a UTF-16 char array into an array of code points.
 *
 * @param str the chars to decode
 * @return a new array holding one entry per code point of {@code str}, in order
 */
private static int[] toCodePoints(char[] str) {
  final int charLength = str.length;
  // Size the result up front so no resizing is needed while decoding.
  final int[] decoded = new int[Character.codePointCount(str, 0, charLength)];
  int charIndex = 0;
  int slot = 0;
  while (charIndex < charLength) {
    final int codePoint = Character.codePointAt(str, charIndex);
    decoded[slot++] = codePoint;
    charIndex += Character.charCount(codePoint);
  }
  return decoded;
}
/**
 * Derives a Java-identifier-shaped string from {@code suggestion}.
 *
 * <p>Every code point that is not a valid identifier part is replaced by {@code '_'}. If
 * the first code point is a valid identifier part but not a valid identifier start, an
 * extra {@code "_"} is placed in front of it.
 *
 * @param suggestion the proposed name
 * @return the adjusted name; an empty {@code suggestion} yields an empty string
 */
public static String toJavaIdentifier(String suggestion) {
  final int length = suggestion.length();
  final StringBuilder identifier = new StringBuilder();
  int index = 0;
  while (index < length) {
    final int current = suggestion.codePointAt(index);
    final boolean validPart = Character.isJavaIdentifierPart(current);
    // Only the leading code point can need a prefix (e.g. a digit in first position).
    if (index == 0 && validPart && !Character.isJavaIdentifierStart(current)) {
      identifier.append("_");
    }
    if (validPart) {
      identifier.appendCodePoint(current);
    } else {
      identifier.append('_');
    }
    index += Character.charCount(current);
  }
  return identifier.toString();
}
private int compareNonNumeric(String str0, String str1, int[] pos) { // find the end of both non-numeric substrings int start0 = pos[0]; int ch0 = str0.codePointAt(pos[0]); pos[0] += Character.charCount(ch0); while (pos[0] < str0.length() && !isDigit(ch0 = str0.codePointAt(pos[0]))) { pos[0] += Character.charCount(ch0); } int start1 = pos[1]; int ch1 = str1.codePointAt(pos[1]); pos[1] += Character.charCount(ch1); while (pos[1] < str1.length() && !isDigit(ch1 = str1.codePointAt(pos[1]))) { pos[1] += Character.charCount(ch1); } // compare the substrings return String.CASE_INSENSITIVE_ORDER.compare(str0.substring(start0, pos[0]), str1.substring(start1, pos[1])); }
private static String canonicalizeForPath(String input, boolean alreadyEncoded) { int codePoint; for (int i = 0, limit = input.length(); i < limit; i += Character.charCount(codePoint)) { codePoint = input.codePointAt(i); if (codePoint < 0x20 || codePoint >= 0x7f || PATH_SEGMENT_ALWAYS_ENCODE_SET.indexOf(codePoint) != -1 || (!alreadyEncoded && (codePoint == '/' || codePoint == '%'))) { // Slow path: the character at i requires encoding! Buffer out = new Buffer(); out.writeUtf8(input, 0, i); canonicalizeForPath(out, input, i, limit, alreadyEncoded); return out.readUtf8(); } } // Fast path: no characters required encoding. return input; }
/**
 * Returns {@code s} with control characters and non-ASCII characters replaced with '?'.
 *
 * <p>A code point is kept only when it lies strictly between U+001F and U+007F. When the
 * whole string already satisfies that, the original {@code s} instance is returned.
 */
public static String toHumanReadableAscii(String s) {
  final int length = s.length();

  // Measure the leading run that can be kept untouched.
  int cleanPrefix = 0;
  while (cleanPrefix < length) {
    final int codePoint = s.codePointAt(cleanPrefix);
    final boolean printable = codePoint > '\u001f' && codePoint < '\u007f';
    if (!printable) {
      break;
    }
    cleanPrefix += Character.charCount(codePoint);
  }
  if (cleanPrefix >= length) {
    return s;
  }

  // Copy the clean prefix, then filter everything from the first offender onwards.
  Buffer sanitized = new Buffer();
  sanitized.writeUtf8(s, 0, cleanPrefix);
  int index = cleanPrefix;
  while (index < length) {
    final int codePoint = s.codePointAt(index);
    if (codePoint > '\u001f' && codePoint < '\u007f') {
      sanitized.writeUtf8CodePoint(codePoint);
    } else {
      sanitized.writeUtf8CodePoint('?');
    }
    index += Character.charCount(codePoint);
  }
  return sanitized.readUtf8();
}
/**
 * Percent-decodes the range {@code [pos, limit)} of {@code encoded} into {@code out}.
 *
 * <p>A {@code '%'} followed by two characters that {@code decodeHexDigit} accepts (both
 * still inside the range) is written as the single byte they encode. A {@code '+'} is
 * written as a space when {@code plusIsSpace} is true. Every other code point, including
 * a {@code '%'} that does not start a valid escape, is written as UTF-8.
 *
 * @param out destination buffer
 * @param encoded text that may contain percent escapes
 * @param pos index of the first char to decode
 * @param limit index just past the last char to decode
 * @param plusIsSpace whether {@code '+'} decodes to a space
 */
static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
  int index = pos;
  while (index < limit) {
    final int codePoint = encoded.codePointAt(index);
    if (codePoint == '%' && index + 2 < limit) {
      final int high = decodeHexDigit(encoded.charAt(index + 1));
      final int low = decodeHexDigit(encoded.charAt(index + 2));
      if (high != -1 && low != -1) {
        out.writeByte((high << 4) + low);
        // Consume the '%' plus both hex digits.
        index += 3;
        continue;
      }
    } else if (codePoint == '+' && plusIsSpace) {
      out.writeByte(' ');
      index += 1;
      continue;
    }
    out.writeUtf8CodePoint(codePoint);
    index += Character.charCount(codePoint);
  }
}
@Nullable Charset charset) { int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { codePoint = input.codePointAt(i); if (codePoint < 0x20
/**
 * Writes the percent-decoded form of {@code encoded[pos..limit)} to {@code out}.
 *
 * <p>Three cases are handled per code point: a {@code '%'} with two following characters
 * inside the range that {@code decodeHexDigit} both accepts becomes one raw byte; a
 * {@code '+'} becomes a space if {@code plusIsSpace} is set; anything else (including a
 * {@code '%'} that is not a valid escape) is copied through as UTF-8.
 *
 * @param out destination buffer
 * @param encoded text that may contain percent escapes
 * @param pos index of the first char to decode
 * @param limit index just past the last char to decode
 * @param plusIsSpace whether {@code '+'} decodes to a space
 */
static void percentDecode(Buffer out, String encoded, int pos, int limit, boolean plusIsSpace) {
  int cursor = pos;
  while (cursor < limit) {
    final int codePoint = encoded.codePointAt(cursor);
    final int width = Character.charCount(codePoint);
    if (codePoint == '%' && cursor + 2 < limit) {
      final int d1 = decodeHexDigit(encoded.charAt(cursor + 1));
      final int d2 = decodeHexDigit(encoded.charAt(cursor + 2));
      if (d1 != -1 && d2 != -1) {
        out.writeByte((d1 << 4) + d2);
        // Skip the two hex digits in addition to the '%' itself.
        cursor += width + 2;
        continue;
      }
      // Not a valid escape: emit the '%' literally.
      out.writeUtf8CodePoint(codePoint);
    } else if (codePoint == '+' && plusIsSpace) {
      out.writeByte(' ');
    } else {
      out.writeUtf8CodePoint(codePoint);
    }
    cursor += width;
  }
}
@AndroidIncompatible // slow @GwtIncompatible // Doubles.tryParse public void testTryParseAllCodePoints() { // Exercise non-ASCII digit test cases and the like. char[] tmp = new char[2]; for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) { Character.toChars(i, tmp, 0); checkTryParse(String.copyValueOf(tmp, 0, Character.charCount(i))); } }
@AndroidIncompatible // slow @GwtIncompatible // Floats.tryParse public void testTryParseAllCodePoints() { // Exercise non-ASCII digit test cases and the like. char[] tmp = new char[2]; for (int i = Character.MIN_CODE_POINT; i < Character.MAX_CODE_POINT; i++) { Character.toChars(i, tmp, 0); checkTryParse(String.copyValueOf(tmp, 0, Character.charCount(i))); } }
private static void canonicalizeForPath(Buffer out, String input, int pos, int limit, boolean alreadyEncoded) { Buffer utf8Buffer = null; // Lazily allocated. int codePoint; for (int i = pos; i < limit; i += Character.charCount(codePoint)) { codePoint = input.codePointAt(i); if (alreadyEncoded && (codePoint == '\t' || codePoint == '\n' || codePoint == '\f' || codePoint == '\r')) { // Skip this character. } else if (codePoint < 0x20 || codePoint >= 0x7f || PATH_SEGMENT_ALWAYS_ENCODE_SET.indexOf(codePoint) != -1 || (!alreadyEncoded && (codePoint == '/' || codePoint == '%'))) { // Percent encode this character. if (utf8Buffer == null) { utf8Buffer = new Buffer(); } utf8Buffer.writeUtf8CodePoint(codePoint); while (!utf8Buffer.exhausted()) { int b = utf8Buffer.readByte() & 0xff; out.writeByte('%'); out.writeByte(HEX_DIGITS[(b >> 4) & 0xf]); out.writeByte(HEX_DIGITS[b & 0xf]); } } else { // This character doesn't need encoding. Just copy it over. out.writeUtf8CodePoint(codePoint); } } }