/** Accepts exactly the code points for which {@link Character#isBmpCodePoint(int)} is true (U+0000 to U+FFFF). */
public boolean accept(int codePoint) { return Character.isBmpCodePoint(codePoint); } };
/** Accepts exactly the code points for which {@link Character#isBmpCodePoint(int)} is true (U+0000 to U+FFFF). */
public boolean accept(int codePoint) { return Character.isBmpCodePoint(codePoint); } };
/** Accepts exactly the code points for which {@link Character#isBmpCodePoint(int)} is true (U+0000 to U+FFFF). */
public boolean accept(int codePoint) { return Character.isBmpCodePoint(codePoint); } };
/**
 * Determines whether the code point is in the Basic Multilingual Plane (BMP) and can thus be
 * represented by a single <code>char</code>.
 *
 * @return <code>true</code> if the code point is in the BMP.
 * @see Character#isBmpCodePoint(int)
 */
public boolean isBmpCodePoint() { return Character.isBmpCodePoint(codePoint); }
/** Accepts exactly the code points for which {@link Character#isBmpCodePoint(int)} is true (U+0000 to U+FFFF). */
public boolean accept(int codePoint) { return Character.isBmpCodePoint(codePoint); } };
/** Accepts exactly the code points for which {@link Character#isBmpCodePoint(int)} is true (U+0000 to U+FFFF). */
public boolean accept(int codePoint) { return Character.isBmpCodePoint(codePoint); } };
/**
 * Reports how many bytes the given code is encoded with.
 *
 * @param code the code point to measure
 * @return {@code 2} when {@code code} lies in the Basic Multilingual Plane, {@code 4} otherwise
 */
@Override
public int codeToMbcLength(int code) {
    return Character.isBmpCodePoint(code) ? 2 : 4;
}
// Substitute U+FFFD (the replacement character) for every code point outside the BMP ...
IntStream converted = utf8mb4string.codePoints()
        .map(cp -> Character.isBmpCodePoint(cp) ? cp : 0xFFFD);
// ... and rebuild the string; every remaining value fits in a single char.
String str = converted
        .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
        .toString();
/**
 * Checks that the string consists solely of code points from the Basic Multilingual Plane.
 *
 * <p>The string is examined code point by code point rather than {@code char} by {@code char}:
 * every single {@code char} value is trivially a BMP code point, so a per-{@code char} test can
 * never fail. Reading with {@link String#codePointAt(int)} combines a surrogate pair into its
 * supplementary code point, which is then rejected. An unpaired surrogate is returned as-is by
 * {@code codePointAt} and is therefore still accepted.
 *
 * @param string the text to inspect; must not be {@code null}
 * @return {@code true} if no supplementary (non-BMP) code point occurs in {@code string},
 *         {@code false} otherwise
 */
private boolean isValidUTF8(final String string) {
    final int length = string.length();
    for (int i = 0; i < length; i++) {
        // A BMP code point occupies exactly one char, so advancing by one is sufficient:
        // the first supplementary code point ends the scan immediately.
        if (!Character.isBmpCodePoint(string.codePointAt(i))) {
            return false;
        }
    }
    return true;
}
/**
 * Builds the string that consists of exactly one code point.
 *
 * @param codePoint
 *            the code point to convert
 * @return a one-{@code char} string for a BMP code point, otherwise the string formed by
 *         {@link Character#toChars(int)}
 */
public static String fromCodePoint(int codePoint) {
    return Character.isBmpCodePoint(codePoint)
            ? String.valueOf((char) codePoint)
            : new String(Character.toChars(codePoint));
}
/**
 * Appends a backslash-u prefix to {@code out}, followed by the results of {@code toHexDigit}
 * for the code unit shifted right by 12, 8, 4 and 0 bits, in that order.
 *
 * @param codeUnit the code unit to write; asserted to be a BMP code point
 */
private void appendCodeUnit(int codeUnit) {
    assert Character.isBmpCodePoint(codeUnit);
    out.append("\\u");
    // Most significant group first.
    for (int shift = 12; shift >= 0; shift -= 4) {
        out.append(toHexDigit(codeUnit >> shift));
    }
}
/** * WARNING!! {@code Character} based Codecs will silently transform code points that are not * legal UTF code points into garbage data as they will cast them to {@code char}s. * </br></br> * If you are implementing an {@code Integer} based codec, these will be silently discarded * based on the return from {@code Character.isValidCodePoint( int )}. This is the preferred * behavior moving forward. * * * {@inheritDoc} */ @Override public String encode(char[] immune, String input) { StringBuilder sb = new StringBuilder(); for(int offset = 0; offset < input.length(); ){ final int point = input.codePointAt(offset); if(Character.isBmpCodePoint(point)){ //We can then safely cast this to char and maintain legacy behavior. sb.append(encodeCharacter(immune, new Character((char) point))); }else{ sb.append(encodeCharacter(immune, point)); } offset += Character.charCount(point); } return sb.toString(); }
/**
 * Writes the UTF-16 form of a code point to a string builder. Equivalent to appending the result
 * of Character#toChars, but without allocating a temporary char array.
 *
 * @param dst
 *            the builder that receives the characters
 * @param codePoint
 *            the code point to write
 * @throws IllegalArgumentException
 *             if {@code codePoint} is not a valid Unicode code point
 */
private static void appendCodepoint(StringBuilder dst, int codePoint) {
    if (!Character.isValidCodePoint(codePoint)) {
        throw new IllegalArgumentException("Invalid codepoint " + codePoint);
    }
    if (Character.isBmpCodePoint(codePoint)) {
        // Fits in a single char.
        dst.append((char) codePoint);
        return;
    }
    // Supplementary code point: emit the surrogate pair.
    dst.append(Character.highSurrogate(codePoint));
    dst.append(Character.lowSurrogate(codePoint));
}
}
/**
 * Writes {@code code} into {@code bytes} starting at index {@code p}, high-order byte first.
 *
 * <p>A BMP code point is written as two bytes. Any other code is written as four bytes: the
 * high surrogate followed by the low surrogate, each high-order byte first.
 *
 * @param code the code point to encode
 * @param bytes the destination array
 * @param p the index in {@code bytes} at which writing starts
 * @return the number of bytes written, either {@code 2} or {@code 4}
 */
@Override
public int codeToMbc(int code, byte[] bytes, int p) {
    if (!Character.isBmpCodePoint(code)) {
        final char lead = Character.highSurrogate(code);
        final char trail = Character.lowSurrogate(code);
        bytes[p] = (byte) (lead >>> 8);
        bytes[p + 1] = (byte) lead;
        bytes[p + 2] = (byte) (trail >>> 8);
        bytes[p + 3] = (byte) trail;
        return 4;
    }
    bytes[p] = (byte) (code >>> 8);
    bytes[p + 1] = (byte) code;
    return 2;
}
/**
 * Writes the UTF-16 form of a code point to a string builder. Equivalent to appending the result
 * of Character#toChars, but without allocating a temporary char array.
 *
 * @param dst
 *            the builder that receives the characters
 * @param codePoint
 *            the code point to write
 * @throws IllegalArgumentException
 *             if {@code codePoint} is not a valid Unicode code point
 */
private static void appendCodepoint(StringBuilder dst, int codePoint) {
    if (!Character.isValidCodePoint(codePoint)) {
        throw new IllegalArgumentException("Invalid codepoint " + codePoint);
    }
    if (Character.isBmpCodePoint(codePoint)) {
        // Fits in a single char.
        dst.append((char) codePoint);
        return;
    }
    // Supplementary code point: emit the surrogate pair.
    dst.append(Character.highSurrogate(codePoint));
    dst.append(Character.lowSurrogate(codePoint));
}
}
/**
 * Writes one code point to the underlying {@link #out} stream or buffer, as a single
 * {@code char} or as a surrogate pair.
 *
 * @param  c  the code point to write.
 * @throws IOException if an error occurred while appending the code point. A
 *         {@link CharConversionException} is thrown when {@code c} is neither a BMP nor a
 *         supplementary code point.
 */
final void appendCodePoint(final int c) throws IOException {
    if (Character.isSupplementaryCodePoint(c)) {
        out.append(Character.highSurrogate(c))
           .append(Character.lowSurrogate(c));
        return;
    }
    if (!Character.isBmpCodePoint(c)) {
        throw new CharConversionException();
    }
    out.append((char) c);
}
/**
 * Writes one code point to the underlying {@link #out} stream or buffer, as a single
 * {@code char} or as a surrogate pair.
 *
 * @param  c  the code point to write.
 * @throws IOException if an error occurred while appending the code point. A
 *         {@link CharConversionException} is thrown when {@code c} is neither a BMP nor a
 *         supplementary code point.
 */
final void appendCodePoint(final int c) throws IOException {
    if (Character.isSupplementaryCodePoint(c)) {
        out.append(Character.highSurrogate(c))
           .append(Character.lowSurrogate(c));
        return;
    }
    if (!Character.isBmpCodePoint(c)) {
        throw new CharConversionException();
    }
    out.append((char) c);
}
/**
 * Adds a code point to the buffer.
 *
 * <p>A BMP code point is handed to {@code append} unchanged; anything else is appended as its
 * high surrogate followed by its low surrogate. The value is not checked for being a valid
 * code point.
 *
 * @param c
 *            the code point
 */
public void appendCodePoint(int c) {
    if (!Character.isBmpCodePoint(c)) {
        append(Character.highSurrogate(c));
        append(Character.lowSurrogate(c));
        return;
    }
    append(c);
}
/**
 * Appends the {@code char} or surrogate pair that represents {@code codePoint} to this sequence.
 *
 * @param codePoint a Unicode code point
 * @return this {@code Output} instance to allow invocation chaining.
 * @throws IllegalArgumentException if the specified {@code codePoint} isn't a valid Unicode code point.
 * @see java.lang.StringBuilder#appendCodePoint(int) the corresponding method of <code>StringBuilder</code>
 */
default Output appendCodePoint( int codePoint )
{
    if ( !isValidCodePoint( codePoint ) )
    {
        throw new IllegalArgumentException();
    }

    if ( isBmpCodePoint( codePoint ) )
    {
        // A single char is enough.
        append( (char) codePoint );
        return this;
    }

    // Supplementary code point: high surrogate first, then low surrogate.
    append( highSurrogate( codePoint ) );
    append( lowSurrogate( codePoint ) );
    return this;
}
private char multibyteCharAt(Encoding enc, int beg, int length) { int p; int s = value.getBegin(); int end = s + length; byte[] bytes = value.getUnsafeBytes(); if (beg > 0 && beg > StringSupport.strLengthFromRubyString(this, enc)) { throw new StringIndexOutOfBoundsException(beg); } if (isCodeRangeValid() && enc.isUTF8()) { p = StringSupport.utf8Nth(bytes, s, end, beg); } else if (enc.isFixedWidth()) { int w = enc.maxLength(); p = s + beg * w; if (p > end || w > end - p) { throw new StringIndexOutOfBoundsException(beg); } } else if ((p = StringSupport.nth(enc, bytes, s, end, beg)) == end) { throw new StringIndexOutOfBoundsException(beg); } int codepoint = enc.mbcToCode(bytes, p, end); if (Character.isBmpCodePoint(codepoint)) { return (char) codepoint; } // we can only return high surrogate here return Character.highSurrogate(codepoint); }