/**
 * Converts a string into an array of UTF-32 code points.
 * Positions reported by {@code isSurrogatePair} are collapsed into a single
 * code point and consume two chars; every other char is copied as its own value.
 *
 * @param text the string to convert, may be {@code null}
 * @return the code points of {@code text}, or {@code null} if {@code text} is {@code null}
 */
public static int[] convertToUtf32(String text) {
    if (text == null) {
        return null;
    }
    List<Integer> codePoints = new ArrayList<>(text.length());
    for (int i = 0; i < text.length(); ) {
        if (!isSurrogatePair(text, i)) {
            // Plain char: its numeric value is the code point.
            codePoints.add((int) text.charAt(i));
            i += 1;
            continue;
        }
        // High + low surrogate: one code point built from two chars.
        codePoints.add(convertToUtf32(text, i));
        i += 2;
    }
    return ArrayUtil.toIntArray(codePoints);
}
/**
 * Tells whether a code point is whitespace or non-printable.
 *
 * @param code the code point to test
 * @return {@code true} if {@link Character#isWhitespace(int)} accepts the code point,
 *         otherwise the result of {@code isNonPrintable(code)}
 */
public static boolean isWhitespaceOrNonPrintable(int code) {
    if (Character.isWhitespace(code)) {
        return true;
    }
    return isNonPrintable(code);
}
/**
 * Checks if two subsequent characters in a character array are
 * the higher and the lower character in a surrogate pair
 * (and therefore eligible for conversion to a UTF 32 character).
 * An index that leaves no room for a second character (or is negative) yields {@code false}.
 *
 * @param text the character array with the high and low surrogate characters
 * @param idx  the index of the 'high' character in the pair
 * @return true if the characters are surrogate pairs
 */
public static boolean isSurrogatePair(char[] text, int idx) {
    // Both idx and idx + 1 must be valid positions in the array.
    if (idx < 0 || idx > text.length - 2) {
        return false;
    }
    if (!isSurrogateHigh(text[idx])) {
        return false;
    }
    return isSurrogateLow(text[idx + 1]);
}
/**
 * Scans {@code text} starting at {@code from} and returns the script of the first
 * code point whose script passes {@code isSignificantUnicodeScript}.
 * Surrogate pairs are read as one code point.
 *
 * @param from the index in {@code text} to start scanning at
 * @return the first significant script found, or {@link Character.UnicodeScript#COMMON}
 *         if the end of the text is reached without finding one
 */
private Character.UnicodeScript nextSignificantUnicodeScript(int from) {
    int pos = from;
    while (pos < text.length()) {
        final int codePoint;
        final int width;
        if (isSurrogatePair(text, pos)) {
            codePoint = TextUtil.convertToUtf32(text, pos);
            // A pair occupies two chars, so step over both.
            width = 2;
        } else {
            codePoint = (int) text.charAt(pos);
            width = 1;
        }
        Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
        if (isSignificantUnicodeScript(script)) {
            return script;
        }
        pos += width;
    }
    return Character.UnicodeScript.COMMON;
}
/**
 * Finds the first position at or after {@code index} whose char is not reported as
 * whitespace or non-printable by {@code TextUtil.isWhitespaceOrNonPrintable}.
 *
 * @return that position, or {@code text.length()} if every remaining char is skipped
 */
private int nextSignificantIndex() {
    int candidate = index;
    while (candidate < text.length()
            && TextUtil.isWhitespaceOrNonPrintable(text.charAt(candidate))) {
        candidate++;
    }
    return candidate;
}
/**
 * Returns the chars for a code point as produced by {@code TextUtil.convertFromUtf32}.
 *
 * @param unicode the code point; a value below zero is treated as "no code point"
 * @return the converted chars, or {@code null} when {@code unicode} is negative
 */
private static char[] getChars(int unicode) {
    if (unicode > -1) {
        return TextUtil.convertFromUtf32(unicode);
    }
    return null;
}
}
/**
 * Converts a UTF32 code point sequence to a String with the corresponding character(s).
 *
 * @param text     a Unicode text sequence
 * @param startPos start position of text to convert, inclusive
 * @param endPos   end position of text to convert, exclusive
 * @return the corresponding characters in a String
 */
public static String convertFromUtf32(int[] text, int startPos, int endPos) {
    StringBuilder result = new StringBuilder();
    int pos = startPos;
    while (pos < endPos) {
        // Each code point contributes the chars returned by the per-code-point converter.
        char[] chars = convertFromUtf32ToCharArray(text[pos]);
        result.append(chars);
        pos++;
    }
    return result.toString();
}
break; case ZAPF_DINGBATS_1: numberText = TextUtil.charToString((char) (index + 171)); break; case ZAPF_DINGBATS_2: numberText = TextUtil.charToString((char) (index + 181)); break; case ZAPF_DINGBATS_3: numberText = TextUtil.charToString((char) (index + 191)); break; case ZAPF_DINGBATS_4: numberText = TextUtil.charToString((char) (index + 201)); break; default:
/**
 * Scans {@code text} starting at {@code from} and returns the script of the first
 * code point whose script passes {@code isSignificantUnicodeScript}.
 * Surrogate pairs are read as one code point.
 *
 * @param from the index in {@code text} to start scanning at
 * @return the first significant script found, or {@link Character.UnicodeScript#COMMON}
 *         if the end of the text is reached without finding one
 */
private Character.UnicodeScript nextSignificantUnicodeScript(int from) {
    int pos = from;
    while (pos < text.length()) {
        final int codePoint;
        final int width;
        if (isSurrogatePair(text, pos)) {
            codePoint = TextUtil.convertToUtf32(text, pos);
            // A pair occupies two chars, so step over both.
            width = 2;
        } else {
            codePoint = (int) text.charAt(pos);
            width = 1;
        }
        Character.UnicodeScript script = Character.UnicodeScript.of(codePoint);
        if (isSignificantUnicodeScript(script)) {
            return script;
        }
        pos += width;
    }
    return Character.UnicodeScript.COMMON;
}
/**
 * Finds the first position at or after {@code index} whose char is not reported as
 * whitespace or non-printable by {@code TextUtil.isWhitespaceOrNonPrintable}.
 *
 * @return that position, or {@code text.length()} if every remaining char is skipped
 */
private int nextSignificantIndex() {
    int candidate = index;
    while (candidate < text.length()
            && TextUtil.isWhitespaceOrNonPrintable(text.charAt(candidate))) {
        candidate++;
    }
    return candidate;
}
/**
 * Returns the chars for a code point as produced by {@code TextUtil.convertFromUtf32}.
 *
 * @param unicode the code point; a value below zero is treated as "no code point"
 * @return the converted chars, or {@code null} when {@code unicode} is negative
 */
private static char[] getChars(int unicode) {
    if (unicode > -1) {
        return TextUtil.convertFromUtf32(unicode);
    }
    return null;
}
}
/**
 * Converts a UTF32 code point sequence to a String with the corresponding character(s).
 *
 * @param text     a Unicode text sequence
 * @param startPos start position of text to convert, inclusive
 * @param endPos   end position of text to convert, exclusive
 * @return the corresponding characters in a String
 */
public static String convertFromUtf32(int[] text, int startPos, int endPos) {
    StringBuilder result = new StringBuilder();
    int pos = startPos;
    while (pos < endPos) {
        // Each code point contributes the chars returned by the per-code-point converter.
        char[] chars = convertFromUtf32ToCharArray(text[pos]);
        result.append(chars);
        pos++;
    }
    return result.toString();
}
break; case ZAPF_DINGBATS_1: numberText = TextUtil.charToString((char) (index + 171)); break; case ZAPF_DINGBATS_2: numberText = TextUtil.charToString((char) (index + 181)); break; case ZAPF_DINGBATS_3: numberText = TextUtil.charToString((char) (index + 191)); break; case ZAPF_DINGBATS_4: numberText = TextUtil.charToString((char) (index + 201)); break; default:
/**
 * Converts a string into an array of UTF-32 code points.
 * Positions reported by {@code isSurrogatePair} are collapsed into a single
 * code point and consume two chars; every other char is copied as its own value.
 *
 * @param text the string to convert, may be {@code null}
 * @return the code points of {@code text}, or {@code null} if {@code text} is {@code null}
 */
public static int[] convertToUtf32(String text) {
    if (text == null) {
        return null;
    }
    List<Integer> codePoints = new ArrayList<>(text.length());
    for (int i = 0; i < text.length(); ) {
        if (!isSurrogatePair(text, i)) {
            // Plain char: its numeric value is the code point.
            codePoints.add((int) text.charAt(i));
            i += 1;
            continue;
        }
        // High + low surrogate: one code point built from two chars.
        codePoints.add(convertToUtf32(text, i));
        i += 2;
    }
    return ArrayUtil.toIntArray(codePoints);
}
/**
 * Checks if two subsequent characters in a character array are
 * the higher and the lower character in a surrogate pair
 * (and therefore eligible for conversion to a UTF 32 character).
 * An index that leaves no room for a second character (or is negative) yields {@code false}.
 *
 * @param text the character array with the high and low surrogate characters
 * @param idx  the index of the 'high' character in the pair
 * @return true if the characters are surrogate pairs
 */
public static boolean isSurrogatePair(char[] text, int idx) {
    // Both idx and idx + 1 must be valid positions in the array.
    if (idx < 0 || idx > text.length - 2) {
        return false;
    }
    if (!isSurrogateHigh(text[idx])) {
        return false;
    }
    return isSurrogateLow(text[idx + 1]);
}
for (FontInfo f : selector.getFonts()) { int codePoint = isSurrogatePair(text, nextUnignorable) ? TextUtil.convertToUtf32(text, nextUnignorable) : (int) text.charAt(nextUnignorable); int to = nextUnignorable; for (int i = nextUnignorable; i < text.length(); i++) { int codePoint = isSurrogatePair(text, i) ? TextUtil.convertToUtf32(text, i) : (int) text.charAt(i); Character.UnicodeScript currScript = Character.UnicodeScript.of(codePoint); if (isSignificantUnicodeScript(currScript) && currScript != unicodeScript) {
/**
 * Tells whether a code point is whitespace or non-printable.
 *
 * @param code the code point to test
 * @return {@code true} if {@link Character#isWhitespace(int)} accepts the code point,
 *         otherwise the result of {@code isNonPrintable(code)}
 */
public static boolean isWhitespaceOrNonPrintable(int code) {
    if (Character.isWhitespace(code)) {
        return true;
    }
    return isNonPrintable(code);
}
private boolean isAppendableGlyph(Glyph glyph) { // If font is specific and glyph.getCode() = 0, unicode value will be also 0. // Character.isIdentifierIgnorable(0) gets true. return glyph.getCode() > 0 || TextUtil.isWhitespaceOrNonPrintable(glyph.getUnicode()); }
private boolean glyphLinePartNeedsActualText(GlyphLine.GlyphLinePart glyphLinePart) { if (glyphLinePart.actualText == null) { return false; } boolean needsActualText = false; StringBuilder toUnicodeMapResult = new StringBuilder(); for (int i = glyphLinePart.start; i < glyphLinePart.end; i++) { Glyph currentGlyph = glyphLine.glyphs.get(i); if (!currentGlyph.hasValidUnicode()) { needsActualText = true; break; } // TODO zero glyph is a special case. Unicode might be special toUnicodeMapResult.append(TextUtil.convertFromUtf32(currentGlyph.getUnicode())); } return needsActualText || !toUnicodeMapResult.toString().equals(glyphLinePart.actualText); } }
/**
 * Appends glyphs for the characters of {@code text} in the range {@code [from, to]},
 * stopping at the first glyph that {@code isAppendableGlyph} rejects.
 * A surrogate pair is looked up as a single code point and counts as two processed chars.
 *
 * @param text   the text to take characters from
 * @param from   index of the first char to process, inclusive
 * @param to     index of the last char to process, inclusive
 * @param glyphs the list that accepted glyphs are added to
 * @return the number of chars of {@code text} consumed by the appended glyphs
 */
private int appendUniGlyphs(String text, int from, int to, List<Glyph> glyphs) {
    int processed = 0;
    for (int k = from; k <= to; ++k) {
        int val;
        // Remember the count before this glyph so it can be rolled back on rejection.
        int currentlyProcessed = processed;
        if (TextUtil.isSurrogatePair(text, k)) {
            val = TextUtil.convertToUtf32(text, k);
            processed += 2;
            // The pair spans two chars: skip the low surrogate so it is not
            // looked up again as a separate glyph and counted a second time.
            k++;
        } else {
            val = text.charAt(k);
            processed++;
        }
        Glyph glyph = getGlyph(val);
        if (isAppendableGlyph(glyph)) {
            glyphs.add(glyph);
        } else {
            processed = currentlyProcessed;
            break;
        }
    }
    return processed;
}