/**
 * Encodes {@code length} characters of {@code s}, beginning at index {@code offset}, into
 * {@code out}.
 *
 * <p>Convenience overload: it forwards to the five-argument variant, passing {@code 0} as the
 * trailing argument (taken here to be the start index in {@code out} -- confirm against that
 * overload). No capacity check is performed in this method; the caller is responsible for
 * supplying a destination array that is large enough.
 *
 * @param s the character sequence to encode
 * @param offset index in {@code s} of the first character to encode
 * @param length number of characters to encode
 * @param out destination array for the encoded bytes
 * @return the value returned by the five-argument overload
 */
public static int UTF16toUTF8(final CharSequence s, final int offset, final int length, byte[] out) {
  final int outStart = 0;
  return UTF16toUTF8(s, offset, length, out, outStart);
}
/** * Initialize the byte[] from the UTF8 bytes * for the provided String. * * @param text This must be well-formed * unicode text, with no unpaired surrogates. */ public BytesRef(CharSequence text) { this(new byte[UnicodeUtil.maxUTF8Length(text.length())]); length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes); }
/**
 * Overwrites the content of this buffer with the UTF-8 encoded bytes that represent a slice of
 * the provided text.
 *
 * @param text the character sequence to encode
 * @param off index in {@code text} of the first character to encode
 * @param len number of characters to encode
 */
public void copyChars(CharSequence text, int off, int len) {
  // Reserve the size reported by maxUTF8Length before encoding; grow() must run first because
  // ref.bytes is read only afterwards.
  final int capacityNeeded = UnicodeUtil.maxUTF8Length(len);
  grow(capacityNeeded);
  final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  ref.length = encodedLength;
}
/**
 * Overwrites the content of this buffer with the UTF-8 encoded bytes that represent a slice of
 * the provided character array.
 *
 * @param text the characters to encode
 * @param off index in {@code text} of the first character to encode
 * @param len number of characters to encode
 */
public void copyChars(char[] text, int off, int len) {
  // Reserve the size reported by maxUTF8Length before encoding; grow() must run first because
  // ref.bytes is read only afterwards.
  final int capacityNeeded = UnicodeUtil.maxUTF8Length(len);
  grow(capacityNeeded);
  final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  ref.length = encodedLength;
}
@Override public void writeString(String string) throws IOException { int maxLen = UnicodeUtil.maxUTF8Length(string.length()); if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING) { // string is small enough that we don't need to save memory by falling back to double-pass approach // this is just an optimized writeString() that re-uses scratchBytes. if (scratchBytes == null) { scratchBytes = new byte[ArrayUtil.oversize(maxLen, Character.BYTES)]; } else { scratchBytes = ArrayUtil.grow(scratchBytes, maxLen); } int len = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), scratchBytes); writeVInt(len); writeBytes(scratchBytes, len); } else { // use a double pass approach to avoid allocating a large intermediate buffer for string encoding int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, string.length()); writeVInt(numBytes); bytes = ArrayUtil.grow(bytes, length + numBytes); length = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), bytes, length); } }
private static BytesRef encodeUtf8Id(String id) { byte[] b = new byte[1 + UnicodeUtil.maxUTF8Length(id.length())]; // Prepend a byte that indicates that the content is an utf8 string b[0] = (byte) UTF8; int length = UnicodeUtil.UTF16toUTF8(id, 0, id.length(), b, 1); return new BytesRef(b, 0, length); }
/**
 * Encodes {@code length} characters of {@code s}, beginning at index {@code offset}, into
 * {@code out}.
 *
 * <p>Convenience overload: it forwards to the five-argument variant, passing {@code 0} as the
 * trailing argument (taken here to be the start index in {@code out} -- confirm against that
 * overload). No capacity check is performed in this method; the caller is responsible for
 * supplying a destination array that is large enough.
 *
 * @param s the character sequence to encode
 * @param offset index in {@code s} of the first character to encode
 * @param length number of characters to encode
 * @param out destination array for the encoded bytes
 * @return the value returned by the five-argument overload
 */
public static int UTF16toUTF8(final CharSequence s, final int offset, final int length, byte[] out) {
  final int outStart = 0;
  return UTF16toUTF8(s, offset, length, out, outStart);
}
/**
 * Encodes {@code length} characters of {@code s}, beginning at index {@code offset}, into
 * {@code out}.
 *
 * <p>Convenience overload: it forwards to the five-argument variant, passing {@code 0} as the
 * trailing argument (taken here to be the start index in {@code out} -- confirm against that
 * overload). No capacity check is performed in this method; the caller is responsible for
 * supplying a destination array that is large enough.
 *
 * @param s the character sequence to encode
 * @param offset index in {@code s} of the first character to encode
 * @param length number of characters to encode
 * @param out destination array for the encoded bytes
 * @return the value returned by the five-argument overload
 */
public static int UTF16toUTF8(final CharSequence s, final int offset, final int length, byte[] out) {
  final int outStart = 0;
  return UTF16toUTF8(s, offset, length, out, outStart);
}
/** * Copies the UTF8 bytes for this string. * * @param text * Must be well-formed unicode text, with no unpaired surrogates or invalid UTF16 code units. */ public void copyChars(CharSequence text) { assert offset == 0; // TODO broken if offset != 0 UnicodeUtil.UTF16toUTF8(text, 0, text.length(), this); }
/**
 * Creates an instance whose content is the UTF-8 encoding of the provided text.
 *
 * <p>The backing array is allocated up front with room for
 * {@code UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR} bytes per input character; {@code length} is then
 * set to the value returned by the encoder.
 *
 * @param text the text to encode. This must be well-formed unicode text, with no unpaired
 *     surrogates.
 * @throws ArithmeticException if the worst-case byte count does not fit in an {@code int}
 */
public BytesRef(CharSequence text) {
  // Overflow-checked multiply: a plain int product can wrap to a negative or, worse, a small
  // positive size for very long inputs, yielding an undersized array instead of a clear error.
  this(new byte[Math.multiplyExact(UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR, text.length())]);
  length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes);
}
/**
 * Replaces the content of this buffer with the UTF-8 encoded bytes that represent a slice of
 * the provided text.
 *
 * @param text the character sequence to encode
 * @param off index in {@code text} of the first character to encode
 * @param len number of characters to encode
 * @throws ArithmeticException if the worst-case byte count for {@code len} characters does not
 *     fit in an {@code int}
 */
public void copyChars(CharSequence text, int off, int len) {
  // Overflow-checked multiply: a plain int product can wrap around for very large len and make
  // grow() reserve far too little space before the encoder writes into ref.bytes.
  grow(Math.multiplyExact(len, UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR));
  ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
}
/** * Initialize the byte[] from the UTF8 bytes * for the provided String. * * @param text This must be well-formed * unicode text, with no unpaired surrogates. */ public BytesRef(CharSequence text) { this(new byte[UnicodeUtil.maxUTF8Length(text.length())]); length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes); }
/**
 * Compresses the String value using the specified compressionLevel (constants are defined in
 * {@code java.util.zip.Deflater}).
 *
 * <p>The string is first encoded as UTF-8 into a temporary array sized for
 * {@code UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR} bytes per character, and the encoded prefix of
 * that array is then passed to {@code compress}.
 *
 * @param value the string to compress
 * @param compressionLevel the compression level forwarded to {@code compress}
 * @return the result of {@code compress} on the UTF-8 bytes of {@code value}
 * @throws ArithmeticException if the worst-case UTF-8 size of {@code value} does not fit in an
 *     {@code int}
 */
public static byte[] compressString(String value, int compressionLevel) {
  // Overflow-checked multiply: a plain int product can wrap to a negative or too-small positive
  // size for very long strings, producing an undersized buffer instead of a clear error.
  final int maxBytes = Math.multiplyExact(UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR, value.length());
  byte[] b = new byte[maxBytes];
  final int len = UnicodeUtil.UTF16toUTF8(value, 0, value.length(), b);
  return compress(b, 0, len, compressionLevel);
}
/**
 * Replaces the content of this buffer with the UTF-8 encoded bytes that represent a slice of
 * the provided character array.
 *
 * @param text the characters to encode
 * @param off index in {@code text} of the first character to encode
 * @param len number of characters to encode
 * @throws ArithmeticException if the worst-case byte count for {@code len} characters does not
 *     fit in an {@code int}
 */
public void copyChars(char[] text, int off, int len) {
  // Overflow-checked multiply: a plain int product can wrap around for very large len and make
  // grow() reserve far too little space before the encoder writes into ref.bytes.
  grow(Math.multiplyExact(len, UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR));
  ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
}
/**
 * Replaces the content of this buffer with the UTF-8 encoded bytes that represent a slice of
 * the provided text.
 *
 * @param text the character sequence to encode
 * @param off index in {@code text} of the first character to encode
 * @param len number of characters to encode
 * @throws ArithmeticException if the worst-case byte count for {@code len} characters does not
 *     fit in an {@code int}
 */
public void copyChars(CharSequence text, int off, int len) {
  // Overflow-checked multiply: a plain int product can wrap around for very large len and make
  // grow() reserve far too little space before the encoder writes into ref.bytes.
  grow(Math.multiplyExact(len, UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR));
  ref.length = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
}
private static BytesRef encodeUtf8Id(String id) { byte[] b = new byte[1 + UnicodeUtil.maxUTF8Length(id.length())]; // Prepend a byte that indicates that the content is an utf8 string b[0] = (byte) UTF8; int length = UnicodeUtil.UTF16toUTF8(id, 0, id.length(), b, 1); return new BytesRef(b, 0, length); }
/**
 * Writes a string as a vInt byte count followed by that many encoded bytes.
 *
 * <p>The string is encoded through {@code UnicodeUtil.UTF16toUTF8} into the {@code utf8Result}
 * holder (declared outside this method), whose {@code length} and {@code result} are then
 * written out.
 *
 * @param s the string to write
 * @throws IOException declared for the underlying write calls
 * @see IndexInput#readString()
 */
public void writeString(String s) throws IOException {
  final int charCount = s.length();
  UnicodeUtil.UTF16toUTF8(s, 0, charCount, utf8Result);

  // Length prefix first, then the encoded bytes themselves.
  final int byteCount = utf8Result.length;
  writeVInt(byteCount);
  writeBytes(utf8Result.result, 0, byteCount);
}
/**
 * Compresses the String value using the specified compressionLevel (constants are defined in
 * {@code java.util.zip.Deflater}).
 *
 * <p>The string is first encoded through {@code UnicodeUtil.UTF16toUTF8} into a fresh
 * {@code BytesRef}; the first {@code length} bytes of that reference's array are then passed to
 * {@code compress}.
 *
 * @param value the string to compress
 * @param compressionLevel the compression level forwarded to {@code compress}
 * @return the result of {@code compress} on the encoded bytes of {@code value}
 */
public static byte[] compressString(String value, int compressionLevel) {
  final BytesRef utf8 = new BytesRef();
  final int charCount = value.length();
  UnicodeUtil.UTF16toUTF8(value, 0, charCount, utf8);
  return compress(utf8.bytes, 0, utf8.length, compressionLevel);
}
/**
 * Writes a string as a vInt byte count followed by that many encoded bytes.
 *
 * <p>The string is encoded through {@code UnicodeUtil.UTF16toUTF8} into the {@code utf8Result}
 * holder (declared outside this method), whose {@code length} and {@code result} are then
 * written out.
 *
 * @param s the string to write
 * @throws IOException declared for the underlying write calls
 * @see IndexInput#readString()
 */
public void writeString(String s) throws IOException {
  final int charCount = s.length();
  UnicodeUtil.UTF16toUTF8(s, 0, charCount, utf8Result);

  // Length prefix first, then the encoded bytes themselves.
  final int byteCount = utf8Result.length;
  writeVInt(byteCount);
  writeBytes(utf8Result.result, 0, byteCount);
}
/**
 * Overwrites the content of this buffer with the UTF-8 encoded bytes that represent a slice of
 * the provided text.
 *
 * @param text the character sequence to encode
 * @param off index in {@code text} of the first character to encode
 * @param len number of characters to encode
 */
public void copyChars(CharSequence text, int off, int len) {
  // Reserve the size reported by maxUTF8Length before encoding; grow() must run first because
  // ref.bytes is read only afterwards.
  final int capacityNeeded = UnicodeUtil.maxUTF8Length(len);
  grow(capacityNeeded);
  final int encodedLength = UnicodeUtil.UTF16toUTF8(text, off, len, ref.bytes);
  ref.length = encodedLength;
}