/**
 * Tells whether the given string can be handled with one-byte-per-character logic.
 *
 * @param string   the object whose code range is inspected
 * @param encoding the encoding whose maximum character length is inspected
 * @return {@code true} when the code range equals {@code CR_7BIT}, or when the
 *         encoding reports a maximum character length of 1; {@code false} otherwise
 */
public static boolean isSingleByteOptimizable(CodeRangeable string, Encoding encoding) {
    if (string.getCodeRange() == CR_7BIT) {
        return true;
    }
    return encoding.maxLength() == 1;
}
/**
 * Tells whether the given string can be handled with one-byte-per-character logic.
 *
 * @param string   the object whose code range is inspected
 * @param encoding the encoding whose maximum character length is inspected
 * @return {@code true} when the code range equals {@code CR_7BIT}, or when the
 *         encoding reports a maximum character length of 1; {@code false} otherwise
 */
public static boolean isSingleByteOptimizable(CodeRangeable string, Encoding encoding) {
    if (string.getCodeRange() == CR_7BIT) {
        return true;
    }
    return encoding.maxLength() == 1;
}
/**
 * Fills slot 0 of {@code charOffsets} with the character positions that correspond
 * to the byte positions {@code begin} and {@code end}.
 *
 * @param value    the bytes handed to {@code updatePairs}
 * @param encoding the encoding handed to {@code updatePairs}; a maximum character
 *                 length of 1 short-circuits the conversion
 */
private void updateCharOffsetOnlyOneReg(ByteList value, Encoding encoding) {
    // (Re)allocate the region when it is missing or has no register slot.
    if (charOffsets == null || charOffsets.numRegs < 1) {
        charOffsets = new Region(1);
    }

    // With one byte per character, byte offsets already are character offsets.
    if (encoding.maxLength() == 1) {
        charOffsets.beg[0] = begin;
        charOffsets.end[0] = end;
        charOffsetUpdated = true;
        return;
    }

    Pair first = new Pair();
    first.bytePos = begin;
    Pair last = new Pair();
    last.bytePos = end;
    Pair[] pairs = { first, last };

    // Presumably resolves charPos for every pair -- confirm against updatePairs.
    updatePairs(value, encoding, pairs);

    // Look each byte position up again; binarySearch relies on Pair's natural ordering.
    Pair probe = new Pair();
    probe.bytePos = begin;
    charOffsets.beg[0] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
    probe.bytePos = end;
    charOffsets.end[0] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
    // NOTE(review): charOffsetUpdated is only set on the single-byte path above -- confirm this is intended.
}
/**
 * Fills slot 0 of {@code charOffsets} with the character positions that correspond
 * to the byte positions {@code begin} and {@code end}.
 *
 * @param value    the bytes handed to {@code updatePairs}
 * @param encoding the encoding handed to {@code updatePairs}; a maximum character
 *                 length of 1 short-circuits the conversion
 */
private void updateCharOffsetOnlyOneReg(ByteList value, Encoding encoding) {
    // (Re)allocate the region when it is missing or has no register slot.
    if (charOffsets == null || charOffsets.numRegs < 1) {
        charOffsets = new Region(1);
    }

    // With one byte per character, byte offsets already are character offsets.
    if (encoding.maxLength() == 1) {
        charOffsets.beg[0] = begin;
        charOffsets.end[0] = end;
        charOffsetUpdated = true;
        return;
    }

    Pair first = new Pair();
    first.bytePos = begin;
    Pair last = new Pair();
    last.bytePos = end;
    Pair[] pairs = { first, last };

    // Presumably resolves charPos for every pair -- confirm against updatePairs.
    updatePairs(value, encoding, pairs);

    // Look each byte position up again; binarySearch relies on Pair's natural ordering.
    Pair probe = new Pair();
    probe.bytePos = begin;
    charOffsets.beg[0] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
    probe.bytePos = end;
    charOffsets.end[0] = pairs[Arrays.binarySearch(pairs, probe)].charPos;
    // NOTE(review): charOffsetUpdated is only set on the single-byte path above -- confirm this is intended.
}
/**
 * Computes the byte offset reached after advancing {@code n} characters from {@code p}.
 *
 * @param enc   encoding consulted to choose the stepping strategy
 * @param bytes bytes to scan
 * @param p     starting byte offset
 * @param end   ending byte offset; the result never exceeds it
 * @param n     number of characters to advance
 * @return the computed byte offset, or {@code end} when the computed offset is larger
 */
public static int nth(Encoding enc, byte[] bytes, int p, int end, int n) {
    final int offset;
    if (enc.isSingleByte()) {
        // One byte per character: plain addition.
        offset = p + n;
    } else if (enc.isFixedWidth()) {
        // Constant character width: multiply instead of scanning.
        offset = p + n * enc.maxLength();
    } else if (enc.isAsciiCompatible()) {
        offset = nthAsciiCompatible(enc, bytes, p, end, n);
    } else {
        offset = nthNonAsciiCompatible(enc, bytes, p, end, n);
    }
    return Math.min(offset, end);
}
/**
 * Computes the byte offset reached after advancing {@code n} characters from {@code p}.
 *
 * @param enc   encoding consulted to choose the stepping strategy
 * @param bytes bytes to scan
 * @param p     starting byte offset
 * @param end   ending byte offset; the result never exceeds it
 * @param n     number of characters to advance
 * @return the computed byte offset, or {@code end} when the computed offset is larger
 */
public static int nth(Encoding enc, byte[] bytes, int p, int end, int n) {
    final int offset;
    if (enc.isSingleByte()) {
        // One byte per character: plain addition.
        offset = p + n;
    } else if (enc.isFixedWidth()) {
        // Constant character width: multiply instead of scanning.
        offset = p + n * enc.maxLength();
    } else if (enc.isAsciiCompatible()) {
        offset = nthAsciiCompatible(enc, bytes, p, end, n);
    } else {
        offset = nthNonAsciiCompatible(enc, bytes, p, end, n);
    }
    return Math.min(offset, end);
}
/**
 * Computes the byte offset reached after advancing {@code n} characters from {@code p},
 * letting the caller state up front whether the content is single-byte.
 *
 * @param enc        encoding consulted when {@code singlebyte} is false
 * @param bytes      bytes to scan
 * @param p          starting byte offset
 * @param end        ending byte offset; a non-negative result never exceeds it
 * @param n          index of the character whose byte offset is wanted
 * @param singlebyte whether the byte contents are in a single byte encoding
 * @return -1 when the computed offset is negative; otherwise the computed offset,
 *         or {@code end} when the computed offset is larger
 */
public static int nth(Encoding enc, byte[] bytes, int p, int end, int n, boolean singlebyte) {
    final int offset;
    if (singlebyte) {
        // One byte per character: plain addition.
        offset = p + n;
    } else if (enc.isFixedWidth()) {
        // Constant character width: multiply instead of scanning.
        offset = p + n * enc.maxLength();
    } else if (enc.isAsciiCompatible()) {
        offset = nthAsciiCompatible(enc, bytes, p, end, n);
    } else {
        offset = nthNonAsciiCompatible(enc, bytes, p, end, n);
    }

    if (offset < 0) {
        return -1;
    }
    return Math.min(offset, end);
}
/**
 * Moves a byte position inside {@code str} onto a character boundary.
 *
 * @param str     string whose byte list is inspected
 * @param enc     encoding used to locate character heads
 * @param pos     byte position relative to the start of the string's content
 * @param reverse selects the adjustment direction (see the comment in the body)
 * @return {@code pos} unchanged when it is not strictly inside the content or the
 *         encoding's maximum character length is 1; otherwise the adjusted position,
 *         relative to the start of the content
 */
private static int adjustStartPosInternal(RubyString str, Encoding enc, int pos, boolean reverse) {
    final ByteList value = str.getByteList();
    final int len = value.getRealSize();

    // Nothing to adjust at or outside the edges, or when every character is one byte.
    if (pos <= 0 || enc.maxLength() == 1 || pos >= len) {
        return pos;
    }

    final int start = value.getBegin();
    // Past the guard above 0 < pos < len, so this is true exactly when reverse is false.
    final boolean adjustRight = (reverse ? -pos : len - pos) > 0;

    final int adjusted;
    if (adjustRight) {
        adjusted = enc.rightAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len);
    } else {
        adjusted = enc.leftAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len);
    }
    // Convert the absolute array index back to a content-relative position.
    return adjusted - start;
}
/**
 * Moves a byte position inside {@code str} onto a character boundary.
 *
 * @param str     string whose byte list is inspected
 * @param enc     encoding used to locate character heads
 * @param pos     byte position relative to the start of the string's content
 * @param reverse selects the adjustment direction (see the comment in the body)
 * @return {@code pos} unchanged when it is not strictly inside the content or the
 *         encoding's maximum character length is 1; otherwise the adjusted position,
 *         relative to the start of the content
 */
private static int adjustStartPosInternal(RubyString str, Encoding enc, int pos, boolean reverse) {
    final ByteList value = str.getByteList();
    final int len = value.getRealSize();

    // Nothing to adjust at or outside the edges, or when every character is one byte.
    if (pos <= 0 || enc.maxLength() == 1 || pos >= len) {
        return pos;
    }

    final int start = value.getBegin();
    // Past the guard above 0 < pos < len, so this is true exactly when reverse is false.
    final boolean adjustRight = (reverse ? -pos : len - pos) > 0;

    final int adjusted;
    if (adjustRight) {
        adjusted = enc.rightAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len);
    } else {
        adjusted = enc.leftAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len);
    }
    // Convert the absolute array index back to a content-relative position.
    return adjusted - start;
}
/**
 * Moves a byte position inside {@code str} onto a character boundary, after first
 * running this object's {@code check()}.
 *
 * @param str     string whose byte list is inspected
 * @param enc     encoding used to locate character heads
 * @param pos     byte position relative to the start of the string's content
 * @param reverse selects the adjustment direction (see the comment in the body)
 * @return {@code pos} unchanged when it is not strictly inside the content or the
 *         encoding's maximum character length is 1; otherwise the adjusted position,
 *         relative to the start of the content
 */
private final int adjustStartPosInternal(RubyString str, Encoding enc, int pos, boolean reverse) {
    check();

    final ByteList value = str.getByteList();
    final int len = value.getRealSize();

    // Nothing to adjust at or outside the edges, or when every character is one byte.
    if (pos <= 0 || enc.maxLength() == 1 || pos >= len) {
        return pos;
    }

    final int start = value.getBegin();
    // Past the guard above 0 < pos < len, so this is true exactly when reverse is false.
    final boolean adjustRight = (reverse ? -pos : len - pos) > 0;

    final int adjusted;
    if (adjustRight) {
        adjusted = enc.rightAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len);
    } else {
        adjusted = enc.leftAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len);
    }
    // Convert the absolute array index back to a content-relative position.
    return adjusted - start;
}
/**
 * Moves a byte position inside {@code str} onto a character boundary, after first
 * running this object's {@code check()}.
 *
 * @param str     string whose byte list is inspected
 * @param enc     encoding used to locate character heads
 * @param pos     byte position relative to the start of the string's content
 * @param reverse selects the adjustment direction (see the comment in the body)
 * @return {@code pos} unchanged when it is not strictly inside the content or the
 *         encoding's maximum character length is 1; otherwise the adjusted position,
 *         relative to the start of the content
 */
private final int adjustStartPosInternal(RubyString str, Encoding enc, int pos, boolean reverse) {
    check();

    final ByteList value = str.getByteList();
    final int len = value.getRealSize();

    // Nothing to adjust at or outside the edges, or when every character is one byte.
    if (pos <= 0 || enc.maxLength() == 1 || pos >= len) {
        return pos;
    }

    final int start = value.getBegin();
    // Past the guard above 0 < pos < len, so this is true exactly when reverse is false.
    final boolean adjustRight = (reverse ? -pos : len - pos) > 0;

    final int adjusted;
    if (adjustRight) {
        adjusted = enc.rightAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len);
    } else {
        adjusted = enc.leftAdjustCharHead(value.getUnsafeBytes(), start, start + pos, start + len);
    }
    // Convert the absolute array index back to a content-relative position.
    return adjusted - start;
}
/**
 * Appends a run of bytes, treated as ASCII characters, to this string.
 *
 * @param bytes  array holding the bytes to append
 * @param ptr    index of the first byte to append
 * @param ptrLen number of bytes to append
 * @return this string
 */
public final RubyString catAscii(byte[] bytes, int ptr, int ptrLen) {
    final Encoding enc = value.getEncoding();

    if (enc.isAsciiCompatible()) {
        // The bytes can be appended verbatim, tagged as 7-bit.
        EncodingUtils.encCrStrBufCat(getRuntime(), this, new ByteList(bytes, ptr, ptrLen), enc, CR_7BIT);
        return this;
    }

    // Otherwise encode every byte as one character of this string's encoding
    // and append the encoded form.
    final byte[] scratch = new byte[enc.maxLength()];
    final int stop = ptr + ptrLen;
    for (int i = ptr; i < stop; i++) {
        int code = bytes[i];
        int width = codeLength(enc, code);
        EncodingUtils.encMbcput(code, scratch, 0, enc);
        EncodingUtils.encCrStrBufCat(getRuntime(), this, scratch, 0, width, enc, CR_VALID);
    }
    return this;
}
/**
 * Appends a run of bytes, treated as ASCII characters, to this string.
 *
 * @param bytes  array holding the bytes to append
 * @param ptr    index of the first byte to append
 * @param ptrLen number of bytes to append
 * @return this string
 */
public final RubyString catAscii(byte[] bytes, int ptr, int ptrLen) {
    final Encoding enc = value.getEncoding();

    if (enc.isAsciiCompatible()) {
        // The bytes can be appended verbatim, tagged as 7-bit.
        EncodingUtils.encCrStrBufCat(getRuntime(), this, new ByteList(bytes, ptr, ptrLen), enc, CR_7BIT);
        return this;
    }

    // Otherwise encode every byte as one character of this string's encoding
    // and append the encoded form.
    final byte[] scratch = new byte[enc.maxLength()];
    final int stop = ptr + ptrLen;
    for (int i = ptr; i < stop; i++) {
        int code = bytes[i];
        int width = codeLength(enc, code);
        EncodingUtils.encMbcput(code, scratch, 0, enc);
        EncodingUtils.encCrStrBufCat(getRuntime(), this, scratch, 0, width, enc, CR_VALID);
    }
    return this;
}
/**
 * Upper-cases this string in place.
 *
 * @param context current thread context; supplies the runtime and {@code nil}
 * @param flags   case-mapping option bits ({@code Config.CASE_*})
 * @return this string when at least one character was changed, otherwise {@code context.nil}
 */
private IRubyObject upcase_bang(ThreadContext context, int flags) {
    modifyAndKeepCodeRange();
    final Encoding enc = checkDummyEncoding();

    // ASCII-only mapping was requested and the encoding is UTF-8 or one byte per character.
    final boolean asciiOnlyRequested =
            (flags & Config.CASE_ASCII_ONLY) != 0 && (enc.isUTF8() || enc.maxLength() == 1);

    if (asciiOnlyRequested
            || ((flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && getCodeRange() == CR_7BIT)) {
        // Fast path: rewrite 'a'..'z' directly in the backing array.
        final byte[] bytes = value.getUnsafeBytes();
        final int from = value.getBegin();
        final int to = from + value.getRealSize();
        for (int i = from; i < to; i++) {
            int c = bytes[i] & 0xff;
            if (Encoding.isAscii(c) && 'a' <= c && c <= 'z') {
                bytes[i] = (byte) (c - 'a' + 'A');
                flags |= Config.CASE_MODIFIED;
            }
        }
    } else {
        // General path: delegate to caseMap and drop the cached code range on change.
        flags = caseMap(context.runtime, flags, enc);
        if ((flags & Config.CASE_MODIFIED) != 0) {
            clearCodeRange();
        }
    }

    if ((flags & Config.CASE_MODIFIED) != 0) {
        return this;
    }
    return context.nil;
}
/**
 * Appends a run of bytes, treated as ASCII characters, to this string.
 *
 * @param bytes array holding the bytes to append
 * @param p     index of the first byte to append
 * @param len   number of bytes to append
 * @return this string
 */
public final RubyString catAscii(byte[] bytes, int p, int len) {
    Encoding enc = value.getEncoding();
    if (enc.isAsciiCompatible()) {
        // The bytes can be appended verbatim, tagged as 7-bit.
        EncodingUtils.encCrStrBufCat(getRuntime(), this, new ByteList(bytes, p, len), enc, CR_7BIT, null);
    } else {
        byte[] buf = new byte[enc.maxLength()];
        int end = p + len;
        while (p < end) {
            int c = bytes[p];
            int cl = codeLength(getRuntime(), enc, c);
            enc.codeToMbc(c, buf, 0);
            // Append the encoded character (buf[0..cl)), not the raw input bytes: the previous
            // code appended bytes[p..p+len) on every iteration, which is both unencoded and
            // runs past the requested range. The encoded form is tagged CR_VALID, matching the
            // sibling catAscii overload.
            EncodingUtils.encCrStrBufCat(getRuntime(), this, new ByteList(buf, 0, cl), enc, CR_VALID, null);
            p++;
        }
    }
    return this;
}
/**
 * Lower-cases this string in place.
 *
 * @param context current thread context; supplies the runtime and {@code nil}
 * @param flags   case-mapping option bits ({@code Config.CASE_*})
 * @return this string when at least one character was changed, otherwise {@code context.nil}
 */
private IRubyObject downcase_bang(ThreadContext context, int flags) {
    modifyAndKeepCodeRange();
    final Encoding enc = checkDummyEncoding();

    // ASCII-only mapping was requested and the encoding is UTF-8 or one byte per character.
    final boolean asciiOnlyRequested =
            (flags & Config.CASE_ASCII_ONLY) != 0 && (enc.isUTF8() || enc.maxLength() == 1);

    if (asciiOnlyRequested
            || ((flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && getCodeRange() == CR_7BIT)) {
        // Fast path: rewrite 'A'..'Z' directly in the backing array.
        final byte[] bytes = value.getUnsafeBytes();
        final int from = value.getBegin();
        final int to = from + value.getRealSize();
        for (int i = from; i < to; i++) {
            int c = bytes[i] & 0xff;
            if (Encoding.isAscii(c) && 'A' <= c && c <= 'Z') {
                bytes[i] = (byte) (c - 'A' + 'a');
                flags |= Config.CASE_MODIFIED;
            }
        }
    } else {
        // General path: delegate to caseMap and drop the cached code range on change.
        flags = caseMap(context.runtime, flags, enc);
        if ((flags & Config.CASE_MODIFIED) != 0) {
            clearCodeRange();
        }
    }

    if ((flags & Config.CASE_MODIFIED) != 0) {
        return this;
    }
    return context.nil;
}
/**
 * Lower-cases this string in place.
 *
 * @param context current thread context; supplies the runtime and {@code nil}
 * @param flags   case-mapping option bits ({@code Config.CASE_*})
 * @return this string when at least one character was changed, otherwise {@code context.nil}
 */
private IRubyObject downcase_bang(ThreadContext context, int flags) {
    modifyAndKeepCodeRange();
    final Encoding enc = checkDummyEncoding();

    // ASCII-only mapping was requested and the encoding is UTF-8 or one byte per character.
    final boolean asciiOnlyRequested =
            (flags & Config.CASE_ASCII_ONLY) != 0 && (enc.isUTF8() || enc.maxLength() == 1);

    if (asciiOnlyRequested
            || ((flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && getCodeRange() == CR_7BIT)) {
        // Fast path: rewrite 'A'..'Z' directly in the backing array.
        final byte[] bytes = value.getUnsafeBytes();
        final int from = value.getBegin();
        final int to = from + value.getRealSize();
        for (int i = from; i < to; i++) {
            int c = bytes[i] & 0xff;
            if (Encoding.isAscii(c) && 'A' <= c && c <= 'Z') {
                bytes[i] = (byte) (c - 'A' + 'a');
                flags |= Config.CASE_MODIFIED;
            }
        }
    } else {
        // General path: delegate to caseMap and drop the cached code range on change.
        flags = caseMap(context.runtime, flags, enc);
        if ((flags & Config.CASE_MODIFIED) != 0) {
            clearCodeRange();
        }
    }

    if ((flags & Config.CASE_MODIFIED) != 0) {
        return this;
    }
    return context.nil;
}
/**
 * Appends a run of bytes, treated as ASCII characters, to this string.
 *
 * @param bytes array holding the bytes to append
 * @param p     index of the first byte to append
 * @param len   number of bytes to append
 * @return this string
 */
public final RubyString catAscii(byte[] bytes, int p, int len) {
    Encoding enc = value.getEncoding();
    if (enc.isAsciiCompatible()) {
        // The bytes can be appended verbatim, tagged as 7-bit.
        EncodingUtils.encCrStrBufCat(getRuntime(), this, new ByteList(bytes, p, len), enc, CR_7BIT, null);
    } else {
        byte[] buf = new byte[enc.maxLength()];
        int end = p + len;
        while (p < end) {
            int c = bytes[p];
            int cl = codeLength(getRuntime(), enc, c);
            enc.codeToMbc(c, buf, 0);
            // Append the encoded character (buf[0..cl)), not the raw input bytes: the previous
            // code appended bytes[p..p+len) on every iteration, which is both unencoded and
            // runs past the requested range. The encoded form is tagged CR_VALID, matching the
            // sibling catAscii overload.
            EncodingUtils.encCrStrBufCat(getRuntime(), this, new ByteList(buf, 0, cl), enc, CR_VALID, null);
            p++;
        }
    }
    return this;
}
/**
 * Upper-cases this string in place.
 *
 * @param context current thread context; supplies the runtime and {@code nil}
 * @param flags   case-mapping option bits ({@code Config.CASE_*})
 * @return this string when at least one character was changed, otherwise {@code context.nil}
 */
private IRubyObject upcase_bang(ThreadContext context, int flags) {
    modifyAndKeepCodeRange();
    final Encoding enc = checkDummyEncoding();

    // ASCII-only mapping was requested and the encoding is UTF-8 or one byte per character.
    final boolean asciiOnlyRequested =
            (flags & Config.CASE_ASCII_ONLY) != 0 && (enc.isUTF8() || enc.maxLength() == 1);

    if (asciiOnlyRequested
            || ((flags & Config.CASE_FOLD_TURKISH_AZERI) == 0 && getCodeRange() == CR_7BIT)) {
        // Fast path: rewrite 'a'..'z' directly in the backing array.
        final byte[] bytes = value.getUnsafeBytes();
        final int from = value.getBegin();
        final int to = from + value.getRealSize();
        for (int i = from; i < to; i++) {
            int c = bytes[i] & 0xff;
            if (Encoding.isAscii(c) && 'a' <= c && c <= 'z') {
                bytes[i] = (byte) (c - 'a' + 'A');
                flags |= Config.CASE_MODIFIED;
            }
        }
    } else {
        // General path: delegate to caseMap and drop the cached code range on change.
        flags = caseMap(context.runtime, flags, enc);
        if ((flags & Config.CASE_MODIFIED) != 0) {
            clearCodeRange();
        }
    }

    if ((flags & Config.CASE_MODIFIED) != 0) {
        return this;
    }
    return context.nil;
}
private char multibyteCharAt(Encoding enc, int beg, int length) { int p; int s = value.getBegin(); int end = s + length; byte[] bytes = value.getUnsafeBytes(); if (beg > 0 && beg > StringSupport.strLengthFromRubyString(this, enc)) { throw new StringIndexOutOfBoundsException(beg); } if (isCodeRangeValid() && enc.isUTF8()) { p = StringSupport.utf8Nth(bytes, s, end, beg); } else if (enc.isFixedWidth()) { int w = enc.maxLength(); p = s + beg * w; if (p > end || w > end - p) { throw new StringIndexOutOfBoundsException(beg); } } else if ((p = StringSupport.nth(enc, bytes, s, end, beg)) == end) { throw new StringIndexOutOfBoundsException(beg); } int codepoint = enc.mbcToCode(bytes, p, end); if (Character.isBmpCodePoint(codepoint)) { return (char) codepoint; } // we can only return high surrogate here return Character.highSurrogate(codepoint); }