Refine search
/**
 * Returns the charset name reported by this object's {@code encoding}.
 *
 * @return the value of {@code encoding.getCharsetName()}
 */
@Override
public String getCharset() {
    final String charsetName = encoding.getCharsetName();
    return charsetName;
}
private IRubyObject each_lineCommon19(ThreadContext context, IRubyObject sep, Block block) { Ruby runtime = context.runtime; if (sep.isNil()) { block.yield(context, this); return this; if (! sep.respondsTo("to_str")) { throw runtime.newTypeError("can't convert " + sep.getMetaClass() + " into String"); RubyString sepStr = sep.convertToString(); if (sepStr == runtime.getGlobalVariables().getDefaultSeparator()) { enc = val.getEncoding(); while (p < end) { if (bytes[p] == (byte)'\n') { int p0 = enc.leftAdjustCharHead(bytes, s, p, end); if (enc.isNewLine(bytes, p0, end)) { p = p0 + StringSupport.length(enc, bytes, p0, end); block.yield(context, makeShared19(runtime, val, s - offset, p - s).infectBy(this));
chomp = _chomp != null || _chomp.isTrue(); runtime.getWarnings().warn("given block not used"); } else { runtime.getWarnings().warning("passing a block to String#lines is deprecated"); wantarray = false; final IRubyObject defaultSep = runtime.getGlobalVariables().get("$/"); RubyString rs = arg.convertToString(); byte[] rsbytes = rsByteList.unsafeBytes(); int rsptr = rsByteList.begin(); if (rsByteList.length() == enc.minLength() && enc.isNewLine(rsbytes, rsptr, rsByteList.length())) { rsnewline = true; if (rs == defaultSep && !enc.isAsciiCompatible()) { rs = RubyString.newString(runtime, rsbytes, rsptr, rslen); rs = (RubyString) EncodingUtils.rbStrEncode(context, rs, runtime.getEncodingService().convertEncodingToRubyEncoding(enc), 0, context.nil); if (pos < 0) break; hit = subptr + pos; adjusted = enc.rightAdjustCharHead(strBytes, subptr, hit, pend); if (hit != adjusted) { subptr = adjusted;
Ruby runtime = context.runtime; if (begin instanceof RubyNumeric || end instanceof RubyNumeric || !TypeConverter.convertToTypeWithCheck(begin, runtime.getInteger(), "to_int").isNil() || !TypeConverter.convertToTypeWithCheck(end, runtime.getInteger(), "to_int").isNil()) { if (rangeLe(context, begin, obj) != null) { if (isExclusive) { if (rangeLt(context, obj, end) != null) return runtime.getTrue(); } else { if (rangeLe(context, obj, end) != null) return runtime.getTrue(); ((RubyString) begin).getByteList().getRealSize() == 1 && ((RubyString) end).getByteList().getRealSize() == 1) { if (obj.isNil()) return runtime.getFalse(); if (obj instanceof RubyString) { ByteList Vbytes = ((RubyString)obj).getByteList(); ByteList Ebytes = ((RubyString)end).getByteList(); int e = Ebytes.getUnsafeBytes()[Ebytes.getBegin()] & 0xff; if (Encoding.isAscii(v) && Encoding.isAscii(b) && Encoding.isAscii(e)) { if ((b <= v && v < e) || (!isExclusive && v == e)) return runtime.getTrue(); return runtime.getFalse();
boolean cflag = false; if (value.getRealSize() > 0) { if (enc.isAsciiCompatible()) { if (trSrc.buf.length > 0 && (trSrc.buf[trSrc.p] & 0xff) == '^' && trSrc.p + 1 < trSrc.pend) { cflag = true; if (enc.mbcToCode(trSrc.buf, trSrc.p, trSrc.pend) == '^' && trSrc.p + cl < trSrc.pend) { cflag = true; trSrc.p += cl; if (c != -1) { if (save == c) { if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID; continue; enc.codeToMbc(c, buf, t); if (mayModify && (tlen == 1 ? sbytes[s] != buf[t] : ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true; if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID; t += tlen; } else if (enc.isSingleByte() || (singlebyte && hash == null)) { while (s < send) { c = sbytes[s] & 0xff; if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID; s++; enc.codeToMbc(c, buf, t); if (mayModify && (tlen == 1 ? sbytes[s] != buf[t] : ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true;
/**
 * Pushes a character (given as an integer code point) or a string back
 * through {@code ungetbyteCommon}.
 *
 * <p>The receiver must pass {@code checkModifiable()} and
 * {@code checkReadable()} first. A nil argument is returned untouched.
 *
 * @param context the current thread context
 * @param arg     nil, an integer code point, or an object convertible to a string
 * @return {@code arg} itself when it is nil, otherwise {@code context.nil}
 */
@JRubyMethod(name = "ungetc")
public IRubyObject ungetc(ThreadContext context, IRubyObject arg) {
    checkModifiable();
    checkReadable();

    if (arg.isNil()) {
        return arg;
    }

    if (!(arg instanceof RubyInteger)) {
        // String path: coerce, then transcode into this object's encoding when
        // the two encodings differ and ours is not ASCIIEncoding.INSTANCE.
        arg = arg.convertToString();
        final Encoding targetEncoding = getEncoding();
        RubyString text = (RubyString) arg;
        final Encoding sourceEncoding = text.getEncoding();

        final boolean sameEncoding = targetEncoding == sourceEncoding;
        if (!sameEncoding && targetEncoding != ASCIIEncoding.INSTANCE) {
            text = EncodingUtils.strConvEnc(context, text, sourceEncoding, targetEncoding);
        }

        final ByteList payload = text.getByteList();
        ungetbyteCommon(payload.unsafeBytes(), payload.begin(), payload.realSize());
        return context.nil;
    }

    // Integer path: encode the code point into a scratch buffer.
    final int codePoint = RubyNumeric.num2int(arg);
    final byte[] scratch = new byte[16];
    final Encoding ownEncoding = getEncoding();
    final int encodedLength = ownEncoding.codeToMbcLength(codePoint);

    if (encodedLength <= 0) {
        // Return value is discarded; presumably this call raises for an
        // unencodable code point -- TODO confirm against EncodingUtils.
        EncodingUtils.encUintChr(context, codePoint, ownEncoding);
    }

    ownEncoding.codeToMbc(codePoint, scratch, 0);
    ungetbyteCommon(scratch, 0, encodedLength);
    return context.nil;
}
/**
 * Swaps the case of every character in {@code bytes[s, end)} in place,
 * decoding one code point at a time with {@code enc}.
 *
 * @param runtime the runtime, used for code point helpers and for nil
 * @param enc     the encoding of the byte range
 * @param bytes   the buffer that is rewritten in place
 * @param s       offset of the first byte to process
 * @param end     offset just past the last byte to process
 * @return {@code this} if at least one character was rewritten, otherwise nil
 */
private IRubyObject multiByteSwapcase(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
    boolean changed = false;

    for (int pos = s; pos < end; ) {
        final int ch = codePoint(runtime, enc, bytes, pos, end);

        if (enc.isUpper(ch)) {
            enc.codeToMbc(toLower(enc, ch), bytes, pos);
            changed = true;
        } else if (enc.isLower(ch)) {
            enc.codeToMbc(toUpper(enc, ch), bytes, pos);
            changed = true;
        }

        // Advance by the encoded width of the code point that was read.
        pos += codeLength(runtime, enc, ch);
    }

    if (changed) {
        return this;
    }
    return runtime.getNil();
}
private static NeighborChar succAlnumChar(Encoding enc, byte[]bytes, int p, int len, byte[]carry, int carryP) { byte save[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN]; int c = enc.mbcToCode(bytes, p, p + len); if (enc.isDigit(c)) { cType = CharacterType.DIGIT; } else if (enc.isAlpha(c)) { cType = CharacterType.ALPHA; } else { NeighborChar ret = succChar(enc, bytes, p, len); if (ret == NeighborChar.FOUND) { c = enc.mbcToCode(bytes, p, p + len); if (enc.isCodeCType(c, cType)) return NeighborChar.FOUND; ret = predChar(enc, bytes, p, len); if (ret == NeighborChar.FOUND) { c = enc.mbcToCode(bytes, p, p + len); if (!enc.isCodeCType(c, cType)) { System.arraycopy(save, 0, bytes, p, len); break;
final int c; final int cl; if (enc.isAsciiCompatible()) { cl = 1; c = bytes[p] & 0xff; } else { cl = StringSupport.preciseLength(enc, bytes, p, end); c = enc.mbcToCode(bytes, p, end); if (!Encoding.isAscii(c)) { p += StringSupport.length(enc, bytes, p, end); } else if (c != '/' && enc.isPrint(c)) { p += cl; } else { to.append(bytes, start, len); } else { boolean isUnicode = enc.isUnicode(); p = start; while (p < end) { final int c; final int cl; if (enc.isAsciiCompatible()) { cl = 1; c = bytes[p] & 0xff; } else { cl = StringSupport.preciseLength(enc, bytes, p, end); c = enc.mbcToCode(bytes, p, end);
/**
 * Encodes a single code point into a freshly allocated {@code ByteList}.
 *
 * @param runtime the runtime used to build the raised errors
 * @param enc     the encoding to encode with
 * @param value   the code point to encode
 * @return a byte list whose real size is the encoded length of {@code value}
 * @throws RaiseException a RangeError when {@code value} is negative, when the
 *         encoding reports a non-positive length (or throws while computing
 *         it), or when writing the bytes throws an {@code EncodingException}
 */
private ByteList fromEncodedBytes(Ruby runtime, Encoding enc, int value) {
    // Negative values are never asked of the encoding; they count as length 0.
    int byteCount = 0;
    if (value >= 0) {
        try {
            byteCount = enc.codeToMbcLength(value);
        } catch (EncodingException ee) {
            byteCount = 0;
        }
    }

    if (byteCount <= 0) {
        throw runtime.newRangeError(this.toString() + " out of char range");
    }

    final ByteList encoded = new ByteList(byteCount);
    try {
        enc.codeToMbc(value, encoded.getUnsafeBytes(), 0);
    } catch (EncodingException e) {
        throw runtime.newRangeError("invalid codepoint " + String.format("0x%x in ", value) + enc.getCharsetName());
    }

    encoded.setRealSize(byteCount);
    return encoded;
}
/** * Is the string this constant represents a valid constant identifier name. */ public boolean validClassVariableName() { boolean valid = ByteListHelper.eachCodePoint(getBytes(), (int index, int codepoint, Encoding encoding) -> index == 0 && codepoint == '@' || index == 1 && codepoint == '@' || index == 2 && (!encoding.isDigit(codepoint)) && (encoding.isAlnum(codepoint) || !Encoding.isAscii(codepoint) || codepoint == '_') || index > 2 && (encoding.isAlnum(codepoint) || !Encoding.isAscii(codepoint) || codepoint == '_')); return valid && getBytes().length() >= 3; // FIXME: good enough on length check? Trying to avoid counter. }
/**
 * Returns the byte offset of the {@code n}-th character counted from
 * {@code p}, never exceeding {@code end}.
 *
 * <p>Single-byte and fixed-width encodings are handled arithmetically; the
 * other cases delegate to {@code nthAsciiCompatible} or
 * {@code nthNonAsciiCompatible}.
 *
 * <p>The arithmetic is done in {@code long}. With 32-bit arithmetic a large
 * {@code n} (especially once multiplied by the character width) wraps around,
 * and the wrapped value can slip past the {@code > end} clamp and be returned
 * as a bogus offset.
 *
 * @param enc   the encoding of the bytes
 * @param bytes the byte buffer
 * @param p     the starting byte offset
 * @param end   the offset just past the last valid byte
 * @param n     the number of characters to advance
 * @return the resulting offset, or {@code end} if it would lie beyond {@code end}
 */
public static int nth(Encoding enc, byte[]bytes, int p, int end, int n) {
    final long target;
    if (enc.isSingleByte()) {
        target = (long) p + n;
    } else if (enc.isFixedWidth()) {
        target = (long) p + (long) n * enc.maxLength();
    } else if (enc.isAsciiCompatible()) {
        target = nthAsciiCompatible(enc, bytes, p, end, n);
    } else {
        target = nthNonAsciiCompatible(enc, bytes, p, end, n);
    }

    if (target > end) {
        return end;
    }
    // Values at or below end fit in an int unless n was hugely negative; keep
    // such a result negative rather than letting the narrowing cast wrap it.
    return (int) Math.max(target, (long) Integer.MIN_VALUE);
}
int end = p + len; boolean needEscape = false; if (enc.isAsciiCompatible()) { while (p < end) { int c = bytes[p] & 0xff; if (c == '/' || (!enc.isPrint(c) && enc.length(bytes, p, end) == 1)) { needEscape = true; break; p += enc.length(bytes, p, end); int c = bytes[p] & 0xff; if (c == '\\') { int n = enc.length(bytes, p + 1, end) + 1; to.append(bytes, p, n); p += n; to.append((byte)'\\'); to.append(bytes, p, 1); } else if (enc.length(bytes, p, end) != 1) { to.append(bytes, p, enc.length(bytes, p, end)); p += enc.length(bytes, p, end); continue; } else if (enc.isPrint(c)) { to.append(bytes, p, 1); } else if (!enc.isSpace(c)) { Sprintf.sprintf(runtime, to, "\\%03o", bytes[p] & 0377); } else {
public void addCType(int ctype, boolean not, boolean asciiRange, ScanEnvironment env, IntHolder sbOut) { Encoding enc = env.enc; int[]ranges = enc.ctypeCodeRange(ctype, sbOut); if (ranges != null) { if (asciiRange) { } else { CClassNode ccAscii = new CClassNode(); if (enc.minLength() > 1) { ccAscii.addCodeRangeToBuf(env, 0x00, 0x7F); } else { if (not) { for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { if (!enc.isCodeCType(c, ctype)) bs.set(env, c); if (enc.isCodeCType(c, ctype)) bs.set(env, c); if (not) { for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) { if (!enc.isCodeCType(c, ctype) || c >= maxCode) bs.set(env, c); if (enc.isCodeCType(c, ctype)) bs.set(env, c); if (!not) { for (int c=0; c<maxCode; c++) { if (enc.isSbWord(c)) bs.set(env, c); if (enc.codeToMbcLength(c) > 0 && /* check invalid code point */ !(enc.isWord(c) || c >= maxCode)) bs.set(env, c);
/**
 * Returns the number of bytes {@code enc} reports for code point {@code c}.
 *
 * <p>Only a reported length of exactly zero raises here; any other value the
 * encoding returns (including a negative one) is handed back unchanged.
 *
 * @param context the current thread context, used to build the error
 * @param c       the code point
 * @param enc     the encoding to query
 * @return the value of {@code enc.codeToMbcLength(c)} when it is non-zero
 * @throws RaiseException an ArgumentError when the reported length is zero
 */
public static int encCodelen(ThreadContext context, int c, Encoding enc) {
    final int length = enc.codeToMbcLength(c);
    if (length != 0) {
        return length;
    }

    // Mask to 32 bits so negative ints print as unsigned hex.
    final String hex = Long.toHexString(c & 0xFFFFFFFFL);
    throw context.runtime.newArgumentError("invalid codepoint " + hex + " in " + enc);
}
/**
 * Returns the number of bytes {@code enc} reports for code point {@code c}.
 *
 * <p>Only a reported length of exactly zero raises here; any other value the
 * encoding returns is handed back unchanged.
 *
 * @param runtime the runtime used to build the error
 * @param enc     the encoding to query
 * @param c       the code point
 * @return the value of {@code enc.codeToMbcLength(c)} when it is non-zero
 * @throws RaiseException a RangeError when the reported length is zero
 */
public static int codeLength(Ruby runtime, Encoding enc, int c) {
    final int n = enc.codeToMbcLength(c);
    if (n == 0) {
        // Concatenate the encoding itself (its toString), as the sibling
        // EncodingUtils helpers do. jcodings' Encoding#getName() yields the raw
        // name bytes, which would render as an array identity string.
        throw runtime.newRangeError("invalid codepoint " + String.format("0x%x in ", c) + enc);
    }
    return n;
}
/**
 * Is the string this object represents a valid constant identifier name?
 *
 * <p>The per-code-point rule is: the first code point must be upper case
 * according to its encoding; every later code point must be alphanumeric,
 * non-ASCII, or {@code '_'}. In addition the byte list must be non-empty.
 *
 * @return true when the code point check succeeds and the length check passes
 */
public boolean validConstantName() {
    final boolean wellFormed = ByteListHelper.eachCodePoint(getBytes(), (int index, int codepoint, Encoding encoding) -> {
        if (index == 0) {
            return encoding.isUpper(codepoint);
        }
        return encoding.isAlnum(codepoint) || !Encoding.isAscii(codepoint) || codepoint == '_';
    });

    return wellFormed && getBytes().length() >= 1;
}
public static int encMbcput(ThreadContext context, int c, byte[] buf, int p, Encoding enc) { int len = enc.codeToMbc(c, buf, p); // in MRI, this check occurs within some of the individual encoding functions, such as the // US-ASCII check for values >= 0x80. In MRI, unlike in JRuby, we can't throw Ruby errors // from within encoding logic, so we try to reproduce the expected results via normal // error codes here. // See MRI's rb_enc_mbcput and related downstream encoding functions. if (len < 0) { switch (len) { case ErrorCodes.ERR_INVALID_CODE_POINT_VALUE: throw context.runtime.newRangeError("invalid codepoint " + Long.toHexString(c & 0xFFFFFFFFL) + " in " + enc); case ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE: throw context.runtime.newRangeError("" + (c & 0xFFFFFFFFL) + " out of char range"); } throw context.runtime.newEncodingError(EncodingError.fromCode(len).getMessage()); } return len; }