org.jcodings.Encoding Java code examples

Refine search

/**
 * Returns the charset name reported by the wrapped {@code encoding}.
 *
 * @return the value of {@code encoding.getCharsetName()}
 */
@Override
public String getCharset() {
 final String charsetName = encoding.getCharsetName();
 return charsetName;
}

private IRubyObject each_lineCommon19(ThreadContext context, IRubyObject sep, Block block) {        
  Ruby runtime = context.runtime;
  if (sep.isNil()) {
    block.yield(context, this);
    return this;
  if (! sep.respondsTo("to_str")) {
    throw runtime.newTypeError("can't convert " + sep.getMetaClass() + " into String");
  RubyString sepStr = sep.convertToString();
  if (sepStr == runtime.getGlobalVariables().getDefaultSeparator()) {
    enc = val.getEncoding();
    while (p < end) {
      if (bytes[p] == (byte)'\n') {
        int p0 = enc.leftAdjustCharHead(bytes, s, p, end);
        if (enc.isNewLine(bytes, p0, end)) {
          p = p0 + StringSupport.length(enc, bytes, p0, end);
          block.yield(context, makeShared19(runtime, val, s - offset, p - s).infectBy(this));

  chomp = _chomp != null || _chomp.isTrue();
      runtime.getWarnings().warn("given block not used");
    } else {
      runtime.getWarnings().warning("passing a block to String#lines is deprecated");
      wantarray = false;
final IRubyObject defaultSep = runtime.getGlobalVariables().get("$/");
RubyString rs = arg.convertToString();
byte[] rsbytes = rsByteList.unsafeBytes();
int rsptr = rsByteList.begin();
if (rsByteList.length() == enc.minLength() && enc.isNewLine(rsbytes, rsptr, rsByteList.length())) {
  rsnewline = true;
if (rs == defaultSep && !enc.isAsciiCompatible()) {
  rs = RubyString.newString(runtime, rsbytes, rsptr, rslen);
  rs = (RubyString) EncodingUtils.rbStrEncode(context, rs, runtime.getEncodingService().convertEncodingToRubyEncoding(enc), 0, context.nil);
  if (pos < 0) break;
  hit = subptr + pos;
  adjusted = enc.rightAdjustCharHead(strBytes, subptr, hit, pend);
  if (hit != adjusted) {
    subptr = adjusted;

Ruby runtime = context.runtime;
if (begin instanceof RubyNumeric || end instanceof RubyNumeric ||
    !TypeConverter.convertToTypeWithCheck(begin, runtime.getInteger(), "to_int").isNil() ||
    !TypeConverter.convertToTypeWithCheck(end, runtime.getInteger(), "to_int").isNil()) {
  if (rangeLe(context, begin, obj) != null) {
    if (isExclusive) {
      if (rangeLt(context, obj, end) != null) return runtime.getTrue();
    } else {
      if (rangeLe(context, obj, end) != null) return runtime.getTrue();
    ((RubyString) begin).getByteList().getRealSize() == 1 &&
    ((RubyString) end).getByteList().getRealSize() == 1) {
  if (obj.isNil()) return runtime.getFalse();
  if (obj instanceof RubyString) {
    ByteList Vbytes = ((RubyString)obj).getByteList();
    ByteList Ebytes = ((RubyString)end).getByteList();
    int e = Ebytes.getUnsafeBytes()[Ebytes.getBegin()] & 0xff;
    if (Encoding.isAscii(v) && Encoding.isAscii(b) && Encoding.isAscii(e)) {
      if ((b <= v && v < e) || (!isExclusive && v == e)) return runtime.getTrue();
      return runtime.getFalse();

boolean cflag = false;
if (value.getRealSize() > 0) {
  if (enc.isAsciiCompatible()) {
    if (trSrc.buf.length > 0 && (trSrc.buf[trSrc.p] & 0xff) == '^' && trSrc.p + 1 < trSrc.pend) {
      cflag = true;
    if (enc.mbcToCode(trSrc.buf, trSrc.p, trSrc.pend) == '^' && trSrc.p + cl < trSrc.pend) {
      cflag = true;
      trSrc.p += cl;
    if (c != -1) {
      if (save == c) {
        if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
        continue;
    enc.codeToMbc(c, buf, t);
    if (mayModify && (tlen == 1 ? sbytes[s] != buf[t] : ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true;
    if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
    t += tlen;
} else if (enc.isSingleByte() || (singlebyte && hash == null)) {
  while (s < send) {
    c = sbytes[s] & 0xff;
    if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
    s++;
    enc.codeToMbc(c, buf, t);
    if (mayModify && (tlen == 1 ? sbytes[s] != buf[t] : ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true;

/**
 * Pushes a character back via {@code ungetbyteCommon}.
 *
 * <p>A nil argument is returned unchanged. An integer argument is treated as a
 * code point and encoded with this object's encoding. Any other argument is
 * converted to a string and, when its encoding differs from this object's
 * (and this object's encoding is not ASCII-8BIT), transcoded first.
 *
 * @param context the current thread context
 * @param arg nil, an integer code point, or an object convertible to a string
 * @return {@code arg} when it is nil, otherwise {@code context.nil}
 */
@JRubyMethod(name = "ungetc")
public IRubyObject ungetc(ThreadContext context, IRubyObject arg) {
  checkModifiable();
  checkReadable();

  if (arg.isNil()) return arg;

  if (arg instanceof RubyInteger) {
    final int codePoint = RubyNumeric.num2int(arg);
    final Encoding target = getEncoding();
    final int width = target.codeToMbcLength(codePoint);
    // NOTE(review): encUintChr is presumably invoked for its error-raising side effect
    // when the code point cannot be encoded; its result is intentionally discarded.
    if (width <= 0) EncodingUtils.encUintChr(context, codePoint, target);

    final byte[] scratch = new byte[16];
    target.codeToMbc(codePoint, scratch, 0);
    ungetbyteCommon(scratch, 0, width);
    return context.nil;
  }

  RubyString str = (RubyString) arg.convertToString();
  final Encoding target = getEncoding();
  final Encoding source = str.getEncoding();
  final boolean needsTranscode = target != source && target != ASCIIEncoding.INSTANCE;
  if (needsTranscode) {
    str = EncodingUtils.strConvEnc(context, str, source, target);
  }

  final ByteList payload = str.getByteList();
  ungetbyteCommon(payload.unsafeBytes(), payload.begin(), payload.realSize());
  return context.nil;
}

/**
 * Swaps the case of every cased character in {@code bytes[s, end)} in place.
 *
 * @param runtime the runtime, used for code point decoding errors and for nil
 * @param enc the encoding used to decode and re-encode each character
 * @param bytes the buffer that is read and overwritten
 * @param s index of the first byte to process
 * @param end index one past the last byte to process
 * @return {@code this} if at least one character was rewritten, otherwise nil
 */
private IRubyObject multiByteSwapcase(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  boolean changed = false;
  for (int pos = s; pos < end; ) {
    final int code = codePoint(runtime, enc, bytes, pos, end);
    final boolean upper = enc.isUpper(code);
    if (upper) {
      enc.codeToMbc(toLower(enc, code), bytes, pos);
    } else if (enc.isLower(code)) {
      enc.codeToMbc(toUpper(enc, code), bytes, pos);
    }
    // A rewrite happened exactly when one of the two branches above ran.
    if (upper || enc.isLower(code)) changed = true;
    // Advance by the encoded length of the original (pre-swap) code point.
    pos += codeLength(runtime, enc, code);
  }
  if (changed) return this;
  return runtime.getNil();
}

private static NeighborChar succAlnumChar(Encoding enc, byte[]bytes, int p, int len, byte[]carry, int carryP) {
  byte save[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
  int c = enc.mbcToCode(bytes, p, p + len);
  if (enc.isDigit(c)) {
    cType = CharacterType.DIGIT;
  } else if (enc.isAlpha(c)) {
    cType = CharacterType.ALPHA;
  } else {
  NeighborChar ret = succChar(enc, bytes, p, len);
  if (ret == NeighborChar.FOUND) {
    c = enc.mbcToCode(bytes, p, p + len);
    if (enc.isCodeCType(c, cType)) return NeighborChar.FOUND;
    ret = predChar(enc, bytes, p, len);
    if (ret == NeighborChar.FOUND) {
      c = enc.mbcToCode(bytes, p, p + len);
      if (!enc.isCodeCType(c, cType)) {
        System.arraycopy(save, 0, bytes, p, len);
        break;

  final int c;
  final int cl;
  if (enc.isAsciiCompatible()) {
    cl = 1;
    c = bytes[p] & 0xff;
  } else {
    cl = StringSupport.preciseLength(enc, bytes, p, end);
    c = enc.mbcToCode(bytes, p, end);
  if (!Encoding.isAscii(c)) {
    p += StringSupport.length(enc, bytes, p, end);
  } else if (c != '/' && enc.isPrint(c)) {
    p += cl;
  } else {
  to.append(bytes, start, len);
} else {
  boolean isUnicode = enc.isUnicode();
  p = start;
  while (p < end) {
    final int c;
    final int cl;
    if (enc.isAsciiCompatible()) {
      cl = 1;
      c = bytes[p] & 0xff;
    } else {
      cl = StringSupport.preciseLength(enc, bytes, p, end);
      c = enc.mbcToCode(bytes, p, end);

/**
 * Encodes the code point {@code value} with {@code enc} into a new ByteList.
 *
 * @param runtime the runtime used to build Ruby errors
 * @param enc the encoding to encode with
 * @param value the code point to encode
 * @return a ByteList whose real size is the encoded length of {@code value}
 * @throws RaiseException (RangeError) when {@code value} is negative, has no
 *         positive encoded length, or encoding it fails
 */
private ByteList fromEncodedBytes(Ruby runtime, Encoding enc, int value) {
  int width = 0;
  if (value >= 0) {
    try {
      width = enc.codeToMbcLength(value);
    } catch (EncodingException ignored) {
      // Treated like "no valid length": the range check below rejects the value.
      width = 0;
    }
  }
  if (width <= 0) {
    throw runtime.newRangeError(this.toString() + " out of char range");
  }

  final ByteList encoded = new ByteList(width);
  try {
    enc.codeToMbc(value, encoded.getUnsafeBytes(), 0);
  } catch (EncodingException e) {
    throw runtime.newRangeError("invalid codepoint " + String.format("0x%x in ", value) + enc.getCharsetName());
  }
  encoded.setRealSize(width);
  return encoded;
}

/**
 * Is the string this object represents a valid class variable name: two leading
 * {@code '@'} characters, then a non-digit identifier character, then any number
 * of identifier characters (alphanumeric, non-ASCII, or {@code '_'}).
 */
public boolean validClassVariableName() {
  boolean valid = ByteListHelper.eachCodePoint(getBytes(), (int index, int codepoint, Encoding encoding) -> {
    if (index < 0) return false;
    // Positions 0 and 1 must both be '@'.
    if (index < 2) return codepoint == '@';
    // The first character after "@@" may not be a digit.
    if (index == 2 && encoding.isDigit(codepoint)) return false;
    return encoding.isAlnum(codepoint) || !Encoding.isAscii(codepoint) || codepoint == '_';
  });
  if (!valid) return false;
  return getBytes().length() >= 3; // FIXME: good enough on length check?  Trying to avoid counter.
}

/**
 * Computes the byte index reached by advancing {@code n} characters from {@code p},
 * capped at {@code end}.
 *
 * @param enc the encoding of {@code bytes}
 * @param bytes the buffer being scanned
 * @param p the starting byte index
 * @param end the upper bound for the returned index
 * @param n the number of characters to advance
 * @return the resulting byte index, never greater than {@code end}
 */
public static int nth(Encoding enc, byte[]bytes, int p, int end, int n) {
  final int target;
  if (enc.isSingleByte()) {
    // One byte per character: plain offset arithmetic.
    target = p + n;
  } else if (enc.isFixedWidth()) {
    target = p + n * enc.maxLength();
  } else if (enc.isAsciiCompatible()) {
    target = nthAsciiCompatible(enc, bytes, p, end, n);
  } else {
    target = nthNonAsciiCompatible(enc, bytes, p, end, n);
  }
  return Math.min(target, end);
}

int end = p + len;
boolean needEscape = false;
if (enc.isAsciiCompatible()) {
  while (p < end) {
    int c = bytes[p] & 0xff;
    if (c == '/' || (!enc.isPrint(c) && enc.length(bytes, p, end) == 1)) {
      needEscape = true;
      break;
    p += enc.length(bytes, p, end);
    int c = bytes[p] & 0xff;
    if (c == '\\') {
      int n = enc.length(bytes, p + 1, end) + 1;
      to.append(bytes, p, n);
      p += n;
      to.append((byte)'\\');
      to.append(bytes, p, 1);
    } else if (enc.length(bytes, p, end) != 1) {
      to.append(bytes, p, enc.length(bytes, p, end));
      p += enc.length(bytes, p, end);
      continue;
    } else if (enc.isPrint(c)) {
      to.append(bytes, p, 1);
    } else if (!enc.isSpace(c)) {
      Sprintf.sprintf(runtime, to, "\\%03o", bytes[p] & 0377);
    } else {

public void addCType(int ctype, boolean not, boolean asciiRange, ScanEnvironment env, IntHolder sbOut) {
  Encoding enc = env.enc;
  int[]ranges = enc.ctypeCodeRange(ctype, sbOut);
  if (ranges != null) {
    if (asciiRange) {
      } else {
        CClassNode ccAscii = new CClassNode();
        if (enc.minLength() > 1) {
          ccAscii.addCodeRangeToBuf(env, 0x00, 0x7F);
        } else {
    if (not) {
      for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
        if (!enc.isCodeCType(c, ctype)) bs.set(env, c);
        if (enc.isCodeCType(c, ctype)) bs.set(env, c);
    if (not) {
      for (int c=0; c<BitSet.SINGLE_BYTE_SIZE; c++) {
        if (!enc.isCodeCType(c, ctype) || c >= maxCode) bs.set(env, c);
        if (enc.isCodeCType(c, ctype)) bs.set(env, c);
    if (!not) {
      for (int c=0; c<maxCode; c++) {
        if (enc.isSbWord(c)) bs.set(env, c);
        if (enc.codeToMbcLength(c) > 0 && /* check invalid code point */
            !(enc.isWord(c) || c >= maxCode)) bs.set(env, c);

/**
 * Parses up to {@code len} octal digits from {@code bytes} starting at {@code p}.
 * Scanning stops at the first byte that is not a digit below {@code '8'}.
 *
 * @param bytes the buffer to read from
 * @param p index of the first byte to examine
 * @param len the maximum number of bytes to examine
 * @param enc the encoding used to classify bytes as digits
 * @return the accumulated octal value, or 0 if no octal digit was read
 */
public static int scanOct(byte[]bytes, int p, int len, Encoding enc) {
  int value = 0;
  for (int i = 0; i < len; i++) {
    final int ch = bytes[p + i] & 0xff;
    if (!enc.isDigit(ch) || ch >= '8') break;
    value = (value << 3) + Encoding.digitVal(ch);
  }
  return value;
}

/**
 * Consumes one character at {@code s}. Fails (via {@code opFail()}) when {@code s}
 * is at or past {@code range}, when the character would extend past {@code range},
 * or when the character is a newline for {@code enc}.
 */
private void opAnyChar() {
  if (s >= range) {
    opFail();
    return;
  }
  // Length is only computed once we know there is input left to read.
  final int charLen = enc.length(bytes, s, end);
  if (s + charLen > range || enc.isNewLine(bytes, s, end)) {
    opFail();
    return;
  }
  s += charLen;
  sprev = sbegin; // break;
}

/**
 * Returns the number of bytes {@code enc} needs to encode the code point {@code c}.
 *
 * @param context the current thread context, used to build the Ruby error
 * @param c the code point
 * @param enc the encoding to measure against
 * @return the encoded length, always positive
 * @throws RaiseException (ArgumentError) when {@code enc} reports no positive
 *         length for {@code c}
 */
public static int encCodelen(ThreadContext context, int c, Encoding enc) {
  int n = enc.codeToMbcLength(c);
  // Reject every non-positive result, not just 0: a negative value is an error
  // indicator and must never reach callers as a byte length.
  if (n <= 0) {
    throw context.runtime.newArgumentError("invalid codepoint " + Long.toHexString(c & 0xFFFFFFFFL) + " in " + enc);
  }
  return n;
}

/**
 * Returns the number of bytes {@code enc} needs to encode the code point {@code c}.
 *
 * @param runtime the runtime used to build the Ruby error
 * @param enc the encoding to measure against
 * @param c the code point
 * @return the encoded length, always positive
 * @throws RaiseException (RangeError) when {@code enc} reports no positive
 *         length for {@code c}
 */
public static int codeLength(Ruby runtime, Encoding enc, int c) {
  int n = enc.codeToMbcLength(c);
  // Reject every non-positive result, not just 0: a negative value is an error
  // indicator, and callers that advance a cursor by it would move backwards.
  if (n <= 0) {
    // Concatenate the encoding itself (its toString()) rather than getName():
    // getName() is a byte[] in jcodings, which would print as an array identity.
    throw runtime.newRangeError("invalid codepoint " + String.format("0x%x in ", c) + enc);
  }
  return n;
}

/**
 * Is the string this object represents a valid constant name: an uppercase first
 * character followed by any number of identifier characters (alphanumeric,
 * non-ASCII, or {@code '_'}).
 */
public boolean validConstantName() {
  boolean valid = ByteListHelper.eachCodePoint(getBytes(), (int index, int codepoint, Encoding encoding) -> {
    // Only the leading character carries the uppercase requirement.
    if (index == 0) return encoding.isUpper(codepoint);
    return encoding.isAlnum(codepoint) || !Encoding.isAscii(codepoint) || codepoint == '_';
  });
  if (!valid) return false;
  return getBytes().length() >= 1;
}

/**
 * Encodes the code point {@code c} into {@code buf} at offset {@code p} using
 * {@code enc}, translating negative results into Ruby errors.
 *
 * @param context the current thread context, used to build Ruby errors
 * @param c the code point to encode
 * @param buf the destination buffer
 * @param p the offset in {@code buf} to write at
 * @param enc the encoding to encode with
 * @return the non-negative value returned by {@code enc.codeToMbc}
 * @throws RaiseException (RangeError) for an invalid or too-large code point,
 *         or (EncodingError) for any other negative result
 */
public static int encMbcput(ThreadContext context, int c, byte[] buf, int p, Encoding enc) {
  final int written = enc.codeToMbc(c, buf, p);
  if (written >= 0) return written;

  // The encoding layer signals failure with negative codes instead of raising Ruby
  // errors itself, so they are mapped here to the errors MRI would raise.
  // See MRI's rb_enc_mbcput and related downstream encoding functions.
  if (written == ErrorCodes.ERR_INVALID_CODE_POINT_VALUE) {
    throw context.runtime.newRangeError("invalid codepoint " + Long.toHexString(c & 0xFFFFFFFFL) + " in " + enc);
  }
  if (written == ErrorCodes.ERR_TOO_BIG_WIDE_CHAR_VALUE) {
    throw context.runtime.newRangeError("" + (c & 0xFFFFFFFFL) + " out of char range");
  }
  throw context.runtime.newEncodingError(EncodingError.fromCode(written).getMessage());
}

Most used methods

Popular in Java

Creating JSON documents from java classes using gson
getSystemService (Context)
startActivity (Activity)
onRequestPermissionsResult (Fragment)
FileInputStream (java.io)
An input stream that reads bytes from a file. File file = ...finally if (in != null) in.clos
String (java.lang)
HttpURLConnection (java.net)
A URLConnection for HTTP (RFC 2616 [http://tools.ietf.org/html/rfc2616]) used to send and receive d
URLEncoder (java.net)
This class is used to encode a string using the format required by application/x-www-form-urlencoded
Stack (java.util)
Stack is a Last-In/First-Out(LIFO) data structure which represents a stack of objects. It enables u
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
Top Sublime Text plugins

How to use Encoding in org.jcodings

Best Java code snippets using org.jcodings.Encoding (Showing top 20 results out of 315)

Refine search

How to use
Encoding
in
org.jcodings