java.nio.charset.Charset java code examples

Refine search

 String line;
try (
  InputStream fis = new FileInputStream("the_file_name");
  InputStreamReader isr = new InputStreamReader(fis, Charset.forName("UTF-8"));
  BufferedReader br = new BufferedReader(isr);
) {
  while ((line = br.readLine()) != null) {
    // Deal with the line
  }
}

/**
 * Applies the parent configuration, then validates {@code charsetName} and
 * prepares the decoding state derived from it.
 *
 * @param parameters configuration forwarded unchanged to the superclass
 * @throws RuntimeException if {@code charsetName} is null or names a charset
 *         this runtime does not support
 */
@Override
public void configure(Configuration parameters) {
  super.configure(parameters);

  final boolean usable = charsetName != null && Charset.isSupported(charsetName);
  if (!usable) {
    throw new RuntimeException("Unsupported charset: " + charsetName);
  }

  // Only the canonical US-ASCII name (any letter case) sets the flag.
  final String asciiName = StandardCharsets.US_ASCII.name();
  if (asciiName.equalsIgnoreCase(charsetName)) {
    ascii = true;
  }

  final Charset resolved = Charset.forName(charsetName);
  this.decoder = resolved.newDecoder();
  this.byteWrapper = ByteBuffer.allocate(1);
}

/**
 * Wraps a byte stream in a buffered reader that decodes with the platform
 * default charset.
 *
 * @param stream the byte stream to decode
 * @return a buffered reader over {@code stream}
 */
private BufferedReader readerFor(InputStream stream) {
 final Charset platformCharset = Charset.defaultCharset();
 final InputStreamReader decoded = new InputStreamReader(stream, platformCharset);
 return new BufferedReader(decoded);
}

/**
 * Reads the entire content of a file and decodes it into a string.
 *
 * <p>The bytes are read onto the heap instead of being memory-mapped. A mapped
 * buffer stays valid until it is garbage-collected, so mapping a file only to
 * decode it once keeps the mapping (and on some platforms a lock on the file)
 * alive after this method has returned.
 *
 * <p>Malformed or unmappable byte sequences are replaced with the charset's
 * default replacement string.
 *
 * @param path    path of the file to read
 * @param charset charset used to decode the bytes; {@code null} selects the
 *                platform default charset
 * @return the decoded content of the file; the empty string for an empty file
 * @throws IOException if the file cannot be opened or read
 */
public static String readFileAsString(String path, Charset charset) throws IOException {
  if (charset == null) {
    charset = Charset.defaultCharset();
  }
  // Fully qualified so the snippet needs no additional import.
  final byte[] content = java.nio.file.Files.readAllBytes(Paths.get(path));
  return new String(content, charset);
}

// A single UTF-8 charset lookup supplies both the encoder used for writing
// and the decoder used for reading.
Charset utf8 = Charset.forName("UTF-8");
OutputStreamWriter char_output = new OutputStreamWriter(
  new FileOutputStream("some_output.utf8"), utf8.newEncoder());
InputStreamReader char_input = new InputStreamReader(
  new FileInputStream("some_input.utf8"), utf8.newDecoder());

/**
 * Opens the given URL, decodes the response as UTF-8 and parses the text
 * returned by {@code readAll} as a JSON array.
 *
 * @param url the address to read from
 * @return the parsed JSON array
 * @throws IOException   if the URL cannot be opened or read
 * @throws JSONException if the text is not a valid JSON array
 */
public static JSONArray readJsonFromUrl(String url) throws IOException, JSONException {
  // try-with-resources instead of finally { is.close(); }: when both the body
  // and close() fail, the body's exception is thrown and the close failure is
  // attached as suppressed rather than masking the real cause.
  try (InputStream is = new URL(url).openStream();
       BufferedReader rd = new BufferedReader(new InputStreamReader(is, Charset.forName("UTF-8")))) {
    String jsonText = readAll(rd);
    return new JSONArray(jsonText);
  }
}

 // NOTE(review): query is appended to the URL verbatim. If it is not already
 // URL-encoded by the caller, spaces or '&' in it will corrupt the request - confirm.
 URLConnection connection = new URL("https://www.google.com/search?q=" + query).openConnection();
connection.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
connection.connect();

StringBuilder sb = new StringBuilder();
String line;
// The reader (and the connection's input stream beneath it) was never closed
// before; try-with-resources releases it even when reading fails.
try (BufferedReader r = new BufferedReader(new InputStreamReader(connection.getInputStream(), Charset.forName("UTF-8")))) {
  // readLine() strips line terminators, so the lines are concatenated without separators.
  while ((line = r.readLine()) != null) {
    sb.append(line);
  }
}
System.out.println(sb.toString());

FileInputStream stream = new FileInputStream(path);
  InputStreamReader input = new InputStreamReader(stream, Charset.defaultCharset());
  Reader reader = new BufferedReader(input);
  int read;
  while ((read = reader.read(buffer, 0, buffer.length)) > 0) {
    builder.append(buffer, 0, read);
  stream.close();

err = proc.getErrorStream();
inr = new BufferedReader(new InputStreamReader(in, Charset.defaultCharset()));
String line = inr.readLine();
while (line != null && lines.size() < max) {
  line = line.toLowerCase(Locale.ENGLISH).trim();
  lines.add(line);
  line = inr.readLine();
inr = null;
in.close();
in = null;
  err.close();
  err = null;

@Test
public void testMultiByteBreak() throws Exception {
  System.out.println("testMultiByteBreak() Default charset: "+Charset.defaultCharset().displayName());
  final long delay = 50;
  final File origin = new File(this.getClass().getResource("/test-file-utf8.bin").toURI());
  final File file = new File(getTestDirectory(), "testMultiByteBreak.txt");
  createFile(file, 0);
     BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(origin), charsetUTF8))) {
    final List<String> lines = new ArrayList<>();
    String line;
    while((line = reader.readLine()) != null){
      out.write(line);
      out.write("\n");

 /**
  * Reads a whole file into a string, looking the charset up by name and
  * delegating to the {@code readFile(String, Charset)} overload.
  *
  * @param file   path of the file to read
  * @param csName name (or alias) of the charset used to decode the file
  * @return the decoded content of the file
  * @throws IOException if the file cannot be opened or read
  */
 public static String readFile(String file, String csName)
      throws IOException {
  return readFile(file, Charset.forName(csName));
}

/**
 * Reads a whole file into a string, decoding its bytes with the given charset.
 *
 * @param file path of the file to read
 * @param cs   charset used to decode the file's bytes
 * @return the complete decoded content; the empty string for an empty file
 * @throws IOException if the file cannot be opened or read
 */
public static String readFile(String file, Charset cs)
      throws IOException {
  // try-with-resources replaces the manual finally { stream.close(); }. If both
  // reading and closing fail, the read failure is thrown and the close failure
  // is attached as a suppressed exception instead of masking it.
  try (FileInputStream stream = new FileInputStream(file);
       Reader reader = new BufferedReader(new InputStreamReader(stream, cs))) {
    StringBuilder builder = new StringBuilder();
    char[] buffer = new char[8192];
    int read;
    // read() reports end of input with -1, which ends the loop.
    while ((read = reader.read(buffer, 0, buffer.length)) > 0) {
      builder.append(buffer, 0, read);
    }
    return builder.toString();
  }
}

reader = new BufferedReader(
    new InputStreamReader( is, Charset.forName( "UTF-8" ) )
);
BufferedWriter writer = new BufferedWriter( sw );
for ( int c = reader.read(); c != -1; c = reader.read() ) {
  writer.write( c );
  reader.close();
is.close();

/**
 * Downloads the content behind a URL as text, joining its lines with the
 * platform line separator (no separator after the last line).
 *
 * @param url      the location to read from
 * @param encoding name of the charset used for decoding; {@code null} selects UTF-8
 * @return the decoded text
 * @throws IOException if the URL stream cannot be opened
 */
public static String urlToText(URL url, String encoding) throws IOException {
 final Charset charset;
 if (encoding != null) {
  charset = Charset.forName(encoding);
 } else {
  charset = StandardCharsets.UTF_8;
 }
 final String separator = System.lineSeparator();
 try (BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream(), charset))) {
  return reader.lines().collect(joining(separator));
 }
}

           ? new InputStreamReader(new FileInputStream(file1), Charset.defaultCharset())
           : new InputStreamReader(new FileInputStream(file1), charsetName);
 Reader input2 = charsetName == null
           ? new InputStreamReader(new FileInputStream(file2), Charset.defaultCharset())
           : new InputStreamReader(new FileInputStream(file2), charsetName)) {
return IOUtils.contentEqualsIgnoreEOL(input1, input2);

final InputStream inputStream = zipFile.getInputStream(entry);
try {
  final Reader reader = new InputStreamReader(inputStream, Charset.forName("UTF-8"));
  try {
    final char[] chars = new char[1024];
    int read = reader.read(chars);
    while (read != -1) {
      writer.write(chars, 0, read);
      read = reader.read(chars);
    reader.close();
  inputStream.close();

// Reads System.in as text decoded with ISO-8859-1; 1024 is the size of the
// BufferedReader's internal character buffer.
BufferedReader br = new BufferedReader(new InputStreamReader(System.in, Charset.forName("ISO-8859-1")),1024);
 // ... (code elided in the original example)
    // inside some iteration / processing logic:
    // ready() is checked first, so read() is only attempted when the reader
    // reports that input is available.
    if (br.ready()) {
      // Stores characters into inputData starting at bufferOffset, at most up to
      // the end of the array; readCount is the number of characters stored.
      int readCount = br.read(inputData, bufferOffset, inputData.length-bufferOffset);
    }

/**
 * Creates a new scanner that reads from a byte stream.
 * The bytes of {@code in} are decoded as UTF-8 and the resulting reader is
 * handed to another constructor of this class via {@code this(...)}.
 * There is also a java.io.Reader version of this constructor.
 *
 * @param   in  the java.io.InputStream to read input from.
 */
NegraPennLexer(java.io.InputStream in) {
 this(new java.io.InputStreamReader
      (in, java.nio.charset.Charset.forName("UTF-8")));
}

/**
 * Fetches the data behind {@code url}, decodes it as UTF-8 and passes the
 * complete text to {@code parseJson}.
 *
 * @return {@code true} when the data was fetched and parsed; {@code false}
 *         when no stream was obtained, an I/O error occurred, or parsing
 *         raised a {@code SAXException}
 * @throws RuntimeException wrapping a {@code ParserConfigurationException}
 *         raised while parsing
 */
@Override
public boolean update() {
  // The stream was never closed before. try-with-resources now closes it on
  // every exit path; a null resource is simply skipped when closing.
  try (InputStream stream = HttpUtils.getData(url)) {
    if (stream == null) {
      log.warn("Failed to get data from url " + url);
      return false;
    }
    Reader reader = new BufferedReader(new InputStreamReader(stream,
        Charset.forName("UTF-8")));
    StringBuilder builder = new StringBuilder();
    char[] buffer = new char[4096];
    int charactersRead;
    // read() reports end of input with -1, which ends the loop.
    while ((charactersRead = reader.read(buffer, 0, buffer.length)) > 0) {
      builder.append(buffer, 0, charactersRead);
    }
    String data = builder.toString();
    parseJson(data);
  } catch (IOException e) {
    log.warn("Error reading bike rental feed from " + url, e);
    return false;
  } catch (ParserConfigurationException e) {
    throw new RuntimeException(e);
  } catch (SAXException e) {
    log.warn("Error parsing bike rental feed from " + url + "(bad XML of some sort)", e);
    return false;
  }
  return true;
}

/**
 * Reads the content behind a URL as UTF-8 text and lets
 * {@code objectMapperWithoutIndentation} convert it to the requested type.
 *
 * @param src       the location to read from
 * @param valueType the class of the value to produce
 * @param <T>       the type of the produced value
 * @return the value produced by the mapper
 * @throws IOException if the URL stream cannot be opened or read
 */
public static <T> T readValue(URL src, Class<T> valueType) throws IOException {
 try (InputStream in = src.openStream()) {
  // Decode explicitly as UTF-8 rather than relying on the platform default.
  final Charset utf8 = Charset.forName("UTF-8");
  final Reader utf8Reader = new InputStreamReader(in, utf8);
  return objectMapperWithoutIndentation.readValue(utf8Reader, valueType);
 }
}

/**
 * Copies the text of this file to the given writer, one character at a time.
 * The bytes are decoded with the charset named by {@code this.encoding}, or
 * with the platform default charset when {@code this.encoding} is null.
 *
 * @param out the writer receiving the decoded characters; it is not closed here
 * @return {@code out}, to allow call chaining
 * @throws IOException if the file cannot be opened or read, or writing fails
 */
public Writer writeTo(final Writer out) throws IOException {
  // Resolve the charset BEFORE opening the file. Previously Charset.forName ran
  // after Files.newInputStream, so an unknown encoding name threw while the
  // freshly opened stream was not yet owned by any resource and was leaked.
  final Charset charset = (this.encoding == null)
      ? Charset.defaultCharset()
      : Charset.forName(this.encoding);
  try (Reader reader = new InputStreamReader(Files.newInputStream(this), charset)) {
    // read() returns -1 once the end of the file has been reached.
    for (int c = reader.read(); c != -1; c = reader.read()) {
      out.write(c);
    }
  }
  return out;
}

Javadoc

A charset is a named mapping between Unicode characters and byte sequences. Every Charset can decode, converting a byte sequence into a sequence of characters, and some can also encode, converting a sequence of characters into a byte sequence. Use the method #canEncode to find out whether a charset supports both.

Characters

In the context of this class, character always refers to a Java character: a Unicode code point in the range U+0000 to U+FFFF. (Java represents supplementary characters using surrogates.) Not all byte sequences will represent a character, and not all characters can necessarily be represented by a given charset. The method #contains can be used to determine whether every character representable by one charset can also be represented by another (meaning that a lossless transformation is possible from the contained to the container).

Encodings

There are many possible ways to represent Unicode characters as byte sequences. See UTR#17: Unicode Character Encoding Model for detailed discussion.

The most important mappings capable of representing every character are the Unicode Transformation Format (UTF) charsets. Of those, UTF-8 and the UTF-16 family are the most common. UTF-8 (described in RFC 3629) encodes a character using 1 to 4 bytes. UTF-16 uses exactly 2 bytes per character (potentially wasting space, but allowing efficient random access into BMP text), and UTF-32 uses exactly 4 bytes per character (trading off even more space for efficient random access into text that includes supplementary characters).

UTF-16 and UTF-32 encode characters directly, using their code point as a two- or four-byte integer. This means that any given UTF-16 or UTF-32 byte sequence is either big- or little-endian. To assist decoders, Unicode includes a special byte order mark (BOM) character U+FEFF used to determine the endianness of a sequence. The corresponding byte-swapped code point U+FFFE is guaranteed never to be assigned. If a UTF-16 decoder sees 0xfe, 0xff, for example, it knows it's reading a big-endian byte sequence, while 0xff, 0xfe, would indicate a little-endian byte sequence.

UTF-8 can contain a BOM, but since the UTF-8 encoding of a character always uses the same byte sequence, there is no information about endianness to convey. Seeing the bytes corresponding to the UTF-8 encoding of U+FEFF ( 0xef, 0xbb, 0xbf) would only serve to suggest that you're reading UTF-8. Note that BOMs are decoded as the U+FEFF character, and will appear in the output character sequence. This means that a disadvantage to including a BOM in UTF-8 is that most applications that use UTF-8 do not expect to see a BOM. (This is also a reason to prefer UTF-8: it's one less complication to worry about.)

Because a BOM indicates how the data that follows should be interpreted, a BOM should occur as the first character in a character sequence.

See the Byte Order Mark (BOM) FAQ for more about dealing with BOMs.

Endianness and BOM behavior

The following tables show the endianness and BOM behavior of the UTF-16 variants.

This table shows what the encoder writes. "BE" means that the byte sequence is big-endian, "LE" means little-endian. "BE BOM" means a big-endian BOM (that is, 0xfe, 0xff).

Charset	Encoder writes
UTF-16BE	BE, no BOM
UTF-16LE	LE, no BOM
UTF-16	BE, with BE BOM

The next table shows how each variant's decoder behaves when reading a byte sequence. The exact meaning of "failure" in the table is dependent on the CodingErrorAction supplied to CharsetDecoder#malformedInputAction, so "BE, failure" means "the byte sequence is treated as big-endian, and a little-endian BOM triggers the malformedInputAction".

The phrase "includes BOM" means that the output includes the U+FEFF byte order mark character.

Charset	BE BOM	LE BOM	No BOM
UTF-16BE	BE, includes BOM	BE, failure	BE
UTF-16LE	LE, failure	LE, includes BOM	LE
UTF-16	BE	LE	BE

Charset names

A charset has a canonical name, returned by #name. Most charsets will also have one or more aliases, returned by #aliases. A charset can be looked up by canonical name or any of its aliases using #forName.

Guaranteed-available charsets

The following charsets are available on every Java implementation:

ISO-8859-1
US-ASCII
UTF-16
UTF-16BE
UTF-16LE
UTF-8

All of these charsets support both decoding and encoding. The charsets whose names begin "UTF" can represent all characters, as mentioned above. The "ISO-8859-1" and "US-ASCII" charsets can only represent small subsets of these characters. Except when required to do otherwise for compatibility, new code should use one of the UTF charsets listed above. The platform's default charset is UTF-8. (This is in contrast to some older implementations, where the default charset depended on the user's locale.)

Most implementations will support hundreds of charsets. Use #availableCharsets or #isSupported to see what's available. If you intend to use the charset if it's available, just call #forName and catch the exceptions it throws if the charset isn't available.

Additional charsets can be made available by configuring one or more charset providers through provider configuration files. Such files are always named as "java.nio.charset.spi.CharsetProvider" and located in the "META-INF/services" directory of one or more classpaths. The files should be encoded in "UTF-8". Each line of their content specifies the class name of a charset provider which extends java.nio.charset.spi.CharsetProvider. A line should end with '\r', '\n' or '\r\n'. Leading and trailing whitespace is trimmed. Blank lines, and lines (after trimming) starting with "#" which are regarded as comments, are both ignored. Duplicates of names already found are also ignored. Both the configuration files and the provider classes will be loaded using the thread context class loader.

Although this class is thread-safe, the CharsetDecoder and CharsetEncoder instances it returns are inherently stateful.

Most used methods

forName
Returns a Charset instance for the named charset.
name
Returns the canonical name of this charset. If a charset is in the IANA registry, this will be the MI
defaultCharset
Returns the system's default charset. This is determined during VM startup, and will not change ther
newEncoder
Returns a new instance of an encoder for this charset.
newDecoder
Returns a new instance of a decoder for this charset.
decode
Returns a new CharBuffer containing the characters decoded from buffer. This method uses CodingError
encode
Returns a new ByteBuffer containing the bytes encoding the characters from buffer. This method uses
toString
Gets a string representation of this charset. Usually this contains the canonical name of the charse
isSupported
Determines whether the specified charset is supported by this runtime.
equals
Determines whether this charset equals to the given object. They are considered to be equal if they
displayName
Returns the name of this charset for the specified locale. The default implementation returns the can
availableCharsets
Returns an immutable case-insensitive map from canonical names to Charset instances. If multiple cha

Popular in Java

Start an intent from android
addToBackStack (FragmentTransaction)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem.This may contain path information depending
setContentView (Activity)
MalformedURLException (java.net)
This exception is thrown when a program attempts to create an URL from an incorrect specification.
DateFormat (java.text)
Formats or parses dates and times.This class provides factories for obtaining instances configured f
Calendar (java.util)
Calendar is an abstract base class for converting between a Date object and a set of integer fields
TreeSet (java.util)
TreeSet is an implementation of SortedSet. All optional operations (adding and removing) are support
Runner (org.openjdk.jmh.runner)
Reflections (org.reflections)
Reflections one-stop-shop objectReflections scans your classpath, indexes the metadata, allows you t
Top plugins for Android Studio

How to use Charset in java.nio.charset

Best Java code snippets using java.nio.charset.Charset (Showing top 20 results out of 60,255)

Refine search

How to use
Charset
in
java.nio.charset