// Ask the recognizer csr for a match against this detector. A non-null result is kept only when
// its confidence, masked to the low 8 bits, is greater than zero; it is then re-wrapped in a new
// CharsetMatch carrying the same name and language and appended to matches.
charsetMatch = csr.match(this); if (charsetMatch != null) { confidence = charsetMatch.getConfidence() & 0x000000ff; if (confidence > 0) { CharsetMatch m = new CharsetMatch(this, csr, confidence, charsetMatch.getName(), charsetMatch.getLanguage()); matches.add(m);
/**
 * Build a Java String holding the Unicode form of the original byte data
 * that was handed to the charset detect operation.
 * <p>
 * This overload simply delegates to {@code getString(int)} with an argument of -1.
 *
 * @return a String created from the converted input data.
 * @throws java.io.IOException if the delegated {@code getString(int)} call throws it.
 * @stable ICU 3.4
 */
public String getString() throws java.io.IOException {
    // NOTE(review): -1 is forwarded unchanged; presumably it means "no length limit" —
    // confirm against getString(int).
    final int sentinel = -1;
    return getString(sentinel);
}
/**
 * Open a java.io.Reader over the Unicode character data that corresponds
 * to the original bytes given to the charset detect operation.
 * <p>
 * CAUTION: when the bytes came from an InputStream, this method can produce a
 * Reader for only one matching charset. To try several charsets the caller must
 * reset the InputStream and build its own InputStreamReaders from the charset names.
 *
 * @return the Reader for the Unicode character data, or {@code null} when
 *         resetting the stream or creating the reader raises an IOException.
 * @stable ICU 3.4
 */
public Reader getReader() {
    // Use the caller-supplied stream when there is one; otherwise read from the raw bytes.
    final InputStream supplied = fInputStream;
    final InputStream source = (supplied != null)
            ? supplied
            : new ByteArrayInputStream(fRawInput, 0, fRawLength);

    Reader decoded;
    try {
        // Rewind before decoding so the reader starts from the stream's reset position.
        source.reset();
        decoded = new InputStreamReader(source, getName());
    } catch (IOException e) {
        // Failure is reported to the caller as a null Reader.
        decoded = null;
    }
    return decoded;
}
// Run charset detection over the raw value of text. Return early only when a match exists, its
// confidence is above 35, and tryToSet7BitEncoding(msg, <detected name>) returns true.
detector.setText(text.getRawValue()); CharsetMatch match = detector.detect(); if (match != null && match.getConfidence() > 35 && tryToSet7BitEncoding(msg, match.getName())) { return;
// A confidence of zero is reported as "no match" (null); any other value yields a CharsetMatch.
return confidence == 0 ? null : new CharsetMatch(det, this, confidence);
/**
 * Describe this match as text: the charset name, then the language when
 * getLanguage() is non-null, then the confidence value.
 *
 * @return a string of the form "Match of NAME[ in LANGUAGE] with confidence N".
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("Match of ").append(fCharsetName);
    // The language part is optional and is left out when no language is reported.
    if (getLanguage() != null) {
        text.append(" in ").append(getLanguage());
    }
    text.append(" with confidence ").append(fConfidence);
    return text.toString();
}
}
return match.getReader(); } catch (IOException e) { return null;
return CharsetUtils.forName(match.getName()); } catch (Exception e) {
// Record the name reported by the match as the encoding, and keep its confidence for later use.
encoding = match.getName(); int confidence = match.getConfidence();
/**
 * Score the detector's input with this recognizer's ngrams and byteMap and
 * report the result as a match for language "he".
 * <p>
 * The reported charset name is "windows-1255" when {@code det.fC1Bytes} is set,
 * otherwise "ISO-8859-8-I".
 *
 * @param det the detector holding the input being examined.
 * @return a CharsetMatch carrying the computed confidence, or {@code null}
 *         when that confidence is 0.
 */
public CharsetMatch match(CharsetDetector det) {
    // NOTE(review): fC1Bytes presumably flags that bytes in the C1 range were seen — confirm.
    final String charsetName;
    if (det.fC1Bytes) {
        charsetName = "windows-1255";
    } else {
        charsetName = "ISO-8859-8-I";
    }

    final int score = match(det, ngrams, byteMap);
    if (score == 0) {
        return null;
    }
    return new CharsetMatch(det, this, score, charsetName, "he");
}
}
/**
 * Describe this match as text: the charset name, then the language when
 * getLanguage() is non-null, then the confidence value.
 *
 * @return a string of the form "Match of NAME[ in LANGUAGE] with confidence N".
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("Match of ").append(fCharsetName);
    // The language part is optional and is left out when no language is reported.
    if (getLanguage() != null) {
        text.append(" in ").append(getLanguage());
    }
    text.append(" with confidence ").append(fConfidence);
    return text.toString();
}
}
return match.getReader(); } catch (IOException e) { return null;
// Ask the recognizer csr for a match against this detector. A non-null result is kept only when
// its confidence, masked to the low 8 bits, is greater than zero; it is then re-wrapped in a new
// CharsetMatch carrying the same name and language and appended to matches.
charsetMatch = csr.match(this); if (charsetMatch != null) { confidence = charsetMatch.getConfidence() & 0x000000ff; if (confidence > 0) { CharsetMatch m = new CharsetMatch(this, csr, confidence, charsetMatch.getName(), charsetMatch.getLanguage()); matches.add(m);
/**
 * Open a java.io.Reader over the Unicode character data that corresponds
 * to the original bytes given to the charset detect operation.
 * <p>
 * CAUTION: when the bytes came from an InputStream, this method can produce a
 * Reader for only one matching charset. To try several charsets the caller must
 * reset the InputStream and build its own InputStreamReaders from the charset names.
 *
 * @return the Reader for the Unicode character data, or {@code null} when
 *         resetting the stream or creating the reader raises an IOException.
 * @stable ICU 3.4
 */
public Reader getReader() {
    // Use the caller-supplied stream when there is one; otherwise read from the raw bytes.
    final InputStream supplied = fInputStream;
    final InputStream source = (supplied != null)
            ? supplied
            : new ByteArrayInputStream(fRawInput, 0, fRawLength);

    Reader decoded;
    try {
        // Rewind before decoding so the reader starts from the stream's reset position.
        source.reset();
        decoded = new InputStreamReader(source, getName());
    } catch (IOException e) {
        // Failure is reported to the caller as a null Reader.
        decoded = null;
    }
    return decoded;
}
// Run charset detection over the raw value of text. Return early only when a match exists, its
// confidence is above 35, and tryToSet7BitEncoding(msg, <detected name>) returns true.
detector.setText(text.getRawValue()); CharsetMatch match = detector.detect(); if (match != null && match.getConfidence() > 35 && tryToSet7BitEncoding(msg, match.getName())) { return;
/**
 * Score the detector's input with this recognizer's ngrams and byteMap and
 * report the result as a match for language "tr".
 * <p>
 * The reported charset name is "windows-1254" when {@code det.fC1Bytes} is set,
 * otherwise "ISO-8859-9".
 *
 * @param det the detector holding the input being examined.
 * @return a CharsetMatch carrying the computed confidence, or {@code null}
 *         when that confidence is 0.
 */
public CharsetMatch match(CharsetDetector det) {
    // NOTE(review): fC1Bytes presumably flags that bytes in the C1 range were seen — confirm.
    final String charsetName;
    if (det.fC1Bytes) {
        charsetName = "windows-1254";
    } else {
        charsetName = "ISO-8859-9";
    }

    final int score = match(det, ngrams, byteMap);
    if (score == 0) {
        return null;
    }
    return new CharsetMatch(det, this, score, charsetName, "tr");
}
}
/**
 * Autodetect the charset of a byte array and return a String containing
 * the converted input data.
 * <p>
 * The declared encoding is stored on this detector, the bytes are installed
 * with {@code setText(in)}, {@code detect()} is run, and the resulting match
 * is converted with {@code getString(-1)}.
 * <p>
 * No exception is raised on failure: {@code null} is returned when no charset
 * matches the input data or when an IOException occurs during the conversion.
 *
 * @param in The source of the byte data in the unknown charset.
 * @param declaredEncoding A declared encoding for the data, if available,
 *                         or null or an empty string if none is available.
 * @return the converted text, or {@code null} if detection finds no match
 *         or an IOException is thrown.
 * @stable ICU 3.4
 */
public String getString(byte[] in, String declaredEncoding) {
    fDeclaredEncoding = declaredEncoding;

    try {
        setText(in);
        final CharsetMatch best = detect();
        // detect() yielding null means nothing matched; pass that on as null.
        return (best != null) ? best.getString(-1) : null;
    } catch (IOException e) {
        // Conversion failure is reported to the caller as a null result.
        return null;
    }
}
/**
 * Describe this match as text: the charset name, then the language when
 * getLanguage() is non-null, then the confidence value.
 *
 * @return a string of the form "Match of NAME[ in LANGUAGE] with confidence N".
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder("Match of ").append(fCharsetName);
    // The language part is optional and is left out when no language is reported.
    if (getLanguage() != null) {
        text.append(" in ").append(getLanguage());
    }
    text.append(" with confidence ").append(fConfidence);
    return text.toString();
}
}