/**
 * Detects the charset of a byte array and returns its contents decoded
 * into a String.
 * <p>
 * Convenience shorthand for: record the declared encoding, call
 * {@code setText(in)}, call {@code detect()}, then call
 * {@code getString(-1)} on the resulting match.
 * <p>
 * This method does not raise an exception on failure: it returns
 * {@code null} when detection produces no match, and also when an
 * {@link IOException} is thrown along the way.
 *
 * @param in The source of the byte data in the unknown charset.
 * @param declaredEncoding A declared encoding for the data, if available,
 *        or null or an empty string if none is available.
 * @return the converted input data, or {@code null} if no match was
 *         produced or an {@link IOException} occurred.
 * @stable ICU 3.4
 */
public String getString(byte[] in, String declaredEncoding) {
    fDeclaredEncoding = declaredEncoding;
    try {
        setText(in);
        final CharsetMatch best = detect();
        // No match at all is reported to the caller as null.
        return (best == null) ? null : best.getString(-1);
    } catch (IOException ex) {
        // I/O failures are reported the same way as "no match".
        return null;
    }
}
// Hand the input to the detector, then run detection and keep the resulting match.
// NOTE(review): other call sites null-check the result of detect() before using it — confirm this one does too.
setText(in); CharsetMatch match = detect();
CharsetDetector detector = new CharsetDetector(); detector.setText(text.getRawValue()); CharsetMatch match = detector.detect(); if (match != null && match.getConfidence() > 35 && tryToSet7BitEncoding(msg, match.getName())) {
/**
 * Guesses the character encoding of the content of a stream.
 * <p>
 * The stream is passed to a new {@code CharsetDetector} (with its input
 * filter enabled) and the name of the match it reports is returned.
 *
 * @param is the stream to inspect; used as is when it already is a
 *        {@link BufferedInputStream}, otherwise wrapped in one
 * @return the name of the detected charset, never {@code null}
 * @throws IOException if the detector fails while consuming the stream,
 *         or if it reports no matching charset
 */
public String guessEncoding(InputStream is) throws IOException {
    InputStream buffered =
            is instanceof BufferedInputStream ? is : new BufferedInputStream(is);

    CharsetDetector charsetDetector = new CharsetDetector();
    charsetDetector.setText(buffered);
    charsetDetector.enableInputFilter(true);

    CharsetMatch cm = charsetDetector.detect();
    if (cm == null) {
        // Previously this case surfaced as a bare NullPointerException.
        throw new IOException("Unable to detect a charset for the given stream");
    }
    return cm.getName();
}
/**
 * Detects the charset of a byte array and returns its contents decoded
 * into a String.
 * <p>
 * Convenience shorthand for: record the declared encoding, call
 * {@code setText(in)}, call {@code detect()}, then call
 * {@code getString(-1)} on the resulting match.
 * <p>
 * This method does not raise an exception on failure: it returns
 * {@code null} when detection produces no match, and also when an
 * {@link IOException} is thrown along the way.
 *
 * @param in The source of the byte data in the unknown charset.
 * @param declaredEncoding A declared encoding for the data, if available,
 *        or null or an empty string if none is available.
 * @return the converted input data, or {@code null} if no match was
 *         produced or an {@link IOException} occurred.
 * @stable ICU 3.4
 */
public String getString(byte[] in, String declaredEncoding) {
    fDeclaredEncoding = declaredEncoding;
    try {
        setText(in);
        final CharsetMatch best = detect();
        // No match at all is reported to the caller as null.
        return (best == null) ? null : best.getString(-1);
    } catch (IOException ex) {
        // I/O failures are reported the same way as "no match".
        return null;
    }
}
/**
 * Detects the charset of a byte array and returns its contents decoded
 * into a String.
 * <p>
 * Convenience shorthand for: record the declared encoding, call
 * {@code setText(in)}, call {@code detect()}, then call
 * {@code getString(-1)} on the resulting match.
 * <p>
 * This method does not raise an exception on failure: it returns
 * {@code null} when detection produces no match, and also when an
 * {@link IOException} is thrown along the way.
 *
 * @param in The source of the byte data in the unknown charset.
 * @param declaredEncoding A declared encoding for the data, if available,
 *        or null or an empty string if none is available.
 * @return the converted input data, or {@code null} if no match was
 *         produced or an {@link IOException} occurred.
 * @stable ICU 3.4
 */
public String getString(byte[] in, String declaredEncoding) {
    fDeclaredEncoding = declaredEncoding;
    try {
        setText(in);
        final CharsetMatch best = detect();
        // No match at all is reported to the caller as null.
        return (best == null) ? null : best.getString(-1);
    } catch (IOException ex) {
        // I/O failures are reported the same way as "no match".
        return null;
    }
}
/**
 * Detects the charset of a byte array and returns its contents decoded
 * into a String.
 * <p>
 * Convenience shorthand for: record the declared encoding, call
 * {@code setText(in)}, call {@code detect()}, then call
 * {@code getString(-1)} on the resulting match.
 * <p>
 * This method does not raise an exception on failure: it returns
 * {@code null} when detection produces no match, and also when an
 * {@link IOException} is thrown along the way.
 *
 * @param in The source of the byte data in the unknown charset.
 * @param declaredEncoding A declared encoding for the data, if available,
 *        or null or an empty string if none is available.
 * @return the converted input data, or {@code null} if no match was
 *         produced or an {@link IOException} occurred.
 * @stable ICU 3.4
 */
public String getString(byte[] in, String declaredEncoding) {
    fDeclaredEncoding = declaredEncoding;
    try {
        setText(in);
        final CharsetMatch best = detect();
        // No match at all is reported to the caller as null.
        return (best == null) ? null : best.getString(-1);
    } catch (IOException ex) {
        // I/O failures are reported the same way as "no match".
        return null;
    }
}
// Hand the input to the detector, then run detection and keep the resulting match.
// NOTE(review): other call sites null-check the result of detect() before using it — confirm this one does too.
setText(in); CharsetMatch match = detect();
// Hand the input to the detector, then run detection and keep the resulting match.
// NOTE(review): other call sites null-check the result of detect() before using it — confirm this one does too.
setText(in); CharsetMatch match = detect();
// Hand the input to the detector, then run detection and keep the resulting match.
// NOTE(review): other call sites null-check the result of detect() before using it — confirm this one does too.
setText(in); CharsetMatch match = detect();
/**
 * Detects the character encoding of a string. When the character
 * encoding of what the input is supposed to be is known, specifying
 * it as a declared encoding will influence the detection result.
 * <p>
 * The string is converted to UTF-8 bytes and those bytes are handed to a
 * {@code CharsetDetector} with its input filter enabled.
 *
 * @param input the input to detect encoding on
 * @param declaredEncoding declared input encoding, if known; ignored
 *        when blank
 * @return the character encoding official name, or <code>null</code>
 *         if the input is null or blank, or if the detector reports
 *         no match
 * @throws IOException if there is a problem finding the character encoding
 */
public static String detectCharset(
        String input, String declaredEncoding) throws IOException {
    if (StringUtils.isBlank(input)) {
        return null;
    }
    CharsetDetector cd = new CharsetDetector();
    if (StringUtils.isNotBlank(declaredEncoding)) {
        cd.setDeclaredEncoding(declaredEncoding);
    }
    cd.enableInputFilter(true);
    cd.setText(input.getBytes("UTF-8"));

    CharsetMatch match = cd.detect();
    if (match == null) {
        // No match: return the same "no answer" value already used for
        // blank input instead of failing with a NullPointerException.
        return null;
    }
    String charset = match.getName();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Detected encoding: " + charset);
    }
    return charset;
}
/**
 * Detects the charset of the given bytes with a new
 * {@code CharsetDetector}.
 *
 * @param data the bytes whose charset is to be detected
 * @param hint a declared encoding passed on to the detector; skipped
 *        when {@code null}
 * @return the name of the detected charset, or {@code null} when the
 *         detector reports no match
 */
@Override
public String detect(byte[] data, String hint) {
    CharsetDetector detector = new CharsetDetector();
    if (hint != null) {
        detector.setDeclaredEncoding(hint);
    }
    detector.setText(data);
    CharsetMatch match = detector.detect();
    // Guard against a missing match instead of failing with a NullPointerException.
    // NOTE(review): confirm the overridden contract allows a null result.
    return match != null ? match.getName() : null;
}
}
cd.setText(input); rewind(input); CharsetMatch match = cd.detect(); charset = match.getName(); if (LOG.isDebugEnabled()) {
// Encode fileString into bytes using the name of the charset the detector reports.
// NOTE(review): the result of detect() is dereferenced without a null check, unlike other call sites — confirm a match is guaranteed here.
bytes = fileString.getBytes(charsetDetector.detect().getName());
@Override public List<ImporterDocument> parseDocument(ImporterDocument doc, Writer output) throws DocumentParserException { try { //TODO have a generic utility method for this? BufferedInputStream is = new BufferedInputStream(doc.getContent()); CharsetDetector detector = new CharsetDetector(); detector.enableInputFilter(true); detector.setText(is); CharsetMatch match = detector.detect(); String charset = StandardCharsets.UTF_8.toString(); if (match != null && Charset.isSupported(match.getName())) { charset = match.getName(); } BufferedReader reader = new BufferedReader( new InputStreamReader(is, charset)); parse(reader, output, doc.getMetadata()); } catch (IOException | ParserConfigurationException | SAXException e) { throw new DocumentParserException( "Could not parse " + doc.getReference(), e); } return null; }
// Give the content bytes to the detector, then run detection and keep the resulting match.
// NOTE(review): other call sites null-check the result of detect() before using it — confirm this one does too.
detector.setText(contentBytes); CharsetMatch match = detector.detect();
CharsetDetector detector = new CharsetDetector(); detector.setText(text.getRawValue()); CharsetMatch match = detector.detect(); if (match != null && match.getConfidence() > 35 && tryToSet7BitEncoding(msg, match.getName())) {
CharsetDetector detector = new CharsetDetector(); detector.setText(text.getRawValue()); CharsetMatch match = detector.detect(); if (match != null && match.getConfidence() > 35 && tryToSet7BitEncoding(msg, match.getName())) {
CharsetDetector detector = new CharsetDetector(); detector.setText(text.getRawValue()); CharsetMatch match = detector.detect(); if (match != null && match.getConfidence() > 35 && tryToSet7BitEncoding(msg, match.getName())) {
// Run a new detector over content and take the name of the match it reports.
// NOTE(review): the result of detect() is dereferenced without a null check, unlike other call sites — confirm a match is guaranteed here.
CharsetDetector charDetect = new CharsetDetector(); charDetect.setText(content); String charSet = charDetect.detect().getName();