charsetDetector.enableInputFilter(true); CharsetMatch cm = charsetDetector.detect(); if (cm != null) {
/** * Use a third party library as last resort to guess the charset from the * bytes. */ private static String getCharsetFromText(byte[] content, String declaredCharset, int maxLengthCharsetDetection) { String charset = null; // filter HTML tags CharsetDetector charsetDetector = new CharsetDetector(); charsetDetector.enableInputFilter(true); // give it a hint if (declaredCharset != null) charsetDetector.setDeclaredEncoding(declaredCharset); // trim the content of the text for the detection byte[] subContent = content; if (maxLengthCharsetDetection != -1 && content.length > maxLengthCharsetDetection) { subContent = Arrays.copyOfRange(content, 0, maxLengthCharsetDetection); } charsetDetector.setText(subContent); try { CharsetMatch charsetMatch = charsetDetector.detect(); charset = validateCharset(charsetMatch.getName()); } catch (Exception e) { charset = null; } return charset; }