/**
 * Asks each detector returned by {@code getDetectors()}, in iteration order,
 * for a charset and returns the first non-null answer.
 *
 * @param input    text document input stream, or <code>null</code>
 * @param metadata input metadata for the document
 * @return the first charset reported by a delegate detector, or
 *         <code>null</code> if every delegate returned <code>null</code>
 * @throws IOException if a delegate detector throws it; later delegates
 *                     are not consulted in that case
 */
@Override
public Charset detect(InputStream input, Metadata metadata) throws IOException {
    for (EncodingDetector candidate : getDetectors()) {
        Charset charset = candidate.detect(input, metadata);
        if (charset == null) {
            // This delegate had no opinion; move on to the next one.
            continue;
        }
        return charset;
    }
    return null;
}
private static void addEncodingDetectors(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception { EncodingDetector encDetector = config.getEncodingDetector(); if (mode == Mode.MINIMAL && encDetector instanceof DefaultEncodingDetector) { // Don't output anything, all using defaults Node detComment = doc.createComment( "for example: <encodingDetectors><encodingDetector class=\"" + "org.apache.tika.detect.DefaultEncodingDetector\"></encodingDetectors>"); rootElement.appendChild(detComment); return; } Element encDetectorsElement = doc.createElement("encodingDetectors"); if (mode == Mode.CURRENT && encDetector instanceof DefaultEncodingDetector || ! (encDetector instanceof CompositeEncodingDetector)) { Element encDetectorElement = doc.createElement("encodingDetector"); encDetectorElement.setAttribute("class", encDetector.getClass().getCanonicalName()); encDetectorsElement.appendChild(encDetectorElement); } else { List<EncodingDetector> children = ((CompositeEncodingDetector)encDetector).getDetectors(); for (EncodingDetector d : children) { Element encDetectorElement = doc.createElement("encodingDetector"); encDetectorElement.setAttribute("class", d.getClass().getCanonicalName()); encDetectorsElement.appendChild(encDetectorElement); } } rootElement.appendChild(encDetectorsElement); }
/**
 * Asks each detector returned by {@code getDetectors()}, in iteration order,
 * for a charset and returns the first non-null answer.
 *
 * @param input    text document input stream, or <code>null</code>
 * @param metadata input metadata for the document
 * @return the first charset reported by a delegate detector, or
 *         <code>null</code> if every delegate returned <code>null</code>
 * @throws IOException if a delegate detector throws it; later delegates
 *                     are not consulted in that case
 */
@Override
public Charset detect(InputStream input, Metadata metadata) throws IOException {
    for (EncodingDetector candidate : getDetectors()) {
        Charset charset = candidate.detect(input, metadata);
        if (charset == null) {
            // This delegate had no opinion; move on to the next one.
            continue;
        }
        return charset;
    }
    return null;
}
private static void addEncodingDetectors(Mode mode, Element rootElement, Document doc, TikaConfig config) throws Exception { EncodingDetector encDetector = config.getEncodingDetector(); if (mode == Mode.MINIMAL && encDetector instanceof DefaultEncodingDetector) { // Don't output anything, all using defaults Node detComment = doc.createComment( "for example: <encodingDetectors><encodingDetector class=\"" + "org.apache.tika.detect.DefaultEncodingDetector\"></encodingDetectors>"); rootElement.appendChild(detComment); return; } Element encDetectorsElement = doc.createElement("encodingDetectors"); if (mode == Mode.CURRENT && encDetector instanceof DefaultEncodingDetector || ! (encDetector instanceof CompositeEncodingDetector)) { Element encDetectorElement = doc.createElement("encodingDetector"); encDetectorElement.setAttribute("class", encDetector.getClass().getCanonicalName()); encDetectorsElement.appendChild(encDetectorElement); } else { List<EncodingDetector> children = ((CompositeEncodingDetector)encDetector).getDetectors(); for (EncodingDetector d : children) { Element encDetectorElement = doc.createElement("encodingDetector"); encDetectorElement.setAttribute("class", d.getClass().getCanonicalName()); encDetectorsElement.appendChild(encDetectorElement); } } rootElement.appendChild(encDetectorsElement); }