/**
 * Extracts the text from a zip archive by reading its <code>content.xml</code> entry.<p>
 *
 * The archive entries are scanned in order until one named <code>content.xml</code>
 * (compared case-insensitively) is found. That entry is handed to <code>readContent</code>,
 * control characters are stripped from the text with <code>removeControlChars</code>, and the
 * outcome is wrapped in a {@link CmsExtractionResult}.<p>
 *
 * The zip stream (and with it the given input stream) is always closed before this method
 * returns, whether or not the extraction succeeded.<p>
 *
 * @param in the input stream providing the zip archive
 * @param encoding not used by this implementation
 *
 * @return the extraction result holding the cleaned-up text
 *
 * @throws Exception if the archive cannot be read or contains no <code>content.xml</code> entry
 *
 * @see org.opencms.search.extractors.A_CmsTextExtractor#extractText(java.io.InputStream, java.lang.String)
 */
@Override
public I_CmsExtractionResult extractText(InputStream in, String encoding) throws Exception {

    ZipInputStream zin = new ZipInputStream(in);
    boolean foundContent = false;
    String result = "";
    try {
        ZipEntry ze;
        // getNextEntry() returns null once the archive is exhausted, so stop there
        // instead of dereferencing a missing entry
        while (!foundContent && ((ze = zin.getNextEntry()) != null)) {
            foundContent = ze.getName().equalsIgnoreCase("content.xml");
            if (foundContent) {
                result = readContent(zin);
            }
        }
    } finally {
        try {
            zin.close();
        } catch (Exception e) {
            // a failing close must not discard an already extracted result
            e.printStackTrace(System.err);
        }
    }
    if (!foundContent) {
        // fail with a clear message rather than an anonymous NullPointerException
        throw new java.io.IOException("Zip archive does not contain a 'content.xml' entry");
    }
    result = removeControlChars(result);
    return new CmsExtractionResult(result);
}
/**
 * Returns the raw text content of a given VFS resource, extracted with the OpenOffice extractor.<p>
 *
 * The resource is read as a file and its contents are passed to
 * <code>CmsExtractorOpenOffice.getExtractor().extractText(...)</code>. Any exception raised
 * while extracting is wrapped in a {@link CmsIndexException} that names the root path of
 * the resource and keeps the original exception as its cause.<p>
 *
 * @param cms the cms context used to read the file
 * @param resource the resource to extract the text from
 * @param index the search index (not used by this implementation)
 *
 * @return the result of the text extraction
 *
 * @throws CmsIndexException if the text extraction fails
 * @throws CmsException declared by this method; presumably raised when the file cannot be read
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
throws CmsIndexException, CmsException {

    CmsFile vfsFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorOpenOffice.getExtractor().extractText(vfsFile.getContents());
    } catch (Exception cause) {
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            cause);
    }
    return extracted;
}
/**
 * Returns the raw text content of a given VFS resource, extracted with the OpenOffice extractor.<p>
 *
 * The extraction is first reported through <code>logContentExtraction</code>. The resource
 * is then read as a file and its contents are passed to
 * <code>CmsExtractorOpenOffice.getExtractor().extractText(...)</code>. Any exception raised
 * while extracting is wrapped in a {@link CmsIndexException} that names the root path of
 * the resource and keeps the original exception as its cause.<p>
 *
 * @param cms the cms context used to read the file
 * @param resource the resource to extract the text from
 * @param index the search index, passed on to <code>logContentExtraction</code>
 *
 * @return the result of the text extraction
 *
 * @throws CmsIndexException if the text extraction fails
 * @throws CmsException declared by this method; presumably raised when the file cannot be read
 *
 * @see org.opencms.search.documents.I_CmsSearchExtractor#extractContent(CmsObject, CmsResource, CmsSearchIndex)
 */
public I_CmsExtractionResult extractContent(CmsObject cms, CmsResource resource, CmsSearchIndex index)
throws CmsIndexException, CmsException {

    logContentExtraction(resource, index);
    CmsFile vfsFile = readFile(cms, resource);
    I_CmsExtractionResult extracted;
    try {
        extracted = CmsExtractorOpenOffice.getExtractor().extractText(vfsFile.getContents());
    } catch (Exception cause) {
        throw new CmsIndexException(
            Messages.get().container(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()),
            cause);
    }
    return extracted;
}
/**
 * Extracts the text from a zip archive by reading its <code>content.xml</code> entry.<p>
 *
 * The archive entries are scanned in order until one named <code>content.xml</code>
 * (compared case-insensitively) is found. That entry is handed to <code>readContent</code>,
 * control characters are stripped from the text with <code>removeControlChars</code>, and the
 * outcome is wrapped in a {@link CmsExtractionResult}.<p>
 *
 * The zip stream (and with it the given input stream) is always closed before this method
 * returns, whether or not the extraction succeeded.<p>
 *
 * @param in the input stream providing the zip archive
 * @param encoding not used by this implementation
 *
 * @return the extraction result holding the cleaned-up text
 *
 * @throws Exception if the archive cannot be read or contains no <code>content.xml</code> entry
 *
 * @see org.opencms.search.extractors.A_CmsTextExtractor#extractText(java.io.InputStream, java.lang.String)
 */
@Override
public I_CmsExtractionResult extractText(InputStream in, String encoding) throws Exception {

    ZipInputStream zin = new ZipInputStream(in);
    boolean foundContent = false;
    String result = "";
    try {
        ZipEntry ze;
        // getNextEntry() returns null once the archive is exhausted, so stop there
        // instead of dereferencing a missing entry
        while (!foundContent && ((ze = zin.getNextEntry()) != null)) {
            foundContent = ze.getName().equalsIgnoreCase("content.xml");
            if (foundContent) {
                result = readContent(zin);
            }
        }
    } finally {
        try {
            zin.close();
        } catch (Exception e) {
            // a failing close must not discard an already extracted result
            e.printStackTrace(System.err);
        }
    }
    if (!foundContent) {
        // fail with a clear message rather than an anonymous NullPointerException
        throw new java.io.IOException("Zip archive does not contain a 'content.xml' entry");
    }
    result = removeControlChars(result);
    return new CmsExtractionResult(result);
}