extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); extractor.close(); wb.close();
// Usage example: load the legacy .xls workbook from disk through a POIFS filesystem.
InputStream inp = new FileInputStream("workbook.xls");
HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));

// Configure the extractor before pulling the text out of the workbook.
ExcelExtractor extractor = new ExcelExtractor(wb);
extractor.setIncludeSheetNames(false);
extractor.setFormulasNotResults(true);

// The whole workbook rendered as a single string.
String text = extractor.getText();
/**
 * {@inheritDoc}
 *
 * <p>Reads the stream as a POIFS filesystem, runs an {@code ExcelExtractor} over it and
 * hands the extracted text back through a {@code StringReader}. The {@code type} and
 * {@code encoding} arguments are not consulted by this implementation.
 *
 * @param stream   source of the Excel document; always closed before this method returns
 * @param type     not used here
 * @param encoding not used here
 * @return a reader over the extracted text, or over the empty string when extraction
 *         fails with a {@code RuntimeException}
 * @throws IOException if reading the document or closing the stream fails
 */
public Reader extractText(InputStream stream, String type, String encoding) throws IOException {
    try {
        ExcelExtractor extractor = new ExcelExtractor(new POIFSFileSystem(stream));
        String text = extractor.getText();
        return new StringReader(text);
    } catch (RuntimeException e) {
        // Best effort: a document that cannot be parsed yields empty content instead of failing.
        logger.warn("Failed to extract Excel text content", e);
        return new StringReader("");
    } finally {
        stream.close();
    }
}
}
/** * Extrae el texto de un fichero excel. * @param in * @return String. Devuelve el texto crudo * @throws Exception */ public static String extractText(InputStream in) throws Exception { String result = ""; HSSFWorkbook wb = new HSSFWorkbook(in); ExcelExtractor ee = new ExcelExtractor(wb); result = ee.getText(); // Eliminamos los caracteres que no nos sirven para indexar. result = ExtractorUtil.removeControlChars(result); return result; }
/**
 * Builds an index document from the bytes of an Excel file.
 *
 * <p>The bytes in {@code fileData.data} are read as a POIFS filesystem, the workbook text
 * is extracted with an {@code ExcelExtractor}, and the text is paired with
 * {@code fileData.path} in the returned {@code IndexDocument}.
 *
 * @param fileData carries the raw file bytes ({@code data}) and the resource path ({@code path})
 * @return an index document holding the path and the extracted text
 * @throws SolrException with {@code ErrorCode.SERVER_ERROR} if the bytes cannot be read as
 *         an Excel document; the underlying {@code IOException} is attached as the cause
 */
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
        ExcelExtractor extractor = new ExcelExtractor(fs);
        String excelText = extractor.getText();
        return new IndexDocument(fileData.path, excelText, null);
    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // Keep the original failure as the cause so callers do not lose the stack trace.
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }
}
/**
 * Extracts the text of an Excel workbook supplied as a stream.
 *
 * @param in     stream holding the workbook; must not be {@code null}
 * @param params not consulted by this implementation
 * @return the extracted text wrapped in an {@code ExtractData}
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException     if reading the workbook fails with an {@code IOException}
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    try {
        final HSSFWorkbook workbook = new HSSFWorkbook(in);
        final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
                new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
        final String text = extractor.getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
}
/**
 * Extracts the text of an Excel workbook supplied as a stream.
 *
 * @param in     stream holding the workbook; must not be {@code null}
 * @param params not consulted by this implementation
 * @return the extracted text wrapped in an {@code ExtractData}
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException     if reading the workbook fails with an {@code IOException}
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    try {
        final HSSFWorkbook workbook = new HSSFWorkbook(in);
        final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
                new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
        final String text = extractor.getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
}
/**
 * Extracts the text of an Excel workbook supplied as a stream.
 *
 * @param in     stream holding the workbook; must not be {@code null}
 * @param params not consulted by this implementation
 * @return the extracted text wrapped in an {@code ExtractData}
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException     if reading the workbook fails with an {@code IOException}
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    try {
        final HSSFWorkbook workbook = new HSSFWorkbook(in);
        final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
                new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
        final String text = extractor.getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
}
/**
 * Extracts the text of an Excel workbook supplied as a stream.
 *
 * @param in     stream holding the workbook; must not be {@code null}
 * @param params not consulted by this implementation
 * @return the extracted text wrapped in an {@code ExtractData}
 * @throws CrawlerSystemException if {@code in} is {@code null}
 * @throws ExtractException       if reading the workbook fails with an {@code IOException}
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new CrawlerSystemException("The inputstream is null.");
    }
    try {
        // The extractor is intentionally left unclosed here; the warning is silenced on purpose.
        @SuppressWarnings("resource")
        final org.apache.poi.hssf.extractor.ExcelExtractor excelExtractor =
                new org.apache.poi.hssf.extractor.ExcelExtractor(new HSSFWorkbook(in));
        final String text = excelExtractor.getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
}
/**
 * Processes an Excel 2003 workbook.
 *
 * <p>Extracts the workbook text and stores the extractor's document-summary and summary
 * information in {@code m_documentSummary} and {@code m_summary}.
 *
 * @param in stream the workbook is read from
 * @return the extracted text, or {@code null} if a {@code FileNotFoundException} was
 *         caught before any text had been extracted
 * @throws IOException if reading the workbook fails for any other I/O reason
 */
public String readExcel(InputStream in) throws IOException {
    String text = null;
    try {
        ExcelExtractor excelExtractor = new ExcelExtractor(new HSSFWorkbook(in));
        excelExtractor.setIncludeSheetNames(false);
        excelExtractor.setFormulasNotResults(true);
        text = excelExtractor.getText();
        // Keep the metadata around alongside the text.
        this.m_documentSummary = excelExtractor.getDocSummaryInformation();
        this.m_summary = excelExtractor.getSummaryInformation();
    } catch (FileNotFoundException e) {
        // NOTE(review): this one failure is swallowed while other IOExceptions propagate -
        // confirm that is intended.
        e.printStackTrace();
    }
    return text;
}

/**
extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); } catch (Exception e) { e.printStackTrace();
extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); } catch (Exception e) { e.printStackTrace();
extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); extractor.close(); wb.close();
/**
 * Parses a legacy Excel workbook and feeds its metadata and text into the result builder.
 *
 * <p>The MIME type is always recorded in the metas. When the workbook exposes summary
 * information, its title, author, subject, creation date, last-save date and keywords are
 * added as well. The extracted text is stored as the content of a new document, followed by
 * the outcome of {@code languageDetection} on that content.
 *
 * @param parameters    not consulted by this implementation
 * @param inputStream   stream the workbook is read from
 * @param extension     file extension, forwarded to {@code findMimeType}
 * @param mimeType      declared MIME type, forwarded to {@code findMimeType}
 * @param resultBuilder receives the metas and the new document
 * @throws Exception if reading the workbook or extracting from it fails
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws Exception {
    final HSSFWorkbook workbook = new HSSFWorkbook(inputStream);
    try (final ExcelExtractor extractor = new ExcelExtractor(workbook)) {

        // Metadata first: the MIME type, then whatever the summary information provides.
        final ParserFieldsBuilder metaFields = resultBuilder.metas();
        metaFields.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));

        final SummaryInformation summary = extractor.getSummaryInformation();
        if (summary != null) {
            metaFields.add(TITLE, summary.getTitle());
            metaFields.add(AUTHOR, summary.getAuthor());
            metaFields.add(SUBJECT, summary.getSubject());
            metaFields.add(CREATION_DATE, summary.getCreateDateTime());
            metaFields.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
            metaFields.add(KEYWORDS, summary.getKeywords());
        }

        // Then the document body and the language detected from it.
        final ParserFieldsBuilder document = resultBuilder.newDocument();
        document.add(CONTENT, extractor.getText());
        document.add(LANG_DETECTION, languageDetection(document, CONTENT, 10000));
    }
}
}