} catch (CommandParseException e) { System.err.println(e.getMessage()); printUsageMessage(System.err); System.exit(1); return; // suppress compiler error printUsageMessage(System.out); return; is.close(); ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setIncludeSheetNames(cmdArgs.shouldShowSheetNames()); extractor.setFormulasNotResults(!cmdArgs.shouldEvaluateFormulas()); extractor.setIncludeCellComments(cmdArgs.shouldShowCellComments()); extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); extractor.close(); wb.close();
/**
 * Renders a sheet header or footer as text.
 *
 * <p>Instance-level convenience that forwards to the static
 * {@code ExcelExtractor._extractHeaderFooter} helper without altering its result.
 *
 * @param hf the header or footer to render
 * @return the text produced by {@code ExcelExtractor._extractHeaderFooter} for {@code hf}
 */
private String extractHeaderFooter(HeaderFooter hf) {
    final String rendered = ExcelExtractor._extractHeaderFooter(hf);
    return rendered;
}
}
/** * Extrae el texto de un fichero excel. * @param in * @return String. Devuelve el texto crudo * @throws Exception */ public static String extractText(InputStream in) throws Exception { String result = ""; HSSFWorkbook wb = new HSSFWorkbook(in); ExcelExtractor ee = new ExcelExtractor(wb); result = ee.getText(); // Eliminamos los caracteres que no nos sirven para indexar. result = ExtractorUtil.removeControlChars(result); return result; }
/** * 处理excel2003 * @param path * @return * @throws IOException */ public String readExcel(InputStream in) throws IOException { String content = null; try { HSSFWorkbook wb = new HSSFWorkbook(in); ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setFormulasNotResults(true); extractor.setIncludeSheetNames(false); content = extractor.getText(); this.m_documentSummary = extractor.getDocSummaryInformation(); this.m_summary = extractor.getSummaryInformation(); } catch (FileNotFoundException e) { e.printStackTrace(); } return content; } /**
/**
 * Returns an {@link ExcelExtractor} for the workbook held by this object.
 *
 * <p>An {@link HSSFWorkbook} is wrapped in the HSSF extractor; any other
 * workbook is cast to {@link XSSFWorkbook} and wrapped in an
 * {@link XSSFExcelExtractor}.
 *
 * @return {@link ExcelExtractor}
 * @since 4.1.0
 */
public ExcelExtractor getExtractor() {
    final Workbook current = this.workbook;
    if (current instanceof HSSFWorkbook) {
        return new org.apache.poi.hssf.extractor.ExcelExtractor((HSSFWorkbook) current);
    }
    return new XSSFExcelExtractor((XSSFWorkbook) current);
}
/**
 * Parses a legacy Excel workbook, recording its metadata and its text content.
 *
 * <p>The MIME type and, when summary information is available, the title,
 * author, subject, creation date, modification date and keywords are written
 * to the result's metas. The extracted text is added to a new document
 * together with the outcome of language detection over that text.
 *
 * <p>Both the workbook and the extractor are declared as try-with-resources
 * resources, so the workbook is closed even if constructing the extractor fails.
 *
 * @param parameters    not used by this implementation
 * @param inputStream   stream supplying the workbook bytes
 * @param extension     file extension passed to {@code findMimeType}
 * @param mimeType      MIME type hint passed to {@code findMimeType}
 * @param resultBuilder receives the metas and the new document
 * @throws Exception if the workbook cannot be read or parsed
 */
@Override
public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
        final String extension, final String mimeType, final ParserResultBuilder resultBuilder)
        throws Exception {
    try (final HSSFWorkbook workbook = new HSSFWorkbook(inputStream);
         final ExcelExtractor excel = new ExcelExtractor(workbook)) {

        final ParserFieldsBuilder metas = resultBuilder.metas();
        metas.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));

        // The summary block is optional; skip the metadata fields when it is absent.
        final SummaryInformation info = excel.getSummaryInformation();
        if (info != null) {
            metas.add(TITLE, info.getTitle());
            metas.add(AUTHOR, info.getAuthor());
            metas.add(SUBJECT, info.getSubject());
            metas.add(CREATION_DATE, info.getCreateDateTime());
            metas.add(MODIFICATION_DATE, info.getLastSaveDateTime());
            metas.add(KEYWORDS, info.getKeywords());
        }

        final ParserFieldsBuilder result = resultBuilder.newDocument();
        result.add(CONTENT, excel.getText());
        result.add(LANG_DETECTION, languageDetection(result, CONTENT, 10000));
    }
}
}
// Extracts the text of "workbook.xls" into `text` using the HSSF ExcelExtractor.
// NOTE(review): nothing in this snippet closes `inp`, `wb` or `extractor` —
// confirm the surrounding code releases them.
InputStream inp = new FileInputStream("workbook.xls");
// Build the workbook on top of a POIFS file system that reads from the stream.
HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
ExcelExtractor extractor = new ExcelExtractor(wb);
// Configure the extractor before reading: formulas-not-results on, sheet names off.
// (Presumably this emits formula text instead of computed values — verify against the POI docs.)
extractor.setFormulasNotResults(true);
extractor.setIncludeSheetNames(false);
String text = extractor.getText();
/**
 * {@inheritDoc}
 *
 * <p>Reads the stream as a POIFS file system and returns the text produced by
 * an {@link ExcelExtractor}. A {@link RuntimeException} raised during
 * extraction is logged as a warning and an empty reader is returned instead.
 * The stream is always closed before this method returns; {@code type} and
 * {@code encoding} are not consulted.
 */
public Reader extractText(InputStream stream, String type, String encoding) throws IOException {
    try {
        final POIFSFileSystem fileSystem = new POIFSFileSystem(stream);
        final ExcelExtractor extractor = new ExcelExtractor(fileSystem);
        final String text = extractor.getText();
        return new StringReader(text);
    } catch (RuntimeException e) {
        logger.warn("Failed to extract Excel text content", e);
        return new StringReader("");
    } finally {
        stream.close();
    }
}
}
/**
 * Returns an {@link ExcelExtractor} for the workbook held by this object.
 *
 * <p>An {@link HSSFWorkbook} is wrapped in the HSSF extractor; any other
 * workbook is cast to {@link XSSFWorkbook} and wrapped in an
 * {@link XSSFExcelExtractor}.
 *
 * @return {@link ExcelExtractor}
 * @since 4.1.0
 */
public ExcelExtractor getExtractor() {
    final Workbook current = this.workbook;
    if (current instanceof HSSFWorkbook) {
        return new org.apache.poi.hssf.extractor.ExcelExtractor((HSSFWorkbook) current);
    }
    return new XSSFExcelExtractor((XSSFWorkbook) current);
}
} catch (CommandParseException e) { System.err.println(e.getMessage()); printUsageMessage(System.err); System.exit(1); return; // suppress compiler error printUsageMessage(System.out); return; ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setIncludeSheetNames(cmdArgs.shouldShowSheetNames()); extractor.setFormulasNotResults(!cmdArgs.shouldEvaluateFormulas()); extractor.setIncludeCellComments(cmdArgs.shouldShowCellComments()); extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); } catch (Exception e) { e.printStackTrace();
/**
 * Extracts the text of a legacy Excel workbook read from the given stream.
 *
 * @param in     stream supplying the workbook bytes; must not be {@code null}
 * @param params not used by this implementation
 * @return an {@code ExtractData} wrapping the extracted text
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException     if reading the workbook raises an {@link IOException}
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    try {
        final HSSFWorkbook workbook = new HSSFWorkbook(in);
        final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
                new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
        final String text = extractor.getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
}
return new EventBasedExcelExtractor(poifsDir); return new ExcelExtractor(poifsDir);
text.append(_extractHeaderFooter(sheet.getHeader())); text.append(_extractHeaderFooter(sheet.getFooter()));
} catch (CommandParseException e) { System.err.println(e.getMessage()); printUsageMessage(System.err); System.exit(1); return; // suppress compiler error printUsageMessage(System.out); return; ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setIncludeSheetNames(cmdArgs.shouldShowSheetNames()); extractor.setFormulasNotResults(!cmdArgs.shouldEvaluateFormulas()); extractor.setIncludeCellComments(cmdArgs.shouldShowCellComments()); extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); } catch (Exception e) { e.printStackTrace();
/**
 * Extracts the text of a legacy Excel workbook read from the given stream.
 *
 * @param in     stream supplying the workbook bytes; must not be {@code null}
 * @param params not used by this implementation
 * @return an {@code ExtractData} wrapping the extracted text
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException     if reading the workbook raises an {@link IOException}
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    try {
        final HSSFWorkbook workbook = new HSSFWorkbook(in);
        final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
                new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
        final String text = extractor.getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
}
/**
 * Skeleton of an {@code Extractor} implementation that delegates to an
 * {@link ExcelExtractor}.
 *
 * <p>NOTE(review): this is an illustrative outline — the extractor's constructor
 * arguments are elided ({@code ...}) and {@link #getText()} has no body yet, so
 * it does not compile as written.
 */
class ConcreteExcelExtractor implements Extractor {
    /** Extractor assigned by {@link #setInputStream(FileInputStream)}. */
    private ExcelExtractor x;

    /**
     * Creates the underlying extractor and stores it in {@code x}.
     *
     * @param fis stream intended to supply the workbook (not yet referenced by the body)
     */
    public void setInputStream(FileInputStream fis) {
        // load the Excel workbook from input stream
        this.x = new ExcelExtractor(...);
    }

    /**
     * Placeholder for the Excel-specific text extraction.
     *
     * @return the extracted text once implemented
     */
    public String getText() {
        // your logic for Excel
    }
}
/**
 * Writes a header or footer string to the XHTML output as a paragraph.
 *
 * <p>The raw string is wrapped in a {@code HeaderFooterFromString} and converted
 * with {@code ExcelExtractor._extractHeaderFooter}; nothing is emitted when the
 * converted text is empty.
 *
 * @param hf    raw header or footer string
 * @param xhtml handler that receives the {@code <p>} element
 * @throws SAXException if writing the element fails
 */
protected void extractHeaderFooter(String hf, XHTMLContentHandler xhtml) throws SAXException {
    final HeaderFooterFromString wrapped = new HeaderFooterFromString(hf);
    final String text = ExcelExtractor._extractHeaderFooter(wrapped);
    if (text.length() == 0) {
        return;
    }
    xhtml.element("p", text);
}
} catch (CommandParseException e) { System.err.println(e.getMessage()); printUsageMessage(System.err); System.exit(1); return; // suppress compiler error printUsageMessage(System.out); return; is.close(); ExcelExtractor extractor = new ExcelExtractor(wb); extractor.setIncludeSheetNames(cmdArgs.shouldShowSheetNames()); extractor.setFormulasNotResults(!cmdArgs.shouldEvaluateFormulas()); extractor.setIncludeCellComments(cmdArgs.shouldShowCellComments()); extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells()); extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters()); System.out.println(extractor.getText()); extractor.close(); wb.close();
/**
 * Extracts the text of a legacy Excel workbook read from the given stream.
 *
 * @param in     stream supplying the workbook bytes; must not be {@code null}
 * @param params not used by this implementation
 * @return an {@code ExtractData} wrapping the extracted text
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException     if reading the workbook raises an {@link IOException}
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }
    try {
        final HSSFWorkbook workbook = new HSSFWorkbook(in);
        final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
                new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
        final String text = extractor.getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
}
/**
 * Returns an {@link ExcelExtractor} for the workbook held by this object.
 *
 * <p>An {@link HSSFWorkbook} is wrapped in the HSSF extractor; any other
 * workbook is cast to {@link XSSFWorkbook} and wrapped in an
 * {@link XSSFExcelExtractor}.
 *
 * @return {@link ExcelExtractor}
 * @since 4.1.0
 */
public ExcelExtractor getExtractor() {
    final Workbook current = this.workbook;
    if (current instanceof HSSFWorkbook) {
        return new org.apache.poi.hssf.extractor.ExcelExtractor((HSSFWorkbook) current);
    }
    return new XSSFExcelExtractor((XSSFWorkbook) current);
}