org.apache.poi.hssf.extractor.ExcelExtractor.getText java code examples

// Apply the blank-cell and header/footer settings taken from cmdArgs.
extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells());
extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters());
// Write the extracted text to standard output.
System.out.println(extractor.getText());
// Close the extractor first, then the workbook.
extractor.close();
wb.close();

 // Open "workbook.xls" and load it as an HSSF workbook through a POIFS filesystem.
 // NOTE(review): inp, wb and extractor are not closed anywhere in this snippet.
 InputStream inp = new FileInputStream("workbook.xls");
HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(inp));
ExcelExtractor extractor = new ExcelExtractor(wb);

// Return the formulas themselves rather than their results, and leave out sheet names.
extractor.setFormulasNotResults(true);
extractor.setIncludeSheetNames(false);
String text = extractor.getText();

  /**
   * {@inheritDoc}
   *
   * <p>Reads the stream as a POIFS (OLE2) filesystem and returns the text
   * produced by {@code ExcelExtractor.getText()}. When extraction fails with a
   * {@link RuntimeException} the failure is logged at warn level and a reader
   * over the empty string is returned instead. The supplied stream is closed
   * before this method returns, whether or not extraction succeeded.
   *
   * @param stream   the Excel document to read; always closed by this method
   * @param type     not read by this implementation
   * @param encoding not read by this implementation
   * @return a reader over the extracted text, or over {@code ""} if a
   *         {@link RuntimeException} occurred during extraction
   * @throws IOException if the stream cannot be read, or the extractor or
   *                     stream cannot be closed
   */
  public Reader extractText(InputStream stream,
               String type,
               String encoding) throws IOException {
    try {
      POIFSFileSystem fs = new POIFSFileSystem(stream);
      // try-with-resources closes the extractor once the text has been read,
      // so the resources it holds are released even if getText() throws.
      try (ExcelExtractor extractor = new ExcelExtractor(fs)) {
        return new StringReader(extractor.getText());
      }
    } catch (RuntimeException e) {
      // Deliberate best effort: an unreadable workbook yields empty content.
      logger.warn("Failed to extract Excel text content", e);
      return new StringReader("");
    } finally {
      stream.close();
    }
  }
}

/**
 * Extracts the text of an Excel file.
 *
 * @param in stream containing the Excel workbook
 * @return the raw workbook text after it has been passed through
 *         {@code ExtractorUtil.removeControlChars}
 * @throws Exception if the workbook cannot be read or the extractor cannot be closed
 */
public static String extractText(InputStream in) throws Exception {
  HSSFWorkbook wb = new HSSFWorkbook(in);
  // try-with-resources closes the extractor once the text has been read.
  try (ExcelExtractor ee = new ExcelExtractor(wb)) {
    // Remove the characters that are of no use for indexing.
    return ExtractorUtil.removeControlChars(ee.getText());
  }
}

/**
 * Builds an index document from the text of an Excel file.
 *
 * @param fileData the file to index; its {@code data} bytes are parsed as a
 *                 POIFS (OLE2) workbook and its {@code path} is passed to the
 *                 resulting document
 * @return an {@code IndexDocument} built from the file path and the extracted text
 * @throws SolrException with {@code ErrorCode.SERVER_ERROR}, carrying the
 *                       original {@link IOException} as its cause, if the
 *                       workbook cannot be read
 */
public IndexDocument getIndexedDocument(File2Index fileData)
    throws SolrException {
  try {
    POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
    String excelText;
    // try-with-resources closes the extractor once the text has been read.
    try (ExcelExtractor extractor = new ExcelExtractor(fs)) {
      excelText = extractor.getText();
    }
    return new IndexDocument(fileData.path, excelText, null);
  } catch (IOException e) {
    String msg = "Failed to write to the index";
    log.error(msg, e);
    // Keep the IOException as the cause so callers can see why extraction failed.
    throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
  }
}

  /**
   * Extracts the text of an Excel (HSSF) workbook read from the given stream.
   *
   * @param in     stream containing the workbook; must not be {@code null}
   * @param params not read by this implementation
   * @return an {@code ExtractData} wrapping the extracted text
   * @throws RobotSystemException if {@code in} is {@code null}
   * @throws ExtractException     if reading the workbook raises an {@link IOException}
   */
  @Override
  public ExtractData getText(final InputStream in,
      final Map<String, String> params) {
    if (in == null) {
      throw new RobotSystemException("The inputstream is null.");
    }
    try {
      final HSSFWorkbook workbook = new HSSFWorkbook(in);
      final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
          new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
      final String text = extractor.getText();
      return new ExtractData(text);
    } catch (final IOException e) {
      throw new ExtractException(e);
    }
  }
}

  /**
   * Extracts the text of an Excel (HSSF) workbook read from the given stream.
   *
   * @param in     stream containing the workbook; must not be {@code null}
   * @param params not read by this implementation
   * @return an {@code ExtractData} wrapping the extracted text
   * @throws RobotSystemException if {@code in} is {@code null}
   * @throws ExtractException     if reading the workbook raises an {@link IOException}
   */
  @Override
  public ExtractData getText(final InputStream in,
      final Map<String, String> params) {
    if (in == null) {
      throw new RobotSystemException("The inputstream is null.");
    }
    try {
      final HSSFWorkbook workbook = new HSSFWorkbook(in);
      final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
          new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
      final String text = extractor.getText();
      return new ExtractData(text);
    } catch (final IOException e) {
      throw new ExtractException(e);
    }
  }
}

  /**
   * Extracts the text of an Excel (HSSF) workbook read from the given stream.
   *
   * @param in     stream containing the workbook; must not be {@code null}
   * @param params not read by this implementation
   * @return an {@code ExtractData} wrapping the extracted text
   * @throws RobotSystemException if {@code in} is {@code null}
   * @throws ExtractException     if reading the workbook raises an {@link IOException}
   */
  @Override
  public ExtractData getText(final InputStream in,
      final Map<String, String> params) {
    if (in == null) {
      throw new RobotSystemException("The inputstream is null.");
    }
    try {
      final HSSFWorkbook workbook = new HSSFWorkbook(in);
      final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
          new org.apache.poi.hssf.extractor.ExcelExtractor(workbook);
      final String text = extractor.getText();
      return new ExtractData(text);
    } catch (final IOException e) {
      throw new ExtractException(e);
    }
  }
}

  /**
   * Extracts the text of an Excel (HSSF) workbook read from the given stream.
   *
   * @param in     stream containing the workbook; must not be {@code null}
   * @param params not read by this implementation
   * @return an {@code ExtractData} wrapping the extracted text
   * @throws CrawlerSystemException if {@code in} is {@code null}
   * @throws ExtractException       if reading the workbook raises an {@link IOException}
   */
  @Override
  public ExtractData getText(final InputStream in,
      final Map<String, String> params) {
    if (in == null) {
      throw new CrawlerSystemException("The inputstream is null.");
    }
    try {
      // The extractor is intentionally left unclosed here, hence the suppression.
      @SuppressWarnings("resource")
      final org.apache.poi.hssf.extractor.ExcelExtractor extractor =
          new org.apache.poi.hssf.extractor.ExcelExtractor(new HSSFWorkbook(in));
      final String text = extractor.getText();
      return new ExtractData(text);
    } catch (final IOException e) {
      throw new ExtractException(e);
    }
  }
}

/**
   * Reads the text of an Excel 2003 ({@code .xls}) workbook.
   *
   * <p>The extractor is configured to return formulas themselves rather than
   * their results and to leave out sheet names. As a side effect, the
   * extractor's document summary information and summary information are
   * stored in {@code m_documentSummary} and {@code m_summary}.
   *
   * @param in stream containing the workbook
   * @return the extracted text, or {@code null} if a
   *         {@link FileNotFoundException} was raised before the text was read
   * @throws IOException if the workbook cannot be read for any other I/O
   *                     reason, or the extractor cannot be closed
   */
  public String readExcel(InputStream in) throws IOException {
    String content = null;
    try {
      HSSFWorkbook wb = new HSSFWorkbook(in);
      // try-with-resources closes the extractor once the text and the summary
      // objects have been retrieved from it.
      try (ExcelExtractor extractor = new ExcelExtractor(wb)) {
        extractor.setFormulasNotResults(true);
        extractor.setIncludeSheetNames(false);
        content = extractor.getText();
        this.m_documentSummary = extractor.getDocSummaryInformation();
        this.m_summary = extractor.getSummaryInformation();
      }
    } catch (FileNotFoundException e) {
      // Best effort: report the problem and fall through with whatever
      // content was read so far (null if nothing was read).
      e.printStackTrace();
    }
    return content;
  }
  /**

  extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells());
  extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters());
  System.out.println(extractor.getText());
} catch (Exception e) {
  e.printStackTrace();

  extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells());
  extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters());
  System.out.println(extractor.getText());
} catch (Exception e) {
  e.printStackTrace();

// Apply the blank-cell and header/footer settings taken from cmdArgs.
extractor.setIncludeBlankCells(cmdArgs.shouldShowBlankCells());
extractor.setIncludeHeadersFooters(cmdArgs.shouldIncludeHeadersFooters());
// Write the extracted text to standard output.
System.out.println(extractor.getText());
// Close the extractor first, then the workbook.
extractor.close();
wb.close();

  /**
   * Parses an Excel (HSSF) workbook and records its metadata and text.
   *
   * <p>The MIME type is set on the result's meta fields. When the workbook has
   * summary information, its title, author, subject, creation date, last-save
   * date and keywords are added too. The extracted text is then added to a new
   * document under {@code CONTENT}, along with the language-detection result.
   *
   * @param parameters    not read by this implementation
   * @param inputStream   stream containing the workbook
   * @param extension     file extension passed to {@code findMimeType}
   * @param mimeType      MIME type passed to {@code findMimeType}
   * @param resultBuilder receives the meta fields and the new document
   * @throws Exception if the workbook cannot be read or processed
   */
  @Override
  public void parseContent(final MultivaluedMap<String, String> parameters, final InputStream inputStream,
      final String extension, final String mimeType, final ParserResultBuilder resultBuilder) throws Exception {

    // The extractor is closed automatically when the block exits.
    try (final ExcelExtractor extractor = new ExcelExtractor(new HSSFWorkbook(inputStream))) {

      final ParserFieldsBuilder metaFields = resultBuilder.metas();
      metaFields.set(MIME_TYPE, findMimeType(extension, mimeType, this::findMimeTypeUsingDefault));

      // Summary information may be missing from the workbook.
      final SummaryInformation summary = extractor.getSummaryInformation();
      if (summary != null) {
        metaFields.add(TITLE, summary.getTitle());
        metaFields.add(AUTHOR, summary.getAuthor());
        metaFields.add(SUBJECT, summary.getSubject());
        metaFields.add(CREATION_DATE, summary.getCreateDateTime());
        metaFields.add(MODIFICATION_DATE, summary.getLastSaveDateTime());
        metaFields.add(KEYWORDS, summary.getKeywords());
      }

      final ParserFieldsBuilder document = resultBuilder.newDocument();
      document.add(CONTENT, extractor.getText());
      document.add(LANG_DETECTION, languageDetection(document, CONTENT, 10000));
    }
  }
}

Javadoc

Retrieves the text contents of the file

Popular methods of ExcelExtractor

<init>
_extractHeaderFooter
setFormulasNotResults
Should we return the formula itself, and not the result it produces? Default is false
setIncludeSheetNames
Should sheet names be included? Default is true
printUsageMessage
setIncludeBlankCells
Should blank cells be output? Default is to only output cells that are present in the file and are non-blank.
setIncludeCellComments
Should cell comments be included? Default is false
setIncludeHeadersFooters
Should headers and footers be included in the output? Default is to include them.
close
getSummaryInformation
getDocSummaryInformation

getDocSummaryInformation

Popular in Java

Start an intent from android
getContentResolver (Context)
getSystemService (Context)
requestLocationUpdates (LocationManager)
BigInteger (java.math)
An immutable arbitrary-precision signed integer. FAST CRYPTOGRAPHY: This implementation is efficient f…
Arrays (java.util)
This class contains various methods for manipulating arrays (such as sorting and searching). This cl
SortedMap (java.util)
A map that has its keys ordered. The sorting is according to either the natural ordering of its keys
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in the Public Domain, and comes with NO WARRANTY.
Point (java.awt)
A point representing a location in (x,y) coordinate space, specified in integer precision.
BufferedImage (java.awt.image)
The BufferedImage subclass describes an java.awt.Image with an accessible buffer of image data. All
Top Vim plugins

How to use the getText method in org.apache.poi.hssf.extractor.ExcelExtractor

Best Java code snippets using org.apache.poi.hssf.extractor.ExcelExtractor.getText (Showing top 14 results out of 315)

How to use
getText
method
in
org.apache.poi.hssf.extractor.ExcelExtractor