org.apache.poi.hslf.extractor.PowerPointExtractor.getText java code examples

/**
 * Fetches text from the slideshow using this extractor's current defaults.
 * The four default flags (slides, notes, comments, master) are handed
 * unchanged to the four-argument {@code getText} overload.
 *
 * @return whatever the four-argument overload returns for the default flags
 */
public String getText() {
  final String defaultText =
      getText(_slidesByDefault, _notesByDefault, _commentsByDefault, _masterByDefault);
  return defaultText;
}

/**
 * Fetches slide text and/or notes text from the slideshow. The comments and
 * master flags are not chosen by the caller; the extractor's current
 * defaults are forwarded for those.
 *
 * @param getSlideText fetch slide text
 * @param getNoteText fetch note text
 * @return the result of the four-argument {@code getText} overload
 */
public String getText(boolean getSlideText, boolean getNoteText) {
  return getText(
      getSlideText,
      getNoteText,
      _commentsByDefault,
      _masterByDefault);
}

/**
 * Fetches only the notes text from the slideshow: the notes flag is the
 * single flag switched on in the call to the four-argument {@code getText}.
 *
 * @return the text returned by {@code getText} with only notes requested
 */
public String getNotes() {
  // Name each positional flag so the call site documents itself.
  final boolean includeSlides = false;
  final boolean includeNotes = true;
  final boolean includeComments = false;
  final boolean includeMaster = false;
  return getText(includeSlides, includeNotes, includeComments, includeMaster);
}

/**
 * Fetches the notes text from the slideshow without the slide text, by
 * delegating to the two-argument {@code getText} overload.
 *
 * @return the text returned by {@code getText(false, true)}
 */
public String getNotes() {
  final boolean wantSlideText = false;
  final boolean wantNoteText = true;
  return getText(wantSlideText, wantNoteText);
}

/**
 * Fetches slide text and/or notes text from the slideshow, forwarding the
 * extractor's {@code commentsByDefault} and {@code masterByDefault} settings
 * for the two flags the caller does not supply.
 *
 * @param getSlideText fetch slide text
 * @param getNoteText fetch note text
 * @return the result of the four-argument {@code getText} overload
 */
public String getText(boolean getSlideText, boolean getNoteText) {
  return getText(
      getSlideText,
      getNoteText,
      commentsByDefault,
      masterByDefault);
}

  /**
   * {@inheritDoc}
   *
   * <p>Builds a {@code PowerPointExtractor} over the stream and returns its
   * {@code getText(true, true)} result wrapped in a reader. A
   * {@link RuntimeException} during extraction is logged as a warning and an
   * empty reader is returned instead. The stream is always closed before this
   * method returns; a failure to close it is ignored.
   */
  public Reader extractText(InputStream stream, String type, String encoding)
      throws IOException {
    Reader result;
    try {
      final PowerPointExtractor ppt = new PowerPointExtractor(stream);
      final String text = ppt.getText(true, true);
      result = new StringReader(text);
    } catch (RuntimeException failure) {
      // Best effort: report the problem and fall back to empty content.
      logger.warn("Failed to extract PowerPoint text content", failure);
      result = new StringReader("");
    } finally {
      try {
        stream.close();
      } catch (IOException ignored) {
        // Nothing useful can be done if closing the input fails.
      }
    }
    return result;
  }
}

/**
 * Basic command-line extractor. Prints the extracted text of a file to
 * standard output.
 *
 * <p>Arguments are handled by position only: with one argument it is the
 * file; with two or more, the second argument is the file and notes are
 * requested (the first argument's value is not inspected); with three or
 * more, comments are requested as well. Master text is always requested.
 * With no arguments a usage message is printed to standard error and the
 * JVM exits with status 1.
 *
 * @param args command-line arguments as described above
 * @throws IOException propagated from creating, reading or closing the extractor
 */
public static void main(String args[]) throws IOException {
  if (args.length < 1) {
    System.err.println("Useage:");
    System.err.println("\tPowerPointExtractor [-notes] <file>");
    System.exit(1);
  }
  boolean notes = false;
  boolean comments = false;
  boolean master = true;

  String file;
  if (args.length > 1) {
    notes = true;
    file = args[1];
    if (args.length > 2) {
      comments = true;
    }
  } else {
    file = args[0];
  }
  PowerPointExtractor ppe = new PowerPointExtractor(file);
  try {
    System.out.println(ppe.getText(true, notes, comments, master));
  } finally {
    // Release the underlying file even if extraction or printing fails.
    ppe.close();
  }
}

/**
 * Basic command-line extractor. Prints the extracted text of a file to
 * standard output and then closes the extractor.
 *
 * <p>Arguments are handled by position only: with one argument it is the
 * file; with two or more, the second argument is the file and notes are
 * requested (the first argument's value is not inspected); with three or
 * more, comments are requested as well. Master text is always requested.
 * With no arguments a usage message is printed to standard error and the
 * JVM exits with status 1.
 *
 * @param args command-line arguments as described above
 * @throws IOException propagated from creating, reading or closing the extractor
 */
public static void main(String args[]) throws IOException {
  if (args.length < 1) {
    System.err.println("Useage:");
    System.err.println("\tPowerPointExtractor [-notes] <file>");
    System.exit(1);
  }
  boolean notes = false;
  boolean comments = false;
  boolean master = true;

  String file;
  if (args.length > 1) {
    notes = true;
    file = args[1];
    if (args.length > 2) {
      comments = true;
    }
  } else {
    file = args[0];
  }
  PowerPointExtractor ppe = new PowerPointExtractor(file);
  try {
    System.out.println(ppe.getText(true, notes, comments, master));
  } finally {
    // Close in finally so a failure while extracting does not skip the close.
    ppe.close();
  }
}

/**
 * Extracts the text of the PowerPoint content held by {@code cc} and passes
 * it to {@code handler} as one region named "document".
 *
 * @param cc source of the raw document bytes
 * @param handler receiver of the region start, the text and the region end
 * @throws OntopiaRuntimeException wrapping any exception raised while
 *         extracting the text or notifying the handler
 */
public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
  try {
    ByteArrayInputStream bytes = new ByteArrayInputStream(cc.getContent());
    BufferedInputStream buffered = new BufferedInputStream(bytes);
    char[] chars = new PowerPointExtractor(buffered).getText().toCharArray();

    handler.startRegion("document");
    handler.text(chars, 0, chars.length);
    handler.endRegion();
  } catch (Exception failure) {
    throw new OntopiaRuntimeException(failure);
  }
}

/**
 * Builds an index document from the PowerPoint bytes in {@code fileData}.
 * The bytes are opened as a POIFS file system, the text is extracted with a
 * {@code PowerPointExtractor}, and the result is paired with the file's path.
 *
 * @param fileData holder of the raw file bytes ({@code data}) and its path
 * @return an {@code IndexDocument} for the path and the extracted text
 * @throws SolrException with {@code SERVER_ERROR} if an {@link IOException}
 *         occurs; the original exception is attached as the cause
 */
public IndexDocument getIndexedDocument(File2Index fileData)
    throws SolrException {
  try {
    POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
    PowerPointExtractor extractor = new PowerPointExtractor(fs);
    String ppText = extractor.getText();
    return new IndexDocument(fileData.path, ppText, null);
  } catch (IOException e) {
    String msg = "Failed to write to the index";
    log.error(msg, e);
    // Keep the IOException as the cause so callers can see the root failure.
    throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
  }
}

/**
 * Reads the PowerPoint bytes supplied by {@code cc}, extracts their text and
 * emits it to {@code handler} inside a single "document" region.
 *
 * @param cc provider of the document content as a byte array
 * @param handler sink that receives the region markers and the characters
 * @throws OntopiaRuntimeException wrapping whatever exception occurs in the
 *         extraction or in the handler callbacks
 */
@Override
public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
  try {
    PowerPointExtractor ppt = new PowerPointExtractor(
        new BufferedInputStream(new ByteArrayInputStream(cc.getContent())));
    final String text = ppt.getText();
    final char[] buffer = text.toCharArray();
    handler.startRegion("document");
    // Hand over the full buffer: offset 0, every character.
    handler.text(buffer, 0, buffer.length);
    handler.endRegion();
  } catch (Exception cause) {
    throw new OntopiaRuntimeException(cause);
  }
}

} else if (pptExtractor instanceof PowerPointExtractor) { // Legacy PowerPoint files
  extractedText = ((PowerPointExtractor) pptExtractor).getText()
    + " " + ((PowerPointExtractor) pptExtractor).getNotes();

  /**
   * Processes a PowerPoint document read from a stream and returns its text.
   *
   * <p>As a side effect the extractor's document-summary and summary
   * information are stored in {@code m_documentSummary} and
   * {@code m_summary}. Any exception is printed to standard output and the
   * method then returns whatever text had been obtained, which is
   * {@code null} if extraction did not get that far.
   *
   * @param in stream to read the PowerPoint document from
   * @return the extracted text, or {@code null} if it could not be obtained
   */
    public  String readPowerPoint(InputStream in) {
      String text = null;
      try {
        final PowerPointExtractor ppt = new PowerPointExtractor(new HSLFSlideShow(in));
        this.m_documentSummary = ppt.getDocSummaryInformation();
        this.m_summary = ppt.getSummaryInformation();
        text = ppt.getText();
        // An earlier approach walked every slide's TextRuns by hand and
        // appended their text; the extractor call above replaces it.
      } catch (Exception ex) {
        System.out.println(ex.toString());
      }
      return text;
    }

return ppe.getText(true, true);

/**
 * Extracts the text of a PowerPoint document read from {@code in}.
 *
 * @param in stream holding the document; must not be {@code null}
 * @param params extraction parameters; not consulted by this method
 * @return an {@code ExtractData} built from the extractor's text
 * @throws CrawlerSystemException if {@code in} is {@code null}
 * @throws ExtractException wrapping an {@link IOException} raised inside the try block
 */
@Override
public ExtractData getText(final InputStream in,
    final Map<String, String> params) {
  if (in != null) {
    try {
      @SuppressWarnings("resource")
      final org.apache.poi.hslf.extractor.PowerPointExtractor pptExtractor =
          new org.apache.poi.hslf.extractor.PowerPointExtractor(in);
      final String slideText = pptExtractor.getText();
      return new ExtractData(slideText);
    } catch (final IOException ioe) {
      throw new ExtractException(ioe);
    }
  }
  throw new CrawlerSystemException("The inputstream is null.");
}

/**
 * Extracts the text of a PowerPoint document supplied as a stream.
 *
 * @param in stream holding the document; must not be {@code null}
 * @param params extraction parameters; not consulted by this method
 * @return an {@code ExtractData} wrapping the extractor's text
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException wrapping an {@link IOException} raised inside the try block
 */
@Override
public ExtractData getText(final InputStream in,
    final Map<String, String> params) {
  if (in == null) {
    throw new RobotSystemException("The inputstream is null.");
  }
  try {
    final org.apache.poi.hslf.extractor.PowerPointExtractor extractor =
        new org.apache.poi.hslf.extractor.PowerPointExtractor(in);
    return new ExtractData(extractor.getText());
  } catch (final IOException cause) {
    throw new ExtractException(cause);
  }
}

/**
 * Reads a PowerPoint document from {@code in} and returns its text.
 *
 * @param in stream holding the document; must not be {@code null}
 * @param params extraction parameters; not consulted by this method
 * @return an {@code ExtractData} wrapping the extractor's text
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException wrapping an {@link IOException} raised inside the try block
 */
@Override
public ExtractData getText(final InputStream in,
    final Map<String, String> params) {
  if (in != null) {
    try {
      final String text =
          new org.apache.poi.hslf.extractor.PowerPointExtractor(in).getText();
      return new ExtractData(text);
    } catch (final IOException ioe) {
      throw new ExtractException(ioe);
    }
  }
  throw new RobotSystemException("The inputstream is null.");
}

/**
 * Produces the extracted text for a PowerPoint document given as a stream.
 *
 * @param in stream holding the document; must not be {@code null}
 * @param params extraction parameters; not consulted by this method
 * @return an {@code ExtractData} wrapping the extractor's text
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException wrapping an {@link IOException} raised inside the try block
 */
@Override
public ExtractData getText(final InputStream in,
    final Map<String, String> params) {
  // Reject a missing stream before touching the extractor.
  if (in == null) {
    throw new RobotSystemException("The inputstream is null.");
  }
  try {
    final org.apache.poi.hslf.extractor.PowerPointExtractor ppt =
        new org.apache.poi.hslf.extractor.PowerPointExtractor(in);
    final String extracted = ppt.getText();
    final ExtractData data = new ExtractData(extracted);
    return data;
  } catch (final IOException readFailure) {
    throw new ExtractException(readFailure);
  }
}

Javadoc

Fetches all the slide text from the slideshow, but not the notes, unless you've called setSlidesByDefault() and setNotesByDefault() to change this

Popular methods of PowerPointExtractor

<init>
Creates a PowerPointExtractor, from an open POIFSFileSystem
close
getDocSummaryInformation
getNotes
Fetches all the notes text from the slideshow, but not the slide text
getSummaryInformation
setFilesystem
textRunsToText

Popular in Java

Updating database using SQL prepared statement
startActivity (Activity)
getExternalFilesDir (Context)
getContentResolver (Context)
BufferedInputStream (java.io)
A BufferedInputStream adds functionality to another input stream-namely, the ability to buffer the i
HashMap (java.util)
HashMap is an implementation of Map. All optional operations are supported. All elements are permitte
Locale (java.util)
Locale represents a language/country/variant combination. Locales are used to alter the presentatio
Vector (java.util)
Vector is an implementation of List, backed by an array and synchronized. All optional operations in
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
JCheckBox (javax.swing)
Top Vim plugins

How to use getText method in org.apache.poi.hslf.extractor.PowerPointExtractor

Best Java code snippets using org.apache.poi.hslf.extractor.PowerPointExtractor.getText (Showing top 18 results out of 315)

How to use
getText
method
in
org.apache.poi.hslf.extractor.PowerPointExtractor