/**
 * Fetches text from the slideshow using the extractor's configured defaults.
 * <p>
 * Delegates to the four-argument {@code getText}, passing the current
 * {@code _slidesByDefault}, {@code _notesByDefault}, {@code _commentsByDefault}
 * and {@code _masterByDefault} flags. Use setSlidesByDefault() and
 * setNotesByDefault() to change what is included.
 *
 * @return the extracted text
 */
public String getText() {
    return getText(
            _slidesByDefault,
            _notesByDefault,
            _commentsByDefault,
            _masterByDefault);
}
/**
 * Fetches text from the slideshow: slide text, note text, or both.
 * <p>
 * The comment and master-sheet flags are not chosen by the caller; the
 * current {@code _commentsByDefault} and {@code _masterByDefault} values
 * are forwarded to the four-argument {@code getText}.
 * <p>
 * Because the final block of text in a TextRun normally has its last \n
 * stripped, it is added back.
 *
 * @param getSlideText fetch slide text
 * @param getNoteText fetch note text
 * @return the extracted text
 */
public String getText(boolean getSlideText, boolean getNoteText) {
    return getText(
            getSlideText,
            getNoteText,
            _commentsByDefault,
            _masterByDefault);
}
/**
 * Fetches only the notes text from the slideshow.
 * <p>
 * Calls the four-argument {@code getText} with the notes flag set and the
 * slide, comment and master flags all cleared.
 *
 * @return the notes text
 */
public String getNotes() {
    return getText(
            false,  // slide text
            true,   // note text
            false,  // comments
            false); // master
}
/**
 * Fetches the notes text from the slideshow, leaving out the slide text.
 * <p>
 * Delegates to the two-argument {@code getText} with slide text disabled
 * and note text enabled.
 *
 * @return the notes text
 */
public String getNotes() {
    return getText(
            false, // slide text
            true); // note text
}
/**
 * Fetches text from the slideshow: slide text, note text, or both.
 * <p>
 * The comment and master-sheet flags are taken from the current
 * {@code commentsByDefault} and {@code masterByDefault} values and passed,
 * together with the two arguments, to the four-argument {@code getText}.
 * <p>
 * Because the final block of text in a TextRun normally has its last \n
 * stripped, it is added back.
 *
 * @param getSlideText fetch slide text
 * @param getNoteText fetch note text
 * @return the extracted text
 */
public String getText(boolean getSlideText, boolean getNoteText) {
    return getText(
            getSlideText,
            getNoteText,
            commentsByDefault,
            masterByDefault);
}
/**
 * {@inheritDoc}
 * <p>
 * Extracts slide and note text from a PowerPoint stream. A
 * {@link RuntimeException} raised during extraction is logged as a warning
 * and results in an empty reader; an {@link IOException} is propagated.
 * The stream is always closed before returning, and a failure to close it
 * is ignored.
 */
public Reader extractText(InputStream stream, String type, String encoding) throws IOException {
    try {
        // Slide text and notes are both requested.
        String text = new PowerPointExtractor(stream).getText(true, true);
        return new StringReader(text);
    } catch (RuntimeException e) {
        logger.warn("Failed to extract PowerPoint text content", e);
        return new StringReader("");
    } finally {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best-effort close; nothing useful can be done here.
        }
    }
}
}
/**
 * Basic command-line extractor. Prints all the slide text and, optionally,
 * the notes and comments.
 * <p>
 * Argument handling is purely positional: the content of the first
 * argument is never inspected. With a single argument it is the file to
 * read. With two or more arguments, notes are included and the second
 * argument is the file; with three or more, comments are included too.
 * Master-sheet text is always requested.
 *
 * @param args command-line arguments as described above
 * @throws IOException declared by the method; no handling is done here
 */
public static void main(String args[]) throws IOException {
    if (args.length < 1) {
        System.err.println("Useage:");
        System.err.println("\tPowerPointExtractor [-notes] <file>");
        System.exit(1);
    }

    final boolean includeNotes = args.length > 1;
    final boolean includeComments = args.length > 2;
    final boolean includeMaster = true;
    final String path = includeNotes ? args[1] : args[0];

    PowerPointExtractor extractor = new PowerPointExtractor(path);
    String text = extractor.getText(true, includeNotes, includeComments, includeMaster);
    System.out.println(text);
}
/**
 * Basic command-line extractor. Prints all the slide text and, optionally,
 * the notes and comments.
 * <p>
 * Argument handling is purely positional: the content of the first
 * argument is never inspected. With a single argument it is the file to
 * read. With two or more arguments, notes are included and the second
 * argument is the file; with three or more, comments are included too.
 * Master-sheet text is always requested.
 *
 * @param args command-line arguments as described above
 * @throws IOException declared by the method; no handling is done here
 */
public static void main(String args[]) throws IOException {
    if (args.length < 1) {
        System.err.println("Useage:");
        System.err.println("\tPowerPointExtractor [-notes] <file>");
        System.exit(1);
    }

    boolean notes = false;
    boolean comments = false;
    boolean master = true;
    String file;
    if (args.length > 1) {
        notes = true;
        file = args[1];
        if (args.length > 2) {
            comments = true;
        }
    } else {
        file = args[0];
    }

    PowerPointExtractor ppe = new PowerPointExtractor(file);
    try {
        System.out.println(ppe.getText(true, notes, comments, master));
    } finally {
        // Close the extractor even when text extraction fails.
        ppe.close();
    }
}
/**
 * Extracts the text of a PowerPoint document and feeds it to the handler
 * as a single "document" region.
 *
 * @param cc the content whose bytes are read as a PowerPoint document
 * @param handler receives the region start, the extracted text and the region end
 * @throws OntopiaRuntimeException wrapping any exception raised while
 *         reading the content or notifying the handler
 */
public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
    try {
        BufferedInputStream input =
                new BufferedInputStream(new ByteArrayInputStream(cc.getContent()));
        char[] chars = new PowerPointExtractor(input).getText().toCharArray();

        handler.startRegion("document");
        handler.text(chars, 0, chars.length);
        handler.endRegion();
    } catch (Exception e) {
        throw new OntopiaRuntimeException(e);
    }
}
/**
 * Builds an index document from the text of a PowerPoint file.
 *
 * @param fileData the file to index; its {@code data} bytes are parsed as
 *        a POIFS file system and its {@code path} is used for the document
 * @return an index document holding the path and the extracted text
 * @throws SolrException with {@code SERVER_ERROR} if reading the file
 *         fails; the underlying {@link IOException} is attached as the cause
 */
public IndexDocument getIndexedDocument(File2Index fileData) throws SolrException {
    try {
        POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(fileData.data));
        PowerPointExtractor extractor = new PowerPointExtractor(fs);
        String ppText = extractor.getText();
        return new IndexDocument(fileData.path, ppText, null);
    } catch (IOException e) {
        String msg = "Failed to write to the index";
        log.error(msg, e);
        // Keep the original exception as the cause so callers can see the failure.
        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
    }
}
/**
 * Reads the content as a PowerPoint document and passes its text to the
 * handler inside one "document" region.
 *
 * @param cc the content whose bytes are read as a PowerPoint document
 * @param handler receives the region start, the extracted text and the region end
 * @throws OntopiaRuntimeException wrapping any exception raised while
 *         reading the content or notifying the handler
 */
@Override
public void readContent(ClassifiableContentIF cc, TextHandlerIF handler) {
    try {
        ByteArrayInputStream bytes = new ByteArrayInputStream(cc.getContent());
        PowerPointExtractor ppt = new PowerPointExtractor(new BufferedInputStream(bytes));
        char[] text = ppt.getText().toCharArray();

        handler.startRegion("document");
        handler.text(text, 0, text.length);
        handler.endRegion();
    } catch (Exception e) {
        throw new OntopiaRuntimeException(e);
    }
}
} else if (pptExtractor instanceof PowerPointExtractor) { // Legacy PowerPoint files extractedText = ((PowerPointExtractor) pptExtractor).getText() + " " + ((PowerPointExtractor) pptExtractor).getNotes();
/** * 处理ppt * @param path * @return */ public String readPowerPoint(InputStream in) { String content = null; try { HSLFSlideShow slideShow = new HSLFSlideShow(in); org.apache.poi.hslf.extractor.PowerPointExtractor extractor = new PowerPointExtractor(slideShow); this.m_documentSummary = extractor.getDocSummaryInformation(); this.m_summary = extractor.getSummaryInformation(); content = extractor.getText(); // SlideShow ss = new SlideShow(new HSLFSlideShow(in));// is // // 为文件的InputStream,建立SlideShow // Slide[] slides = ss.getSlides();// 获得每一张幻灯片 // for (int i = 0; i < slides.length; i++) { // TextRun[] t = slides[i].getTextRuns();// 为了取得幻灯片的文字内容,建立TextRun // for (int j = 0; j < t.length; j++) { // content.append(t[j].getText());// 这里会将文字内容加到content中去 // } // } } catch (Exception ex) { System.out.println(ex.toString()); } return content; }
return ppe.getText(true, true);
/**
 * Extracts the text of a PowerPoint document read from the given stream.
 *
 * @param in the stream to read; must not be {@code null}
 * @param params extraction parameters; not used by this implementation
 * @return the extracted text wrapped in an {@link ExtractData}
 * @throws CrawlerSystemException if {@code in} is {@code null}
 * @throws ExtractException if an {@link IOException} occurs during extraction
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new CrawlerSystemException("The inputstream is null.");
    }

    final String text;
    try {
        @SuppressWarnings("resource")
        final org.apache.poi.hslf.extractor.PowerPointExtractor extractor =
                new org.apache.poi.hslf.extractor.PowerPointExtractor(in);
        text = extractor.getText();
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
    return new ExtractData(text);
}
/**
 * Extracts the text of a PowerPoint document read from the given stream.
 *
 * @param in the stream to read; must not be {@code null}
 * @param params extraction parameters; not used by this implementation
 * @return the extracted text wrapped in an {@link ExtractData}
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException if an {@link IOException} occurs during extraction
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }

    try {
        final String text =
                new org.apache.poi.hslf.extractor.PowerPointExtractor(in).getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
/**
 * Reads a PowerPoint document from the stream and returns its text.
 *
 * @param in the stream to read; must not be {@code null}
 * @param params extraction parameters; not used by this implementation
 * @return the extracted text wrapped in an {@link ExtractData}
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException if an {@link IOException} occurs during extraction
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }

    try {
        final String text =
                new org.apache.poi.hslf.extractor.PowerPointExtractor(in).getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}
/**
 * Returns the text content of the PowerPoint document supplied as a stream.
 *
 * @param in the stream to read; must not be {@code null}
 * @param params extraction parameters; not used by this implementation
 * @return the extracted text wrapped in an {@link ExtractData}
 * @throws RobotSystemException if {@code in} is {@code null}
 * @throws ExtractException if an {@link IOException} occurs during extraction
 */
@Override
public ExtractData getText(final InputStream in, final Map<String, String> params) {
    if (in == null) {
        throw new RobotSystemException("The inputstream is null.");
    }

    try {
        final String text =
                new org.apache.poi.hslf.extractor.PowerPointExtractor(in).getText();
        return new ExtractData(text);
    } catch (final IOException e) {
        throw new ExtractException(e);
    }
}