private void extractTableText(XHTMLContentHandler xhtml, HSLFTable shape) throws SAXException { xhtml.startElement("table"); for (int row = 0; row < shape.getNumberOfRows(); row++) { xhtml.startElement("tr"); for (int col = 0; col < shape.getNumberOfColumns(); col++) { HSLFTableCell cell = shape.getCell(row, col); //insert empty string for empty cell if cell is null String txt = ""; if (cell != null) { txt = cell.getText(); } xhtml.element("td", txt); } xhtml.endElement("tr"); } xhtml.endElement("table"); }
private void handleSlideEmbeddedPictures(HSLFSlideShow slideshow, XHTMLContentHandler xhtml) throws TikaException, SAXException, IOException { for (HSLFPictureData pic : slideshow.getPictureData()) { String mediaType; switch (pic.getType()) { case EMF: mediaType = "image/emf"; break; default: mediaType = pic.getContentType(); break; data = pic.getData(); } catch (Exception e) { EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);
private void extractGroupText(XHTMLContentHandler xhtml, ShapeContainer shapeContainer, int depth) throws SAXException { List<HSLFShape> shapes = getShapes(shapeContainer); if (shapes == null) { return; } //Only process items with depth > 0 because they should have been included //already in slide.getTextParagraphs above. //However, cells are considered grouped within the table, so ignore them. //I don't believe that cells can be inside a text box or other //grouped text containing object, so always ignore them. //I also don't believe that a table can be grouped with a table. //If these beliefs are wrong...must fix! List<List<HSLFTextParagraph>> paragraphList = new ArrayList<>(); for (HSLFShape shape : shapes) { if (shape instanceof HSLFGroupShape) { //work recursively, HSLFGroupShape can contain HSLFGroupShape extractGroupText(xhtml, ((HSLFGroupShape)shape), depth+1); } else if (shape instanceof HSLFTextShape && ! (shape instanceof HSLFTableCell) && depth > 0) { paragraphList.add(((HSLFTextShape)shape).getTextParagraphs()); } } textRunsToText(xhtml, paragraphList); }
ss = new HSLFSlideShow(root); } catch (EncryptedPowerPointFileException e) { throw new EncryptedDocumentException(e); _slides = ss.getSlides(); HeadersFooters slideHeaderFooters = (officeParserConfig.getIncludeHeadersAndFooters()) ? slide.getHeadersFooters() : null; ss.getNotesHeadersFooters() : null; extractMaster(xhtml, slide.getMasterSheet()); textRunsToText(xhtml, slide.getTextParagraphs());
boolean nextBullet = htp.isBullet(); List<HSLFTextRun> textRuns = htp.getTextRuns(); String firstLine = removePBreak(textRuns.get(0).getRawText()); boolean showBullet = (isBullet && (textRuns.size() > 1 || !"".equals(firstLine))); String paraTag = showBullet ? "li" : "p"; boolean runIsHyperLink = false; for (HSLFTextRun htr : textRuns) { Hyperlink link = htr.getHyperlink(); if (link != null) { String address = link.getAddress(); String line = htr.getRawText(); if (line != null) { boolean isfirst = true;
HSLFObjectData data = null; try { data = oleShape.getObjectData(); } catch (NullPointerException e) { String objID = Integer.toString(oleShape.getObjectID()); InputStream dataStream = null; try { dataStream = data.getInputStream(); } catch (Exception e) { EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata); if ("Excel.Chart.8".equals(oleShape.getProgId())) { mediaType = "application/vnd.ms-excel"; } else {
private void extractMacros(HSLFSlideShow ppt, XHTMLContentHandler xhtml) { //get macro persist id DocInfoListContainer list = (DocInfoListContainer)ppt.getDocumentRecord().findFirstOfType(RecordTypes.List.typeID); if (list == null) { return; } VBAInfoContainer vbaInfo = (VBAInfoContainer)list.findFirstOfType(RecordTypes.VBAInfo.typeID); if (vbaInfo == null) { return; } VBAInfoAtom vbaAtom = (VBAInfoAtom)vbaInfo.findFirstOfType(RecordTypes.VBAInfoAtom.typeID); if (vbaAtom == null) { return; } long persistId = vbaAtom.getPersistIdRef(); for (HSLFObjectData objData : ppt.getEmbeddedObjects()) { if (objData.getExOleObjStg().getPersistId() == persistId) { try (POIFSFileSystem poifsFileSystem = new POIFSFileSystem(objData.getInputStream())) { try { OfficeParser.extractMacros(poifsFileSystem, xhtml, EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context)); } catch (IOException|SAXException inner) { EmbeddedDocumentUtil.recordException(inner, parentMetadata); } } catch (IOException e) { EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata);//swallow } } } }
HSLFNotes notes = slide.getNotes(); if (notes == null) { return; textRunsToText(xhtml, notes.getTextParagraphs());
/**
 * Color scheme for this slide.
 *
 * @return the master sheet's color scheme when this slide follows the master
 *         scheme ({@code null} if there is no master sheet); otherwise the
 *         scheme supplied by the superclass
 */
@Override
public ColorSchemeAtom getColorScheme() {
    if (!getFollowMasterScheme()) {
        return super.getColorScheme();
    }
    final HSLFMasterSheet master = getMasterSheet();
    if (master == null) {
        return null;
    }
    return master.getColorScheme();
}
/**
 * Background for this slide.
 *
 * @return the master sheet's background when this slide follows the master
 *         background ({@code null} if there is no master sheet); otherwise
 *         the background supplied by the superclass
 */
@Override
public HSLFBackground getBackground() {
    if (!getFollowMasterBackground()) {
        return super.getBackground();
    }
    final HSLFMasterSheet master = getMasterSheet();
    if (master == null) {
        return null;
    }
    return master.getBackground();
}
@Override public void setSheet(HSLFSheet sheet) { super.setSheet(sheet); // Initialize _txtrun object. // (We can't do it in the constructor because the sheet // is not assigned then, it's only built once we have // all the records) List<HSLFTextParagraph> ltp = getTextParagraphs(); HSLFTextParagraph.supplySheet(ltp, sheet); }
/**
 * Sets the value of the given character TextProp on this run's character
 * style, adding the property if required.
 *
 * @param propName The name of the character TextProp
 * @param val The value to set for the TextProp
 */
public void setCharTextPropVal(String propName, Integer val) {
    HSLFTextParagraph owner = getTextParagraph();
    owner.setPropVal(characterStyle, propName, val);
}
/**
 * Creates the shape container through the superclass and then sets the
 * text-id, size-text-to-fit, fill, shadow and protection properties on the
 * shape's Escher option record.
 *
 * @param isChild passed through unchanged to the superclass implementation
 * @return the container created by the superclass
 */
@Override
protected EscherContainerRecord createSpContainer(boolean isChild) {
    EscherContainerRecord spContainer = super.createSpContainer(isChild);

    AbstractEscherOptRecord options = getEscherOptRecord();
    setEscherProperty(options, EscherProperties.TEXT__TEXTID, 0);
    setEscherProperty(options, EscherProperties.TEXT__SIZE_TEXT_TO_FIT_SHAPE, 0x20000);
    setEscherProperty(options, EscherProperties.FILL__NOFILLHITTEST, 0x150001);
    setEscherProperty(options, EscherProperties.SHADOWSTYLE__SHADOWOBSURED, 0x20000);
    setEscherProperty(options, EscherProperties.PROTECTION__LOCKAGAINSTGROUPING, 0x40000);

    return spContainer;
}
/**
 * Writes the text of the master sheet's non-placeholder text shapes as
 * {@code p} elements wrapped in a {@code div} with class
 * {@code slide-master-content}. Nothing is written when the master is
 * {@code null} or has no shapes.
 *
 * @param xhtml  handler that receives the generated elements
 * @param master master sheet to read, may be {@code null}
 * @throws SAXException if the handler rejects an event
 */
private void extractMaster(XHTMLContentHandler xhtml, HSLFMasterSheet master) throws SAXException {
    if (master == null) {
        return;
    }
    List<HSLFShape> masterShapes = getShapes(master);
    if (masterShapes == null || masterShapes.isEmpty()) {
        return;
    }

    xhtml.startElement("div", "class", "slide-master-content");
    for (HSLFShape candidate : masterShapes) {
        if (candidate == null || isPlaceholder(candidate)) {
            continue;
        }
        if (!(candidate instanceof HSLFTextShape)) {
            continue;
        }
        String masterText = ((HSLFTextShape) candidate).getText();
        if (masterText != null) {
            xhtml.element("p", masterText);
        }
    }
    xhtml.endElement("div");
}
private void handleComments(HSLFSlide slide, XHTMLContentHandler xhtml) throws SAXException { if (slide.getComments() == null || slide.getComments().size() == 0) { return; for (Comment comment : slide.getComments()) { authorStringBuilder.setLength(0); xhtml.startElement("p", "class", "slide-comment");
/**
 * Returns this run's hyperlink, creating it on first use. When a new
 * hyperlink is created the parent paragraph is marked dirty.
 *
 * @return the existing or newly created hyperlink
 */
@Override
public HSLFHyperlink createHyperlink() {
    if (link != null) {
        return link;
    }
    link = HSLFHyperlink.createHyperlink(this);
    parentParagraph.setDirty();
    return link;
}
/** * Create a new <code>Picture</code> * * @param data the picture data * @param parent the parent shape */ public HSLFPictureShape(HSLFPictureData data, ShapeContainer<HSLFShape,HSLFTextParagraph> parent) { super(null, parent); createSpContainer(data.getIndex(), parent instanceof HSLFGroupShape); }
/**
 * Create a new Placeholder and initialize internal structures.
 * After the superclass has built the container, the placeholder type is
 * set to {@code Placeholder.BODY}.
 *
 * @param isChild passed through unchanged to the superclass implementation
 * @return the created {@code EscherContainerRecord} which holds shape data
 */
@Override
protected EscherContainerRecord createSpContainer(boolean isChild) {
    EscherContainerRecord spContainer = super.createSpContainer(isChild);
    setPlaceholder(Placeholder.BODY);
    return spContainer;
}
}
/**
 * Returns the typeface of the font info registered for the given font group.
 *
 * @param fontGroup the font group to look up
 * @return the typeface, or {@code null} when no font info is available for the group
 */
@Override
public String getFontFamily(FontGroup fontGroup) {
    HSLFFontInfo fontInfo = getFontInfo(fontGroup);
    if (fontInfo == null) {
        return null;
    }
    return fontInfo.getTypeface();
}
/**
 * Delegates the move/scale to the superclass and then calls
 * {@code updateRowHeightsProperty()}, so the row-heights property is
 * updated after the superclass has applied the new anchor.
 *
 * @param anchorDest the destination anchor, passed through to the superclass
 */
@Override protected void moveAndScale(Rectangle2D anchorDest){ super.moveAndScale(anchorDest); updateRowHeightsProperty(); }