Refine search
FileInputStream fis = new FileInputStream(file.getAbsolutePath()); HWPFDocument document = new HWPFDocument(fis); extractor = new WordExtractor(document); String[] fileData = extractor.getParagraphText(); for (int i = 0; i < fileData.length; i++)
public static void build(File tmpFile, Map<String, Object> contentMap, String exportFile) throws Exception { FileInputStream tempFileInputStream = new FileInputStream(tmpFile); HWPFDocument document = new HWPFDocument(tempFileInputStream); // 读取文本内容 Range bodyRange = document.getRange(); // 替换内容 for (Map.Entry<String, Object> entry : contentMap.entrySet()) { bodyRange.replaceText("${" + entry.getKey() + "}", entry.getValue().toString()); } // 导出到文件 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); document.write(byteArrayOutputStream); write(exportFile, byteArrayOutputStream); document.close(); }
HWPFDocument document; try { document = new HWPFDocument(root); } catch (org.apache.poi.EncryptedDocumentException e) { throw new EncryptedDocumentException(e); new org.apache.poi.hwpf.extractor.WordExtractor(document); PicturesTable pictureTable = document.getPicturesTable(); PicturesSource pictures = new PicturesSource(document); HeaderStories headerFooter = null; Range r = document.getRange(); ListManager listManager = new ListManager(document); for (int i = 0; i < r.numParagraphs(); i++) { Paragraph p = r.getParagraph(i); i += handleParagraph(p, 0, r, document, FieldsDocumentPart.MAIN, pictures, pictureTable, listManager, xhtml); for (String paragraph : wordExtractor.getMainTextboxText()) { xhtml.element("p", paragraph); for (String paragraph : wordExtractor.getFootnoteText()) { xhtml.element("p", paragraph);
private PicturesSource(HWPFDocument doc) { picturesTable = doc.getPicturesTable(); all = picturesTable.getAllPictures(); // Build the Offset-Picture lookup map lookup = new HashMap<Integer, Picture>(); for (Picture p : all) { lookup.put(p.getStartOffset(), p); } // Work out which Pictures aren't referenced by // a \u0001 in the main text // These are \u0008 escher floating ones, ones // found outside the normal text, and who // knows what else... nonU1based = new ArrayList<Picture>(); nonU1based.addAll(all); Range r = doc.getRange(); for (int i = 0; i < r.numCharacterRuns(); i++) { CharacterRun cr = r.getCharacterRun(i); if (picturesTable.hasPicture(cr)) { Picture p = getFor(cr); int at = nonU1based.indexOf(p); nonU1based.set(at, null); } } }
/**
 * Command-line entry point: reads the Word document named by the first argument and
 * passes it, together with an output stream on {@code test.xml}, to {@code Word2Forrest}.
 *
 * @param args {@code args[0]} is the path of the input document
 * @throws IOException if either file cannot be opened or the document cannot be read
 */
public static void main(String[] args) throws IOException {
    final String sourcePath = args[0];
    try (InputStream wordInput = new FileInputStream(sourcePath);
         OutputStream xmlOutput = new FileOutputStream("test.xml")) {
        HWPFDocument wordDocument = new HWPFDocument(wordInput);
        new Word2Forrest(wordDocument, xmlOutput);
    }
}
}
in = new FileInputStream("wto.doc"); doc = new HWPFDocument(in); Range range = doc.getRange(); japan.write(outJapan); in.close(); outUs.close(); outJapan.close();
boolean isHidden = false; try { fs = new POIFSFileSystem(new FileInputStream(filesname)); HWPFDocument doc = new HWPFDocument(fs); WordExtractor we = new WordExtractor(doc); String[] paragraphs = we.getParagraphText(); System.out.println("Word Document has " + paragraphs.length + " paragraphs"); Range range = doc.getRange(); for (int k = 0; k < range.numParagraphs(); k++) { org.apache.poi.hwpf.usermodel.Paragraph paragraph = range .getParagraph(k); paragraph.text().trim(); paragraph.text().replaceAll("\\cM?\r?\n", ""); for (int j = 0; j < paragraph.numCharacterRuns(); j++) { org.apache.poi.hwpf.usermodel.CharacterRun cr = paragraph .getCharacterRun(j); if (cr.isVanished()) { // it is hidden System.out.println("text is hidden "); isHidden = true; break; } }
FileInputStream fis = new FileInputStream(file.getAbsolutePath()); HWPFDocument docs = new HWPFDocument(fis); extractor = new WordExtractor(docs); String[] fileData = extractor.getParagraphText(); for (int i = 0; i < fileData.length; i++) data+=fileData[i]; fis.close(); file = new File("file2.doc"); fis = new FileInputStream(file.getAbsolutePath()); docs = new HWPFDocument(fis); extractor = new WordExtractor(docs); fileData = extractor.getParagraphText(); for (int i = 0; i < fileData.length; i++)
public static void main(String[] args) throws IOException { HWPFDocument doc = new HWPFDocument(new FileInputStream(args[0])); Range r = doc.getRange(); System.out.println("Example you supplied:"); System.out.println("---------------------"); for (int x = 0; x < r.numSections(); x++) { Section s = r.getSection(x); for (int y = 0; y < s.numParagraphs(); y++) { Paragraph p = s.getParagraph(y); for (int z = 0; z < p.numCharacterRuns(); z++) { // character run CharacterRun run = p.getCharacterRun(z); // character run text String text = run.text(); // show us the text System.out.print(text); } // use a new line at the paragraph break System.out.println(); } } doc.close(); } }
import java.io.*; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.usermodel.*; public class test { public static void main(String[] args) throws Exception { // POI apparently can't create a document from scratch, // so we need an existing empty dummy document HWPFDocument doc = new HWPFDocument(new FileInputStream("D:\\src.doc")); Range range = doc.getRange(); CharacterRun run = range .insertAfter("Text After copied file contents!"); run.setBold(true); OutputStream out = new FileOutputStream("D:\\result.doc"); doc.write(out); out.flush(); out.close(); } }
import org.apache.poi.hwpf.HWPFDocument; ... File fin = new File(yourFilePath); FileInputStream fis = new FileInputStream(fin); HWPFDocument doc = new HWPFDocument(fis); String text = doc.getDocumentText(); System.out.println(text); ...
FileInputStream is = new FileInputStream(args[0]); HSLFSlideShow ppt = new HSLFSlideShow(is); is.close(); HWPFDocument doc = new HWPFDocument(data.getInputStream()); Range r = doc.getRange(); for(int k = 0; k < r.numParagraphs(); k++) { Paragraph p = r.getParagraph(k); System.out.println(p.text()); doc.write(out); out.close(); doc.close(); } else { FileOutputStream out = new FileOutputStream(ole.getProgId() + "-"+(oleIdx+1)+".dat"); out.write(chunk,0,count); is.close(); out.close();
FileInputStream fis = new FileInputStream(file.getAbsolutePath()); HWPFDocument document = new HWPFDocument(fis); WordExtractor extractor = new WordExtractor(document); String rawText = extractor.getText(); String displayText = extractor.stripFields(rawText);
// Prints every non-blank paragraph found in the comments range of the Word document
// named by fileName. Failures are reported on stderr and otherwise ignored.
File file = null;
FileInputStream fis = null;
HWPFDocument document = null;
Range commentRange = null;
try {
    file = new File(fileName);
    fis = new FileInputStream(file);
    document = new HWPFDocument(fis);
    commentRange = document.getCommentsRange();
    int numComments = commentRange.numParagraphs();
    for (int i = 0; i < numComments; i++) {
        String comments = commentRange.getParagraph(i).text();
        // Drop the paragraph terminator and surrounding whitespace before testing for content.
        comments = comments.replaceAll("\\cM?\r?\n", "").trim();
        if (!comments.equals("")) {
            System.out.println("comment :- " + comments);
        }
    }
} catch (Exception e) {
    e.printStackTrace();
} finally {
    // Previously the stream was never closed; release it whether or not reading succeeded.
    if (fis != null) {
        try {
            fis.close();
        } catch (Exception closeFailure) {
            closeFailure.printStackTrace();
        }
    }
}
//you can use the org.apache.poi.hwpf.extractor.WordExtractor to get the text String fileName = "example.doc"; HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(fileName)); WordExtractor extractor = new WordExtractor(wordDoc); String[] text = extractor.getParagraphText(); int lineCounter = text.length; String articleStr = ""; // This string object use to store text from the word document. for(int index = 0;index < lineCounter;++ index){ String paragraphStr = text[index].replaceAll("\r\n","").replaceAll("\n","").trim(); int paragraphLength = paragraphStr.length(); if(paragraphLength != 0){ articleStr.concat(paragraphStr); } } //you can use the org.apache.poi.hwpf.usermodel.Picture to get the image List<Picture> picturesList = wordDoc.getPicturesTable().getAllPictures(); for(int i = 0;i < picturesList.size();++i){ BufferedImage image = null; Picture pic = picturesList.get(i); image = ImageIO.read(new ByteArrayInputStream(pic.getContent())); if(image != null){ System.out.println("Image["+i+"]"+" ImageWidth:"+image.getWidth()+" ImageHeight:"+image.getHeight()+" Suggest Image Format:"+pic.suggestFileExtension()); } }
fis = new FileInputStream(new File(FilePath)); XWPFDocument doc = new XWPFDocument(fis); XWPFWordExtractor extract = new XWPFWordExtractor(doc); fis = new FileInputStream(new File(FilePath)); HWPFDocument doc = new HWPFDocument(fis); WordExtractor extractor = new WordExtractor(doc); System.out.println(extractor.getText()); } catch (IOException e) { e.printStackTrace();
/**
 * Adds a watermark to a Word file in place and returns the bytes of the rewritten file.
 *
 * <p>Files whose lower-cased name ends in {@code docx} are handled as {@code XWPFDocument},
 * those ending in {@code doc} as {@code HWPFDocument}. The original file is overwritten.
 *
 * @param originFile the Word file to watermark; it is rewritten in place
 * @param watermark  watermark text; {@code DEFAULT_WATERMARK} is used when blank
 * @param color      font colour; {@code DEFAULT_FONT_COLOR} is used when blank
 * @return the content of the rewritten file, or {@code null} when the file name has
 *         neither of the supported endings
 * @throws IOException if the file cannot be read or written
 */
@Override
public byte[] handle(File originFile, String watermark, String color) throws IOException {
    watermark = StringUtils.isBlank(watermark) ? DEFAULT_WATERMARK : watermark;
    color = StringUtils.isBlank(color) ? DEFAULT_FONT_COLOR : color;

    String lowerName = originFile.getName().toLowerCase();
    if (lowerName.endsWith("docx")) {
        // Declaring the document as a resource closes it even when write() throws.
        try (InputStream in = new FileInputStream(originFile);
             XWPFDocument doc = new XWPFDocument(in)) {
            addWaterMark(doc, watermark, color);
            try (OutputStream out = new FileOutputStream(originFile)) {
                doc.write(out);
            }
        }
    } else if (lowerName.endsWith("doc")) {
        try (InputStream in = new FileInputStream(originFile);
             HWPFDocument doc = new HWPFDocument(in)) {
            addWaterMark(doc, watermark, color);
            try (OutputStream out = new FileOutputStream(originFile)) {
                doc.write(out);
            }
        }
    } else {
        return null;
    }

    // Read the rewritten file back; the stream used here was previously never closed.
    try (InputStream rewritten = new FileInputStream(originFile)) {
        return IOUtils.toByteArray(rewritten);
    }
}
// Determine a page/slide count for an Office file, choosing the POI model by extension.
// The lower-cased copy is used ONLY for the case-insensitive extension test; the file is
// opened with the path exactly as given, because on a case-sensitive file system a
// lower-cased path may not name the same file (or any file at all).
String lowerFilePath = filePath.toLowerCase();
if (lowerFilePath.endsWith(".xls")) {
    HSSFWorkbook workbook = new HSSFWorkbook(new FileInputStream(filePath));
    int sheetNums = workbook.getNumberOfSheets();
    if (sheetNums > 0) {
        // Pages of the first sheet = number of row breaks + 1.
        return workbook.getSheetAt(0).getRowBreaks().length + 1;
    }
} else if (lowerFilePath.endsWith(".xlsx")) {
    XSSFWorkbook xwb = new XSSFWorkbook(filePath);
    int sheetNums = xwb.getNumberOfSheets();
    if (sheetNums > 0) {
        return xwb.getSheetAt(0).getRowBreaks().length + 1;
    }
} else if (lowerFilePath.endsWith(".docx")) {
    XWPFDocument docx = new XWPFDocument(POIXMLDocument.openPackage(filePath));
    // Page count as recorded in the document's extended properties.
    return docx.getProperties().getExtendedProperties().getUnderlyingProperties().getPages();
} else if (lowerFilePath.endsWith(".doc")) {
    HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(filePath));
    // Page count as recorded in the document's summary information.
    return wordDoc.getSummaryInformation().getPageCount();
} else if (lowerFilePath.endsWith(".ppt")) {
    HSLFSlideShow document = new HSLFSlideShow(new FileInputStream(filePath));
    SlideShow slideShow = new SlideShow(document);
    return slideShow.getSlides().length;
} else if (lowerFilePath.endsWith(".pptx")) {
    XSLFSlideShow xdocument = new XSLFSlideShow(filePath);
    XMLSlideShow xslideShow = new XMLSlideShow(xdocument);
    return xslideShow.getSlides().length;
}
// Open the Word document and print the text of the first paragraph of every table cell,
// visiting tables in iteration order, then rows, then cells.
InputStream fis = new FileInputStream(fileName);
POIFSFileSystem fs = new POIFSFileSystem(fis);
HWPFDocument doc = new HWPFDocument(fs);
Range range = doc.getRange();
TableIterator itr = new TableIterator(range);

while (itr.hasNext()) {
    final Table currentTable = itr.next();
    for (int r = 0; r < currentTable.numRows(); r++) {
        final TableRow currentRow = currentTable.getRow(r);
        for (int c = 0; c < currentRow.numCells(); c++) {
            final TableCell currentCell = currentRow.getCell(c);
            final String firstParagraphText = currentCell.getParagraph(0).text();
            System.out.println(firstParagraphText);
        }
    }
}
POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(filename)); HWPFDocument doc = new HWPFDocument(fs); ListTables listtables = doc.getListTables(); Paragraph para; Range range = doc.getRange(); for(int x=0; x<range.numParagraphs(); x++) { para = range.getParagraph(x);