org.apache.poi.hwpf.HWPFDocument Java code examples

Refine search

FileInputStream fis = new FileInputStream(file.getAbsolutePath());
HWPFDocument document = new HWPFDocument(fis);
extractor = new WordExtractor(document);
String[] fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)

/**
 * Fills in a Word template and hands the rendered bytes to the {@code write} helper.
 *
 * <p>Every {@code ${key}} placeholder in the document range is replaced by the string form of
 * the value stored under {@code key} in {@code contentMap}.
 *
 * @param tmpFile    template document to read
 * @param contentMap placeholder names mapped to their replacement values; a {@code null} value
 *                   is rendered as an empty string
 * @param exportFile destination passed through to {@code write}
 * @throws Exception if the template cannot be read or the result cannot be written
 */
public static void build(File tmpFile, Map<String, Object> contentMap, String exportFile) throws Exception {
  // try-with-resources closes the stream and the document even when a step below throws
  try (FileInputStream tempFileInputStream = new FileInputStream(tmpFile);
      HWPFDocument document = new HWPFDocument(tempFileInputStream)) {
    // Read the text content
    Range bodyRange = document.getRange();
    // Replace the placeholders
    for (Map.Entry<String, Object> entry : contentMap.entrySet()) {
      Object value = entry.getValue();
      // A null value used to fail with a NullPointerException on toString()
      String replacement = value == null ? "" : value.toString();
      bodyRange.replaceText("${" + entry.getKey() + "}", replacement);
    }
    // Export to file
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    document.write(byteArrayOutputStream);
    write(exportFile, byteArrayOutputStream);
  }
}

HWPFDocument document;
try {
  document = new HWPFDocument(root);
} catch (org.apache.poi.EncryptedDocumentException e) {
    throw new EncryptedDocumentException(e);
    new org.apache.poi.hwpf.extractor.WordExtractor(document);
PicturesTable pictureTable = document.getPicturesTable();
PicturesSource pictures = new PicturesSource(document);
HeaderStories headerFooter = null;
Range r = document.getRange();
ListManager listManager = new ListManager(document);
for (int i = 0; i < r.numParagraphs(); i++) {
  Paragraph p = r.getParagraph(i);
  i += handleParagraph(p, 0, r, document, FieldsDocumentPart.MAIN, pictures, pictureTable, listManager, xhtml);
  for (String paragraph : wordExtractor.getMainTextboxText()) {
    xhtml.element("p", paragraph);
for (String paragraph : wordExtractor.getFootnoteText()) {
  xhtml.element("p", paragraph);

/**
 * Indexes the pictures of a document by start offset and records which of them are not
 * reached through a character run of the main range.
 *
 * @param doc document whose pictures table and range are scanned
 */
private PicturesSource(HWPFDocument doc) {
  picturesTable = doc.getPicturesTable();
  all = picturesTable.getAllPictures();
  // Build the Offset-Picture lookup map
  lookup = new HashMap<Integer, Picture>();
  for (Picture p : all) {
    lookup.put(p.getStartOffset(), p);
  }
  // Work out which Pictures aren't referenced by
  //  a \u0001 in the main text
  // These are \u0008 escher floating ones, ones
  //  found outside the normal text, and who
  //  knows what else...
  nonU1based = new ArrayList<Picture>();
  nonU1based.addAll(all);
  Range r = doc.getRange();
  for (int i = 0; i < r.numCharacterRuns(); i++) {
    CharacterRun cr = r.getCharacterRun(i);
    if (picturesTable.hasPicture(cr)) {
      Picture p = getFor(cr);
      int at = nonU1based.indexOf(p);
      // indexOf returns -1 when the picture is no longer in the list (for instance when an
      // earlier run already cleared its slot); set(-1, ...) would throw, so skip that case.
      if (at >= 0) {
        nonU1based.set(at, null);
      }
    }
  }
}

  /**
   * Reads the Word document named by {@code args[0]} and constructs a {@code Word2Forrest}
   * from it together with an output stream on {@code test.xml}.
   *
   * @param args {@code args[0]} is the path of the input document
   * @throws IOException if either file cannot be opened or the document cannot be read
   */
  public static void main(String[] args) throws IOException {
    try (InputStream wordInput = new FileInputStream(args[0]);
        OutputStream xmlOutput = new FileOutputStream("test.xml")) {
      HWPFDocument wordDocument = new HWPFDocument(wordInput);
      new Word2Forrest(wordDocument, xmlOutput);
    }
  }
}

in = new FileInputStream("wto.doc");
doc = new HWPFDocument(in);
Range range = doc.getRange();
japan.write(outJapan);
in.close();
outUs.close();
outJapan.close();

// Set to true once a character run reporting isVanished() is encountered.
boolean isHidden = false;
 try {
   // Load the document through a POIFS file system built from the file's stream.
   fs = new POIFSFileSystem(new FileInputStream(filesname));
   HWPFDocument doc = new HWPFDocument(fs);
   WordExtractor we = new WordExtractor(doc);
   String[] paragraphs = we.getParagraphText();
   System.out.println("Word Document has " + paragraphs.length
       + " paragraphs");
   Range range = doc.getRange();
   for (int k = 0; k < range.numParagraphs(); k++) {
     org.apache.poi.hwpf.usermodel.Paragraph paragraph = range
         .getParagraph(k);
     // NOTE(review): Strings are immutable, so the results of the next two calls are
     // discarded; as written they have no effect.
     paragraph.text().trim();
     paragraph.text().replaceAll("\\cM?\r?\n", "");
     for (int j = 0; j < paragraph.numCharacterRuns(); j++) {
       org.apache.poi.hwpf.usermodel.CharacterRun cr = paragraph
           .getCharacterRun(j);
       if (cr.isVanished()) {
         // it is hidden
         System.out.println("text is hidden ");
         isHidden = true;
         // Leaves only the character-run loop; the paragraph loop keeps going.
         break;
       }
     }

FileInputStream fis = new FileInputStream(file.getAbsolutePath());
HWPFDocument docs = new HWPFDocument(fis);
extractor = new WordExtractor(docs);
String[] fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)
    data+=fileData[i];
fis.close();
file = new File("file2.doc");
fis = new FileInputStream(file.getAbsolutePath());
docs = new HWPFDocument(fis);
extractor = new WordExtractor(docs);
fileData = extractor.getParagraphText();
for (int i = 0; i < fileData.length; i++)

  /**
   * Prints the text of every character run of a Word document, one output line per paragraph.
   *
   * @param args {@code args[0]} is the path of the document to read
   * @throws IOException if the file cannot be opened or read
   */
  public static void main(String[] args) throws IOException {
    // try-with-resources closes the stream and the document even if reading fails part-way
    try (FileInputStream in = new FileInputStream(args[0]);
        HWPFDocument doc = new HWPFDocument(in)) {
      Range r = doc.getRange();

      System.out.println("Example you supplied:");
      System.out.println("---------------------");
      for (int x = 0; x < r.numSections(); x++) {
        Section s = r.getSection(x);
        for (int y = 0; y < s.numParagraphs(); y++) {
          Paragraph p = s.getParagraph(y);
          for (int z = 0; z < p.numCharacterRuns(); z++) {
            // character run
            CharacterRun run = p.getCharacterRun(z);
            // character run text
            String text = run.text();
            // show us the text
            System.out.print(text);
          }
          // use a new line at the paragraph break
          System.out.println();
        }
      }
    }
  }
}

 import java.io.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;

/**
 * Example that appends a bold line of text to an existing Word document and saves the result
 * under a new file name.
 */
public class test {
  /**
   * Reads {@code D:\src.doc}, appends the text and writes {@code D:\result.doc}.
   *
   * @param args unused
   * @throws Exception if the source cannot be read or the result cannot be written
   */
  public static void main(String[] args) throws Exception {
    // POI apparently can't create a document from scratch,
    // so we need an existing empty dummy document
    try (InputStream in = new FileInputStream("D:\\src.doc");
        HWPFDocument doc = new HWPFDocument(in)) {
      Range range = doc.getRange();
      CharacterRun run = range
          .insertAfter("Text After copied file contents!");
      run.setBold(true);
      // The output file is opened only after the edit succeeded, as before; it is now closed
      // even when the write fails.
      try (OutputStream out = new FileOutputStream("D:\\result.doc")) {
        doc.write(out);
        out.flush();
      }
    }
  }
}

 import org.apache.poi.hwpf.HWPFDocument;
...
File fin = new File(yourFilePath);
FileInputStream fis = new FileInputStream(fin);
HWPFDocument doc = new HWPFDocument(fis);
String text = doc.getDocumentText();
System.out.println(text);
...

FileInputStream is = new FileInputStream(args[0]);
HSLFSlideShow ppt = new HSLFSlideShow(is);
is.close();
        HWPFDocument doc = new HWPFDocument(data.getInputStream());
        Range r = doc.getRange();
        for(int k = 0; k < r.numParagraphs(); k++) {
          Paragraph p = r.getParagraph(k);
          System.out.println(p.text());
        doc.write(out);
        out.close();
        doc.close();
       }  else {
        FileOutputStream out = new FileOutputStream(ole.getProgId() + "-"+(oleIdx+1)+".dat");
         out.write(chunk,0,count);
        is.close();
        out.close();

// Open the Word file and wrap the parsed document in a WordExtractor.
FileInputStream fis = new FileInputStream(file.getAbsolutePath());
 HWPFDocument document = new HWPFDocument(fis);
 WordExtractor extractor = new WordExtractor(document);
 // Text of the document as returned by the extractor.
 String rawText = extractor.getText();
 // NOTE(review): stripFields presumably removes Word field codes from the text — confirm
 // against the WordExtractor Javadoc.
 String displayText = extractor.stripFields(rawText);

File file = null;
 FileInputStream fis = null;
 HWPFDocument document = null;
 Range commentRange = null;
 try {
   file = new File(fileName);
   fis = new FileInputStream(file);
   document = new HWPFDocument(fis);
   commentRange = document.getCommentsRange();
   int numComments = commentRange.numParagraphs();
   for (int i = 0; i < numComments; i++) {
     String comments = commentRange.getParagraph(i).text();
     comments = comments.replaceAll("\\cM?\r?\n", "").trim();
     if (!comments.equals("")) {
       System.out.println("comment :-  " + comments);
     }
   }
 } catch (Exception e) {
   e.printStackTrace();
 }

// Use org.apache.poi.hwpf.extractor.WordExtractor to get the text.
 String fileName = "example.doc";
 HWPFDocument wordDoc = new HWPFDocument(new FileInputStream(fileName));
 WordExtractor extractor = new WordExtractor(wordDoc);
 String[] text = extractor.getParagraphText();
 // Accumulate the non-empty paragraphs. The earlier articleStr.concat(...) call discarded its
 // result (String is immutable), which left articleStr empty; a StringBuilder collects the
 // text instead.
 StringBuilder articleBuilder = new StringBuilder();
 for (String paragraph : text) {
   String paragraphStr = paragraph.replaceAll("\r\n","").replaceAll("\n","").trim();
   if (!paragraphStr.isEmpty()) {
     articleBuilder.append(paragraphStr);
   }
 }
 String articleStr = articleBuilder.toString(); // Text gathered from the Word document.
 // Use org.apache.poi.hwpf.usermodel.Picture to get the images.
 List<Picture> picturesList = wordDoc.getPicturesTable().getAllPictures();
 for(int i = 0;i < picturesList.size();++i){
   Picture pic = picturesList.get(i);
   // ImageIO.read returns null when no registered reader can decode the picture bytes
   BufferedImage image = ImageIO.read(new ByteArrayInputStream(pic.getContent()));
   if(image != null){
     System.out.println("Image["+i+"]"+" ImageWidth:"+image.getWidth()+" ImageHeight:"+image.getHeight()+" Suggest Image Format:"+pic.suggestFileExtension());
   }
 }

fis = new FileInputStream(new File(FilePath));
XWPFDocument doc = new XWPFDocument(fis);
XWPFWordExtractor extract = new XWPFWordExtractor(doc);
  fis = new FileInputStream(new File(FilePath));
  HWPFDocument doc = new HWPFDocument(fis);
  WordExtractor extractor = new WordExtractor(doc);
  System.out.println(extractor.getText());
} catch (IOException e) {
  e.printStackTrace();

/**
 * Adds a watermark to a Word file in place and returns the content of the rewritten file.
 *
 * <p>Files whose name ends in {@code docx} are handled as {@code XWPFDocument}, those ending
 * in {@code doc} as {@code HWPFDocument}. Blank {@code watermark} or {@code color} values fall
 * back to {@code DEFAULT_WATERMARK} and {@code DEFAULT_FONT_COLOR}.
 *
 * @param originFile file to watermark; it is overwritten with the result
 * @param watermark  watermark text, or blank for the default
 * @param color      font colour, or blank for the default
 * @return the bytes of the rewritten file, or {@code null} when the name matches neither suffix
 * @throws IOException if the file cannot be read or written
 */
@Override
public byte[] handle(File originFile, String watermark, String color) throws IOException {
  watermark = StringUtils.isBlank(watermark)?DEFAULT_WATERMARK:watermark;
  color = StringUtils.isBlank(color)?DEFAULT_FONT_COLOR:color;
  String lowerName = originFile.getName().toLowerCase();
  if (lowerName.endsWith("docx")) {
    // The document is a resource so it is closed even if watermarking or writing throws
    try (InputStream in = new FileInputStream(originFile);
        XWPFDocument doc = new XWPFDocument(in)) {
      addWaterMark(doc, watermark, color);
      try (OutputStream out = new FileOutputStream(originFile)){
        doc.write(out);
      }
    }
  } else if (lowerName.endsWith("doc")) {
    try (InputStream in = new FileInputStream(originFile);
        HWPFDocument doc = new HWPFDocument(in)) {
      addWaterMark(doc, watermark, color);
      try (OutputStream out = new FileOutputStream(originFile)){
        doc.write(out);
      }
    }
  } else {
    return null;
  }
  // Read the result back through a stream that is closed afterwards; previously the
  // FileInputStream passed to IOUtils.toByteArray was never closed.
  try (InputStream result = new FileInputStream(originFile)) {
    return IOUtils.toByteArray(result);
  }
}

 // The lower-cased copy is used only to recognise the extension. Files are opened through the
 // original filePath, because a lower-cased path names a different (usually missing) file on
 // case-sensitive file systems.
 String lowerFilePath = filePath.toLowerCase();
if (lowerFilePath.endsWith(".xls")) {
  HSSFWorkbook workbook;
  try (FileInputStream in = new FileInputStream(filePath)) {
    workbook = new HSSFWorkbook(in);
  }
  int sheetNums = workbook.getNumberOfSheets();
  if (sheetNums > 0) {
    // Number of row breaks on the first sheet, plus one
    return workbook.getSheetAt(0).getRowBreaks().length + 1;
  }
} else if (lowerFilePath.endsWith(".xlsx")) {
  XSSFWorkbook xwb = new XSSFWorkbook(filePath);
  int sheetNums = xwb.getNumberOfSheets();
  if (sheetNums > 0) {
    return xwb.getSheetAt(0).getRowBreaks().length + 1;
  }
} else if (lowerFilePath.endsWith(".docx")) {
  XWPFDocument docx = new XWPFDocument(POIXMLDocument.openPackage(filePath));
  return docx.getProperties().getExtendedProperties().getUnderlyingProperties().getPages();
} else if (lowerFilePath.endsWith(".doc")) {
  HWPFDocument wordDoc;
  try (FileInputStream in = new FileInputStream(filePath)) {
    wordDoc = new HWPFDocument(in);
  }
  return wordDoc.getSummaryInformation().getPageCount();
} else if (lowerFilePath.endsWith(".ppt")) {
  HSLFSlideShow document;
  try (FileInputStream in = new FileInputStream(filePath)) {
    document = new HSLFSlideShow(in);
  }
  SlideShow slideShow = new SlideShow(document);
  return slideShow.getSlides().length;
} else if (lowerFilePath.endsWith(".pptx")) {
  XSLFSlideShow xdocument = new XSLFSlideShow(filePath);
  XMLSlideShow xslideShow = new XMLSlideShow(xdocument);
  return xslideShow.getSlides().length;
}

// Walk every table of the document and print the text of the first paragraph of each cell.
InputStream fis = new FileInputStream(fileName);
POIFSFileSystem fs = new POIFSFileSystem(fis);
HWPFDocument doc = new HWPFDocument(fs);
Range range = doc.getRange();
for (TableIterator itr = new TableIterator(range); itr.hasNext();) {
  Table table = itr.next();
  for (int rowIndex = 0; rowIndex < table.numRows(); rowIndex++) {
    TableRow row = table.getRow(rowIndex);
    for (int colIndex = 0; colIndex < row.numCells(); colIndex++) {
      String firstParagraphText = row.getCell(colIndex).getParagraph(0).text();
      System.out.println(firstParagraphText);
    }
  }
}

POIFSFileSystem fs = new POIFSFileSystem(new FileInputStream(filename));
HWPFDocument doc = new HWPFDocument(fs);
ListTables listtables = doc.getListTables();
Paragraph para;
Range range = doc.getRange();
for(int x=0; x<range.numParagraphs(); x++) {
  para = range.getParagraph(x);

Javadoc

This class acts as the bucket that we throw all of the Word data structures into.

Most used methods

<init>
This constructor loads a Word document from a POIFSFileSystem
getRange
getStyleSheet
write
getPicturesTable
close
getFields
Returns user-friendly interface to access document Fields
getSummaryInformation
getCommentsRange
Returns the Range which covers all annotations.
getDocumentText
getListTables
getOfficeDrawingsMain

Popular in Java

Reactive rest calls using spring rest template
getApplicationContext (Context)
getSharedPreferences (Context)
compareTo (BigDecimal)
Deque (java.util)
A linear collection that supports element insertion and removal at both ends. The name deque is shor
Map (java.util)
A Map is a data structure consisting of a set of keys and values in which each key is mapped to a si
HttpServlet (javax.servlet.http)
Provides an abstract class to be subclassed to create an HTTP servlet suitable for a Web site. A sub
DateTimeFormat (org.joda.time.format)
Factory that creates instances of DateTimeFormatter from patterns and styles. Datetime formatting i
FlowLayout (java.awt)
A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. F
Graphics2D (java.awt)
This Graphics2D class extends the Graphics class to provide more sophisticated control overgraphics
GitHub Copilot alternatives

How to use HWPFDocument in org.apache.poi.hwpf

Best Java code snippets using org.apache.poi.hwpf.HWPFDocument (Showing top 20 results out of 315)

Refine search

How to use
HWPFDocument
in
org.apache.poi.hwpf