feiyu02
2024-08-13 b8cc591541b88dd2bb93f111f8e8075842dce7ca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
package cn.flightfeather.supervision.docx4j.simpleDemo;
 
import org.apache.commons.io.IOUtils;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.WordprocessingML.OleObjectBinaryPart;
import org.docx4j.org.apache.poi.poifs.filesystem.DocumentInputStream;
 
import java.io.File;
import java.util.Map;
 
public class OLEExtractEmbeddedPDF {
 
    public static void main(String[] args) throws Exception {
        
        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new File(System.getProperty("user.dir")
                + "/yourdocx.docx"));
 
        for (Map.Entry<PartName,Part> entry : wordMLPackage.getParts().getParts().entrySet()) {
            
            Part p = entry.getValue();
            if (p instanceof OleObjectBinaryPart) {
 
                OleObjectBinaryPart olePart = (OleObjectBinaryPart)p;
                DocumentInputStream inputStream = olePart.getFs().createDocumentInputStream("CONTENTS");
                byte[] pdfBytes = IOUtils.toByteArray(inputStream);
                
                // Do something with it here...
                System.out.println("Found " + p.getPartName().getName() );
            }            
            
        }
    }
    
    
}