mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-12 07:56:16 +00:00
Extract inline images from PDFs
This commit is contained in:
parent
2d239849b1
commit
89fcf04cb7
@@ -34,6 +34,7 @@ import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.parser.AutoDetectParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.parser.pdf.PDFParserConfig;
|
||||
import org.apache.tika.sax.BodyContentHandler;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
@@ -79,6 +80,12 @@ final class PDFAttachmentExtractor {
|
||||
ParseContext parseContext = new ParseContext();
|
||||
parseContext.set(Parser.class, parser);
|
||||
|
||||
PDFParserConfig pdfConfig = new PDFParserConfig();
|
||||
pdfConfig.setExtractInlineImages(true);
|
||||
pdfConfig.setExtractUniqueInlineImagesOnly(true);
|
||||
|
||||
parseContext.set(PDFParserConfig.class, pdfConfig);
|
||||
|
||||
//Keep track of the attachment files as they are being extracted and written to disk.
|
||||
NewResourceWatcher watcher = new NewResourceWatcher();
|
||||
parseContext.set(EmbeddedDocumentExtractor.class, new EmbeddedAttachmentHandler(outputDir, parentID, watcher));
|
||||
|
Loading…
x
Reference in New Issue
Block a user