mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-12 07:56:16 +00:00
Extract inline images from PDFs
This commit is contained in:
parent
2d239849b1
commit
89fcf04cb7
@ -34,6 +34,7 @@ import org.apache.tika.metadata.Metadata;
|
|||||||
import org.apache.tika.parser.AutoDetectParser;
|
import org.apache.tika.parser.AutoDetectParser;
|
||||||
import org.apache.tika.parser.ParseContext;
|
import org.apache.tika.parser.ParseContext;
|
||||||
import org.apache.tika.parser.Parser;
|
import org.apache.tika.parser.Parser;
|
||||||
|
import org.apache.tika.parser.pdf.PDFParserConfig;
|
||||||
import org.apache.tika.sax.BodyContentHandler;
|
import org.apache.tika.sax.BodyContentHandler;
|
||||||
import org.xml.sax.ContentHandler;
|
import org.xml.sax.ContentHandler;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
@ -79,6 +80,12 @@ final class PDFAttachmentExtractor {
|
|||||||
ParseContext parseContext = new ParseContext();
|
ParseContext parseContext = new ParseContext();
|
||||||
parseContext.set(Parser.class, parser);
|
parseContext.set(Parser.class, parser);
|
||||||
|
|
||||||
|
PDFParserConfig pdfConfig = new PDFParserConfig();
|
||||||
|
pdfConfig.setExtractInlineImages(true);
|
||||||
|
pdfConfig.setExtractUniqueInlineImagesOnly(true);
|
||||||
|
|
||||||
|
parseContext.set(PDFParserConfig.class, pdfConfig);
|
||||||
|
|
||||||
//Keep track of the attachment files as they are being extracted and written to disk.
|
//Keep track of the attachment files as they are being extracted and written to disk.
|
||||||
NewResourceWatcher watcher = new NewResourceWatcher();
|
NewResourceWatcher watcher = new NewResourceWatcher();
|
||||||
parseContext.set(EmbeddedDocumentExtractor.class, new EmbeddedAttachmentHandler(outputDir, parentID, watcher));
|
parseContext.set(EmbeddedDocumentExtractor.class, new EmbeddedAttachmentHandler(outputDir, parentID, watcher));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user