From 8a846b493752aa50a5efc2a98d0ab8ae629efdcc Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Tue, 30 Apr 2019 12:14:47 -0400 Subject: [PATCH 1/5] Added support for extracting PDF attachments in EFE module and disabled embedded content extraction for Tika so that we do not duplicate solr text for documents supported by EFE --- ... => DocumentEmbeddedContentExtractor.java} | 49 ++++- .../EmbeddedFileExtractorIngestModule.java | 10 +- .../PDFAttachmentExtractor.java | 179 ++++++++++++++++++ .../textextractors/TikaTextExtractor.java | 6 +- 4 files changed, 231 insertions(+), 13 deletions(-) rename Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/{MSOfficeEmbeddedContentExtractor.java => DocumentEmbeddedContentExtractor.java} (93%) create mode 100755 Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java similarity index 93% rename from Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java rename to Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java index d9c142563b..c19ef48b2f 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java @@ -22,8 +22,10 @@ import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -33,13 +35,11 @@ import org.apache.commons.io.IOUtils; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hslf.usermodel.HSLFPictureData; import org.apache.poi.hslf.usermodel.HSLFSlideShow; -import org.apache.poi.hssf.record.RecordInputStream.LeftoverDataException; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.model.PicturesTable; import org.apache.poi.sl.usermodel.PictureData.PictureType; import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.util.RecordFormatException; import org.apache.tika.config.TikaConfig; import org.apache.tika.detect.Detector; import org.apache.tika.exception.TikaException; @@ -72,13 +72,13 @@ import org.xml.sax.SAXException; /** * Extracts embedded content (e.g. images, audio, video) from Microsoft Office - * documents (both original and OOXML forms). + * documents (both original and OOXML forms) and PDF documents. */ -class MSOfficeEmbeddedContentExtractor { +class DocumentEmbeddedContentExtractor { private final FileManager fileManager; private final IngestServices services; - private static final Logger LOGGER = Logger.getLogger(MSOfficeEmbeddedContentExtractor.class.getName()); + private static final Logger LOGGER = Logger.getLogger(EmbeddedDocumentExtractor.class.getName()); private final IngestJobContext context; private String parentFileName; private final String UNKNOWN_IMAGE_NAME_PREFIX = "image_"; //NON-NLS @@ -101,7 +101,8 @@ class MSOfficeEmbeddedContentExtractor { PPT("application/vnd.ms-powerpoint"), //NON-NLS PPTX("application/vnd.openxmlformats-officedocument.presentationml.presentation"), //NON-NLS XLS("application/vnd.ms-excel"), //NON-NLS - XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); //NON-NLS + XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), //NON-NLS + PDF("application/pdf"); //NON-NLS private final String mimeType; @@ -116,7 +117,7 @@ class MSOfficeEmbeddedContentExtractor { } private SupportedExtractionFormats abstractFileExtractionFormat; - MSOfficeEmbeddedContentExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) throws NoCurrentCaseException { + DocumentEmbeddedContentExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) throws NoCurrentCaseException { this.fileManager = Case.getCurrentCaseThrows().getServices().getFileManager(); this.services = IngestServices.getInstance(); @@ -190,6 +191,9 @@ class MSOfficeEmbeddedContentExtractor { case XLS: listOfExtractedImages = extractImagesFromXls(abstractFile); break; + case PDF: + listOfExtractedImages = extractEmbeddedContentFromPDF(abstractFile); + break; default: break; } @@ -470,6 +474,37 @@ class MSOfficeEmbeddedContentExtractor { return listOfExtractedImages; } + + /** + * + * @param abstractFile + * @return + */ + private List extractEmbeddedContentFromPDF(AbstractFile abstractFile) { + PDFAttachmentExtractor pdfExtractor = new PDFAttachmentExtractor(parser); + try { + Path outputDirectory = Paths.get(getOutputFolderPath(parentFileName)); + //Get map of attachment name -> location disk. + Map extractedAttachments = pdfExtractor.extract( + new ReadContentInputStream(abstractFile), abstractFile.getId(), + outputDirectory); + + //Convert output to hook into the existing logic for creating derived files + List extractedFiles = new ArrayList<>(); + extractedAttachments.entrySet().forEach((pathEntry) -> { + String fileName = pathEntry.getKey(); + Path writeLocation = pathEntry.getValue(); + extractedFiles.add(new ExtractedFile(fileName, + getFileRelativePath(writeLocation.getFileName().toString()), + writeLocation.toFile().length())); + }); + + return extractedFiles; + } catch (IOException | SAXException | TikaException ex) { + LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs", ex); //NON-NLS + } + return Collections.emptyList(); + } /** * Writes image to the module output location. diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/EmbeddedFileExtractorIngestModule.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/EmbeddedFileExtractorIngestModule.java index 66c7f7030d..fd833b59a7 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/EmbeddedFileExtractorIngestModule.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/EmbeddedFileExtractorIngestModule.java @@ -50,7 +50,7 @@ public final class EmbeddedFileExtractorIngestModule extends FileIngestModuleAda //Outer concurrent hashmap with keys of JobID, inner concurrentHashmap with keys of objectID private static final ConcurrentHashMap> mapOfDepthTrees = new ConcurrentHashMap<>(); private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter(); - private MSOfficeEmbeddedContentExtractor officeExtractor; + private DocumentEmbeddedContentExtractor documentExtractor; private SevenZipExtractor archiveExtractor; private FileTypeDetector fileTypeDetector; private long jobId; @@ -115,10 +115,10 @@ public final class EmbeddedFileExtractorIngestModule extends FileIngestModuleAda } /* * Construct an embedded content extractor for processing Microsoft - * Office documents. + * Office documents and PDF documents. */ try { - this.officeExtractor = new MSOfficeEmbeddedContentExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute); + this.documentExtractor = new DocumentEmbeddedContentExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute); } catch (NoCurrentCaseException ex) { throw new IngestModuleException(Bundle.EmbeddedFileExtractorIngestModule_UnableToGetMSOfficeExtractor_errMsg(), ex); } @@ -155,8 +155,8 @@ public final class EmbeddedFileExtractorIngestModule extends FileIngestModuleAda */ if (archiveExtractor.isSevenZipExtractionSupported(abstractFile)) { archiveExtractor.unpack(abstractFile, mapOfDepthTrees.get(jobId)); - } else if (officeExtractor.isContentExtractionSupported(abstractFile)) { - officeExtractor.extractEmbeddedContent(abstractFile); + } else if (documentExtractor.isContentExtractionSupported(abstractFile)) { + documentExtractor.extractEmbeddedContent(abstractFile); } return ProcessResult.OK; } diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java new file mode 100755 index 0000000000..ae3d967ef3 --- /dev/null +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java @@ -0,0 +1,179 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2019 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.sleuthkit.autopsy.modules.embeddedfileextractor; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.HashMap; +import java.util.Map; +import java.util.logging.Level; +import org.apache.commons.io.FilenameUtils; +import org.apache.commons.io.IOUtils; +import org.apache.tika.exception.TikaException; +import org.apache.tika.extractor.EmbeddedDocumentExtractor; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.parser.Parser; +import org.apache.tika.sax.BodyContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; +import org.sleuthkit.autopsy.coreutils.Logger; +import org.sleuthkit.datamodel.EncodedFileOutputStream; +import org.sleuthkit.datamodel.TskData; + +/** + * Facility for extracting and storing attachments from PDF documents. + * Implementation specifics, however, are generic enough to be used on any + * document with embedded resources. The current name reflects the only known + * use case for this class. + */ +final class PDFAttachmentExtractor { + + private static Logger logger = Logger.getLogger(PDFAttachmentExtractor.class.getName()); + private final AutoDetectParser parser; + + public PDFAttachmentExtractor() { + parser = new AutoDetectParser(); + } + + public PDFAttachmentExtractor(AutoDetectParser parser) { + this.parser = parser; + } + + /** + * The public endpoint + * + * @param input + * @param parentID + * @param outputDir + * @return + * @throws IOException + * @throws SAXException + * @throws TikaException + */ + public Map extract(InputStream input, long parentID, Path outputDir) throws IOException, SAXException, TikaException { + ExtractionPreconditions.checkArgument(Files.exists(outputDir), + String.format("Output directory: %s, does not exist.", outputDir.toString())); //NON-NLS + + ParseContext parseContext = new ParseContext(); + parseContext.set(Parser.class, parser); + + //Keep track of the attachment files as they are being extracted and written to disk. + NewResourceWatcher watcher = new NewResourceWatcher(); + parseContext.set(EmbeddedDocumentExtractor.class, new EmbeddedAttachmentHandler(outputDir, parentID, watcher)); + + //Parse input with default params, except for our ParseContext + parser.parse(input, new BodyContentHandler(-1), new Metadata(), parseContext); + + return watcher.getSnapshot(); + } + + /** + * Internal Tika class that is invoked upon encountering an embedded + * resource. + */ + static class EmbeddedAttachmentHandler implements EmbeddedDocumentExtractor { + + private final Path outputDirectory; + private final NewResourceWatcher watcher; + private final Long parentID; + private Integer attachmentCount; + + public EmbeddedAttachmentHandler(Path outputDirectory, long parentID, NewResourceWatcher watcher) { + this.outputDirectory = outputDirectory; + this.watcher = watcher; + this.parentID = parentID; + attachmentCount = 0; + } + + @Override + public boolean shouldParseEmbedded(Metadata mtdt) { + //Grab every available attachment + return true; + } + + @Override + public void parseEmbedded(InputStream in, ContentHandler ch, Metadata mtdt, boolean bln) throws SAXException, IOException { + //Resource naming scheme is used internally in autopsy, therefore we can guarentee uniqueness. + String uniqueExtractedName = parentID + "_attch_" + attachmentCount++; //NON-NLS + + String name = mtdt.get(Metadata.RESOURCE_NAME_KEY); + String ext = FilenameUtils.getExtension(name); + + //Append the extension if we can. + if(ext == null) { + name = uniqueExtractedName; + } else if(!ext.isEmpty()) { + uniqueExtractedName += "." + ext; + } + + Path outputFile = outputDirectory.resolve(uniqueExtractedName); + + try (EncodedFileOutputStream outputStream = new EncodedFileOutputStream( + new FileOutputStream(outputFile.toFile()), TskData.EncodingType.XOR1)){ + IOUtils.copy(in, outputStream); + watcher.notify(name, outputFile); + } catch (IOException ex) { + logger.log(Level.WARNING, String.format("Could not extract attachment %s into directory %s", //NON-NLS + uniqueExtractedName, outputFile), ex); + } + } + } + + /** + * Convenient wrapper for keeping track of new resource paths and the display + * name for each of these resources. + * + * It is necessary to maintain a snapshot of only our changes when the + * output directory is shared among other processes/threads. + */ + static class NewResourceWatcher { + + private final Map newResourcePaths; + + public NewResourceWatcher() { + newResourcePaths = new HashMap<>(); + } + + public void notify(String name, Path newResource) { + newResourcePaths.put(name, newResource); + } + + public Map getSnapshot() { + return newResourcePaths; + } + } + + /** + * Static convenience methods that ensure the PDF extractor is being invoked + * correctly. + */ + static class ExtractionPreconditions { + + public static void checkArgument(boolean expression, String msg) throws IOException { + if (!expression) { + throw new IOException(msg); + } + } + } +} diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index b4a53b2e55..b7cab58101 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -44,6 +44,7 @@ import java.util.stream.Stream; import org.apache.tika.Tika; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.EmptyParser; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.parser.ParsingReader; @@ -177,7 +178,10 @@ final class TikaTextExtractor implements TextExtractor { InputStream stream = null; ParseContext parseContext = new ParseContext(); - parseContext.set(Parser.class, parser); + + //Disable appending embedded file text to output + //JIRA-4975 + parseContext.set(Parser.class, new EmptyParser()); if (ocrEnabled() && content instanceof AbstractFile) { AbstractFile file = ((AbstractFile) content); From 35e3934816256f20f569c5ada6cf738ef19d1c86 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Tue, 30 Apr 2019 12:28:04 -0400 Subject: [PATCH 2/5] Only turn off embedded extraction for known mime-types --- .../textextractors/TikaTextExtractor.java | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index b7cab58101..fa1deaa9aa 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -120,6 +120,16 @@ final class TikaTextExtractor implements TextExtractor { "application/x-z", //NON-NLS "application/x-compress"); //NON-NLS + //Tika should ignore types with embedded files that can be handled by the unpacking modules + private static final List EMBEDDED_FILE_MIME_TYPES + = ImmutableList.of("application/msword", //NON-NLS + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", //NON-NLS + "application/vnd.ms-powerpoint", //NON-NLS + "application/vnd.openxmlformats-officedocument.presentationml.presentation", //NON-NLS + "application/vnd.ms-excel", //NON-NLS + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", //NON-NLS + "application/pdf"); //NON-NLS + private static final java.util.logging.Logger TIKA_LOGGER = java.util.logging.Logger.getLogger("Tika"); //NON-NLS private static final Logger AUTOPSY_LOGGER = Logger.getLogger(TikaTextExtractor.class.getName()); @@ -137,7 +147,7 @@ final class TikaTextExtractor implements TextExtractor { private static final File TESSERACT_PATH = locateTesseractExecutable(); private String languagePacks = formatLanguagePacks(PlatformUtil.getOcrLanguagePacks()); private static final String TESSERACT_OUTPUT_FILE_NAME = "tess_output"; //NON-NLS - + private ProcessTerminator processTerminator; private static final List TIKA_SUPPORTED_TYPES @@ -152,8 +162,8 @@ final class TikaTextExtractor implements TextExtractor { /** * If Tesseract has been installed and is set to be used through - * configuration, then ocr is enabled. OCR can only currently be run on - * 64 bit Windows OS. + * configuration, then ocr is enabled. OCR can only currently be run on 64 + * bit Windows OS. * * @return Flag indicating if OCR is set to be used. */ @@ -178,10 +188,14 @@ final class TikaTextExtractor implements TextExtractor { InputStream stream = null; ParseContext parseContext = new ParseContext(); - - //Disable appending embedded file text to output + + //Disable appending embedded file text to output for EFE supported types //JIRA-4975 - parseContext.set(Parser.class, new EmptyParser()); + if(content instanceof AbstractFile && EMBEDDED_FILE_MIME_TYPES.contains(((AbstractFile)content).getMIMEType())) { + parseContext.set(Parser.class, new EmptyParser()); + } else { + parseContext.set(Parser.class, parser); + } if (ocrEnabled() && content instanceof AbstractFile) { AbstractFile file = ((AbstractFile) content); @@ -205,7 +219,7 @@ final class TikaTextExtractor implements TextExtractor { TesseractOCRConfig ocrConfig = new TesseractOCRConfig(); String tesseractFolder = TESSERACT_PATH.getParent(); ocrConfig.setTesseractPath(tesseractFolder); - + ocrConfig.setLanguage(languagePacks); ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath()); parseContext.set(TesseractOCRConfig.class, ocrConfig); @@ -277,7 +291,7 @@ final class TikaTextExtractor implements TextExtractor { File outputFile = null; try { String tempDirectory = Case.getCurrentCaseThrows().getTempDirectory(); - + //Appending file id makes the name unique String tempFileName = FileUtil.escapeFileName(file.getId() + file.getName()); inputFile = Paths.get(tempDirectory, tempFileName).toFile(); @@ -318,7 +332,7 @@ final class TikaTextExtractor implements TextExtractor { } } } - + /** * Wraps the creation of a TikaReader into a Future so that it can be * cancelled. @@ -430,11 +444,11 @@ final class TikaTextExtractor implements TextExtractor { */ @Override public boolean isSupported() { - if(!(content instanceof AbstractFile)) { + if (!(content instanceof AbstractFile)) { return false; } - - String detectedType = ((AbstractFile)content).getMIMEType(); + + String detectedType = ((AbstractFile) content).getMIMEType(); if (detectedType == null || BINARY_MIME_TYPES.contains(detectedType) //any binary unstructured blobs (string extraction will be used) || ARCHIVE_MIME_TYPES.contains(detectedType) @@ -443,7 +457,7 @@ final class TikaTextExtractor implements TextExtractor { ) { return false; } - + return TIKA_SUPPORTED_TYPES.contains(detectedType); } @@ -493,11 +507,11 @@ final class TikaTextExtractor implements TextExtractor { if (context != null) { ImageConfig configInstance = context.lookup(ImageConfig.class); if (configInstance != null) { - if(Objects.nonNull(configInstance.getOCREnabled())) { + if (Objects.nonNull(configInstance.getOCREnabled())) { this.tesseractOCREnabled = configInstance.getOCREnabled(); } - - if(Objects.nonNull(configInstance.getOCRLanguages())) { + + if (Objects.nonNull(configInstance.getOCRLanguages())) { this.languagePacks = formatLanguagePacks(configInstance.getOCRLanguages()); } } From 68950b53722a8951155f661cd65abb2da2b096cd Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Tue, 30 Apr 2019 12:39:39 -0400 Subject: [PATCH 3/5] Added a comment and fixed a typo --- .../DocumentEmbeddedContentExtractor.java | 2 +- .../PDFAttachmentExtractor.java | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java index c19ef48b2f..e6a0810e78 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java @@ -78,7 +78,7 @@ class DocumentEmbeddedContentExtractor { private final FileManager fileManager; private final IngestServices services; - private static final Logger LOGGER = Logger.getLogger(EmbeddedDocumentExtractor.class.getName()); + private static final Logger LOGGER = Logger.getLogger(DocumentEmbeddedContentExtractor.class.getName()); private final IngestJobContext context; private String parentFileName; private final String UNKNOWN_IMAGE_NAME_PREFIX = "image_"; //NON-NLS diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java index ae3d967ef3..7a0747b648 100755 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java @@ -61,15 +61,16 @@ final class PDFAttachmentExtractor { } /** - * The public endpoint + * Extracts PDF attachments from a given input and writes them to the supplied + * output directory. * - * @param input - * @param parentID - * @param outputDir - * @return + * @param input Input PDF to extract attachments from + * @param parentID ID for unique extraction names + * @param outputDir Directory to write attachments + * @return Map containing file name -> location on disk * @throws IOException * @throws SAXException - * @throws TikaException + * @throws TikaException */ public Map extract(InputStream input, long parentID, Path outputDir) throws IOException, SAXException, TikaException { ExtractionPreconditions.checkArgument(Files.exists(outputDir), From a09d52dafc0a6242ec73f146cd5c96a9c61b2444 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Tue, 30 Apr 2019 12:41:39 -0400 Subject: [PATCH 4/5] One last comment --- .../DocumentEmbeddedContentExtractor.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java index e6a0810e78..a362c5789f 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java @@ -476,9 +476,10 @@ class DocumentEmbeddedContentExtractor { } /** + * Extracts embedded attachments from PDF files. * - * @param abstractFile - * @return + * @param abstractFile Input PDF file + * @return List of extracted files to be made into derived file instances. */ private List extractEmbeddedContentFromPDF(AbstractFile abstractFile) { PDFAttachmentExtractor pdfExtractor = new PDFAttachmentExtractor(parser); From ca028d478f1f4ce75626915d69c723d31039a57b Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Thu, 9 May 2019 16:31:11 -0400 Subject: [PATCH 5/5] Codacy fixes --- .../embeddedfileextractor/PDFAttachmentExtractor.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java index 7a0747b648..a36b5c365d 100755 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java @@ -49,7 +49,7 @@ import org.sleuthkit.datamodel.TskData; */ final class PDFAttachmentExtractor { - private static Logger logger = Logger.getLogger(PDFAttachmentExtractor.class.getName()); + private static final Logger logger = Logger.getLogger(PDFAttachmentExtractor.class.getName()); private final AutoDetectParser parser; public PDFAttachmentExtractor() { @@ -176,5 +176,8 @@ final class PDFAttachmentExtractor { throw new IOException(msg); } } + + private ExtractionPreconditions(){ + } } }