From 8a846b493752aa50a5efc2a98d0ab8ae629efdcc Mon Sep 17 00:00:00 2001
From: "U-BASIS\\dsmyda" <dsmyda@win-dsmyd-4990.basistech.net>
Date: Tue, 30 Apr 2019 12:14:47 -0400
Subject: [PATCH 1/5] Added support for extracting PDF attachments in EFE
 module and disabled embedded content extraction for Tika so that we do not
 duplicate solr text for documents supported by EFE

---
 ... => DocumentEmbeddedContentExtractor.java} |  49 ++++-
 .../EmbeddedFileExtractorIngestModule.java    |  10 +-
 .../PDFAttachmentExtractor.java               | 179 ++++++++++++++++++
 .../textextractors/TikaTextExtractor.java     |   6 +-
 4 files changed, 231 insertions(+), 13 deletions(-)
 rename Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/{MSOfficeEmbeddedContentExtractor.java => DocumentEmbeddedContentExtractor.java} (93%)
 create mode 100755 Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java

diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
similarity index 93%
rename from Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java
rename to Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
index d9c142563b..c19ef48b2f 100644
--- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
@@ -22,8 +22,10 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -33,13 +35,11 @@ import org.apache.commons.io.IOUtils;
 import org.apache.poi.hwpf.usermodel.Picture;
 import org.apache.poi.hslf.usermodel.HSLFPictureData;
 import org.apache.poi.hslf.usermodel.HSLFSlideShow;
-import org.apache.poi.hssf.record.RecordInputStream.LeftoverDataException;
 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.hwpf.model.PicturesTable;
 import org.apache.poi.sl.usermodel.PictureData.PictureType;
 import org.apache.poi.ss.usermodel.Workbook;
-import org.apache.poi.util.RecordFormatException;
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.Detector;
 import org.apache.tika.exception.TikaException;
@@ -72,13 +72,13 @@ import org.xml.sax.SAXException;
 
 /**
  * Extracts embedded content (e.g. images, audio, video) from Microsoft Office
- * documents (both original and OOXML forms).
+ * documents (both original and OOXML forms) and PDF documents.
  */
-class MSOfficeEmbeddedContentExtractor {
+class DocumentEmbeddedContentExtractor {
 
     private final FileManager fileManager;
     private final IngestServices services;
-    private static final Logger LOGGER = Logger.getLogger(MSOfficeEmbeddedContentExtractor.class.getName());
+    private static final Logger LOGGER = Logger.getLogger(EmbeddedDocumentExtractor.class.getName());
     private final IngestJobContext context;
     private String parentFileName;
     private final String UNKNOWN_IMAGE_NAME_PREFIX = "image_"; //NON-NLS
@@ -101,7 +101,8 @@ class MSOfficeEmbeddedContentExtractor {
         PPT("application/vnd.ms-powerpoint"), //NON-NLS
         PPTX("application/vnd.openxmlformats-officedocument.presentationml.presentation"), //NON-NLS
         XLS("application/vnd.ms-excel"), //NON-NLS
-        XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"); //NON-NLS
+        XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"), //NON-NLS
+        PDF("application/pdf"); //NON-NLS
 
         private final String mimeType;
 
@@ -116,7 +117,7 @@ class MSOfficeEmbeddedContentExtractor {
     }
     private SupportedExtractionFormats abstractFileExtractionFormat;
 
-    MSOfficeEmbeddedContentExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) throws NoCurrentCaseException {
+    DocumentEmbeddedContentExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector, String moduleDirRelative, String moduleDirAbsolute) throws NoCurrentCaseException {
 
         this.fileManager = Case.getCurrentCaseThrows().getServices().getFileManager();
         this.services = IngestServices.getInstance();
@@ -190,6 +191,9 @@ class MSOfficeEmbeddedContentExtractor {
             case XLS:
                 listOfExtractedImages = extractImagesFromXls(abstractFile);
                 break;
+            case PDF:
+                listOfExtractedImages = extractEmbeddedContentFromPDF(abstractFile);
+                break;
             default:
                 break;
         }
@@ -470,6 +474,37 @@ class MSOfficeEmbeddedContentExtractor {
         return listOfExtractedImages;
 
     }
+    
+    /**
+     * 
+     * @param abstractFile
+     * @return 
+     */
+    private List<ExtractedFile> extractEmbeddedContentFromPDF(AbstractFile abstractFile) {
+        PDFAttachmentExtractor pdfExtractor = new PDFAttachmentExtractor(parser);
+        try {
+            Path outputDirectory = Paths.get(getOutputFolderPath(parentFileName));
+            //Get map of attachment display name -> location on disk.
+            Map<String, Path> extractedAttachments = pdfExtractor.extract(
+                    new ReadContentInputStream(abstractFile), abstractFile.getId(),
+                    outputDirectory);
+            //Convert output to hook into the existing logic for creating derived files.
+            //NOTE(review): length() below is the size of the XOR1-encoded file on disk; confirm it equals the attachment size.
+            List<ExtractedFile> extractedFiles = new ArrayList<>();
+            extractedAttachments.entrySet().forEach((pathEntry) -> {
+                String fileName = pathEntry.getKey();
+                Path writeLocation = pathEntry.getValue();
+                extractedFiles.add(new ExtractedFile(fileName,
+                        getFileRelativePath(writeLocation.getFileName().toString()), 
+                        writeLocation.toFile().length()));
+            });
+            
+            return extractedFiles;
+        } catch (IOException | SAXException | TikaException ex) {
+            LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs", ex); //NON-NLS
+        }
+        return Collections.emptyList();
+    }
 
     /**
      * Writes image to the module output location.
diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/EmbeddedFileExtractorIngestModule.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/EmbeddedFileExtractorIngestModule.java
index 66c7f7030d..fd833b59a7 100644
--- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/EmbeddedFileExtractorIngestModule.java
+++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/EmbeddedFileExtractorIngestModule.java
@@ -50,7 +50,7 @@ public final class EmbeddedFileExtractorIngestModule extends FileIngestModuleAda
     //Outer concurrent hashmap with keys of JobID, inner concurrentHashmap with keys of objectID
     private static final ConcurrentHashMap<Long, ConcurrentHashMap<Long, Archive>> mapOfDepthTrees = new ConcurrentHashMap<>();
     private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter();
-    private MSOfficeEmbeddedContentExtractor officeExtractor;
+    private DocumentEmbeddedContentExtractor documentExtractor;
     private SevenZipExtractor archiveExtractor;
     private FileTypeDetector fileTypeDetector;
     private long jobId;
@@ -115,10 +115,10 @@ public final class EmbeddedFileExtractorIngestModule extends FileIngestModuleAda
         }
         /*
          * Construct an embedded content extractor for processing Microsoft
-         * Office documents.
+         * Office documents and PDF documents.
          */
         try {
-            this.officeExtractor = new MSOfficeEmbeddedContentExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute);
+            this.documentExtractor = new DocumentEmbeddedContentExtractor(context, fileTypeDetector, moduleDirRelative, moduleDirAbsolute);
         } catch (NoCurrentCaseException ex) {
             throw new IngestModuleException(Bundle.EmbeddedFileExtractorIngestModule_UnableToGetMSOfficeExtractor_errMsg(), ex);
         }
@@ -155,8 +155,8 @@ public final class EmbeddedFileExtractorIngestModule extends FileIngestModuleAda
          */
         if (archiveExtractor.isSevenZipExtractionSupported(abstractFile)) {
             archiveExtractor.unpack(abstractFile, mapOfDepthTrees.get(jobId));
-        } else if (officeExtractor.isContentExtractionSupported(abstractFile)) {
-            officeExtractor.extractEmbeddedContent(abstractFile);
+        } else if (documentExtractor.isContentExtractionSupported(abstractFile)) {
+            documentExtractor.extractEmbeddedContent(abstractFile);
         }
         return ProcessResult.OK;
     }
diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java
new file mode 100755
index 0000000000..ae3d967ef3
--- /dev/null
+++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java
@@ -0,0 +1,179 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2019 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.sleuthkit.autopsy.modules.embeddedfileextractor;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.logging.Level;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.sleuthkit.autopsy.coreutils.Logger;
+import org.sleuthkit.datamodel.EncodedFileOutputStream;
+import org.sleuthkit.datamodel.TskData;
+
+/**
+ * Facility for extracting and storing attachments from PDF documents.
+ * Implementation specifics, however, are generic enough to be used on any
+ * document with embedded resources. The current name reflects the only known
+ * use case for this class.
+ */
+final class PDFAttachmentExtractor {
+
+    private static Logger logger = Logger.getLogger(PDFAttachmentExtractor.class.getName());
+    private final AutoDetectParser parser;
+    
+    public PDFAttachmentExtractor() {
+        parser = new AutoDetectParser();
+    }
+    
+    public PDFAttachmentExtractor(AutoDetectParser parser) {
+        this.parser = parser;
+    }
+
+    /**
+     * The public endpoint 
+     * 
+     * @param input
+     * @param parentID
+     * @param outputDir
+     * @return
+     * @throws IOException
+     * @throws SAXException
+     * @throws TikaException
+     */
+    public Map<String, Path> extract(InputStream input, long parentID, Path outputDir) throws IOException, SAXException, TikaException {
+        ExtractionPreconditions.checkArgument(Files.exists(outputDir), 
+                String.format("Output directory: %s, does not exist.", outputDir.toString())); //NON-NLS
+
+        ParseContext parseContext = new ParseContext();
+        parseContext.set(Parser.class, parser);
+
+        //Keep track of the attachment files as they are being extracted and written to disk.
+        NewResourceWatcher watcher = new NewResourceWatcher();
+        parseContext.set(EmbeddedDocumentExtractor.class, new EmbeddedAttachmentHandler(outputDir, parentID, watcher));
+
+        //Parse input with default params, except for our ParseContext
+        parser.parse(input, new BodyContentHandler(-1), new Metadata(), parseContext);
+
+        return watcher.getSnapshot();
+    }
+
+    /**
+     * Internal Tika class that is invoked upon encountering an embedded
+     * resource.
+     */
+    static class EmbeddedAttachmentHandler implements EmbeddedDocumentExtractor {
+
+        private final Path outputDirectory;
+        private final NewResourceWatcher watcher;
+        private final Long parentID;
+        private Integer attachmentCount;
+
+        public EmbeddedAttachmentHandler(Path outputDirectory, long parentID, NewResourceWatcher watcher) {
+            this.outputDirectory = outputDirectory;
+            this.watcher = watcher;
+            this.parentID = parentID;
+            attachmentCount = 0;
+        }
+
+        @Override
+        public boolean shouldParseEmbedded(Metadata mtdt) {
+            //Grab every available attachment
+            return true;
+        }
+
+        @Override
+        public void parseEmbedded(InputStream in, ContentHandler ch, Metadata mtdt, boolean bln) throws SAXException, IOException {
+            //Resource naming scheme is used internally in autopsy, therefore we can guarantee uniqueness.
+            String uniqueExtractedName = parentID + "_attch_" + attachmentCount++; //NON-NLS
+
+            String name = mtdt.get(Metadata.RESOURCE_NAME_KEY);
+            String ext = FilenameUtils.getExtension(name);
+            //A missing resource name yields a null extension: use the generated name for display.
+            //Otherwise append the extension if we can.
+            if(ext == null) {
+                name = uniqueExtractedName;
+            } else if(!ext.isEmpty()) {
+                uniqueExtractedName += "." + ext;
+            }
+
+            Path outputFile = outputDirectory.resolve(uniqueExtractedName);
+            //An I/O failure is logged and this attachment is not reported to the watcher; it is not rethrown.
+            try (EncodedFileOutputStream outputStream = new EncodedFileOutputStream(
+                    new FileOutputStream(outputFile.toFile()), TskData.EncodingType.XOR1)){
+                IOUtils.copy(in, outputStream);
+                watcher.notify(name, outputFile);
+            } catch (IOException ex) {
+                logger.log(Level.WARNING, String.format("Could not extract attachment %s into directory %s", //NON-NLS
+                        uniqueExtractedName, outputDirectory), ex);
+            }
+        }
+    }
+
+    /**
+     * Convenient wrapper for keeping track of new resource paths and the display
+     * name for each of these resources.
+     *
+     * It is necessary to maintain a snapshot of only our changes when the
+     * output directory is shared among other processes/threads.
+     */
+    static class NewResourceWatcher {
+
+        private final Map<String, Path> newResourcePaths;
+
+        public NewResourceWatcher() {
+            newResourcePaths = new HashMap<>();
+        }
+        //Records a written resource; a repeated display name replaces the earlier entry.
+        public void notify(String name, Path newResource) {
+            newResourcePaths.put(name, newResource);
+        }
+        //Returns a copy, so callers cannot alter (or observe later changes to) the watcher's own map.
+        public Map<String, Path> getSnapshot() {
+            return new HashMap<>(newResourcePaths);
+        }
+    }
+    
+    /**
+     * Static convenience methods that ensure the PDF extractor is being invoked
+     * correctly.
+     */
+    static class ExtractionPreconditions {
+
+        public static void checkArgument(boolean expression, String msg) throws IOException {
+            if (!expression) {
+                throw new IOException(msg);
+            }
+        }
+    }
+}
diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
index b4a53b2e55..b7cab58101 100644
--- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
@@ -44,6 +44,7 @@ import java.util.stream.Stream;
 import org.apache.tika.Tika;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.EmptyParser;
 import org.apache.tika.parser.ParseContext;
 import org.apache.tika.parser.Parser;
 import org.apache.tika.parser.ParsingReader;
@@ -177,7 +178,10 @@ final class TikaTextExtractor implements TextExtractor {
         InputStream stream = null;
 
         ParseContext parseContext = new ParseContext();
-        parseContext.set(Parser.class, parser);
+        
+        //Disable appending embedded file text to output 
+        //JIRA-4975
+        parseContext.set(Parser.class, new EmptyParser());
 
         if (ocrEnabled() && content instanceof AbstractFile) {
             AbstractFile file = ((AbstractFile) content);

From 35e3934816256f20f569c5ada6cf738ef19d1c86 Mon Sep 17 00:00:00 2001
From: "U-BASIS\\dsmyda" <dsmyda@win-dsmyd-4990.basistech.net>
Date: Tue, 30 Apr 2019 12:28:04 -0400
Subject: [PATCH 2/5] Only turn off embedded extraction for known mime-types

---
 .../textextractors/TikaTextExtractor.java     | 46 ++++++++++++-------
 1 file changed, 30 insertions(+), 16 deletions(-)

diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
index b7cab58101..fa1deaa9aa 100644
--- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
@@ -120,6 +120,16 @@ final class TikaTextExtractor implements TextExtractor {
                     "application/x-z", //NON-NLS
                     "application/x-compress"); //NON-NLS
 
+    //Tika should ignore types with embedded files that can be handled by the unpacking modules
+    private static final List<String> EMBEDDED_FILE_MIME_TYPES
+            = ImmutableList.of("application/msword", //NON-NLS
+                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document", //NON-NLS
+                    "application/vnd.ms-powerpoint", //NON-NLS
+                    "application/vnd.openxmlformats-officedocument.presentationml.presentation", //NON-NLS
+                    "application/vnd.ms-excel", //NON-NLS
+                    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", //NON-NLS
+                    "application/pdf"); //NON-NLS
+
     private static final java.util.logging.Logger TIKA_LOGGER = java.util.logging.Logger.getLogger("Tika"); //NON-NLS
     private static final Logger AUTOPSY_LOGGER = Logger.getLogger(TikaTextExtractor.class.getName());
 
@@ -137,7 +147,7 @@ final class TikaTextExtractor implements TextExtractor {
     private static final File TESSERACT_PATH = locateTesseractExecutable();
     private String languagePacks = formatLanguagePacks(PlatformUtil.getOcrLanguagePacks());
     private static final String TESSERACT_OUTPUT_FILE_NAME = "tess_output"; //NON-NLS
-    
+
     private ProcessTerminator processTerminator;
 
     private static final List<String> TIKA_SUPPORTED_TYPES
@@ -152,8 +162,8 @@ final class TikaTextExtractor implements TextExtractor {
 
     /**
      * If Tesseract has been installed and is set to be used through
-     * configuration, then ocr is enabled. OCR can only currently be run on
-     * 64 bit Windows OS.
+     * configuration, then ocr is enabled. OCR can only currently be run on 64
+     * bit Windows OS.
      *
      * @return Flag indicating if OCR is set to be used.
      */
@@ -178,10 +188,14 @@ final class TikaTextExtractor implements TextExtractor {
         InputStream stream = null;
 
         ParseContext parseContext = new ParseContext();
-        
-        //Disable appending embedded file text to output 
+
+        //Disable appending embedded file text to output for EFE supported types
         //JIRA-4975
-        parseContext.set(Parser.class, new EmptyParser());
+        if(content instanceof AbstractFile && EMBEDDED_FILE_MIME_TYPES.contains(((AbstractFile)content).getMIMEType())) {
+            parseContext.set(Parser.class, new EmptyParser());
+        } else {
+            parseContext.set(Parser.class, parser);
+        }
 
         if (ocrEnabled() && content instanceof AbstractFile) {
             AbstractFile file = ((AbstractFile) content);
@@ -205,7 +219,7 @@ final class TikaTextExtractor implements TextExtractor {
                 TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
                 String tesseractFolder = TESSERACT_PATH.getParent();
                 ocrConfig.setTesseractPath(tesseractFolder);
-                
+
                 ocrConfig.setLanguage(languagePacks);
                 ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath());
                 parseContext.set(TesseractOCRConfig.class, ocrConfig);
@@ -277,7 +291,7 @@ final class TikaTextExtractor implements TextExtractor {
         File outputFile = null;
         try {
             String tempDirectory = Case.getCurrentCaseThrows().getTempDirectory();
-            
+
             //Appending file id makes the name unique
             String tempFileName = FileUtil.escapeFileName(file.getId() + file.getName());
             inputFile = Paths.get(tempDirectory, tempFileName).toFile();
@@ -318,7 +332,7 @@ final class TikaTextExtractor implements TextExtractor {
             }
         }
     }
-    
+
     /**
      * Wraps the creation of a TikaReader into a Future so that it can be
      * cancelled.
@@ -430,11 +444,11 @@ final class TikaTextExtractor implements TextExtractor {
      */
     @Override
     public boolean isSupported() {
-        if(!(content instanceof AbstractFile)) {
+        if (!(content instanceof AbstractFile)) {
             return false;
         }
-        
-        String detectedType = ((AbstractFile)content).getMIMEType();
+
+        String detectedType = ((AbstractFile) content).getMIMEType();
         if (detectedType == null
                 || BINARY_MIME_TYPES.contains(detectedType) //any binary unstructured blobs (string extraction will be used)
                 || ARCHIVE_MIME_TYPES.contains(detectedType)
@@ -443,7 +457,7 @@ final class TikaTextExtractor implements TextExtractor {
                 ) {
             return false;
         }
-        
+
         return TIKA_SUPPORTED_TYPES.contains(detectedType);
     }
 
@@ -493,11 +507,11 @@ final class TikaTextExtractor implements TextExtractor {
         if (context != null) {
             ImageConfig configInstance = context.lookup(ImageConfig.class);
             if (configInstance != null) {
-                if(Objects.nonNull(configInstance.getOCREnabled())) {
+                if (Objects.nonNull(configInstance.getOCREnabled())) {
                     this.tesseractOCREnabled = configInstance.getOCREnabled();
                 }
-                
-                if(Objects.nonNull(configInstance.getOCRLanguages())) {
+
+                if (Objects.nonNull(configInstance.getOCRLanguages())) {
                     this.languagePacks = formatLanguagePacks(configInstance.getOCRLanguages());
                 }
             }

From 68950b53722a8951155f661cd65abb2da2b096cd Mon Sep 17 00:00:00 2001
From: "U-BASIS\\dsmyda" <dsmyda@win-dsmyd-4990.basistech.net>
Date: Tue, 30 Apr 2019 12:39:39 -0400
Subject: [PATCH 3/5] Added a comment and fixed a typo

---
 .../DocumentEmbeddedContentExtractor.java           |  2 +-
 .../PDFAttachmentExtractor.java                     | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
index c19ef48b2f..e6a0810e78 100644
--- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
@@ -78,7 +78,7 @@ class DocumentEmbeddedContentExtractor {
 
     private final FileManager fileManager;
     private final IngestServices services;
-    private static final Logger LOGGER = Logger.getLogger(EmbeddedDocumentExtractor.class.getName());
+    private static final Logger LOGGER = Logger.getLogger(DocumentEmbeddedContentExtractor.class.getName());
     private final IngestJobContext context;
     private String parentFileName;
     private final String UNKNOWN_IMAGE_NAME_PREFIX = "image_"; //NON-NLS
diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java
index ae3d967ef3..7a0747b648 100755
--- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java
@@ -61,15 +61,16 @@ final class PDFAttachmentExtractor {
     }
 
     /**
-     * The public endpoint 
+     * Extracts PDF attachments from a given input and writes them to the supplied
+     * output directory.
      * 
-     * @param input
-     * @param parentID
-     * @param outputDir
-     * @return
+     * @param input Input PDF to extract attachments from
+     * @param parentID ID for unique extraction names
+     * @param outputDir Directory to write attachments
+     * @return Map containing file name -> location on disk
      * @throws IOException
      * @throws SAXException
-     * @throws TikaException
+     * @throws TikaException 
      */
     public Map<String, Path> extract(InputStream input, long parentID, Path outputDir) throws IOException, SAXException, TikaException {
         ExtractionPreconditions.checkArgument(Files.exists(outputDir), 

From a09d52dafc0a6242ec73f146cd5c96a9c61b2444 Mon Sep 17 00:00:00 2001
From: "U-BASIS\\dsmyda" <dsmyda@win-dsmyd-4990.basistech.net>
Date: Tue, 30 Apr 2019 12:41:39 -0400
Subject: [PATCH 4/5] One last comment

---
 .../DocumentEmbeddedContentExtractor.java                    | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
index e6a0810e78..a362c5789f 100644
--- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java
@@ -476,9 +476,10 @@ class DocumentEmbeddedContentExtractor {
     }
     
     /**
+     * Extracts embedded attachments from PDF files.
      * 
-     * @param abstractFile
-     * @return 
+     * @param abstractFile Input PDF file
+     * @return List of extracted files to be made into derived file instances.
      */
     private List<ExtractedFile> extractEmbeddedContentFromPDF(AbstractFile abstractFile) {
         PDFAttachmentExtractor pdfExtractor = new PDFAttachmentExtractor(parser);

From ca028d478f1f4ce75626915d69c723d31039a57b Mon Sep 17 00:00:00 2001
From: "U-BASIS\\dsmyda" <dsmyda@win-dsmyd-4990.basistech.net>
Date: Thu, 9 May 2019 16:31:11 -0400
Subject: [PATCH 5/5] Codacy fixes

---
 .../embeddedfileextractor/PDFAttachmentExtractor.java        | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java
index 7a0747b648..a36b5c365d 100755
--- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/PDFAttachmentExtractor.java
@@ -49,7 +49,7 @@ import org.sleuthkit.datamodel.TskData;
  */
 final class PDFAttachmentExtractor {
 
-    private static Logger logger = Logger.getLogger(PDFAttachmentExtractor.class.getName());
+    private static final Logger logger = Logger.getLogger(PDFAttachmentExtractor.class.getName());
     private final AutoDetectParser parser;
     
     public PDFAttachmentExtractor() {
@@ -176,5 +176,8 @@ final class PDFAttachmentExtractor {
                 throw new IOException(msg);
             }
         }
+        
+        private ExtractionPreconditions(){
+        }
     }
 }