Lots of optimizations and bug fixes

Eugene Livis 2017-11-02 17:35:01 -04:00
parent cb3ebedf9c
commit 17f8112f66


@@ -22,16 +22,19 @@ import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.logging.Level;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.openide.util.Exceptions;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.casemodule.LocalDiskDSProcessor;
import org.sleuthkit.autopsy.casemodule.LocalFilesDSProcessor;
import org.sleuthkit.autopsy.corecomponentinterfaces.DataSourceProcessorCallback;
import static org.sleuthkit.autopsy.corecomponentinterfaces.DataSourceProcessorCallback.DataSourceProcessorResult.CRITICAL_ERRORS;
import org.sleuthkit.autopsy.corecomponentinterfaces.DataSourceProcessorProgressMonitor;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.datasourceprocessors.AutoIngestDataSourceProcessor;
@@ -116,70 +119,45 @@ class AddArchiveTask implements Runnable {
List<String> extractedFiles = ArchiveUtil.unpackArchiveFile(archivePath, destinationFolder.toString());
// do processing
Map<AutoIngestDataSourceProcessor, Integer> validDataSourceProcessorsMap;
for (String file : extractedFiles) {
// identify DSP for this file
try {
// lookup all AutomatedIngestDataSourceProcessors and poll which ones are able to process the current data source
validDataSourceProcessorsMap = DataSourceProcessorUtility.getDataSourceProcessor(Paths.get(file));
if (validDataSourceProcessorsMap.isEmpty()) {
continue;
}
} catch (AutoIngestDataSourceProcessor.AutoIngestDataSourceProcessorException ex) {
criticalErrorOccurred = true;
errorMessages.add(ex.getMessage());
logger.log(Level.SEVERE, String.format("Critical error occurred while extracting archive %s", archivePath), ex); //NON-NLS
// continue to next extracted file
for (String file : extractedFiles) {
// identify all "valid" DSPs that can process this file
List<AutoIngestDataSourceProcessor> validDataSourceProcessors = getValidDataSourceProcessors(Paths.get(file), errorMessages);
if (validDataSourceProcessors.isEmpty()) {
continue;
}
// Get an ordered list of data source processors to try
List<AutoIngestDataSourceProcessor> validDataSourceProcessors = DataSourceProcessorUtility.orderDataSourceProcessorsByConfidence(validDataSourceProcessorsMap);
// identified a "valid" data source within the archive
progressMonitor.setProgressText(String.format("Adding: %s", file));
/*
* NOTE: we have to move the valid data sources to a separate
* folder and then add the data source from that folder. This is
* necessary because after all valid data sources have been
* identified, we are going to add the remaining extracted
* contents of the archive as a single logical file set. Hence,
* if we do not move the data sources out of the extracted
* contents folder, those data source files will get added twice
* and can potentially result in duplicate keyword hits.
*/
Path newFolder = createDirectoryForFile(file, currentCase.getModuleDirectory());
if (newFolder.toString().isEmpty()) {
// unable to create directory
criticalErrorOccurred = true;
errorMessages.add(String.format("Unable to create directory {0} to extract content of archive {1} ", new Object[]{newFolder.toString(), archivePath}));
logger.log(Level.SEVERE, String.format("Unable to create directory {0} to extract content of archive {1} ", new Object[]{newFolder.toString(), archivePath}));
return;
}
// Copy it to a different folder
FileUtils.copyFileToDirectory(new File(file), newFolder.toFile());
Path newFilePath = Paths.get(newFolder.toString(), FilenameUtils.getName(file));
// Try each DSP in decreasing order of confidence
boolean success = false;
for (AutoIngestDataSourceProcessor selectedProcessor : validDataSourceProcessors) {
// skip local files and local disk DSPs, only looking for "valid" data sources
if (selectedProcessor instanceof LocalDiskDSProcessor) {
continue;
}
if (selectedProcessor instanceof LocalFilesDSProcessor) {
continue;
}
// also skip nested archive files, those will be ingested as logical files and extracted during ingest
if (selectedProcessor instanceof ArchiveExtractorDSProcessor) {
continue;
}
// identified a "valid" data source within the archive
progressMonitor.setProgressText(String.format("Adding: %s", file));
logger.log(Level.INFO, "Using {0} to process extracted file {1} ", new Object[]{selectedProcessor.getDataSourceType(), file});
/*
* NOTE: we have to move the valid data sources to a
* separate folder and then add the data source from that
* folder. This is necessary because after all valid data
* sources have been identified, we are going to add the
* remaining extracted contents of the archive as a single
* logical file set. Hence, if we do not move the data
* sources out of the extracted contents folder, those data
* source files will get added twice and can potentially
* result in duplicate keyword hits.
*/
Path newFolder = createDirectoryForFile(file, currentCase.getModuleDirectory());
if (newFolder.toString().isEmpty()) {
// unable to create directory
criticalErrorOccurred = true;
errorMessages.add(String.format("Unable to create directory {0} to extract content of archive {1} ", new Object[]{newFolder.toString(), archivePath}));
logger.log(Level.SEVERE, String.format("Unable to create directory {0} to extract content of archive {1} ", new Object[]{newFolder.toString(), archivePath}));
return;
}
// Move it to a different folder
FileUtils.moveFileToDirectory(new File(file), newFolder.toFile(), false);
Path newFilePath = Paths.get(newFolder.toString(), FilenameUtils.getName(file));
// ELTBD - do we want to log this in case log and/or system admin log?
synchronized (archiveDspLock) {
try {
@@ -188,7 +166,16 @@ class AddArchiveTask implements Runnable {
selectedProcessor.process(deviceId, newFilePath, progressMonitor, internalArchiveDspCallBack);
archiveDspLock.wait();
// at this point we got the content object(s) from the current DSP.
// check whether the data source was processed successfully
if ((internalDataSource.getResultDataSourceProcessorResultCode() == CRITICAL_ERRORS) ||
internalDataSource.getContent().isEmpty()) {
// move on to the next DSP that can process this data source
continue;
}
// if we are here it means the data source was added successfully
success = true;
newDataSources.addAll(internalDataSource.getContent());
// skip all other DSPs for this data source
@@ -203,6 +190,18 @@ class AddArchiveTask implements Runnable {
}
}
}
if (success) {
// one of the DSPs successfully processed the data source. delete the
// copy of the data source in the original extracted archive folder.
// otherwise the data source is going to be added again as a logical file.
FileUtils.deleteQuietly(Paths.get(file).toFile());
} else {
// none of the DSPs were able to process the data source. delete the
// copy of the data source in the temporary folder. the data source is
// going to be added as a logical file with the rest of the extracted contents.
FileUtils.deleteQuietly(newFolder.toFile());
}
}
// after all archive contents have been examined (and moved to separate folders if necessary),
@@ -245,6 +244,46 @@ class AddArchiveTask implements Runnable {
callback.done(result, errorMessages, newDataSources);
}
}
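The process()/archiveDspLock.wait() handshake above only completes because internalArchiveDspCallBack is expected to signal the same lock once the DSP finishes. The callback itself is declared outside this diff; a minimal sketch of its assumed shape follows. Overriding done() to skip the EDT round-trip, the setter name setDataSourceProcessorOutput(), and the import of org.sleuthkit.datamodel.Content are assumptions, not code from this commit:

// Sketch only: the real callback is not shown in this diff.
private final DataSourceProcessorCallback internalArchiveDspCallBack = new DataSourceProcessorCallback() {
    @Override
    public void done(DataSourceProcessorCallback.DataSourceProcessorResult result, List<String> errList, List<Content> newContents) {
        synchronized (archiveDspLock) {
            // record the DSP output where the waiting AddArchiveTask thread can read it
            internalDataSource.setDataSourceProcessorOutput(result, errList, newContents); // assumed setter
            // wake the thread blocked in archiveDspLock.wait()
            archiveDspLock.notifyAll();
        }
    }

    @Override
    public void doneEDT(DataSourceProcessorCallback.DataSourceProcessorResult result, List<String> errList, List<Content> newContents) {
        done(result, errList, newContents);
    }
};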
private List<AutoIngestDataSourceProcessor> getValidDataSourceProcessors(Path dataSourcePath, List<String> errorMessages) {
Map<AutoIngestDataSourceProcessor, Integer> validDataSourceProcessorsMap;
try {
validDataSourceProcessorsMap = DataSourceProcessorUtility.getDataSourceProcessor(dataSourcePath);
} catch (AutoIngestDataSourceProcessor.AutoIngestDataSourceProcessorException ex) {
criticalErrorOccurred = true;
errorMessages.add(ex.getMessage());
logger.log(Level.SEVERE, String.format("Critical error occurred while extracting archive %s", archivePath), ex); //NON-NLS
return Collections.emptyList();
}
if (validDataSourceProcessorsMap.isEmpty()) {
return Collections.emptyList();
}
// Get an ordered list of data source processors to try
List<AutoIngestDataSourceProcessor> validDataSourceProcessors = DataSourceProcessorUtility.orderDataSourceProcessorsByConfidence(validDataSourceProcessorsMap);
// skip local files and local disk DSPs, only looking for "valid" data sources;
// also skip nested archive files, those will be ingested as logical files and extracted during ingest.
// NOTE: removing elements inside a for-each loop over the same list would throw
// a ConcurrentModificationException, so use removeIf() instead.
validDataSourceProcessors.removeIf(processor
-> processor instanceof LocalDiskDSProcessor
|| processor instanceof LocalFilesDSProcessor
|| processor instanceof ArchiveExtractorDSProcessor);
return validDataSourceProcessors;
}
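getValidDataSourceProcessors() relies on DataSourceProcessorUtility.orderDataSourceProcessorsByConfidence(), which is outside this diff. A plausible sketch of its contract, assuming the map values are the confidence scores returned by each DSP's canProcess() and that a higher score means a better match:

// Sketch only: illustrates the assumed behavior of
// DataSourceProcessorUtility.orderDataSourceProcessorsByConfidence().
static List<AutoIngestDataSourceProcessor> orderDataSourceProcessorsByConfidence(Map<AutoIngestDataSourceProcessor, Integer> dspMap) {
    List<AutoIngestDataSourceProcessor> ordered = new ArrayList<>(dspMap.keySet());
    // highest confidence first, so the most specific DSP gets the first attempt
    ordered.sort((dsp1, dsp2) -> Integer.compare(dspMap.get(dsp2), dspMap.get(dsp1)));
    return ordered;
}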
private Path createDirectoryForFile(String fileName, String baseDirectory) {
// get file name without full path or extension
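The diff truncates createDirectoryForFile() here. Judging from the visible comment and the imports of FilenameUtils, Paths, and UUID, a plausible completion is sketched below; the UUID suffix and the empty-path failure convention are inferred from the callers above, not confirmed by this commit:

// Sketch only: inferred implementation, the real body is not shown in this diff.
private Path createDirectoryForFile(String fileName, String baseDirectory) {
    // get file name without full path or extension
    String fileNameNoExt = FilenameUtils.getBaseName(fileName);
    // create a uniquely named folder for this data source
    Path newFolder = Paths.get(baseDirectory, fileNameNoExt + "_" + UUID.randomUUID().toString());
    if (!newFolder.toFile().mkdirs()) {
        // callers treat an empty path as "unable to create directory"
        return Paths.get("");
    }
    return newFolder;
}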