From 90a103455bebd33a1b8cc0f8a813f37839c194e6 Mon Sep 17 00:00:00 2001
From: Richard Cordovano
Date: Sun, 2 Nov 2014 17:51:41 -0500
Subject: [PATCH 01/10] Partially implement multi-stage ingest

---
 ...ampleExecutableDataSourceIngestModule.java |   1 -
 .../DataSourceIngestModuleProgress.java       |   2 +-
 .../sleuthkit/autopsy/ingest/IngestJob.java   | 844 ++++++++++++------
 .../autopsy/ingest/IngestJobContext.java      |  27 +-
 .../autopsy/ingest/IngestManager.java         |  14 +-
 .../ingest/IngestProgressSnapshotPanel.java   |  29 +-
 .../autopsy/ingest/IngestScheduler.java       | 678 --------------
 .../autopsy/ingest/IngestTasksScheduler.java  | 757 ++++++++++++++++
 8 files changed, 1344 insertions(+), 1008 deletions(-)
 delete mode 100755 Core/src/org/sleuthkit/autopsy/ingest/IngestScheduler.java
 create mode 100755 Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java

diff --git a/Core/src/org/sleuthkit/autopsy/examples/SampleExecutableDataSourceIngestModule.java b/Core/src/org/sleuthkit/autopsy/examples/SampleExecutableDataSourceIngestModule.java
index 83641fbb93..98e8029932 100755
--- a/Core/src/org/sleuthkit/autopsy/examples/SampleExecutableDataSourceIngestModule.java
+++ b/Core/src/org/sleuthkit/autopsy/examples/SampleExecutableDataSourceIngestModule.java
@@ -61,7 +61,6 @@ import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.Image;
-import org.sleuthkit.datamodel.TskCoreException;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;

diff --git a/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestModuleProgress.java b/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestModuleProgress.java
index a972fd86a4..1fa66aa07c 100644
--- a/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestModuleProgress.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestModuleProgress.java
@@ -67,7 +67,7 @@ public class DataSourceIngestModuleProgress {
      * @param message Message to display
      */
     public void progress(String message) {
-        this.job.advanceDataSourceIngestProgressBar(message);
+        this.job.advanceDataSourceIngestProgressBar(message); // Updates the progress bar message (sub-title), not its display name.
     }

     /**
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java
index 65fb083a8a..c1fe37824f 100644
--- a/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java
@@ -21,7 +21,9 @@ package org.sleuthkit.autopsy.ingest;
 import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.atomic.AtomicLong;
 import java.util.logging.Level;
 import javax.swing.JOptionPane;
 import org.netbeans.api.progress.ProgressHandle;
@@ -39,13 +41,29 @@ import org.sleuthkit.datamodel.Content;
 final class IngestJob {

     private static final Logger logger = Logger.getLogger(IngestJob.class.getName());
-    private static final IngestScheduler ingestScheduler = IngestScheduler.getInstance();
+    private static final IngestTasksScheduler ingestScheduler = IngestTasksScheduler.getInstance();
+
+    // These static fields are used for the creation and management of ingest
+    // jobs in progress.
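+    // Jobs are assigned ids from an atomic sequence and tracked in a map from
+    // job id to job, from successful start up until shut down or cancellation,
+    // so that ingestJobsAreRunning(), getJobSnapshots(), and cancelAllJobs()
+    // can operate on every job currently in progress.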
+    private static volatile boolean jobCreationIsEnabled;
+    private static final AtomicLong nextIngestJobId = new AtomicLong(0L);
+    private static final ConcurrentHashMap<Long, IngestJob> ingestJobsById = new ConcurrentHashMap<>();
+
+    // An ingest job may have multiple stages.
+    private enum Stages {
+
+        FIRST, // High priority data source ingest modules plus file ingest modules
+        SECOND // Low priority data source ingest modules
+    };

     // These fields define the ingest job and the work it entails.
     private final long id;
     private final Content dataSource;
     private final boolean processUnallocatedSpace;
+    private Stages stage;
     private DataSourceIngestPipeline dataSourceIngestPipeline;
+    private DataSourceIngestPipeline firstStageDataSourceIngestPipeline;
+    private DataSourceIngestPipeline secondStageDataSourceIngestPipeline;
     private final LinkedBlockingQueue<FileIngestPipeline> fileIngestPipelines;

     // These fields are used to update the ingest progress UI components. The
@@ -68,6 +86,74 @@ final class IngestJob {
     // This field is used for generating ingest job diagnostic data.
     private final long startTime;

+    /**
+     * Enables and disables ingest job creation.
+     *
+     * @param enabled True or false.
+     */
+    static void jobCreationEnabled(boolean enabled) {
+        IngestJob.jobCreationIsEnabled = enabled;
+    }
+
+    /**
+     * Creates an ingest job for a data source.
+     *
+     * @param dataSource The data source to ingest.
+     * @param ingestModuleTemplates The ingest module templates to use to create
+     * the ingest pipelines for the job.
+     * @param processUnallocatedSpace Whether or not the job should include
+     * processing of unallocated space.
+     *
+     * @return A collection of ingest module start up errors, empty on success.
+     *
+     * @throws InterruptedException
+     */
+    static List<IngestModuleError> startJob(Content dataSource, List<IngestModuleTemplate> ingestModuleTemplates, boolean processUnallocatedSpace) throws InterruptedException {
+        List<IngestModuleError> errors = new ArrayList<>();
+        if (IngestJob.jobCreationIsEnabled) {
+            long jobId = nextIngestJobId.incrementAndGet();
+            IngestJob job = new IngestJob(jobId, dataSource, processUnallocatedSpace);
+            errors = job.start(ingestModuleTemplates);
+            if (errors.isEmpty() && (job.hasDataSourceIngestPipeline() || job.hasFileIngestPipeline())) {
+                ingestJobsById.put(jobId, job);
+                IngestManager.getInstance().fireIngestJobStarted(jobId);
+                IngestJob.ingestScheduler.scheduleIngestTasks(job);
+                logger.log(Level.INFO, "Ingest job {0} started", jobId);
+            }
+        }
+        return errors;
+    }
+
+    /**
+     * Queries whether or not ingest jobs are running.
+     *
+     * @return True or false.
+     */
+    static boolean ingestJobsAreRunning() {
+        return !ingestJobsById.isEmpty();
+    }
+
+    /**
+     * Gets snapshots of the state and performance of all running ingest jobs.
+     *
+     * @return A list of ingest job snapshots.
+     */
+    static List<IngestJobSnapshot> getJobSnapshots() {
+        List<IngestJobSnapshot> snapShots = new ArrayList<>();
+        for (IngestJob job : IngestJob.ingestJobsById.values()) {
+            snapShots.add(job.getIngestJobSnapshot());
+        }
+        return snapShots;
+    }
+
+    /**
+     * Requests cancellation of all running ingest jobs.
+     */
+    static void cancelAllJobs() {
+        for (IngestJob job : ingestJobsById.values()) {
+            job.cancel();
+        }
+    }
+
     /**
      * Constructs an ingest job.
     *
     * @param id The identifier assigned to the job.
     * @param dataSource The data source to be ingested.
     * @param processUnallocatedSpace Whether or not unallocated space should
     * be processed.
     */
    IngestJob(long id, Content dataSource, boolean processUnallocatedSpace) {
         this.id = id;
         this.dataSource = dataSource;
         this.processUnallocatedSpace = processUnallocatedSpace;
+        this.stage = IngestJob.Stages.FIRST;
         this.fileIngestPipelines = new LinkedBlockingQueue<>();
         this.filesInProgress = new ArrayList<>();
         this.dataSourceIngestProgressLock = new Object();
@@ -122,19 +209,344 @@ final class IngestJob {
      * @throws InterruptedException
      */
     List<IngestModuleError> start(List<IngestModuleTemplate> ingestModuleTemplates) throws InterruptedException {
-        createIngestPipelines(ingestModuleTemplates);
+        this.createIngestPipelines(ingestModuleTemplates);
         List<IngestModuleError> errors = startUpIngestPipelines();
         if (errors.isEmpty()) {
             if (!this.dataSourceIngestPipeline.isEmpty()) {
-                startDataSourceIngestProgressBar();
+                this.startDataSourceIngestProgressBar();
             }
             if (!this.fileIngestPipelines.peek().isEmpty()) {
-                startFileIngestProgressBar();
+                this.startFileIngestProgressBar();
             }
         }
         return errors;
     }

+    /**
+     * Checks to see if this job has a data source ingest pipeline.
+     *
+     * @return True or false.
+     */
+    boolean hasDataSourceIngestPipeline() {
+        return (this.dataSourceIngestPipeline.isEmpty() == false);
+    }
+
+    /**
+     * Checks to see if the job has a file ingest pipeline.
+     *
+     * @return True or false.
+     */
+    boolean hasFileIngestPipeline() {
+        return (this.fileIngestPipelines.peek().isEmpty() == false);
+    }
+
+    /**
+     * Passes the data source for this job through the data source ingest
+     * pipeline.
+     *
+     * @param task A data source ingest task wrapping the data source.
+     * @throws InterruptedException
+     */
+    void process(DataSourceIngestTask task) throws InterruptedException {
+        try {
+            if (!this.isCancelled() && !this.dataSourceIngestPipeline.isEmpty()) {
+                List<IngestModuleError> errors = new ArrayList<>();
+                errors.addAll(this.dataSourceIngestPipeline.process(task));
+                if (!errors.isEmpty()) {
+                    logIngestModuleErrors(errors);
+                }
+            }
+
+            // Shut down the data source ingest progress bar right away.
+            synchronized (this.dataSourceIngestProgressLock) {
+                if (null != this.dataSourceIngestProgress) {
+                    this.dataSourceIngestProgress.finish();
+                    this.dataSourceIngestProgress = null;
+                }
+            }
+        } finally {
+            // No matter what happens, let the ingest scheduler know that this
+            // task is completed.
+            IngestJob.ingestScheduler.notifyTaskCompleted(task);
+        }
+    }
+
+    /**
+     * Passes a file from the data source for this job through the file
+     * ingest pipeline.
+     *
+     * @param task A file ingest task.
+     * @throws InterruptedException
+     */
+    void process(FileIngestTask task) throws InterruptedException {
+        try {
+            if (!this.isCancelled()) {
+                // Get a file ingest pipeline not currently in use by another
+                // file ingest thread.
+                FileIngestPipeline pipeline = this.fileIngestPipelines.take();
+                if (!pipeline.isEmpty()) {
+                    // Get the file to process.
+                    AbstractFile file = task.getFile();
+
+                    // Update the file ingest progress bar.
+                    synchronized (this.fileIngestProgressLock) {
+                        ++this.processedFiles;
+                        if (this.processedFiles <= this.estimatedFilesToProcess) {
+                            this.fileIngestProgress.progress(file.getName(), (int) this.processedFiles);
+                        } else {
+                            this.fileIngestProgress.progress(file.getName(), (int) this.estimatedFilesToProcess);
+                        }
+                        this.filesInProgress.add(file.getName());
+                    }
+
+                    // Run the file through the pipeline.
+                    List<IngestModuleError> errors = new ArrayList<>();
+                    errors.addAll(pipeline.process(task));
+                    if (!errors.isEmpty()) {
+                        logIngestModuleErrors(errors);
+                    }
+
+                    // Update the file ingest progress bar again in case the
+                    // file was being displayed.
+                    if (!this.cancelled) {
+                        synchronized (this.fileIngestProgressLock) {
+                            this.filesInProgress.remove(file.getName());
+                            if (this.filesInProgress.size() > 0) {
+                                this.fileIngestProgress.progress(this.filesInProgress.get(0));
+                            } else {
+                                this.fileIngestProgress.progress("");
+                            }
+                        }
+                    }
+                }
+
+                // Relinquish the pipeline so it can be reused by another file
+                // ingest thread.
+                this.fileIngestPipelines.put(pipeline);
+            }
+        } finally {
+            // No matter what happens, let the ingest scheduler know that this
+            // task is completed.
+            IngestJob.ingestScheduler.notifyTaskCompleted(task);
+        }
+    }
+
+    /**
+     * Adds more files to an ingest job, e.g., extracted or carved files.
+     *
+     * @param files A list of the files to add.
+     */
+    void addFiles(List<AbstractFile> files) {
+        // RJCTODO: Add handling of the lack of support for file ingest in the
+        // second stage.
+        for (AbstractFile file : files) {
+            try {
+                // RJCTODO: Deal with possible IllegalStateException; maybe
+                // don't need logging here.
+                IngestJob.ingestScheduler.scheduleFileIngestTask(this, file);
+            } catch (InterruptedException ex) {
+                // Handle the unexpected interrupt here rather than make ingest
+                // module writers responsible for writing this exception handler.
+                // The interrupt flag of the thread is reset for detection by
+                // the thread task code.
+                Thread.currentThread().interrupt();
+                IngestJob.logger.log(Level.SEVERE, "File task scheduling unexpectedly interrupted", ex); //NON-NLS
+            }
+        }
+    }
+
+    /**
+     * Allows the ingest tasks scheduler to notify this ingest job whenever all
+     * the scheduled tasks for this ingest job have been completed.
+     */
+    void notifyTasksCompleted() {
+        switch (this.stage) {
+            case FIRST:
+                this.finishFirstStage();
+                this.startSecondStage();
+                break;
+            case SECOND:
+                this.finish();
+                break;
+        }
+    }
+
+    /**
+     * Updates the display name of the data source ingest progress bar.
+     *
+     * @param displayName The new display name.
+     */
+    void updateDataSourceIngestProgressBarDisplayName(String displayName) {
+        if (!this.cancelled) {
+            synchronized (this.dataSourceIngestProgressLock) {
+                this.dataSourceIngestProgress.setDisplayName(displayName);
+            }
+        }
+    }
+
+    /**
+     * Switches the data source progress bar to determinate mode. This should be
+     * called if the total work units to process the data source is known.
+     *
+     * @param workUnits Total number of work units for the processing of the
+     * data source.
+     */
+    void switchDataSourceIngestProgressBarToDeterminate(int workUnits) {
+        if (!this.cancelled) {
+            synchronized (this.dataSourceIngestProgressLock) {
+                if (null != this.dataSourceIngestProgress) {
+                    this.dataSourceIngestProgress.switchToDeterminate(workUnits);
+                }
+            }
+        }
+    }
+
+    /**
+     * Switches the data source ingest progress bar to indeterminate mode. This
+     * should be called if the total work units to process the data source is
+     * unknown.
+     */
+    void switchDataSourceIngestProgressBarToIndeterminate() {
+        if (!this.cancelled) {
+            synchronized (this.dataSourceIngestProgressLock) {
+                if (null != this.dataSourceIngestProgress) {
+                    this.dataSourceIngestProgress.switchToIndeterminate();
+                }
+            }
+        }
+    }
+
+    /**
+     * Updates the data source ingest progress bar with the number of work units
+     * performed, if in the determinate mode.
+     *
+     * @param workUnits Number of work units performed.
+     */
+    void advanceDataSourceIngestProgressBar(int workUnits) {
+        if (!this.cancelled) {
+            synchronized (this.dataSourceIngestProgressLock) {
+                if (null != this.dataSourceIngestProgress) {
+                    this.dataSourceIngestProgress.progress("", workUnits);
+                }
+            }
+        }
+    }
+
+    /**
+     * Updates the sub-title (message) of the data source ingest progress bar.
+     *
+     * @param message The new message.
+     */
+    void advanceDataSourceIngestProgressBar(String message) {
+        if (!this.cancelled) {
+            synchronized (this.dataSourceIngestProgressLock) {
+                if (null != this.dataSourceIngestProgress) {
+                    this.dataSourceIngestProgress.progress(message);
+                }
+            }
+        }
+    }
+
+    /**
+     * Updates the data source ingest progress bar with the number of work
+     * units performed, if in the determinate mode, displaying the given
+     * message in the sub-title.
+     *
+     * @param message Message to display in the sub-title.
+     * @param workUnits Number of work units performed.
+     */
+    void advanceDataSourceIngestProgressBar(String message, int workUnits) {
+        if (!this.cancelled) {
+            synchronized (this.dataSourceIngestProgressLock) {
+                if (null != this.dataSourceIngestProgress) {
+                    this.dataSourceIngestProgress.progress(message, workUnits);
+                }
+            }
+        }
+    }
+
+    /**
+     * Determines whether or not a temporary cancellation of data source ingest
+     * in order to stop the currently executing data source ingest module is in
+     * effect.
+     *
+     * @return True or false.
+     */
+    boolean currentDataSourceIngestModuleIsCancelled() {
+        return this.currentDataSourceIngestModuleCancelled;
+    }
+
+    /**
+     * Rescinds a temporary cancellation of data source ingest used to stop the
+     * currently executing data source ingest module.
+     */
+    void currentDataSourceIngestModuleCancellationCompleted() {
+        this.currentDataSourceIngestModuleCancelled = false;
+
+        // A new progress bar must be created because the cancel button of the
+        // previously constructed component is disabled by NetBeans when the
+        // user selects the "OK" button of the cancellation confirmation dialog
+        // popped up by NetBeans when the progress bar cancel button was
+        // pressed.
+        synchronized (this.dataSourceIngestProgressLock) {
+            this.dataSourceIngestProgress.finish();
+            this.dataSourceIngestProgress = null;
+            this.startDataSourceIngestProgressBar();
+        }
+    }
+
+    /**
+     * Requests cancellation of ingest, i.e., a shutdown of the data source and
+     * file ingest pipelines.
+     */
+    void cancel() {
+        // Put a cancellation message on the data source ingest progress bar,
+        // if it is still running.
+        synchronized (this.dataSourceIngestProgressLock) {
+            if (dataSourceIngestProgress != null) {
+                final String displayName = NbBundle.getMessage(this.getClass(),
+                        "IngestJob.progress.dataSourceIngest.initialDisplayName",
+                        dataSource.getName());
+                dataSourceIngestProgress.setDisplayName(
+                        NbBundle.getMessage(this.getClass(),
+                                "IngestJob.progress.cancelling",
+                                displayName));
+            }
+        }
+
+        // Put a cancellation message on the file ingest progress bar,
+        // if it is still running.
+        synchronized (this.fileIngestProgressLock) {
+            if (this.fileIngestProgress != null) {
+                final String displayName = NbBundle.getMessage(this.getClass(),
+                        "IngestJob.progress.fileIngest.displayName",
+                        this.dataSource.getName());
+                this.fileIngestProgress.setDisplayName(
+                        NbBundle.getMessage(this.getClass(), "IngestJob.progress.cancelling",
+                                displayName));
+            }
+        }
+
+        this.cancelled = true;
+
+        // Tell the ingest scheduler to cancel all pending tasks.
+        IngestJob.ingestScheduler.cancelPendingTasksForIngestJob(this);
+    }
+
+    /**
+     * Queries whether or not cancellation of ingest, i.e., a shutdown of the
+     * data source and file ingest pipelines, has been requested.
+     *
+     * @return True or false.
+     */
+    boolean isCancelled() {
+        return this.cancelled;
+    }
+
+    /**
+     * Gets some basic performance statistics for this job.
+     *
+     * @return An ingest job statistics object.
+     */
+    IngestJobSnapshot getIngestJobSnapshot() {
+        return new IngestJobSnapshot();
+    }
+
     /**
      * Creates the file and data source ingest pipelines.
      *
@@ -143,10 +555,28 @@ final class IngestJob {
      * @param ingestModuleTemplates The ingest module templates to use to create
      * the pipelines.
      * @throws InterruptedException
      */
     private void createIngestPipelines(List<IngestModuleTemplate> ingestModuleTemplates) throws InterruptedException {
-        this.dataSourceIngestPipeline = new DataSourceIngestPipeline(this, ingestModuleTemplates);
+        // RJCTODO: Use a config file to determine which data source ingest
+        // modules go into the second stage; for now, every data source module
+        // goes into the first stage.
+        // Sort the ingest module templates as required for the pipelines.
+        List<IngestModuleTemplate> firstStageDataSourceModuleTemplates = new ArrayList<>();
+        List<IngestModuleTemplate> secondStageDataSourceModuleTemplates = new ArrayList<>();
+        List<IngestModuleTemplate> fileIngestModuleTemplates = new ArrayList<>();
+        for (IngestModuleTemplate template : ingestModuleTemplates) {
+            if (template.isDataSourceIngestModuleTemplate()) {
+                firstStageDataSourceModuleTemplates.add(template);
+            } else {
+                fileIngestModuleTemplates.add(template);
+            }
+        }
+
+        // Construct the data source ingest pipelines.
+        this.firstStageDataSourceIngestPipeline = new DataSourceIngestPipeline(this, firstStageDataSourceModuleTemplates);
+        this.secondStageDataSourceIngestPipeline = new DataSourceIngestPipeline(this, secondStageDataSourceModuleTemplates);
+        this.dataSourceIngestPipeline = firstStageDataSourceIngestPipeline;
+
+        // Construct the file ingest pipelines.
         int numberOfFileIngestThreads = IngestManager.getInstance().getNumberOfFileIngestThreads();
         for (int i = 0; i < numberOfFileIngestThreads; ++i) {
-            this.fileIngestPipelines.put(new FileIngestPipeline(this, ingestModuleTemplates));
+            this.fileIngestPipelines.put(new FileIngestPipeline(this, fileIngestModuleTemplates));
         }
     }

@@ -160,9 +590,12 @@ final class IngestJob {
     private List<IngestModuleError> startUpIngestPipelines() throws InterruptedException {
         List<IngestModuleError> errors = new ArrayList<>();

-        // Start up the data source ingest pipeline.
+        // Start up the first stage data source ingest pipeline.
         errors.addAll(this.dataSourceIngestPipeline.startUp());

+        // Start up the second stage data source ingest pipeline.
+        errors.addAll(this.secondStageDataSourceIngestPipeline.startUp());
+
         // Start up the file ingest pipelines (one per file ingest thread).
         for (FileIngestPipeline pipeline : this.fileIngestPipelines) {
             errors.addAll(pipeline.startUp());
@@ -249,201 +682,10 @@ final class IngestJob {
     }

     /**
-     * Checks to see if this job has a data source ingest pipeline.
-     *
-     * @return True or false.
+     * Shuts down the file ingest pipelines and current progress bars, if any,
+     * for this job.
      */
-    boolean hasDataSourceIngestPipeline() {
-        return (this.dataSourceIngestPipeline.isEmpty() == false);
-    }
-
-    /**
-     * Checks to see if the job has a file ingest pipeline.
-     *
-     * @return True or false.
-     */
-    boolean hasFileIngestPipeline() {
-        return (this.fileIngestPipelines.peek().isEmpty() == false);
-    }
-
-    /**
-     * Passes the data source for this job through the data source ingest
-     * pipeline.
-     *
-     * @param task A data source ingest task wrapping the data source.
-     * @throws InterruptedException
-     */
-    void process(DataSourceIngestTask task) throws InterruptedException {
-        try {
-            if (!this.isCancelled() && !this.dataSourceIngestPipeline.isEmpty()) {
-                List<IngestModuleError> errors = new ArrayList<>();
-                errors.addAll(this.dataSourceIngestPipeline.process(task));
-                if (!errors.isEmpty()) {
-                    logIngestModuleErrors(errors);
-                }
-            }
-
-            // The single data source ingest task for this job is done, so shut
-            // down the data source ingest progress bar right away.
- synchronized (this.dataSourceIngestProgressLock) { - if (null != this.dataSourceIngestProgress) { - this.dataSourceIngestProgress.finish(); - this.dataSourceIngestProgress = null; - } - } - } finally { - // No matter what happens, let the ingest scheduler know that this - // task is completed. - IngestJob.ingestScheduler.notifyTaskCompleted(task); - } - } - - /** - * Updates the display name of the data source ingest progress bar. - * - * @param displayName The new display name. - */ - void updateDataSourceIngestProgressBarDisplayName(String displayName) { - if (!this.cancelled) { - synchronized (this.dataSourceIngestProgressLock) { - this.dataSourceIngestProgress.setDisplayName(displayName); - } - } - } - - /** - * Updates the data source progress bar and switches it to determinate mode. - * - * @param workUnits Total number of work units for the processing of the - * data source. - */ - void switchDataSourceIngestProgressBarToDeterminate(int workUnits) { - if (!this.cancelled) { - synchronized (this.dataSourceIngestProgressLock) { - this.dataSourceIngestProgress.switchToDeterminate(workUnits); - } - } - } - - /** - * Switches the data source ingest progress bar to indeterminate mode. This - * should be called if the total work units to process the data source is - * unknown. - */ - void switchDataSourceIngestProgressBarToIndeterminate() { - if (!this.cancelled) { - synchronized (this.dataSourceIngestProgressLock) { - this.dataSourceIngestProgress.switchToIndeterminate(); - } - } - } - - /** - * Updates the data source ingest progress bar with the number of work units - * performed, if in the determinate mode. - * - * @param workUnits Number of work units performed. - */ - void advanceDataSourceIngestProgressBar(int workUnits) { - if (!this.cancelled) { - synchronized (this.dataSourceIngestProgressLock) { - this.dataSourceIngestProgress.progress("", workUnits); - } - } - } - - /** - * Updates the data source ingest progress bar display name. - * - * @param displayName The new display name. - */ - void advanceDataSourceIngestProgressBar(String displayName) { - if (!this.cancelled) { - synchronized (this.dataSourceIngestProgressLock) { - this.dataSourceIngestProgress.progress(displayName); - } - } - } - - /** - * Updates the progress bar with the number of work units performed, if in - * the determinate mode. - * - * @param message Message to display in sub-title - * @param workUnits Number of work units performed. - */ - void advanceDataSourceIngestProgressBar(String message, int workUnits) { - if (!this.cancelled) { - synchronized (this.fileIngestProgressLock) { - this.dataSourceIngestProgress.progress(message, workUnits); - } - } - } - - /** - * Passes the a file from the data source for this job through the file - * ingest pipeline. - * - * @param task A file ingest task. - * @throws InterruptedException - */ - void process(FileIngestTask task) throws InterruptedException { - try { - if (!this.isCancelled()) { - // Get a file ingest pipeline not currently in use by another - // file ingest thread. - FileIngestPipeline pipeline = this.fileIngestPipelines.take(); - if (!pipeline.isEmpty()) { - // Get the file to process. - AbstractFile file = task.getFile(); - - // Update the file ingest progress bar. 
-                    synchronized (this.fileIngestProgressLock) {
-                        ++this.processedFiles;
-                        if (this.processedFiles <= this.estimatedFilesToProcess) {
-                            this.fileIngestProgress.progress(file.getName(), (int) this.processedFiles);
-                        } else {
-                            this.fileIngestProgress.progress(file.getName(), (int) this.estimatedFilesToProcess);
-                        }
-                        this.filesInProgress.add(file.getName());
-                    }
-
-                    // Run the file through the pipeline.
-                    List<IngestModuleError> errors = new ArrayList<>();
-                    errors.addAll(pipeline.process(task));
-                    if (!errors.isEmpty()) {
-                        logIngestModuleErrors(errors);
-                    }
-
-                    // Update the file ingest progress bar again in case the
-                    // file was being displayed.
-                    if (!this.cancelled) {
-                        synchronized (this.fileIngestProgressLock) {
-                            this.filesInProgress.remove(file.getName());
-                            if (this.filesInProgress.size() > 0) {
-                                this.fileIngestProgress.progress(this.filesInProgress.get(0));
-                            } else {
-                                this.fileIngestProgress.progress("");
-                            }
-                        }
-                    }
-                }
-
-                // Relinquish the pipeline so it can be reused by another file
-                // ingest thread.
-                this.fileIngestPipelines.put(pipeline);
-            }
-        } finally {
-            // No matter what happens, let the ingest scheduler know that this
-            // task is completed.
-            IngestJob.ingestScheduler.notifyTaskCompleted(task);
-        }
-    }
-
-    /**
-     * Shuts down the ingest pipelines and progress bars for this job.
-     */
-    void finish() {
+    private void finishFirstStage() {
         // Shut down the file ingest pipelines. Note that no shut down is
         // required for the data source ingest pipeline because data source
         // ingest modules do not have a shutdown() method.
         List<IngestModuleError> errors = new ArrayList<>();
         while (!this.fileIngestPipelines.isEmpty()) {
             FileIngestPipeline pipeline = fileIngestPipelines.poll();
             errors.addAll(pipeline.shutDown());
         }
         if (!errors.isEmpty()) {
             logIngestModuleErrors(errors);
         }

-        // Finish the data source ingest progress bar, if it hasn't already
-        // been finished.
+        // Finish the first stage data source ingest progress bar, if it hasn't
+        // already been finished.
         synchronized (this.dataSourceIngestProgressLock) {
             if (this.dataSourceIngestProgress != null) {
                 this.dataSourceIngestProgress.finish();
                 this.dataSourceIngestProgress = null;
             }
         }

         synchronized (this.fileIngestProgressLock) {
             if (this.fileIngestProgress != null) {
                 this.fileIngestProgress.finish();
                 this.fileIngestProgress = null;
             }
         }
     }

+    /**
+     * Starts the second stage of this ingest job, passing the data source
+     * through the second stage data source ingest pipeline, i.e., the low
+     * priority data source ingest modules, if there are any.
+     */
+    private void startSecondStage() {
+        this.stage = IngestJob.Stages.SECOND;
+        if (!this.cancelled && !this.secondStageDataSourceIngestPipeline.isEmpty()) {
+            this.dataSourceIngestPipeline = this.secondStageDataSourceIngestPipeline;
+            this.startDataSourceIngestProgressBar();
+            try {
+                IngestJob.ingestScheduler.scheduleDataSourceIngestTask(this);
+            } catch (InterruptedException ex) {
+                // An interrupt is unexpected here; reset the interrupt flag
+                // for detection elsewhere and shut the job down.
+                Thread.currentThread().interrupt();
+                this.finish();
+            }
+        } else {
+            this.finish();
+        }
+    }
+
+    /**
+     * Shuts down the ingest pipelines and progress bars for this job.
+     */
+    private void finish() {
+        // Finish the second stage data source ingest progress bar, if it hasn't
+        // already been finished.
+        synchronized (this.dataSourceIngestProgressLock) {
+            if (this.dataSourceIngestProgress != null) {
+                this.dataSourceIngestProgress.finish();
+                this.dataSourceIngestProgress = null;
+            }
+        }
+
+        IngestJob.ingestJobsById.remove(this.id);
+        if (!this.isCancelled()) {
+            logger.log(Level.INFO, "Ingest job {0} completed", this.id);
+            IngestManager.getInstance().fireIngestJobCompleted(this.id);
+        } else {
+            logger.log(Level.INFO, "Ingest job {0} cancelled", this.id);
+            IngestManager.getInstance().fireIngestJobCancelled(this.id);
+        }
+    }
+
     /**
      * Write ingest module errors to the log.
      *
     * @param errors The errors.
     */
    private void logIngestModuleErrors(List<IngestModuleError> errors) {
        for (IngestModuleError error : errors) {
            logger.log(Level.SEVERE, error.getModuleDisplayName() + " experienced an error", error.getThrowable()); //NON-NLS
        }
    }

    /**
     * Requests a temporary cancellation of data source ingest in order to stop
     * the currently executing data source ingest module.
     */
    void requestCancellationOfCurrentDataSourceModule() {
         this.currentDataSourceIngestModuleCancelled = true;
     }

-    /**
-     * Determines whether or not a temporary cancellation of data source ingest
-     * in order to stop the currently executing data source ingest module is in
-     * effect.
- * - * @return True or false. - */ - boolean currentDataSourceIngestModuleIsCancelled() { - return this.currentDataSourceIngestModuleCancelled; - } - - /** - * Rescind a temporary cancellation of data source ingest in order to stop - * the currently executing data source ingest module. - */ - void currentDataSourceIngestModuleCancellationCompleted() { - this.currentDataSourceIngestModuleCancelled = false; - - // A new progress bar must be created because the cancel button of the - // previously constructed component is disabled by NetBeans when the - // user selects the "OK" button of the cancellation confirmation dialog - // popped up by NetBeans when the progress bar cancel button was - // pressed. - synchronized (this.dataSourceIngestProgressLock) { - this.dataSourceIngestProgress.finish(); - this.dataSourceIngestProgress = null; - this.startDataSourceIngestProgressBar(); - } - } - - /** - * Requests cancellation of ingest, i.e., a shutdown of the data source and - * file ingest pipelines. - */ - void cancel() { - // Put a cancellation message on data source ingest progress bar, - // if it is still running. - synchronized (this.dataSourceIngestProgressLock) { - if (dataSourceIngestProgress != null) { - final String displayName = NbBundle.getMessage(this.getClass(), - "IngestJob.progress.dataSourceIngest.initialDisplayName", - dataSource.getName()); - dataSourceIngestProgress.setDisplayName( - NbBundle.getMessage(this.getClass(), - "IngestJob.progress.cancelling", - displayName)); - } - } - - // Put a cancellation message on the file ingest progress bar, - // if it is still running. - synchronized (this.fileIngestProgressLock) { - if (this.fileIngestProgress != null) { - final String displayName = NbBundle.getMessage(this.getClass(), - "IngestJob.progress.fileIngest.displayName", - this.dataSource.getName()); - this.fileIngestProgress.setDisplayName( - NbBundle.getMessage(this.getClass(), "IngestJob.progress.cancelling", - displayName)); - } - } - - this.cancelled = true; - - // Tell the ingest scheduler to cancel all pending tasks. - IngestJob.ingestScheduler.cancelPendingTasksForIngestJob(this); - } - - /** - * Queries whether or not cancellation of ingest i.e., a shutdown of the - * data source and file ingest pipelines, has been requested - * - * @return True or false. - */ - boolean isCancelled() { - return this.cancelled; - } - - /** - * Get some basic performance statistics on this job. - * - * @return An ingest job statistics object. - */ - IngestJobStats getStats() { - return new IngestJobStats(); - } - /** * Stores basic diagnostic statistics for an ingest job. */ - class IngestJobStats { + class IngestJobSnapshot { + private final long jobId; + private final String dataSource; private final long startTime; private final long processedFiles; private final long estimatedFilesToProcess; private final long snapShotTime; + private final IngestTasksScheduler.IngestJobTasksSnapshot tasksSnapshot; /** * Constructs an object to stores basic diagnostic statistics for an * ingest job. 
     */
-        IngestJobStats() {
+        IngestJobSnapshot() {
+            this.jobId = IngestJob.this.id;
+            this.dataSource = IngestJob.this.dataSource.getName();
             this.startTime = IngestJob.this.startTime;
             synchronized (IngestJob.this.fileIngestProgressLock) {
                 this.processedFiles = IngestJob.this.processedFiles;
                 this.estimatedFilesToProcess = IngestJob.this.estimatedFilesToProcess;
                 this.snapShotTime = new Date().getTime();
             }
+            this.tasksSnapshot = IngestJob.ingestScheduler.getTasksSnapshotForJob(this.jobId);
         }

+        /**
+         * Gets the identifier of the ingest job this snapshot describes.
+         *
+         * @return The ingest job id.
+         */
+        long getJobId() {
+            return this.jobId;
+        }
+
+        /**
+         * Gets the name of the data source for the ingest job this snapshot
+         * describes.
+         *
+         * @return The data source name.
+         */
+        String getDataSource() {
+            return dataSource;
+        }
+
         /**
          * Gets files per second throughput since job started.
          *
@@ -651,6 +870,47 @@ final class IngestJob {
         long getFilesEstimated() {
             return estimatedFilesToProcess;
         }
+
+        /**
+         * Gets the number of file ingest tasks for the job in the root
+         * directory tasks queue.
+         *
+         * @return The root directory tasks queue size.
+         */
+        long getRootQueueSize() {
+            return this.tasksSnapshot.getRootQueueSize();
+        }
+
+        /**
+         * Gets the number of file ingest tasks for the job in the directory
+         * tasks queue.
+         *
+         * @return The directory tasks queue size.
+         */
+        long getDirQueueSize() {
+            return this.tasksSnapshot.getDirQueueSize();
+        }
+
+        /**
+         * Gets the number of file ingest tasks for the job in the pending file
+         * tasks queue.
+         *
+         * @return The pending file tasks queue size.
+         */
+        long getFileQueueSize() {
+            return this.tasksSnapshot.getFileQueueSize();
+        }
+
+        /**
+         * Gets the number of data source ingest tasks for the job in the
+         * pending data source tasks queue.
+         *
+         * @return The pending data source tasks queue size.
+         */
+        long getDsQueueSize() {
+            return this.tasksSnapshot.getDsQueueSize();
+        }
+
+        /**
+         * Gets the number of ingest tasks for the job that have been handed to
+         * an ingest thread and are currently running.
+         *
+         * @return The running tasks list size.
+         */
+        long getRunningListSize() {
+            return this.tasksSnapshot.getRunningListSize();
+        }
+
     }

 }
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestJobContext.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestJobContext.java
index dc0248ad11..6d81f9db79 100755
--- a/Core/src/org/sleuthkit/autopsy/ingest/IngestJobContext.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestJobContext.java
@@ -19,7 +19,6 @@
 package org.sleuthkit.autopsy.ingest;

 import java.util.List;
-import java.util.logging.Level;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.Content;
@@ -31,7 +30,6 @@ import org.sleuthkit.datamodel.Content;
 public final class IngestJobContext {

     private static final Logger logger = Logger.getLogger(IngestJobContext.class.getName());
-    private static final IngestScheduler scheduler = IngestScheduler.getInstance();
     private final IngestJob ingestJob;

     IngestJobContext(IngestJob ingestJob) {
@@ -107,20 +105,21 @@ public final class IngestJobContext {
      * pipeline of the ingest job associated with this context.
      *
      * @param files The files to be processed by the file ingest pipeline.
+     * @deprecated use addFilesToJob() instead
      */
+    @Deprecated
     public void scheduleFiles(List<AbstractFile> files) {
-        for (AbstractFile file : files) {
-            try {
-                IngestJobContext.scheduler.scheduleAdditionalFileIngestTask(this.ingestJob, file);
-            } catch (InterruptedException ex) {
-                // Handle the unexpected interrupt here rather than make ingest
-                // module writers responsible for writing this exception handler.
-                // The interrupt flag of the thread is reset for detection by
-                // the thread task code.
-                Thread.currentThread().interrupt();
-                IngestJobContext.logger.log(Level.SEVERE, "File task scheduling unexpectedly interrupted", ex); //NON-NLS
-            }
-        }
+        this.addFilesToJob(files);
+    }
+
+    /**
+     * Adds one or more files to the files to be passed through the file ingest
+     * pipeline of the ingest job associated with this context.
+     *
+     * @param files The files to be processed by the file ingest pipeline.
+     */
+    public void addFilesToJob(List<AbstractFile> files) {
+        this.ingestJob.addFiles(files);
+    }
 }
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java
index a86b9ff78c..b04f973d70 100644
--- a/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java
@@ -134,7 +134,7 @@ public class IngestManager {
      */
     private void startDataSourceIngestTask() {
         long threadId = nextThreadId.incrementAndGet();
-        dataSourceIngestThreadPool.submit(new ExecuteIngestTasksTask(threadId, IngestScheduler.getInstance().getDataSourceIngestTaskQueue()));
+        dataSourceIngestThreadPool.submit(new ExecuteIngestTasksTask(threadId, IngestTasksScheduler.getInstance().getDataSourceIngestTaskQueue()));
         ingestThreadActivitySnapshots.put(threadId, new IngestThreadActivitySnapshot(threadId));
     }

@@ -144,7 +144,7 @@ public class IngestManager {
      */
     private void startFileIngestTask() {
         long threadId = nextThreadId.incrementAndGet();
-        fileIngestThreadPool.submit(new ExecuteIngestTasksTask(threadId, IngestScheduler.getInstance().getFileIngestTaskQueue()));
+        fileIngestThreadPool.submit(new ExecuteIngestTasksTask(threadId, IngestTasksScheduler.getInstance().getFileIngestTaskQueue()));
         ingestThreadActivitySnapshots.put(threadId, new IngestThreadActivitySnapshot(threadId));
     }

@@ -174,12 +174,12 @@ public class IngestManager {
     }

     void handleCaseOpened() {
-        IngestScheduler.getInstance().setEnabled(true);
+        IngestJob.jobCreationEnabled(true);
         clearIngestMessageBox();
     }

     void handleCaseClosed() {
-        IngestScheduler.getInstance().setEnabled(false);
+        IngestJob.jobCreationEnabled(false);
         cancelAllIngestJobs();
         clearIngestMessageBox();
     }
@@ -197,7 +197,7 @@ public class IngestManager {
      * @return True if any ingest jobs are in progress, false otherwise.
      */
     public boolean isIngestRunning() {
-        return IngestScheduler.getInstance().ingestJobsAreRunning();
+        return IngestJob.ingestJobsAreRunning();
     }

@@ -293,7 +293,7 @@ public class IngestManager {
         }

         // Cancel all the jobs already created.
-        IngestScheduler.getInstance().cancelAllIngestJobs();
+        IngestJob.cancelAllJobs();
     }

     /**
@@ -555,7 +555,7 @@ public class IngestManager {
         }

         // Start an ingest job for the data source.
-        List<IngestModuleError> errors = IngestScheduler.getInstance().startIngestJob(dataSource, moduleTemplates, processUnallocatedSpace);
+        List<IngestModuleError> errors = IngestJob.startJob(dataSource, moduleTemplates, processUnallocatedSpace);
         if (!errors.isEmpty()) {
             // Report the errors to the user. They have already been logged.
StringBuilder moduleStartUpErrors = new StringBuilder(); diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestProgressSnapshotPanel.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestProgressSnapshotPanel.java index dff5c2c856..61dd097f6f 100755 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestProgressSnapshotPanel.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestProgressSnapshotPanel.java @@ -29,7 +29,6 @@ import javax.swing.table.AbstractTableModel; import javax.swing.table.TableColumn; import org.apache.commons.lang3.time.DurationFormatUtils; import org.openide.util.NbBundle; -import org.sleuthkit.autopsy.ingest.IngestScheduler.IngestJobSchedulerStats; public class IngestProgressSnapshotPanel extends javax.swing.JPanel { @@ -161,20 +160,20 @@ public class IngestProgressSnapshotPanel extends javax.swing.JPanel { private final String[] columnNames = {"Job ID", "Data Source", "Start", "Num Processed", "Files/Sec", "In Progress", "Files Queued", "Dir Queued", "Root Queued", "DS Queued"}; - private List schedStats; + private List jobSnapshots; private IngestJobTableModel() { refresh(); } private void refresh() { - schedStats = IngestScheduler.getInstance().getJobStats(); + jobSnapshots = IngestJob.getJobSnapshots(); fireTableDataChanged(); } @Override public int getRowCount() { - return schedStats.size(); + return jobSnapshots.size(); } @Override @@ -189,39 +188,39 @@ public class IngestProgressSnapshotPanel extends javax.swing.JPanel { @Override public Object getValueAt(int rowIndex, int columnIndex) { - IngestJobSchedulerStats schedStat = schedStats.get(rowIndex); + IngestJob.IngestJobSnapshot snapShot = jobSnapshots.get(rowIndex); Object cellValue; switch (columnIndex) { case 0: - cellValue = schedStat.getJobId(); + cellValue = snapShot.getJobId(); break; case 1: - cellValue = schedStat.getDataSource(); + cellValue = snapShot.getDataSource(); break; case 2: SimpleDateFormat dateFormat = new SimpleDateFormat("HH:mm:ss"); - cellValue = dateFormat.format(new Date(schedStat.getIngestJobStats().getStartTime())); + cellValue = dateFormat.format(new Date(snapShot.getStartTime())); break; case 3: - cellValue = schedStat.getIngestJobStats().getFilesProcessed(); + cellValue = snapShot.getFilesProcessed(); break; case 4: - cellValue = schedStat.getIngestJobStats().getSpeed(); + cellValue = snapShot.getSpeed(); break; case 5: - cellValue = schedStat.getRunningListSize(); + cellValue = snapShot.getRunningListSize(); break; case 6: - cellValue = schedStat.getFileQueueSize(); + cellValue = snapShot.getFileQueueSize(); break; case 7: - cellValue = schedStat.getDirQueueSize(); + cellValue = snapShot.getDirQueueSize(); break; case 8: - cellValue = schedStat.getRootQueueSize(); + cellValue = snapShot.getRootQueueSize(); break; case 9: - cellValue = schedStat.getDsQueueSize(); + cellValue = snapShot.getDsQueueSize(); break; default: cellValue = null; diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestScheduler.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestScheduler.java deleted file mode 100755 index 4fef5eaad9..0000000000 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestScheduler.java +++ /dev/null @@ -1,678 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2012-2014 Basis Technology Corp. - * Contact: carrier sleuthkit org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.ingest; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Comparator; -import java.util.Iterator; -import java.util.List; -import java.util.TreeSet; -import java.util.concurrent.BlockingDeque; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.LinkedBlockingDeque; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.atomic.AtomicLong; -import java.util.logging.Level; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.sleuthkit.autopsy.coreutils.Logger; -import org.sleuthkit.autopsy.ingest.IngestJob.IngestJobStats; -import org.sleuthkit.datamodel.AbstractFile; -import org.sleuthkit.datamodel.Content; -import org.sleuthkit.datamodel.File; -import org.sleuthkit.datamodel.FileSystem; -import org.sleuthkit.datamodel.TskCoreException; -import org.sleuthkit.datamodel.TskData; - -/** - * Creates ingest jobs and their constituent ingest tasks, queuing the tasks in - * priority order for execution by the ingest manager's ingest threads. - */ -final class IngestScheduler { - - private static final Logger logger = Logger.getLogger(IngestScheduler.class.getName()); - - private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue(); - - private static IngestScheduler instance = null; - - private final AtomicLong nextIngestJobId = new AtomicLong(0L); - - private final ConcurrentHashMap ingestJobsById = new ConcurrentHashMap<>(); - - private volatile boolean enabled = false; - - private final DataSourceIngestTaskQueue dataSourceTaskDispenser = new DataSourceIngestTaskQueue(); - - private final FileIngestTaskQueue fileTaskDispenser = new FileIngestTaskQueue(); - - // The following five collections lie at the heart of the scheduler. - // The pending tasks queues are used to schedule tasks for an ingest job. If - // multiple jobs are scheduled, tasks from different jobs may become - // interleaved in these queues. - // FIFO queue for data source-level tasks. - private final LinkedBlockingQueue pendingDataSourceTasks = new LinkedBlockingQueue<>(); // Guarded by this - - // File tasks are "shuffled" - // through root directory (priority queue), directory (LIFO), and file tasks - // queues (LIFO). If a file task makes it into the pending file tasks queue, - // it is consumed by the ingest threads. - private final TreeSet pendingRootDirectoryTasks = new TreeSet<>(new RootDirectoryTaskComparator()); // Guarded by this - - private final List pendingDirectoryTasks = new ArrayList<>(); // Guarded by this - - private final BlockingDeque pendingFileTasks = new LinkedBlockingDeque<>(); // Not guarded - - // The "tasks in progress" list has: - // - File and data source tasks that are running - // - File tasks that are in the pending file queue - // It is used to determine when a job is done. 
It has both pending and running - // tasks because we do not lock the 'pendingFileTasks' and a task needs to be in - // at least one of the pending or inprogress lists at all times before it is completed. - // files are added to this when the are added to pendingFilesTasks and removed when they complete - private final List tasksInProgressAndPending = new ArrayList<>(); // Guarded by this - - synchronized static IngestScheduler getInstance() { - if (instance == null) { - instance = new IngestScheduler(); - } - return instance; - } - - private IngestScheduler() { - } - - void setEnabled(boolean enabled) { - this.enabled = enabled; - } - - /** - * Creates an ingest job for a data source. - * - * @param dataSource The data source to ingest. - * @param ingestModuleTemplates The ingest module templates to use to create - * the ingest pipelines for the job. - * @param processUnallocatedSpace Whether or not the job should include - * processing of unallocated space. - * - * @return A collection of ingest module start up errors, empty on success. - * - * @throws InterruptedException - */ - List startIngestJob(Content dataSource, List ingestModuleTemplates, boolean processUnallocatedSpace) throws InterruptedException { - List errors = new ArrayList<>(); - if (enabled) { - long jobId = nextIngestJobId.incrementAndGet(); - IngestJob job = new IngestJob(jobId, dataSource, processUnallocatedSpace); - errors = job.start(ingestModuleTemplates); - if (errors.isEmpty() && (job.hasDataSourceIngestPipeline() || job.hasFileIngestPipeline())) { - ingestJobsById.put(jobId, job); - IngestManager.getInstance().fireIngestJobStarted(jobId); - scheduleIngestTasks(job); - logger.log(Level.INFO, "Ingest job {0} started", jobId); - } - } - return errors; - } - - synchronized private void scheduleIngestTasks(IngestJob job) throws InterruptedException { - // This is synchronized to guard the task queues and make ingest - // scheduling for a job an an atomic operation. Otherwise, the data - // source task might be completed before the file tasks were scheduled, - // resulting in a false positive for a job completion check. - if (job.hasDataSourceIngestPipeline()) { - scheduleDataSourceIngestTask(job); - } - if (job.hasFileIngestPipeline()) { - scheduleFileIngestTasks(job); - } - } - - synchronized private void scheduleDataSourceIngestTask(IngestJob job) throws InterruptedException { - DataSourceIngestTask task = new DataSourceIngestTask(job); - tasksInProgressAndPending.add(task); - try { - // Should not block, queue is (theoretically) unbounded. 
- pendingDataSourceTasks.put(task); - } catch (InterruptedException ex) { - tasksInProgressAndPending.remove(task); - Logger.getLogger(IngestScheduler.class.getName()).log(Level.SEVERE, "Interruption of unexpected block on pending data source tasks queue", ex); //NON-NLS - throw ex; - } - } - - synchronized private void scheduleFileIngestTasks(IngestJob job) throws InterruptedException { - List topLevelFiles = getTopLevelFiles(job.getDataSource()); - for (AbstractFile firstLevelFile : topLevelFiles) { - FileIngestTask task = new FileIngestTask(job, firstLevelFile); - if (shouldEnqueueFileTask(task)) { - pendingRootDirectoryTasks.add(task); - } - } - updatePendingFileTasksQueues(); - } - - private static List getTopLevelFiles(Content dataSource) { - List topLevelFiles = new ArrayList<>(); - Collection rootObjects = dataSource.accept(new GetRootDirectoryVisitor()); - if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) { - // The data source is itself a file to be processed. - topLevelFiles.add((AbstractFile) dataSource); - } else { - for (AbstractFile root : rootObjects) { - List children; - try { - children = root.getChildren(); - if (children.isEmpty()) { - // Add the root object itself, it could be an unallocated - // space file, or a child of a volume or an image. - topLevelFiles.add(root); - } else { - // The root object is a file system root directory, get - // the files within it. - for (Content child : children) { - if (child instanceof AbstractFile) { - topLevelFiles.add((AbstractFile) child); - } - } - } - } catch (TskCoreException ex) { - logger.log(Level.WARNING, "Could not get children of root to enqueue: " + root.getId() + ": " + root.getName(), ex); //NON-NLS - } - } - } - return topLevelFiles; - } - - synchronized private void updatePendingFileTasksQueues() throws InterruptedException { - // This is synchronized to guard the pending file tasks queues and make - // this an atomic operation. - if (enabled) { - while (true) { - // Loop until either the pending file tasks queue is NOT empty - // or the upstream queues that feed into it ARE empty. - if (pendingFileTasks.isEmpty() == false) { - return; - } - if (pendingDirectoryTasks.isEmpty()) { - if (pendingRootDirectoryTasks.isEmpty()) { - return; - } - pendingDirectoryTasks.add(pendingRootDirectoryTasks.pollFirst()); - } - - // Try to add the most recently added from the pending directory tasks queue to - // the pending file tasks queue. - boolean tasksEnqueuedForDirectory = false; - FileIngestTask directoryTask = pendingDirectoryTasks.remove(pendingDirectoryTasks.size() - 1); - if (shouldEnqueueFileTask(directoryTask)) { - addToPendingFileTasksQueue(directoryTask); - tasksEnqueuedForDirectory = true; - } - - // If the directory contains subdirectories or files, try to - // enqueue tasks for them as well. - final AbstractFile directory = directoryTask.getFile(); - try { - for (Content child : directory.getChildren()) { - if (child instanceof AbstractFile) { - AbstractFile file = (AbstractFile) child; - FileIngestTask childTask = new FileIngestTask(directoryTask.getIngestJob(), file); - if (file.hasChildren()) { - // Found a subdirectory, put the task in the - // pending directory tasks queue. - pendingDirectoryTasks.add(childTask); - tasksEnqueuedForDirectory = true; - } else if (shouldEnqueueFileTask(childTask)) { - // Found a file, put the task directly into the - // pending file tasks queue. 
- addToPendingFileTasksQueue(childTask); - tasksEnqueuedForDirectory = true; - } - } - } - } catch (TskCoreException ex) { - String errorMessage = String.format("An error occurred getting the children of %s", directory.getName()); //NON-NLS - logger.log(Level.SEVERE, errorMessage, ex); - } - - // In the case where the directory task is not pushed into the - // the pending file tasks queue and has no children, check to - // see if the job is completed - the directory task might have - // been the last task for the job. - if (!tasksEnqueuedForDirectory) { - IngestJob job = directoryTask.getIngestJob(); - if (ingestJobIsComplete(job)) { - finishIngestJob(job); - } - } - } - } - } - - private static boolean shouldEnqueueFileTask(final FileIngestTask processTask) { - final AbstractFile aFile = processTask.getFile(); - //if it's unalloc file, skip if so scheduled - if (processTask.getIngestJob().shouldProcessUnallocatedSpace() == false && aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) { - return false; - } - String fileName = aFile.getName(); - if (fileName.equals(".") || fileName.equals("..")) { - return false; - } else if (aFile instanceof org.sleuthkit.datamodel.File) { - final org.sleuthkit.datamodel.File f = (File) aFile; - //skip files in root dir, starting with $, containing : (not default attributes) - //with meta address < 32, i.e. some special large NTFS and FAT files - FileSystem fs = null; - try { - fs = f.getFileSystem(); - } catch (TskCoreException ex) { - logger.log(Level.SEVERE, "Could not get FileSystem for " + f, ex); //NON-NLS - } - TskData.TSK_FS_TYPE_ENUM fsType = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_UNSUPP; - if (fs != null) { - fsType = fs.getFsType(); - } - if ((fsType.getValue() & FAT_NTFS_FLAGS) == 0) { - //not fat or ntfs, accept all files - return true; - } - boolean isInRootDir = false; - try { - isInRootDir = f.getParentDirectory().isRoot(); - } catch (TskCoreException ex) { - logger.log(Level.WARNING, "Could not check if should enqueue the file: " + f.getName(), ex); //NON-NLS - } - if (isInRootDir && f.getMetaAddr() < 32) { - String name = f.getName(); - if (name.length() > 0 && name.charAt(0) == '$' && name.contains(":")) { - return false; - } - } else { - return true; - } - } - return true; - } - - synchronized private void addToPendingFileTasksQueue(FileIngestTask task) throws IllegalStateException { - tasksInProgressAndPending.add(task); - try { - // Should not block, queue is (theoretically) unbounded. - /* add to top of list because we had one image that had a folder - * with - * lots of zip files. This queue had thousands of entries because - * it just kept on getting bigger and bigger. So focus on pushing - * out - * the ZIP file contents out of the queue to try to keep it small. - */ - pendingFileTasks.addFirst(task); - } catch (IllegalStateException ex) { - tasksInProgressAndPending.remove(task); - Logger.getLogger(IngestScheduler.class.getName()).log(Level.SEVERE, "Interruption of unexpected block on pending file tasks queue", ex); //NON-NLS - throw ex; - } - } - - void scheduleAdditionalFileIngestTask(IngestJob job, AbstractFile file) throws InterruptedException { - if (enabled) { - FileIngestTask task = new FileIngestTask(job, file); - if (shouldEnqueueFileTask(task)) { - // Send the file task directly to file tasks queue, no need to - // update the pending root directory or pending directory tasks - // queues. 
- addToPendingFileTasksQueue(task); - } - } - } - - IngestTaskQueue getDataSourceIngestTaskQueue() { - return dataSourceTaskDispenser; - } - - IngestTaskQueue getFileIngestTaskQueue() { - return fileTaskDispenser; - } - - void notifyTaskCompleted(IngestTask task) { - boolean jobIsCompleted; - IngestJob job = task.getIngestJob(); - synchronized (this) { - tasksInProgressAndPending.remove(task); - jobIsCompleted = ingestJobIsComplete(job); - } - if (jobIsCompleted) { - // The lock does not need to be held for the job shut down. - finishIngestJob(job); - } - } - - /** - * Queries whether or not ingest jobs are running. - * - * @return True or false. - */ - boolean ingestJobsAreRunning() { - return !ingestJobsById.isEmpty(); - } - - /** - * Clears the pending ingest task queues for an ingest job. If job is - * complete (no pending or in progress tasks) the job is finished up. - * Otherwise, the last worker thread with an in progress task will finish / - * clean up the job. - * - * @param job The job to cancel. - */ - synchronized void cancelPendingTasksForIngestJob(IngestJob job) { - long jobId = job.getId(); - removeAllPendingTasksForJob(pendingRootDirectoryTasks, jobId); - removeAllPendingTasksForJob(pendingDirectoryTasks, jobId); - removeAllPendingTasksForJob(pendingFileTasks, jobId); - removeAllPendingTasksForJob(pendingDataSourceTasks, jobId); - if (ingestJobIsComplete(job)) { - finishIngestJob(job); - } - } - - /** - * Return the number of tasks in the queue for the given job ID - * - * @param - * @param queue - * @param jobId - * - * @return - */ - int countJobsInCollection(Collection queue, long jobId) { - Iterator iterator = queue.iterator(); - int count = 0; - while (iterator.hasNext()) { - IngestTask task = (IngestTask) iterator.next(); - if (task.getIngestJob().getId() == jobId) { - count++; - } - } - return count; - } - - synchronized private void removeAllPendingTasksForJob(Collection taskQueue, long jobId) { - Iterator iterator = taskQueue.iterator(); - while (iterator.hasNext()) { - IngestTask task = iterator.next(); - if (task.getIngestJob().getId() == jobId) { - tasksInProgressAndPending.remove(task); - iterator.remove(); - } - } - } - - void cancelAllIngestJobs() { - synchronized (this) { - removeAllPendingTasks(pendingRootDirectoryTasks); - removeAllPendingTasks(pendingDirectoryTasks); - removeAllPendingTasks(pendingFileTasks); - removeAllPendingTasks(pendingDataSourceTasks); - for (IngestJob job : ingestJobsById.values()) { - job.cancel(); - if (ingestJobIsComplete(job)) { - finishIngestJob(job); - } - } - } - } - - synchronized private void removeAllPendingTasks(Collection taskQueue) { - Iterator iterator = taskQueue.iterator(); - while (iterator.hasNext()) { - tasksInProgressAndPending.remove((IngestTask) iterator.next()); - iterator.remove(); - } - } - - synchronized private boolean ingestJobIsComplete(IngestJob job) { - for (IngestTask task : tasksInProgressAndPending) { - if (task.getIngestJob().getId() == job.getId()) { - return false; - } - } - return true; - } - - /** - * Called after all work is completed to free resources. 
- * - * @param job - */ - private void finishIngestJob(IngestJob job) { - job.finish(); - long jobId = job.getId(); - ingestJobsById.remove(jobId); - if (!job.isCancelled()) { - logger.log(Level.INFO, "Ingest job {0} completed", jobId); - IngestManager.getInstance().fireIngestJobCompleted(job.getId()); - } else { - logger.log(Level.INFO, "Ingest job {0} cancelled", jobId); - IngestManager.getInstance().fireIngestJobCancelled(job.getId()); - } - } - - private static class RootDirectoryTaskComparator implements Comparator { - - @Override - public int compare(FileIngestTask q1, FileIngestTask q2) { - AbstractFilePriority.Priority p1 = AbstractFilePriority.getPriority(q1.getFile()); - AbstractFilePriority.Priority p2 = AbstractFilePriority.getPriority(q2.getFile()); - if (p1 == p2) { - return (int) (q2.getFile().getId() - q1.getFile().getId()); - } else { - return p2.ordinal() - p1.ordinal(); - } - } - - private static class AbstractFilePriority { - - enum Priority { - - LAST, LOW, MEDIUM, HIGH - } - - static final List LAST_PRI_PATHS = new ArrayList<>(); - - static final List LOW_PRI_PATHS = new ArrayList<>(); - - static final List MEDIUM_PRI_PATHS = new ArrayList<>(); - - static final List HIGH_PRI_PATHS = new ArrayList<>(); - /* prioritize root directory folders based on the assumption that we - * are - * looking for user content. Other types of investigations may want - * different - * priorities. */ - - static /* prioritize root directory - * folders based on the assumption that we are - * looking for user content. Other types of investigations may want - * different - * priorities. */ { - // these files have no structure, so they go last - //unalloc files are handled as virtual files in getPriority() - //LAST_PRI_PATHS.schedule(Pattern.compile("^\\$Unalloc", Pattern.CASE_INSENSITIVE)); - //LAST_PRI_PATHS.schedule(Pattern.compile("^\\Unalloc", Pattern.CASE_INSENSITIVE)); - LAST_PRI_PATHS.add(Pattern.compile("^pagefile", Pattern.CASE_INSENSITIVE)); - LAST_PRI_PATHS.add(Pattern.compile("^hiberfil", Pattern.CASE_INSENSITIVE)); - // orphan files are often corrupt and windows does not typically have - // user content, so put them towards the bottom - LOW_PRI_PATHS.add(Pattern.compile("^\\$OrphanFiles", Pattern.CASE_INSENSITIVE)); - LOW_PRI_PATHS.add(Pattern.compile("^Windows", Pattern.CASE_INSENSITIVE)); - // all other files go into the medium category too - MEDIUM_PRI_PATHS.add(Pattern.compile("^Program Files", Pattern.CASE_INSENSITIVE)); - // user content is top priority - HIGH_PRI_PATHS.add(Pattern.compile("^Users", Pattern.CASE_INSENSITIVE)); - HIGH_PRI_PATHS.add(Pattern.compile("^Documents and Settings", Pattern.CASE_INSENSITIVE)); - HIGH_PRI_PATHS.add(Pattern.compile("^home", Pattern.CASE_INSENSITIVE)); - HIGH_PRI_PATHS.add(Pattern.compile("^ProgramData", Pattern.CASE_INSENSITIVE)); - } - - /** - * Get the enabled priority for a given file. 
-             *
-             * @param abstractFile
-             *
-             * @return
-             */
-            static AbstractFilePriority.Priority getPriority(final AbstractFile abstractFile) {
-                if (!abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.FS)) {
-                    //quickly filter out unstructured content
-                    //non-fs virtual files and dirs, such as representing unalloc space
-                    return AbstractFilePriority.Priority.LAST;
-                }
-                //determine the fs files priority by name
-                final String path = abstractFile.getName();
-                if (path == null) {
-                    return AbstractFilePriority.Priority.MEDIUM;
-                }
-                for (Pattern p : HIGH_PRI_PATHS) {
-                    Matcher m = p.matcher(path);
-                    if (m.find()) {
-                        return AbstractFilePriority.Priority.HIGH;
-                    }
-                }
-                for (Pattern p : MEDIUM_PRI_PATHS) {
-                    Matcher m = p.matcher(path);
-                    if (m.find()) {
-                        return AbstractFilePriority.Priority.MEDIUM;
-                    }
-                }
-                for (Pattern p : LOW_PRI_PATHS) {
-                    Matcher m = p.matcher(path);
-                    if (m.find()) {
-                        return AbstractFilePriority.Priority.LOW;
-                    }
-                }
-                for (Pattern p : LAST_PRI_PATHS) {
-                    Matcher m = p.matcher(path);
-                    if (m.find()) {
-                        return AbstractFilePriority.Priority.LAST;
-                    }
-                }
-                //default is medium
-                return AbstractFilePriority.Priority.MEDIUM;
-            }
-        }
-    }
-
-    private final class DataSourceIngestTaskQueue implements IngestTaskQueue {
-
-        @Override
-        public IngestTask getNextTask() throws InterruptedException {
-            return pendingDataSourceTasks.take();
-        }
-    }
-
-    private final class FileIngestTaskQueue implements IngestTaskQueue {
-
-        @Override
-        public IngestTask getNextTask() throws InterruptedException {
-            FileIngestTask task = pendingFileTasks.takeFirst();
-            updatePendingFileTasksQueues();
-            return task;
-        }
-    }
-
-    /**
-     * Stores basic stats for a given job
-     */
-    class IngestJobSchedulerStats {
-
-        private final IngestJobStats ingestJobStats;
-
-        private final long jobId;
-
-        private final String dataSource;
-
-        private final long rootQueueSize;
-
-        private final long dirQueueSize;
-
-        private final long fileQueueSize;
-
-        private final long dsQueueSize;
-
-        private final long runningListSize;
-
-        IngestJobSchedulerStats(IngestJob job) {
-            ingestJobStats = job.getStats();
-            jobId = job.getId();
-            dataSource = job.getDataSource().getName();
-            rootQueueSize = countJobsInCollection(pendingRootDirectoryTasks, jobId);
-            dirQueueSize = countJobsInCollection(pendingDirectoryTasks, jobId);
-            fileQueueSize = countJobsInCollection(pendingFileTasks, jobId);
-            dsQueueSize = countJobsInCollection(pendingDataSourceTasks, jobId);
-            runningListSize = countJobsInCollection(tasksInProgressAndPending, jobId) - fileQueueSize - dsQueueSize;
-        }
-
-        protected long getJobId() {
-            return jobId;
-        }
-
-        protected String getDataSource() {
-            return dataSource;
-        }
-
-        protected long getRootQueueSize() {
-            return rootQueueSize;
-        }
-
-        protected long getDirQueueSize() {
-            return dirQueueSize;
-        }
-
-        protected long getFileQueueSize() {
-            return fileQueueSize;
-        }
-
-        protected long getDsQueueSize() {
-            return dsQueueSize;
-        }
-
-        protected long getRunningListSize() {
-            return runningListSize;
-        }
-
-        protected IngestJobStats getIngestJobStats() {
-            return ingestJobStats;
-        }
-    }
-
-    /**
-     * Get basic performance / stats on all running jobs
-     *
-     * @return
-     */
-    synchronized List<IngestJobSchedulerStats> getJobStats() {
-        List<IngestJobSchedulerStats> stats = new ArrayList<>();
-        for (IngestJob job : Collections.list(ingestJobsById.elements())) {
-            stats.add(new IngestJobSchedulerStats(job));
-        }
-        return stats;
-    }
-}
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java
new file mode 100755
index 0000000000..a9abef15f7
--- /dev/null
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java
@@ -0,0 +1,757 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2012-2014 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.sleuthkit.autopsy.ingest;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.TreeSet;
+import java.util.concurrent.BlockingDeque;
+import java.util.concurrent.LinkedBlockingDeque;
+import java.util.concurrent.LinkedBlockingQueue;
+import java.util.logging.Level;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.sleuthkit.autopsy.coreutils.Logger;
+import org.sleuthkit.datamodel.AbstractFile;
+import org.sleuthkit.datamodel.Content;
+import org.sleuthkit.datamodel.FileSystem;
+import org.sleuthkit.datamodel.TskCoreException;
+import org.sleuthkit.datamodel.TskData;
+
+/**
+ * Creates ingest tasks for ingest jobs, queuing the tasks in priority order for
+ * execution by the ingest manager's ingest threads.
+ */
+final class IngestTasksScheduler {
+
+    private static final Logger logger = Logger.getLogger(IngestTasksScheduler.class.getName());
+    private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue();
+    private static IngestTasksScheduler instance;
+
+    // Scheduling of data source ingest tasks is accomplished by putting them
+    // in a FIFO queue to be consumed by the ingest threads. The pending data
+    // source tasks queue is therefore wrapped in a "dispenser" that implements
+    // the IngestTaskQueue interface and is exposed via a getter method.
+    private final LinkedBlockingQueue<DataSourceIngestTask> pendingDataSourceTasks;
+    private final DataSourceIngestTaskQueue dataSourceTasksDispenser;
+
+    // Scheduling of file ingest tasks is accomplished by "shuffling" them
+    // through a sequence of internal queues that allows for the interleaving of
+    // tasks from different ingest jobs based on priority. These scheduling
+    // queues are:
+    //    1. root directory tasks (priority queue)
+    //    2. directory tasks (FIFO queue)
+    //    3. pending file tasks (LIFO queue)
+    // Tasks in the pending file tasks queue are ready to be consumed by the
+    // ingest threads. The pending file tasks queue is therefore wrapped in a
+    // "dispenser" that implements the IngestTaskQueue interface and is exposed
+    // via a getter method.
+    private final TreeSet<FileIngestTask> rootDirectoryTasks;
+    private final List<FileIngestTask> directoryTasks;
+    private final BlockingDeque<FileIngestTask> pendingFileTasks;
+    private final FileIngestTaskQueue fileTasksDispenser;
+
+    // The ingest scheduler is responsible for notifying an ingest job whenever
+    // all of the ingest tasks currently associated with the job are complete.
+    // To make this possible, the ingest tasks scheduler needs to keep track not
+    // only of the tasks in its queues, but also of the tasks that have been
+    // handed out for processing by code running on the ingest manager's ingest
+    // threads. Therefore all ingest tasks are added to this list and are not
+    // removed when an ingest thread takes an ingest task. Instead, the ingest
+    // thread calls back into the scheduler when the task is completed, at
+    // which time the task will be removed from this list.
+    private final List<IngestTask> tasksInProgressAndPending;
+
+    /**
+     * Gets the ingest tasks scheduler singleton.
+     */
+    synchronized static IngestTasksScheduler getInstance() {
+        if (IngestTasksScheduler.instance == null) {
+            IngestTasksScheduler.instance = new IngestTasksScheduler();
+        }
+        return IngestTasksScheduler.instance;
+    }
+
+    /**
+     * Constructs an ingest tasks scheduler.
+     */
+    private IngestTasksScheduler() {
+        this.pendingDataSourceTasks = new LinkedBlockingQueue<>();
+        this.dataSourceTasksDispenser = new DataSourceIngestTaskQueue();
+        this.rootDirectoryTasks = new TreeSet<>(new RootDirectoryTaskComparator());
+        this.directoryTasks = new ArrayList<>();
+        this.pendingFileTasks = new LinkedBlockingDeque<>();
+        this.fileTasksDispenser = new FileIngestTaskQueue();
+        this.tasksInProgressAndPending = new ArrayList<>();
+    }
+
+    /**
+     * Gets this ingest task scheduler's implementation of the IngestTaskQueue
+     * interface for data source ingest tasks.
+     *
+     * @return The data source ingest tasks queue.
+     */
+    IngestTaskQueue getDataSourceIngestTaskQueue() {
+        return this.dataSourceTasksDispenser;
+    }
+
+    /**
+     * Gets this ingest task scheduler's implementation of the IngestTaskQueue
+     * interface for file ingest tasks.
+     *
+     * @return The file ingest tasks queue.
+     */
+    IngestTaskQueue getFileIngestTaskQueue() {
+        return this.fileTasksDispenser;
+    }
+
+    /**
+     * Schedules a data source ingest task and file ingest tasks for an ingest
+     * job.
+     *
+     * @param job The job for which the tasks are to be scheduled.
+     * @throws InterruptedException if the calling thread is blocked due to a
+     * full tasks queue and is interrupted.
+     */
+    synchronized void scheduleIngestTasks(IngestJob job) throws InterruptedException {
+        // The initial ingest scheduling for a job is an atomic operation.
+        // Otherwise, the data source task might be completed before the file
+        // tasks are created, resulting in a potential false positive when this
+        // task scheduler checks whether or not all the tasks for the job are
+        // completed.
+        if (job.hasDataSourceIngestPipeline()) {
+            scheduleDataSourceIngestTask(job);
+        }
+        if (job.hasFileIngestPipeline()) {
+            scheduleFileIngestTasks(job);
+        }
+    }
+
+    /**
+     * Schedules a data source ingest task for an ingest job.
+     *
+     * @param job The job for which the tasks are to be scheduled.
+     * @throws InterruptedException if the calling thread is blocked due to a
+     * full tasks queue and is interrupted.
+     */
+    synchronized void scheduleDataSourceIngestTask(IngestJob job) throws InterruptedException {
+        // Create a data source ingest task for the data source associated with
+        // the ingest job and add the task to the pending data source tasks
+        // queue. Data source tasks are scheduled on a first come, first served
+        // basis.
+        DataSourceIngestTask task = new DataSourceIngestTask(job);
+        this.tasksInProgressAndPending.add(task);
+        try {
+            // This call should not block because the queue is (theoretically)
+            // unbounded.
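+            // Illustration only, not part of this class: on the consuming
+            // side, an ingest thread drains this queue through the dispenser
+            // roughly as sketched here; the worker loop itself is hypothetical.
+            //
+            //     IngestTaskQueue queue = IngestTasksScheduler.getInstance().getDataSourceIngestTaskQueue();
+            //     while (...) {
+            //         IngestTask task = queue.getNextTask(); // blocks on pendingDataSourceTasks.take()
+            //         ...                                    // run the ingest pipeline(s) for the task
+            //         IngestTasksScheduler.getInstance().notifyTaskCompleted(task);
+            //     }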
+            this.pendingDataSourceTasks.put(task);
+        } catch (InterruptedException ex) {
+            this.tasksInProgressAndPending.remove(task);
+            IngestTasksScheduler.logger.log(Level.SEVERE, "Interruption of unexpected block on pending data source tasks queue", ex); //NON-NLS
+            throw ex;
+        }
+    }
+
+    /**
+     * Schedules file ingest tasks for an ingest job.
+     *
+     * @param job The job for which the tasks are to be scheduled.
+     * @throws InterruptedException if the calling thread is blocked due to a
+     * full tasks queue and is interrupted.
+     */
+    synchronized void scheduleFileIngestTasks(IngestJob job) throws InterruptedException {
+        // Get the top level files for the data source associated with this job
+        // and add them to the root directories priority queue. The file tasks
+        // may be interleaved with file tasks from other jobs, based on priority.
+        List<AbstractFile> topLevelFiles = getTopLevelFiles(job.getDataSource());
+        for (AbstractFile firstLevelFile : topLevelFiles) {
+            FileIngestTask task = new FileIngestTask(job, firstLevelFile);
+            if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
+                this.tasksInProgressAndPending.add(task);
+                this.rootDirectoryTasks.add(task);
+            }
+        }
+        shuffleFileTaskQueues();
+    }
+
+    /**
+     * Schedules a file ingest task for an ingest job.
+     *
+     * @param job The job for which the tasks are to be scheduled.
+     * @param file The file associated with the task.
+     * @throws InterruptedException if the calling thread is blocked due to a
+     * full tasks queue and is interrupted.
+     */
+    void scheduleFileIngestTask(IngestJob job, AbstractFile file) throws InterruptedException, IllegalStateException {
+        FileIngestTask task = new FileIngestTask(job, file);
+        if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
+            // This synchronized method sends the file task directly to the
+            // pending file tasks queue. This is done to prioritize derived
+            // and carved files generated by a file ingest task in progress.
+            addToPendingFileTasksQueue(task);
+        }
+    }
+
+    /**
+     * Allows an ingest thread to notify this ingest task scheduler that a task
+     * has been completed.
+     *
+     * @param task The completed task.
+     */
+    synchronized void notifyTaskCompleted(IngestTask task) throws InterruptedException {
+        tasksInProgressAndPending.remove(task);
+        IngestJob job = task.getIngestJob();
+        if (this.tasksForJobAreCompleted(job)) {
+            job.notifyTasksCompleted();
+        }
+    }
+
+    /**
+     * Clears the task scheduling queues for an ingest job, but does nothing
+     * about tasks that have already been taken by ingest threads. Those tasks
+     * will be flushed out when the ingest threads call back with their task
+     * completed notifications.
+     *
+     * @param job The job for which the tasks are to be canceled.
+     */
+    synchronized void cancelPendingTasksForIngestJob(IngestJob job) {
+        // The scheduling queues are cleared of tasks for the job, and the tasks
+        // that are removed from the scheduling queues are also removed from the
+        // tasks in progress list. However, a tasks in progress check for the
+        // job may still return true since the tasks that have been taken by the
+        // ingest threads are still in the tasks in progress list.
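+        // Illustration only (a hypothetical trace, not code in this patch): if
+        // an ingest thread still holds one of the job's tasks when cancelation
+        // is requested, the shutdown handshake plays out as
+        //
+        //     scheduler.cancelPendingTasksForIngestJob(job); // queued tasks cleared below
+        //     // ...the ingest thread finishes the task it already took...
+        //     scheduler.notifyTaskCompleted(task);           // last task leaves the in-progress list
+        //     // tasksForJobAreCompleted(job) is now true, so the scheduler
+        //     // calls job.notifyTasksCompleted() and the job can shut down.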
+        long jobId = job.getId();
+        this.removeTasksForJob(this.rootDirectoryTasks, jobId);
+        this.removeTasksForJob(this.directoryTasks, jobId);
+        this.removeTasksForJob(this.pendingFileTasks, jobId);
+        this.removeTasksForJob(this.pendingDataSourceTasks, jobId);
+        if (this.tasksForJobAreCompleted(job)) {
+            job.notifyTasksCompleted();
+        }
+    }
+
+    /**
+     * A helper that gets the top level files such as file system root
+     * directories, layout files and virtual directories for a data source. Used
+     * to create file tasks to put into the root directories queue.
+     *
+     * @param dataSource The data source.
+     * @return A list of top level files.
+     */
+    private static List<AbstractFile> getTopLevelFiles(Content dataSource) {
+        List<AbstractFile> topLevelFiles = new ArrayList<>();
+        Collection<AbstractFile> rootObjects = dataSource.accept(new GetRootDirectoryVisitor());
+        if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) {
+            // The data source is itself a file to be processed.
+            topLevelFiles.add((AbstractFile) dataSource);
+        } else {
+            for (AbstractFile root : rootObjects) {
+                List<Content> children;
+                try {
+                    children = root.getChildren();
+                    if (children.isEmpty()) {
+                        // Add the root object itself, it could be an unallocated
+                        // space file, or a child of a volume or an image.
+                        topLevelFiles.add(root);
+                    } else {
+                        // The root object is a file system root directory, get
+                        // the files within it.
+                        for (Content child : children) {
+                            if (child instanceof AbstractFile) {
+                                topLevelFiles.add((AbstractFile) child);
+                            }
+                        }
+                    }
+                } catch (TskCoreException ex) {
+                    logger.log(Level.WARNING, "Could not get children of root to enqueue: " + root.getId() + ": " + root.getName(), ex); //NON-NLS
+                }
+            }
+        }
+        return topLevelFiles;
+    }
+
+    /**
+     * A helper that "shuffles" the file task queues to ensure that there is at
+     * least one task in the pending file ingest tasks queue, as long as there
+     * are still file ingest tasks to be performed.
+     *
+     * @throws InterruptedException if the calling thread is blocked due to a
+     * full tasks queue and is interrupted.
+     */
+    synchronized private void shuffleFileTaskQueues() throws InterruptedException, IllegalStateException {
+        // This is synchronized because it is called both by synchronized
+        // methods of this ingest scheduler and an unsynchronized method of its
+        // file tasks "dispenser".
+        while (true) {
+            // Loop until either the pending file tasks queue is NOT empty
+            // or the upstream queues that feed into it ARE empty.
+            if (!this.pendingFileTasks.isEmpty()) {
+                // There are file tasks ready to be consumed, exit.
+                return;
+            }
+            if (this.directoryTasks.isEmpty()) {
+                if (this.rootDirectoryTasks.isEmpty()) {
+                    // There are no root directory tasks to move into the
+                    // directory queue, exit.
+                    return;
+                } else {
+                    // Move the next root directory task into the
+                    // directories queue. Note that the task was already
+                    // added to the tasks in progress list when the task was
+                    // created in scheduleFileIngestTasks().
+                    this.directoryTasks.add(this.rootDirectoryTasks.pollFirst());
+                }
+            }
+
+            // Try to add the most recently added directory from the
+            // directory tasks queue to the pending file tasks queue. Note
+            // the removal of the task from the tasks in progress list. If
+            // the task is enqueued, it will be put back in the list by
+            // the addToPendingFileTasksQueue() helper.
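+            // Illustration only, with a made-up directory layout: starting
+            // from a single root task, successive passes of this loop move
+            // work downstream roughly like this (queue contents shown per pass):
+            //
+            //     rootDirectoryTasks: ["/Users"]   directoryTasks: []          pendingFileTasks: []
+            //     rootDirectoryTasks: []           directoryTasks: ["/Users"]  pendingFileTasks: []
+            //     rootDirectoryTasks: []           directoryTasks: ["/Users/alice"]  pendingFileTasks: ["/Users"]
+            //
+            // Subdirectories go back into directoryTasks; ordinary files are
+            // pushed onto the front of pendingFileTasks (LIFO).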
+            boolean tasksEnqueuedForDirectory = false;
+            FileIngestTask directoryTask = this.directoryTasks.remove(this.directoryTasks.size() - 1);
+            this.tasksInProgressAndPending.remove(directoryTask);
+            if (shouldEnqueueFileTask(directoryTask)) {
+                addToPendingFileTasksQueue(directoryTask);
+                tasksEnqueuedForDirectory = true;
+            }
+
+            // If the directory contains subdirectories or files, try to
+            // enqueue tasks for them as well.
+            final AbstractFile directory = directoryTask.getFile();
+            try {
+                for (Content child : directory.getChildren()) {
+                    if (child instanceof AbstractFile) {
+                        AbstractFile file = (AbstractFile) child;
+                        FileIngestTask childTask = new FileIngestTask(directoryTask.getIngestJob(), file);
+                        if (file.hasChildren()) {
+                            // Found a subdirectory, put the task in the
+                            // pending directory tasks queue. Note the
+                            // addition of the task to the tasks in progress
+                            // list. This is necessary because this is the
+                            // first appearance of this task in the queues.
+                            this.tasksInProgressAndPending.add(childTask);
+                            this.directoryTasks.add(childTask);
+                            tasksEnqueuedForDirectory = true;
+                        } else if (shouldEnqueueFileTask(childTask)) {
+                            // Found a file, put the task directly into the
+                            // pending file tasks queue. The new task will
+                            // be put into the tasks in progress list by the
+                            // addToPendingFileTasksQueue() helper.
+                            addToPendingFileTasksQueue(childTask);
+                            tasksEnqueuedForDirectory = true;
+                        }
+                    }
+                }
+            } catch (TskCoreException ex) {
+                String errorMessage = String.format("An error occurred getting the children of %s", directory.getName()); //NON-NLS
+                logger.log(Level.SEVERE, errorMessage, ex);
+            }
+
+            // In the case where the directory task is not pushed into the
+            // pending file tasks queue and has no children, check to
+            // see if the job is completed - the directory task might have
+            // been the last task for the job.
+            if (!tasksEnqueuedForDirectory) {
+                IngestJob job = directoryTask.getIngestJob();
+                if (this.tasksForJobAreCompleted(job)) {
+                    job.notifyTasksCompleted();
+                }
+            }
+        }
+    }
+
+    /**
+     * A helper method that examines the file associated with a file ingest task
+     * to determine whether or not the file should be processed and therefore
+     * the task should be enqueued.
+     *
+     * @param task The task to be scrutinized.
+     * @return True or false.
+     */
+    private static boolean shouldEnqueueFileTask(final FileIngestTask task) {
+        final AbstractFile file = task.getFile();
+
+        // Skip the task if the file is an unallocated space file and the
+        // process unallocated space flag is not set for this job.
+        if (!task.getIngestJob().shouldProcessUnallocatedSpace()
+                && file.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) {
+            return false;
+        }
+
+        // Skip the task if the file is actually the pseudo-file for the parent
+        // or current directory.
+        String fileName = file.getName();
+        if (fileName.equals(".") || fileName.equals("..")) {
+            return false;
+        }
+
+        // Skip the task if the file is one of a select group of special, large
+        // NTFS or FAT file system files: a file in the root directory with a
+        // meta-address less than 32 and a name that starts with '$' and
+        // contains a ':' (i.e., is not a default attribute).
+        if (file instanceof org.sleuthkit.datamodel.File) {
+            final org.sleuthkit.datamodel.File f = (org.sleuthkit.datamodel.File) file;
+
+            // Get the type of the file system, if any, that owns the file.
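+            // Worked example of the FAT_NTFS_FLAGS test below (illustration
+            // only): for an NTFS file system, fsType.getValue() shares the
+            // TSK_FS_TYPE_NTFS bit with FAT_NTFS_FLAGS, so the bitwise AND is
+            // non-zero and the special-file checks that follow are applied.
+            // For, say, an ext4 file system the AND is zero and the method
+            // returns true immediately, i.e. the file is never skipped here.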
+            TskData.TSK_FS_TYPE_ENUM fsType = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_UNSUPP;
+            try {
+                FileSystem fs = f.getFileSystem();
+                if (fs != null) {
+                    fsType = fs.getFsType();
+                }
+            } catch (TskCoreException ex) {
+                logger.log(Level.SEVERE, "Error querying file system for " + f, ex); //NON-NLS
+            }
+
+            // If the file system is not NTFS or FAT, don't skip the file.
+            if ((fsType.getValue() & FAT_NTFS_FLAGS) == 0) {
+                return true;
+            }
+
+            // Find out whether the file is in a root directory.
+            boolean isInRootDir = false;
+            try {
+                AbstractFile parent = f.getParentDirectory();
+                isInRootDir = parent.isRoot();
+            } catch (TskCoreException ex) {
+                logger.log(Level.WARNING, "Error querying parent directory for " + f.getName(), ex); //NON-NLS
+            }
+
+            // If the file is in the root directory of an NTFS or FAT file
+            // system, check its meta-address and check its name for the '$'
+            // character and a ':' character (not a default attribute).
+            if (isInRootDir && f.getMetaAddr() < 32) {
+                String name = f.getName();
+                if (name.length() > 0 && name.charAt(0) == '$' && name.contains(":")) {
+                    return false;
+                }
+            }
+        }
+
+        return true;
+    }
+
+    /**
+     * A helper method to safely add a file ingest task to the blocking pending
+     * tasks queue.
+     *
+     * @param task The file ingest task to enqueue.
+     * @throws IllegalStateException
+     */
+    synchronized private void addToPendingFileTasksQueue(FileIngestTask task) throws IllegalStateException {
+        tasksInProgressAndPending.add(task);
+        try {
+            // The file is added to the front of the pending files queue because
+            // at least one image has been processed that had a folder full of
+            // archive files. The queue grew to have thousands of entries, so
+            // this (might) help with pushing those files through ingest.
+            this.pendingFileTasks.addFirst(task);
+        } catch (IllegalStateException ex) {
+            tasksInProgressAndPending.remove(task);
+            Logger.getLogger(IngestTasksScheduler.class.getName()).log(Level.SEVERE, "Pending file tasks queue is full", ex); //NON-NLS
+            throw ex;
+        }
+    }
+
+    /**
+     * Determines whether or not all current ingest tasks for an ingest job are
+     * completed.
+     *
+     * @param job The job for which the query is to be performed.
+     * @return True or false.
+     */
+    private boolean tasksForJobAreCompleted(IngestJob job) {
+        for (IngestTask task : tasksInProgressAndPending) {
+            if (task.getIngestJob().getId() == job.getId()) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * A helper that removes all of the ingest tasks associated with an ingest
+     * job from a tasks queue. The task is removed from the tasks in
+     * progress list as well.
+     *
+     * @param taskQueue The queue from which to remove the tasks.
+     * @param jobId The id of the job for which the tasks are to be removed.
+     */
+    private void removeTasksForJob(Collection<? extends IngestTask> taskQueue, long jobId) {
+        Iterator<? extends IngestTask> iterator = taskQueue.iterator();
+        while (iterator.hasNext()) {
+            IngestTask task = iterator.next();
+            if (task.getIngestJob().getId() == jobId) {
+                this.tasksInProgressAndPending.remove(task);
+                iterator.remove();
+            }
+        }
+    }
+
+    /**
+     * A helper that counts the number of ingest tasks in a task queue for a
+     * given job.
+     *
+     * @param queue The queue for which to count tasks.
+     * @param jobId The id of the job for which the tasks are to be counted.
+     * @return The count.
+     */
+    private static int countTasksForJob(Collection<? extends IngestTask> queue, long jobId) {
+        Iterator<? extends IngestTask> iterator = queue.iterator();
+        int count = 0;
+        while (iterator.hasNext()) {
+            IngestTask task = (IngestTask) iterator.next();
+            if (task.getIngestJob().getId() == jobId) {
+                count++;
+            }
+        }
+        return count;
+    }
+
+    /**
+     * RJCTODO
+     *
+     * @param jobId
+     * @return
+     */
+    synchronized IngestJobTasksSnapshot getTasksSnapshotForJob(long jobId) {
+        return new IngestJobTasksSnapshot(jobId);
+    }
+
+    /**
+     * Prioritizes tasks for the root directories file ingest tasks queue (file
+     * system root directories, layout files and virtual directories).
+     */
+    private static class RootDirectoryTaskComparator implements Comparator<FileIngestTask> {
+
+        @Override
+        public int compare(FileIngestTask q1, FileIngestTask q2) {
+            AbstractFilePriority.Priority p1 = AbstractFilePriority.getPriority(q1.getFile());
+            AbstractFilePriority.Priority p2 = AbstractFilePriority.getPriority(q2.getFile());
+            if (p1 == p2) {
+                return (int) (q2.getFile().getId() - q1.getFile().getId());
+            } else {
+                return p2.ordinal() - p1.ordinal();
+            }
+        }
+
+        private static class AbstractFilePriority {
+
+            enum Priority {
+
+                LAST, LOW, MEDIUM, HIGH
+            }
+
+            static final List<Pattern> LAST_PRI_PATHS = new ArrayList<>();
+
+            static final List<Pattern> LOW_PRI_PATHS = new ArrayList<>();
+
+            static final List<Pattern> MEDIUM_PRI_PATHS = new ArrayList<>();
+
+            static final List<Pattern> HIGH_PRI_PATHS = new ArrayList<>();
+
+            /* Prioritize root directory folders based on the assumption that we
+             * are looking for user content. Other types of investigations may
+             * want different priorities. */
+            static {
+                // these files have no structure, so they go last
+                //unalloc files are handled as virtual files in getPriority()
+                //LAST_PRI_PATHS.schedule(Pattern.compile("^\\$Unalloc", Pattern.CASE_INSENSITIVE));
+                //LAST_PRI_PATHS.schedule(Pattern.compile("^\\Unalloc", Pattern.CASE_INSENSITIVE));
+                LAST_PRI_PATHS.add(Pattern.compile("^pagefile", Pattern.CASE_INSENSITIVE));
+                LAST_PRI_PATHS.add(Pattern.compile("^hiberfil", Pattern.CASE_INSENSITIVE));
+                // orphan files are often corrupt and windows does not typically have
+                // user content, so put them towards the bottom
+                LOW_PRI_PATHS.add(Pattern.compile("^\\$OrphanFiles", Pattern.CASE_INSENSITIVE));
+                LOW_PRI_PATHS.add(Pattern.compile("^Windows", Pattern.CASE_INSENSITIVE));
+                // all other files go into the medium category too
+                MEDIUM_PRI_PATHS.add(Pattern.compile("^Program Files", Pattern.CASE_INSENSITIVE));
+                // user content is top priority
+                HIGH_PRI_PATHS.add(Pattern.compile("^Users", Pattern.CASE_INSENSITIVE));
+                HIGH_PRI_PATHS.add(Pattern.compile("^Documents and Settings", Pattern.CASE_INSENSITIVE));
+                HIGH_PRI_PATHS.add(Pattern.compile("^home", Pattern.CASE_INSENSITIVE));
+                HIGH_PRI_PATHS.add(Pattern.compile("^ProgramData", Pattern.CASE_INSENSITIVE));
+            }
+
+            /**
+             * Get the enabled priority for a given file.
+ * + * @param abstractFile + * + * @return + */ + static AbstractFilePriority.Priority getPriority(final AbstractFile abstractFile) { + if (!abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.FS)) { + //quickly filter out unstructured content + //non-fs virtual files and dirs, such as representing unalloc space + return AbstractFilePriority.Priority.LAST; + } + //determine the fs files priority by name + final String path = abstractFile.getName(); + if (path == null) { + return AbstractFilePriority.Priority.MEDIUM; + } + for (Pattern p : HIGH_PRI_PATHS) { + Matcher m = p.matcher(path); + if (m.find()) { + return AbstractFilePriority.Priority.HIGH; + } + } + for (Pattern p : MEDIUM_PRI_PATHS) { + Matcher m = p.matcher(path); + if (m.find()) { + return AbstractFilePriority.Priority.MEDIUM; + } + } + for (Pattern p : LOW_PRI_PATHS) { + Matcher m = p.matcher(path); + if (m.find()) { + return AbstractFilePriority.Priority.LOW; + } + } + for (Pattern p : LAST_PRI_PATHS) { + Matcher m = p.matcher(path); + if (m.find()) { + return AbstractFilePriority.Priority.LAST; + } + } + //default is medium + return AbstractFilePriority.Priority.MEDIUM; + } + } + } + + /** + * Wraps access to pending data source ingest tasks in the interface + * required by the ingest threads. + */ + private final class DataSourceIngestTaskQueue implements IngestTaskQueue { + + /** + * @inheritDoc + */ + @Override + public IngestTask getNextTask() throws InterruptedException { + return IngestTasksScheduler.this.pendingDataSourceTasks.take(); + } + } + + /** + * Wraps access to pending file ingest tasks in the interface required by + * the ingest threads. + */ + private final class FileIngestTaskQueue implements IngestTaskQueue { + + /** + * @inheritDoc + */ + @Override + public IngestTask getNextTask() throws InterruptedException { + FileIngestTask task = IngestTasksScheduler.this.pendingFileTasks.takeFirst(); + shuffleFileTaskQueues(); + return task; + } + + } + + /** + * A snapshot of ingest tasks data for an ingest job. 
+ */ + class IngestJobTasksSnapshot { + private final long jobId; + private final long rootQueueSize; + private final long dirQueueSize; + private final long fileQueueSize; + private final long dsQueueSize; + private final long runningListSize; + + /** + * RJCTODO + * @param jobId + */ + IngestJobTasksSnapshot(long jobId) { + this.jobId = jobId; + this.rootQueueSize = countTasksForJob(IngestTasksScheduler.this.rootDirectoryTasks, jobId); + this.dirQueueSize = countTasksForJob(IngestTasksScheduler.this.directoryTasks, jobId); + this.fileQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingFileTasks, jobId); + this.dsQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingDataSourceTasks, jobId); + this.runningListSize = countTasksForJob(IngestTasksScheduler.this.tasksInProgressAndPending, jobId) - fileQueueSize - dsQueueSize; + } + + /** + * RJCTODO + * @return + */ + long getJobId() { + return jobId; + } + + /** + * RJCTODO + * @return + */ + long getRootQueueSize() { + return rootQueueSize; + } + + /** + * RJCTODO + * @return + */ + long getDirQueueSize() { + return dirQueueSize; + } + + /** + * RJCTODO + * @return + */ + long getFileQueueSize() { + return fileQueueSize; + } + + /** + * RJCTODO + * @return + */ + long getDsQueueSize() { + return dsQueueSize; + } + + /** + * RJCTODO + * @return + */ + long getRunningListSize() { + return runningListSize; + } + } + +} From b6ca38fef4d6b75f69d32e912bfd70a33ec8231a Mon Sep 17 00:00:00 2001 From: Richard Cordovano Date: Sun, 2 Nov 2014 23:25:47 -0500 Subject: [PATCH 02/10] Complete most of the multi-stage pipeline feature --- .../ingest/DataSourceIngestPipeline.java | 2 +- .../sleuthkit/autopsy/ingest/IngestJob.java | 102 ++++++++++---- .../ingest/IngestModuleFactoryLoader.java | 2 + .../ingest/IngestPipelinesConfiguration.java | 130 +++++++++++------- .../autopsy/ingest/PipelineConfig.xml | 25 ++++ .../autopsy/ingest/pipeline_config.xml | 18 --- 6 files changed, 186 insertions(+), 93 deletions(-) create mode 100644 Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml delete mode 100644 Core/src/org/sleuthkit/autopsy/ingest/pipeline_config.xml diff --git a/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestPipeline.java b/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestPipeline.java index f39ec0494b..b603166981 100755 --- a/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestPipeline.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestPipeline.java @@ -43,7 +43,7 @@ final class DataSourceIngestPipeline { DataSourceIngestModuleDecorator module = new DataSourceIngestModuleDecorator(template.createDataSourceIngestModule(), template.getModuleName()); modules.add(module); } - } + } } boolean isEmpty() { diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java index c1fe37824f..c050fd0a1c 100644 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java @@ -20,7 +20,9 @@ package org.sleuthkit.autopsy.ingest; import java.util.ArrayList; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicLong; @@ -114,7 +116,7 @@ final class IngestJob { long jobId = nextIngestJobId.incrementAndGet(); IngestJob job = new IngestJob(jobId, dataSource, processUnallocatedSpace); errors = job.start(ingestModuleTemplates); - if 
(errors.isEmpty() && (job.hasDataSourceIngestPipeline() || job.hasFileIngestPipeline())) {
+        if (errors.isEmpty() && (job.hasDataSourceIngestPipeline() || job.hasFileIngestPipeline())) { // RJCTODO: What about 2nd stage only?
             ingestJobsById.put(jobId, job);
             IngestManager.getInstance().fireIngestJobStarted(jobId);
             IngestJob.ingestScheduler.scheduleIngestTasks(job);
@@ -135,7 +137,8 @@ final class IngestJob {
 
     /**
      * RJCTODO
-     * @return
+     *
+     * @return
      */
     static List<IngestJobSnapshot> getJobSnapshots() {
         List<IngestJobSnapshot> snapShots = new ArrayList<>();
@@ -144,7 +147,7 @@ final class IngestJob {
         }
         return snapShots;
     }
-    
+
     /**
      * RJCTODO
      */
@@ -555,20 +558,38 @@ final class IngestJob {
      * @throws InterruptedException
      */
     private void createIngestPipelines(List<IngestModuleTemplate> ingestModuleTemplates) throws InterruptedException {
-        // RJCTODO: Use config file
-        // Sort the ingest module templates as required for the pipelines.
-        List<IngestModuleTemplate> firstStageDataSourceModuleTemplates = new ArrayList<>();
-        List<IngestModuleTemplate> secondStageDataSourceModuleTemplates = new ArrayList<>();
-        List<IngestModuleTemplate> fileIngestModuleTemplates = new ArrayList<>();
+        // RJCTODO: Improve variable names!
+
+        // Make mappings of ingest module factory class names to templates.
+        Map<String, IngestModuleTemplate> dataSourceModuleTemplates = new HashMap<>();
+        Map<String, IngestModuleTemplate> fileModuleTemplates = new HashMap<>();
         for (IngestModuleTemplate template : ingestModuleTemplates) {
             if (template.isDataSourceIngestModuleTemplate()) {
-                firstStageDataSourceModuleTemplates.add(template);
-            } else {
-                firstStageDataSourceModuleTemplates.add(template);
+                dataSourceModuleTemplates.put(template.getModuleFactory().getClass().getCanonicalName(), template);
+            }
+            if (template.isFileIngestModuleTemplate()) {
+                fileModuleTemplates.put(template.getModuleFactory().getClass().getCanonicalName(), template);
             }
         }
 
-        // Contruct the pipelines.
+        // Use the mappings and the ingest pipelines configuration to create
+        // ordered lists of ingest module templates for each ingest pipeline.
+        IngestPipelinesConfiguration pipelineConfigs = IngestPipelinesConfiguration.getInstance();
+        List<IngestModuleTemplate> firstStageDataSourceModuleTemplates = this.getConfiguredIngestModuleTemplates(dataSourceModuleTemplates, pipelineConfigs.getStageOneDataSourceIngestPipelineConfig());
+        List<IngestModuleTemplate> fileIngestModuleTemplates = this.getConfiguredIngestModuleTemplates(fileModuleTemplates, pipelineConfigs.getFileIngestPipelineConfig());
+        List<IngestModuleTemplate> secondStageDataSourceModuleTemplates = this.getConfiguredIngestModuleTemplates(dataSourceModuleTemplates, pipelineConfigs.getStageTwoDataSourceIngestPipelineConfig());
+
+        // Add any module templates that were not specified in the pipeline
+        // configurations to an appropriate pipeline - either the first stage
+        // data source ingest pipeline or the file ingest pipeline.
+        for (IngestModuleTemplate template : dataSourceModuleTemplates.values()) {
+            firstStageDataSourceModuleTemplates.add(template);
+        }
+        for (IngestModuleTemplate template : fileModuleTemplates.values()) {
+            fileIngestModuleTemplates.add(template);
+        }
+
+        // Construct the data source ingest pipelines.
         this.firstStageDataSourceIngestPipeline = new DataSourceIngestPipeline(this, firstStageDataSourceModuleTemplates);
         this.secondStageDataSourceIngestPipeline = new DataSourceIngestPipeline(this, secondStageDataSourceModuleTemplates);
         this.dataSourceIngestPipeline = firstStageDataSourceIngestPipeline;
@@ -580,6 +601,28 @@ final class IngestJob {
         }
     }
 
+    /**
+     * Use an ordered list of ingest module factory class names to create an
+     * ordered subset of a collection of ingest module templates.
The ingest module
+     * templates are removed from the input collection as they are added to the
+     * output collection.
+     *
+     * @param ingestModuleTemplates A mapping of ingest module factory class
+     * names to ingest module templates.
+     * @param pipelineConfig An ordered list of ingest module factory class
+     * names representing an ingest pipeline.
+     * @return An ordered list of ingest module templates.
+     */
+    List<IngestModuleTemplate> getConfiguredIngestModuleTemplates(Map<String, IngestModuleTemplate> ingestModuleTemplates, List<String> pipelineConfig) {
+        List<IngestModuleTemplate> templates = new ArrayList<>();
+        for (String moduleClassName : pipelineConfig) {
+            if (ingestModuleTemplates.containsKey(moduleClassName)) {
+                templates.add(ingestModuleTemplates.remove(moduleClassName));
+            }
+        }
+        return templates;
+    }
+
     /**
      * Starts up each of the file and data source ingest modules to collect
      * possible errors.
@@ -748,7 +791,7 @@ final class IngestJob {
                 this.dataSourceIngestProgress = null;
             }
         }
-        
+
         IngestJob.ingestJobsById.remove(this.id);
         if (!this.isCancelled()) {
             logger.log(Level.INFO, "Ingest job {0} completed", this.id);
@@ -784,7 +827,7 @@ final class IngestJob {
     class IngestJobSnapshot {
 
         private final long jobId;
-        private final String dataSource;  
+        private final String dataSource;
         private final long startTime;
         private final long processedFiles;
         private final long estimatedFilesToProcess;
@@ -809,7 +852,8 @@ final class IngestJob {
 
         /**
          * RJCTODO
-         * @return
+         *
+         * @return
         */
         long getJobId() {
             return this.jobId;
         }
 
        /**
         * RJCTODO
-        * @return
+        *
+        * @return
        */
        String getDataSource() {
            return dataSource;
-        }        
-        
+        }
+
        /**
         * Gets files per second throughput since job started.
         *
@@ -870,10 +915,11 @@ final class IngestJob {
        long getFilesEstimated() {
            return estimatedFilesToProcess;
        }
-        
+
        /**
         * RJCTODO
-        * @return
+        *
+        * @return
        */
        long getRootQueueSize() {
            return this.tasksSnapshot.getRootQueueSize();
        }
 
        /**
         * RJCTODO
-        * @return
+        *
+        * @return
        */
        long getDirQueueSize() {
            return this.tasksSnapshot.getDirQueueSize();
        }
 
        /**
         * RJCTODO
-        * @return
+        *
+        * @return
        */
        long getFileQueueSize() {
            return this.tasksSnapshot.getFileQueueSize();
        }
 
        /**
         * RJCTODO
-        * @return
+        *
+        * @return
        */
        long getDsQueueSize() {
            return this.tasksSnapshot.getDsQueueSize();
        }
 
        /**
         * RJCTODO
-        * @return
+        *
+        * @return
        */
        long getRunningListSize() {
            return this.tasksSnapshot.getRunningListSize();
-        }        
-        
+        }
+
    }
 }
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java
index e7ce367a64..e7c71bab23 100644
--- a/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java
@@ -51,6 +51,8 @@ final class IngestModuleFactoryLoader {
     private static final String SAMPLE_EXECUTABLE_MODULE_FACTORY_CLASS_NAME = SampleExecutableIngestModuleFactory.class.getCanonicalName();
     private static final ArrayList<String> coreModuleOrdering = new ArrayList<String>() {
         {
+            // RJCTODO: Find out where to put the photorec carver
+
             // The ordering of the core ingest module factories implemented
             // using Java is hard-coded.
add("org.sleuthkit.autopsy.recentactivity.RecentActivityExtracterModuleFactory"); //NON-NLS diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java index d829a7e300..f9fe24fb55 100755 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java @@ -18,13 +18,13 @@ */ package org.sleuthkit.autopsy.ingest; -import java.io.File; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.logging.Level; import org.sleuthkit.autopsy.coreutils.Logger; -import org.sleuthkit.autopsy.coreutils.ModuleSettings; import org.sleuthkit.autopsy.coreutils.PlatformUtil; import org.sleuthkit.autopsy.coreutils.XMLUtil; import org.w3c.dom.Document; @@ -33,32 +33,36 @@ import org.w3c.dom.NodeList; /** * Provides data source and file ingest pipeline configurations as ordered lists - * of ingest module class names. The order of the module class names indicates - * the desired sequence of ingest module instances in an ingest modules - * pipeline. + * of ingest module factory class names. */ final class IngestPipelinesConfiguration { private static final Logger logger = Logger.getLogger(IngestPipelinesConfiguration.class.getName()); - private static final String PIPELINE_CONFIG_FILE_VERSION_KEY = "PipelineConfigFileVersion"; //NON-NLS - private static final String PIPELINE_CONFIG_FILE_VERSION_NO_STRING = "1"; - private static final int PIPELINE_CONFIG_FILE_VERSION_NO = 1; - private static final String PIPELINES_CONFIG_FILE = "pipeline_config.xml"; //NON-NLS - private static final String PIPELINES_CONFIG_FILE_XSD = "PipelineConfigSchema.xsd"; //NON-NLS - private static final String XML_PIPELINE_ELEM = "PIPELINE"; //NON-NLS - private static final String XML_PIPELINE_TYPE_ATTR = "type"; //NON-NLS - private static final String DATA_SOURCE_INGEST_PIPELINE_TYPE = "ImageAnalysis"; //NON-NLS + private static final String PIPELINES_CONFIG_FILE = "PipelineConfig.xml"; //NON-NLS + private static final String PIPELINE_ELEM = "PIPELINE"; //NON-NLS + private static final int NUMBER_OF_PIPELINE_DEFINITIONS = 3; + private static final String PIPELINE_TYPE_ATTR = "type"; //NON-NLS + private static final String STAGE_ONE_DATA_SOURCE_INGEST_PIPELINE_ELEM = "ImageAnalysisStageOne"; //NON-NLS + private static final String STAGE_TWO_DATA_SOURCE_INGEST_PIPELINE_ELEM = "ImageAnalysisStageTwo"; //NON-NLS private static final String FILE_INGEST_PIPELINE_TYPE = "FileAnalysis"; //NON-NLS - private static final String XML_MODULE_ELEM = "MODULE"; //NON-NLS + private static final String INGEST_MODULE_ELEM = "MODULE"; //NON-NLS private static final String XML_MODULE_CLASS_NAME_ATTR = "location"; //NON-NLS + private static IngestPipelinesConfiguration instance; - private final List dataSourceIngestPipelineConfig = new ArrayList<>(); + + private final List stageOneDataSourceIngestPipelineConfig = new ArrayList<>(); private final List fileIngestPipelineConfig = new ArrayList<>(); + private final List stageTwoDataSourceIngestPipelineConfig = new ArrayList<>(); - private IngestPipelinesConfiguration() { - readPipelinesConfigurationFile(); - } - + // RJCTODO: Bring this code back into use, use it in IngestJob to sort things + // into the now three pipelines. 
+    // Other NBMs built on top of Autopsy that have custom pipeline config
+    // files can do a PlatformUtil.extractResourceToUserConfigDir() before
+    // this is called.
+
+    /**
+     * Gets the ingest pipelines configuration singleton.
+     *
+     * @return The singleton.
+     */
     synchronized static IngestPipelinesConfiguration getInstance() {
         if (instance == null) {
             Logger.getLogger(IngestPipelinesConfiguration.class.getName()).log(Level.INFO, "Creating ingest module loader instance"); //NON-NLS
@@ -67,57 +71,90 @@ final class IngestPipelinesConfiguration {
         return instance;
     }
 
-    List<String> getDataSourceIngestPipelineConfig() {
-        return new ArrayList<>(dataSourceIngestPipelineConfig);
+    /**
+     * Constructs an object that provides data source and file ingest pipeline
+     * configurations as ordered lists of ingest module factory class names.
+     */
+    private IngestPipelinesConfiguration() {
+        this.readPipelinesConfigurationFile();
     }
 
+    /**
+     * Gets the ordered list of ingest module factory class names for the
+     * first stage data source ingest pipeline.
+     *
+     * @return An ordered list of ingest module factory class names.
+     */
+    List<String> getStageOneDataSourceIngestPipelineConfig() {
+        return new ArrayList<>(stageOneDataSourceIngestPipelineConfig);
+    }
+
+    /**
+     * Gets the ordered list of ingest module factory class names for the
+     * file ingest pipeline.
+     *
+     * @return An ordered list of ingest module factory class names.
+     */
     List<String> getFileIngestPipelineConfig() {
         return new ArrayList<>(fileIngestPipelineConfig);
     }
 
+    /**
+     * Gets the ordered list of ingest module factory class names for the
+     * second stage data source ingest pipeline.
+     *
+     * @return An ordered list of ingest module factory class names.
+     */
+    List<String> getStageTwoDataSourceIngestPipelineConfig() {
+        return new ArrayList<>(stageTwoDataSourceIngestPipelineConfig);
+    }
+
+    /**
+     * Attempts to read the ingest pipeline configuration data from an XML file.
+     */
     private void readPipelinesConfigurationFile() {
         try {
-            boolean overWrite;
-            if (!ModuleSettings.settingExists(this.getClass().getSimpleName(), PIPELINE_CONFIG_FILE_VERSION_KEY)) {
-                ModuleSettings.setConfigSetting(this.getClass().getSimpleName(), PIPELINE_CONFIG_FILE_VERSION_KEY, PIPELINE_CONFIG_FILE_VERSION_NO_STRING);
-                overWrite = true;
-            } else {
-                int versionNumber = Integer.parseInt(ModuleSettings.getConfigSetting(this.getClass().getSimpleName(), PIPELINE_CONFIG_FILE_VERSION_KEY));
-                overWrite = versionNumber < PIPELINE_CONFIG_FILE_VERSION_NO;
-                // TODO: Migrate user edits
-            }
-            PlatformUtil.extractResourceToUserConfigDir(IngestPipelinesConfiguration.class, PIPELINES_CONFIG_FILE, overWrite);
+            PlatformUtil.extractResourceToUserConfigDir(IngestPipelinesConfiguration.class, PIPELINES_CONFIG_FILE, false);
 
-            String configFilePath = PlatformUtil.getUserConfigDirectory() + File.separator + PIPELINES_CONFIG_FILE;
-            Document doc = XMLUtil.loadDoc(IngestPipelinesConfiguration.class, configFilePath);
+            Path configFilePath = Paths.get(PlatformUtil.getUserConfigDirectory(), PIPELINES_CONFIG_FILE);
+            Document doc = XMLUtil.loadDoc(IngestPipelinesConfiguration.class, configFilePath.toAbsolutePath().toString());
             if (doc == null) {
                 return;
             }
 
+            // Get the document root element.
             Element rootElement = doc.getDocumentElement();
-            if (rootElement == null) {
+            if (null == rootElement) {
                 logger.log(Level.SEVERE, "Invalid pipelines config file"); //NON-NLS
                 return;
             }
 
-            NodeList pipelineElements = rootElement.getElementsByTagName(XML_PIPELINE_ELEM);
+            // Get the pipeline elements and confirm that the correct number is
+            // present.
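+            // For reference (an assumed shape, inferred from the element and
+            // attribute constants above rather than from a published schema):
+            // the parsing below expects a document along the lines of
+            //
+            //     <PIPELINE_CONFIG>
+            //         <PIPELINE type="ImageAnalysisStageOne">
+            //             <MODULE>fully.qualified.FactoryClassName</MODULE>
+            //         </PIPELINE>
+            //         ...
+            //     </PIPELINE_CONFIG>
+            //
+            // with exactly NUMBER_OF_PIPELINE_DEFINITIONS PIPELINE elements.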
+            NodeList pipelineElements = rootElement.getElementsByTagName(IngestPipelinesConfiguration.PIPELINE_ELEM);
             int numPipelines = pipelineElements.getLength();
-            if (numPipelines < 1 || numPipelines > 2) {
+            if (numPipelines != IngestPipelinesConfiguration.NUMBER_OF_PIPELINE_DEFINITIONS) {
                 logger.log(Level.SEVERE, "Invalid pipelines config file"); //NON-NLS
                 return;
             }
 
+            // Parse the pipeline elements to populate the pipeline
+            // configuration lists.
+            // RJCTODO: Should check that each element is unique. Or could try the XSD bit.
             List<String> pipelineConfig = null;
             for (int pipelineNum = 0; pipelineNum < numPipelines; ++pipelineNum) {
                 Element pipelineElement = (Element) pipelineElements.item(pipelineNum);
-                String pipelineTypeAttr = pipelineElement.getAttribute(XML_PIPELINE_TYPE_ATTR);
-                if (pipelineTypeAttr != null) {
+                String pipelineTypeAttr = pipelineElement.getAttribute(PIPELINE_TYPE_ATTR);
+                if (null != pipelineTypeAttr) {
                     switch (pipelineTypeAttr) {
-                        case DATA_SOURCE_INGEST_PIPELINE_TYPE:
-                            pipelineConfig = dataSourceIngestPipelineConfig;
+                        case STAGE_ONE_DATA_SOURCE_INGEST_PIPELINE_ELEM:
+                            pipelineConfig = this.stageOneDataSourceIngestPipelineConfig;
                             break;
                         case FILE_INGEST_PIPELINE_TYPE:
-                            pipelineConfig = fileIngestPipelineConfig;
+                            pipelineConfig = this.fileIngestPipelineConfig;
+                            break;
+                        case STAGE_TWO_DATA_SOURCE_INGEST_PIPELINE_ELEM:
+                            pipelineConfig = this.stageTwoDataSourceIngestPipelineConfig;
                             break;
                         default:
                             logger.log(Level.SEVERE, "Invalid pipelines config file"); //NON-NLS
@@ -128,16 +165,13 @@ final class IngestPipelinesConfiguration {
                 // Create an ordered list of class names. The sequence of class
                 // names defines the sequence of modules in the pipeline.
                 if (pipelineConfig != null) {
-                    NodeList modulesElems = pipelineElement.getElementsByTagName(XML_MODULE_ELEM);
+                    NodeList modulesElems = pipelineElement.getElementsByTagName(INGEST_MODULE_ELEM);
                     int numModules = modulesElems.getLength();
-                    if (numModules == 0) {
-                        break;
-                    }
                     for (int moduleNum = 0; moduleNum < numModules; ++moduleNum) {
                         Element moduleElement = (Element) modulesElems.item(moduleNum);
-                        final String moduleClassName = moduleElement.getAttribute(XML_MODULE_CLASS_NAME_ATTR);
-                        if (moduleClassName != null) {
-                            pipelineConfig.add(moduleClassName);
+                        String className = moduleElement.getTextContent();
+                        if (null != className && !className.isEmpty()) {
+                            pipelineConfig.add(className);
                         }
                     }
                 }
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml b/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml
new file mode 100644
index 0000000000..36de99011f
--- /dev/null
+++ b/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Default ingest pipeline definitions; each PIPELINE element lists ingest module factories in execution order. -->
+<PIPELINE_CONFIG>
+    <PIPELINE type="ImageAnalysisStageOne">
+        <MODULE>org.sleuthkit.autopsy.recentactivity.RecentActivityExtracterModuleFactory</MODULE>
+        <MODULE>org.sleuthkit.autopsy.modules.android.AndroidModuleFactory</MODULE>
+    </PIPELINE>
+
+    <PIPELINE type="FileAnalysis">
+        <MODULE>org.sleuthkit.autopsy.modules.hashdatabase.HashLookupModuleFactory</MODULE>
+        <MODULE>org.sleuthkit.autopsy.modules.filetypeid.FileTypeIdModuleFactory</MODULE>
+        <MODULE>org.sleuthkit.autopsy.modules.sevenzip.ArchiveFileExtractorModuleFactory</MODULE>
+        <MODULE>org.sleuthkit.autopsy.modules.exif.ExifParserModuleFactory</MODULE>
+        <MODULE>org.sleuthkit.autopsy.keywordsearch.KeywordSearchModuleFactory</MODULE>
+        <MODULE>org.sleuthkit.autopsy.thunderbirdparser.EmailParserModuleFactory</MODULE>
+        <MODULE>org.sleuthkit.autopsy.modules.fileextmismatch.FileExtMismatchDetectorModuleFactory</MODULE>
+        <MODULE>org.sleuthkit.autopsy.modules.interestingitems.InterestingItemsIngestModuleFactory</MODULE>
+    </PIPELINE>
+
+    <PIPELINE type="ImageAnalysisStageTwo">
+        <MODULE>org.sleuthkit.autopsy.modules.e01verify.E01VerifierModuleFactory</MODULE>
+    </PIPELINE>
+
+</PIPELINE_CONFIG>
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/pipeline_config.xml
b/Core/src/org/sleuthkit/autopsy/ingest/pipeline_config.xml
deleted file mode 100644
index 542f168986..0000000000
--- a/Core/src/org/sleuthkit/autopsy/ingest/pipeline_config.xml
+++ /dev/null
@@ -1,18 +0,0 @@

From 3605f29b296d6996fc1e0e06115cb94bb2281778 Mon Sep 17 00:00:00 2001
From: Brian Carrier
Date: Mon, 3 Nov 2014 10:59:38 -0500
Subject: [PATCH 03/10] Updated user docs

---
 docs/doxygen-user/Doxyfile        |   2 +-
 docs/doxygen-user/footer.html     |   7 +
 docs/doxygen-user/quick_start.dox | 267 ++++++++++--------------------
 docs/doxygen/Doxyfile             |   2 +-
 docs/doxygen/main.dox             |   2 +-
 5 files changed, 101 insertions(+), 179 deletions(-)
 create mode 100644 docs/doxygen-user/footer.html

diff --git a/docs/doxygen-user/Doxyfile b/docs/doxygen-user/Doxyfile
index 9d26ffe51c..c32fdc4e55 100644
--- a/docs/doxygen-user/Doxyfile
+++ b/docs/doxygen-user/Doxyfile
@@ -900,7 +900,7 @@ HTML_HEADER =
 # each generated HTML page. If it is left blank doxygen will generate a
 # standard footer.
 
-HTML_FOOTER =
+HTML_FOOTER = footer.html
 
 # The HTML_STYLESHEET tag can be used to specify a user-defined cascading
 # style sheet that is used by each HTML page. It can be used to
diff --git a/docs/doxygen-user/footer.html b/docs/doxygen-user/footer.html
new file mode 100644
index 0000000000..f62135d90b
--- /dev/null
+++ b/docs/doxygen-user/footer.html
@@ -0,0 +1,7 @@
+<hr/>
+<p><i>
+Copyright © 2012-2014 Basis Technology<br/>
+This work is licensed under a
+<a rel="license" href="http://creativecommons.org/licenses/by-sa/3.0/us/">Creative Commons Attribution-Share Alike 3.0 United States License</a>.
+</i></p>
+</body>
+</html>
diff --git a/docs/doxygen-user/quick_start.dox b/docs/doxygen-user/quick_start.dox
index 9f326a271b..ace3dbe513 100644
--- a/docs/doxygen-user/quick_start.dox
+++ b/docs/doxygen-user/quick_start.dox
@@ -1,200 +1,115 @@
 /*! \page quick_start_page Quick Start Guide
+\section s1 Adding a Data Source (image, local disk, logical files)

-Adding a Data Source (image, local disk, logical files)
-Data sources are added to a case. A case can have a single data source or it can have multiple data source if they are related.
-Currently, a single report is generated for an entire case, so if you need to report on individual data sources, then you should use one data source per case.
+Data sources are added to a case. A case can have a single data source or it can have multiple data sources if they are related. Currently, a single report is generated for an entire case, so if you need to report on individual data sources, then you should use one data source per case.

-Creating a Case
-To create a case, use either the "Create New Case" option on the Welcome screen or from the "File" menu.
-This will start the New Case Wizard. You will need to supply it with the name of the case and a directory to store the case results into.
-You can optionally provide case numbers and other details.
+\subsection s2 Creating a Case
+To create a case, use either the "Create New Case" option on the Welcome screen or from the "File" menu. This will start the New Case Wizard. You will need to supply it with the name of the case and a directory to store the case results into. You can optionally provide case numbers and other details.
+
+\subsection s3 Adding a Data Source
+The next step is to add an input data source to the case. The Add Data Source Wizard will start automatically after the case is created or you can manually start it from the "File" menu or toolbar. You will need to choose the type of input data source to add (image, local disk or logical files and folders). Next, supply it with the location of the source to add.
+
+- For a disk image, browse to the first file in the set (Autopsy will find the rest of the files). Autopsy currently supports E01 and raw (dd) files.
+- For local disk, select one of the detected disks. Autopsy will add the current view of the disk to the case (i.e. snapshot of the meta-data). However, the individual file content (not meta-data) does get updated with the changes made to the disk. Note, you may need to run Autopsy as an Administrator to detect all disks.
+- For logical files (a single file or folder of files), use the "Add" button to add one or more files or folders on your system to the case. Folders will be recursively added to the case.
+
+There are a couple of options in the wizard that will allow you to make the ingest process faster. These typically deal with deleted files. It will take longer if unallocated space is analyzed and the entire drive is searched for deleted files. In some scenarios, these recovery steps must be performed and in other scenarios these steps are not needed and instead fast results on the allocated files are needed. Use these options to control how long the analysis will take.
+
+Autopsy will start to analyze these data sources and add them to the case and internal database. While it is doing that, it will prompt you to configure the Ingest Modules.

-Adding a Data Source
-The next step is to add input data source to the case.
-The Add Data Source Wizard will start automatically after the case is created or you can manually start it from the "File" menu or toolbar.
-You will need to choose the type of input data source to add (image, local disk or logical files and folders).
-Next, supply it with the location of the source to add.
-For a disk image, browse to the first file in the set (Autopsy will find the rest of the files). Autopsy currently supports E01 and raw (dd) files.
-For local disk, select one of the detected disks. Autopsy will add the current view of the disk to the case (i.e. snapshot of the meta-data). However, the individual file content (not meta-data) does get updated with the changes made to the disk. Note, you may need run Autopsy as an Administrator to detect all disks.
-For logical files (a single file or folder of files), use the "Add" button to add one or more files or folders on your system to the case. Folders will be recursively added to the case.
-There are a couple of options in the wizard that will allow you to make the ingest process faster. These typically deal with deleted files. It will take longer if unallocated space is analyzed and the entire drive is searched for deleted files. In some scenarios, these recovery steps must be performed and in other scenarios these steps are not needed and instead fast results on the allocated files are needed. Use these options to control how long the analysis will take.
-Autopsy will start to analyze these data sources and add them to the case and internal database. While it is doing that, it will prompt you to configure the Ingest Modules.
+
+\subsection s4 Ingest Modules
+
+You will next be prompted to configure the Ingest Modules. Ingest modules will run in the background and perform specific tasks. The Ingest Modules analyze files in a prioritized order so that files in a user's directory are analyzed before files in other folders. Ingest modules can be developed by third parties and here are some of the standard ingest modules that come with Autopsy:
+
+- Recent Activity extracts user activity as saved by web browsers and the OS. Also runs regripper on the registry hive.
+- Hash Lookup uses hash databases to ignore known files from the NIST NSRL and flag known bad files. Use the "Advanced" button to add and configure the hash databases to use during this process. You will get updates on known bad file hits as the ingest occurs. You can later add hash databases via the Tools -> Options menu in the main UI. You can download an index of the NIST NSRL from http://sourceforge.net/projects/autopsy/files/NSRL/
+- Keyword Search uses keyword lists to identify files with specific words in them. You can select the keyword lists to search for automatically and you can create new lists using the "Advanced" button. Note that with keyword search, you can always conduct searches after ingest has finished. The keyword lists that you select during ingest will be searched for at periodic intervals and you will get the results in real-time. You do not need to wait for all files to be indexed.
+- Archive Extractor opens ZIP, RAR, and other archive formats and sends the files from those archive files back through the pipelines for analysis.
+- Exif Image Parser extracts EXIF information from JPEG files and posts the results into the tree in the main UI.
+- Thunderbird Parser identifies Thunderbird MBOX files and extracts the e-mails from them.

- Ingest Modules

- You will next be prompted to configure the Ingest Modules.
- Ingest modules will run in the background and perform specific tasks.
- The Ingest Modules analyze files in a prioritized order so that files in a user's directory are analyzed before files in other folders.
- Ingest modules can be developed by third-parties and here are some of the standard ingest modules that come with Autopsy:

-   • Recent Activity extracts user activity as saved by web browsers and the OS. Also runs regripper on the registry hive.
-   • Hash Lookup uses hash databases to ignore known files from the NIST NSRL and flag known bad files. Use the "Advanced" button to add and configure the hash databases to use during this process. You will get updates on known bad file hits as the ingest occurs. You can later add hash databases via the Tools -> Options menu in the main UI. You can download an index of the NIST NSRL from here.
-   • Keyword Search uses keyword lists to identify files with specific words in them. You can select the keyword lists to search for automatically and you can create new lists using the "Advanced" button. Note that with keyword search, you can always conduct searches after ingest has finished. The keyword lists that you select during ingest will be searched for at periodic intervals and you will get the results in real-time. You do not need to wait for all files to be indexed.
-   • Archive Extractor opens ZIP, RAR, and other archive formats and sends the files from those archive files back through the pipelines for analysis.
-   • Exif Image Parser extracts EXIF information from JPEG files and posts the results into the tree in the main UI.
-   • Thunderbird Parser Identifies Thunderbird MBOX files and extracts the e-mails from them.

- When you select a module, you will have the option to change its settings.
- For example, you can configure which keyword search lists to use during ingest and which hash databases to use.
- Refer to the help system inside of Autopsy for details on configuring each module.

- While ingest modules are running in the background, you will see a progress bar in the lower right.
- You can use the GUI to review incoming results and perform other tasks while ingest is running.

+When you select a module, you will have the option to change its settings. For example, you can configure which keyword search lists to use during ingest and which hash databases to use. Refer to the help system inside of Autopsy for details on configuring each module.
+
+While ingest modules are running in the background, you will see a progress bar in the lower right. You can use the GUI to review incoming results and perform other tasks while ingest is running.
+
+\section s1a Analysis Basics
+
+\image html screenshot.png
+
+You will start all of your analysis techniques from the tree on the left.
+
+- The Data Sources root node shows all data in the case.
+  - The individual image nodes show the file system structure of the disk images or local disks in the case.
+  - The LogicalFileSet nodes show the logical files in the case.
+- The Views node shows the same data from a file type or timeline perspective.
+- The Results node shows the output from the ingest modules.
+
+When you select a node from the tree on the left, a list of files will be shown in the upper right. You can use the Thumbnail view in the upper right to view the pictures. When you select a file from the upper right, its contents will be shown in the lower right. You can use the tabs in the lower right to view the text of the file, an image, or the hex data.
+
+If you are viewing files from the Views and Results nodes, you can right-click on a file to go to its file system location. This feature is useful to see what else the user stored in the same folder as the file that you are currently looking at. You can also right-click on a file to extract it to the local system.
+
+If you want to search for single keywords, then you can use the search box in the upper right of the program. The results will be shown in a table in the upper right.
+
+You can tag (or bookmark) arbitrary files so that you can more quickly find them later or so that you can include them specifically in a report.
+
+\subsection s2a Ingest Inbox
+
+As you are going through the results in the tree, the ingest modules are running in the background.
+The results are shown in the tree as soon as the ingest modules find them and report them.
+
+The Ingest Inbox receives messages from the ingest modules as they find results.
+You can open the inbox to see what has been recently found.
+It keeps track of what messages you have read.
+
+The intended use of this inbox is that you can focus on some data for a while and then check back on the inbox at a time that is convenient for you.
+You can then see what else was found while you were focused on the previous task.
+You may learn that a known bad file was found or that a file was found with a relevant keyword and then decide to focus on that for a while.
+
+When you select a message, you can then jump to the Results tree where more details can be found or jump to the file's location in the filesystem.
+
+\subsection s2b Timeline (beta)
+There is a basic timeline view that you can access via the Tools -> Make Timeline feature. This will take a few minutes to create the timeline for analysis. Its features are still in development.

- Analysis Basics

+\section s5 Example Use Cases
+In this section, we will provide examples of how to do common analysis tasks.
+\subsection s5a Web Artifacts

- Autopsy Screenshot

- You will start all of your analysis techniques from the tree on the left.

-   • The Data Sources root node shows all data in the case.
-     • The individual image nodes show the file system structure of the disk images or local disks in the case.
-     • The LogicalFileSet nodes show the logical files in the case.
-   • The Views node shows the same data from a file type or timeline perspective.
-   • The Results node shows the output from the ingest modules.
+If you want to view the user's recent web activity, make sure that the Recent Activity ingest module was enabled.
+You can then go to the "Results" node in the tree on the left and then into the "Extracted Data" node.
+There, you can find bookmarks, cookies, downloads, and history.

- When you select a node from the tree on the left, a list of files will be shown in the upper right.
- You can use the Thumbnail view in the upper right to view the pictures.
- When you select a file from the upper right, its contents will be shown in the lower right.
- You can use the tabs in the lower right to view the text of the file, an image, or the hex data.

+\subsection s5b Known Bad Hash Files

- If you are viewing files from the Views and Results nodes, you can right-click on a file to go to its file system location.
- This feature is useful to see what else the user stored in the same folder as the file that you are currently looking at.
- You can also right click on a file to extract it to the local system.

- If you want to search for single keywords, then you can use the search box in the upper right of the program.
- The results will be shown in a table in the upper right.

+If you want to see if the data source had known bad files, make sure that the Hash Lookup ingest module was enabled.
+You can then view the "Hashset Hits" section in the "Results" area of the tree on the left.
+Note that hash lookup can take a long time, so this section will be updated as long as the ingest process is occurring.
+Use the Ingest Inbox to keep track of what known bad files were recently found.

- You can tag (or bookmark) arbitrary files so that you can more quickly find them later or so that you can include them specifically in a report.

+When you find a known bad file in this interface, you may want to right-click on the file to also view the file's original location.
+You may find additional files that are relevant and stored in the same folder as this file.

- Ingest Inbox

- As you are going through the results in the tree, the ingest modules are running in the background.
- The results are shown in the tree as soon as the ingest modules find them and report them.

- The Ingest Inbox receives messages from the ingest modules as they find results.
- You can open the inbox to see what has been recently found.
- It keeps track of what messages you have read.

- The intended use of this inbox is that you can focus on some data for a while and then check back on the inbox at a time that is convenient for you.
- You can then see what else was found while you were focused on the previous task.
- You may learn that a known bad file was found or that a file was found with a relevant keyword and then decide to focus on that for a while.

- When you select a message, you can then jump to the Results tree where more details can be found or jump to the file's location in the filesystem.

+\subsection s5c Media: Images and Videos

- Timeline (Beta)

- There is a basic timeline view that you can access via the Tools -> Make Timeline feature. This will take a few minutes to create the timeline for analysis. Its features are still in development.

+If you want to see all images and video on the disk image, then go to the "Views" section in the tree on the left and then "File Types".
+Select either "Images" or "Videos".
+You can use the thumbnail option in the upper right to view thumbnails of all images.
+Note: We are working on making this more efficient when there are many images, and we are working on displaying video thumbnails.

- Example Use Cases

- In this section, we will provide examples of how to do common analysis tasks.


- Web Artifacts

- If you want to view the user's recent web activity, make sure that the Recent Activity ingest module was enabled.
- You can then go to the "Results" node in the tree on the left and then into the "Extracted Data" node.
- There, you can find bookmarks, cookies, downloads, and history.


- Known Bad Hash Files

- If you want to see if the data source had known bad files, make sure that the Hash Lookup ingest module was enabled.
- You can then view the "Hashset Hits" section in the "Results" area of the tree on the left.
- Note that hash lookup can take a long time, so this section will be updated as long as the ingest process is occurring.
- Use the Ingest Inbox to keep track of what known bad files were recently found.

- When you find a known bad file in this interface, you may want to right click on the file to also view the file's original location.
- You may find additional files that are relevant and stored in the same folder as this file.


- Media: Images and Videos

- If you want to see all images and video on the disk image, then go to the "Views" section in the tree on the left and then "File Types".
- Select either "Images" or "Videos".
- You can use the thumbnail option in the upper right to view thumbnails of all images.

-   • Note: We are working on making this more efficient when there are lots of images and we are working on the feature to display video thumbnails.

- You can select an image or video from the upper right and view the video or image in the lower right. Video will be played with sound.


- Reporting

- A final report can be generated that will include all analysis results.
- Use the "Generate Report" button to create this.
- It will create an HTML or XLS report in the Reports folder of the case folder.
- If you forgot the location of your case folder, you can determine it using the "Case Properties" option in the "File" menu.
- There is also an option to export report files to a separate folder outside of the case folder.


- Copyright © 2012-2013 Basis Technology.

- This work is licensed under a Creative Commons Attribution-Share Alike 3.0 United States License.

+You can select an image or video from the upper right and view the video or image in the lower right. Video will be played with sound.
+\section s6 Reporting
+A final report can be generated that will include all analysis results.
+Use the "Generate Report" button to create this.
+It will create an HTML or XLS report in the Reports folder of the case folder.
+If you forgot the location of your case folder, you can determine it using the "Case Properties" option in the "File" menu.
+There is also an option to export report files to a separate folder outside of the case folder.
*/
diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile
index f0306e6804..5b0b6be632 100644
--- a/docs/doxygen/Doxyfile
+++ b/docs/doxygen/Doxyfile
@@ -915,7 +915,7 @@ HTML_HEADER =
 # each generated HTML page. If it is left blank doxygen will generate a
 # standard footer.
 
-HTML_FOOTER =
+HTML_FOOTER = footer.html
 
 # The HTML_STYLESHEET tag can be used to specify a user-defined cascading
 # style sheet that is used by each HTML page. It can be used to
diff --git a/docs/doxygen/main.dox b/docs/doxygen/main.dox
index feffe9894d..4829af7959 100644
--- a/docs/doxygen/main.dox
+++ b/docs/doxygen/main.dox
@@ -11,7 +11,7 @@ If you want to write modules, then these pages are for you:
 - \subpage platform_page
 - Setup
   - \subpage mod_dev_page
-  - \subpage mod_dev_python_page
+  - \subpage mod_dev_py_page
 - The following are based on specific types of modules:
   - \subpage mod_ingest_page
   - \subpage mod_report_page

From 1485b0214bf10fbd22270c5168f67428d6bd5157 Mon Sep 17 00:00:00 2001
From: Richard Cordovano
Date: Mon, 3 Nov 2014 11:13:58 -0500
Subject: [PATCH 04/10] Add TODO comment for the next round of work on
 multi-stage ingest

---
 Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java
index a9abef15f7..c9c97d8ab7 100755
--- a/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java
@@ -452,6 +452,8 @@ final class IngestTasksScheduler {
         return true;
     }
 
+    // RJCTODO: Is this still necessary? There is code elsewhere to remove and
+    // re-add the task to the tasks in progress list.
     /**
      * A helper method to safely add a file ingest task to the blocking pending
      * tasks queue.

From 6192244236c11e2e79e109faa6d0dab59adcc7ed Mon Sep 17 00:00:00 2001
From: Brian Carrier
Date: Mon, 3 Nov 2014 17:52:25 -0500
Subject: [PATCH 05/10] Added reference to addReport

---
 docs/doxygen/modReport.dox | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/doxygen/modReport.dox b/docs/doxygen/modReport.dox
index a5425f5e44..c61471f5f1 100644
--- a/docs/doxygen/modReport.dox
+++ b/docs/doxygen/modReport.dox
@@ -66,6 +66,10 @@ Autopsy will also display the panel returned by getConfigurationPanel() in the g
 
 Typically a general report module should interact with both the Blackboard API in the org.sleuthkit.datamodel.SleuthkitCase class and an API (possibly external/third-party) to convert Blackboard Artifacts to the desired reporting format.
 
+\subsection report_create_module_showing Showing Results
+
+You should call Case.addReport() with the path to your report so that it is shown in the Autopsy tree. You can specify a specific file or folder and the user can then view it later.
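+
+A minimal, hypothetical sketch of that call follows; the reportPath variable and the module and report names are placeholders, and the three-argument addReport(path, sourceModuleName, reportName) overload is assumed:
+
+\code
+Case currentCase = Case.getCurrentCase();
+try {
+    // reportPath is the file or folder your module just wrote.
+    currentCase.addReport(reportPath, "Sample Report Module", "Sample Report");
+} catch (TskCoreException ex) {
+    // The report still exists on disk; it just will not appear in the tree.
+}
+\endcode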
+ \subsection report_create_module_layer Installing your Report Module Report modules developed using Java must be registered in a layer.xml file. This file allows Autopsy to find the report module. From 3b9b10ea2ae4488a858de5ef030e3edbc598be83 Mon Sep 17 00:00:00 2001 From: Richard Cordovano Date: Mon, 3 Nov 2014 23:56:12 -0500 Subject: [PATCH 06/10] Complete first version of multi-stage ingest --- .../DataSourceIngestModuleProgress.java | 2 +- .../sleuthkit/autopsy/ingest/IngestJob.java | 442 ++++++++++-------- .../autopsy/ingest/IngestJobContext.java | 18 +- .../autopsy/ingest/IngestManager.java | 79 ++-- .../ingest/IngestModuleFactoryLoader.java | 4 +- .../ingest/IngestPipelinesConfiguration.java | 10 +- .../autopsy/ingest/IngestTasksScheduler.java | 324 ++++++------- .../autopsy/ingest/PipelineConfig.xml | 1 + .../PhotoRecCarverIngestModuleFactory.java | 1 - 9 files changed, 442 insertions(+), 439 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestModuleProgress.java b/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestModuleProgress.java index 1fa66aa07c..a972fd86a4 100644 --- a/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestModuleProgress.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestModuleProgress.java @@ -67,7 +67,7 @@ public class DataSourceIngestModuleProgress { * @param message Message to display */ public void progress(String message) { - this.job.advanceDataSourceIngestProgressBar(message); // RJCTODO: Is this right? + this.job.advanceDataSourceIngestProgressBar(message); } /** diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java index c050fd0a1c..fce16f5bb6 100644 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java @@ -42,23 +42,35 @@ import org.sleuthkit.datamodel.Content; */ final class IngestJob { - private static final Logger logger = Logger.getLogger(IngestJob.class.getName()); - private static final IngestTasksScheduler ingestScheduler = IngestTasksScheduler.getInstance(); - - // These static fields are used for the creation and management of ingest - // jobs in progress. - private static volatile boolean jobCreationIsEnabled; - private static final AtomicLong nextIngestJobId = new AtomicLong(0L); - private static final ConcurrentHashMap ingestJobsById = new ConcurrentHashMap<>(); - - // An ingest job may have multiple stages. + /** + * An ingest job may have multiple stages. + */ private enum Stages { - FIRST, // High priority data source ingest modules plus file ingest modules - SECOND // Low priority data source ingest modules + /** + * High priority data source ingest modules and file ingest modules. + */ + FIRST, + /** + * Lower priority, usually long-running, data source ingest modules. + */ + SECOND }; - // These fields define the ingest job and the work it entails. + private static final Logger logger = Logger.getLogger(IngestJob.class.getName()); + private static final IngestTasksScheduler taskScheduler = IngestTasksScheduler.getInstance(); + + /** + * These static fields are used for the creation and management of ingest + * jobs in progress. + */ + private static volatile boolean jobCreationIsEnabled; + private static final AtomicLong nextJobId = new AtomicLong(0L); + private static final ConcurrentHashMap jobsById = new ConcurrentHashMap<>(); + + /** + * These fields define the ingest job and the work it entails. 
+ */ private final long id; private final Content dataSource; private final boolean processUnallocatedSpace; @@ -68,10 +80,12 @@ final class IngestJob { private DataSourceIngestPipeline secondStageDataSourceIngestPipeline; private final LinkedBlockingQueue fileIngestPipelines; - // These fields are used to update the ingest progress UI components. The - // filesInProgress collection contains the names of the files that are in - // the file ingest pipelines and the two file counter fields are used to - // update the file ingest progress bar. + /** + * These fields are used to update ingest progress UI components for the + * job. The filesInProgress collection contains the names of the files that + * are in the file ingest pipelines and the two file counter fields are used + * to update the file ingest progress bar. + */ private ProgressHandle dataSourceIngestProgress; private final Object dataSourceIngestProgressLock; private final List filesInProgress; @@ -80,12 +94,16 @@ final class IngestJob { private ProgressHandle fileIngestProgress; private final Object fileIngestProgressLock; - // These fields support cancellation of either the currently running data - // source ingest module or the entire ingest job. + /** + * These fields support cancellation of either the currently running data + * source ingest module or the entire ingest job. + */ private volatile boolean currentDataSourceIngestModuleCancelled; private volatile boolean cancelled; - // This field is used for generating ingest job diagnostic data. + /** + * This field is used for generating ingest job diagnostic data. + */ private final long startTime; /** @@ -98,29 +116,27 @@ final class IngestJob { } /** - * Creates an ingest job for a data source. + * Starts an ingest job for a data source. * * @param dataSource The data source to ingest. * @param ingestModuleTemplates The ingest module templates to use to create * the ingest pipelines for the job. * @param processUnallocatedSpace Whether or not the job should include * processing of unallocated space. - * * @return A collection of ingest module start up errors, empty on success. - * - * @throws InterruptedException */ - static List startJob(Content dataSource, List ingestModuleTemplates, boolean processUnallocatedSpace) throws InterruptedException { + static List startJob(Content dataSource, List ingestModuleTemplates, boolean processUnallocatedSpace) { List errors = new ArrayList<>(); if (IngestJob.jobCreationIsEnabled) { - long jobId = nextIngestJobId.incrementAndGet(); + long jobId = nextJobId.incrementAndGet(); IngestJob job = new IngestJob(jobId, dataSource, processUnallocatedSpace); + IngestJob.jobsById.put(jobId, job); errors = job.start(ingestModuleTemplates); - if (errors.isEmpty() && (job.hasDataSourceIngestPipeline() || job.hasFileIngestPipeline())) { // RJCTODO: What about 2nd stage only? - ingestJobsById.put(jobId, job); + if (errors.isEmpty() && job.hasIngestPipeline()) { IngestManager.getInstance().fireIngestJobStarted(jobId); - IngestJob.ingestScheduler.scheduleIngestTasks(job); - logger.log(Level.INFO, "Ingest job {0} started", jobId); + IngestJob.logger.log(Level.INFO, "Ingest job {0} started", jobId); + } else { + IngestJob.jobsById.remove(jobId); } } return errors; @@ -132,27 +148,27 @@ final class IngestJob { * @return True or false. */ static boolean ingestJobsAreRunning() { - return !ingestJobsById.isEmpty(); + return !jobsById.isEmpty(); } /** - * RJCTODO + * Gets snapshots of the state of all running ingest jobs. 
* - * @return + * @return A list of ingest job state snapshots. */ static List getJobSnapshots() { List snapShots = new ArrayList<>(); - for (IngestJob job : IngestJob.ingestJobsById.values()) { + for (IngestJob job : IngestJob.jobsById.values()) { snapShots.add(job.getIngestJobSnapshot()); } return snapShots; } /** - * RJCTODO + * Cancels all running ingest jobs. */ static void cancelAllJobs() { - for (IngestJob job : ingestJobsById.values()) { + for (IngestJob job : jobsById.values()) { job.cancel(); } } @@ -165,7 +181,7 @@ final class IngestJob { * @param processUnallocatedSpace Whether or not unallocated space should be * processed during the ingest job. */ - IngestJob(long id, Content dataSource, boolean processUnallocatedSpace) { + private IngestJob(long id, Content dataSource, boolean processUnallocatedSpace) { this.id = id; this.dataSource = dataSource; this.processUnallocatedSpace = processUnallocatedSpace; @@ -178,9 +194,9 @@ final class IngestJob { } /** - * Gets the identifier assigned to the ingest job. + * Gets the identifier assigned to this job. * - * @return The ingest job identifier. + * @return The job identifier. */ long getId() { return this.id; @@ -206,51 +222,12 @@ final class IngestJob { } /** - * Starts up the ingest pipelines and ingest progress bars. - * - * @return A collection of ingest module startup errors, empty on success. - * @throws InterruptedException - */ - List start(List ingestModuleTemplates) throws InterruptedException { - this.createIngestPipelines(ingestModuleTemplates); - List errors = startUpIngestPipelines(); - if (errors.isEmpty()) { - if (!this.dataSourceIngestPipeline.isEmpty()) { - this.startDataSourceIngestProgressBar(); - } - if (!this.fileIngestPipelines.peek().isEmpty()) { - this.startFileIngestProgressBar(); - } - } - return errors; - } - - /** - * Checks to see if this job has a data source ingest pipeline. - * - * @return True or false. - */ - boolean hasDataSourceIngestPipeline() { - return (this.dataSourceIngestPipeline.isEmpty() == false); - } - - /** - * Checks to see if the job has a file ingest pipeline. - * - * @return True or false. - */ - boolean hasFileIngestPipeline() { - return (this.fileIngestPipelines.peek().isEmpty() == false); - } - - /** - * Passes the data source for this job through the data source ingest + * Passes the data source for this job through a data source ingest * pipeline. * * @param task A data source ingest task wrapping the data source. - * @throws InterruptedException */ - void process(DataSourceIngestTask task) throws InterruptedException { + void process(DataSourceIngestTask task) { try { if (!this.isCancelled() && !this.dataSourceIngestPipeline.isEmpty()) { List errors = new ArrayList<>(); @@ -268,9 +245,12 @@ final class IngestJob { } } } finally { - // No matter what happens, let the ingest scheduler know that this - // task is completed. - IngestJob.ingestScheduler.notifyTaskCompleted(task); + // No matter what happens, let the task scheduler know that this + // task is completed and check for job completion. + IngestJob.taskScheduler.notifyTaskCompleted(task); + if (IngestJob.taskScheduler.tasksForJobAreCompleted(this)) { + this.handleTasksCompleted(); + } } } @@ -284,11 +264,15 @@ final class IngestJob { void process(FileIngestTask task) throws InterruptedException { try { if (!this.isCancelled()) { - // Get a file ingest pipeline not currently in use by another - // file ingest thread. + /** + * Get a file ingest pipeline not currently in use by another + * file ingest thread. 
+ */ FileIngestPipeline pipeline = this.fileIngestPipelines.take(); if (!pipeline.isEmpty()) { - // Get the file to process. + /** + * Get the file to process. + */ AbstractFile file = task.getFile(); // Update the file ingest progress bar. @@ -328,46 +312,28 @@ final class IngestJob { this.fileIngestPipelines.put(pipeline); } } finally { - // No matter what happens, let the ingest scheduler know that this - // task is completed. - IngestJob.ingestScheduler.notifyTaskCompleted(task); - } - } - - /** - * - * @param file - */ - void addFiles(List files) { - // RJCTODO: Add handling of lack of support for file ingest in second stage - for (AbstractFile file : files) { - try { - // RJCTODO: Deal with possible IllegalStateException; maybe don't need logging here - IngestJob.ingestScheduler.scheduleFileIngestTask(this, file); - } catch (InterruptedException ex) { - // Handle the unexpected interrupt here rather than make ingest - // module writers responsible for writing this exception handler. - // The interrupt flag of the thread is reset for detection by - // the thread task code. - Thread.currentThread().interrupt(); - IngestJob.logger.log(Level.SEVERE, "File task scheduling unexpectedly interrupted", ex); //NON-NLS + // No matter what happens, let the task scheduler know that this + // task is completed and check for job completion. + IngestJob.taskScheduler.notifyTaskCompleted(task); + if (IngestJob.taskScheduler.tasksForJobAreCompleted(this)) { + this.handleTasksCompleted(); } } } /** - * Allows the ingest tasks scheduler to notify this ingest job whenever all - * the scheduled tasks for this ingest job have been completed. + * Adds more files to an ingest job, i.e., derived or carved files. Not + * currently supported for the second stage of the job. + * + * @param files A list of files to add. */ - void notifyTasksCompleted() { - switch (this.stage) { - case FIRST: - this.finishFirstStage(); - this.startSecondStage(); - break; - case SECOND: - this.finish(); - break; + void addFiles(List files) { + if (IngestJob.Stages.FIRST == this.stage) { + for (AbstractFile file : files) { + IngestJob.taskScheduler.scheduleFileIngestTask(this, file); + } + } else { + IngestJob.logger.log(Level.SEVERE, "Adding files during second stage not supported"); //NON-NLS } } @@ -432,7 +398,6 @@ final class IngestJob { } } - // RJCTODO: Is this right? /** * Updates the data source ingest progress bar display name. * @@ -527,8 +492,10 @@ final class IngestJob { this.cancelled = true; - // Tell the ingest scheduler to cancel all pending tasks. - IngestJob.ingestScheduler.cancelPendingTasksForIngestJob(this); + /** + * Tell the task scheduler to cancel all pending tasks. + */ + IngestJob.taskScheduler.cancelPendingTasksForIngestJob(this); } /** @@ -541,25 +508,13 @@ final class IngestJob { return this.cancelled; } - /** - * Get some basic performance statistics on this job. - * - * @return An ingest job statistics object. - */ - IngestJobSnapshot getIngestJobSnapshot() { - return new IngestJobSnapshot(); - } - /** * Creates the file and data source ingest pipelines. * * @param ingestModuleTemplates Ingest module templates to use to populate * the pipelines. - * @throws InterruptedException */ - private void createIngestPipelines(List ingestModuleTemplates) throws InterruptedException { - // RJCTODO: Improve variable names! - + private void createIngestPipelines(List ingestModuleTemplates) { // Make mappings of ingest module factory class names to templates. 
Map dataSourceModuleTemplates = new HashMap<>(); Map fileModuleTemplates = new HashMap<>(); @@ -595,9 +550,18 @@ final class IngestJob { this.dataSourceIngestPipeline = firstStageDataSourceIngestPipeline; // Construct the file ingest pipelines. - int numberOfFileIngestThreads = IngestManager.getInstance().getNumberOfFileIngestThreads(); - for (int i = 0; i < numberOfFileIngestThreads; ++i) { - this.fileIngestPipelines.put(new FileIngestPipeline(this, fileIngestModuleTemplates)); + try { + int numberOfFileIngestThreads = IngestManager.getInstance().getNumberOfFileIngestThreads(); + for (int i = 0; i < numberOfFileIngestThreads; ++i) { + this.fileIngestPipelines.put(new FileIngestPipeline(this, fileIngestModuleTemplates)); + } + } catch (InterruptedException ex) { + /** + * The current thread was interrupted while blocked on a full queue. + * Blocking should never happen here, but reset the interrupted flag + * rather than just swallowing the exception. + */ + Thread.currentThread().interrupt(); } } @@ -623,14 +587,121 @@ final class IngestJob { return templates; } + /** + * Starts up the ingest pipelines and ingest progress bars. + * + * @return A collection of ingest module startup errors, empty on success. + */ + private List start(List ingestModuleTemplates) { + this.createIngestPipelines(ingestModuleTemplates); + List errors = startUpIngestPipelines(); + if (errors.isEmpty()) { + if (this.hasFirstStageDataSourceIngestPipeline() || this.hasFileIngestPipeline()) { + // There is at least one first stage pipeline. + this.startFirstStage(); + } else if (this.hasSecondStageDataSourceIngestPipeline()) { + // There is no first stage pipeline, but there is a second stage + // ingest pipeline. + this.startSecondStage(); + } + } + return errors; + } + + /** + * Starts the first stage of the job. + */ + private void startFirstStage() { + this.stage = IngestJob.Stages.FIRST; + + /** + * Start one or both of the first stage progress bars. + */ + if (this.hasFirstStageDataSourceIngestPipeline()) { + this.startDataSourceIngestProgressBar(); + } + if (this.hasFileIngestPipeline()) { + this.startFileIngestProgressBar(); + } + + /** + * Schedule the first stage tasks. + */ + if (this.hasFirstStageDataSourceIngestPipeline() && this.hasFileIngestPipeline()) { + IngestJob.taskScheduler.scheduleIngestTasks(this); + } else if (this.hasFirstStageDataSourceIngestPipeline()) { + IngestJob.taskScheduler.scheduleDataSourceIngestTask(this); + } else { + IngestJob.taskScheduler.scheduleFileIngestTasks(this); + + /** + * No data source ingest task has been scheduled for this stage, and + * it is possible, if unlikely, that no file ingest tasks were + * actually scheduled since there are files that get filtered out by + * the tasks scheduler. In this special case, an ingest thread will + * never get to make the following check for this stage of the job. + */ + if (IngestJob.taskScheduler.tasksForJobAreCompleted(this)) { + this.handleTasksCompleted(); + } + } + } + + /** + * Starts the second stage of the ingest job. + */ + private void startSecondStage() { + this.stage = IngestJob.Stages.SECOND; + this.startDataSourceIngestProgressBar(); + this.dataSourceIngestPipeline = this.secondStageDataSourceIngestPipeline; + IngestJob.taskScheduler.scheduleDataSourceIngestTask(this); + } + + /** + * Checks to see if this job has at least one ingest pipeline. + * + * @return True or false. 
+ */ + private boolean hasIngestPipeline() { + return this.hasFirstStageDataSourceIngestPipeline() + || this.hasFileIngestPipeline() + || this.hasSecondStageDataSourceIngestPipeline(); + } + + /** + * Checks to see if this job has a first stage data source ingest pipeline. + * + * @return True or false. + */ + private boolean hasFirstStageDataSourceIngestPipeline() { + return (this.firstStageDataSourceIngestPipeline.isEmpty() == false); + } + + /** + * Checks to see if this job has a second stage data source ingest pipeline. + * + * @return True or false. + */ + private boolean hasSecondStageDataSourceIngestPipeline() { + return (this.secondStageDataSourceIngestPipeline.isEmpty() == false); + } + + /** + * Checks to see if the job has a file ingest pipeline. + * + * @return True or false. + */ + private boolean hasFileIngestPipeline() { + return (this.fileIngestPipelines.peek().isEmpty() == false); + } + /** * Starts up each of the file and data source ingest modules to collect * possible errors. * * @return A collection of ingest module startup errors, empty on success. - * @throws InterruptedException */ - private List startUpIngestPipelines() throws InterruptedException { + private List startUpIngestPipelines() { List errors = new ArrayList<>(); // Start up the first stage data source ingest pipeline. @@ -725,8 +796,23 @@ final class IngestJob { } /** - * Shuts down the file ingest pipelines and current progress bars, if any, - * for this job. + * Handles when all ingest tasks for this job are completed by finishing the + * current stage and possibly starting the next stage. + */ + private void handleTasksCompleted() { + switch (this.stage) { + case FIRST: + this.finishFirstStage(); + break; + case SECOND: + this.finish(); + break; + } + } + + /** + * Shuts down the first stage ingest pipelines and progress bars and starts + * the second stage, if appropriate. */ private void finishFirstStage() { // Shut down the file ingest pipelines. Note that no shut down is @@ -758,22 +844,12 @@ final class IngestJob { this.fileIngestProgress = null; } } - } - /** - * RJCTODO - */ - private void startSecondStage() { - this.stage = IngestJob.Stages.SECOND; - if (!this.cancelled && !this.secondStageDataSourceIngestPipeline.isEmpty()) { - this.dataSourceIngestPipeline = this.secondStageDataSourceIngestPipeline; - this.startDataSourceIngestProgressBar(); - try { - IngestJob.ingestScheduler.scheduleDataSourceIngestTask(this); - } catch (InterruptedException ex) { - // RJCTODO: - this.finish(); - } + /** + * Start the second stage, if appropriate. + */ + if (!this.cancelled && this.hasSecondStageDataSourceIngestPipeline()) { + this.startSecondStage(); } else { this.finish(); } @@ -792,7 +868,7 @@ final class IngestJob { } } - IngestJob.ingestJobsById.remove(this.id); + IngestJob.jobsById.remove(this.id); if (!this.isCancelled()) { logger.log(Level.INFO, "Ingest job {0} completed", this.id); IngestManager.getInstance().fireIngestJobCompleted(this.id); @@ -821,6 +897,15 @@ final class IngestJob { this.currentDataSourceIngestModuleCancelled = true; } + /** + * Gets a snapshot of this jobs state and performance. + * + * @return An ingest job statistics object. + */ + private IngestJobSnapshot getIngestJobSnapshot() { + return new IngestJobSnapshot(); + } + /** * Stores basic diagnostic statistics for an ingest job. 
*/ @@ -847,23 +932,13 @@ final class IngestJob { this.estimatedFilesToProcess = IngestJob.this.estimatedFilesToProcess; this.snapShotTime = new Date().getTime(); } - this.tasksSnapshot = IngestJob.ingestScheduler.getTasksSnapshotForJob(this.jobId); + this.tasksSnapshot = IngestJob.taskScheduler.getTasksSnapshotForJob(this.jobId); } - /** - * RJCTODO - * - * @return - */ long getJobId() { return this.jobId; } - /** - * RJCTODO - * - * @return - */ String getDataSource() { return dataSource; } @@ -916,47 +991,22 @@ final class IngestJob { return estimatedFilesToProcess; } - /** - * RJCTODO - * - * @return - */ long getRootQueueSize() { return this.tasksSnapshot.getRootQueueSize(); } - /** - * RJCTODO - * - * @return - */ long getDirQueueSize() { - return this.tasksSnapshot.getDirQueueSize(); + return this.tasksSnapshot.getDirectoryTasksQueueSize(); } - /** - * RJCTODO - * - * @return - */ long getFileQueueSize() { return this.tasksSnapshot.getFileQueueSize(); } - /** - * RJCTODO - * - * @return - */ long getDsQueueSize() { return this.tasksSnapshot.getDsQueueSize(); } - /** - * RJCTODO - * - * @return - */ long getRunningListSize() { return this.tasksSnapshot.getRunningListSize(); } diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestJobContext.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestJobContext.java index 6d81f9db79..6587a20d19 100755 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestJobContext.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestJobContext.java @@ -19,7 +19,6 @@ package org.sleuthkit.autopsy.ingest; import java.util.List; -import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.Content; @@ -29,7 +28,6 @@ import org.sleuthkit.datamodel.Content; */ public final class IngestJobContext { - private static final Logger logger = Logger.getLogger(IngestJobContext.class.getName()); private final IngestJob ingestJob; IngestJobContext(IngestJob ingestJob) { @@ -101,25 +99,25 @@ public final class IngestJobContext { } /** - * Adds one or more files to the files to be passed through the file ingest - * pipeline of the ingest job associated with this context. + * Adds one or more files, i.e., extracted or carved files, to the ingest + * job associated with this context. * - * @param files The files to be processed by the file ingest pipeline. + * @param files The files to be added. * @deprecated use addFilesToJob() instead */ @Deprecated public void scheduleFiles(List files) { this.addFilesToJob(files); } - + /** - * Adds one or more files to the files to be passed through the file ingest - * pipeline of the ingest job associated with this context. + * Adds one or more files, i.e., extracted or carved files, to the ingest + * job associated with this context. * - * @param files The files to be processed by the file ingest pipeline. + * @param files The files to be added. 
*/ public void addFilesToJob(List files) { this.ingestJob.addFiles(files); } - + } diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java index b04f973d70..ccee8b482e 100644 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java @@ -134,7 +134,7 @@ public class IngestManager { */ private void startDataSourceIngestTask() { long threadId = nextThreadId.incrementAndGet(); - dataSourceIngestThreadPool.submit(new ExecuteIngestTasksTask(threadId, IngestTasksScheduler.getInstance().getDataSourceIngestTaskQueue())); + dataSourceIngestThreadPool.submit(new ExecuteIngestTasksRunnable(threadId, IngestTasksScheduler.getInstance().getDataSourceIngestTaskQueue())); ingestThreadActivitySnapshots.put(threadId, new IngestThreadActivitySnapshot(threadId)); } @@ -144,7 +144,7 @@ public class IngestManager { */ private void startFileIngestTask() { long threadId = nextThreadId.incrementAndGet(); - fileIngestThreadPool.submit(new ExecuteIngestTasksTask(threadId, IngestTasksScheduler.getInstance().getFileIngestTaskQueue())); + fileIngestThreadPool.submit(new ExecuteIngestTasksRunnable(threadId, IngestTasksScheduler.getInstance().getFileIngestTaskQueue())); ingestThreadActivitySnapshots.put(threadId, new IngestThreadActivitySnapshot(threadId)); } @@ -154,7 +154,7 @@ public class IngestManager { } long taskId = nextThreadId.incrementAndGet(); - Future task = startIngestJobsThreadPool.submit(new StartIngestJobsTask(taskId, dataSources, moduleTemplates, processUnallocatedSpace)); + Future task = startIngestJobsThreadPool.submit(new StartIngestJobsCallable(taskId, dataSources, moduleTemplates, processUnallocatedSpace)); startIngestJobsTasks.put(taskId, task); } @@ -200,11 +200,11 @@ public class IngestManager { return IngestJob.ingestJobsAreRunning(); } - /** * Called each time a module in a data source pipeline starts + * * @param task - * @param ingestModuleDisplayName + * @param ingestModuleDisplayName */ void setIngestTaskProgress(DataSourceIngestTask task, String ingestModuleDisplayName) { ingestThreadActivitySnapshots.put(task.getThreadId(), new IngestThreadActivitySnapshot(task.getThreadId(), task.getIngestJob().getId(), ingestModuleDisplayName, task.getDataSource())); @@ -212,20 +212,22 @@ public class IngestManager { /** * Called each time a module in a file ingest pipeline starts + * * @param task - * @param ingestModuleDisplayName + * @param ingestModuleDisplayName */ void setIngestTaskProgress(FileIngestTask task, String ingestModuleDisplayName) { IngestThreadActivitySnapshot prevSnap = ingestThreadActivitySnapshots.get(task.getThreadId()); IngestThreadActivitySnapshot newSnap = new IngestThreadActivitySnapshot(task.getThreadId(), task.getIngestJob().getId(), ingestModuleDisplayName, task.getDataSource(), task.getFile()); ingestThreadActivitySnapshots.put(task.getThreadId(), newSnap); - + incrementModuleRunTime(prevSnap.getActivity(), newSnap.getStartTime().getTime() - prevSnap.getStartTime().getTime()); } /** * Called each time a data source ingest task completes - * @param task + * + * @param task */ void setIngestTaskProgressCompleted(DataSourceIngestTask task) { ingestThreadActivitySnapshots.put(task.getThreadId(), new IngestThreadActivitySnapshot(task.getThreadId())); @@ -233,7 +235,8 @@ public class IngestManager { /** * Called when a file ingest pipeline is complete for a given file - * @param task + * + * @param task */ void 
setIngestTaskProgressCompleted(FileIngestTask task) { IngestThreadActivitySnapshot prevSnap = ingestThreadActivitySnapshots.get(task.getThreadId()); @@ -242,19 +245,21 @@ public class IngestManager { synchronized (processedFilesSnapshotLock) { processedFilesSnapshot.incrementProcessedFilesCount(); } - + incrementModuleRunTime(prevSnap.getActivity(), newSnap.getStartTime().getTime() - prevSnap.getStartTime().getTime()); } - + /** - * Internal method to update the times associated with each module. + * Internal method to update the times associated with each module. + * * @param moduleName - * @param duration + * @param duration */ private void incrementModuleRunTime(String moduleName, Long duration) { - if (moduleName.equals("IDLE")) + if (moduleName.equals("IDLE")) { return; - + } + synchronized (ingestModuleRunTimes) { Long prevTimeL = ingestModuleRunTimes.get(moduleName); long prevTime = 0; @@ -262,12 +267,13 @@ public class IngestManager { prevTime = prevTimeL; } prevTime += duration; - ingestModuleRunTimes.put(moduleName, prevTime); + ingestModuleRunTimes.put(moduleName, prevTime); } } - + /** * Return the list of run times for each module + * * @return Map of module name to run time (in milliseconds) */ Map getModuleRunTimes() { @@ -279,13 +285,13 @@ public class IngestManager { /** * Get the stats on current state of each thread - * @return + * + * @return */ List getIngestThreadActivitySnapshots() { return new ArrayList<>(ingestThreadActivitySnapshots.values()); } - public void cancelAllIngestJobs() { // Stop creating new ingest jobs. for (Future handle : startIngestJobsTasks.values()) { @@ -418,7 +424,7 @@ public class IngestManager { * @param ingestJobId The ingest job id. */ void fireIngestJobStarted(long ingestJobId) { - fireIngestEventsThreadPool.submit(new FireIngestEventTask(ingestJobEventPublisher, IngestJobEvent.STARTED, ingestJobId, null)); + fireIngestEventsThreadPool.submit(new FireIngestEventRunnable(ingestJobEventPublisher, IngestJobEvent.STARTED, ingestJobId, null)); } /** @@ -427,7 +433,7 @@ public class IngestManager { * @param ingestJobId The ingest job id. */ void fireIngestJobCompleted(long ingestJobId) { - fireIngestEventsThreadPool.submit(new FireIngestEventTask(ingestJobEventPublisher, IngestJobEvent.COMPLETED, ingestJobId, null)); + fireIngestEventsThreadPool.submit(new FireIngestEventRunnable(ingestJobEventPublisher, IngestJobEvent.COMPLETED, ingestJobId, null)); } /** @@ -436,7 +442,7 @@ public class IngestManager { * @param ingestJobId The ingest job id. */ void fireIngestJobCancelled(long ingestJobId) { - fireIngestEventsThreadPool.submit(new FireIngestEventTask(ingestJobEventPublisher, IngestJobEvent.CANCELLED, ingestJobId, null)); + fireIngestEventsThreadPool.submit(new FireIngestEventRunnable(ingestJobEventPublisher, IngestJobEvent.CANCELLED, ingestJobId, null)); } /** @@ -445,7 +451,7 @@ public class IngestManager { * @param file The file that is completed. */ void fireFileIngestDone(AbstractFile file) { - fireIngestEventsThreadPool.submit(new FireIngestEventTask(ingestModuleEventPublisher, IngestModuleEvent.FILE_DONE, file.getId(), file)); + fireIngestEventsThreadPool.submit(new FireIngestEventRunnable(ingestModuleEventPublisher, IngestModuleEvent.FILE_DONE, file.getId(), file)); } /** @@ -454,7 +460,7 @@ public class IngestManager { * @param moduleDataEvent A ModuleDataEvent with the details of the posting. 
*/ void fireIngestModuleDataEvent(ModuleDataEvent moduleDataEvent) { - fireIngestEventsThreadPool.submit(new FireIngestEventTask(ingestModuleEventPublisher, IngestModuleEvent.DATA_ADDED, moduleDataEvent, null)); + fireIngestEventsThreadPool.submit(new FireIngestEventRunnable(ingestModuleEventPublisher, IngestModuleEvent.DATA_ADDED, moduleDataEvent, null)); } /** @@ -465,7 +471,7 @@ public class IngestManager { * content. */ void fireIngestModuleContentEvent(ModuleContentEvent moduleContentEvent) { - fireIngestEventsThreadPool.submit(new FireIngestEventTask(ingestModuleEventPublisher, IngestModuleEvent.CONTENT_CHANGED, moduleContentEvent, null)); + fireIngestEventsThreadPool.submit(new FireIngestEventRunnable(ingestModuleEventPublisher, IngestModuleEvent.CONTENT_CHANGED, moduleContentEvent, null)); } /** @@ -509,7 +515,7 @@ public class IngestManager { /** * Creates ingest jobs. */ - private class StartIngestJobsTask implements Callable { + private final class StartIngestJobsCallable implements Callable { private final long threadId; private final List dataSources; @@ -517,7 +523,7 @@ public class IngestManager { private final boolean processUnallocatedSpace; private ProgressHandle progress; - StartIngestJobsTask(long threadId, List dataSources, List moduleTemplates, boolean processUnallocatedSpace) { + StartIngestJobsCallable(long threadId, List dataSources, List moduleTemplates, boolean processUnallocatedSpace) { this.threadId = threadId; this.dataSources = dataSources; this.moduleTemplates = moduleTemplates; @@ -587,9 +593,6 @@ public class IngestManager { break; } } - } catch (InterruptedException ex) { - // Reset interrupted status. - Thread.currentThread().interrupt(); } catch (Exception ex) { logger.log(Level.SEVERE, "Failed to create ingest job", ex); //NON-NLS } finally { @@ -603,12 +606,12 @@ public class IngestManager { /** * A consumer for an ingest task queue. */ - private class ExecuteIngestTasksTask implements Runnable { + private final class ExecuteIngestTasksRunnable implements Runnable { private final long threadId; private final IngestTaskQueue tasks; - ExecuteIngestTasksTask(long threadId, IngestTaskQueue tasks) { + ExecuteIngestTasksRunnable(long threadId, IngestTaskQueue tasks) { this.threadId = threadId; this.tasks = tasks; } @@ -632,7 +635,7 @@ public class IngestManager { /** * Fires ingest events to ingest manager property change listeners. 
*/ - private static class FireIngestEventTask implements Runnable { + private static final class FireIngestEventRunnable implements Runnable { private final PropertyChangeSupport publisher; private final IngestJobEvent jobEvent; @@ -640,7 +643,7 @@ public class IngestManager { private final Object oldValue; private final Object newValue; - FireIngestEventTask(PropertyChangeSupport publisher, IngestJobEvent event, Object oldValue, Object newValue) { + FireIngestEventRunnable(PropertyChangeSupport publisher, IngestJobEvent event, Object oldValue, Object newValue) { this.publisher = publisher; this.jobEvent = event; this.moduleEvent = null; @@ -648,7 +651,7 @@ public class IngestManager { this.newValue = newValue; } - FireIngestEventTask(PropertyChangeSupport publisher, IngestModuleEvent event, Object oldValue, Object newValue) { + FireIngestEventRunnable(PropertyChangeSupport publisher, IngestModuleEvent event, Object oldValue, Object newValue) { this.publisher = publisher; this.jobEvent = null; this.moduleEvent = event; @@ -695,9 +698,9 @@ public class IngestManager { startTime = new Date(); this.activity = activity; this.dataSourceName = dataSource.getName(); - this.fileName = ""; + this.fileName = ""; } - + // file ingest thread IngestThreadActivitySnapshot(long threadId, long jobId, String activity, Content dataSource, AbstractFile file) { this.threadId = threadId; @@ -711,7 +714,7 @@ public class IngestManager { long getJobId() { return jobId; } - + long getThreadId() { return threadId; } diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java index e7c71bab23..862ba5bcf0 100644 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java @@ -38,6 +38,7 @@ import org.sleuthkit.autopsy.modules.fileextmismatch.FileExtMismatchDetectorModu import org.sleuthkit.autopsy.modules.filetypeid.FileTypeIdModuleFactory; import org.sleuthkit.autopsy.modules.hashdatabase.HashLookupModuleFactory; import org.sleuthkit.autopsy.modules.interestingitems.InterestingItemsIngestModuleFactory; +import org.sleuthkit.autopsy.modules.photoreccarver.PhotoRecCarverIngestModuleFactory; import org.sleuthkit.autopsy.modules.sevenzip.ArchiveFileExtractorModuleFactory; import org.sleuthkit.autopsy.python.JythonModuleLoader; @@ -51,8 +52,6 @@ final class IngestModuleFactoryLoader { private static final String SAMPLE_EXECUTABLE_MODULE_FACTORY_CLASS_NAME = SampleExecutableIngestModuleFactory.class.getCanonicalName(); private static final ArrayList coreModuleOrdering = new ArrayList() { { - // RJCTODO: Find out wherer ot put the photorec carver - // The ordering of the core ingest module factories implemented // using Java is hard-coded. 
add("org.sleuthkit.autopsy.recentactivity.RecentActivityExtracterModuleFactory"); //NON-NLS @@ -66,6 +65,7 @@ final class IngestModuleFactoryLoader { add(E01VerifierModuleFactory.class.getCanonicalName()); add(AndroidModuleFactory.class.getCanonicalName()); add(InterestingItemsIngestModuleFactory.class.getCanonicalName()); + add(PhotoRecCarverIngestModuleFactory.class.getCanonicalName()); } }; diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java index f9fe24fb55..669797b64b 100755 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java @@ -44,9 +44,8 @@ final class IngestPipelinesConfiguration { private static final String PIPELINE_TYPE_ATTR = "type"; //NON-NLS private static final String STAGE_ONE_DATA_SOURCE_INGEST_PIPELINE_ELEM = "ImageAnalysisStageOne"; //NON-NLS private static final String STAGE_TWO_DATA_SOURCE_INGEST_PIPELINE_ELEM = "ImageAnalysisStageTwo"; //NON-NLS - private static final String FILE_INGEST_PIPELINE_TYPE = "FileAnalysis"; //NON-NLS + private static final String FILE_INGEST_PIPELINE_ELEM = "FileAnalysis"; //NON-NLS private static final String INGEST_MODULE_ELEM = "MODULE"; //NON-NLS - private static final String XML_MODULE_CLASS_NAME_ATTR = "location"; //NON-NLS private static IngestPipelinesConfiguration instance; @@ -54,10 +53,6 @@ final class IngestPipelinesConfiguration { private final List fileIngestPipelineConfig = new ArrayList<>(); private final List stageTwoDataSourceIngestPipelineConfig = new ArrayList<>(); - // RJCTODO: Bring this code back into use, use it in IngestJob to sort things - // into the now three pipelines. Other NBMs built on top of Autopsy that - // have custom pipeline config files can do a PlatformUtil.extractResourceToUserConfigDir() - // before this is called. /** * Gets the ingest pipelines configuration singleton. * @@ -140,7 +135,6 @@ final class IngestPipelinesConfiguration { // Parse the pipeline elements to populate the pipeline // configuration lists. - // RJCTODO: SHould check that each element is unique. Or could try the XSD bit. 
List pipelineConfig = null; for (int pipelineNum = 0; pipelineNum < numPipelines; ++pipelineNum) { Element pipelineElement = (Element) pipelineElements.item(pipelineNum); @@ -150,7 +144,7 @@ final class IngestPipelinesConfiguration { case STAGE_ONE_DATA_SOURCE_INGEST_PIPELINE_ELEM: pipelineConfig = this.stageOneDataSourceIngestPipelineConfig; break; - case FILE_INGEST_PIPELINE_TYPE: + case FILE_INGEST_PIPELINE_ELEM: pipelineConfig = this.fileIngestPipelineConfig; break; case STAGE_TWO_DATA_SOURCE_INGEST_PIPELINE_ELEM: diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java index c9c97d8ab7..980753a04d 100755 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestTasksScheduler.java @@ -47,39 +47,55 @@ final class IngestTasksScheduler { private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue(); private static IngestTasksScheduler instance; - // Scheduling of data source ingest tasks is accomplished by putting them - // in a FIFO queue to be consumed by the ingest threads. The pending data - // tasks queue is therefore wrapped in a "dispenser" that implements the - // IngestTaskQueue interface and is exposed via a getter method. + /** + * Scheduling of data source ingest tasks is accomplished by putting them in + * a FIFO queue to be consumed by the ingest threads, so the queue is + * wrapped in a "dispenser" that implements the IngestTaskQueue interface + * and is exposed via a getter method. + */ private final LinkedBlockingQueue pendingDataSourceTasks; private final DataSourceIngestTaskQueue dataSourceTasksDispenser; - // Scheduling of file ingest tasks is accomplished by "shuffling" them - // through a sequence of internal queues that allows for the interleaving of - // tasks from different ingest jobs based on priority. These scheduling - // queues are: - // 1. root directory tasks (priority queue) - // 2. directory tasks (FIFO queue) - // 3. pending file tasks (LIFO queue) - // Tasks in the pending file tasks queue are ready to be consumed by the - // ingest threads. The pending file tasks queue is therefore wrapped in a - // "dispenser" that implements the IngestTaskQueue interface and is exposed - // via a getter method. + /** + * Scheduling of file ingest tasks is accomplished by "shuffling" them + * through a sequence of internal queues that allows for the interleaving of + * tasks from different ingest jobs based on priority. These scheduling + * queues are: + * + * 1. Root directory tasks (priority queue) + * + * 2. Directory tasks (FIFO queue) + * + * 3. Pending file tasks (LIFO queue). + * + * The pending file tasks queue is LIFO to handle large numbers of files + * extracted from archive files. At least one image has been processed that + * had a folder full of archive files. The queue grew to have thousands of + * entries, as each successive archive file was expanded, so now extracted + * files get added to the front of the queue so that in such a scenario they + * would be processed before the expansion of the next archive file. 
+ * + * Tasks in the pending file tasks queue are ready to be consumed by the + * ingest threads, so the queue is wrapped in a "dispenser" that implements + * the IngestTaskQueue interface and is exposed via a getter method. + */ private final TreeSet rootDirectoryTasks; private final List directoryTasks; private final BlockingDeque pendingFileTasks; private final FileIngestTaskQueue fileTasksDispenser; - // The ingest scheduler is responsible for notifying an ingest jobs whenever - // all of the ingest tasks currently associated with the job are complete. - // To make this possible, the ingest tasks scheduler needs to keep track not - // only of the tasks in its queues, but also of the tasks that have been - // handed out for processing by code running on the ingest manager's ingest - // threads. Therefore all ingest tasks are added to this list and are not - // removed when an ingest thread takes an ingest task. Instead, the ingest - // thread calls back into the scheduler when the task is completed, at - // which time the task will be removed from this list. - private final List tasksInProgressAndPending; + /** + * The ingest tasks scheduler allows ingest jobs to query it to see if there + * are any tasks in progress for the job. To make this possible, the ingest + * tasks scheduler needs to keep track not only of the tasks in its queues, + * but also of the tasks that have been handed out for processing by the + * ingest threads. Therefore all ingest tasks are added to this list when + * they are created and are not removed when an ingest thread takes an + * ingest task. Instead, the ingest thread calls back into the scheduler + * when the task is completed, at which time the task will be removed from + * this list. + */ + private final List tasksInProgress; /** * Gets the ingest tasks scheduler singleton. @@ -101,7 +117,7 @@ final class IngestTasksScheduler { this.directoryTasks = new ArrayList<>(); this.pendingFileTasks = new LinkedBlockingDeque<>(); this.fileTasksDispenser = new FileIngestTaskQueue(); - this.tasksInProgressAndPending = new ArrayList<>(); + this.tasksInProgress = new ArrayList<>(); } /** @@ -132,42 +148,33 @@ final class IngestTasksScheduler { * @throws InterruptedException if the calling thread is blocked due to a * full tasks queue and is interrupted. */ - synchronized void scheduleIngestTasks(IngestJob job) throws InterruptedException { - // The initial ingest scheduling for a job an an atomic operation. - // Otherwise, the data source task might be completed before the file - // tasks are created, resulting in a potential false positive when this - // task scheduler checks whether or not all the tasks for the job are - // completed. - if (job.hasDataSourceIngestPipeline()) { - scheduleDataSourceIngestTask(job); - } - if (job.hasFileIngestPipeline()) { - scheduleFileIngestTasks(job); - } + synchronized void scheduleIngestTasks(IngestJob job) { + // Scheduling of both a data source ingest task and file ingest tasks + // for a job must be an atomic operation. Otherwise, the data source + // task might be completed before the file tasks are scheduled, + // resulting in a potential false positive when another thread checks + // whether or not all the tasks for the job are completed. + this.scheduleDataSourceIngestTask(job); + this.scheduleFileIngestTasks(job); } /** * Schedules a data source ingest task for an ingest job. * * @param job The job for which the tasks are to be scheduled. 
- * @throws InterruptedException if the calling thread is blocked due to a - * full tasks queue and is interrupted. */ - synchronized void scheduleDataSourceIngestTask(IngestJob job) throws InterruptedException { - // Create a data source ingest task for the data source associated with - // the ingest job and add the task to the pending data source tasks - // queue. Data source tasks are scheduled on a first come, first served - // basis. + synchronized void scheduleDataSourceIngestTask(IngestJob job) { DataSourceIngestTask task = new DataSourceIngestTask(job); - this.tasksInProgressAndPending.add(task); + this.tasksInProgress.add(task); try { - // This call should not block because the queue is (theoretically) - // unbounded. this.pendingDataSourceTasks.put(task); } catch (InterruptedException ex) { - this.tasksInProgressAndPending.remove(task); - IngestTasksScheduler.logger.log(Level.SEVERE, "Interruption of unexpected block on pending data source tasks queue", ex); //NON-NLS - throw ex; + /** + * The current thread was interrupted while blocked on a full queue. + * Discard the task and reset the interrupted flag. + */ + this.tasksInProgress.remove(task); + Thread.currentThread().interrupt(); } } @@ -175,18 +182,15 @@ final class IngestTasksScheduler { * Schedules file ingest tasks for an ingest job. * * @param job The job for which the tasks are to be scheduled. - * @throws InterruptedException if the calling thread is blocked due to a - * full tasks queue and is interrupted. */ - synchronized void scheduleFileIngestTasks(IngestJob job) throws InterruptedException { + synchronized void scheduleFileIngestTasks(IngestJob job) { // Get the top level files for the data source associated with this job - // and add them to the root directories priority queue. The file tasks - // may be interleaved with file tasks from other jobs, based on priority. + // and add them to the root directories priority queue. List topLevelFiles = getTopLevelFiles(job.getDataSource()); for (AbstractFile firstLevelFile : topLevelFiles) { FileIngestTask task = new FileIngestTask(job, firstLevelFile); if (IngestTasksScheduler.shouldEnqueueFileTask(task)) { - this.tasksInProgressAndPending.add(task); + this.tasksInProgress.add(task); this.rootDirectoryTasks.add(task); } } @@ -197,16 +201,12 @@ final class IngestTasksScheduler { * Schedules a file ingest task for an ingest job. * * @param job The job for which the tasks are to be scheduled. - * @param file The file associated with the task. - * @throws InterruptedException if the calling thread is blocked due to a - * full tasks queue and is interrupted. + * @param file The file to be associated with the task. */ - void scheduleFileIngestTask(IngestJob job, AbstractFile file) throws InterruptedException, IllegalStateException { + synchronized void scheduleFileIngestTask(IngestJob job, AbstractFile file) { FileIngestTask task = new FileIngestTask(job, file); if (IngestTasksScheduler.shouldEnqueueFileTask(task)) { - // This synchronized method sends the file task directly to the - // pending file tasks queue. This is done to prioritize derived - // and carved files generated by a file ingest task in progress. + this.tasksInProgress.add(task); addToPendingFileTasksQueue(task); } } @@ -217,12 +217,24 @@ final class IngestTasksScheduler { * * @param task The completed task. 
      */
-    synchronized void notifyTaskCompleted(IngestTask task) throws InterruptedException {
-        tasksInProgressAndPending.remove(task);
-        IngestJob job = task.getIngestJob();
-        if (this.tasksForJobAreCompleted(job)) {
-            job.notifyTasksCompleted();
+    synchronized void notifyTaskCompleted(IngestTask task) {
+        tasksInProgress.remove(task);
+    }
+
+    /**
+     * Queries the task scheduler to determine whether or not all current ingest
+     * tasks for an ingest job are completed.
+     *
+     * @param job The job for which the query is to be performed.
+     * @return True or false.
+     */
+    synchronized boolean tasksForJobAreCompleted(IngestJob job) {
+        for (IngestTask task : tasksInProgress) {
+            if (task.getIngestJob().getId() == job.getId()) {
+                return false;
+            }
         }
+        return true;
     }

     /**
@@ -234,25 +246,17 @@
      * @param job The job for which the tasks are to be canceled.
      */
     synchronized void cancelPendingTasksForIngestJob(IngestJob job) {
-        // The scheduling queues are cleared of tasks for the job, and the tasks
-        // that are removed from the scheduling queues are also removed from the
-        // tasks in progress list. However, a tasks in progress check for the
-        // job may still return true since the tasks that have been taken by the
-        // ingest threads are still in the tasks in progress list.
         long jobId = job.getId();
         this.removeTasksForJob(this.rootDirectoryTasks, jobId);
         this.removeTasksForJob(this.directoryTasks, jobId);
         this.removeTasksForJob(this.pendingFileTasks, jobId);
         this.removeTasksForJob(this.pendingDataSourceTasks, jobId);
-        if (this.tasksForJobAreCompleted(job)) {
-            job.notifyTasksCompleted();
-        }
     }

     /**
-     * A helper that gets the top level files such as file system root
-     * directories, layout files and virtual directories for a data source. Used
-     * to create file tasks to put into the root directories queue.
+     * Gets the top level files such as file system root directories, layout
+     * files and virtual directories for a data source. Used to create file
+     * tasks to put into the root directories queue.
      *
      * @param dataSource The data source.
      * @return A list of top level files.
@@ -290,14 +294,11 @@
     }

     /**
-     * A helper that "shuffles" the file task queues to ensure that there is at
-     * least one task in the pending file ingest tasks queue, as long as there
-     * are still file ingest tasks to be performed.
-     *
-     * @throws InterruptedException if the calling thread is blocked due to a
-     * full tasks queue and is interrupted.
+     * "Shuffles" the file task queues to ensure that there is at least one task
+     * in the pending file ingest tasks queue, as long as there are still file
+     * ingest tasks to be performed.
      */
-    synchronized private void shuffleFileTaskQueues() throws InterruptedException, IllegalStateException {
+    synchronized private void shuffleFileTaskQueues() {
         // This is synchronized because it is called both by synchronized
         // methods of this ingest scheduler and an unsynchronized method of its
         // file tasks "dispenser".
@@ -323,16 +324,13 @@
             }

             // Try to add the most recently added directory from the
-            // directory tasks queue to the pending file tasks queue. Note
-            // the removal of the task from the tasks in progress list. If
-            // the task is enqueued, it will be put back in the list by
-            // the addToPendingFileTasksQueue() helper.
-            boolean tasksEnqueuedForDirectory = false;
+            // directory tasks queue to the pending file tasks queue.
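+            // Directories are taken from the back of the directory tasks
+            // list, so the most recently discovered directory is visited
+            // first and each data source is explored roughly depth-first.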
             FileIngestTask directoryTask = this.directoryTasks.remove(this.directoryTasks.size() - 1);
-            this.tasksInProgressAndPending.remove(directoryTask);
             if (shouldEnqueueFileTask(directoryTask)) {
                 addToPendingFileTasksQueue(directoryTask);
-                tasksEnqueuedForDirectory = true;
+            } else {
+                // The task is being discarded, so stop tracking it.
+                this.tasksInProgress.remove(directoryTask);
             }

             // If the directory contains subdirectories or files, try to
@@ -349,16 +347,15 @@
                             // addition of the task to the tasks in progress
                             // list. This is necessary because this is the
                             // first appearance of this task in the queues.
-                            this.tasksInProgressAndPending.add(childTask);
+                            this.tasksInProgress.add(childTask);
                             this.directoryTasks.add(childTask);
-                            tasksEnqueuedForDirectory = true;
                         } else if (shouldEnqueueFileTask(childTask)) {
                             // Found a file, put the task directly into the
-                            // pending file tasks queue. The new task will
-                            // be put into the tasks in progress list by the
-                            // addToPendingFileTasksQueue() helper.
+                            // pending file tasks queue after adding it to the
+                            // tasks in progress list.
+                            this.tasksInProgress.add(childTask);
                             addToPendingFileTasksQueue(childTask);
-                            tasksEnqueuedForDirectory = true;
                         }
                     }
                 }
             } catch (TskCoreException ex) {
                 String errorMessage = String.format("An error occurred getting the children of %s", directory.getName()); //NON-NLS
                 logger.log(Level.SEVERE, errorMessage, ex);
             }
-
-            // In the case where the directory task is not pushed into the
-            // the pending file tasks queue and has no children, check to
-            // see if the job is completed - the directory task might have
-            // been the last task for the job.
-            if (!tasksEnqueuedForDirectory) {
-                IngestJob job = directoryTask.getIngestJob();
-                if (this.tasksForJobAreCompleted(job)) {
-                    job.notifyTasksCompleted();
-                }
-            }
         }
     }

     /**
-     * A helper method that examines the file associated with a file ingest task
-     * to determine whether or not the file should be processed and therefore
-     * the task should be enqueued.
+     * Examines the file associated with a file ingest task to determine whether
+     * or not the file should be processed and therefore whether or not the task
+     * should be enqueued.
      *
      * @param task The task to be scrutinized.
      * @return True or false.
@@ -407,9 +393,6 @@

         // Skip the task if the file is one of a select group of special, large
         // NTFS or FAT file system files.
-        // the file is in the root directory, has a file name
-        // starting with $, containing : (not default attributes)
-        //with meta address < 32, i.e. some special large NTFS and FAT files
         if (file instanceof org.sleuthkit.datamodel.File) {
             final org.sleuthkit.datamodel.File f = (org.sleuthkit.datamodel.File) file;
@@ -452,50 +435,28 @@
             return true;
         }

-    // RJCTODO: Is this still necessary? There is code elsewhere to remove and
-    // re-add the task to the tasks in progress list.
     /**
-     * A helper method to safely add a file ingest task to the blocking pending
-     * tasks queue.
+     * Adds a file ingest task to the blocking pending tasks queue.
      *
-     * @param task
-     * @throws IllegalStateException
+     * @param task The task to add.
      */
-    synchronized private void addToPendingFileTasksQueue(FileIngestTask task) throws IllegalStateException {
-        tasksInProgressAndPending.add(task);
+    synchronized private void addToPendingFileTasksQueue(FileIngestTask task) {
         try {
-            // The file is added to the front of the pending files queue because
-            // at least one image has been processed that had a folder full of
-            // archive files. The queue grew to have thousands of entries, so
-            // this (might) help with pushing those files through ingest.
-            this.pendingFileTasks.addFirst(task);
-        } catch (IllegalStateException ex) {
-            tasksInProgressAndPending.remove(task);
-            Logger.getLogger(IngestTasksScheduler.class.getName()).log(Level.SEVERE, "Pending file tasks queue is full", ex); //NON-NLS
-            throw ex;
+            this.pendingFileTasks.putFirst(task);
+        } catch (InterruptedException ex) {
+            /**
+             * The current thread was interrupted while blocked on a full queue.
+             * Discard the task and reset the interrupted flag.
+             */
+            this.tasksInProgress.remove(task);
+            Thread.currentThread().interrupt();
         }
     }

     /**
-     * Determines whether or not all current ingest tasks for an ingest job are
-     * completed.
-     *
-     * @param job The job for which the query is to be performed.
-     * @return True or false.
-     */
-    private boolean tasksForJobAreCompleted(IngestJob job) {
-        for (IngestTask task : tasksInProgressAndPending) {
-            if (task.getIngestJob().getId() == job.getId()) {
-                return false;
-            }
-        }
-        return true;
-    }
-
-    /**
-     * A helper that removes all of the ingest tasks associated with an ingest
-     * job from a tasks queue. The task is removed from the the tasks in
-     * progress list as well.
+     * Removes all of the ingest tasks associated with an ingest job from a
+     * tasks queue. Each task is also removed from the tasks in progress list.
      *
      * @param taskQueue The queue from which to remove the tasks.
      * @param jobId The id of the job for which the tasks are to be removed.
@@ -505,15 +466,14 @@
         while (iterator.hasNext()) {
             IngestTask task = iterator.next();
             if (task.getIngestJob().getId() == jobId) {
-                this.tasksInProgressAndPending.remove(task);
+                this.tasksInProgress.remove(task);
                 iterator.remove();
             }
         }
     }

     /**
-     * A helper that counts the number of ingest tasks in a task queue for a
-     * given job.
+     * Counts the number of ingest tasks in a task queue for a given job.
      *
      * @param queue The queue for which to count tasks.
      * @param jobId The id of the job for which the tasks are to be counted.
@@ -532,10 +492,11 @@
     }

     /**
-     * RJCTODO
-     *
-     * @param jobId
-     * @return
+     * Returns a snapshot of the states of the tasks in progress for an ingest
+     * job.
+     *
+     * @param jobId The identifier assigned to the job.
+     * @return The snapshot.
      */
     synchronized IngestJobTasksSnapshot getTasksSnapshotForJob(long jobId) {
         return new IngestJobTasksSnapshot(jobId);
@@ -684,9 +645,10 @@
     }

     /**
-     * A snapshot of ingest tasks data for an ingest job.
+     * A snapshot of ingest tasks data for an ingest job.
      */
     class IngestJobTasksSnapshot {
+
         private final long jobId;
         private final long rootQueueSize;
         private final long dirQueueSize;
@@ -695,8 +657,9 @@
         private final long runningListSize;

         /**
-         * RJCTODO
-         * @param jobId
+         * Constructs a snapshot of ingest tasks data for an ingest job.
+         *
+         * @param jobId The identifier associated with the job.
          */
         IngestJobTasksSnapshot(long jobId) {
             this.jobId = jobId;
@@ -704,56 +667,51 @@
             this.dirQueueSize = countTasksForJob(IngestTasksScheduler.this.directoryTasks, jobId);
             this.fileQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingFileTasks, jobId);
             this.dsQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingDataSourceTasks, jobId);
-            this.runningListSize = countTasksForJob(IngestTasksScheduler.this.tasksInProgressAndPending, jobId) - fileQueueSize - dsQueueSize;
+            this.runningListSize = countTasksForJob(IngestTasksScheduler.this.tasksInProgress, jobId) - fileQueueSize - dsQueueSize;
         }
-
+
         /**
-         * RJCTODO
-         * @return
+         * Gets the identifier associated with the ingest job for which this
+         * snapshot was created.
+         *
+         * @return The ingest job identifier.
          */
         long getJobId() {
             return jobId;
         }

         /**
-         * RJCTODO
-         * @return
+         * Gets the number of file ingest tasks associated with the job that are
+         * in the root directories queue.
+         *
+         * @return The tasks count.
          */
         long getRootQueueSize() {
             return rootQueueSize;
         }

         /**
-         * RJCTODO
-         * @return
+         * Gets the number of file ingest tasks associated with the job that are
+         * in the directory tasks queue.
+         *
+         * @return The tasks count.
          */
-        long getDirQueueSize() {
+        long getDirectoryTasksQueueSize() {
             return dirQueueSize;
         }

-        /**
-         * RJCTODO
-         * @return
-         */
         long getFileQueueSize() {
             return fileQueueSize;
         }

-        /**
-         * RJCTODO
-         * @return
-         */
         long getDsQueueSize() {
             return dsQueueSize;
         }

-        /**
-         * RJCTODO
-         * @return
-         */
         long getRunningListSize() {
             return runningListSize;
-        }
+        }
+    }

 }
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml b/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml
index 36de99011f..69f9f362c3 100644
--- a/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml
+++ b/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml
@@ -16,6 +16,7 @@ Contains only the core ingest modules that ship with Autopsy -->
     org.sleuthkit.autopsy.thunderbirdparser.EmailParserModuleFactory
     org.sleuthkit.autopsy.modules.fileextmismatch.FileExtMismatchDetectorModuleFactory
     org.sleuthkit.autopsy.modules.interestingitems.InterestingItemsIngestModuleFactory
+    org.sleuthkit.autopsy.modules.photoreccarver.PhotoRecCarverIngestModuleFactory
diff --git a/Core/src/org/sleuthkit/autopsy/modules/photoreccarver/PhotoRecCarverIngestModuleFactory.java b/Core/src/org/sleuthkit/autopsy/modules/photoreccarver/PhotoRecCarverIngestModuleFactory.java
index 915ca1433f..325c0e7613 100755
--- a/Core/src/org/sleuthkit/autopsy/modules/photoreccarver/PhotoRecCarverIngestModuleFactory.java
+++ b/Core/src/org/sleuthkit/autopsy/modules/photoreccarver/PhotoRecCarverIngestModuleFactory.java
@@ -25,7 +25,6 @@ import org.sleuthkit.autopsy.ingest.FileIngestModule;
 import org.sleuthkit.autopsy.ingest.IngestModuleFactory;
 import org.sleuthkit.autopsy.ingest.IngestModuleFactoryAdapter;
 import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettings;
-import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettingsPanel;

 /**
  * A factory for creating instances of file ingest modules that carve unallocated space

From 882343a60f6f99a729e5f1a207a31c50f7323d79 Mon Sep 17 00:00:00 2001
From: Eamonn Saunders
Date: Wed, 5 Nov 2014 16:02:47 -0500
Subject: [PATCH 07/10] - Added Win32Process which uses JNA to give us access
 to Windows APIs that give us more control over processes.
 - Added killProcess() method to ExecUtil. This method can be used to
 terminate a process and its children.
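For reference, a rough usage sketch (hypothetical caller code; the execute()
overload and ProcessTerminator callback are the ones visible in this patch,
and the cancellation flag is made up for illustration):

    ProcessBuilder builder = new ProcessBuilder("photorec_win.exe", "/log");
    AtomicBoolean cancelRequested = new AtomicBoolean();
    // The terminator is polled each time waitFor() times out; when it returns
    // true, ExecUtil now calls killProcess() so that child processes are torn
    // down on Windows rather than orphaned.
    int exitCode = ExecUtil.execute(builder, 1, TimeUnit.SECONDS,
            () -> cancelRequested.get());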
--- .../sleuthkit/autopsy/coreutils/ExecUtil.java | 33 ++++- .../autopsy/coreutils/Win32Process.java | 115 ++++++++++++++++++ 2 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java diff --git a/Core/src/org/sleuthkit/autopsy/coreutils/ExecUtil.java b/Core/src/org/sleuthkit/autopsy/coreutils/ExecUtil.java index 536fa786a0..8005609470 100644 --- a/Core/src/org/sleuthkit/autopsy/coreutils/ExecUtil.java +++ b/Core/src/org/sleuthkit/autopsy/coreutils/ExecUtil.java @@ -18,11 +18,13 @@ */ package org.sleuthkit.autopsy.coreutils; +import com.sun.javafx.PlatformUtil; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Writer; +import java.util.List; import java.util.concurrent.TimeUnit; import java.util.logging.Level; @@ -108,12 +110,12 @@ public final class ExecUtil { do { process.waitFor(timeOut, units); if (process.isAlive() && terminator.shouldTerminateProcess()) { - process.destroyForcibly(); + killProcess(process); } } while (process.isAlive()); } catch (InterruptedException ex) { if (process.isAlive()) { - process.destroyForcibly(); + killProcess(process); } Logger.getLogger(ExecUtil.class.getName()).log(Level.INFO, "Thread interrupted while running {0}", processBuilder.command().get(0)); Thread.currentThread().interrupt(); @@ -121,6 +123,33 @@ public final class ExecUtil { return process.exitValue(); } + /** + * Kill a process and its children + * @param process The parent process to kill + */ + public static void killProcess(Process process) { + if (process == null) + return; + + try { + if (PlatformUtil.isWindows()) { + Win32Process parentProcess = new Win32Process(process); + List children = parentProcess.getChildren(); + + children.stream().forEach((child) -> { + child.terminate(); + }); + parentProcess.terminate(); + } + else { + process.destroyForcibly(); + } + } + catch (Exception ex) { + logger.log(Level.WARNING, "Error occurred when attempting to kill process: {0}", ex.getMessage()); // NON-NLS + } + } + private static final Logger logger = Logger.getLogger(ExecUtil.class.getName()); private Process proc = null; private ExecUtil.StreamToStringRedirect errorStringRedirect = null; diff --git a/Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java b/Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java new file mode 100644 index 0000000000..84cc47be09 --- /dev/null +++ b/Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java @@ -0,0 +1,115 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2012-2014 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.sleuthkit.autopsy.coreutils; + +import com.sun.jna.Pointer; +import com.sun.jna.platform.win32.Kernel32; +import com.sun.jna.platform.win32.Kernel32Util; +import com.sun.jna.platform.win32.Tlhelp32; +import com.sun.jna.platform.win32.WinDef.DWORD; +import com.sun.jna.platform.win32.WinNT; +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.List; + +/** + * Class that represents a Windows process. + * It uses JNA to access the Win32 API. + * This code is based on http://stackoverflow.com/questions/10124299/how-do-i-terminate-a-process-tree-from-java + */ +public class Win32Process { + WinNT.HANDLE handle; + int pid; + + /** + * Create a Win32Process object for the given Process object. + * Reflection is used to construct a Windows process handle. + * @param process A Java Process object + * @throws IOException + */ + Win32Process (Process process) throws Exception + { + if (process.getClass().getName().equals("java.lang.Win32Process") || // NON-NLS + process.getClass().getName().equals("java.lang.ProcessImpl")) { // NON-NLS + try { + Field f = process.getClass().getDeclaredField("handle"); // NON-NLS + f.setAccessible(true); + long handleVal = f.getLong(process); + handle = new WinNT.HANDLE(Pointer.createConstant(handleVal)); + } + catch (NoSuchFieldException | SecurityException | IllegalArgumentException | IllegalAccessException ex) { + throw new Exception(ex.getMessage()); // NON-NLS + } + } + this.pid = Kernel32.INSTANCE.GetProcessId(handle); + } + + /** + * Create a Win32Process object for the given process id. + * @param pid Process Id + * @throws IOException + */ + Win32Process (int pid) throws Exception + { + handle = Kernel32.INSTANCE.OpenProcess ( + 0x0400| /* PROCESS_QUERY_INFORMATION */ + 0x0800| /* PROCESS_SUSPEND_RESUME */ + 0x0001| /* PROCESS_TERMINATE */ + 0x00100000 /* SYNCHRONIZE */, + false, + pid); + if (handle == null) + throw new Exception (Kernel32Util.formatMessageFromLastErrorCode (Kernel32.INSTANCE.GetLastError ())); + this.pid = Kernel32.INSTANCE.GetProcessId(handle); + } + + @Override + protected void finalize () throws Throwable + { + Kernel32.INSTANCE.CloseHandle (handle); + super.finalize(); + } + + /** + * Kill the process. Note that this does not kill children. + */ + public void terminate () + { + Kernel32.INSTANCE.TerminateProcess (handle, 0); + } + + /** + * Get children of current process object. + * @return list of child processes + * @throws IOException + */ + public List getChildren () throws Exception + { + ArrayList result = new ArrayList<> (); + WinNT.HANDLE hSnap = Kernel32.INSTANCE.CreateToolhelp32Snapshot(Tlhelp32.TH32CS_SNAPPROCESS, new DWORD(0)); + Tlhelp32.PROCESSENTRY32.ByReference ent = new Tlhelp32.PROCESSENTRY32.ByReference (); + if (!Kernel32.INSTANCE.Process32First (hSnap, ent)) return result; + do { + if (ent.th32ParentProcessID.intValue () == pid) result.add (new Win32Process (ent.th32ProcessID.intValue ())); + } while (Kernel32.INSTANCE.Process32Next (hSnap, ent)); + Kernel32.INSTANCE.CloseHandle (hSnap); + return result; + } +} From edc46f96b535a03fcce76541a64b0e0e9c14ec4f Mon Sep 17 00:00:00 2001 From: Eamonn Saunders Date: Wed, 5 Nov 2014 16:13:16 -0500 Subject: [PATCH 08/10] - Fix @throws documentation. 
---
 Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java b/Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java
index 84cc47be09..56617529ef 100644
--- a/Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java
+++ b/Core/src/org/sleuthkit/autopsy/coreutils/Win32Process.java
@@ -42,7 +42,7 @@ public class Win32Process {
      * Create a Win32Process object for the given Process object.
      * Reflection is used to construct a Windows process handle.
      * @param process A Java Process object
-     * @throws IOException
+     * @throws Exception
      */
     Win32Process (Process process) throws Exception
     {
@@ -64,7 +64,7 @@ public class Win32Process {
     /**
      * Create a Win32Process object for the given process id.
      * @param pid Process Id
-     * @throws IOException
+     * @throws Exception
      */
     Win32Process (int pid) throws Exception
     {

From 6b7380f2d0899b46ca2a4f4d3aa9d1083fda01c0 Mon Sep 17 00:00:00 2001
From: esaunders
Date: Wed, 5 Nov 2014 16:55:29 -0500
Subject: [PATCH 09/10] Replace NUL bytes in RegRipper output with spaces. If
 we don't do this the XML parser will fail.

---
 .../org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java
index 4256b352d9..1448740ff8 100644
--- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java
+++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java
@@ -393,6 +393,7 @@ class ExtractRegistry extends Extract {
         result = result.replaceAll("\\r", ""); //NON-NLS
         result = result.replaceAll("'", "&apos;"); //NON-NLS
         result = result.replaceAll("&", "&amp;"); //NON-NLS
+        result = result.replace('\0', ' '); // NON-NLS
         String enddoc = "</document>"; //NON-NLS
         String stringdoc = startdoc + result + enddoc;
         DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();

From f9305cea53a50ea343836f9969744a4e079f0eac Mon Sep 17 00:00:00 2001
From: Brian Carrier
Date: Thu, 6 Nov 2014 07:54:42 -0500
Subject: [PATCH 10/10] added reference to contentUtils from #937

---
 docs/doxygen/platformConcepts.dox | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/doxygen/platformConcepts.dox b/docs/doxygen/platformConcepts.dox
index 87f68813b5..8b943e6a01 100644
--- a/docs/doxygen/platformConcepts.dox
+++ b/docs/doxygen/platformConcepts.dox
@@ -55,6 +55,7 @@ The following are basic services that are available to any module:
 - Pop-up Windows: If you have a background task that needs to provide the user with feedback, you can use the org.sleuthkit.autopsy.coreutils.MessageNotifyUtil.Notify.show() method to make a message in the lower right hand area.
 - Module Settings: If you want to persist settings between invocations of Autopsy, you can use org.sleuthkit.autopsy.coreutils.ModuleSettings.
+- Content Utilities: The org.sleuthkit.autopsy.datamodel.ContentUtils class has utility methods to write files from Autopsy to local disk. Specifically the org.sleuthkit.autopsy.datamodel.ContentUtils.writeToFile() method.
 - Platform Utilities: The org.sleuthkit.autopsy.coreutils.PlatformUtil class allows you to save resources into the user folder and determine paths for the user folders. Specifically:
   - PlatformUtil.extractResourceToUserConfigDir()
   - PlatformUtil.isWindowsOS()
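As a closing illustration of the services referenced in the patch above, here
is a minimal sketch of a file ingest module that uses ContentUtils and
ModuleSettings (hypothetical module code; the module name, setting key, and
the writeToFile() overload taking a Content and a java.io.File are assumptions
for illustration):

    import java.io.File;
    import java.io.IOException;
    import org.sleuthkit.autopsy.coreutils.ModuleSettings;
    import org.sleuthkit.autopsy.datamodel.ContentUtils;
    import org.sleuthkit.autopsy.ingest.FileIngestModule;
    import org.sleuthkit.autopsy.ingest.IngestJobContext;
    import org.sleuthkit.datamodel.AbstractFile;

    public class ExportingFileIngestModule implements FileIngestModule {

        @Override
        public void startUp(IngestJobContext context) {
        }

        @Override
        public ProcessResult process(AbstractFile file) {
            // Module settings persist between invocations of Autopsy.
            String exportDir = ModuleSettings.getConfigSetting("ExportModule", "exportDir");
            try {
                // Write the file's content out of the case image to local disk.
                ContentUtils.writeToFile(file, new File(exportDir, file.getName()));
                return ProcessResult.OK;
            } catch (IOException ex) {
                return ProcessResult.ERROR;
            }
        }

        @Override
        public void shutDown() {
        }
    }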