diff --git a/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestPipeline.java b/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestPipeline.java
index f39ec0494b..b603166981 100755
--- a/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestPipeline.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/DataSourceIngestPipeline.java
@@ -43,7 +43,7 @@ final class DataSourceIngestPipeline {
                 DataSourceIngestModuleDecorator module = new DataSourceIngestModuleDecorator(template.createDataSourceIngestModule(), template.getModuleName());
                 modules.add(module);
             }
-        }        
+        }
     }
 
     boolean isEmpty() {
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java
index c1fe37824f..c050fd0a1c 100644
--- a/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestJob.java
@@ -20,7 +20,9 @@ package org.sleuthkit.autopsy.ingest;
 
 import java.util.ArrayList;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.atomic.AtomicLong;
@@ -114,7 +116,7 @@ final class IngestJob {
         long jobId = nextIngestJobId.incrementAndGet();
         IngestJob job = new IngestJob(jobId, dataSource, processUnallocatedSpace);
         errors = job.start(ingestModuleTemplates);
-        if (errors.isEmpty() && (job.hasDataSourceIngestPipeline() || job.hasFileIngestPipeline())) {
+        if (errors.isEmpty() && (job.hasDataSourceIngestPipeline() || job.hasFileIngestPipeline())) { // RJCTODO: What about 2nd stage only?
             ingestJobsById.put(jobId, job);
             IngestManager.getInstance().fireIngestJobStarted(jobId);
             IngestJob.ingestScheduler.scheduleIngestTasks(job);
@@ -135,7 +137,8 @@ final class IngestJob {
 
     /**
      * RJCTODO
-     * @return
+     *
+     * @return
      */
     static List<IngestJobSnapshot> getJobSnapshots() {
         List<IngestJobSnapshot> snapShots = new ArrayList<>();
@@ -144,7 +147,7 @@ final class IngestJob {
         }
         return snapShots;
     }
-    
+
     /**
      * RJCTODO
      */
@@ -555,20 +558,38 @@ final class IngestJob {
      * @throws InterruptedException
      */
    private void createIngestPipelines(List<IngestModuleTemplate> ingestModuleTemplates) throws InterruptedException {
-        // RJCTODO: Use config file
-        // Sort the ingest module templates as required for the pipelines.
-        List<IngestModuleTemplate> firstStageDataSourceModuleTemplates = new ArrayList<>();
-        List<IngestModuleTemplate> secondStageDataSourceModuleTemplates = new ArrayList<>();
-        List<IngestModuleTemplate> fileIngestModuleTemplates = new ArrayList<>();
+        // RJCTODO: Improve variable names!
+
+        // Make mappings of ingest module factory class names to templates.
+        Map<String, IngestModuleTemplate> dataSourceModuleTemplates = new HashMap<>();
+        Map<String, IngestModuleTemplate> fileModuleTemplates = new HashMap<>();
         for (IngestModuleTemplate template : ingestModuleTemplates) {
             if (template.isDataSourceIngestModuleTemplate()) {
-                firstStageDataSourceModuleTemplates.add(template);
-            } else {
-                firstStageDataSourceModuleTemplates.add(template);
+                dataSourceModuleTemplates.put(template.getModuleFactory().getClass().getCanonicalName(), template);
+            }
+            if (template.isFileIngestModuleTemplate()) {
+                fileModuleTemplates.put(template.getModuleFactory().getClass().getCanonicalName(), template);
             }
         }
-        // Contruct the pipelines.
+
+        // Use the mappings and the ingest pipelines configuration to create
+        // ordered lists of ingest module templates for each ingest pipeline.
+        IngestPipelinesConfiguration pipelineConfigs = IngestPipelinesConfiguration.getInstance();
+        List<IngestModuleTemplate> firstStageDataSourceModuleTemplates = this.getConfiguredIngestModuleTemplates(dataSourceModuleTemplates, pipelineConfigs.getStageOneDataSourceIngestPipelineConfig());
+        List<IngestModuleTemplate> fileIngestModuleTemplates = this.getConfiguredIngestModuleTemplates(fileModuleTemplates, pipelineConfigs.getFileIngestPipelineConfig());
+        List<IngestModuleTemplate> secondStageDataSourceModuleTemplates = this.getConfiguredIngestModuleTemplates(dataSourceModuleTemplates, pipelineConfigs.getStageTwoDataSourceIngestPipelineConfig());
+
+        // Add any module templates that were not specified in the pipeline
+        // configurations to an appropriate pipeline - either the first stage
+        // data source ingest pipeline or the file ingest pipeline.
+        for (IngestModuleTemplate template : dataSourceModuleTemplates.values()) {
+            firstStageDataSourceModuleTemplates.add(template);
+        }
+        for (IngestModuleTemplate template : fileModuleTemplates.values()) {
+            fileIngestModuleTemplates.add(template);
+        }
+
+        // Construct the data source ingest pipelines.
         this.firstStageDataSourceIngestPipeline = new DataSourceIngestPipeline(this, firstStageDataSourceModuleTemplates);
         this.secondStageDataSourceIngestPipeline = new DataSourceIngestPipeline(this, secondStageDataSourceModuleTemplates);
         this.dataSourceIngestPipeline = firstStageDataSourceIngestPipeline;
@@ -580,6 +601,28 @@ final class IngestJob {
         }
     }
 
+    /**
+     * Use an ordered list of ingest module factory class names to create an
+     * ordered subset of a collection of ingest module templates. The ingest
+     * module templates are removed from the input collection as they are
+     * added to the output collection.
+     *
+     * @param ingestModuleTemplates A mapping of ingest module factory class
+     * names to ingest module templates.
+     * @param pipelineConfig An ordered list of ingest module factory class
+     * names representing an ingest pipeline.
+     * @return An ordered list of ingest module templates.
+     */
+    List<IngestModuleTemplate> getConfiguredIngestModuleTemplates(Map<String, IngestModuleTemplate> ingestModuleTemplates, List<String> pipelineConfig) {
+        List<IngestModuleTemplate> templates = new ArrayList<>();
+        for (String moduleClassName : pipelineConfig) {
+            if (ingestModuleTemplates.containsKey(moduleClassName)) {
+                templates.add(ingestModuleTemplates.remove(moduleClassName));
+            }
+        }
+        return templates;
+    }
+
     /**
      * Starts up each of the file and data source ingest modules to collect
      * possible errors.
@@ -748,7 +791,7 @@ final class IngestJob {
                 this.dataSourceIngestProgress = null;
             }
         }
-        
+
         IngestJob.ingestJobsById.remove(this.id);
         if (!this.isCancelled()) {
             logger.log(Level.INFO, "Ingest job {0} completed", this.id);
@@ -784,7 +827,7 @@ final class IngestJob {
     class IngestJobSnapshot {
 
         private final long jobId;
-        private final String dataSource;        
+        private final String dataSource;
         private final long startTime;
         private final long processedFiles;
         private final long estimatedFilesToProcess;
@@ -809,7 +852,8 @@
 
         /**
          * RJCTODO
-         * @return
+         *
+         * @return
          */
         long getJobId() {
             return this.jobId;
@@ -817,12 +861,13 @@
 
         /**
          * RJCTODO
-         * @return
+         *
+         * @return
          */
         String getDataSource() {
             return dataSource;
-        }
-        
+        }
+
         /**
          * Gets files per second throughput since job started.
         *
@@ -870,10 +915,11 @@ final class IngestJob {
         long getFilesEstimated() {
             return estimatedFilesToProcess;
         }
-        
+
         /**
          * RJCTODO
-         * @return
+         *
+         * @return
          */
         long getRootQueueSize() {
             return this.tasksSnapshot.getRootQueueSize();
@@ -881,7 +927,8 @@
 
         /**
          * RJCTODO
-         * @return
+         *
+         * @return
         */
         long getDirQueueSize() {
             return this.tasksSnapshot.getDirQueueSize();
@@ -889,7 +936,8 @@
 
         /**
          * RJCTODO
-         * @return
+         *
+         * @return
         */
         long getFileQueueSize() {
             return this.tasksSnapshot.getFileQueueSize();
@@ -897,7 +945,8 @@
 
         /**
          * RJCTODO
-         * @return
+         *
+         * @return
         */
         long getDsQueueSize() {
             return this.tasksSnapshot.getDsQueueSize();
@@ -905,12 +954,13 @@
 
         /**
          * RJCTODO
-         * @return
+         *
+         * @return
         */
         long getRunningListSize() {
             return this.tasksSnapshot.getRunningListSize();
-        }
-        
+        }
+
     }
 }
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java
index e7ce367a64..e7c71bab23 100644
--- a/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestModuleFactoryLoader.java
@@ -51,6 +51,8 @@ final class IngestModuleFactoryLoader {
     private static final String SAMPLE_EXECUTABLE_MODULE_FACTORY_CLASS_NAME = SampleExecutableIngestModuleFactory.class.getCanonicalName();
     private static final ArrayList<String> coreModuleOrdering = new ArrayList<String>() {
         {
+            // RJCTODO: Find out where to put the PhotoRec carver
+
             // The ordering of the core ingest module factories implemented
             // using Java is hard-coded.
             add("org.sleuthkit.autopsy.recentactivity.RecentActivityExtracterModuleFactory"); //NON-NLS
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java
index d829a7e300..f9fe24fb55 100755
--- a/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java
+++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestPipelinesConfiguration.java
@@ -18,13 +18,13 @@
  */
 package org.sleuthkit.autopsy.ingest;
 
-import java.io.File;
 import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.logging.Level;
 import org.sleuthkit.autopsy.coreutils.Logger;
-import org.sleuthkit.autopsy.coreutils.ModuleSettings;
 import org.sleuthkit.autopsy.coreutils.PlatformUtil;
 import org.sleuthkit.autopsy.coreutils.XMLUtil;
 import org.w3c.dom.Document;
@@ -33,32 +33,36 @@ import org.w3c.dom.NodeList;
 
 /**
  * Provides data source and file ingest pipeline configurations as ordered lists
- * of ingest module class names. The order of the module class names indicates
- * the desired sequence of ingest module instances in an ingest modules
- * pipeline.
+ * of ingest module factory class names.
  */
 final class IngestPipelinesConfiguration {
 
     private static final Logger logger = Logger.getLogger(IngestPipelinesConfiguration.class.getName());
-    private static final String PIPELINE_CONFIG_FILE_VERSION_KEY = "PipelineConfigFileVersion"; //NON-NLS
-    private static final String PIPELINE_CONFIG_FILE_VERSION_NO_STRING = "1";
-    private static final int PIPELINE_CONFIG_FILE_VERSION_NO = 1;
-    private static final String PIPELINES_CONFIG_FILE = "pipeline_config.xml"; //NON-NLS
-    private static final String PIPELINES_CONFIG_FILE_XSD = "PipelineConfigSchema.xsd"; //NON-NLS
-    private static final String XML_PIPELINE_ELEM = "PIPELINE"; //NON-NLS
-    private static final String XML_PIPELINE_TYPE_ATTR = "type"; //NON-NLS
-    private static final String DATA_SOURCE_INGEST_PIPELINE_TYPE = "ImageAnalysis"; //NON-NLS
+    private static final String PIPELINES_CONFIG_FILE = "PipelineConfig.xml"; //NON-NLS
+    private static final String PIPELINE_ELEM = "PIPELINE"; //NON-NLS
+    private static final int NUMBER_OF_PIPELINE_DEFINITIONS = 3;
+    private static final String PIPELINE_TYPE_ATTR = "type"; //NON-NLS
+    private static final String STAGE_ONE_DATA_SOURCE_INGEST_PIPELINE_ELEM = "ImageAnalysisStageOne"; //NON-NLS
+    private static final String STAGE_TWO_DATA_SOURCE_INGEST_PIPELINE_ELEM = "ImageAnalysisStageTwo"; //NON-NLS
     private static final String FILE_INGEST_PIPELINE_TYPE = "FileAnalysis"; //NON-NLS
-    private static final String XML_MODULE_ELEM = "MODULE"; //NON-NLS
+    private static final String INGEST_MODULE_ELEM = "MODULE"; //NON-NLS
     private static final String XML_MODULE_CLASS_NAME_ATTR = "location"; //NON-NLS
+
+    private static IngestPipelinesConfiguration instance;
-    private final List<String> dataSourceIngestPipelineConfig = new ArrayList<>();
+
+    private final List<String> stageOneDataSourceIngestPipelineConfig = new ArrayList<>();
     private final List<String> fileIngestPipelineConfig = new ArrayList<>();
+    private final List<String> stageTwoDataSourceIngestPipelineConfig = new ArrayList<>();
 
-    private IngestPipelinesConfiguration() {
-        readPipelinesConfigurationFile();
-    }
-    
+    // RJCTODO: Bring this code back into use, use it in IngestJob to sort things
+    // into the now three pipelines. Other NBMs built on top of Autopsy that
+    // have custom pipeline config files can do a PlatformUtil.extractResourceToUserConfigDir()
+    // before this is called.
+    /**
+     * Gets the ingest pipelines configuration singleton.
+     *
+     * @return The singleton.
+     */
     synchronized static IngestPipelinesConfiguration getInstance() {
         if (instance == null) {
             Logger.getLogger(IngestPipelinesConfiguration.class.getName()).log(Level.INFO, "Creating ingest module loader instance"); //NON-NLS
@@ -67,57 +71,90 @@ final class IngestPipelinesConfiguration {
         return instance;
     }
 
-    List<String> getDataSourceIngestPipelineConfig() {
-        return new ArrayList<>(dataSourceIngestPipelineConfig);
+    /**
+     * Constructs an object that provides data source and file ingest pipeline
+     * configurations as ordered lists of ingest module factory class names.
+     */
+    private IngestPipelinesConfiguration() {
+        this.readPipelinesConfigurationFile();
     }
 
+    /**
+     * Gets the ordered list of ingest module factory class names for the
+     * first stage data source ingest pipeline.
+     *
+     * @return An ordered list of ingest module factory class names.
+     */
+    List<String> getStageOneDataSourceIngestPipelineConfig() {
+        return new ArrayList<>(stageOneDataSourceIngestPipelineConfig);
+    }
+
+    /**
+     * Gets the ordered list of ingest module factory class names for the
+     * file ingest pipeline.
+     *
+     * @return An ordered list of ingest module factory class names.
+     */
     List<String> getFileIngestPipelineConfig() {
         return new ArrayList<>(fileIngestPipelineConfig);
     }
 
+    /**
+     * Gets the ordered list of ingest module factory class names for the
+     * second stage data source ingest pipeline.
+     *
+     * @return An ordered list of ingest module factory class names.
+     */
+    List<String> getStageTwoDataSourceIngestPipelineConfig() {
+        return new ArrayList<>(stageTwoDataSourceIngestPipelineConfig);
+    }
+
+    /**
+     * Attempts to read the ingest pipeline configuration data from an XML file.
+     */
     private void readPipelinesConfigurationFile() {
         try {
-            boolean overWrite;
-            if (!ModuleSettings.settingExists(this.getClass().getSimpleName(), PIPELINE_CONFIG_FILE_VERSION_KEY)) {
-                ModuleSettings.setConfigSetting(this.getClass().getSimpleName(), PIPELINE_CONFIG_FILE_VERSION_KEY, PIPELINE_CONFIG_FILE_VERSION_NO_STRING);
-                overWrite = true;
-            } else {
-                int versionNumber = Integer.parseInt(ModuleSettings.getConfigSetting(this.getClass().getSimpleName(), PIPELINE_CONFIG_FILE_VERSION_KEY));
-                overWrite = versionNumber < PIPELINE_CONFIG_FILE_VERSION_NO;
-                // TODO: Migrate user edits
-            }
-            PlatformUtil.extractResourceToUserConfigDir(IngestPipelinesConfiguration.class, PIPELINES_CONFIG_FILE, overWrite);
+            PlatformUtil.extractResourceToUserConfigDir(IngestPipelinesConfiguration.class, PIPELINES_CONFIG_FILE, false);
 
-            String configFilePath = PlatformUtil.getUserConfigDirectory() + File.separator + PIPELINES_CONFIG_FILE;
-            Document doc = XMLUtil.loadDoc(IngestPipelinesConfiguration.class, configFilePath);
+            Path configFilePath = Paths.get(PlatformUtil.getUserConfigDirectory(), PIPELINES_CONFIG_FILE);
+            Document doc = XMLUtil.loadDoc(IngestPipelinesConfiguration.class, configFilePath.toAbsolutePath().toString());
             if (doc == null) {
                 return;
             }
 
+            // Get the document root element.
             Element rootElement = doc.getDocumentElement();
-            if (rootElement == null) {
+            if (null == rootElement) {
                 logger.log(Level.SEVERE, "Invalid pipelines config file"); //NON-NLS
                 return;
             }
 
-            NodeList pipelineElements = rootElement.getElementsByTagName(XML_PIPELINE_ELEM);
+            // Get the pipeline elements and confirm that the correct number is
+            // present.
+            NodeList pipelineElements = rootElement.getElementsByTagName(IngestPipelinesConfiguration.PIPELINE_ELEM);
             int numPipelines = pipelineElements.getLength();
-            if (numPipelines < 1 || numPipelines > 2) {
+            if (numPipelines != IngestPipelinesConfiguration.NUMBER_OF_PIPELINE_DEFINITIONS) {
                 logger.log(Level.SEVERE, "Invalid pipelines config file"); //NON-NLS
                 return;
             }
 
+            // Parse the pipeline elements to populate the pipeline
+            // configuration lists.
+            // RJCTODO: Should check that each element is unique. Or could try the XSD bit.
            List<String> pipelineConfig = null;
            for (int pipelineNum = 0; pipelineNum < numPipelines; ++pipelineNum) {
                Element pipelineElement = (Element) pipelineElements.item(pipelineNum);
-                String pipelineTypeAttr = pipelineElement.getAttribute(XML_PIPELINE_TYPE_ATTR);
-                if (pipelineTypeAttr != null) {
+                String pipelineTypeAttr = pipelineElement.getAttribute(PIPELINE_TYPE_ATTR);
+                if (null != pipelineTypeAttr) {
                     switch (pipelineTypeAttr) {
-                        case DATA_SOURCE_INGEST_PIPELINE_TYPE:
-                            pipelineConfig = dataSourceIngestPipelineConfig;
+                        case STAGE_ONE_DATA_SOURCE_INGEST_PIPELINE_ELEM:
+                            pipelineConfig = this.stageOneDataSourceIngestPipelineConfig;
                             break;
                         case FILE_INGEST_PIPELINE_TYPE:
-                            pipelineConfig = fileIngestPipelineConfig;
+                            pipelineConfig = this.fileIngestPipelineConfig;
+                            break;
+                        case STAGE_TWO_DATA_SOURCE_INGEST_PIPELINE_ELEM:
+                            pipelineConfig = this.stageTwoDataSourceIngestPipelineConfig;
                             break;
                         default:
                             logger.log(Level.SEVERE, "Invalid pipelines config file"); //NON-NLS
@@ -128,16 +165,13 @@
                 // Create an ordered list of class names. The sequence of class
                 // names defines the sequence of modules in the pipeline.
                 if (pipelineConfig != null) {
-                    NodeList modulesElems = pipelineElement.getElementsByTagName(XML_MODULE_ELEM);
+                    NodeList modulesElems = pipelineElement.getElementsByTagName(INGEST_MODULE_ELEM);
                     int numModules = modulesElems.getLength();
-                    if (numModules == 0) {
-                        break;
-                    }
                     for (int moduleNum = 0; moduleNum < numModules; ++moduleNum) {
                         Element moduleElement = (Element) modulesElems.item(moduleNum);
-                        final String moduleClassName = moduleElement.getAttribute(XML_MODULE_CLASS_NAME_ATTR);
-                        if (moduleClassName != null) {
-                            pipelineConfig.add(moduleClassName);
+                        String className = moduleElement.getTextContent();
+                        if (null != className && !className.isEmpty()) {
+                            pipelineConfig.add(className);
                         }
                     }
                 }
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml b/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml
new file mode 100644
index 0000000000..36de99011f
--- /dev/null
+++ b/Core/src/org/sleuthkit/autopsy/ingest/PipelineConfig.xml
@@ -0,0 +1,25 @@
+
+
+
+
+    org.sleuthkit.autopsy.recentactivity.RecentActivityExtracterModuleFactory
+    org.sleuthkit.autopsy.modules.android.AndroidModuleFactory
+
+
+
+    org.sleuthkit.autopsy.modules.hashdatabase.HashLookupModuleFactory
+    org.sleuthkit.autopsy.modules.filetypeid.FileTypeIdModuleFactory
+    org.sleuthkit.autopsy.modules.sevenzip.ArchiveFileExtractorModuleFactory
+    org.sleuthkit.autopsy.modules.exif.ExifParserModuleFactory
+    org.sleuthkit.autopsy.keywordsearch.KeywordSearchModuleFactory
+    org.sleuthkit.autopsy.thunderbirdparser.EmailParserModuleFactory
+    org.sleuthkit.autopsy.modules.fileextmismatch.FileExtMismatchDetectorModuleFactory
+    org.sleuthkit.autopsy.modules.interestingitems.InterestingItemsIngestModuleFactory
+
+
+
+    org.sleuthkit.autopsy.modules.e01verify.E01VerifierModuleFactory
+
+
+
+
diff --git a/Core/src/org/sleuthkit/autopsy/ingest/pipeline_config.xml b/Core/src/org/sleuthkit/autopsy/ingest/pipeline_config.xml
deleted file mode 100644
index 542f168986..0000000000
--- a/Core/src/org/sleuthkit/autopsy/ingest/pipeline_config.xml
+++ /dev/null
@@ -1,18 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
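
For reference, a minimal sketch of the layout that the rewritten IngestPipelinesConfiguration expects from the new PipelineConfig.xml: three PIPELINE elements distinguished by a "type" attribute ("ImageAnalysisStageOne", "FileAnalysis", "ImageAnalysisStageTwo"), each containing MODULE elements whose text content is an ingest module factory class name. The element and attribute names come from the parser constants above; the PIPELINE_CONFIG root element name, the XML declaration, and the abbreviated module list are illustrative assumptions, not the verbatim file contents.

<?xml version="1.0" encoding="UTF-8"?>
<!-- Root element name is assumed; the PIPELINE/MODULE element names and the
     "type" values match the constants used by IngestPipelinesConfiguration. -->
<PIPELINE_CONFIG>
    <PIPELINE type="ImageAnalysisStageOne">
        <MODULE>org.sleuthkit.autopsy.recentactivity.RecentActivityExtracterModuleFactory</MODULE>
        <MODULE>org.sleuthkit.autopsy.modules.android.AndroidModuleFactory</MODULE>
    </PIPELINE>
    <PIPELINE type="FileAnalysis">
        <MODULE>org.sleuthkit.autopsy.modules.hashdatabase.HashLookupModuleFactory</MODULE>
        <MODULE>org.sleuthkit.autopsy.modules.filetypeid.FileTypeIdModuleFactory</MODULE>
    </PIPELINE>
    <PIPELINE type="ImageAnalysisStageTwo">
        <MODULE>org.sleuthkit.autopsy.modules.e01verify.E01VerifierModuleFactory</MODULE>
    </PIPELINE>
</PIPELINE_CONFIG>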