/*
* Autopsy Forensic Browser
*
* Copyright 2011-2018 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.experimental.autoingest;

import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.logging.Level;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.openide.util.Lookup;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.casemodule.LocalFilesDSProcessor;
import org.sleuthkit.autopsy.corecomponentinterfaces.DataSourceProcessorCallback;
import static org.sleuthkit.autopsy.corecomponentinterfaces.DataSourceProcessorCallback.DataSourceProcessorResult.CRITICAL_ERRORS;
import org.sleuthkit.autopsy.corecomponentinterfaces.DataSourceProcessorProgressMonitor;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.datasourceprocessors.AutoIngestDataSourceProcessor;
import org.sleuthkit.autopsy.coreutils.TimeStampUtils;
import org.sleuthkit.autopsy.datasourceprocessors.RawDSProcessor;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.DataSource;

/*
* A runnable that adds an archive data source as well as data sources contained
* in the archive to the case database.
*/
class AddArchiveTask implements Runnable {
private final Logger logger = Logger.getLogger(AddArchiveTask.class.getName());
private final String deviceId;
private final String archivePath;
private final DataSourceProcessorProgressMonitor progressMonitor;
private final DataSourceProcessorCallback callback;
private boolean criticalErrorOccurred;
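// Lock used to coordinate with AddDataSourceCallback: after starting a DSP, this
// task wait()s on the lock and expects the callback to notify() it when the DSP finishes.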
private final Object archiveDspLock;
private static final String ARCHIVE_EXTRACTOR_MODULE_OUTPUT_DIR = "Archive Extractor";

/**
* Constructs a runnable task that adds an archive as well as data sources
* contained in the archive to the case database.
*
* @param deviceId An ASCII-printable identifier for the device
* associated with the data source that is intended
* to be unique across multiple cases (e.g., a UUID).
* @param archivePath Path to the archive file.
* @param progressMonitor Progress monitor to report progress during
* processing.
* @param callback Callback to call when processing is done.
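*
* <p>A minimal usage sketch (the progress monitor and callback are assumed to be
* supplied by the auto-ingest caller; the names here are illustrative):
* <pre>{@code
* AddArchiveTask task = new AddArchiveTask(deviceId, archivePath, monitor, callback);
* new Thread(task).start();
* }</pre>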
*/
AddArchiveTask(String deviceId, String archivePath, DataSourceProcessorProgressMonitor progressMonitor, DataSourceProcessorCallback callback) {
this.deviceId = deviceId;
this.archivePath = archivePath;
this.callback = callback;
this.progressMonitor = progressMonitor;
this.archiveDspLock = new Object();
}

/**
* Adds the archive to the case database.
*/
@Override
public void run() {
progressMonitor.setIndeterminate(true);
List<String> errorMessages = new ArrayList<>();
List<Content> newDataSources = new ArrayList<>();
DataSourceProcessorCallback.DataSourceProcessorResult result;
if (!ArchiveUtil.isArchive(Paths.get(archivePath))) {
criticalErrorOccurred = true;
logger.log(Level.SEVERE, String.format("Input data source is not a valid archive: %s", archivePath)); //NON-NLS
errorMessages.add("Input data source is not a valid archive: " + archivePath);
result = DataSourceProcessorCallback.DataSourceProcessorResult.CRITICAL_ERRORS;
callback.done(result, errorMessages, newDataSources);
return;
}
logger.log(Level.INFO, "Using Archive Extractor DSP to process archive {0} ", archivePath);
// extract the archive and pass the extracted folder as input
try {
Case currentCase = Case.getCurrentCaseThrows();
// create folder to extract archive to
Path destinationFolder = createDirectoryForFile(archivePath, currentCase.getModuleDirectory());
if (destinationFolder.toString().isEmpty()) {
// unable to create directory
criticalErrorOccurred = true;
errorMessages.add(String.format("Unable to create directory {0} to extract archive {1} ", new Object[]{destinationFolder.toString(), archivePath}));
logger.log(Level.SEVERE, "Unable to create directory {0} to extract archive {1} ", new Object[]{destinationFolder.toString(), archivePath});
return;
}
// extract contents of ZIP archive into destination folder
List<String> extractedFiles = new ArrayList<>();
int numExtractedFilesRemaining = 0;
try {
progressMonitor.setProgressText(String.format("Extracting archive contents to: %s", destinationFolder.toString()));
extractedFiles = ArchiveUtil.unpackArchiveFile(archivePath, destinationFolder.toString());
numExtractedFilesRemaining = extractedFiles.size();
} catch (ArchiveUtil.ArchiveExtractionException ex) {
// delete extracted contents
logger.log(Level.SEVERE,"Exception while extracting archive contents into {0}. Deleteing the directory", destinationFolder.toString());
FileUtils.deleteDirectory(destinationFolder.toFile());
throw ex;
}
// look up all AutoIngestDataSourceProcessors up front so that we only do it once.
// LocalFiles, RawDS, and Archive DSPs are removed from the list.
List<AutoIngestDataSourceProcessor> processorCandidates = getListOfValidDataSourceProcessors();
// do processing
for (String file : extractedFiles) {
// we only care about files, skip directories
File fileObject = new File(file);
if (fileObject.isDirectory()) {
numExtractedFilesRemaining--;
continue;
}
// identify all "valid" DSPs that can process this file
List<AutoIngestDataSourceProcessor> validDataSourceProcessors = getDataSourceProcessorsForFile(Paths.get(file), errorMessages, processorCandidates);
if (validDataSourceProcessors.isEmpty()) {
continue;
}
// identified a "valid" data source within the archive
progressMonitor.setProgressText(String.format("Adding: %s", file));
/*
* NOTE: we have to move the valid data sources to a separate
* folder and then add the data source from that folder. This is
* necessary because after all valid data sources have been
* identified, we are going to add the remaining extracted
* contents of the archive as a single logical file set. Hence,
* if we do not move the data sources out of the extracted
* contents folder, those data source files will get added twice
* and can potentially result in duplicate keyword hits.
*/
Path newFolder = createDirectoryForFile(file, currentCase.getModuleDirectory());
if (newFolder.toString().isEmpty()) {
// unable to create directory
criticalErrorOccurred = true;
errorMessages.add(String.format("Unable to create directory {0} to extract content of archive {1} ", new Object[]{newFolder.toString(), archivePath}));
logger.log(Level.SEVERE, "Unable to create directory {0} to extract content of archive {1} ", new Object[]{newFolder.toString(), archivePath});
return;
}
// Copy it to a different folder
FileUtils.copyFileToDirectory(fileObject, newFolder.toFile());
Path newFilePath = Paths.get(newFolder.toString(), FilenameUtils.getName(file));
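// from here on the DSPs operate on this copy, not on the file left in the extracted-contents folder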
// Try each DSP in decreasing order of confidence
boolean success = false;
for (AutoIngestDataSourceProcessor selectedProcessor : validDataSourceProcessors) {
logger.log(Level.INFO, "Using {0} to process extracted file {1} ", new Object[]{selectedProcessor.getDataSourceType(), file});
synchronized (archiveDspLock) {
UUID taskId = UUID.randomUUID();
currentCase.notifyAddingDataSource(taskId);
AutoIngestDataSource internalDataSource = new AutoIngestDataSource(deviceId, newFilePath);
DataSourceProcessorCallback internalArchiveDspCallBack = new AddDataSourceCallback(currentCase, internalDataSource, taskId, archiveDspLock);
selectedProcessor.process(deviceId, newFilePath, progressMonitor, internalArchiveDspCallBack);
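// process() is asynchronous; block until AddDataSourceCallback.done() notifies
// the lock (see the comment on the archiveDspLock field)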
archiveDspLock.wait();
// at this point we got the content object(s) from the current DSP.
// check whether the data source was processed successfully
if ((internalDataSource.getResultDataSourceProcessorResultCode() == CRITICAL_ERRORS)
|| internalDataSource.getContent().isEmpty()) {
// move on to the next DSP that can process this data source
for (String errorMessage : internalDataSource.getDataSourceProcessorErrorMessages()) {
logger.log(Level.SEVERE, "Data source processor {0} was unable to process {1}: {2}", new Object[]{selectedProcessor.getDataSourceType(), internalDataSource.getPath(), errorMessage});
}
continue;
}
// if we are here it means the data source was added successfully
success = true;
newDataSources.addAll(internalDataSource.getContent());
// Update the names for all new data sources to be the root archive plus the name of the data source
for (Content c : internalDataSource.getContent()) {
if (c instanceof DataSource) {
DataSource ds = (DataSource) c;
String newName = Paths.get(archivePath).getFileName() + "/" + ds.getName();
ds.setDisplayName(newName);
}
}
// skip all other DSPs for this data source
break;
}
}
if (success) {
// one of the DSPs successfully processed the data source. delete the
// copy of the data source in the original extracted archive folder.
// otherwise the data source is going to be added again as a logical file.
numExtractedFilesRemaining--;
FileUtils.deleteQuietly(fileObject);
} else {
// none of the DSPs were able to process the data source. delete the
// copy of the data source in the temporary folder. the data source is
// going to be added as a logical file with the rest of the extracted contents.
FileUtils.deleteQuietly(newFolder.toFile());
}
}
// after all archive contents have been examined (and moved to separate folders if necessary),
// add remaining extracted contents as one logical file set
if (numExtractedFilesRemaining > 0) {
progressMonitor.setProgressText(String.format("Adding: %s", destinationFolder.toString()));
logger.log(Level.INFO, "Adding directory {0} as logical file set", destinationFolder.toString());
synchronized (archiveDspLock) {
UUID taskId = UUID.randomUUID();
currentCase.notifyAddingDataSource(taskId);
AutoIngestDataSource internalDataSource = new AutoIngestDataSource(deviceId, destinationFolder);
DataSourceProcessorCallback internalArchiveDspCallBack = new AddDataSourceCallback(currentCase, internalDataSource, taskId, archiveDspLock);
// folder where archive was extracted to
List<String> pathsList = new ArrayList<>();
pathsList.add(destinationFolder.toString());
// use archive file name as the name of the logical file set
String archiveFileName = FilenameUtils.getName(archivePath);
LocalFilesDSProcessor localFilesDSP = new LocalFilesDSProcessor();
localFilesDSP.run(deviceId, archiveFileName, pathsList, progressMonitor, internalArchiveDspCallBack);
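// as above, block until AddDataSourceCallback signals that the local files DSP has finished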
archiveDspLock.wait();
// at this point we got the content object(s) from the current DSP
newDataSources.addAll(internalDataSource.getContent());
}
}
} catch (Exception ex) {
criticalErrorOccurred = true;
errorMessages.add(ex.getMessage());
logger.log(Level.SEVERE, String.format("Critical error occurred while extracting archive %s", archivePath), ex); //NON-NLS
} finally {
logger.log(Level.INFO, "Finished processing of archive {0}", archivePath);
progressMonitor.setProgress(100);
if (criticalErrorOccurred) {
result = DataSourceProcessorCallback.DataSourceProcessorResult.CRITICAL_ERRORS;
} else if (!errorMessages.isEmpty()) {
result = DataSourceProcessorCallback.DataSourceProcessorResult.NONCRITICAL_ERRORS;
} else {
result = DataSourceProcessorCallback.DataSourceProcessorResult.NO_ERRORS;
}
callback.done(result, errorMessages, newDataSources);
}
}

/**
* Get a list of data source processors. LocalFiles, RawDSProcessor, and
* ArchiveDSP are removed from the list.
*
* @return List of data source processors
*/
private List<AutoIngestDataSourceProcessor> getListOfValidDataSourceProcessors() {
Collection<? extends AutoIngestDataSourceProcessor> processorCandidates = Lookup.getDefault().lookupAll(AutoIngestDataSourceProcessor.class);
List<AutoIngestDataSourceProcessor> validDataSourceProcessors = processorCandidates.stream().collect(Collectors.toList());
for (Iterator<AutoIngestDataSourceProcessor> iterator = validDataSourceProcessors.iterator(); iterator.hasNext();) {
AutoIngestDataSourceProcessor selectedProcessor = iterator.next();
// skip local files, only looking for "valid" data sources.
// also skip RawDSP as we don't want to add random "bin" and "raw" files that may be inside archive
// as individual data sources.
// also skip nested archive files, those will be ingested as logical files and extracted during ingest
if ((selectedProcessor instanceof LocalFilesDSProcessor)
|| (selectedProcessor instanceof RawDSProcessor)
|| (selectedProcessor instanceof ArchiveExtractorDSProcessor)) {
iterator.remove();
}
}
return validDataSourceProcessors;
}

/**
* Get a list of data source processors that can process the data source of
* interest. The list is sorted by confidence in decreasing order.
*
* @param dataSourcePath Full path to the data source
* @param errorMessages List<String> for error messages
* @param errorMessages List of AutoIngestDataSourceProcessor to try
*
* @return Ordered list of applicable DSPs
*/
private List<AutoIngestDataSourceProcessor> getDataSourceProcessorsForFile(Path dataSourcePath, List<String> errorMessages,
List<AutoIngestDataSourceProcessor> processorCandidates) {
// Get an ordered list of data source processors to try
List<AutoIngestDataSourceProcessor> validDataSourceProcessorsForFile = Collections.emptyList();
try {
validDataSourceProcessorsForFile = DataSourceProcessorUtility.getOrderedListOfDataSourceProcessors(dataSourcePath, processorCandidates);
} catch (AutoIngestDataSourceProcessor.AutoIngestDataSourceProcessorException ex) {
criticalErrorOccurred = true;
errorMessages.add(ex.getMessage());
logger.log(Level.SEVERE, String.format("Critical error occurred while extracting archive %s", archivePath), ex); //NON-NLS
return Collections.emptyList();
}
return validDataSourceProcessorsForFile;
}

/**
* Create a directory in ModuleOutput folder based on input file name. A
* time stamp is appended to the directory name.
*
* @param fileName File name
* @param baseDirectory Base directory. Typically the case module output directory.
*
* @return Full path to the new directory
*/
private Path createDirectoryForFile(String fileName, String baseDirectory) {
// get file name without full path or extension
String fileNameNoExt = FilenameUtils.getBaseName(fileName);
// create folder to extract archive to
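// the appended time stamp keeps repeated extractions of the same file name from colliding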
Path newFolder = Paths.get(baseDirectory, ARCHIVE_EXTRACTOR_MODULE_OUTPUT_DIR, fileNameNoExt + "_" + TimeStampUtils.createTimeStamp());
if (!newFolder.toFile().mkdirs()) {
// unable to create directory
return Paths.get("");
}
return newFolder;
}
}