Merge pull request #6720 from wschaeferB/6737-EmbeddedFileExtractorCancellation

6737 embedded file extractor cancellation
This commit is contained in:
Richard Cordovano 2021-02-18 07:38:13 -05:00 committed by GitHub
commit 8055f1c656
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 112 additions and 18 deletions

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2015 Basis Technology Corp.
* Copyright 2015-2021 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -32,6 +32,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Level;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
@ -146,6 +147,9 @@ class DocumentEmbeddedContentExtractor {
boolean isContentExtractionSupported(AbstractFile abstractFile) {
String abstractFileMimeType = fileTypeDetector.getMIMEType(abstractFile);
for (SupportedExtractionFormats s : SupportedExtractionFormats.values()) {
if (checkForIngestCancellation(abstractFile)) {
break;
}
if (s.toString().equals(abstractFileMimeType)) {
abstractFileExtractionFormat = s;
return true;
@ -154,6 +158,25 @@ class DocumentEmbeddedContentExtractor {
return false;
}
/**
 * Standardizes the cancellation check performed while running ingest. When
 * cancellation is detected, an INFO level message identifying the document
 * file by name and object ID is logged, since the results extracted from
 * that file may be incomplete.
 *
 * The check only reports cancellation when both the file task executor and
 * the ingest job context are non-null, so it always returns false when the
 * DocumentEmbeddedContentExtractor is being used without an
 * IngestJobContext.
 *
 * @param file The file being extracted, this is only used for logging
 *             purposes.
 *
 * @return True if ingest has been cancelled, false otherwise.
 */
private boolean checkForIngestCancellation(AbstractFile file) {
    if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
        // The separator between the two placeholders keeps the file name from running into the "Object ID" label in the log output.
        LOGGER.log(Level.INFO, "Ingest was cancelled. Results extracted from the following document file may be incomplete. Name: {0}, Object ID: {1}", new Object[]{file.getName(), file.getId()});
        return true;
    }
    return false;
}
/**
* This method selects the appropriate process of extracting embedded
* content from files using either Tika or POI classes. Once the content has
@ -189,7 +212,9 @@ class DocumentEmbeddedContentExtractor {
LOGGER.log(Level.SEVERE, String.format("Error checking if %s (objID = %d) has already has been processed, skipping", abstractFile.getName(), abstractFile.getId()), e); //NON-NLS
return;
}
if (checkForIngestCancellation(abstractFile)) {
return;
}
// Call the appropriate extraction method based on mime type
switch (abstractFileExtractionFormat) {
case DOCX:
@ -219,6 +244,9 @@ class DocumentEmbeddedContentExtractor {
// the common task of adding abstractFile to derivedfiles is performed.
listOfExtractedImageAbstractFiles = new ArrayList<>();
for (ExtractedFile extractedImage : listOfExtractedImages) {
if (checkForIngestCancellation(abstractFile)) {
return;
}
try {
listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
@ -258,11 +286,12 @@ class DocumentEmbeddedContentExtractor {
officeParserConfig.setUseSAXPptxExtractor(true);
officeParserConfig.setUseSAXDocxExtractor(true);
parseContext.set(OfficeParserConfig.class, officeParserConfig);
EmbeddedDocumentExtractor extractor = new EmbeddedContentExtractor(parseContext);
parseContext.set(EmbeddedDocumentExtractor.class, extractor);
ReadContentInputStream stream = new ReadContentInputStream(abstractFile);
if (checkForIngestCancellation(abstractFile)) {
return null; //null will cause the calling method to return.
}
try {
parser.parse(stream, contentHandler, metadata, parseContext);
} catch (IOException | SAXException | TikaException ex) {
@ -322,6 +351,9 @@ class DocumentEmbeddedContentExtractor {
byte[] data = null;
int pictureNumber = 0; //added to ensure uniqueness in cases where suggestFullFileName returns duplicates
for (Picture picture : listOfAllPictures) {
if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
String fileName = UNKNOWN_IMAGE_NAME_PREFIX + pictureNumber + "." + picture.suggestFileExtension();
try {
data = picture.getContent();
@ -385,7 +417,9 @@ class DocumentEmbeddedContentExtractor {
List<ExtractedFile> listOfExtractedImages = new ArrayList<>();
byte[] data = null;
for (HSLFPictureData pictureData : listOfAllPictures) {
if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
// Get image extension, generate image name, write image to the module
// output folder, add it to the listOfExtractedImageAbstractFiles
PictureType type = pictureData.getType();
@ -475,6 +509,9 @@ class DocumentEmbeddedContentExtractor {
List<ExtractedFile> listOfExtractedImages = new ArrayList<>();
byte[] data = null;
for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
String imageName = UNKNOWN_IMAGE_NAME_PREFIX + i + "." + pictureData.suggestFileExtension(); //NON-NLS
try {
data = pictureData.getData();
@ -510,15 +547,17 @@ class DocumentEmbeddedContentExtractor {
//Convert output to hook into the existing logic for creating derived files
List<ExtractedFile> extractedFiles = new ArrayList<>();
extractedAttachments.entrySet().forEach((pathEntry) -> {
for (Entry<String, PDFAttachmentExtractor.NewResourceData> pathEntry : extractedAttachments.entrySet()) {
if (checkForIngestCancellation(abstractFile)) {
return null; //null will cause the calling method to return.
}
String fileName = pathEntry.getKey();
Path writeLocation = pathEntry.getValue().getPath();
int fileSize = pathEntry.getValue().getLength();
extractedFiles.add(new ExtractedFile(fileName,
getFileRelativePath(writeLocation.getFileName().toString()),
fileSize));
});
}
return extractedFiles;
} catch (IOException | SAXException | TikaException | InvalidPathException ex) {
LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs for file Name: " + abstractFile.getName() + " ID: " + abstractFile.getId(), ex); //NON-NLS

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2015-2020 Basis Technology Corp.
* Copyright 2015-2021 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -143,7 +143,7 @@ class SevenZipExtractor {
}
/**
* Contructs an embedded file extractor that uses 7Zip via Java bindings to
* Constructs an embedded file extractor that uses 7Zip via Java bindings to
* extract the contents of an archive file to a directory named for the
* archive file.
*
@ -184,6 +184,9 @@ class SevenZipExtractor {
boolean isSevenZipExtractionSupported(AbstractFile file) {
String fileMimeType = fileTypeDetector.getMIMEType(file);
for (SupportedArchiveExtractionFormats mimeType : SupportedArchiveExtractionFormats.values()) {
if (checkForIngestCancellation(file)) {
break;
}
if (mimeType.toString().equals(fileMimeType)) {
return true;
}
@ -191,6 +194,24 @@ class SevenZipExtractor {
return false;
}
/**
 * Standardizes the cancellation check performed while running ingest. When
 * cancellation is detected, an INFO level message identifying the archive
 * file by name and object ID is logged, since the results extracted from
 * that file may be incomplete.
 *
 * The check only reports cancellation when both the file task executor and
 * the ingest job context are non-null, so it always returns false when the
 * SevenZipExtractor is being used without an IngestJobContext.
 *
 * @param file The file being extracted, this is only used for logging
 *             purposes.
 *
 * @return True if ingest has been cancelled, false otherwise.
 */
private boolean checkForIngestCancellation(AbstractFile file) {
    if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
        // The separator between the two placeholders keeps the file name from running into the "Object ID" label in the log output.
        logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following archive file may be incomplete. Name: {0}, Object ID: {1}", new Object[]{file.getName(), file.getId()});
        return true;
    }
    return false;
}
/**
* Check if the item inside archive is a potential zipbomb
*
@ -567,7 +588,9 @@ class SevenZipExtractor {
unpackSuccessful = false;
return unpackSuccessful;
}
if (checkForIngestCancellation(archiveFile)) {
return false;
}
try {
List<AbstractFile> existingFiles = getAlreadyExtractedFiles(archiveFile, archiveFilePath);
for (AbstractFile file : existingFiles) {
@ -578,7 +601,9 @@ class SevenZipExtractor {
unpackSuccessful = false;
return unpackSuccessful;
}
if (checkForIngestCancellation(archiveFile)) {
return false;
}
parentAr = depthMap.get(archiveFile.getId());
if (parentAr == null) {
parentAr = new Archive(0, archiveFile.getId(), archiveFile);
@ -598,6 +623,9 @@ class SevenZipExtractor {
return unpackSuccessful;
}
}
if (checkForIngestCancellation(archiveFile)) {
return false;
}
IInArchive inArchive = null;
try {
stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile));
@ -605,6 +633,9 @@ class SevenZipExtractor {
// it will be opened incorrectly when using 7zip's built-in auto-detect functionality.
// All other archive formats are still opened using 7zip built-in auto-detect functionality.
ArchiveFormat options = get7ZipOptions(archiveFile);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
if (password == null) {
inArchive = SevenZip.openInArchive(options, stream);
} else {
@ -613,7 +644,9 @@ class SevenZipExtractor {
numItems = inArchive.getNumberOfItems();
progress.start(numItems);
progressStarted = true;
if (checkForIngestCancellation(archiveFile)) {
return false;
}
//setup the archive local root folder
final String uniqueArchiveFileName = FileUtil.escapeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
if (!makeExtractedFilesDirectory(uniqueArchiveFileName)) {
@ -634,6 +667,9 @@ class SevenZipExtractor {
Map<Integer, InArchiveItemDetails> archiveDetailsMap = new HashMap<>();
for (int inArchiveItemIndex = 0; inArchiveItemIndex < numItems; inArchiveItemIndex++) {
if (checkForIngestCancellation(archiveFile)) {
return false;
}
progress.progress(String.format("%s: Analyzing archive metadata and creating local files (%d of %d)", currentArchiveName, inArchiveItemIndex + 1, numItems), 0);
if (isZipBombArchiveItemCheck(archiveFile, inArchive, inArchiveItemIndex, depthMap, escapedArchiveFilePath)) {
unpackSuccessful = false;
@ -643,7 +679,9 @@ class SevenZipExtractor {
String pathInArchive = getPathInArchive(inArchive, inArchiveItemIndex, archiveFile);
byte[] pathBytesInArchive = getPathBytesInArchive(inArchive, inArchiveItemIndex, archiveFile);
UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive, pathBytesInArchive);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
final boolean isEncrypted = (Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.ENCRYPTED);
if (isEncrypted && password == null) {
@ -681,6 +719,9 @@ class SevenZipExtractor {
freeDiskSpace = newDiskSpace;
}
}
if (checkForIngestCancellation(archiveFile)) {
return false;
}
final String uniqueExtractedName = FileUtil.escapeFileName(uniqueArchiveFileName + File.separator + (inArchiveItemIndex / 1000) + File.separator + inArchiveItemIndex + "_" + new File(pathInArchive).getName());
final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName;
final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName;
@ -699,7 +740,9 @@ class SevenZipExtractor {
localFileExists = false;
logger.log(Level.SEVERE, String.format("Error fiding or creating %s", localFile.getAbsolutePath()), ex); //NON-NLS
}
if (checkForIngestCancellation(archiveFile)) {
return false;
}
// skip the rest of this loop if we couldn't create the file
//continue will skip details from being added to the map
if (!localFileExists) {
@ -716,7 +759,9 @@ class SevenZipExtractor {
}
int[] extractionIndices = getExtractableFilesFromDetailsMap(archiveDetailsMap);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
StandardIArchiveExtractCallback archiveCallBack
= new StandardIArchiveExtractCallback(
inArchive, archiveFile, progress,
@ -726,7 +771,9 @@ class SevenZipExtractor {
//for efficiency. Hence, the HashMap and linear processing of
//inArchiveItemIndex. False indicates non-test mode
inArchive.extract(extractionIndices, false, archiveCallBack);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
unpackSuccessful &= archiveCallBack.wasSuccessful();
archiveDetailsMap = null;
@ -735,9 +782,15 @@ class SevenZipExtractor {
// intermediate nodes since the order is not guaranteed
try {
unpackedTree.updateOrAddFileToCaseRec(statusMap, archiveFilePath);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
unpackedFiles = unpackedTree.getAllFileObjects();
//check if children are archives, update archive depth tracking
for (int i = 0; i < unpackedFiles.size(); i++) {
if (checkForIngestCancellation(archiveFile)) {
return false;
}
progress.progress(String.format("%s: Searching for nested archives (%d of %d)", currentArchiveName, i + 1, unpackedFiles.size()));
AbstractFile unpackedFile = unpackedFiles.get(i);
if (unpackedFile == null) {
@ -792,7 +845,9 @@ class SevenZipExtractor {
progress.finish();
}
}
if (checkForIngestCancellation(archiveFile)) {
return false;
}
//create artifact and send user message
if (hasEncrypted) {
String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;