Merge pull request #6720 from wschaeferB/6737-EmbeddedFileExtractorCancellation

6737 embedded file extractor cancellation
This commit is contained in:
Richard Cordovano 2021-02-18 07:38:13 -05:00 committed by GitHub
commit 8055f1c656
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 112 additions and 18 deletions

View File

@ -1,7 +1,7 @@
/* /*
* Autopsy Forensic Browser * Autopsy Forensic Browser
* *
* Copyright 2015 Basis Technology Corp. * Copyright 2015-2021 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org * Contact: carrier <at> sleuthkit <dot> org
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
@ -32,6 +32,7 @@ import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Level; import java.util.logging.Level;
import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
@ -146,6 +147,9 @@ class DocumentEmbeddedContentExtractor {
boolean isContentExtractionSupported(AbstractFile abstractFile) { boolean isContentExtractionSupported(AbstractFile abstractFile) {
String abstractFileMimeType = fileTypeDetector.getMIMEType(abstractFile); String abstractFileMimeType = fileTypeDetector.getMIMEType(abstractFile);
for (SupportedExtractionFormats s : SupportedExtractionFormats.values()) { for (SupportedExtractionFormats s : SupportedExtractionFormats.values()) {
if (checkForIngestCancellation(abstractFile)) {
break;
}
if (s.toString().equals(abstractFileMimeType)) { if (s.toString().equals(abstractFileMimeType)) {
abstractFileExtractionFormat = s; abstractFileExtractionFormat = s;
return true; return true;
@ -154,6 +158,25 @@ class DocumentEmbeddedContentExtractor {
return false; return false;
} }
/**
 * Private helper method to standardize the cancellation check that is
 * performed when running ingest. Will return false if the
 * DocumentEmbeddedContentExtractor is being used without an
 * IngestJobContext (or without a file task executor), because there is then
 * no ingest job that could have been cancelled.
 *
 * When cancellation is detected, an INFO level message naming the file is
 * logged so that incomplete extraction results can be traced back to the
 * cancellation.
 *
 * @param file The file being extracted, this is only used for logging
 *             purposes.
 *
 * @return True if ingest has been cancelled, false otherwise.
 */
private boolean checkForIngestCancellation(AbstractFile file) {
    if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
        // A separator is needed between the two placeholders, otherwise the
        // file name and the "Object ID" label run together in the log output.
        LOGGER.log(Level.INFO, "Ingest was cancelled. Results extracted from the following document file may be incomplete. Name: {0}, Object ID: {1}", new Object[]{file.getName(), file.getId()}); //NON-NLS
        return true;
    }
    return false;
}
/** /**
* This method selects the appropriate process of extracting embedded * This method selects the appropriate process of extracting embedded
* content from files using either Tika or POI classes. Once the content has * content from files using either Tika or POI classes. Once the content has
@ -189,7 +212,9 @@ class DocumentEmbeddedContentExtractor {
LOGGER.log(Level.SEVERE, String.format("Error checking if %s (objID = %d) has already has been processed, skipping", abstractFile.getName(), abstractFile.getId()), e); //NON-NLS LOGGER.log(Level.SEVERE, String.format("Error checking if %s (objID = %d) has already has been processed, skipping", abstractFile.getName(), abstractFile.getId()), e); //NON-NLS
return; return;
} }
if (checkForIngestCancellation(abstractFile)) {
return;
}
// Call the appropriate extraction method based on mime type // Call the appropriate extraction method based on mime type
switch (abstractFileExtractionFormat) { switch (abstractFileExtractionFormat) {
case DOCX: case DOCX:
@ -219,6 +244,9 @@ class DocumentEmbeddedContentExtractor {
// the common task of adding abstractFile to derivedfiles is performed. // the common task of adding abstractFile to derivedfiles is performed.
listOfExtractedImageAbstractFiles = new ArrayList<>(); listOfExtractedImageAbstractFiles = new ArrayList<>();
for (ExtractedFile extractedImage : listOfExtractedImages) { for (ExtractedFile extractedImage : listOfExtractedImages) {
if (checkForIngestCancellation(abstractFile)) {
return;
}
try { try {
listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(), listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(), extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
@ -258,11 +286,12 @@ class DocumentEmbeddedContentExtractor {
officeParserConfig.setUseSAXPptxExtractor(true); officeParserConfig.setUseSAXPptxExtractor(true);
officeParserConfig.setUseSAXDocxExtractor(true); officeParserConfig.setUseSAXDocxExtractor(true);
parseContext.set(OfficeParserConfig.class, officeParserConfig); parseContext.set(OfficeParserConfig.class, officeParserConfig);
EmbeddedDocumentExtractor extractor = new EmbeddedContentExtractor(parseContext); EmbeddedDocumentExtractor extractor = new EmbeddedContentExtractor(parseContext);
parseContext.set(EmbeddedDocumentExtractor.class, extractor); parseContext.set(EmbeddedDocumentExtractor.class, extractor);
ReadContentInputStream stream = new ReadContentInputStream(abstractFile); ReadContentInputStream stream = new ReadContentInputStream(abstractFile);
if (checkForIngestCancellation(abstractFile)) {
return null; //null will cause the calling method to return.
}
try { try {
parser.parse(stream, contentHandler, metadata, parseContext); parser.parse(stream, contentHandler, metadata, parseContext);
} catch (IOException | SAXException | TikaException ex) { } catch (IOException | SAXException | TikaException ex) {
@ -322,6 +351,9 @@ class DocumentEmbeddedContentExtractor {
byte[] data = null; byte[] data = null;
int pictureNumber = 0; //added to ensure uniqueness in cases where suggestFullFileName returns duplicates int pictureNumber = 0; //added to ensure uniqueness in cases where suggestFullFileName returns duplicates
for (Picture picture : listOfAllPictures) { for (Picture picture : listOfAllPictures) {
if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
String fileName = UNKNOWN_IMAGE_NAME_PREFIX + pictureNumber + "." + picture.suggestFileExtension(); String fileName = UNKNOWN_IMAGE_NAME_PREFIX + pictureNumber + "." + picture.suggestFileExtension();
try { try {
data = picture.getContent(); data = picture.getContent();
@ -385,7 +417,9 @@ class DocumentEmbeddedContentExtractor {
List<ExtractedFile> listOfExtractedImages = new ArrayList<>(); List<ExtractedFile> listOfExtractedImages = new ArrayList<>();
byte[] data = null; byte[] data = null;
for (HSLFPictureData pictureData : listOfAllPictures) { for (HSLFPictureData pictureData : listOfAllPictures) {
if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
// Get image extension, generate image name, write image to the module // Get image extension, generate image name, write image to the module
// output folder, add it to the listOfExtractedImageAbstractFiles // output folder, add it to the listOfExtractedImageAbstractFiles
PictureType type = pictureData.getType(); PictureType type = pictureData.getType();
@ -475,6 +509,9 @@ class DocumentEmbeddedContentExtractor {
List<ExtractedFile> listOfExtractedImages = new ArrayList<>(); List<ExtractedFile> listOfExtractedImages = new ArrayList<>();
byte[] data = null; byte[] data = null;
for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) { for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
String imageName = UNKNOWN_IMAGE_NAME_PREFIX + i + "." + pictureData.suggestFileExtension(); //NON-NLS String imageName = UNKNOWN_IMAGE_NAME_PREFIX + i + "." + pictureData.suggestFileExtension(); //NON-NLS
try { try {
data = pictureData.getData(); data = pictureData.getData();
@ -510,15 +547,17 @@ class DocumentEmbeddedContentExtractor {
//Convert output to hook into the existing logic for creating derived files //Convert output to hook into the existing logic for creating derived files
List<ExtractedFile> extractedFiles = new ArrayList<>(); List<ExtractedFile> extractedFiles = new ArrayList<>();
extractedAttachments.entrySet().forEach((pathEntry) -> { for (Entry<String, PDFAttachmentExtractor.NewResourceData> pathEntry : extractedAttachments.entrySet()) {
if (checkForIngestCancellation(abstractFile)) {
return null; //null will cause the calling method to return.
}
String fileName = pathEntry.getKey(); String fileName = pathEntry.getKey();
Path writeLocation = pathEntry.getValue().getPath(); Path writeLocation = pathEntry.getValue().getPath();
int fileSize = pathEntry.getValue().getLength(); int fileSize = pathEntry.getValue().getLength();
extractedFiles.add(new ExtractedFile(fileName, extractedFiles.add(new ExtractedFile(fileName,
getFileRelativePath(writeLocation.getFileName().toString()), getFileRelativePath(writeLocation.getFileName().toString()),
fileSize)); fileSize));
}); }
return extractedFiles; return extractedFiles;
} catch (IOException | SAXException | TikaException | InvalidPathException ex) { } catch (IOException | SAXException | TikaException | InvalidPathException ex) {
LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs for file Name: " + abstractFile.getName() + " ID: " + abstractFile.getId(), ex); //NON-NLS LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs for file Name: " + abstractFile.getName() + " ID: " + abstractFile.getId(), ex); //NON-NLS

View File

@ -1,7 +1,7 @@
/* /*
* Autopsy Forensic Browser * Autopsy Forensic Browser
* *
* Copyright 2015-2020 Basis Technology Corp. * Copyright 2015-2021 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org * Contact: carrier <at> sleuthkit <dot> org
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
@ -143,7 +143,7 @@ class SevenZipExtractor {
} }
/** /**
* Contructs an embedded file extractor that uses 7Zip via Java bindings to * Constructs an embedded file extractor that uses 7Zip via Java bindings to
* extract the contents of an archive file to a directory named for the * extract the contents of an archive file to a directory named for the
* archive file. * archive file.
* *
@ -184,6 +184,9 @@ class SevenZipExtractor {
boolean isSevenZipExtractionSupported(AbstractFile file) { boolean isSevenZipExtractionSupported(AbstractFile file) {
String fileMimeType = fileTypeDetector.getMIMEType(file); String fileMimeType = fileTypeDetector.getMIMEType(file);
for (SupportedArchiveExtractionFormats mimeType : SupportedArchiveExtractionFormats.values()) { for (SupportedArchiveExtractionFormats mimeType : SupportedArchiveExtractionFormats.values()) {
if (checkForIngestCancellation(file)) {
break;
}
if (mimeType.toString().equals(fileMimeType)) { if (mimeType.toString().equals(fileMimeType)) {
return true; return true;
} }
@ -191,6 +194,24 @@ class SevenZipExtractor {
return false; return false;
} }
/**
 * Private helper method to standardize the cancellation check that is
 * performed when running ingest. Will return false if the SevenZipExtractor
 * is being used without an IngestJobContext (or without a file task
 * executor), because there is then no ingest job that could have been
 * cancelled.
 *
 * When cancellation is detected, an INFO level message naming the archive
 * is logged so that incomplete extraction results can be traced back to the
 * cancellation.
 *
 * @param file The file being extracted, this is only used for logging
 *             purposes.
 *
 * @return True if ingest has been cancelled, false otherwise.
 */
private boolean checkForIngestCancellation(AbstractFile file) {
    if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
        // A separator is needed between the two placeholders, otherwise the
        // file name and the "Object ID" label run together in the log output.
        logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following archive file may be incomplete. Name: {0}, Object ID: {1}", new Object[]{file.getName(), file.getId()}); //NON-NLS
        return true;
    }
    return false;
}
/** /**
* Check if the item inside archive is a potential zipbomb * Check if the item inside archive is a potential zipbomb
* *
@ -567,7 +588,9 @@ class SevenZipExtractor {
unpackSuccessful = false; unpackSuccessful = false;
return unpackSuccessful; return unpackSuccessful;
} }
if (checkForIngestCancellation(archiveFile)) {
return false;
}
try { try {
List<AbstractFile> existingFiles = getAlreadyExtractedFiles(archiveFile, archiveFilePath); List<AbstractFile> existingFiles = getAlreadyExtractedFiles(archiveFile, archiveFilePath);
for (AbstractFile file : existingFiles) { for (AbstractFile file : existingFiles) {
@ -578,7 +601,9 @@ class SevenZipExtractor {
unpackSuccessful = false; unpackSuccessful = false;
return unpackSuccessful; return unpackSuccessful;
} }
if (checkForIngestCancellation(archiveFile)) {
return false;
}
parentAr = depthMap.get(archiveFile.getId()); parentAr = depthMap.get(archiveFile.getId());
if (parentAr == null) { if (parentAr == null) {
parentAr = new Archive(0, archiveFile.getId(), archiveFile); parentAr = new Archive(0, archiveFile.getId(), archiveFile);
@ -598,6 +623,9 @@ class SevenZipExtractor {
return unpackSuccessful; return unpackSuccessful;
} }
} }
if (checkForIngestCancellation(archiveFile)) {
return false;
}
IInArchive inArchive = null; IInArchive inArchive = null;
try { try {
stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile)); stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile));
@ -605,6 +633,9 @@ class SevenZipExtractor {
// it will be opened incorrectly when using 7zip's built-in auto-detect functionality. // it will be opened incorrectly when using 7zip's built-in auto-detect functionality.
// All other archive formats are still opened using 7zip built-in auto-detect functionality. // All other archive formats are still opened using 7zip built-in auto-detect functionality.
ArchiveFormat options = get7ZipOptions(archiveFile); ArchiveFormat options = get7ZipOptions(archiveFile);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
if (password == null) { if (password == null) {
inArchive = SevenZip.openInArchive(options, stream); inArchive = SevenZip.openInArchive(options, stream);
} else { } else {
@ -613,7 +644,9 @@ class SevenZipExtractor {
numItems = inArchive.getNumberOfItems(); numItems = inArchive.getNumberOfItems();
progress.start(numItems); progress.start(numItems);
progressStarted = true; progressStarted = true;
if (checkForIngestCancellation(archiveFile)) {
return false;
}
//setup the archive local root folder //setup the archive local root folder
final String uniqueArchiveFileName = FileUtil.escapeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile)); final String uniqueArchiveFileName = FileUtil.escapeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
if (!makeExtractedFilesDirectory(uniqueArchiveFileName)) { if (!makeExtractedFilesDirectory(uniqueArchiveFileName)) {
@ -634,6 +667,9 @@ class SevenZipExtractor {
Map<Integer, InArchiveItemDetails> archiveDetailsMap = new HashMap<>(); Map<Integer, InArchiveItemDetails> archiveDetailsMap = new HashMap<>();
for (int inArchiveItemIndex = 0; inArchiveItemIndex < numItems; inArchiveItemIndex++) { for (int inArchiveItemIndex = 0; inArchiveItemIndex < numItems; inArchiveItemIndex++) {
if (checkForIngestCancellation(archiveFile)) {
return false;
}
progress.progress(String.format("%s: Analyzing archive metadata and creating local files (%d of %d)", currentArchiveName, inArchiveItemIndex + 1, numItems), 0); progress.progress(String.format("%s: Analyzing archive metadata and creating local files (%d of %d)", currentArchiveName, inArchiveItemIndex + 1, numItems), 0);
if (isZipBombArchiveItemCheck(archiveFile, inArchive, inArchiveItemIndex, depthMap, escapedArchiveFilePath)) { if (isZipBombArchiveItemCheck(archiveFile, inArchive, inArchiveItemIndex, depthMap, escapedArchiveFilePath)) {
unpackSuccessful = false; unpackSuccessful = false;
@ -643,7 +679,9 @@ class SevenZipExtractor {
String pathInArchive = getPathInArchive(inArchive, inArchiveItemIndex, archiveFile); String pathInArchive = getPathInArchive(inArchive, inArchiveItemIndex, archiveFile);
byte[] pathBytesInArchive = getPathBytesInArchive(inArchive, inArchiveItemIndex, archiveFile); byte[] pathBytesInArchive = getPathBytesInArchive(inArchive, inArchiveItemIndex, archiveFile);
UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive, pathBytesInArchive); UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive, pathBytesInArchive);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
final boolean isEncrypted = (Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.ENCRYPTED); final boolean isEncrypted = (Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.ENCRYPTED);
if (isEncrypted && password == null) { if (isEncrypted && password == null) {
@ -681,6 +719,9 @@ class SevenZipExtractor {
freeDiskSpace = newDiskSpace; freeDiskSpace = newDiskSpace;
} }
} }
if (checkForIngestCancellation(archiveFile)) {
return false;
}
final String uniqueExtractedName = FileUtil.escapeFileName(uniqueArchiveFileName + File.separator + (inArchiveItemIndex / 1000) + File.separator + inArchiveItemIndex + "_" + new File(pathInArchive).getName()); final String uniqueExtractedName = FileUtil.escapeFileName(uniqueArchiveFileName + File.separator + (inArchiveItemIndex / 1000) + File.separator + inArchiveItemIndex + "_" + new File(pathInArchive).getName());
final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName; final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName;
final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName; final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName;
@ -699,7 +740,9 @@ class SevenZipExtractor {
localFileExists = false; localFileExists = false;
logger.log(Level.SEVERE, String.format("Error fiding or creating %s", localFile.getAbsolutePath()), ex); //NON-NLS logger.log(Level.SEVERE, String.format("Error fiding or creating %s", localFile.getAbsolutePath()), ex); //NON-NLS
} }
if (checkForIngestCancellation(archiveFile)) {
return false;
}
// skip the rest of this loop if we couldn't create the file // skip the rest of this loop if we couldn't create the file
//continue will skip details from being added to the map //continue will skip details from being added to the map
if (!localFileExists) { if (!localFileExists) {
@ -716,7 +759,9 @@ class SevenZipExtractor {
} }
int[] extractionIndices = getExtractableFilesFromDetailsMap(archiveDetailsMap); int[] extractionIndices = getExtractableFilesFromDetailsMap(archiveDetailsMap);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
StandardIArchiveExtractCallback archiveCallBack StandardIArchiveExtractCallback archiveCallBack
= new StandardIArchiveExtractCallback( = new StandardIArchiveExtractCallback(
inArchive, archiveFile, progress, inArchive, archiveFile, progress,
@ -726,7 +771,9 @@ class SevenZipExtractor {
//for efficiency. Hence, the HashMap and linear processing of //for efficiency. Hence, the HashMap and linear processing of
//inArchiveItemIndex. False indicates non-test mode //inArchiveItemIndex. False indicates non-test mode
inArchive.extract(extractionIndices, false, archiveCallBack); inArchive.extract(extractionIndices, false, archiveCallBack);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
unpackSuccessful &= archiveCallBack.wasSuccessful(); unpackSuccessful &= archiveCallBack.wasSuccessful();
archiveDetailsMap = null; archiveDetailsMap = null;
@ -735,9 +782,15 @@ class SevenZipExtractor {
// intermediate nodes since the order is not guaranteed // intermediate nodes since the order is not guaranteed
try { try {
unpackedTree.updateOrAddFileToCaseRec(statusMap, archiveFilePath); unpackedTree.updateOrAddFileToCaseRec(statusMap, archiveFilePath);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
unpackedFiles = unpackedTree.getAllFileObjects(); unpackedFiles = unpackedTree.getAllFileObjects();
//check if children are archives, update archive depth tracking //check if children are archives, update archive depth tracking
for (int i = 0; i < unpackedFiles.size(); i++) { for (int i = 0; i < unpackedFiles.size(); i++) {
if (checkForIngestCancellation(archiveFile)) {
return false;
}
progress.progress(String.format("%s: Searching for nested archives (%d of %d)", currentArchiveName, i + 1, unpackedFiles.size())); progress.progress(String.format("%s: Searching for nested archives (%d of %d)", currentArchiveName, i + 1, unpackedFiles.size()));
AbstractFile unpackedFile = unpackedFiles.get(i); AbstractFile unpackedFile = unpackedFiles.get(i);
if (unpackedFile == null) { if (unpackedFile == null) {
@ -792,7 +845,9 @@ class SevenZipExtractor {
progress.finish(); progress.finish();
} }
} }
if (checkForIngestCancellation(archiveFile)) {
return false;
}
//create artifact and send user message //create artifact and send user message
if (hasEncrypted) { if (hasEncrypted) {
String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL; String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;