mirror of https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 10:17:41 +00:00
Merge pull request #6720 from wschaeferB/6737-EmbeddedFileExtractorCancellation
6737 embedded file extractor cancellation
This commit is contained in:
commit 8055f1c656
@@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2015 Basis Technology Corp.
* Copyright 2015-2021 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -32,6 +32,7 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.logging.Level;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
@@ -146,6 +147,9 @@ class DocumentEmbeddedContentExtractor {
boolean isContentExtractionSupported(AbstractFile abstractFile) {
String abstractFileMimeType = fileTypeDetector.getMIMEType(abstractFile);
for (SupportedExtractionFormats s : SupportedExtractionFormats.values()) {
if (checkForIngestCancellation(abstractFile)) {
break;
}
if (s.toString().equals(abstractFileMimeType)) {
abstractFileExtractionFormat = s;
return true;
@@ -154,6 +158,25 @@ class DocumentEmbeddedContentExtractor {
return false;
}

/**
* Private helper method to standardize the cancellation check that is
* performed when running ingest. Will return false if the
* DocumentEmbeddedContentExtractor is being used without an
* IngestJobContext.
*
* @param file The file being extracted, this is only used for logging
* purposes.
*
* @return True if ingest has been cancelled, false otherwise.
*/
private boolean checkForIngestCancellation(AbstractFile file) {
if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
LOGGER.log(Level.INFO, "Ingest was cancelled. Results extracted from the following document file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()});
return true;
}
return false;
}

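For readers outside the Autopsy codebase, the helper above is a plain cooperative-cancellation guard: a nullable ingest context is polled at loop and phase boundaries, a cancellation is logged once, and the caller bails out with an early return. A minimal, self-contained sketch of that pattern follows; the IngestContext interface and every name in it are illustrative stand-ins, not the Autopsy ingest API.

// Illustrative sketch only: a generic cooperative-cancellation guard like
// checkForIngestCancellation() above. IngestContext and all names here are
// hypothetical stand-ins, not the Autopsy ingest API.
import java.util.logging.Level;
import java.util.logging.Logger;

class CancellationAwareExtractor {

    interface IngestContext {
        boolean fileIngestIsCancelled();
    }

    private static final Logger LOGGER = Logger.getLogger(CancellationAwareExtractor.class.getName());
    private final IngestContext context; // null when the extractor runs outside ingest

    CancellationAwareExtractor(IngestContext context) {
        this.context = context;
    }

    // False when there is no ingest context; true (plus one log entry) once the job is cancelled.
    private boolean isCancelled(String fileName, long objectId) {
        if (context != null && context.fileIngestIsCancelled()) {
            LOGGER.log(Level.INFO, "Ingest was cancelled. Results for {0} (object ID {1}) may be incomplete.",
                    new Object[]{fileName, objectId});
            return true;
        }
        return false;
    }

    void extractAll(String fileName, long objectId, Iterable<Runnable> extractionSteps) {
        for (Runnable step : extractionSteps) {
            if (isCancelled(fileName, objectId)) {
                return; // abandon the remaining work, as the call sites in the diff do
            }
            step.run();
        }
    }
}

The real helper additionally requires fileTaskExecutor to be non-null, which is how the extractor distinguishes an ingest run from ad-hoc use.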
/**
* This method selects the appropriate process of extracting embedded
* content from files using either Tika or POI classes. Once the content has
@@ -189,7 +212,9 @@ class DocumentEmbeddedContentExtractor {
LOGGER.log(Level.SEVERE, String.format("Error checking if %s (objID = %d) has already has been processed, skipping", abstractFile.getName(), abstractFile.getId()), e); //NON-NLS
return;
}

if (checkForIngestCancellation(abstractFile)) {
return;
}
// Call the appropriate extraction method based on mime type
switch (abstractFileExtractionFormat) {
case DOCX:
@@ -219,6 +244,9 @@ class DocumentEmbeddedContentExtractor {
// the common task of adding abstractFile to derivedfiles is performed.
listOfExtractedImageAbstractFiles = new ArrayList<>();
for (ExtractedFile extractedImage : listOfExtractedImages) {
if (checkForIngestCancellation(abstractFile)) {
return;
}
try {
listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
@@ -258,11 +286,12 @@ class DocumentEmbeddedContentExtractor {
officeParserConfig.setUseSAXPptxExtractor(true);
officeParserConfig.setUseSAXDocxExtractor(true);
parseContext.set(OfficeParserConfig.class, officeParserConfig);

EmbeddedDocumentExtractor extractor = new EmbeddedContentExtractor(parseContext);
parseContext.set(EmbeddedDocumentExtractor.class, extractor);
ReadContentInputStream stream = new ReadContentInputStream(abstractFile);

if (checkForIngestCancellation(abstractFile)) {
return null; //null will cause the calling method to return.
}
try {
parser.parse(stream, contentHandler, metadata, parseContext);
} catch (IOException | SAXException | TikaException ex) {
@@ -322,6 +351,9 @@ class DocumentEmbeddedContentExtractor {
byte[] data = null;
int pictureNumber = 0; //added to ensure uniqueness in cases where suggestFullFileName returns duplicates
for (Picture picture : listOfAllPictures) {
if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
String fileName = UNKNOWN_IMAGE_NAME_PREFIX + pictureNumber + "." + picture.suggestFileExtension();
try {
data = picture.getContent();
@@ -385,7 +417,9 @@ class DocumentEmbeddedContentExtractor {
List<ExtractedFile> listOfExtractedImages = new ArrayList<>();
byte[] data = null;
for (HSLFPictureData pictureData : listOfAllPictures) {

if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
// Get image extension, generate image name, write image to the module
// output folder, add it to the listOfExtractedImageAbstractFiles
PictureType type = pictureData.getType();
@@ -475,6 +509,9 @@ class DocumentEmbeddedContentExtractor {
List<ExtractedFile> listOfExtractedImages = new ArrayList<>();
byte[] data = null;
for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
if (checkForIngestCancellation(af)) {
return null; //null will cause the calling method to return.
}
String imageName = UNKNOWN_IMAGE_NAME_PREFIX + i + "." + pictureData.suggestFileExtension(); //NON-NLS
try {
data = pictureData.getData();
@@ -510,15 +547,17 @@ class DocumentEmbeddedContentExtractor {

//Convert output to hook into the existing logic for creating derived files
List<ExtractedFile> extractedFiles = new ArrayList<>();
extractedAttachments.entrySet().forEach((pathEntry) -> {
for (Entry<String, PDFAttachmentExtractor.NewResourceData> pathEntry : extractedAttachments.entrySet()) {
if (checkForIngestCancellation(abstractFile)) {
return null; //null will cause the calling method to return.
}
String fileName = pathEntry.getKey();
Path writeLocation = pathEntry.getValue().getPath();
int fileSize = pathEntry.getValue().getLength();
extractedFiles.add(new ExtractedFile(fileName,
getFileRelativePath(writeLocation.getFileName().toString()),
fileSize));
});
}
return extractedFiles;
} catch (IOException | SAXException | TikaException | InvalidPathException ex) {
LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs for file Name: " + abstractFile.getName() + " ID: " + abstractFile.getId(), ex); //NON-NLS
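The switch from extractedAttachments.entrySet().forEach(...) to a plain for loop in the hunk above is what makes the new cancellation check possible: inside a forEach lambda a bare return only ends the current callback, and return null would not even compile in a Consumer, so the enclosing method cannot be aborted from there. A small, generic illustration follows; the names are hypothetical and not the extractor's actual types.

// Generic illustration of why the loop form allows an early exit.
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

class EarlyExitSketch {

    // attachments.entrySet().forEach(e -> { if (cancelled) return; ... });
    // would only skip the current entry; it cannot make collect() return null.

    static List<String> collect(Map<String, String> attachments, boolean cancelled) {
        List<String> out = new ArrayList<>();
        for (Map.Entry<String, String> entry : attachments.entrySet()) {
            if (cancelled) {
                return null; // null tells the caller to stop, mirroring the extractor above
            }
            out.add(entry.getKey() + " -> " + entry.getValue());
        }
        return out;
    }
}

The hunks that follow apply the same cancellation treatment to SevenZipExtractor.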
@@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2015-2020 Basis Technology Corp.
* Copyright 2015-2021 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -143,7 +143,7 @@ class SevenZipExtractor {
}

/**
* Contructs an embedded file extractor that uses 7Zip via Java bindings to
* Constructs an embedded file extractor that uses 7Zip via Java bindings to
* extract the contents of an archive file to a directory named for the
* archive file.
*
@@ -184,6 +184,9 @@ class SevenZipExtractor {
boolean isSevenZipExtractionSupported(AbstractFile file) {
String fileMimeType = fileTypeDetector.getMIMEType(file);
for (SupportedArchiveExtractionFormats mimeType : SupportedArchiveExtractionFormats.values()) {
if (checkForIngestCancellation(file)) {
break;
}
if (mimeType.toString().equals(fileMimeType)) {
return true;
}
@@ -191,6 +194,24 @@ class SevenZipExtractor {
return false;
}

/**
* Private helper method to standardize the cancellation check that is
* performed when running ingest. Will return false if the SevenZipExtractor
* is being used without an IngestJobContext.
*
* @param file The file being extracted, this is only used for logging
* purposes.
*
* @return True if ingest has been cancelled, false otherwise.
*/
private boolean checkForIngestCancellation(AbstractFile file) {
if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) {
logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following archive file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()});
return true;
}
return false;
}

/**
* Check if the item inside archive is a potential zipbomb
*
@@ -567,7 +588,9 @@ class SevenZipExtractor {
unpackSuccessful = false;
return unpackSuccessful;
}

if (checkForIngestCancellation(archiveFile)) {
return false;
}
try {
List<AbstractFile> existingFiles = getAlreadyExtractedFiles(archiveFile, archiveFilePath);
for (AbstractFile file : existingFiles) {
@@ -578,7 +601,9 @@ class SevenZipExtractor {
unpackSuccessful = false;
return unpackSuccessful;
}

if (checkForIngestCancellation(archiveFile)) {
return false;
}
parentAr = depthMap.get(archiveFile.getId());
if (parentAr == null) {
parentAr = new Archive(0, archiveFile.getId(), archiveFile);
@@ -598,6 +623,9 @@ class SevenZipExtractor {
return unpackSuccessful;
}
}
if (checkForIngestCancellation(archiveFile)) {
return false;
}
IInArchive inArchive = null;
try {
stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile));
@@ -605,6 +633,9 @@ class SevenZipExtractor {
// it will be opened incorrectly when using 7zip's built-in auto-detect functionality.
// All other archive formats are still opened using 7zip built-in auto-detect functionality.
ArchiveFormat options = get7ZipOptions(archiveFile);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
if (password == null) {
inArchive = SevenZip.openInArchive(options, stream);
} else {
@@ -613,7 +644,9 @@ class SevenZipExtractor {
numItems = inArchive.getNumberOfItems();
progress.start(numItems);
progressStarted = true;

if (checkForIngestCancellation(archiveFile)) {
return false;
}
//setup the archive local root folder
final String uniqueArchiveFileName = FileUtil.escapeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile));
if (!makeExtractedFilesDirectory(uniqueArchiveFileName)) {
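The per-item loop in the next hunk calls isZipBombArchiveItemCheck before doing any work on an entry, guarding against decompression bombs. The usual heuristic for such a guard is to compare an entry's declared uncompressed size against its compressed size and reject implausible ratios; the sketch below shows that general idea only, with made-up thresholds, and is not the check Autopsy actually performs.

// Illustrative zip-bomb heuristic only: compare an entry's declared uncompressed
// size to its compressed size and refuse absurd ratios. The thresholds and names
// are hypothetical, not SevenZipExtractor's implementation.
final class ZipBombCheckSketch {

    private static final long MIN_COMPRESSED_SIZE = 1024;   // ignore tiny entries
    private static final long MAX_COMPRESSION_RATIO = 600;  // ratio considered suspicious

    static boolean looksLikeZipBomb(long uncompressedSize, long compressedSize) {
        if (compressedSize < MIN_COMPRESSED_SIZE || uncompressedSize <= 0) {
            return false; // not enough data to judge
        }
        return (uncompressedSize / compressedSize) > MAX_COMPRESSION_RATIO;
    }
}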
@@ -634,6 +667,9 @@ class SevenZipExtractor {

Map<Integer, InArchiveItemDetails> archiveDetailsMap = new HashMap<>();
for (int inArchiveItemIndex = 0; inArchiveItemIndex < numItems; inArchiveItemIndex++) {
if (checkForIngestCancellation(archiveFile)) {
return false;
}
progress.progress(String.format("%s: Analyzing archive metadata and creating local files (%d of %d)", currentArchiveName, inArchiveItemIndex + 1, numItems), 0);
if (isZipBombArchiveItemCheck(archiveFile, inArchive, inArchiveItemIndex, depthMap, escapedArchiveFilePath)) {
unpackSuccessful = false;
@@ -643,7 +679,9 @@ class SevenZipExtractor {
String pathInArchive = getPathInArchive(inArchive, inArchiveItemIndex, archiveFile);
byte[] pathBytesInArchive = getPathBytesInArchive(inArchive, inArchiveItemIndex, archiveFile);
UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive, pathBytesInArchive);

if (checkForIngestCancellation(archiveFile)) {
return false;
}
final boolean isEncrypted = (Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.ENCRYPTED);

if (isEncrypted && password == null) {
@@ -681,6 +719,9 @@ class SevenZipExtractor {
freeDiskSpace = newDiskSpace;
}
}
if (checkForIngestCancellation(archiveFile)) {
return false;
}
final String uniqueExtractedName = FileUtil.escapeFileName(uniqueArchiveFileName + File.separator + (inArchiveItemIndex / 1000) + File.separator + inArchiveItemIndex + "_" + new File(pathInArchive).getName());
final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName;
final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName;
@@ -699,7 +740,9 @@ class SevenZipExtractor {
localFileExists = false;
logger.log(Level.SEVERE, String.format("Error fiding or creating %s", localFile.getAbsolutePath()), ex); //NON-NLS
}

if (checkForIngestCancellation(archiveFile)) {
return false;
}
// skip the rest of this loop if we couldn't create the file
//continue will skip details from being added to the map
if (!localFileExists) {
@@ -716,7 +759,9 @@ class SevenZipExtractor {
}

int[] extractionIndices = getExtractableFilesFromDetailsMap(archiveDetailsMap);

if (checkForIngestCancellation(archiveFile)) {
return false;
}
StandardIArchiveExtractCallback archiveCallBack
= new StandardIArchiveExtractCallback(
inArchive, archiveFile, progress,
@@ -726,7 +771,9 @@ class SevenZipExtractor {
//for efficiency. Hence, the HashMap and linear processing of
//inArchiveItemIndex. False indicates non-test mode
inArchive.extract(extractionIndices, false, archiveCallBack);

if (checkForIngestCancellation(archiveFile)) {
return false;
}
unpackSuccessful &= archiveCallBack.wasSuccessful();

archiveDetailsMap = null;
@@ -735,9 +782,15 @@ class SevenZipExtractor {
// intermediate nodes since the order is not guaranteed
try {
unpackedTree.updateOrAddFileToCaseRec(statusMap, archiveFilePath);
if (checkForIngestCancellation(archiveFile)) {
return false;
}
unpackedFiles = unpackedTree.getAllFileObjects();
//check if children are archives, update archive depth tracking
for (int i = 0; i < unpackedFiles.size(); i++) {
if (checkForIngestCancellation(archiveFile)) {
return false;
}
progress.progress(String.format("%s: Searching for nested archives (%d of %d)", currentArchiveName, i + 1, unpackedFiles.size()));
AbstractFile unpackedFile = unpackedFiles.get(i);
if (unpackedFile == null) {
@@ -792,7 +845,9 @@ class SevenZipExtractor {
progress.finish();
}
}

if (checkForIngestCancellation(archiveFile)) {
return false;
}
//create artifact and send user message
if (hasEncrypted) {
String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;
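Taken together, the unpack() changes above follow one scheme: poll for cancellation at each phase boundary (open the archive, enumerate items and create local files, extract, update the case database) and once per item inside the long loops, returning false as soon as the answer is yes. A compressed sketch of that control flow, using hypothetical phase methods rather than SevenZipExtractor's real ones:

// Staged cancellation checks, in the spirit of the unpack() edits above.
// The phase methods are placeholders, not SevenZipExtractor's API.
import java.util.function.BooleanSupplier;

class StagedUnpackSketch {

    static boolean unpack(BooleanSupplier cancelled, int numItems) {
        if (cancelled.getAsBoolean()) return false;
        openArchive();                      // cf. SevenZip.openInArchive(...)

        for (int i = 0; i < numItems; i++) {
            if (cancelled.getAsBoolean()) return false;   // once per item
            createLocalFileForItem(i);
        }

        if (cancelled.getAsBoolean()) return false;
        extractItems();                     // cf. inArchive.extract(...)

        if (cancelled.getAsBoolean()) return false;
        updateCaseDatabase();               // cf. unpackedTree.updateOrAddFileToCaseRec(...)

        return true;
    }

    private static void openArchive() { }
    private static void createLocalFileForItem(int index) { }
    private static void extractItems() { }
    private static void updateCaseDatabase() { }
}

The trade-off is granularity: checking between phases costs almost nothing, while the per-item checks inside the loops are what let a cancelled job stop within seconds on a very large archive.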