From 9ebc98794e4c96324dc25b870f8991f039d64e48 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Wed, 10 Feb 2021 13:59:27 -0500 Subject: [PATCH 1/4] 6737 add ingest cancellation checks to seven zip extractor --- .../SevenZipExtractor.java | 72 ++++++++++++++++--- 1 file changed, 64 insertions(+), 8 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java index 68a802e175..c315c0d4eb 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java @@ -184,6 +184,9 @@ class SevenZipExtractor { boolean isSevenZipExtractionSupported(AbstractFile file) { String fileMimeType = fileTypeDetector.getMIMEType(file); for (SupportedArchiveExtractionFormats mimeType : SupportedArchiveExtractionFormats.values()) { + if (checkForIngestCancellation(file)) { + break; + } if (mimeType.toString().equals(fileMimeType)) { return true; } @@ -191,6 +194,25 @@ class SevenZipExtractor { return false; } + /** + * Private helper method to standardize the cancellation check that is + * performed when running ingest. Will return false if the SevenZipExtractor + * is being used without an IngestJobContext. + * + * @param file The file being extracted, this is only used for logging + * purposes. + * + * @return True if ingest has been cancelled, false otherwise. FFFF + */ + private boolean checkForIngestCancellation(AbstractFile file) { + if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) { + fileTaskExecutor.shutDown(); + logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following file may be incomplete. 
Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()}); + return true; + } + return false; + } + /** * Check if the item inside archive is a potential zipbomb * @@ -567,7 +589,9 @@ class SevenZipExtractor { unpackSuccessful = false; return unpackSuccessful; } - + if (checkForIngestCancellation(archiveFile)) { + return false; + } try { List existingFiles = getAlreadyExtractedFiles(archiveFile, archiveFilePath); for (AbstractFile file : existingFiles) { @@ -578,7 +602,9 @@ class SevenZipExtractor { unpackSuccessful = false; return unpackSuccessful; } - + if (checkForIngestCancellation(archiveFile)) { + return false; + } parentAr = depthMap.get(archiveFile.getId()); if (parentAr == null) { parentAr = new Archive(0, archiveFile.getId(), archiveFile); @@ -598,6 +624,9 @@ class SevenZipExtractor { return unpackSuccessful; } } + if (checkForIngestCancellation(archiveFile)) { + return false; + } IInArchive inArchive = null; try { stream = new SevenZipContentReadStream(new ReadContentInputStream(archiveFile)); @@ -605,6 +634,9 @@ class SevenZipExtractor { // it will be opened incorrectly when using 7zip's built-in auto-detect functionality. // All other archive formats are still opened using 7zip built-in auto-detect functionality. 
ArchiveFormat options = get7ZipOptions(archiveFile); + if (checkForIngestCancellation(archiveFile)) { + return false; + } if (password == null) { inArchive = SevenZip.openInArchive(options, stream); } else { @@ -613,7 +645,9 @@ class SevenZipExtractor { numItems = inArchive.getNumberOfItems(); progress.start(numItems); progressStarted = true; - + if (checkForIngestCancellation(archiveFile)) { + return false; + } //setup the archive local root folder final String uniqueArchiveFileName = FileUtil.escapeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile)); if (!makeExtractedFilesDirectory(uniqueArchiveFileName)) { @@ -634,6 +668,9 @@ class SevenZipExtractor { Map archiveDetailsMap = new HashMap<>(); for (int inArchiveItemIndex = 0; inArchiveItemIndex < numItems; inArchiveItemIndex++) { + if (checkForIngestCancellation(archiveFile)) { + return false; + } progress.progress(String.format("%s: Analyzing archive metadata and creating local files (%d of %d)", currentArchiveName, inArchiveItemIndex + 1, numItems), 0); if (isZipBombArchiveItemCheck(archiveFile, inArchive, inArchiveItemIndex, depthMap, escapedArchiveFilePath)) { unpackSuccessful = false; @@ -643,7 +680,9 @@ class SevenZipExtractor { String pathInArchive = getPathInArchive(inArchive, inArchiveItemIndex, archiveFile); byte[] pathBytesInArchive = getPathBytesInArchive(inArchive, inArchiveItemIndex, archiveFile); UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive, pathBytesInArchive); - + if (checkForIngestCancellation(archiveFile)) { + return false; + } final boolean isEncrypted = (Boolean) inArchive.getProperty(inArchiveItemIndex, PropID.ENCRYPTED); if (isEncrypted && password == null) { @@ -681,6 +720,9 @@ class SevenZipExtractor { freeDiskSpace = newDiskSpace; } } + if (checkForIngestCancellation(archiveFile)) { + return false; + } final String uniqueExtractedName = FileUtil.escapeFileName(uniqueArchiveFileName + File.separator + (inArchiveItemIndex / 1000) + 
File.separator + inArchiveItemIndex + "_" + new File(pathInArchive).getName()); final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName; final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName; @@ -699,7 +741,9 @@ class SevenZipExtractor { localFileExists = false; logger.log(Level.SEVERE, String.format("Error fiding or creating %s", localFile.getAbsolutePath()), ex); //NON-NLS } - + if (checkForIngestCancellation(archiveFile)) { + return false; + } // skip the rest of this loop if we couldn't create the file //continue will skip details from being added to the map if (!localFileExists) { @@ -716,7 +760,9 @@ class SevenZipExtractor { } int[] extractionIndices = getExtractableFilesFromDetailsMap(archiveDetailsMap); - + if (checkForIngestCancellation(archiveFile)) { + return false; + } StandardIArchiveExtractCallback archiveCallBack = new StandardIArchiveExtractCallback( inArchive, archiveFile, progress, @@ -726,7 +772,9 @@ class SevenZipExtractor { //for efficiency. Hence, the HashMap and linear processing of //inArchiveItemIndex. 
False indicates non-test mode inArchive.extract(extractionIndices, false, archiveCallBack); - + if (checkForIngestCancellation(archiveFile)) { + return false; + } unpackSuccessful &= archiveCallBack.wasSuccessful(); archiveDetailsMap = null; @@ -735,9 +783,15 @@ class SevenZipExtractor { // intermediate nodes since the order is not guaranteed try { unpackedTree.updateOrAddFileToCaseRec(statusMap, archiveFilePath); + if (checkForIngestCancellation(archiveFile)) { + return false; + } unpackedFiles = unpackedTree.getAllFileObjects(); //check if children are archives, update archive depth tracking for (int i = 0; i < unpackedFiles.size(); i++) { + if (checkForIngestCancellation(archiveFile)) { + return false; + } progress.progress(String.format("%s: Searching for nested archives (%d of %d)", currentArchiveName, i + 1, unpackedFiles.size())); AbstractFile unpackedFile = unpackedFiles.get(i); if (unpackedFile == null) { @@ -792,7 +846,9 @@ class SevenZipExtractor { progress.finish(); } } - + if (checkForIngestCancellation(archiveFile)) { + return false; + } //create artifact and send user message if (hasEncrypted) { String encryptionType = fullEncryption ? 
ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL; From f02c000414b7a0b5369e76f82fc04699630fdd13 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Wed, 10 Feb 2021 14:19:37 -0500 Subject: [PATCH 2/4] 6737 add ingest cancellation checks to DocumentEmbeddedContentExtractor --- .../DocumentEmbeddedContentExtractor.java | 56 ++++++++++++++++--- .../SevenZipExtractor.java | 4 +- 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java index 36df7b871b..89a687c994 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2015 Basis Technology Corp. + * Copyright 2015-2021 Basis Technology Corp. 
* Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -32,6 +32,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.logging.Level; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; @@ -146,6 +147,9 @@ class DocumentEmbeddedContentExtractor { boolean isContentExtractionSupported(AbstractFile abstractFile) { String abstractFileMimeType = fileTypeDetector.getMIMEType(abstractFile); for (SupportedExtractionFormats s : SupportedExtractionFormats.values()) { + if (checkForIngestCancellation(abstractFile)) { + break; + } if (s.toString().equals(abstractFileMimeType)) { abstractFileExtractionFormat = s; return true; @@ -154,6 +158,26 @@ class DocumentEmbeddedContentExtractor { return false; } + /** + * Private helper method to standardize the cancellation check that is + * performed when running ingest. Will return false if the + * DocumentEmbeddedContentExtractor is being used without an + * IngestJobContext. + * + * @param file The file being extracted, this is only used for logging + * purposes. + * + * @return True if ingest has been cancelled, false otherwise. + */ + private boolean checkForIngestCancellation(AbstractFile file) { + if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) { + fileTaskExecutor.shutDown(); + LOGGER.log(Level.INFO, "Ingest was cancelled. Results extracted from the following document file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()}); + return true; + } + return false; + } + /** * This method selects the appropriate process of extracting embedded * content from files using either Tika or POI classes.
Once the content has @@ -189,7 +213,9 @@ class DocumentEmbeddedContentExtractor { LOGGER.log(Level.SEVERE, String.format("Error checking if %s (objID = %d) has already has been processed, skipping", abstractFile.getName(), abstractFile.getId()), e); //NON-NLS return; } - + if (checkForIngestCancellation(abstractFile)) { + return; + } // Call the appropriate extraction method based on mime type switch (abstractFileExtractionFormat) { case DOCX: @@ -219,6 +245,9 @@ class DocumentEmbeddedContentExtractor { // the common task of adding abstractFile to derivedfiles is performed. listOfExtractedImageAbstractFiles = new ArrayList<>(); for (ExtractedFile extractedImage : listOfExtractedImages) { + if (checkForIngestCancellation(abstractFile)) { + return; + } try { listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(), extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(), @@ -258,11 +287,12 @@ class DocumentEmbeddedContentExtractor { officeParserConfig.setUseSAXPptxExtractor(true); officeParserConfig.setUseSAXDocxExtractor(true); parseContext.set(OfficeParserConfig.class, officeParserConfig); - EmbeddedDocumentExtractor extractor = new EmbeddedContentExtractor(parseContext); parseContext.set(EmbeddedDocumentExtractor.class, extractor); ReadContentInputStream stream = new ReadContentInputStream(abstractFile); - + if (checkForIngestCancellation(abstractFile)) { + return null; //null will cause the calling method to return. 
+ } try { parser.parse(stream, contentHandler, metadata, parseContext); } catch (IOException | SAXException | TikaException ex) { @@ -322,6 +352,9 @@ class DocumentEmbeddedContentExtractor { byte[] data = null; int pictureNumber = 0; //added to ensure uniqueness in cases where suggestFullFileName returns duplicates for (Picture picture : listOfAllPictures) { + if (checkForIngestCancellation(af)) { + return null; //null will cause the calling method to return. + } String fileName = UNKNOWN_IMAGE_NAME_PREFIX + pictureNumber + "." + picture.suggestFileExtension(); try { data = picture.getContent(); @@ -385,7 +418,9 @@ class DocumentEmbeddedContentExtractor { List listOfExtractedImages = new ArrayList<>(); byte[] data = null; for (HSLFPictureData pictureData : listOfAllPictures) { - + if (checkForIngestCancellation(af)) { + return null; //null will cause the calling method to return. + } // Get image extension, generate image name, write image to the module // output folder, add it to the listOfExtractedImageAbstractFiles PictureType type = pictureData.getType(); @@ -475,6 +510,9 @@ class DocumentEmbeddedContentExtractor { List listOfExtractedImages = new ArrayList<>(); byte[] data = null; for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) { + if (checkForIngestCancellation(af)) { + return null; //null will cause the calling method to return. + } String imageName = UNKNOWN_IMAGE_NAME_PREFIX + i + "." + pictureData.suggestFileExtension(); //NON-NLS try { data = pictureData.getData(); @@ -510,15 +548,17 @@ class DocumentEmbeddedContentExtractor { //Convert output to hook into the existing logic for creating derived files List extractedFiles = new ArrayList<>(); - extractedAttachments.entrySet().forEach((pathEntry) -> { + for (Entry pathEntry : extractedAttachments.entrySet()) { + if (checkForIngestCancellation(abstractFile)) { + return null; //null will cause the calling method to return. 
+ } String fileName = pathEntry.getKey(); Path writeLocation = pathEntry.getValue().getPath(); int fileSize = pathEntry.getValue().getLength(); extractedFiles.add(new ExtractedFile(fileName, getFileRelativePath(writeLocation.getFileName().toString()), fileSize)); - }); - + } return extractedFiles; } catch (IOException | SAXException | TikaException | InvalidPathException ex) { LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs for file Name: " + abstractFile.getName() + " ID: " + abstractFile.getId(), ex); //NON-NLS diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java index c315c0d4eb..1b04a6a2e6 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2015-2020 Basis Technology Corp. + * Copyright 2015-2021 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -207,7 +207,7 @@ class SevenZipExtractor { private boolean checkForIngestCancellation(AbstractFile file) { if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) { fileTaskExecutor.shutDown(); - logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()}); + logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following archive file may be incomplete. 
Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()}); return true; } return false; From 098d98ca042891d1c444dc8251cc8eecf947420d Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Wed, 10 Feb 2021 14:31:54 -0500 Subject: [PATCH 3/4] 6737 remove extra calls to executor shutdown --- .../DocumentEmbeddedContentExtractor.java | 1 - .../modules/embeddedfileextractor/SevenZipExtractor.java | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java index 89a687c994..e9276a81b7 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/DocumentEmbeddedContentExtractor.java @@ -171,7 +171,6 @@ class DocumentEmbeddedContentExtractor { */ private boolean checkForIngestCancellation(AbstractFile file) { if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) { - fileTaskExecutor.shutDown(); LOGGER.log(Level.INFO, "Ingest was cancelled. Results extracted from the following document file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()}); return true; } diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java index 1b04a6a2e6..c888d83299 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java @@ -202,11 +202,10 @@ class SevenZipExtractor { * @param file The file being extracted, this is only used for logging * purposes. * - * @return True if ingest has been cancelled, false otherwise. 
FFFF + * @return True if ingest has been cancelled, false otherwise. */ private boolean checkForIngestCancellation(AbstractFile file) { if (fileTaskExecutor != null && context != null && context.fileIngestIsCancelled()) { - fileTaskExecutor.shutDown(); logger.log(Level.INFO, "Ingest was cancelled. Results extracted from the following archive file may be incomplete. Name: {0}Object ID: {1}", new Object[]{file.getName(), file.getId()}); return true; } From a482b20acb5cbfb79818bfa3f8a7812223e0c9a9 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Wed, 10 Feb 2021 14:33:31 -0500 Subject: [PATCH 4/4] 6737 fix typo --- .../modules/embeddedfileextractor/SevenZipExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java index c888d83299..fdf4fea231 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/SevenZipExtractor.java @@ -143,7 +143,7 @@ class SevenZipExtractor { } /** - * Contructs an embedded file extractor that uses 7Zip via Java bindings to + * Constructs an embedded file extractor that uses 7Zip via Java bindings to * extract the contents of an archive file to a directory named for the * archive file. *