mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-14 17:06:16 +00:00
Embedded file extraction module implemented
This commit is contained in:
parent
c6b2e936dd
commit
07b76f4910
@ -39,7 +39,7 @@ import org.sleuthkit.autopsy.modules.filetypeid.FileTypeIdModuleFactory;
|
||||
import org.sleuthkit.autopsy.modules.hashdatabase.HashLookupModuleFactory;
|
||||
import org.sleuthkit.autopsy.modules.interestingitems.InterestingItemsIngestModuleFactory;
|
||||
import org.sleuthkit.autopsy.modules.photoreccarver.PhotoRecCarverIngestModuleFactory;
|
||||
import org.sleuthkit.autopsy.modules.sevenzip.ArchiveFileExtractorModuleFactory;
|
||||
import org.sleuthkit.autopsy.modules.embeddedfileextractor.EmbeddedFileExtractorModuleFactory;
|
||||
import org.sleuthkit.autopsy.python.JythonModuleLoader;
|
||||
|
||||
/**
|
||||
@ -57,7 +57,7 @@ final class IngestModuleFactoryLoader {
|
||||
add("org.sleuthkit.autopsy.recentactivity.RecentActivityExtracterModuleFactory"); //NON-NLS
|
||||
add(HashLookupModuleFactory.class.getCanonicalName());
|
||||
add(FileTypeIdModuleFactory.class.getCanonicalName());
|
||||
add(ArchiveFileExtractorModuleFactory.class.getCanonicalName());
|
||||
add(EmbeddedFileExtractorModuleFactory.class.getCanonicalName());
|
||||
add(ExifParserModuleFactory.class.getCanonicalName());
|
||||
add("org.sleuthkit.autopsy.keywordsearch.KeywordSearchModuleFactory"); //NON-NLS
|
||||
add("org.sleuthkit.autopsy.thunderbirdparser.EmailParserModuleFactory"); //NON-NLS
|
||||
|
@ -0,0 +1,39 @@
|
||||
OpenIDE-Module-Display-Category=Ingest Module
|
||||
OpenIDE-Module-Long-Description=\
|
||||
Embedded File Extraction Ingest Module\n\nThe Embedded File Extraction Ingest Module processes document files (such as doc, docx, ppt, pptx, xls, xlsx) and archive files (such as zip and other archive types supported by the 7zip extractor).\n\
|
||||
Contents of these files are extracted and the derived files are added back to the current ingest to be processed by the configured ingest modules.\n\
|
||||
If the derived file happens to be an archive file, it will be re-processed by the 7zip extractor - the extractor will process archive files N-levels deep.\n\n\
|
||||
The extracted files are navigable in the directory tree.\n\n\
|
||||
The module is supported on Windows, Linux and Mac operating systems.
|
||||
OpenIDE-Module-Name=Embedded File Extraction
|
||||
OpenIDE-Module-Short-Description=Embedded File Extraction Ingest Module
|
||||
EmbeddedFileExtractorIngestModule.SevenZipContentReadStream.seek.exception.invalidOrigin=Invalid seek origin\: {0}
|
||||
EmbeddedFileExtractorIngestModule.SevenZipContentReadStream.read.exception.errReadStream=Error reading content stream.
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.moduleName=Embedded File Extractor
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.moduleDesc.text=Extracts embedded files (doc, docx, ppt, pptx, xls, xlsx, zip, rar, arj, 7z, gzip, bzip2, tar), reschedules them to current ingest and populates directory tree with new files.
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.encryptionFileLevel=File-level Encryption
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.encryptionFull=Full Encryption
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.init.errInitModule.msg=Error initializing {0}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.init.errInitModule.details=Error initializing output dir\: {0}\: {1}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.init.errCantInitLib=Could not initialize 7-ZIP library\: {0}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnMsg=Possible ZIP bomb detected in archive\: {0}, item\: {1}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnDetails=The archive item compression ratio is {0}, skipping processing of this archive item.
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnMsg.zipBomb=Possible ZIP bomb detected\: {0}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnDetails.zipBomb=The archive is {0} levels deep, skipping processing of this archive and its contents
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.unknownPath.msg=Unknown item path in archive\: {0}, will use\: {1}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.msg=Not enough disk space to unpack archive item\: {0}, {1}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.details=The archive item is too large to unpack, skipping unpacking this item.
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.msg=Error unpacking {0}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.details=Error unpacking {0}. {1}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.msg=Encrypted files in archive detected.
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.details=Some files in archive\: {0} are encrypted. {1} extractor was unable to extract all files from this archive.
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackStream.write.exception.msg=Error writing unpacked file to\: {0}
|
||||
EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackedTree.exception.msg=Error adding a derived file to db\:{0}
|
||||
EmbeddedFileExtractorIngestModule.ImageExtractor.docContainer.init.err=Doc container could not be initialized while reading: {0}
|
||||
EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err=Docx container could not be initialized while reading: {0}
|
||||
EmbeddedFileExtractorIngestModule.ImageExtractor.pptContainer.init.err=Ppt container could not be initialized while reading: {0}
|
||||
EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err=Pptx container could not be initialized while reading: {0}
|
||||
EmbeddedFileExtractorIngestModule.ImageExtractor.xlsContainer.init.err=Xls container could not be initialized while reading: {0}
|
||||
EmbeddedFileExtractorIngestModule.ImageExtractor.xlsxContainer.init.err=Xlsx container could not be initialized while reading: {0}
|
||||
EmbeddedFileExtractorIngestModule.ImageExtractor.extractImage.addToDB.exception.msg=Unable to add the derived files to the database.
|
||||
EmbeddedFileExtractorIngestModule.ImageExtractor.getOutputFolderPath.exception.msg=Could not get path for image extraction from Abstract File: {0}
|
@ -0,0 +1,181 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2013-2014 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.modules.embeddedfileextractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.ingest.IngestServices;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
import org.sleuthkit.autopsy.ingest.FileIngestModule;
|
||||
import org.sleuthkit.autopsy.ingest.ModuleContentEvent;
|
||||
import org.sleuthkit.datamodel.BlackboardAttribute;
|
||||
import org.sleuthkit.datamodel.TskCoreException;
|
||||
import org.sleuthkit.datamodel.TskData;
|
||||
import org.sleuthkit.autopsy.ingest.IngestModule.ProcessResult;
|
||||
import org.sleuthkit.autopsy.ingest.IngestJobContext;
|
||||
import org.sleuthkit.autopsy.ingest.IngestMessage;
|
||||
import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter;
|
||||
import org.sleuthkit.autopsy.modules.embeddedfileextractor.ImageExtractor.SupportedImageExtractionFormats;
|
||||
import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
|
||||
|
||||
/**
|
||||
* Embedded File Extractor ingest module extracts embedded files from supported
|
||||
* archives and documents, adds extracted embedded DerivedFiles, reschedules
|
||||
* extracted DerivedFiles for ingest.
|
||||
*/
|
||||
public final class EmbeddedFileExtractorIngestModule implements FileIngestModule {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(EmbeddedFileExtractorIngestModule.class.getName());
|
||||
private final IngestServices services = IngestServices.getInstance();
|
||||
static final String[] SUPPORTED_EXTENSIONS = {"zip", "rar", "arj", "7z", "7zip", "gzip", "gz", "bzip2", "tar", "tgz",}; // "iso"}; NON-NLS
|
||||
|
||||
//buffer for checking file headers and signatures
|
||||
private static final int readHeaderSize = 4;
|
||||
private static final byte[] fileHeaderBuffer = new byte[readHeaderSize];
|
||||
private static final int ZIP_SIGNATURE_BE = 0x504B0304;
|
||||
private IngestJobContext context;
|
||||
private long jobId;
|
||||
private final static IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter();
|
||||
|
||||
private static final Case currentCase = Case.getCurrentCase();
|
||||
protected static final String moduleDirRelative = Case.getModulesOutputDirRelPath() + File.separator + EmbeddedFileExtractorModuleFactory.getModuleName(); //relative to the case, to store in db
|
||||
protected static final String moduleDirAbsolute = currentCase.getModulesOutputDirAbsPath() + File.separator + EmbeddedFileExtractorModuleFactory.getModuleName(); //absolute, to extract to
|
||||
|
||||
private boolean archivextraction;
|
||||
private boolean imageExtraction;
|
||||
private ImageExtractor imageExtractor;
|
||||
private SevenZipExtractor archiveExtractor;
|
||||
SupportedImageExtractionFormats abstractFileExtractionFormat;
|
||||
FileTypeDetector fileTypeDetector;
|
||||
|
||||
EmbeddedFileExtractorIngestModule() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startUp(IngestJobContext context) throws IngestModuleException {
|
||||
this.context = context;
|
||||
jobId = context.getJobId();
|
||||
|
||||
// initialize the folder where the embedded files are extracted.
|
||||
File extractionDirectory = new File(EmbeddedFileExtractorIngestModule.moduleDirAbsolute);
|
||||
if (!extractionDirectory.exists()) {
|
||||
try {
|
||||
extractionDirectory.mkdirs();
|
||||
} catch (SecurityException ex) {
|
||||
logger.log(Level.SEVERE, "Error initializing output dir: " + EmbeddedFileExtractorIngestModule.moduleDirAbsolute, ex); //NON-NLS
|
||||
services.postMessage(IngestMessage.createErrorMessage(EmbeddedFileExtractorModuleFactory.getModuleName(), "Error initializing", "Error initializing output dir: " + EmbeddedFileExtractorIngestModule.moduleDirAbsolute)); //NON-NLS
|
||||
throw new IngestModuleException(ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
// initialize the filetypedetector
|
||||
try {
|
||||
fileTypeDetector = new FileTypeDetector();
|
||||
} catch (FileTypeDetector.FileTypeDetectorInitException ex) {
|
||||
throw new IngestModuleException(ex.getMessage());
|
||||
}
|
||||
|
||||
// initialize the extraction modules.
|
||||
this.archiveExtractor = new SevenZipExtractor(context, fileTypeDetector);
|
||||
this.imageExtractor = new ImageExtractor(context, fileTypeDetector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProcessResult process(AbstractFile abstractFile) {
|
||||
// skip the unallocated blocks
|
||||
if (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) {
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
// skip unknown files
|
||||
if (abstractFile.getKnown().equals(TskData.FileKnown.KNOWN)) {
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
// check if the file is supported by either of the two embedded file extractors.
|
||||
this.archivextraction = archiveExtractor.isSevenZipExtractionSupported(abstractFile);
|
||||
this.imageExtraction = imageExtractor.isImageExtractionSupported(abstractFile);
|
||||
|
||||
if (!abstractFile.isFile() && (!this.archivextraction || !this.imageExtraction)) {
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
//check if already has derived files, skip
|
||||
try {
|
||||
if (abstractFile.hasChildren()) {
|
||||
//check if local unpacked dir exists
|
||||
final String uniqueFileName = getUniqueName(abstractFile);
|
||||
final String localRootAbsPath = getLocalRootAbsPath(uniqueFileName);
|
||||
if (new File(localRootAbsPath).exists()) {
|
||||
logger.log(Level.INFO, "File already has been processed as it has children and local unpacked file, skipping: {0}", abstractFile.getName()); //NON-NLS
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
}
|
||||
} catch (TskCoreException e) {
|
||||
logger.log(Level.INFO, "Error checking if file already has been processed, skipping: {0}", abstractFile.getName()); //NON-NLS
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
logger.log(Level.INFO, "Processing with embedded file extractor: {0}", abstractFile.getName()); //NON-NLS
|
||||
|
||||
// call the archive extractor if archiveextraction flag is set.
|
||||
if (this.archivextraction) {
|
||||
archiveExtractor.unpack(abstractFile);
|
||||
}
|
||||
|
||||
// calling the image extractor if imageExtraction flag set.
|
||||
if (this.imageExtraction) {
|
||||
imageExtractor.extractImage(abstractFile);
|
||||
}
|
||||
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutDown() {
|
||||
// We don't need the value, but for cleanliness and consistency
|
||||
refCounter.decrementAndGet(jobId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get local relative path to the unpacked archive root
|
||||
*
|
||||
* @param archiveFile
|
||||
* @return
|
||||
*/
|
||||
protected static String getUniqueName(AbstractFile archiveFile) {
|
||||
return archiveFile.getName() + "_" + archiveFile.getId();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get local abs path to the unpacked archive root
|
||||
*
|
||||
* @param localRootRelPath relative path to archive, from getUniqueName()
|
||||
* @return
|
||||
*/
|
||||
protected static String getLocalRootAbsPath(String localRootRelPath) {
|
||||
return moduleDirAbsolute + File.separator + localRootRelPath;
|
||||
}
|
||||
}
|
@ -16,7 +16,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.modules.sevenzip;
|
||||
package org.sleuthkit.autopsy.modules.embeddedfileextractor;
|
||||
|
||||
import org.openide.util.NbBundle;
|
||||
import org.openide.util.lookup.ServiceProvider;
|
||||
@ -31,10 +31,10 @@ import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettings;
|
||||
* interface panels used to configure the settings for instances of the modules.
|
||||
*/
|
||||
@ServiceProvider(service = IngestModuleFactory.class)
|
||||
public class ArchiveFileExtractorModuleFactory extends IngestModuleFactoryAdapter {
|
||||
public class EmbeddedFileExtractorModuleFactory extends IngestModuleFactoryAdapter {
|
||||
|
||||
static String getModuleName() {
|
||||
return NbBundle.getMessage(SevenZipIngestModule.class, "SevenZipIngestModule.moduleName");
|
||||
return NbBundle.getMessage(EmbeddedFileExtractorIngestModule.class, "EmbeddedFileExtractorIngestModule.ArchiveExtractor.moduleName");
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -44,8 +44,8 @@ public class ArchiveFileExtractorModuleFactory extends IngestModuleFactoryAdapte
|
||||
|
||||
@Override
|
||||
public String getModuleDescription() {
|
||||
return NbBundle.getMessage(SevenZipIngestModule.class,
|
||||
"SevenZipIngestModule.moduleDesc.text");
|
||||
return NbBundle.getMessage(EmbeddedFileExtractorIngestModule.class,
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.moduleDesc.text");
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -60,6 +60,6 @@ public class ArchiveFileExtractorModuleFactory extends IngestModuleFactoryAdapte
|
||||
|
||||
@Override
|
||||
public FileIngestModule createFileIngestModule(IngestModuleIngestJobSettings ingestOptions) {
|
||||
return new SevenZipIngestModule();
|
||||
return new EmbeddedFileExtractorIngestModule();
|
||||
}
|
||||
}
|
575
Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/ImageExtractor.java
Executable file
575
Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/ImageExtractor.java
Executable file
@ -0,0 +1,575 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011-2015 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.modules.embeddedfileextractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
import org.apache.poi.hslf.model.Picture;
|
||||
import org.apache.poi.hslf.usermodel.PictureData;
|
||||
import org.apache.poi.hslf.usermodel.SlideShow;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.model.PicturesTable;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFPictureData;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFPictureData;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.casemodule.services.FileManager;
|
||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.ingest.IngestJobContext;
|
||||
import org.sleuthkit.autopsy.ingest.IngestServices;
|
||||
import org.sleuthkit.autopsy.ingest.ModuleContentEvent;
|
||||
import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||
import org.sleuthkit.datamodel.TskCoreException;
|
||||
|
||||
class ImageExtractor {
|
||||
|
||||
private final FileManager fileManager;
|
||||
private final IngestServices services;
|
||||
private static final Logger logger = Logger.getLogger(ImageExtractor.class.getName());
|
||||
private final IngestJobContext context;
|
||||
private String parentFileName;
|
||||
private final String UNKNOWN_NAME_PREFIX = "image_";
|
||||
private final FileTypeDetector fileTypeDetector;
|
||||
/**
 * MIME types of the document formats from which images can be extracted.
 * The string form of each constant is its MIME type.
 */
enum SupportedImageExtractionFormats {

    DOC("application/msword"),
    DOCX("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
    PPT("application/vnd.ms-powerpoint"),
    PPTX("application/vnd.openxmlformats-officedocument.presentationml.presentation"),
    XLS("application/vnd.ms-excel"),
    XLSX("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
    // TODO Expand to support more formats

    private final String mimeType;

    SupportedImageExtractionFormats(final String mimeType) {
        this.mimeType = mimeType;
    }

    /**
     * @return the MIME type associated with this format.
     */
    @Override
    public String toString() {
        return mimeType;
    }
}
|
||||
private SupportedImageExtractionFormats abstractFileExtractionFormat;
|
||||
|
||||
ImageExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector) {
|
||||
|
||||
this.fileManager = Case.getCurrentCase().getServices().getFileManager();
|
||||
this.services = IngestServices.getInstance();
|
||||
this.context = context;
|
||||
this.fileTypeDetector = fileTypeDetector;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns true if the file format is currently supported. Else
|
||||
* it returns false. Performs only Apache Tika based detection.
|
||||
*
|
||||
* @param abstractFile The AbstractFilw whose mimetype is to be determined.
|
||||
* @return This method returns true if the file format is currently
|
||||
* supported. Else it returns false.
|
||||
*/
|
||||
boolean isImageExtractionSupported(AbstractFile abstractFile) {
|
||||
try {
|
||||
String abstractFileMimeType = fileTypeDetector.getFileType(abstractFile);
|
||||
for (SupportedImageExtractionFormats s : SupportedImageExtractionFormats.values()) {
|
||||
if (s.toString().equals(abstractFileMimeType)) {
|
||||
abstractFileExtractionFormat = s;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.WARNING, "Error executing FileTypeDetector.getFileType()", ex); // NON-NLS
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method selects the appropriate process of extracting images from
|
||||
* files using POI classes. Once the images have been extracted, the method
|
||||
* adds them to the DB and fires a ModuleContentEvent. ModuleContent Event
|
||||
* is not fired if the no images were extracted from the processed file.
|
||||
*
|
||||
* @param format
|
||||
* @param abstractFile The abstract file to be processed.
|
||||
*/
|
||||
protected void extractImage(AbstractFile abstractFile) {
|
||||
//
|
||||
// switchcase for different supported formats
|
||||
// process abstractFile according to the format by calling appropriate methods.
|
||||
|
||||
List<ExtractedImage> listOfExtractedImages = null;
|
||||
List<AbstractFile> listOfExtractedImageAbstractFiles = null;
|
||||
this.parentFileName = EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile);
|
||||
|
||||
switch (abstractFileExtractionFormat) {
|
||||
case DOC:
|
||||
listOfExtractedImages = extractImagesFromDoc(abstractFile);
|
||||
break;
|
||||
case DOCX:
|
||||
listOfExtractedImages = extractImagesFromDocx(abstractFile);
|
||||
break;
|
||||
case PPT:
|
||||
listOfExtractedImages = extractImagesFromPpt(abstractFile);
|
||||
break;
|
||||
case PPTX:
|
||||
listOfExtractedImages = extractImagesFromPptx(abstractFile);
|
||||
break;
|
||||
case XLS:
|
||||
listOfExtractedImages = extractImagesFromXls(abstractFile);
|
||||
break;
|
||||
case XLSX:
|
||||
listOfExtractedImages = extractImagesFromXlsx(abstractFile);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (listOfExtractedImages == null) {
|
||||
return;
|
||||
}
|
||||
// the common task of adding abstractFile to derivedfiles is performed.
|
||||
listOfExtractedImageAbstractFiles = new ArrayList<>();
|
||||
for (ExtractedImage extractedImage : listOfExtractedImages) {
|
||||
try {
|
||||
listOfExtractedImageAbstractFiles.add(fileManager.addDerivedFile(extractedImage.getFileName(), extractedImage.getLocalPath(), extractedImage.getSize(),
|
||||
extractedImage.getCtime(), extractedImage.getCrtime(), extractedImage.getAtime(), extractedImage.getAtime(),
|
||||
true, abstractFile, null, EmbeddedFileExtractorModuleFactory.getModuleName(), null, null));
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImage.addToDB.exception.msg"), ex); //NON-NLS
|
||||
}
|
||||
}
|
||||
if (!listOfExtractedImages.isEmpty()) {
|
||||
services.fireModuleContentEvent(new ModuleContentEvent(abstractFile));
|
||||
context.addFilesToJob(listOfExtractedImageAbstractFiles);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract images from doc format files.
|
||||
*
|
||||
* @param af the file from which images are to be extracted.
|
||||
* @return list of extracted images. Returns null in case no images were
|
||||
* extracted.
|
||||
*/
|
||||
private List<ExtractedImage> extractImagesFromDoc(AbstractFile af) {
|
||||
List<ExtractedImage> listOfExtractedImages;
|
||||
HWPFDocument doc = null;
|
||||
try {
|
||||
doc = new HWPFDocument(new ReadContentInputStream(af));
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.docContainer.init.err", af.getName()), ex); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
PicturesTable pictureTable = doc.getPicturesTable();
|
||||
List<org.apache.poi.hwpf.usermodel.Picture> listOfAllPictures = pictureTable.getAllPictures();
|
||||
String outputFolderPath;
|
||||
if (listOfAllPictures.isEmpty()) {
|
||||
return null;
|
||||
} else {
|
||||
outputFolderPath = getOutputFolderPath(this.parentFileName);
|
||||
}
|
||||
if (outputFolderPath == null) {
|
||||
return null;
|
||||
}
|
||||
listOfExtractedImages = new ArrayList<>();
|
||||
for (org.apache.poi.hwpf.usermodel.Picture picture : listOfAllPictures) {
|
||||
String fileName = picture.suggestFullFileName();
|
||||
writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), picture.getContent());
|
||||
// TODO Extract more info from the Picture viz ctime, crtime, atime, mtime
|
||||
listOfExtractedImages.add(new ExtractedImage(fileName, getFileRelativePath(fileName), picture.getSize(), af));
|
||||
}
|
||||
|
||||
return listOfExtractedImages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract images from docx format files.
|
||||
*
|
||||
* @param af the file from which images are to be extracted.
|
||||
* @return list of extracted images. Returns null in case no images were
|
||||
* extracted.
|
||||
*/
|
||||
private List<ExtractedImage> extractImagesFromDocx(AbstractFile af) {
|
||||
List<ExtractedImage> listOfExtractedImages;
|
||||
XWPFDocument docx = null;
|
||||
try {
|
||||
docx = new XWPFDocument(new ReadContentInputStream(af));
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.docxContainer.init.err", af.getName()), ex); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
List<XWPFPictureData> listOfAllPictures = docx.getAllPictures();
|
||||
|
||||
// if no images are extracted from the PPT, return null, else initialize
|
||||
// the output folder for image extraction.
|
||||
String outputFolderPath;
|
||||
if (listOfAllPictures.isEmpty()) {
|
||||
return null;
|
||||
} else {
|
||||
outputFolderPath = getOutputFolderPath(this.parentFileName);
|
||||
}
|
||||
if (outputFolderPath == null) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
|
||||
return null;
|
||||
}
|
||||
listOfExtractedImages = new ArrayList<>();
|
||||
for (XWPFPictureData xwpfPicture : listOfAllPictures) {
|
||||
String fileName = xwpfPicture.getFileName();
|
||||
writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), xwpfPicture.getData());
|
||||
listOfExtractedImages.add(new ExtractedImage(fileName, getFileRelativePath(fileName), xwpfPicture.getData().length, af));
|
||||
}
|
||||
return listOfExtractedImages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract images from ppt format files.
|
||||
*
|
||||
* @param af the file from which images are to be extracted.
|
||||
* @return list of extracted images. Returns null in case no images were
|
||||
* extracted.
|
||||
*/
|
||||
private List<ExtractedImage> extractImagesFromPpt(AbstractFile af) {
|
||||
List<ExtractedImage> listOfExtractedImages;
|
||||
SlideShow ppt = null;
|
||||
try {
|
||||
ppt = new SlideShow(new ReadContentInputStream(af));
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.pptContainer.init.err", af.getName()), ex); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
|
||||
//extract all pictures contained in the presentation
|
||||
PictureData[] listOfAllPictures = ppt.getPictureData();
|
||||
|
||||
// if no images are extracted from the PPT, return null, else initialize
|
||||
// the output folder for image extraction.
|
||||
String outputFolderPath;
|
||||
if (listOfAllPictures.length == 0) {
|
||||
return null;
|
||||
} else {
|
||||
outputFolderPath = getOutputFolderPath(this.parentFileName);
|
||||
}
|
||||
if (outputFolderPath == null) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
|
||||
return null;
|
||||
}
|
||||
|
||||
// extract the images to the above initialized outputFolder.
|
||||
// extraction path - outputFolder/image_number.ext
|
||||
int i = 0;
|
||||
listOfExtractedImages = new ArrayList<>();
|
||||
for (PictureData pictureData : listOfAllPictures) {
|
||||
|
||||
// Get image extension, generate image name, write image to the module
|
||||
// output folder, add it to the listOfExtractedImageAbstractFiles
|
||||
int type = pictureData.getType();
|
||||
String ext;
|
||||
switch (type) {
|
||||
case Picture.JPEG:
|
||||
ext = ".jpg"; //NON-NLS
|
||||
break;
|
||||
case Picture.PNG:
|
||||
ext = ".png"; //NON-NLS
|
||||
break;
|
||||
case Picture.WMF:
|
||||
ext = ".wmf"; //NON-NLS
|
||||
break;
|
||||
case Picture.EMF:
|
||||
ext = ".emf"; //NON-NLS
|
||||
break;
|
||||
case Picture.PICT:
|
||||
ext = ".pict"; //NON-NLS
|
||||
break;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
String imageName = UNKNOWN_NAME_PREFIX + i + ext; //NON-NLS
|
||||
writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
|
||||
listOfExtractedImages.add(new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
|
||||
i++;
|
||||
}
|
||||
return listOfExtractedImages;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract images from pptx format files.
|
||||
*
|
||||
* @param af the file from which images are to be extracted.
|
||||
* @return list of extracted images. Returns null in case no images were
|
||||
* extracted.
|
||||
*/
|
||||
private List<ExtractedImage> extractImagesFromPptx(AbstractFile af) {
|
||||
List<ExtractedImage> listOfExtractedImages;
|
||||
XMLSlideShow pptx;
|
||||
try {
|
||||
pptx = new XMLSlideShow(new ReadContentInputStream(af));
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.pptxContainer.init.err", af.getName()), ex); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
List<XSLFPictureData> listOfAllPictures = pptx.getAllPictures();
|
||||
|
||||
// if no images are extracted from the PPT, return null, else initialize
|
||||
// the output folder for image extraction.
|
||||
String outputFolderPath;
|
||||
if (listOfAllPictures.isEmpty()) {
|
||||
return null;
|
||||
} else {
|
||||
outputFolderPath = getOutputFolderPath(this.parentFileName);
|
||||
}
|
||||
if (outputFolderPath == null) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
|
||||
return null;
|
||||
}
|
||||
|
||||
listOfExtractedImages = new ArrayList<>();
|
||||
for (XSLFPictureData xslsPicture : listOfAllPictures) {
|
||||
|
||||
// get image file name, write it to the module outputFolder, and add
|
||||
// it to the listOfExtractedImageAbstractFiles.
|
||||
String fileName = xslsPicture.getFileName();
|
||||
writeExtractedImage(Paths.get(outputFolderPath, fileName).toString(), xslsPicture.getData());
|
||||
listOfExtractedImages.add(new ExtractedImage(fileName, getFileRelativePath(fileName), xslsPicture.getData().length, af));
|
||||
|
||||
}
|
||||
|
||||
return listOfExtractedImages;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract images from xls format files.
|
||||
*
|
||||
* @param af the file from which images are to be extracted.
|
||||
* @return list of extracted images. Returns null in case no images were
|
||||
* extracted.
|
||||
*/
|
||||
private List<ExtractedImage> extractImagesFromXls(AbstractFile af) {
|
||||
List<ExtractedImage> listOfExtractedImages;
|
||||
|
||||
Workbook xls;
|
||||
try {
|
||||
xls = new HSSFWorkbook(new ReadContentInputStream(af));
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.xlsContainer.init.err", af.getName()) + af.getName(), ex); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
|
||||
List<? extends org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = xls.getAllPictures();
|
||||
// if no images are extracted from the PPT, return null, else initialize
|
||||
// the output folder for image extraction.
|
||||
String outputFolderPath;
|
||||
if (listOfAllPictures.isEmpty()) {
|
||||
return null;
|
||||
} else {
|
||||
outputFolderPath = getOutputFolderPath(this.parentFileName);
|
||||
}
|
||||
if (outputFolderPath == null) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
|
||||
return null;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
listOfExtractedImages = new ArrayList<>();
|
||||
for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
|
||||
String imageName = UNKNOWN_NAME_PREFIX + i + "." + pictureData.suggestFileExtension(); //NON-NLS
|
||||
writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
|
||||
listOfExtractedImages.add(new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
|
||||
i++;
|
||||
}
|
||||
return listOfExtractedImages;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract images from xlsx format files.
|
||||
*
|
||||
* @param af the file from which images are to be extracted.
|
||||
* @return list of extracted images. Returns null in case no images were
|
||||
* extracted.
|
||||
*/
|
||||
private List<ExtractedImage> extractImagesFromXlsx(AbstractFile af) {
|
||||
List<ExtractedImage> listOfExtractedImages;
|
||||
Workbook xlsx;
|
||||
try {
|
||||
xlsx = new XSSFWorkbook(new ReadContentInputStream(af));
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.xlsxContainer.init.err", af.getName()), ex); //NON-NLS
|
||||
return null;
|
||||
}
|
||||
|
||||
List<? extends org.apache.poi.ss.usermodel.PictureData> listOfAllPictures = xlsx.getAllPictures();
|
||||
// if no images are extracted from the PPT, return null, else initialize
|
||||
// the output folder for image extraction.
|
||||
String outputFolderPath;
|
||||
if (listOfAllPictures.isEmpty()) {
|
||||
return null;
|
||||
} else {
|
||||
outputFolderPath = getOutputFolderPath(this.parentFileName);
|
||||
}
|
||||
if (outputFolderPath == null) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.extractImageFrom.outputPath.exception.msg", af.getName()));
|
||||
return null;
|
||||
}
|
||||
|
||||
int i = 0;
|
||||
listOfExtractedImages = new ArrayList<>();
|
||||
for (org.apache.poi.ss.usermodel.PictureData pictureData : listOfAllPictures) {
|
||||
String imageName = UNKNOWN_NAME_PREFIX + i + "." + pictureData.suggestFileExtension();
|
||||
writeExtractedImage(Paths.get(outputFolderPath, imageName).toString(), pictureData.getData());
|
||||
listOfExtractedImages.add(new ExtractedImage(imageName, getFileRelativePath(imageName), pictureData.getData().length, af));
|
||||
i++;
|
||||
}
|
||||
return listOfExtractedImages;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes image to the module output location.
|
||||
*
|
||||
* @param outputPath Path where images is written
|
||||
* @param data byte representation of the data to be written to the
|
||||
* specified location.
|
||||
*/
|
||||
private void writeExtractedImage(String outputPath, byte[] data) {
|
||||
try (FileOutputStream fos = new FileOutputStream(outputPath)) {
|
||||
fos.write(data);
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Could not write to the provided location: " + outputPath, ex); //NON-NLS
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets path to the output folder for image extraction. If the path does not
|
||||
* exist, it is created.
|
||||
*
|
||||
* @param parentFileName name of the abstract file being processed for image
|
||||
* extraction.
|
||||
* @return path to the image extraction folder for a given abstract file.
|
||||
*/
|
||||
private String getOutputFolderPath(String parentFileName) {
|
||||
String outputFolderPath = EmbeddedFileExtractorIngestModule.moduleDirAbsolute + File.separator + parentFileName;
|
||||
File outputFilePath = new File(outputFolderPath);
|
||||
if (!outputFilePath.exists()) {
|
||||
try {
|
||||
outputFilePath.mkdirs();
|
||||
} catch (SecurityException ex) {
|
||||
logger.log(Level.WARNING, NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ImageExtractor.getOutputFolderPath.exception.msg", parentFileName), ex);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return outputFolderPath;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the relative path to the file. The path is relative to the case
|
||||
* folder.
|
||||
*
|
||||
* @param fileName name of the the file for which the path is to be
|
||||
* generated.
|
||||
* @return
|
||||
*/
|
||||
private String getFileRelativePath(String fileName) {
|
||||
// Used explicit FWD slashes to maintain DB consistency across operating systems.
|
||||
return "/" + EmbeddedFileExtractorIngestModule.moduleDirRelative + "/" + this.parentFileName + "/" + fileName; //NON-NLS
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents the image extracted using POI methods. Currently, POI is not
|
||||
* capable of extracting ctime, crtime, mtime, and atime; these values are
|
||||
* set to 0.
|
||||
*/
|
||||
private static class ExtractedImage {
|
||||
//String fileName, String localPath, long size, long ctime, long crtime,
|
||||
//long atime, long mtime, boolean isFile, AbstractFile parentFile, String rederiveDetails, String toolName, String toolVersion, String otherDetails
|
||||
|
||||
private final String fileName;
|
||||
private final String localPath;
|
||||
private final long size;
|
||||
private final long ctime;
|
||||
private final long crtime;
|
||||
private final long atime;
|
||||
private final long mtime;
|
||||
private final AbstractFile parentFile;
|
||||
|
||||
ExtractedImage(String fileName, String localPath, long size, AbstractFile parentFile) {
|
||||
this(fileName, localPath, size, 0, 0, 0, 0, parentFile);
|
||||
}
|
||||
|
||||
ExtractedImage(String fileName, String localPath, long size, long ctime, long crtime, long atime, long mtime, AbstractFile parentFile) {
|
||||
this.fileName = fileName;
|
||||
this.localPath = localPath;
|
||||
this.size = size;
|
||||
this.ctime = ctime;
|
||||
this.crtime = crtime;
|
||||
this.atime = atime;
|
||||
this.mtime = mtime;
|
||||
this.parentFile = parentFile;
|
||||
}
|
||||
|
||||
public String getFileName() {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
public String getLocalPath() {
|
||||
return localPath;
|
||||
}
|
||||
|
||||
public long getSize() {
|
||||
return size;
|
||||
}
|
||||
|
||||
public long getCtime() {
|
||||
return ctime;
|
||||
}
|
||||
|
||||
public long getCrtime() {
|
||||
return crtime;
|
||||
}
|
||||
|
||||
public long getAtime() {
|
||||
return atime;
|
||||
}
|
||||
|
||||
public long getMtime() {
|
||||
return mtime;
|
||||
}
|
||||
|
||||
public AbstractFile getParentFile() {
|
||||
return parentFile;
|
||||
}
|
||||
}
|
||||
}
|
@ -16,7 +16,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.modules.sevenzip;
|
||||
package org.sleuthkit.autopsy.modules.embeddedfileextractor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.logging.Level;
|
||||
@ -30,13 +30,13 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||
* Adapter from ReadContentInputStream to
|
||||
* net.sf.sevenzipjbinding.IInStream stream interface
|
||||
*/
|
||||
public class SevenZipContentReadStream implements IInStream {
|
||||
|
||||
class SevenZipContentReadStream implements IInStream {
|
||||
|
||||
private ReadContentInputStream wrapped;
|
||||
private long length;
|
||||
|
||||
|
||||
private static final Logger logger = Logger.getLogger(SevenZipContentReadStream.class.getName());
|
||||
|
||||
|
||||
public SevenZipContentReadStream(ReadContentInputStream wrapped) {
|
||||
this.wrapped = wrapped;
|
||||
this.length = wrapped.getLength();
|
||||
@ -62,9 +62,9 @@ public class SevenZipContentReadStream implements IInStream {
|
||||
NbBundle.getMessage(this.getClass(), "SevenZipContentReadStream.seek.exception.invalidOrigin",
|
||||
origin));
|
||||
}
|
||||
|
||||
|
||||
return newPosition;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -76,21 +76,21 @@ public class SevenZipContentReadStream implements IInStream {
|
||||
if (bytes.length == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
int readBytes = wrapped.read(bytes);
|
||||
if (readBytes < 1) {
|
||||
return 0;
|
||||
}
|
||||
return readBytes;
|
||||
|
||||
|
||||
} catch (IOException ex) {
|
||||
String msg = NbBundle.getMessage(this.getClass(), "SevenZipContentReadStream.read.exception.errReadStream");
|
||||
logger.log(Level.SEVERE, msg, ex);
|
||||
throw new SevenZipException(msg, ex);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Close the stream
|
||||
* @throws IOException
|
@ -16,7 +16,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.modules.sevenzip;
|
||||
package org.sleuthkit.autopsy.modules.embeddedfileextractor;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
@ -29,12 +29,10 @@ import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
import net.sf.sevenzipjbinding.ArchiveFormat;
|
||||
import static net.sf.sevenzipjbinding.ArchiveFormat.RAR;
|
||||
import net.sf.sevenzipjbinding.ISequentialOutStream;
|
||||
import net.sf.sevenzipjbinding.ISevenZipInArchive;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.ingest.IngestServices;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
import net.sf.sevenzipjbinding.SevenZip;
|
||||
import net.sf.sevenzipjbinding.SevenZipException;
|
||||
import net.sf.sevenzipjbinding.SevenZipNativeInitializationException;
|
||||
@ -42,44 +40,38 @@ import net.sf.sevenzipjbinding.simple.ISimpleInArchive;
|
||||
import net.sf.sevenzipjbinding.simple.ISimpleInArchiveItem;
|
||||
import org.netbeans.api.progress.ProgressHandle;
|
||||
import org.netbeans.api.progress.ProgressHandleFactory;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.casemodule.services.FileManager;
|
||||
import org.sleuthkit.autopsy.ingest.FileIngestModule;
|
||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.ingest.IngestJobContext;
|
||||
import org.sleuthkit.autopsy.ingest.IngestMessage;
|
||||
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
|
||||
import org.sleuthkit.autopsy.ingest.IngestMonitor;
|
||||
import org.sleuthkit.autopsy.ingest.IngestServices;
|
||||
import org.sleuthkit.autopsy.ingest.ModuleContentEvent;
|
||||
import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
|
||||
import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||
import org.sleuthkit.datamodel.BlackboardAttribute;
|
||||
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
|
||||
import org.sleuthkit.datamodel.DerivedFile;
|
||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||
import org.sleuthkit.datamodel.TskCoreException;
|
||||
import org.sleuthkit.datamodel.TskData;
|
||||
import org.sleuthkit.autopsy.ingest.IngestModule.ProcessResult;
|
||||
import org.sleuthkit.autopsy.ingest.IngestJobContext;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
|
||||
import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter;
|
||||
import net.sf.sevenzipjbinding.ArchiveFormat;
|
||||
import static net.sf.sevenzipjbinding.ArchiveFormat.RAR;
|
||||
import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
|
||||
|
||||
/**
|
||||
* 7Zip ingest module extracts supported archives, adds extracted DerivedFiles,
|
||||
* reschedules extracted DerivedFiles for ingest.
|
||||
*/
|
||||
public final class SevenZipIngestModule implements FileIngestModule {
|
||||
class SevenZipExtractor {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(SevenZipIngestModule.class.getName());
|
||||
private static final Logger logger = Logger.getLogger(SevenZipExtractor.class.getName());
|
||||
private IngestServices services = IngestServices.getInstance();
|
||||
static final String[] SUPPORTED_EXTENSIONS = {"zip", "rar", "arj", "7z", "7zip", "gzip", "gz", "bzip2", "tar", "tgz",}; // "iso"}; NON-NLS
|
||||
private String moduleDirRelative; //relative to the case, to store in db
|
||||
private String moduleDirAbsolute; //absolute, to extract to
|
||||
|
||||
private final IngestJobContext context;
|
||||
private final FileTypeDetector fileTypeDetector;
|
||||
static final String[] SUPPORTED_EXTENSIONS = {"zip", "rar", "arj", "7z", "7zip", "gzip", "gz", "bzip2", "tar", "tgz",}; // NON-NLS
|
||||
//encryption type strings
|
||||
private static final String ENCRYPTION_FILE_LEVEL = NbBundle.getMessage(SevenZipIngestModule.class,
|
||||
"SevenZipIngestModule.encryptionFileLevel");
|
||||
private static final String ENCRYPTION_FULL = NbBundle.getMessage(SevenZipIngestModule.class,
|
||||
"SevenZipIngestModule.encryptionFull");
|
||||
private static final String ENCRYPTION_FILE_LEVEL = NbBundle.getMessage(EmbeddedFileExtractorIngestModule.class,
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.encryptionFileLevel");
|
||||
private static final String ENCRYPTION_FULL = NbBundle.getMessage(EmbeddedFileExtractorIngestModule.class,
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.encryptionFull");
|
||||
//zip bomb detection
|
||||
private static final int MAX_DEPTH = 4;
|
||||
private static final int MAX_COMPRESSION_RATIO = 600;
|
||||
@ -87,47 +79,31 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
private static final long MIN_FREE_DISK_SPACE = 1 * 1000 * 1000000L; //1GB
|
||||
//counts archive depth
|
||||
private ArchiveDepthCountTree archiveDepthCountTree;
|
||||
private IngestJobContext context;
|
||||
private long jobId;
|
||||
private final static IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter();
|
||||
private FileTypeDetector fileTypeDetector;
|
||||
/**
|
||||
* Enum of mimetypes which support archive extraction
|
||||
*/
|
||||
private enum SupportedArchiveExtractionFormats {
|
||||
ZIP("application/zip"),
|
||||
SEVENZ("application/x-7z-compressed"),
|
||||
GZIP("application/gzip"),
|
||||
XGZIP("application/x-gzip"),
|
||||
XBZIP2("application/x-bzip2"),
|
||||
XTAR("application/x-tar");
|
||||
|
||||
SevenZipIngestModule() {
|
||||
private final String mimeType;
|
||||
|
||||
SupportedArchiveExtractionFormats(final String mimeType) {
|
||||
this.mimeType = mimeType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.mimeType;
|
||||
}
|
||||
// TODO Expand to support more formats after upgrading Tika
|
||||
}
|
||||
|
||||
@Override
|
||||
public void startUp(IngestJobContext context) throws IngestModuleException {
|
||||
this.context = context;
|
||||
jobId = context.getJobId();
|
||||
|
||||
try {
|
||||
fileTypeDetector = new FileTypeDetector();
|
||||
} catch (FileTypeDetector.FileTypeDetectorInitException ex) {
|
||||
throw new IngestModuleException(NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.startUp.fileTypeDetectorInitializationException.msg"));
|
||||
}
|
||||
|
||||
final Case currentCase = Case.getCurrentCase();
|
||||
|
||||
moduleDirRelative = Case.getModulesOutputDirRelPath() + File.separator + ArchiveFileExtractorModuleFactory.getModuleName();
|
||||
moduleDirAbsolute = currentCase.getModulesOutputDirAbsPath() + File.separator + ArchiveFileExtractorModuleFactory.getModuleName();
|
||||
|
||||
|
||||
File unpackDirPathFile = new File(moduleDirAbsolute);
|
||||
if (!unpackDirPathFile.exists()) {
|
||||
try {
|
||||
unpackDirPathFile.mkdirs();
|
||||
} catch (SecurityException e) {
|
||||
logger.log(Level.SEVERE, "Error initializing output dir: " + moduleDirAbsolute, e); //NON-NLS
|
||||
String msg = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.init.errInitModule.msg", ArchiveFileExtractorModuleFactory.getModuleName());
|
||||
String details = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.init.errInitModule.details",
|
||||
moduleDirAbsolute, e.getMessage());
|
||||
services.postMessage(IngestMessage.createErrorMessage(ArchiveFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
SevenZipExtractor(IngestJobContext context, FileTypeDetector fileTypeDetector) throws IngestModuleException {
|
||||
if (!SevenZip.isInitializedSuccessfully() && (SevenZip.getLastInitializationException() == null)) {
|
||||
try {
|
||||
SevenZip.initSevenZipFromPlatformJAR();
|
||||
@ -135,86 +111,51 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
logger.log(Level.INFO, "7-Zip-JBinding library was initialized on supported platform: {0}", platform); //NON-NLS
|
||||
} catch (SevenZipNativeInitializationException e) {
|
||||
logger.log(Level.SEVERE, "Error initializing 7-Zip-JBinding library", e); //NON-NLS
|
||||
String msg = NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.init.errInitModule.msg",
|
||||
ArchiveFileExtractorModuleFactory.getModuleName());
|
||||
String details = NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.init.errCantInitLib",
|
||||
String msg = NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.init.errInitModule.msg",
|
||||
EmbeddedFileExtractorModuleFactory.getModuleName());
|
||||
String details = NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.init.errCantInitLib",
|
||||
e.getMessage());
|
||||
services.postMessage(IngestMessage.createErrorMessage(ArchiveFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
throw new RuntimeException(e);
|
||||
services.postMessage(IngestMessage.createErrorMessage(EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
throw new IngestModuleException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
archiveDepthCountTree = new ArchiveDepthCountTree();
|
||||
this.context = context;
|
||||
this.fileTypeDetector = fileTypeDetector;
|
||||
this.archiveDepthCountTree = new ArchiveDepthCountTree();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ProcessResult process(AbstractFile abstractFile) {
|
||||
if (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) {
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
if (abstractFile.getKnown().equals(TskData.FileKnown.KNOWN)) {
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
if (abstractFile.isFile() == false || !isSupported(abstractFile)) {
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
//check if already has derived files, skip
|
||||
/**
|
||||
* This method returns true if the file format is currently supported. Else
|
||||
* it returns false. Attempt extension based detection in case Apache Tika
|
||||
* based detection fails.
|
||||
*
|
||||
* @param abstractFile The AbstractFile whose mimetype is to be determined.
|
||||
* @return This method returns true if the file format is currently
|
||||
* supported. Else it returns false.
|
||||
*/
|
||||
boolean isSevenZipExtractionSupported(AbstractFile abstractFile) {
|
||||
try {
|
||||
if (abstractFile.hasChildren()) {
|
||||
//check if local unpacked dir exists
|
||||
final String uniqueFileName = getUniqueName(abstractFile);
|
||||
final String localRootAbsPath = getLocalRootAbsPath(uniqueFileName);
|
||||
if (new File(localRootAbsPath).exists()) {
|
||||
logger.log(Level.INFO, "File already has been processed as it has children and local unpacked file, skipping: {0}", abstractFile.getName()); //NON-NLS
|
||||
return ProcessResult.OK;
|
||||
String abstractFileMimeType = fileTypeDetector.getFileType(abstractFile);
|
||||
for (SupportedArchiveExtractionFormats s : SupportedArchiveExtractionFormats.values()) {
|
||||
if (s.toString().equals(abstractFileMimeType)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} catch (TskCoreException e) {
|
||||
logger.log(Level.INFO, "Error checking if file already has been processed, skipping: {0}", abstractFile.getName()); //NON-NLS
|
||||
return ProcessResult.OK;
|
||||
|
||||
return false;
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.WARNING, "Error executing FileTypeDetector.getFileType()", ex); // NON-NLS
|
||||
}
|
||||
|
||||
logger.log(Level.INFO, "Processing with archive extractor: {0}", abstractFile.getName()); //NON-NLS
|
||||
|
||||
List<AbstractFile> unpackedFiles = unpack(abstractFile);
|
||||
if (!unpackedFiles.isEmpty()) {
|
||||
//currently sending a single event for all new files
|
||||
services.fireModuleContentEvent(new ModuleContentEvent(abstractFile));
|
||||
|
||||
context.addFilesToJob(unpackedFiles);
|
||||
// attempt extension matching
|
||||
final String extension = abstractFile.getNameExtension();
|
||||
for (String supportedExtension : SUPPORTED_EXTENSIONS) {
|
||||
if (extension.equals(supportedExtension)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutDown() {
|
||||
// We don't need the value, but for cleanliness and consistency
|
||||
refCounter.decrementAndGet(jobId);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get local relative path to the unpacked archive root
|
||||
*
|
||||
* @param archiveFile
|
||||
* @return
|
||||
*/
|
||||
private String getUniqueName(AbstractFile archiveFile) {
|
||||
return archiveFile.getName() + "_" + archiveFile.getId();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get local abs path to the unpacked archive root
|
||||
*
|
||||
* @param localRootRelPath relative path to archive, from getUniqueName()
|
||||
* @return
|
||||
*/
|
||||
private String getLocalRootAbsPath(String localRootRelPath) {
|
||||
return moduleDirAbsolute + File.separator + localRootRelPath;
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -250,11 +191,11 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
String itemName = archiveFileItem.getPath();
|
||||
logger.log(Level.INFO, "Possible zip bomb detected, compression ration: {0} for in archive item: {1}", new Object[]{cRatio, itemName}); //NON-NLS
|
||||
String msg = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.isZipBombCheck.warnMsg", archiveName, itemName);
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnMsg", archiveName, itemName);
|
||||
String details = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.isZipBombCheck.warnDetails", cRatio);
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.isZipBombCheck.warnDetails", cRatio);
|
||||
//MessageNotifyUtil.Notify.error(msg, details);
|
||||
services.postMessage(IngestMessage.createWarningMessage(ArchiveFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
services.postMessage(IngestMessage.createWarningMessage(EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
@ -265,53 +206,51 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check file extension and return appropriate input options for SevenZip.openInArchive()
|
||||
* Check file extension and return appropriate input options for
|
||||
* SevenZip.openInArchive()
|
||||
*
|
||||
* @param archiveFile file to check file extension
|
||||
* @return input parameter for SevenZip.openInArchive()
|
||||
*/
|
||||
private ArchiveFormat get7ZipOptions(AbstractFile archiveFile)
|
||||
{
|
||||
private ArchiveFormat get7ZipOptions(AbstractFile archiveFile) {
|
||||
// try to get the file type from the BB
|
||||
String detectedFormat = null;
|
||||
String detectedFormat = null;
|
||||
try {
|
||||
ArrayList<BlackboardAttribute> attributes = archiveFile.getGenInfoAttributes(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_FILE_TYPE_SIG);
|
||||
for (BlackboardAttribute attribute : attributes) {
|
||||
detectedFormat = attribute.getValueString();
|
||||
break;
|
||||
}
|
||||
} catch (TskCoreException ex) {
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.WARNING, "Couldn't obtain file attributes for file: " + archiveFile.toString(), ex); //NON-NLS
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (detectedFormat == null) {
|
||||
logger.log(Level.WARNING, "Could not detect format for file: {0}", archiveFile); //NON-NLS
|
||||
|
||||
logger.log(Level.WARNING, "Could not detect format for file: " + archiveFile); //NON-NLS
|
||||
|
||||
// if we don't have attribute info then use file extension
|
||||
String extension = archiveFile.getNameExtension();
|
||||
if ("rar".equals(extension)) //NON-NLS
|
||||
{
|
||||
// for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
|
||||
// it will be opened incorrectly when using 7zip's built-in auto-detect functionality
|
||||
return RAR;
|
||||
return RAR;
|
||||
}
|
||||
|
||||
|
||||
// Otherwise open the archive using 7zip's built-in auto-detect functionality
|
||||
return null;
|
||||
}
|
||||
else if (detectedFormat.contains("application/x-rar-compressed")) //NON-NLS
|
||||
} else if (detectedFormat.contains("application/x-rar-compressed")) //NON-NLS
|
||||
{
|
||||
// for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
|
||||
// it will be opened incorrectly when using 7zip's built-in auto-detect functionality
|
||||
return RAR;
|
||||
return RAR;
|
||||
}
|
||||
|
||||
// Otherwise open the archive using 7zip's built-in auto-detect functionality
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Unpack the file to local folder and return a list of derived files
|
||||
@ -320,23 +259,23 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
* @param archiveFile file to unpack
|
||||
* @return list of unpacked derived files
|
||||
*/
|
||||
private List<AbstractFile> unpack(AbstractFile archiveFile) {
|
||||
protected void unpack(AbstractFile archiveFile) {
|
||||
List<AbstractFile> unpackedFiles = Collections.<AbstractFile>emptyList();
|
||||
|
||||
//recursion depth check for zip bomb
|
||||
final long archiveId = archiveFile.getId();
|
||||
ArchiveDepthCountTree.Archive parentAr = archiveDepthCountTree.findArchive(archiveId);
|
||||
SevenZipExtractor.ArchiveDepthCountTree.Archive parentAr = archiveDepthCountTree.findArchive(archiveId);
|
||||
if (parentAr == null) {
|
||||
parentAr = archiveDepthCountTree.addArchive(null, archiveId);
|
||||
} else if (parentAr.getDepth() == MAX_DEPTH) {
|
||||
String msg = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.unpack.warnMsg.zipBomb", archiveFile.getName());
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnMsg.zipBomb", archiveFile.getName());
|
||||
String details = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.unpack.warnDetails.zipBomb",
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.warnDetails.zipBomb",
|
||||
parentAr.getDepth());
|
||||
//MessageNotifyUtil.Notify.error(msg, details);
|
||||
services.postMessage(IngestMessage.createWarningMessage(ArchiveFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
return unpackedFiles;
|
||||
services.postMessage(IngestMessage.createWarningMessage(EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
return;
|
||||
}
|
||||
|
||||
boolean hasEncrypted = false;
|
||||
@ -346,7 +285,7 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
SevenZipContentReadStream stream = null;
|
||||
|
||||
final ProgressHandle progress = ProgressHandleFactory.createHandle(
|
||||
NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.moduleName"));
|
||||
NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.moduleName"));
|
||||
int processedItems = 0;
|
||||
|
||||
String compressMethod = null;
|
||||
@ -357,7 +296,7 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
// for RAR files we need to open them explicitly as RAR. Otherwise, if there is a ZIP archive inside RAR archive
|
||||
// it will be opened incorrectly when using 7zip's built-in auto-detect functionality.
|
||||
// All other archive formats are still opened using 7zip built-in auto-detect functionality.
|
||||
ArchiveFormat options = get7ZipOptions(archiveFile);
|
||||
ArchiveFormat options = get7ZipOptions(archiveFile);
|
||||
inArchive = SevenZip.openInArchive(options, stream);
|
||||
|
||||
int numItems = inArchive.getNumberOfItems();
|
||||
@ -368,8 +307,8 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
final ISimpleInArchive simpleInArchive = inArchive.getSimpleInterface();
|
||||
|
||||
//setup the archive local root folder
|
||||
final String uniqueArchiveFileName = getUniqueName(archiveFile);
|
||||
final String localRootAbsPath = getLocalRootAbsPath(uniqueArchiveFileName);
|
||||
final String uniqueArchiveFileName = EmbeddedFileExtractorIngestModule.getUniqueName(archiveFile);
|
||||
final String localRootAbsPath = EmbeddedFileExtractorIngestModule.getLocalRootAbsPath(uniqueArchiveFileName);
|
||||
final File localRoot = new File(localRootAbsPath);
|
||||
if (!localRoot.exists()) {
|
||||
try {
|
||||
@ -377,12 +316,12 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
} catch (SecurityException e) {
|
||||
logger.log(Level.SEVERE, "Error setting up output path for archive root: {0}", localRootAbsPath); //NON-NLS
|
||||
//bail
|
||||
return unpackedFiles;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
//initialize tree hierarchy to keep track of unpacked file structure
|
||||
UnpackedTree unpackedTree = new UnpackedTree(moduleDirRelative + "/" + uniqueArchiveFileName, archiveFile);
|
||||
SevenZipExtractor.UnpackedTree unpackedTree = new SevenZipExtractor.UnpackedTree(EmbeddedFileExtractorIngestModule.moduleDirRelative + "/" + uniqueArchiveFileName, archiveFile);
|
||||
|
||||
long freeDiskSpace = services.getFreeDiskSpace();
|
||||
|
||||
@ -390,7 +329,7 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
int itemNumber = 0;
|
||||
for (ISimpleInArchiveItem item : simpleInArchive.getArchiveItems()) {
|
||||
String pathInArchive = item.getPath();
|
||||
|
||||
|
||||
if (pathInArchive == null || pathInArchive.isEmpty()) {
|
||||
//some formats (.tar.gz) may not be handled correctly -- file in archive has no name/path
|
||||
//handle this for .tar.gz and tgz but assuming the child is tar,
|
||||
@ -419,7 +358,7 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
pathInArchive = "/" + useName;
|
||||
}
|
||||
|
||||
String msg = NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.unpack.unknownPath.msg",
|
||||
String msg = NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.unknownPath.msg",
|
||||
archiveFile.getName(), pathInArchive);
|
||||
logger.log(Level.WARNING, msg);
|
||||
|
||||
@ -433,7 +372,7 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
}
|
||||
|
||||
//find this node in the hierarchy, create if needed
|
||||
UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive);
|
||||
SevenZipExtractor.UnpackedTree.UnpackedNode unpackedNode = unpackedTree.addNode(pathInArchive);
|
||||
|
||||
String fileName = unpackedNode.getFileName();
|
||||
|
||||
@ -463,12 +402,12 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
long newDiskSpace = freeDiskSpace - size;
|
||||
if (newDiskSpace < MIN_FREE_DISK_SPACE) {
|
||||
String msg = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.unpack.notEnoughDiskSpace.msg",
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.msg",
|
||||
archiveFile.getName(), fileName);
|
||||
String details = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.unpack.notEnoughDiskSpace.details");
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.notEnoughDiskSpace.details");
|
||||
//MessageNotifyUtil.Notify.error(msg, details);
|
||||
services.postMessage(IngestMessage.createErrorMessage(ArchiveFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
services.postMessage(IngestMessage.createErrorMessage(EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
logger.log(Level.INFO, "Skipping archive item due to insufficient disk space: {0}, {1}", new Object[]{archiveFile.getUniquePath(), fileName}); //NON-NLS
|
||||
logger.log(Level.INFO, "Available disk space: {0}", new Object[]{freeDiskSpace}); //NON-NLS
|
||||
continue; //skip this file
|
||||
@ -479,10 +418,10 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
}
|
||||
|
||||
final String uniqueExtractedName = uniqueArchiveFileName + File.separator + (item.getItemIndex() / 1000) + File.separator + item.getItemIndex() + new File(pathInArchive).getName();
|
||||
|
||||
|
||||
//final String localRelPath = unpackDir + File.separator + localFileRelPath;
|
||||
final String localRelPath = moduleDirRelative + File.separator + uniqueExtractedName;
|
||||
final String localAbsPath = moduleDirAbsolute + File.separator + uniqueExtractedName;
|
||||
final String localRelPath = EmbeddedFileExtractorIngestModule.moduleDirRelative + File.separator + uniqueExtractedName;
|
||||
final String localAbsPath = EmbeddedFileExtractorIngestModule.moduleDirAbsolute + File.separator + uniqueExtractedName;
|
||||
|
||||
//create local dirs and empty files before extracted
|
||||
File localFile = new java.io.File(localAbsPath);
|
||||
@ -504,7 +443,7 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
//TODO consider bail out / msg to the user
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// skip the rest of this loop if we couldn't create the file
|
||||
if (localFile.exists() == false) {
|
||||
continue;
|
||||
@ -523,9 +462,9 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
|
||||
//unpack locally if a file
|
||||
if (!isDir) {
|
||||
UnpackStream unpackStream = null;
|
||||
SevenZipExtractor.UnpackStream unpackStream = null;
|
||||
try {
|
||||
unpackStream = new UnpackStream(localAbsPath);
|
||||
unpackStream = new SevenZipExtractor.UnpackStream(localAbsPath);
|
||||
item.extractSlow(unpackStream);
|
||||
} catch (Exception e) {
|
||||
//could be something unexpected with this file, move on
|
||||
@ -540,16 +479,22 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
//update units for progress bar
|
||||
++processedItems;
|
||||
}
|
||||
|
||||
|
||||
// add them to the DB. We wait until the end so that we have the metadata on all of the
|
||||
// intermediate nodes since the order is not guaranteed
|
||||
try {
|
||||
unpackedTree.addDerivedFilesToCase();
|
||||
unpackedFiles = unpackedTree.getAllFileObjects();
|
||||
|
||||
if (!unpackedFiles.isEmpty()) {
|
||||
//currently sending a single event for all new files
|
||||
services.fireModuleContentEvent(new ModuleContentEvent(archiveFile));
|
||||
context.addFilesToJob(unpackedFiles);
|
||||
}
|
||||
|
||||
//check if children are archives, update archive depth tracking
|
||||
for (AbstractFile unpackedFile : unpackedFiles) {
|
||||
if (isSupported(unpackedFile)) {
|
||||
if (isSevenZipExtractionSupported(unpackedFile)) {
|
||||
archiveDepthCountTree.addArchive(parentAr, unpackedFile.getId());
|
||||
}
|
||||
}
|
||||
@ -571,12 +516,12 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
|
||||
// print a message if the file is allocated
|
||||
if (archiveFile.isMetaFlagSet(TskData.TSK_FS_META_FLAG_ENUM.ALLOC)) {
|
||||
String msg = NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.unpack.errUnpacking.msg",
|
||||
String msg = NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.msg",
|
||||
archiveFile.getName());
|
||||
String details = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.unpack.errUnpacking.details",
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.errUnpacking.details",
|
||||
fullName, ex.getMessage());
|
||||
services.postMessage(IngestMessage.createErrorMessage(ArchiveFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
services.postMessage(IngestMessage.createErrorMessage(EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
}
|
||||
} finally {
|
||||
if (inArchive != null) {
|
||||
@ -606,65 +551,17 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
String encryptionType = fullEncryption ? ENCRYPTION_FULL : ENCRYPTION_FILE_LEVEL;
|
||||
try {
|
||||
BlackboardArtifact artifact = archiveFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED);
|
||||
artifact.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME.getTypeID(), ArchiveFileExtractorModuleFactory.getModuleName(), encryptionType));
|
||||
services.fireModuleDataEvent(new ModuleDataEvent(ArchiveFileExtractorModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED));
|
||||
artifact.addAttribute(new BlackboardAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_NAME.getTypeID(), EmbeddedFileExtractorModuleFactory.getModuleName(), encryptionType));
|
||||
services.fireModuleDataEvent(new ModuleDataEvent(EmbeddedFileExtractorModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED));
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.SEVERE, "Error creating blackboard artifact for encryption detected for file: " + archiveFile, ex); //NON-NLS
|
||||
}
|
||||
|
||||
String msg = NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.unpack.encrFileDetected.msg");
|
||||
String msg = NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.msg");
|
||||
String details = NbBundle.getMessage(this.getClass(),
|
||||
"SevenZipIngestModule.unpack.encrFileDetected.details",
|
||||
archiveFile.getName(), ArchiveFileExtractorModuleFactory.getModuleName());
|
||||
services.postMessage(IngestMessage.createWarningMessage(ArchiveFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
}
|
||||
|
||||
return unpackedFiles;
|
||||
}
|
||||
|
||||
private boolean isSupported(AbstractFile file) {
|
||||
// see if it is on the list of extensions
|
||||
final String extension = file.getNameExtension();
|
||||
for (int i = 0; i < SUPPORTED_EXTENSIONS.length; ++i) {
|
||||
if (extension.equals(SUPPORTED_EXTENSIONS[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// if no extension match, check the blackboard for the file type
|
||||
boolean attributeFound = false;
|
||||
try {
|
||||
ArrayList<BlackboardAttribute> attributes = file.getGenInfoAttributes(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_FILE_TYPE_SIG);
|
||||
for (BlackboardAttribute attribute : attributes) {
|
||||
attributeFound = true;
|
||||
String fileType = attribute.getValueString();
|
||||
if (!fileType.isEmpty() && fileType.equals("application/zip")) { //NON-NLS
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} catch (TskCoreException ex) {
|
||||
}
|
||||
|
||||
// if no blackboard entry for file type, do it manually for ZIP files:
|
||||
if (attributeFound) {
|
||||
return false;
|
||||
} else {
|
||||
return isZipFileHeader(file);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if is zip file based on header
|
||||
*
|
||||
* @param file
|
||||
* @return true if zip file, false otherwise
|
||||
*/
|
||||
private boolean isZipFileHeader(AbstractFile file) {
|
||||
try {
|
||||
return fileTypeDetector.getFileType(file).equals("application/zip"); //NON-NLS
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.SEVERE, "Failed to detect file type", ex); //NON-NLS
|
||||
return false;
|
||||
"EmbeddedFileExtractorIngestModule.ArchiveExtractor.unpack.encrFileDetected.details",
|
||||
archiveFile.getName(), EmbeddedFileExtractorModuleFactory.getModuleName());
|
||||
services.postMessage(IngestMessage.createWarningMessage(EmbeddedFileExtractorModuleFactory.getModuleName(), msg, details));
|
||||
}
|
||||
}
|
||||
|
||||
@ -691,8 +588,8 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
output.write(bytes);
|
||||
} catch (IOException ex) {
|
||||
throw new SevenZipException(
|
||||
NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.UnpackStream.write.exception.msg",
|
||||
localAbsPath), ex);
|
||||
NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackStream.write.exception.msg",
|
||||
localAbsPath), ex);
|
||||
}
|
||||
return bytes.length;
|
||||
}
|
||||
@ -710,21 +607,22 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
}
|
||||
|
||||
/**
|
||||
* Representation of the files in the archive. Used to keep track
|
||||
* of local tree file hierarchy, archive depth, and files created to easily
|
||||
* and reliably get parent AbstractFile for unpacked file. So that we don't
|
||||
* have to depend on type of traversal of unpacked files handed to us by
|
||||
* 7zip unpacker.
|
||||
* Representation of the files in the archive. Used to keep track of local tree
|
||||
* file hierarchy, archive depth, and files created to easily and reliably
|
||||
* get parent AbstractFile for unpacked file. So that we don't have to
|
||||
* depend on type of traversal of unpacked files handed to us by 7zip
|
||||
* unpacker.
|
||||
*/
|
||||
private class UnpackedTree {
|
||||
|
||||
final UnpackedNode rootNode;
|
||||
final UnpackedNode rootNode;
|
||||
|
||||
/**
|
||||
*
|
||||
* @param localPathRoot Path in module output folder that files will be saved to
|
||||
*
|
||||
* @param localPathRoot Path in module output folder that files will be
|
||||
* saved to
|
||||
* @param archiveFile Archive file being extracted
|
||||
* @param fileManager
|
||||
* @param fileManager
|
||||
*/
|
||||
UnpackedTree(String localPathRoot, AbstractFile archiveFile) {
|
||||
this.rootNode = new UnpackedNode();
|
||||
@ -734,9 +632,9 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a node in the tree at the given path. Makes intermediate
|
||||
* nodes if needed. If a node already exists at that path, it is
|
||||
* returned.
|
||||
* Creates a node in the tree at the given path. Makes intermediate
|
||||
* nodes if needed. If a node already exists at that path, it is
|
||||
* returned.
|
||||
*
|
||||
* @param filePath file path with 1 or more tokens separated by /
|
||||
* @return child node for the last file token in the filePath
|
||||
@ -753,7 +651,7 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
}
|
||||
|
||||
/**
|
||||
* recursive method that traverses the path
|
||||
* recursive method that traverses the path
|
||||
*
|
||||
* @param tokenPath
|
||||
* @return
|
||||
@ -765,13 +663,13 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
}
|
||||
|
||||
// get the next name in the path and look it up
|
||||
String childName = tokenPath.remove(0);
|
||||
String childName = tokenPath.remove(0);
|
||||
UnpackedNode child = parent.getChild(childName);
|
||||
// create new node
|
||||
if (child == null) {
|
||||
child = new UnpackedNode(childName, parent);
|
||||
}
|
||||
|
||||
|
||||
// go down one more level
|
||||
return addNode(child, tokenPath);
|
||||
}
|
||||
@ -816,7 +714,7 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
* files for the entire hierarchy
|
||||
*/
|
||||
void addDerivedFilesToCase() throws TskCoreException {
|
||||
final FileManager fileManager = Case.getCurrentCase().getServices().getFileManager();
|
||||
final FileManager fileManager = Case.getCurrentCase().getServices().getFileManager();
|
||||
for (UnpackedNode child : rootNode.children) {
|
||||
addDerivedFilesToCaseRec(child, fileManager);
|
||||
}
|
||||
@ -828,14 +726,14 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
try {
|
||||
DerivedFile df = fileManager.addDerivedFile(fileName, node.getLocalRelPath(), node.getSize(),
|
||||
node.getCtime(), node.getCrtime(), node.getAtime(), node.getMtime(),
|
||||
node.isIsFile(), node.getParent().getFile(), "", ArchiveFileExtractorModuleFactory.getModuleName(), "", "");
|
||||
node.isIsFile(), node.getParent().getFile(), "", EmbeddedFileExtractorModuleFactory.getModuleName(), "", "");
|
||||
node.setFile(df);
|
||||
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.SEVERE, "Error adding a derived file to db:" + fileName, ex); //NON-NLS
|
||||
throw new TskCoreException(
|
||||
NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.UnpackedTree.exception.msg",
|
||||
fileName), ex);
|
||||
NbBundle.getMessage(this.getClass(), "EmbeddedFileExtractorIngestModule.ArchiveExtractor.UnpackedTree.exception.msg",
|
||||
fileName), ex);
|
||||
}
|
||||
|
||||
//recurse
|
||||
@ -844,9 +742,8 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A node in the unpacked tree that represents a file or folder.
|
||||
* A node in the unpacked tree that represents a file or folder.
|
||||
*/
|
||||
private class UnpackedNode {
|
||||
|
||||
@ -1018,4 +915,5 @@ public final class SevenZipIngestModule implements FileIngestModule {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
OpenIDE-Module-Display-Category=Ingest Module
|
||||
OpenIDE-Module-Long-Description=\
|
||||
7Zip Ingest Module\n\nThe 7Zip ingest module processes archive files (such as zip and other archive types supported by the 7zip extractor).\n\
|
||||
Contents of the archives are extracted and the derived files are added back to the current ingest to be processed by the configured ingest modules.\n\
|
||||
If the derived file happens to be an archive file, it will be re-processed by the 7zip extractor - the extractor will process archive files N-levels deep.\n\n\
|
||||
The extracted files are navigable in the directory tree.\n\n\
|
||||
The module is supported on Windows, Linux and Mac operating systems.
|
||||
OpenIDE-Module-Name=SevenZip
|
||||
OpenIDE-Module-Short-Description=7Zip Ingest Module
|
||||
SevenZipContentReadStream.seek.exception.invalidOrigin=Invalid seek origin\: {0}
|
||||
SevenZipContentReadStream.read.exception.errReadStream=Error reading content stream.
|
||||
SevenZipIngestModule.moduleName=Archive Extractor
|
||||
SevenZipIngestModule.moduleDesc.text=Extracts archive files (zip, rar, arj, 7z, gzip, bzip2, tar), reschedules them to current ingest and populates directory tree with new files.
|
||||
SevenZipIngestModule.encryptionFileLevel=File-level Encryption
|
||||
SevenZipIngestModule.encryptionFull=Full Encryption
|
||||
SevenZipIngestModule.init.errInitModule.msg=Error initializing {0}
|
||||
SevenZipIngestModule.init.errInitModule.details=Error initializing output dir\: {0}\: {1}
|
||||
SevenZipIngestModule.init.errCantInitLib=Could not initialize 7-ZIP library\: {0}
|
||||
SevenZipIngestModule.isZipBombCheck.warnMsg=Possible ZIP bomb detected in archive\: {0}, item\: {1}
|
||||
SevenZipIngestModule.isZipBombCheck.warnDetails=The archive item compression ratio is {0}, skipping processing of this archive item.
|
||||
SevenZipIngestModule.unpack.warnMsg.zipBomb=Possible ZIP bomb detected\: {0}
|
||||
SevenZipIngestModule.unpack.warnDetails.zipBomb=The archive is {0} levels deep, skipping processing of this archive and its contents
|
||||
SevenZipIngestModule.unpack.unknownPath.msg=Unknown item path in archive\: {0}, will use\: {1}
|
||||
SevenZipIngestModule.unpack.notEnoughDiskSpace.msg=Not enough disk space to unpack archive item\: {0}, {1}
|
||||
SevenZipIngestModule.unpack.notEnoughDiskSpace.details=The archive item is too large to unpack, skipping unpacking this item.
|
||||
SevenZipIngestModule.unpack.errUnpacking.msg=Error unpacking {0}
|
||||
SevenZipIngestModule.unpack.errUnpacking.details=Error unpacking {0}. {1}
|
||||
SevenZipIngestModule.unpack.encrFileDetected.msg=Encrypted files in archive detected.
|
||||
SevenZipIngestModule.unpack.encrFileDetected.details=Some files in archive\: {0} are encrypted. {1} extractor was unable to extract all files from this archive.
|
||||
SevenZipIngestModule.UnpackStream.write.exception.msg=Error writing unpacked file to\: {0}
|
||||
SevenZipIngestModule.UnpackedTree.exception.msg=Error adding a derived file to db\:{0}
|
||||
SevenZipIngestModule.startUp.fileTypeDetectorInitializationException.msg=Error initializing the file type detector.
|
Loading…
x
Reference in New Issue
Block a user