diff --git a/Core/src/org/sleuthkit/autopsy/modules/exif/Bundle.properties b/Core/src/org/sleuthkit/autopsy/modules/exif/Bundle.properties index 2987fc2ae8..391cfdac7f 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/exif/Bundle.properties +++ b/Core/src/org/sleuthkit/autopsy/modules/exif/Bundle.properties @@ -6,3 +6,4 @@ OpenIDE-Module-Name=ExifParser OpenIDE-Module-Short-Description=Exif metadata ingest module ExifParserFileIngestModule.moduleName.text=Exif Parser ExifParserFileIngestModule.getDesc.text=Ingests JPEG files and retrieves their EXIF metadata. +ExifParserFileIngestModule.startUp.fileTypeDetectorInitializationException.msg=Error initializing the File Type Detector. \ No newline at end of file diff --git a/Core/src/org/sleuthkit/autopsy/modules/exif/ExifParserFileIngestModule.java b/Core/src/org/sleuthkit/autopsy/modules/exif/ExifParserFileIngestModule.java index a045f48a5e..f3f7ea3313 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/exif/ExifParserFileIngestModule.java +++ b/Core/src/org/sleuthkit/autopsy/modules/exif/ExifParserFileIngestModule.java @@ -34,13 +34,14 @@ import java.util.Collection; import java.util.Date; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; -import org.sleuthkit.autopsy.coreutils.ImageUtils; +import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.ingest.FileIngestModule; import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.ingest.IngestServices; import org.sleuthkit.autopsy.ingest.ModuleDataEvent; import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter; +import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardAttribute; @@ -63,6 +64,7 @@ public final class ExifParserFileIngestModule implements FileIngestModule { private volatile boolean filesToFire 
= false; private long jobId; private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter(); + private FileTypeDetector fileTypeDetector; ExifParserFileIngestModule() { } @@ -71,6 +73,12 @@ public final class ExifParserFileIngestModule implements FileIngestModule { public void startUp(IngestJobContext context) throws IngestModuleException { jobId = context.getJobId(); refCounter.incrementAndGet(jobId); + try { + fileTypeDetector = new FileTypeDetector(); + } catch (FileTypeDetector.FileTypeDetectorInitException ex) { + logger.log(Level.SEVERE, NbBundle.getMessage(this.getClass(), "ExifParserFileIngestModule.startUp.fileTypeDetectorInitializationException.msg"), ex); + throw new IngestModuleException(NbBundle.getMessage(this.getClass(), "ExifParserFileIngestModule.startUp.fileTypeDetectorInitializationException.msg")); + } } @@ -197,7 +205,7 @@ public final class ExifParserFileIngestModule implements FileIngestModule { * @return true if to be processed */ private boolean parsableFormat(AbstractFile f) { - return ImageUtils.isJpegFileHeader(f); + return fileTypeDetector.getFileType(f).equals("image/jpeg"); } @Override diff --git a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java index bd7418d2a2..3f0b47d7f4 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java +++ b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java @@ -18,15 +18,19 @@ */ package org.sleuthkit.autopsy.modules.filetypeid; +import java.util.ArrayList; import java.util.Map; import java.util.SortedSet; +import java.util.logging.Level; import org.apache.tika.Tika; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MimeTypes; +import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.BlackboardArtifact; import 
org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.TskCoreException; +import org.sleuthkit.datamodel.TskData; /** * Detects the type of a file by an inspection of its contents. @@ -37,6 +41,7 @@ public class FileTypeDetector { private static final int BUFFER_SIZE = 64 * 1024; private final byte buffer[] = new byte[BUFFER_SIZE]; private final Map userDefinedFileTypes; + private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName()); /** * Constructs an object that detects the type of a file by an inspection of @@ -93,17 +98,59 @@ public class FileTypeDetector { return false; } + /** + * This method returns a string representing the mimetype of the provided + * abstractFile. Blackboard-lookup is performed to check if the mimetype has + * been already detected. If not, mimetype is determined using Apache Tika. + * + * @param abstractFile the file whose mimetype is to be determined. + * @return mimetype of the abstractFile is returned. Empty String returned + * in case of error. 
+ */ + public String getFileType(AbstractFile abstractFile) { + String identifiedFileType = ""; + + // check the blackboard for a previously posted file type + try { + ArrayList<BlackboardAttribute> attributes = abstractFile.getGenInfoAttributes(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_FILE_TYPE_SIG); + for (BlackboardAttribute attribute : attributes) { + identifiedFileType = attribute.getValueString(); + break; + } + if (identifiedFileType != null && !identifiedFileType.isEmpty()) { + return identifiedFileType; + } + } catch (TskCoreException ex) { + logger.log(Level.WARNING, "Error performing mimetype blackboard-lookup for " + abstractFile.getName(), ex); + } + + try { + // not found on the blackboard: run detection and post the result + identifiedFileType = detectAndPostToBlackboard(abstractFile); + if (identifiedFileType != null && !identifiedFileType.isEmpty()) { + return identifiedFileType; + } + } catch (TskCoreException ex) { + logger.log(Level.WARNING, "Error determining the mimetype for " + abstractFile.getName(), ex); // NON-NLS + return ""; // NON-NLS + } + + logger.log(Level.WARNING, "Unable to determine the mimetype for {0}", abstractFile.getName()); // NON-NLS + return ""; // NON-NLS + } + /** * Detect the MIME type of a file, posting it to the blackboard if detection * succeeds. * * @param file The file to test. - * @param moduleName The name of the module posting to the blackboard. * @return The MIME type name id detection was successful, null otherwise. * @throws TskCoreException if there is an error posting to the blackboard. */ - public synchronized String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException { - String mimeType = detect(file); + public String detectAndPostToBlackboard(AbstractFile file) throws TskCoreException { + + String mimeType; + mimeType = detect(file); if (null != mimeType) { /** * Add the file type attribute to the general info artifact. Note @@ -125,6 +172,13 @@ public class FileTypeDetector { * @return The MIME type name id detection was successful, null otherwise. 
*/ public String detect(AbstractFile file) throws TskCoreException { + // Consistently mark unallocated and unused space as file type application/octet-stream + if ((file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) + || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS) + || (file.isFile() == false)) { + return MimeTypes.OCTET_STREAM; + } + String fileType = detectUserDefinedType(file); if (null == fileType) { try { diff --git a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeIdIngestModule.java b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeIdIngestModule.java index f1fe3e26bd..6625c7c616 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeIdIngestModule.java +++ b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeIdIngestModule.java @@ -27,7 +27,6 @@ import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.ingest.IngestMessage; import org.sleuthkit.autopsy.ingest.IngestServices; import org.sleuthkit.datamodel.AbstractFile; -import org.sleuthkit.datamodel.TskData; import org.sleuthkit.datamodel.TskData.FileKnown; import org.sleuthkit.autopsy.ingest.IngestModule.ProcessResult; import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter; @@ -95,15 +94,6 @@ public class FileTypeIdIngestModule implements FileIngestModule { @Override public ProcessResult process(AbstractFile file) { - /** - * Skip unallocated space and unused blocks files. - */ - if ((file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) - || (file.getType() == TskData.TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS) - || (file.isFile() == false)) { - return ProcessResult.OK; - } - /** * Skip known files if configured to do so. 
*/ diff --git a/Core/src/org/sleuthkit/autopsy/modules/sevenzip/Bundle.properties b/Core/src/org/sleuthkit/autopsy/modules/sevenzip/Bundle.properties index f0540e3482..badd35146a 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/sevenzip/Bundle.properties +++ b/Core/src/org/sleuthkit/autopsy/modules/sevenzip/Bundle.properties @@ -29,3 +29,4 @@ SevenZipIngestModule.unpack.encrFileDetected.msg=Encrypted files in archive dete SevenZipIngestModule.unpack.encrFileDetected.details=Some files in archive\: {0} are encrypted. {1} extractor was unable to extract all files from this archive. SevenZipIngestModule.UnpackStream.write.exception.msg=Error writing unpacked file to\: {0} SevenZipIngestModule.UnpackedTree.exception.msg=Error adding a derived file to db\:{0} +SevenZipIngestModule.startUp.fileTypeDetectorInitializationException.msg=Error initializing the File Type Detector. diff --git a/Core/src/org/sleuthkit/autopsy/modules/sevenzip/SevenZipIngestModule.java b/Core/src/org/sleuthkit/autopsy/modules/sevenzip/SevenZipIngestModule.java index 9ccb53ae17..59553ae89f 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/sevenzip/SevenZipIngestModule.java +++ b/Core/src/org/sleuthkit/autopsy/modules/sevenzip/SevenZipIngestModule.java @@ -24,7 +24,6 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; import java.util.Date; @@ -62,6 +61,7 @@ import org.sleuthkit.autopsy.ingest.ModuleDataEvent; import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter; import net.sf.sevenzipjbinding.ArchiveFormat; import static net.sf.sevenzipjbinding.ArchiveFormat.RAR; +import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector; /** * 7Zip ingest module extracts supported archives, adds extracted DerivedFiles, @@ -87,13 +87,10 @@ public final class SevenZipIngestModule implements FileIngestModule { private static 
final long MIN_FREE_DISK_SPACE = 1 * 1000 * 1000000L; //1GB //counts archive depth private ArchiveDepthCountTree archiveDepthCountTree; - //buffer for checking file headers and signatures - private static final int readHeaderSize = 4; - private final byte[] fileHeaderBuffer = new byte[readHeaderSize]; - private static final int ZIP_SIGNATURE_BE = 0x504B0304; private IngestJobContext context; private long jobId; private final static IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter(); + private FileTypeDetector fileTypeDetector; SevenZipIngestModule() { } @@ -103,6 +100,13 @@ public final class SevenZipIngestModule implements FileIngestModule { this.context = context; jobId = context.getJobId(); + try { + fileTypeDetector = new FileTypeDetector(); + } catch (FileTypeDetector.FileTypeDetectorInitException ex) { + logger.log(Level.SEVERE, NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.startUp.fileTypeDetectorInitializationException.msg"), ex); + throw new IngestModuleException(NbBundle.getMessage(this.getClass(), "SevenZipIngestModule.startUp.fileTypeDetectorInitializationException.msg")); + } + final Case currentCase = Case.getCurrentCase(); moduleDirRelative = Case.getModulesOutputDirRelPath() + File.separator + ArchiveFileExtractorModuleFactory.getModuleName(); @@ -657,24 +661,7 @@ public final class SevenZipIngestModule implements FileIngestModule { * @return true if zip file, false otherwise */ private boolean isZipFileHeader(AbstractFile file) { - if (file.getSize() < readHeaderSize) { - return false; - } - - try { - int bytesRead = file.read(fileHeaderBuffer, 0, readHeaderSize); - if (bytesRead != readHeaderSize) { - return false; - } - } catch (TskCoreException ex) { - //ignore if can't read the first few bytes, not a ZIP - return false; - } - - ByteBuffer bytes = ByteBuffer.wrap(fileHeaderBuffer); - int signature = bytes.getInt(); - - return signature == ZIP_SIGNATURE_BE; + return 
fileTypeDetector.getFileType(file).equals("application/zip"); //NON-NLS } /** diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties index 03c68141f3..7e784739a7 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties @@ -283,3 +283,4 @@ KeywordSearchModuleFactory.createFileIngestModule.exception.msg=Expected setting SearchRunner.Searcher.done.err.msg=Error performing keyword search KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.toolTipText=Fastest overall, but no results until the end KeywordSearchGlobalSearchSettingsPanel.timeRadioButton5.text=No periodic searches +KeywordSearchIngestModule.startUp.fileTypeDetectorInitializationException.msg=Error initializing the File Type Detector. diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java index 97e4e14119..5a4931574c 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java @@ -37,8 +37,6 @@ import org.sleuthkit.autopsy.ingest.IngestServices; import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException; import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector; import org.sleuthkit.datamodel.AbstractFile; -import org.sleuthkit.datamodel.BlackboardAttribute; -import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskData; import org.sleuthkit.datamodel.TskData.FileKnown; @@ -74,7 +72,8 @@ public final class KeywordSearchIngestModule implements FileIngestModule { private final IngestServices services = IngestServices.getInstance(); private Ingester ingester = null; private Indexer indexer; - //only 
search images from current ingest, not images previously ingested/indexed + private FileTypeDetector fileTypeDetector; +//only search images from current ingest, not images previously ingested/indexed //accessed read-only by searcher thread private boolean startedSearching = false; @@ -130,6 +129,12 @@ public final class KeywordSearchIngestModule implements FileIngestModule { jobId = context.getJobId(); dataSourceId = context.getDataSource().getId(); + try { + fileTypeDetector = new FileTypeDetector(); + } catch (FileTypeDetector.FileTypeDetectorInitException ex) { + logger.log(Level.SEVERE, NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.startUp.fileTypeDetectorInitializationException.msg"), ex); + throw new IngestModuleException(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.startUp.fileTypeDetectorInitializationException.msg")); + } ingester = Server.getIngester(); this.context = context; @@ -470,30 +475,10 @@ public final class KeywordSearchIngestModule implements FileIngestModule { return; } - - - // try to get the file type from the BB - String detectedFormat = null; - try { - ArrayList<BlackboardAttribute> attributes = aFile.getGenInfoAttributes(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_FILE_TYPE_SIG); - for (BlackboardAttribute attribute : attributes) { - detectedFormat = attribute.getValueString(); - break; - } - } catch (TskCoreException ex) { - } - // else, use FileType module to detect the format + String detectedFormat = fileTypeDetector.getFileType(aFile); - if (detectedFormat == null) { + if (detectedFormat.isEmpty()) { // getFileType() returns "" (never null) when the type cannot be determined - try { - detectedFormat = new FileTypeDetector().detectAndPostToBlackboard(aFile); - } catch (FileTypeDetector.FileTypeDetectorInitException | TskCoreException ex) { - logger.log(Level.WARNING, "Could not detect format using file type detector for file: {0}", aFile); //NON-NLS - return; - } - if (detectedFormat == null) { - logger.log(Level.WARNING, "Could not detect format using file type detector for file: {0}", aFile); //NON-NLS - return; - } + 
logger.log(Level.WARNING, "Could not detect format using fileTypeDetector for file: {0}", aFile); //NON-NLS + return; } // we skip archive formats that are opened by the archive module.