From 30a84fdceb14f8440aec15a3a37cd166d47ce8a3 Mon Sep 17 00:00:00 2001 From: sidheshenator Date: Tue, 19 May 2015 13:15:07 -0400 Subject: [PATCH] getFileType() added and used --- .../exif/ExifParserFileIngestModule.java | 10 +++- .../modules/filetypeid/FileTypeDetector.java | 46 +++++++++++++++++++ .../sevenzip/SevenZipIngestModule.java | 31 ++++--------- .../KeywordSearchIngestModule.java | 34 ++++---------- 4 files changed, 74 insertions(+), 47 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/exif/ExifParserFileIngestModule.java b/Core/src/org/sleuthkit/autopsy/modules/exif/ExifParserFileIngestModule.java index a045f48a5e..7cc563a7a2 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/exif/ExifParserFileIngestModule.java +++ b/Core/src/org/sleuthkit/autopsy/modules/exif/ExifParserFileIngestModule.java @@ -34,6 +34,7 @@ import java.util.Collection; import java.util.Date; import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; +import org.openide.util.Exceptions; import org.sleuthkit.autopsy.coreutils.ImageUtils; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.ingest.FileIngestModule; @@ -41,6 +42,7 @@ import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.ingest.IngestServices; import org.sleuthkit.autopsy.ingest.ModuleDataEvent; import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter; +import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardAttribute; @@ -63,6 +65,7 @@ public final class ExifParserFileIngestModule implements FileIngestModule { private volatile boolean filesToFire = false; private long jobId; private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter(); + private static FileTypeDetector fileTypeDetector; ExifParserFileIngestModule() { } @@ -71,6 +74,11 @@ public 
final class ExifParserFileIngestModule implements FileIngestModule { public void startUp(IngestJobContext context) throws IngestModuleException { jobId = context.getJobId(); refCounter.incrementAndGet(jobId); + try { + fileTypeDetector = new FileTypeDetector(); + } catch (FileTypeDetector.FileTypeDetectorInitException ex) { + logger.log(Level.WARNING, "Error initializing FileTypeDetector", ex); // NON-NLS + } } @@ -197,7 +205,7 @@ public final class ExifParserFileIngestModule implements FileIngestModule { * @return true if to be processed */ private boolean parsableFormat(AbstractFile f) { - return ImageUtils.isJpegFileHeader(f); + return fileTypeDetector != null && "image/jpeg".equals(fileTypeDetector.getFileType(f)); //NON-NLS - detector may be unset and getFileType() may return null } @Override diff --git a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java index bd7418d2a2..08631210ec 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java +++ b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java @@ -18,11 +18,14 @@ */ package org.sleuthkit.autopsy.modules.filetypeid; +import java.util.ArrayList; import java.util.Map; import java.util.SortedSet; +import java.util.logging.Level; import org.apache.tika.Tika; import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MimeTypes; +import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardAttribute; @@ -37,6 +40,7 @@ public class FileTypeDetector { private static final int BUFFER_SIZE = 64 * 1024; private final byte buffer[] = new byte[BUFFER_SIZE]; private final Map userDefinedFileTypes; + private static final Logger logger = Logger.getLogger(FileTypeDetector.class.getName()); /** * Constructs an object that detects the type of a file by an inspection of @@ -93,6 +97,48 @@ public class FileTypeDetector { return false; } + /** + * This 
method returns a string representing the mimetype of the provided + * abstractFile. The blackboard is checked first for an already-detected + * mimetype; otherwise detectAndPostToBlackboard() performs the detection. + * + * @param abstractFile the file whose mimetype is to be determined. + * @return mimetype of the abstractFile, or null if it could not be determined; + * callers must null-check the result before dereferencing it. + */ + public synchronized String getFileType(AbstractFile abstractFile) { + String identifiedFileType = null; + + // check BB; start from an empty list so a failed (logged) lookup falls through to detection instead of iterating over null + ArrayList<BlackboardAttribute> attributes = new ArrayList<>(); + try { + attributes = abstractFile.getGenInfoAttributes(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_FILE_TYPE_SIG); + } catch (TskCoreException ex) { + logger.log(Level.WARNING, "Error performing mimetype blackboard-lookup for " + abstractFile.getName(), ex); + } + for (BlackboardAttribute attribute : attributes) { + identifiedFileType = attribute.getValueString(); + break; + } + + if (identifiedFileType != null) { + return identifiedFileType; + } + + try { + // check UDF and TDF + identifiedFileType = detectAndPostToBlackboard(abstractFile); + if (identifiedFileType != null) { + return identifiedFileType; + } + } catch (TskCoreException ex) { + logger.log(Level.WARNING, "Error determining the mimetype for " + abstractFile.getName(), ex); + return null; + } + + return null; + } + /** * Detect the MIME type of a file, posting it to the blackboard if detection * succeeds. 
diff --git a/Core/src/org/sleuthkit/autopsy/modules/sevenzip/SevenZipIngestModule.java b/Core/src/org/sleuthkit/autopsy/modules/sevenzip/SevenZipIngestModule.java index 9ccb53ae17..70413e532a 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/sevenzip/SevenZipIngestModule.java +++ b/Core/src/org/sleuthkit/autopsy/modules/sevenzip/SevenZipIngestModule.java @@ -62,6 +62,7 @@ import org.sleuthkit.autopsy.ingest.ModuleDataEvent; import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter; import net.sf.sevenzipjbinding.ArchiveFormat; import static net.sf.sevenzipjbinding.ArchiveFormat.RAR; +import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector; /** * 7Zip ingest module extracts supported archives, adds extracted DerivedFiles, @@ -87,13 +88,10 @@ public final class SevenZipIngestModule implements FileIngestModule { private static final long MIN_FREE_DISK_SPACE = 1 * 1000 * 1000000L; //1GB //counts archive depth private ArchiveDepthCountTree archiveDepthCountTree; - //buffer for checking file headers and signatures - private static final int readHeaderSize = 4; - private final byte[] fileHeaderBuffer = new byte[readHeaderSize]; - private static final int ZIP_SIGNATURE_BE = 0x504B0304; private IngestJobContext context; private long jobId; private final static IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter(); + private static FileTypeDetector fileTypeDetector; SevenZipIngestModule() { } @@ -103,6 +101,12 @@ public final class SevenZipIngestModule implements FileIngestModule { this.context = context; jobId = context.getJobId(); + try { + fileTypeDetector = new FileTypeDetector(); + } catch (FileTypeDetector.FileTypeDetectorInitException ex) { + logger.log(Level.WARNING, "Error initializing FileTypeDetector", ex); // NON-NLS + } + final Case currentCase = Case.getCurrentCase(); moduleDirRelative = Case.getModulesOutputDirRelPath() + File.separator + ArchiveFileExtractorModuleFactory.getModuleName(); @@ -657,24 +661,7 @@ public 
final class SevenZipIngestModule implements FileIngestModule { * @return true if zip file, false otherwise */ private boolean isZipFileHeader(AbstractFile file) { - if (file.getSize() < readHeaderSize) { - return false; - } - - try { - int bytesRead = file.read(fileHeaderBuffer, 0, readHeaderSize); - if (bytesRead != readHeaderSize) { - return false; - } - } catch (TskCoreException ex) { - //ignore if can't read the first few bytes, not a ZIP - return false; - } - - ByteBuffer bytes = ByteBuffer.wrap(fileHeaderBuffer); - int signature = bytes.getInt(); - - return signature == ZIP_SIGNATURE_BE; + return fileTypeDetector != null && "application/zip".equals(fileTypeDetector.getFileType(file)); //NON-NLS - detector may be unset and getFileType() may return null } /** diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java index 80462dd1af..a891f2721b 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java @@ -74,7 +74,8 @@ public final class KeywordSearchIngestModule implements FileIngestModule { private final IngestServices services = IngestServices.getInstance(); private Ingester ingester = null; private Indexer indexer; - //only search images from current ingest, not images previously ingested/indexed + private static FileTypeDetector fileTypeDetector; +//only search images from current ingest, not images previously ingested/indexed //accessed read-only by searcher thread private boolean startedSearching = false; @@ -130,6 +131,11 @@ public final class KeywordSearchIngestModule implements FileIngestModule { jobId = context.getJobId(); dataSourceId = context.getDataSource().getId(); + try { + fileTypeDetector = new FileTypeDetector(); + } catch (FileTypeDetector.FileTypeDetectorInitException ex) { + logger.log(Level.WARNING, "Error initializing FileTypeDetector", ex); // NON-NLS + } ingester = 
Server.getIngester(); this.context = context; @@ -469,30 +475,10 @@ public final class KeywordSearchIngestModule implements FileIngestModule { return; } - - - // try to get the file type from the BB - String detectedFormat = null; - try { - ArrayList attributes = aFile.getGenInfoAttributes(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_FILE_TYPE_SIG); - for (BlackboardAttribute attribute : attributes) { - detectedFormat = attribute.getValueString(); - break; - } - } catch (TskCoreException ex) { - } - // else, use FileType module to detect the format + String detectedFormat = fileTypeDetector.getFileType(aFile); if (detectedFormat == null) { - try { - detectedFormat = new FileTypeDetector().detectAndPostToBlackboard(aFile); - } catch (FileTypeDetector.FileTypeDetectorInitException | TskCoreException ex) { - logger.log(Level.WARNING, "Could not detect format using file type detector for file: {0}", aFile); //NON-NLS - return; - } - if (detectedFormat == null) { - logger.log(Level.WARNING, "Could not detect format using file type detector for file: {0}", aFile); //NON-NLS - return; - } + logger.log(Level.WARNING, "Could not detect format using fileTypeDetector for file: {0}", aFile); //NON-NLS + return; } // we skip archive formats that are opened by the archive module.