From 78687b1babd7311b2c299fa0a5161324894685be Mon Sep 17 00:00:00 2001 From: Brian Carrier Date: Mon, 22 Jul 2013 09:56:12 -0400 Subject: [PATCH 1/3] Cleaned up thunderbird parser a bit and added comments --- .../ThunderbirdEmailParser.java | 2 +- .../ThunderbirdMboxFileIngestModule.java | 269 +++++++++--------- 2 files changed, 137 insertions(+), 134 deletions(-) diff --git a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdEmailParser.java b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdEmailParser.java index 6a69776dcb..11d2ca91b7 100644 --- a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdEmailParser.java +++ b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdEmailParser.java @@ -94,7 +94,7 @@ public class ThunderbirdEmailParser { return this.tika.detect(firstFewBytes, inDocName); } - public boolean isValidMimeTypeMbox(byte[] buffer) { + static public boolean isValidMimeTypeMbox(byte[] buffer) { return (new String(buffer)).startsWith("From "); } diff --git a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestModule.java b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestModule.java index b2d7384c3c..926ea686a0 100644 --- a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestModule.java +++ b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestModule.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011 Basis Technology Corp. + * Copyright 2011-2013 Basis Technology Corp. 
* Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,30 +29,33 @@ import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import java.util.logging.Level; +import org.apache.commons.lang.StringEscapeUtils; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; -import org.sleuthkit.autopsy.ingest.IngestServices; -import org.sleuthkit.autopsy.ingest.IngestModuleAbstract.*; -import org.sleuthkit.autopsy.ingest.IngestModuleAbstractFile; -import org.sleuthkit.autopsy.ingest.ModuleDataEvent; -import org.sleuthkit.datamodel.BlackboardArtifact; -import org.sleuthkit.datamodel.BlackboardAttribute; -import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE; -import org.sleuthkit.datamodel.ReadContentInputStream; -import org.sleuthkit.datamodel.TskCoreException; -import org.sleuthkit.datamodel.TskException; -import org.xml.sax.SAXException; -import org.apache.commons.lang.StringEscapeUtils; import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.datamodel.ContentUtils; -import org.sleuthkit.autopsy.ingest.PipelineContext; +import org.sleuthkit.autopsy.ingest.IngestModuleAbstractFile; import org.sleuthkit.autopsy.ingest.IngestModuleInit; +import org.sleuthkit.autopsy.ingest.IngestServices; +import org.sleuthkit.autopsy.ingest.ModuleDataEvent; +import org.sleuthkit.autopsy.ingest.PipelineContext; import org.sleuthkit.datamodel.AbstractFile; +import org.sleuthkit.datamodel.BlackboardArtifact; +import org.sleuthkit.datamodel.BlackboardAttribute; +import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE; import org.sleuthkit.datamodel.Content; +import org.sleuthkit.datamodel.ReadContentInputStream; import org.sleuthkit.datamodel.SleuthkitCase; +import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskData; +import org.sleuthkit.datamodel.TskException; +import 
org.xml.sax.SAXException; +/** + * File-level ingest module that detects MBOX files based on signature. + * Understands Thunderbird folder layout to provide additional structure and metadata. + */ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile { private static final Logger logger = Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()); @@ -73,91 +76,93 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile { @Override public ProcessResult process(PipelineContextingestContext, AbstractFile abstractFile) { - ThunderbirdEmailParser mbox = new ThunderbirdEmailParser(); - boolean isMbox = false; - - IngestModuleAbstractFile.ProcessResult hashDBResult = - services.getAbstractFileModuleResult(hashDBModuleName); - if (abstractFile.getKnown().equals( TskData.FileKnown.KNOWN)) { return ProcessResult.OK; //file is known, stop processing it - } else if (hashDBResult == IngestModuleAbstractFile.ProcessResult.ERROR) { + } + + IngestModuleAbstractFile.ProcessResult hashDBResult = + services.getAbstractFileModuleResult(hashDBModuleName); + if (hashDBResult == IngestModuleAbstractFile.ProcessResult.ERROR) { return ProcessResult.ERROR; //file has read error, stop processing it } if (abstractFile.isVirtual()) { return ProcessResult.OK; } - + + boolean isMbox = false; try { byte[] t = new byte[64]; if (abstractFile.getSize() > 64) { int byteRead = abstractFile.read(t, 0, 64); if (byteRead > 0) { - isMbox = mbox.isValidMimeTypeMbox(t); + isMbox = ThunderbirdEmailParser.isValidMimeTypeMbox(t); } } } catch (TskException ex) { logger.log(Level.WARNING, null, ex); } + + if (isMbox == false) { + return ProcessResult.OK; + } + + logger.log(Level.INFO, "ThunderbirdMboxFileIngestModule: Parsing {0}", abstractFile.getName()); + + String mboxName = abstractFile.getName(); + String msfName = mboxName + ".msf"; + //Long mboxId = fsContent.getId(); + String mboxPath = abstractFile.getParentPath(); + Long msfId = 0L; + currentCase 
= Case.getCurrentCase(); // get the most updated case + SleuthkitCase tskCase = currentCase.getSleuthkitCase(); - if (isMbox) { - logger.log(Level.INFO, "ThunderbirdMboxFileIngestModule: Parsing {0}", abstractFile.getName()); - - String mboxName = abstractFile.getName(); - String msfName = mboxName + ".msf"; - //Long mboxId = fsContent.getId(); - String mboxPath = abstractFile.getParentPath(); - Long msfId = 0L; - currentCase = Case.getCurrentCase(); // get the most updated case - SleuthkitCase tskCase = currentCase.getSleuthkitCase(); - - - try { - ResultSet resultset = tskCase.runQuery("SELECT obj_id FROM tsk_files WHERE parent_path = '" + mboxPath + "' and name = '" + msfName + "'"); - if (!resultset.next()) { - logger.log(Level.WARNING, "Could not find msf file in mbox dir: " + mboxPath + " file: " + msfName); - tskCase.closeRunQuery(resultset); - return ProcessResult.OK; - } else { - msfId = resultset.getLong(1); - tskCase.closeRunQuery(resultset); - } - - } catch (SQLException ex) { + try { + ResultSet resultset = tskCase.runQuery("SELECT obj_id FROM tsk_files WHERE parent_path = '" + mboxPath + "' and name = '" + msfName + "'"); + if (!resultset.next()) { logger.log(Level.WARNING, "Could not find msf file in mbox dir: " + mboxPath + " file: " + msfName); - } - - try { - Content msfContent = tskCase.getContentById(msfId); - if (msfContent != null) { - ContentUtils.writeToFile(msfContent, new File(currentCase.getTempDirectory() + File.separator + msfName)); - } - } catch (IOException ex) { - logger.log(Level.WARNING, "Unable to obtain msf file for mbox parsing:" + msfName, ex); - } catch (TskCoreException ex) { - logger.log(Level.WARNING, "Unable to obtain msf file for mbox parsing:" + msfName, ex); - } - int index = 0; - String replace = ""; - boolean a = mboxPath.indexOf("/ImapMail/") > 0; - boolean b = mboxPath.indexOf("/Mail/") > 0; - if (b == true) { - index = mboxPath.indexOf("/Mail/"); - replace = "/Mail"; - } else if (a == true) { - index = 
mboxPath.indexOf("/ImapMail/"); - replace = "/ImapMail"; + tskCase.closeRunQuery(resultset); + return ProcessResult.OK; } else { - replace = ""; - + msfId = resultset.getLong(1); + tskCase.closeRunQuery(resultset); } - String folderPath = mboxPath.substring(index); - folderPath = folderPath.replaceAll(replace, ""); - folderPath = folderPath + mboxName; - folderPath = folderPath.replaceAll(".sbd", ""); + + } catch (SQLException ex) { + logger.log(Level.WARNING, "Could not find msf file in mbox dir: " + mboxPath + " file: " + msfName); + } + + try { + Content msfContent = tskCase.getContentById(msfId); + if (msfContent != null) { + ContentUtils.writeToFile(msfContent, new File(currentCase.getTempDirectory() + File.separator + msfName)); + } + } catch (IOException ex) { + logger.log(Level.WARNING, "Unable to obtain msf file for mbox parsing:" + msfName, ex); + } catch (TskCoreException ex) { + logger.log(Level.WARNING, "Unable to obtain msf file for mbox parsing:" + msfName, ex); + } + int index = 0; + String replace = ""; + boolean a = mboxPath.indexOf("/ImapMail/") > 0; + boolean b = mboxPath.indexOf("/Mail/") > 0; + if (b == true) { + index = mboxPath.indexOf("/Mail/"); + replace = "/Mail"; + } else if (a == true) { + index = mboxPath.indexOf("/ImapMail/"); + replace = "/ImapMail"; + } else { + replace = ""; + + } + + String folderPath = mboxPath.substring(index); + folderPath = folderPath.replaceAll(replace, ""); + folderPath = folderPath + mboxName; + folderPath = folderPath.replaceAll(".sbd", ""); // Reader reader = null; // try { // reader = new FileReader(currentCase.getTempDirectory() + File.separator + msfName); @@ -170,66 +175,64 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile { // String path = dict.getValue("81").toString(); // String account = dict.getValue("8D").toString(); // } - String emailId = ""; - String content = ""; - String from = ""; - String to = ""; - String stringDate = ""; - Long date = 0L; - String subject 
= ""; - String cc = ""; - String bcc = ""; - try { - ReadContentInputStream contentStream = new ReadContentInputStream(abstractFile); - mbox.parse(contentStream); - HashMap> emailMap = new HashMap>(); - emailMap = mbox.getAllEmails(); - for (Entry> entry : emailMap.entrySet()) { - Map propertyMap = new HashMap(); - emailId = ((entry.getKey() != null) ? entry.getKey() : "Not Available"); - propertyMap = entry.getValue(); - content = ((propertyMap.get("content") != null) ? propertyMap.get("content") : ""); - from = ((propertyMap.get(Metadata.AUTHOR) != null) ? propertyMap.get(Metadata.AUTHOR) : ""); - to = ((propertyMap.get(Metadata.MESSAGE_TO) != null) ? propertyMap.get(Metadata.MESSAGE_TO) : ""); - stringDate = ((propertyMap.get("date") != null) ? propertyMap.get("date") : ""); - if (!"".equals(stringDate)) { - date = mbox.getDateCreated(stringDate); - } - subject = ((propertyMap.get(Metadata.SUBJECT) != null) ? propertyMap.get(Metadata.SUBJECT) : ""); - cc = ((propertyMap.get(Metadata.MESSAGE_CC) != null) ? propertyMap.get(Metadata.MESSAGE_CC) : ""); - bcc = ((propertyMap.get(Metadata.MESSAGE_BCC) != null) ? 
propertyMap.get(Metadata.MESSAGE_BCC) : ""); - - Collection bbattributes = new ArrayList(); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_TO.getTypeID(), MODULE_NAME, to)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CC.getTypeID(), MODULE_NAME, cc)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_BCC.getTypeID(), MODULE_NAME, bcc)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_FROM.getTypeID(), MODULE_NAME, from)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_PLAIN.getTypeID(), MODULE_NAME, content.replaceAll("\\<[^>]*>", ""))); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_HTML.getTypeID(), MODULE_NAME, content)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_ID.getTypeID(), MODULE_NAME, StringEscapeUtils.escapeHtml(emailId))); - //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_REPLY_ID.getTypeID(), MODULE_NAME, "",)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID(), MODULE_NAME, date)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID(), MODULE_NAME, date)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SUBJECT.getTypeID(), MODULE_NAME, subject)); - bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PATH.getTypeID(), MODULE_NAME, folderPath)); - BlackboardArtifact bbart; - try { - bbart = abstractFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG); - bbart.addAttributes(bbattributes); - } catch (TskCoreException ex) { - Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, ex); - } - services.fireModuleDataEvent(new ModuleDataEvent(MODULE_NAME, BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG)); + String emailId = ""; + String content = ""; + String from = ""; + String to = ""; + String stringDate = ""; + Long date = 0L; + String 
subject = ""; + String cc = ""; + String bcc = ""; + ThunderbirdEmailParser mbox = new ThunderbirdEmailParser(); + try { + ReadContentInputStream contentStream = new ReadContentInputStream(abstractFile); + mbox.parse(contentStream); + HashMap> emailMap = new HashMap>(); + emailMap = mbox.getAllEmails(); + for (Entry> entry : emailMap.entrySet()) { + Map propertyMap = new HashMap(); + emailId = ((entry.getKey() != null) ? entry.getKey() : "Not Available"); + propertyMap = entry.getValue(); + content = ((propertyMap.get("content") != null) ? propertyMap.get("content") : ""); + from = ((propertyMap.get(Metadata.AUTHOR) != null) ? propertyMap.get(Metadata.AUTHOR) : ""); + to = ((propertyMap.get(Metadata.MESSAGE_TO) != null) ? propertyMap.get(Metadata.MESSAGE_TO) : ""); + stringDate = ((propertyMap.get("date") != null) ? propertyMap.get("date") : ""); + if (!"".equals(stringDate)) { + date = mbox.getDateCreated(stringDate); } - } catch (FileNotFoundException ex) { - Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, ex); - } catch (IOException ex) { - Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, ex); - } catch (SAXException ex) { - Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, ex); - } catch (TikaException ex) { - Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, ex); + subject = ((propertyMap.get(Metadata.SUBJECT) != null) ? propertyMap.get(Metadata.SUBJECT) : ""); + cc = ((propertyMap.get(Metadata.MESSAGE_CC) != null) ? propertyMap.get(Metadata.MESSAGE_CC) : ""); + bcc = ((propertyMap.get(Metadata.MESSAGE_BCC) != null) ? 
propertyMap.get(Metadata.MESSAGE_BCC) : ""); + + Collection bbattributes = new ArrayList(); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_TO.getTypeID(), MODULE_NAME, to)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CC.getTypeID(), MODULE_NAME, cc)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_BCC.getTypeID(), MODULE_NAME, bcc)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_FROM.getTypeID(), MODULE_NAME, from)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_PLAIN.getTypeID(), MODULE_NAME, content.replaceAll("\\<[^>]*>", ""))); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_HTML.getTypeID(), MODULE_NAME, content)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_ID.getTypeID(), MODULE_NAME, StringEscapeUtils.escapeHtml(emailId))); + //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_REPLY_ID.getTypeID(), MODULE_NAME, "",)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID(), MODULE_NAME, date)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID(), MODULE_NAME, date)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SUBJECT.getTypeID(), MODULE_NAME, subject)); + bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PATH.getTypeID(), MODULE_NAME, folderPath)); + BlackboardArtifact bbart; + try { + bbart = abstractFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG); + bbart.addAttributes(bbattributes); + } catch (TskCoreException ex) { + Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, ex); + } + services.fireModuleDataEvent(new ModuleDataEvent(MODULE_NAME, BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG)); } + } catch (FileNotFoundException ex) { + Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, 
ex); + } catch (IOException ex) { + Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, ex); + } catch (SAXException | TikaException ex) { + Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()).log(Level.WARNING, null, ex); } return ProcessResult.OK; From 524e753e2160ce4519363a5c2e63fef79b5e51dc Mon Sep 17 00:00:00 2001 From: Brian Carrier Date: Mon, 22 Jul 2013 10:00:59 -0400 Subject: [PATCH 2/3] removed ingest manager message that prints every file being analyzed --- Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java b/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java index b6a9d8efb2..171cd5ca61 100644 --- a/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java +++ b/Core/src/org/sleuthkit/autopsy/ingest/IngestManager.java @@ -902,7 +902,7 @@ public class IngestManager { abstractFileModulesRetValues.clear(); } - logger.log(Level.INFO, "IngestManager: Processing: {0}", fileToProcess.getName()); + //logger.log(Level.INFO, "IngestManager: Processing: {0}", fileToProcess.getName()); progress.progress(fileToProcess.getName(), processedFiles); for (IngestModuleAbstractFile module : fileIngestTask.getModules()) { //process the file with every file module From 3d18532fc2b899d81c477dfb72a80586667f14a4 Mon Sep 17 00:00:00 2001 From: Brian Carrier Date: Mon, 22 Jul 2013 10:04:41 -0400 Subject: [PATCH 3/3] removed extra log statements about each file being indexed --- .../autopsy/keywordsearch/KeywordSearchIngestModule.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java index 001ae393b0..16aeb4eb58 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java +++ 
b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java @@ -781,7 +781,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAbstractFile { } } } - logger.log(Level.INFO, "Detected format: " + aFile.getName() + " " + detectedFormat); + //logger.log(Level.INFO, "Detected format: " + aFile.getName() + " " + detectedFormat); // we skip archive formats that are opened by the archive module. // @@@ We could have a check here to see if the archive module was enabled though...