Added new mbox parser.

This commit is contained in:
Jeff Wallace 2013-11-13 17:04:44 -05:00
parent 1181667fdd
commit e2e4fd59df
12 changed files with 626 additions and 7 deletions

View File

@ -1,3 +1,4 @@
file.reference.java-libpst-1.0-SNAPSHOT.jar=release/modules/ext/java-libpst-1.0-SNAPSHOT.jar
javac.source=1.7
javac.compilerargs=-Xlint -Xlint:-serial
license.file=../LICENSE-2.0.txt

View File

@ -26,10 +26,42 @@
</dependency>
</module-dependencies>
<public-packages/>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-dom-0.8.0-SNAPSHOT-sources.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT-sources.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-core-0.8.0-SNAPSHOT-sources.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT-sources.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-project-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-project-0.8.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/java-libpst-1.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/java-libpst-1.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT-sources.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT-sources.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-project-0.8.0-SNAPSHOT-tests.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-project-0.8.0-SNAPSHOT-tests.jar</binary-origin>
</class-path-extension>
</data>
</configuration>
</project>

View File

@ -0,0 +1,235 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2013 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
*
* @author jwallace
*/
public class EmailMessage {
private String recipients = "";
private String bcc = "";
private String cc = "";
private String sender = "";
private String subject = "";
private String textBody = "";
private String htmlBody = "";
private String rtfBody = "";
private String localPath = "";
private boolean hasAttachment = false;
private long sentDate = 0L;
private List<Attachment> attachments = new ArrayList<>();
private long id = -1L;
boolean hasAttachment() {
return hasAttachment;
}
String getRecipients() {
return recipients;
}
void setRecipients(String recipients) {
this.recipients = recipients;
}
String getSender() {
return sender;
}
void setSender(String sender) {
this.sender = sender;
}
String getSubject() {
return subject;
}
void setSubject(String subject) {
this.subject = subject;
}
String getTextBody() {
return textBody;
}
void setTextBody(String textBody) {
this.textBody = textBody;
}
String getHtmlBody() {
return htmlBody;
}
void setHtmlBody(String htmlBody) {
this.htmlBody = htmlBody;
}
String getRtfBody() {
return rtfBody;
}
void setRtfBody(String rtfBody) {
this.rtfBody = rtfBody;
}
long getSentDate() {
return sentDate;
}
void setSentDate(Date sentDate) {
this.sentDate = sentDate.getTime() / 1000;
}
void setSentDate(long sentDate) {
this.sentDate = sentDate;
}
String getBcc() {
return bcc;
}
void setBcc(String bcc) {
this.bcc = bcc;
}
String getCc() {
return cc;
}
void setCc(String cc) {
this.cc = cc;
}
void addAttachment(Attachment a) {
attachments.add(a);
hasAttachment = true;
}
List<Attachment> getAttachments() {
return attachments;
}
long getId() {
return id;
}
void setId(long id) {
this.id = id;
}
String getLocalPath() {
return localPath;
}
void setLocalPath(String localPath) {
this.localPath = localPath;
}
}
/**
*
* @author jwallace
*/
class Attachment {
private String name = "";
private String localPath = "";
private long size = 0L;
private long crTime = 0L;
private long cTime = 0L;
private long aTime = 0L;
private long mTime = 0L;
String getName() {
return name;
}
void setName(String name) {
this.name = name;
}
String getLocalPath() {
return localPath;
}
void setLocalPath(String localPath) {
this.localPath = localPath;
}
long getSize() {
return size;
}
void setSize(long size) {
this.size = size;
}
long getCrTime() {
return crTime;
}
void setCrTime(long crTime) {
this.crTime = crTime;
}
void setCrTime(Date crTime) {
this.crTime = crTime.getTime() / 1000;
}
long getcTime() {
return cTime;
}
void setcTime(long cTime) {
this.cTime = cTime;
}
void setcTime(Date cTime) {
this.cTime = cTime.getTime() / 1000;
}
long getaTime() {
return aTime;
}
void setaTime(long aTime) {
this.aTime = aTime;
}
void setaTime(Date aTime) {
this.aTime = aTime.getTime() / 1000;
}
long getmTime() {
return mTime;
}
void setmTime(long mTime) {
this.mTime = mTime;
}
void setmTime(Date mTime) {
this.mTime = mTime.getTime() / 1000;
}
}

View File

@ -0,0 +1,209 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2013 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.dom.BinaryBody;
import org.apache.james.mime4j.dom.Entity;
import org.apache.james.mime4j.dom.Message;
import org.apache.james.mime4j.dom.MessageBuilder;
import org.apache.james.mime4j.dom.Multipart;
import org.apache.james.mime4j.dom.SingleBody;
import org.apache.james.mime4j.dom.TextBody;
import org.apache.james.mime4j.dom.address.AddressList;
import org.apache.james.mime4j.dom.address.Mailbox;
import org.apache.james.mime4j.dom.address.MailboxList;
import org.apache.james.mime4j.dom.field.ContentDispositionField;
import org.apache.james.mime4j.dom.field.ContentTypeField;
import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
import org.apache.james.mime4j.mboxiterator.MboxIterator;
import org.apache.james.mime4j.message.DefaultMessageBuilder;
/**
*
* @author jwallace
*/
public class MboxParser {
private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
private MessageBuilder messageBuilder;
private static final String HTML_TYPE = "text/html";
private String localPath = null;
MboxParser() {
messageBuilder = new DefaultMessageBuilder();
}
MboxParser(String localPath) {
this();
this.localPath = localPath;
}
List<EmailMessage> parse(File mboxFile) {
//JWTODO: detect charset
CharsetEncoder encoder = StandardCharsets.ISO_8859_1.newEncoder();
List<EmailMessage> emails = new ArrayList<>();
try {
for (CharBufferWrapper message : MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build()) {
try {
Message msg = messageBuilder.parseMessage(message.asInputStream(encoder.charset()));
emails.add(extractEmail(msg));
} catch (MimeException ex) {
logger.log(Level.WARNING, "Failed to get message from mbox.", ex);
}
}
} catch (FileNotFoundException ex) {
logger.log(Level.WARNING, "couldn't find mbox file.", ex);
} catch (IOException ex) {
logger.log(Level.WARNING, "Error getting messsages from mbox file.");
}
return emails;
}
private EmailMessage extractEmail(Message msg) {
EmailMessage email = new EmailMessage();
// Basic Info
email.setSender(getAddresses(msg.getFrom()));
email.setRecipients(getAddresses(msg.getTo()));
email.setBcc(getAddresses(msg.getBcc()));
email.setCc(getAddresses(msg.getCc()));
email.setSubject(msg.getSubject());
email.setSentDate(msg.getDate());
if (localPath != null) {
email.setLocalPath(localPath);
}
// Body
if (msg.isMultipart()) {
handleMultipart(email, (Multipart) msg.getBody());
} else {
handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType());
}
return email;
}
private void handleMultipart(EmailMessage email, Multipart multi) {
for (Entity e : multi.getBodyParts()) {
if (e.isMultipart()) {
handleMultipart(email, (Multipart) e.getBody());
} else if (e.getDispositionType() != null
&& e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
handleAttachment(email, e);
} else if (e.getMimeType().equals(HTML_TYPE) ||
e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
handleTextBody(email, (TextBody) e.getBody(), e.getMimeType());
} else {
logger.log(Level.INFO, "Found unrecognized entity: " + e);
}
}
}
private void handleTextBody(EmailMessage email, TextBody tb, String type) {
BufferedReader r;
try {
r = new BufferedReader(tb.getReader());
StringBuilder bodyString = new StringBuilder();
String line = "";
while ((line = r.readLine()) != null) {
bodyString.append(line).append("\n");
}
switch (type) {
case ContentTypeField.TYPE_TEXT_PLAIN:
email.setTextBody(bodyString.toString());
break;
case HTML_TYPE:
email.setHtmlBody(bodyString.toString());
break;
default:
logger.log(Level.INFO, "Found unrecognized mime type: " + type);
}
} catch (IOException ex) {
logger.log(Level.WARNING, "Error getting text body of mbox message", ex);
}
}
private void handleAttachment(EmailMessage email, Entity e) {
String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
String filename = e.getFilename();
String outPath = outputDirPath + filename;
FileOutputStream fos;
BinaryBody bb;
try {
fos = new FileOutputStream(outPath);
} catch (FileNotFoundException ex) {
logger.log(Level.INFO, "", ex);
return;
}
try {
bb = (BinaryBody) e.getBody();
bb.writeTo(fos);
} catch (IOException ex) {
logger.log(Level.INFO, "", ex);
return;
} finally {
try {
fos.close();
} catch (IOException ex) {
logger.log(Level.INFO, "Failed to close file output stream", ex);
}
}
Attachment attach = new Attachment();
attach.setName(filename);
attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath()
+ File.separator + filename);
// JWTODO: find appropriate constant or make one.
// ContentDispositionField disposition = (ContentDispositionField) e.getHeader().getField("Content-Disposition");
// if (disposition != null) {
// attach.setSize(disposition.getSize());
// attach.setCrTime(disposition.getCreationDate());
// attach.setmTime(disposition.getModificationDate());
// attach.setaTime(disposition.getReadDate());
// }
email.addAttachment(attach);
}
private String getAddresses(MailboxList mailboxList) {
if (mailboxList == null) {
return "";
}
StringBuilder addresses = new StringBuilder();
for (Mailbox m : mailboxList) {
addresses.append(m.toString()).append("; ");
}
return addresses.toString();
}
private String getAddresses(AddressList addressList) {
return (addressList == null) ? "" : getAddresses(addressList.flatten());
}
}

View File

@ -123,7 +123,7 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile {
}
if (isMbox) {
return processMBox(abstractFile);
return processMBox(abstractFile, ingestContext);
}
int extIndex = abstractFile.getName().lastIndexOf(".");
@ -135,7 +135,7 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile {
return ProcessResult.OK;
}
private ProcessResult processMBox(AbstractFile abstractFile) {
private ProcessResult processMBoxOld(AbstractFile abstractFile) {
logger.log(Level.INFO, "ThunderbirdMboxFileIngestModule: Parsing {0}", abstractFile.getName());
String mboxFileName = abstractFile.getName();
@ -175,7 +175,6 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile {
// logger.log(Level.WARNING, "Unable to obtain msf file for mbox parsing:" + msfName, ex);
// }
// use the local path to determine the e-mail folder structure
String emailFolder = "";
// email folder is everything after "Mail" or ImapMail
@ -511,7 +510,7 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile {
* Get a path to a temporary folder.
* @return
*/
private static String getTempPath() {
public static String getTempPath() {
String tmpDir = Case.getCurrentCase().getTempDirectory() + File.separator
+ "EmailParser";
File dir = new File(tmpDir);
@ -521,7 +520,7 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile {
return tmpDir;
}
private static String getModuleOutputPath() {
public static String getModuleOutputPath() {
String outDir = Case.getCurrentCase().getModulesOutputDirAbsPath() + File.separator +
MODULE_NAME;
File dir = new File(outDir);
@ -531,7 +530,7 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile {
return outDir;
}
private static String getRelModuleOutputPath() {
public static String getRelModuleOutputPath() {
return Case.getModulesOutputDirRelPath() + File.separator +
MODULE_NAME;
}
@ -570,4 +569,147 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile {
public boolean hasBackgroundJobsRunning() {
return false;
}
/**
 * Copies an mbox file to the case temp directory, parses it, and posts the
 * resulting e-mail artifacts and attachment files.
 *
 * Failures to copy the file (including insufficient disk space) are logged
 * and the file is skipped; the method always returns {@code ProcessResult.OK}.
 *
 * @param abstractFile  the mbox file to process
 * @param ingestContext the pipeline context used to schedule derived files
 * @return {@code ProcessResult.OK}
 */
private ProcessResult processMBox(AbstractFile abstractFile, PipelineContext<IngestModuleAbstractFile> ingestContext) {
    String mboxFileName = abstractFile.getName();
    String mboxParentDir = abstractFile.getParentPath();
    // use the local path to determine the e-mail folder structure
    String emailFolder = "";
    // email folder is everything after "Mail" or ImapMail
    if (mboxParentDir.contains("/Mail/")) {
        emailFolder = mboxParentDir.substring(mboxParentDir.indexOf("/Mail/") + 5);
    } else if (mboxParentDir.contains("/ImapMail/")) {
        emailFolder = mboxParentDir.substring(mboxParentDir.indexOf("/ImapMail/") + 9);
    }
    emailFolder = emailFolder + mboxFileName;
    // Literal replacement: with replaceAll the "." would match any character.
    emailFolder = emailFolder.replace(".sbd", "");

    String fileName = getTempPath() + File.separator + abstractFile.getName()
            + "-" + String.valueOf(abstractFile.getId());
    File file = new File(fileName);

    if (abstractFile.getSize() >= services.getFreeDiskSpace()) {
        logger.log(Level.WARNING, "Not enough disk space to write file to disk.");
        IngestMessage msg = IngestMessage.createErrorMessage(messageId++, this, getName(), "Out of disk space. Can't copy " + abstractFile.getName() + " to parse.");
        services.postMessage(msg);
        return ProcessResult.OK;
    }

    try {
        ContentUtils.writeToFile(abstractFile, file);
    } catch (IOException ex) {
        logger.log(Level.WARNING, "Failed writing mbox file to disk.", ex);
        return ProcessResult.OK;
    }

    MboxParser parser = new MboxParser(emailFolder);
    List<EmailMessage> emails = parser.parse(file);

    // The temp copy is only needed while parsing; remove it on a best-effort basis.
    if (file.delete() == false) {
        logger.log(Level.INFO, "Failed to delete temp file: {0}", file.getName());
    }

    processEmails(emails, abstractFile, ingestContext);
    return ProcessResult.OK;
}
/**
 * Posts a blackboard artifact for each parsed e-mail, registers the
 * attachments of those e-mails as derived files, schedules the derived files
 * for ingest, and finally fires a data event for the e-mail artifact type.
 *
 * @param emails        the parsed e-mail messages
 * @param abstractFile  the mbox file the messages came from
 * @param ingestContext the pipeline context used to schedule derived files
 */
private void processEmails(List<EmailMessage> emails, AbstractFile abstractFile, PipelineContext<IngestModuleAbstractFile> ingestContext) {
    List<AbstractFile> attachmentFiles = new ArrayList<>();

    for (EmailMessage message : emails) {
        if (message.hasAttachment()) {
            List<AbstractFile> added = handleAttachments(message.getAttachments(), abstractFile);
            attachmentFiles.addAll(added);
        }
        addArtifact(message, abstractFile);
    }

    // NOTE(review): the content event carries the parent mbox file and is fired
    // once per derived file - confirm whether that is intended.
    for (AbstractFile attachmentFile : attachmentFiles) {
        services.fireModuleContentEvent(new ModuleContentEvent(abstractFile));
        services.scheduleFile(attachmentFile, ingestContext);
    }

    services.fireModuleDataEvent(new ModuleDataEvent(MODULE_NAME, BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
}
/**
 * Registers each attachment as a derived file of the given mbox file.
 *
 * An attachment that cannot be registered is logged and skipped, so the
 * returned list may be shorter than the input.
 *
 * @param attachments  the attachments extracted from one e-mail
 * @param abstractFile the mbox file that is the parent of the derived files
 * @return the derived files that were successfully added
 */
private List<AbstractFile> handleAttachments(List<Attachment> attachments, AbstractFile abstractFile) {
    List<AbstractFile> files = new ArrayList<>();
    for (Attachment attach : attachments) {
        String filename = attach.getName();
        long crTime = attach.getCrTime();
        long mTime = attach.getmTime();
        long aTime = attach.getaTime();
        long cTime = attach.getcTime();
        String relPath = attach.getLocalPath();
        long size = attach.getSize();

        try {
            DerivedFile df = fileManager.addDerivedFile(filename, relPath,
                    size, cTime, crTime, aTime, mTime, true, abstractFile, "",
                    MODULE_NAME, MODULE_VERSION, "");
            files.add(df);
        } catch (TskCoreException ex) {
            // JWTODO
            // The attachment is lost from the case, so say which one and why.
            logger.log(Level.WARNING, "Failed to add derived file for e-mail attachment: " + filename, ex);
        }
    }
    return files;
}
/**
 * Creates a TSK_EMAIL_MSG blackboard artifact on the given file from the
 * fields of one e-mail. Text attributes are only added when the value is
 * present (non-null and non-empty); the sent date is only added when positive.
 *
 * @param email        the parsed e-mail message
 * @param abstractFile the mbox file the artifact is attached to
 */
private void addArtifact(EmailMessage email, AbstractFile abstractFile) {
    List<BlackboardAttribute> bbattributes = new ArrayList<>();
    String to = email.getRecipients();
    String cc = email.getCc();
    String bcc = email.getBcc();
    String from = email.getSender();
    long dateL = email.getSentDate();
    String body = email.getTextBody();
    String bodyHTML = email.getHtmlBody();
    String rtf = email.getRtfBody();
    String subject = email.getSubject();
    long id = email.getId();
    String localPath = email.getLocalPath();

    // Header values may be absent from a message, so guard against null as well as empty.
    if (to != null && to.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_TO.getTypeID(), MODULE_NAME, to));
    }
    if (cc != null && cc.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CC.getTypeID(), MODULE_NAME, cc));
    }
    if (bcc != null && bcc.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_BCC.getTypeID(), MODULE_NAME, bcc));
    }
    if (from != null && from.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_FROM.getTypeID(), MODULE_NAME, from));
    }
    if (dateL > 0) {
        // Only the sent date is known here; it is recorded for both attributes.
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID(), MODULE_NAME, dateL));
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID(), MODULE_NAME, dateL));
    }
    if (body != null && body.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_PLAIN.getTypeID(), MODULE_NAME, body));
    }
    if (bodyHTML != null && bodyHTML.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_HTML.getTypeID(), MODULE_NAME, bodyHTML));
    }
    if (rtf != null && rtf.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_RTF.getTypeID(), MODULE_NAME, rtf));
    }
    bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_ID.getTypeID(), MODULE_NAME, ((id < 0L) ? "Not available" : String.valueOf(id))));
    if (subject != null && subject.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SUBJECT.getTypeID(), MODULE_NAME, subject));
    }
    if (localPath != null && localPath.isEmpty() == false) {
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PATH.getTypeID(), MODULE_NAME, localPath));
    } else {
        // NOTE(review): "/foo/bar" looks like a placeholder; presumably consumers
        // require a TSK_PATH attribute - confirm and replace with a real default.
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PATH.getTypeID(), MODULE_NAME, "/foo/bar"));
    }

    try {
        BlackboardArtifact bbart;
        bbart = abstractFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG);
        bbart.addAttributes(bbattributes);
    } catch (TskCoreException ex) {
        logger.log(Level.WARNING, "Failed to add e-mail artifact for file: " + abstractFile.getName(), ex);
    }
}
}