Merge pull request #5141 from kellykelly3/1326-eml-parser

1326 eml email file parser
This commit is contained in:
Richard Cordovano 2019-09-16 15:22:07 -04:00 committed by GitHub
commit 2a0ea278e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 504 additions and 353 deletions

View File

@ -1,4 +1,4 @@
MboxParser.handleAttch.noOpenCase.errMsg=Exception while getting open case. MimeJ4MessageParser.handleAttch.noOpenCase.errMsg=Exception while getting open case.
OpenIDE-Module-Display-Category=Ingest Module OpenIDE-Module-Display-Category=Ingest Module
OpenIDE-Module-Long-Description=Email Parser ingest module.\n\nThe module extracts MBOX and PST e-mail files and posts the results to the blackboard.\nIt knows about the Thunderbird folder structure for MBOX files. OpenIDE-Module-Long-Description=Email Parser ingest module.\n\nThe module extracts MBOX and PST e-mail files and posts the results to the blackboard.\nIt knows about the Thunderbird folder structure for MBOX files.
OpenIDE-Module-Name=Email Parser OpenIDE-Module-Name=Email Parser

View File

@ -0,0 +1,72 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.dom.Message;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.ReadContentInputStream;
/**
 * EML file parser. An .eml file contains a single email message.
 *
 * All entry points are static; the parser instance created inside
 * {@link #parse(AbstractFile)} only exists to reuse the mime4j handling that
 * is inherited from MimeJ4MessageParser.
 */
class EMLParser extends MimeJ4MessageParser {

    /**
     * File name extension (without the dot) that marks a candidate eml file.
     */
    private static final String EML_EXTENSION = "eml"; //NON-NLS

    /**
     * Header marker that must appear in the sniffed bytes of the file.
     */
    private static final String TO_HEADER = "To:"; //NON-NLS

    /**
     * If the extension of the AbstractFile is eml and 'To:' is found close to
     * the beginning of the file, then it is probably an eml file.
     *
     * @param abFile AbstractFile to test
     * @param buffer A byte buffer of the beginning of the file.
     *
     * @return True, if we think this is an eml file, false otherwise.
     */
    static boolean isEMLFile(AbstractFile abFile, byte[] buffer) {
        String ext = abFile.getNameExtension();
        // Compare case-insensitively so that "message.EML" is recognised no
        // matter how the extension is reported.
        if (ext == null || !EML_EXTENSION.equalsIgnoreCase(ext)) {
            return false;
        }
        if (buffer == null) {
            return false;
        }
        // Decode with a fixed single-byte charset instead of the platform
        // default: every byte maps to exactly one char, so the ASCII marker is
        // found identically on every machine.
        String fileStart = new String(buffer, java.nio.charset.StandardCharsets.ISO_8859_1);
        return fileStart.contains(TO_HEADER);
    }

    /**
     * Parses the single email message contained in the given eml file. The
     * content is read directly from the AbstractFile through a
     * ReadContentInputStream, which is closed before this method returns.
     *
     * @param sourceFile AbstractFile source file for eml message
     *
     * @return EmailMessage object for message in eml file
     *
     * @throws FileNotFoundException
     * @throws IOException
     * @throws MimeException
     */
    static EmailMessage parse(AbstractFile sourceFile) throws FileNotFoundException, IOException, MimeException {
        try (ReadContentInputStream fis = new ReadContentInputStream(sourceFile)) {
            EMLParser parser = new EMLParser();
            parser.setLocalPath(sourceFile.getParentPath());

            Message mimeMsg = parser.getMessageBuilder().parseMessage(fis);
            // NOTE(review): the path stored on the parser above is not the one
            // handed to extractEmail, which receives an empty local path -
            // confirm this is intended.
            return parser.extractEmail(mimeMsg, "", sourceFile.getId());
        }
    }
}

View File

@ -19,12 +19,10 @@
package org.sleuthkit.autopsy.thunderbirdparser; package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.BufferedInputStream; import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.CharConversionException; import java.io.CharConversionException;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.Charset; import java.nio.charset.Charset;
@ -35,60 +33,26 @@ import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.UUID;
import java.util.logging.Level; import java.util.logging.Level;
import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.Logger;
import org.apache.james.mime4j.dom.BinaryBody;
import org.apache.james.mime4j.dom.Body;
import org.apache.james.mime4j.dom.Entity;
import org.apache.james.mime4j.dom.Message; import org.apache.james.mime4j.dom.Message;
import org.apache.james.mime4j.dom.Multipart;
import org.apache.james.mime4j.dom.TextBody;
import org.apache.james.mime4j.dom.address.AddressList;
import org.apache.james.mime4j.dom.address.Mailbox;
import org.apache.james.mime4j.dom.address.MailboxList;
import org.apache.james.mime4j.dom.field.ContentDispositionField;
import org.apache.james.mime4j.dom.field.ContentTypeField;
import org.apache.james.mime4j.mboxiterator.CharBufferWrapper; import org.apache.james.mime4j.mboxiterator.CharBufferWrapper;
import org.apache.james.mime4j.mboxiterator.MboxIterator; import org.apache.james.mime4j.mboxiterator.MboxIterator;
import org.apache.james.mime4j.message.DefaultMessageBuilder;
import org.apache.james.mime4j.stream.Field;
import org.apache.james.mime4j.stream.MimeConfig;
import org.apache.tika.parser.txt.CharsetDetector; import org.apache.tika.parser.txt.CharsetDetector;
import org.apache.tika.parser.txt.CharsetMatch; import org.apache.tika.parser.txt.CharsetMatch;
import org.openide.util.NbBundle; import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.autopsy.coreutils.FileUtil;
import org.sleuthkit.datamodel.TskData;
import org.sleuthkit.datamodel.EncodedFileOutputStream;
/** /**
* An Iterator for parsing mbox files. Wraps an instance of MBoxEmailIterator. * An Iterator for parsing mbox files. Wraps an instance of MBoxEmailIterator.
*/ */
class MboxParser implements Iterator<EmailMessage> { class MboxParser extends MimeJ4MessageParser implements Iterator<EmailMessage> {
private static final Logger logger = Logger.getLogger(MboxParser.class.getName()); private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
private final DefaultMessageBuilder messageBuilder;
private final List<String> errorList = new ArrayList<>();
/**
* The mime type string for html text.
*/
private static final String HTML_TYPE = "text/html"; //NON-NLS
/**
* The local path of the mbox file.
*/
private String localPath;
private Iterator<EmailMessage> emailIterator = null; private Iterator<EmailMessage> emailIterator = null;
private MboxParser(String localPath) { private MboxParser(String localPath) {
this.localPath = localPath; setLocalPath(localPath);
messageBuilder = new DefaultMessageBuilder();
MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
// disable line length checks.
messageBuilder.setMimeEntityConfig(config);
} }
static boolean isValidMimeTypeMbox(byte[] buffer) { static boolean isValidMimeTypeMbox(byte[] buffer) {
@ -170,276 +134,6 @@ class MboxParser implements Iterator<EmailMessage> {
return emailIterator != null ? emailIterator.next() : null; return emailIterator != null ? emailIterator.next() : null;
} }
String getErrors() {
String result = "";
for (String msg: errorList) {
result += "<li>" + msg + "</li>";
}
return result;
}
/**
* Use the information stored in the given mime4j message to populate an
* EmailMessage.
*
* @param msg
*
* @return
*/
private EmailMessage extractEmail(Message msg, long fileID) {
EmailMessage email = new EmailMessage();
// Basic Info
email.setSender(getAddresses(msg.getFrom()));
email.setRecipients(getAddresses(msg.getTo()));
email.setBcc(getAddresses(msg.getBcc()));
email.setCc(getAddresses(msg.getCc()));
email.setSubject(msg.getSubject());
email.setSentDate(msg.getDate());
email.setLocalPath(localPath);
email.setMessageID(msg.getMessageId());
Field field = msg.getHeader().getField("in-reply-to"); //NON-NLS
String inReplyTo = null;
if (field != null) {
inReplyTo = field.getBody();
email.setInReplyToID(inReplyTo);
}
field = msg.getHeader().getField("references");
if (field != null) {
List<String> references = new ArrayList<>();
for (String id : field.getBody().split(">")) {
references.add(id.trim() + ">");
}
if (!references.contains(inReplyTo)) {
references.add(inReplyTo);
}
email.setReferences(references);
}
// Body
if (msg.isMultipart()) {
handleMultipart(email, (Multipart) msg.getBody(), fileID);
} else {
handleTextBody(email, (TextBody) msg.getBody(), msg.getMimeType(), msg.getHeader().getFields());
}
return email;
}
/**
* Extract the subject, inReplyTo, message-ID and references from the
* Message object and returns them in a new EmailMessage object.
*
* @param msg Message object
*
* @return EmailMessage instance with only some of the message information
*/
private EmailMessage extractPartialEmail(Message msg) {
EmailMessage email = new EmailMessage();
email.setSubject(msg.getSubject());
email.setMessageID(msg.getMessageId());
Field field = msg.getHeader().getField("in-reply-to"); //NON-NLS
String inReplyTo = null;
if (field != null) {
inReplyTo = field.getBody();
email.setInReplyToID(inReplyTo);
}
field = msg.getHeader().getField("references");
if (field != null) {
List<String> references = new ArrayList<>();
for (String id : field.getBody().split(">")) {
references.add(id.trim() + ">");
}
if (!references.contains(inReplyTo)) {
references.add(inReplyTo);
}
email.setReferences(references);
}
return email;
}
/**
* Handle a multipart mime message. Recursively calls handleMultipart if one
* of the body parts is another multipart. Otherwise, calls the correct
* method to extract information out of each part of the body.
*
* @param email
* @param multi
*/
private void handleMultipart(EmailMessage email, Multipart multi, long fileID) {
List<Entity> entities = multi.getBodyParts();
for (int index = 0; index < entities.size(); index++) {
Entity e = entities.get(index);
if (e.isMultipart()) {
handleMultipart(email, (Multipart) e.getBody(), fileID);
} else if (e.getDispositionType() != null
&& e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
handleAttachment(email, e, fileID, index);
} else if (e.getMimeType().equals(HTML_TYPE)
|| e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
handleTextBody(email, (TextBody) e.getBody(), e.getMimeType(), e.getHeader().getFields());
} else {
// Ignore other types.
}
}
}
/**
* Extract text out of a body part of the message.
*
* Handles text and html mime types. Throws away all other types. (only
* other example I've seen is text/calendar)
*
* @param email
* @param tb
* @param type The Mime type of the body.
*/
private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
BufferedReader r;
try {
r = new BufferedReader(tb.getReader());
StringBuilder bodyString = new StringBuilder();
StringBuilder headersString = new StringBuilder();
String line;
while ((line = r.readLine()) != null) {
bodyString.append(line).append("\n");
}
headersString.append("\n-----HEADERS-----\n");
for (Field field : fields) {
String nextLine = field.getName() + ": " + field.getBody();
headersString.append("\n").append(nextLine);
}
headersString.append("\n\n---END HEADERS--\n\n");
email.setHeaders(headersString.toString());
switch (type) {
case ContentTypeField.TYPE_TEXT_PLAIN:
email.setTextBody(bodyString.toString());
break;
case HTML_TYPE:
email.setHtmlBody(bodyString.toString());
break;
default:
// Not interested in other text types.
break;
}
} catch (IOException ex) {
logger.log(Level.WARNING, "Error getting text body of mbox message", ex); //NON-NLS
}
}
/**
* Extract the attachment out of the given entity. Should only be called if
* e.getDispositionType() == "attachment"
*
* @param email
* @param e
*/
@NbBundle.Messages({"MboxParser.handleAttch.noOpenCase.errMsg=Exception while getting open case."})
private void handleAttachment(EmailMessage email, Entity e, long fileID, int index) {
String outputDirPath;
String relModuleOutputPath;
try {
outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
relModuleOutputPath = ThunderbirdMboxFileIngestModule.getRelModuleOutputPath() + File.separator;
} catch (NoCurrentCaseException ex) {
addErrorMessage(Bundle.MboxParser_handleAttch_noOpenCase_errMsg());
logger.log(Level.SEVERE, Bundle.MboxParser_handleAttch_noOpenCase_errMsg(), ex); //NON-NLS
return;
}
String filename = FileUtil.escapeFileName(e.getFilename());
// also had some crazy long names, so make random one if we get those.
// also from Japanese image that had encoded name
if (filename.length() > 64) {
filename = UUID.randomUUID().toString();
}
String uniqueFilename = fileID + "-" + index + "-" + email.getSentDate() + "-" + filename;
String outPath = outputDirPath + uniqueFilename;
EncodedFileOutputStream fos;
BinaryBody bb;
try {
fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1);
} catch (IOException ex) {
addErrorMessage(
NbBundle.getMessage(this.getClass(),
"MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
return;
}
try {
Body b = e.getBody();
if (b instanceof BinaryBody) {
bb = (BinaryBody) b;
bb.writeTo(fos);
} else {
// This could potentially be other types. Only seen this once.
}
} catch (IOException ex) {
logger.log(Level.WARNING, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.handleAttch.failedWriteToDisk", filename));
return;
} finally {
try {
fos.close();
} catch (IOException ex) {
logger.log(Level.WARNING, "Failed to close file output stream", ex); //NON-NLS
}
}
EmailMessage.Attachment attach = new EmailMessage.Attachment();
attach.setName(filename);
attach.setLocalPath(relModuleOutputPath + uniqueFilename);
attach.setSize(new File(outPath).length());
attach.setEncodingType(TskData.EncodingType.XOR1);
email.addAttachment(attach);
}
/**
* Get a String representation of the MailboxList (which is a list of email
* addresses).
*
* @param mailboxList
*
* @return
*/
private String getAddresses(MailboxList mailboxList) {
if (mailboxList == null) {
return "";
}
StringBuilder addresses = new StringBuilder();
for (Mailbox m : mailboxList) {
addresses.append(m.toString()).append("; ");
}
return addresses.toString();
}
/**
* Get a String representation of the AddressList (which is a list of email
* addresses).
*
* @param addressList
*
* @return
*/
private String getAddresses(AddressList addressList) {
return (addressList == null) ? "" : getAddresses(addressList.flatten());
}
/** /**
* Get a list of the possible encoders for the given mboxFile using Tika's * Get a list of the possible encoders for the given mboxFile using Tika's
* CharsetDetector. At a minimum, returns the standard built in charsets. * CharsetDetector. At a minimum, returns the standard built in charsets.
@ -490,10 +184,6 @@ class MboxParser implements Iterator<EmailMessage> {
} }
} }
private void addErrorMessage(String msg) {
errorList.add(msg);
}
/** /**
* An Iterator for mbox email messages. * An Iterator for mbox email messages.
*/ */
@ -521,9 +211,9 @@ class MboxParser implements Iterator<EmailMessage> {
CharBufferWrapper messageBuffer = mboxIterator.next(); CharBufferWrapper messageBuffer = mboxIterator.next();
try { try {
Message msg = messageBuilder.parseMessage(messageBuffer.asInputStream(encoder.charset())); Message msg = getMessageBuilder().parseMessage(messageBuffer.asInputStream(encoder.charset()));
if (wholeMsg) { if (wholeMsg) {
return extractEmail(msg, fileID); return extractEmail(msg, getLocalPath(), fileID);
} else { } else {
return extractPartialEmail(msg); return extractPartialEmail(msg);
} }

View File

@ -0,0 +1,379 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.logging.Level;
import org.apache.james.mime4j.dom.BinaryBody;
import org.apache.james.mime4j.dom.Body;
import org.apache.james.mime4j.dom.Entity;
import org.apache.james.mime4j.dom.Message;
import org.apache.james.mime4j.dom.Multipart;
import org.apache.james.mime4j.dom.TextBody;
import org.apache.james.mime4j.dom.address.AddressList;
import org.apache.james.mime4j.dom.address.Mailbox;
import org.apache.james.mime4j.dom.address.MailboxList;
import org.apache.james.mime4j.dom.field.ContentDispositionField;
import org.apache.james.mime4j.dom.field.ContentTypeField;
import org.apache.james.mime4j.message.DefaultMessageBuilder;
import org.apache.james.mime4j.stream.Field;
import org.apache.james.mime4j.stream.MimeConfig;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.autopsy.coreutils.FileUtil;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.datamodel.EncodedFileOutputStream;
import org.sleuthkit.datamodel.TskData;
/**
 * Super class for email parsers that can use the james.mime4J.Message objects.
 *
 * Holds the lazily created mime4j message builder, the local path of the file
 * being parsed and the list of error messages collected through
 * {@link #addErrorMessage(String)}, and converts mime4j Message objects into
 * EmailMessage objects.
 */
class MimeJ4MessageParser {

    private static final Logger logger = Logger.getLogger(MimeJ4MessageParser.class.getName());

    /**
     * The mime type string for html text.
     */
    private static final String HTML_TYPE = "text/html"; //NON-NLS

    /**
     * Created on first use by getMessageBuilder().
     */
    private DefaultMessageBuilder messageBuilder = null;

    private final List<String> errorList = new ArrayList<>();

    /**
     * The local path of the email message(s) file.
     */
    private String localPath;

    /**
     * Gets the message builder used to parse raw messages, creating and
     * configuring it on the first call.
     *
     * @return The message builder, never null.
     */
    DefaultMessageBuilder getMessageBuilder() {
        if (messageBuilder == null) {
            messageBuilder = new DefaultMessageBuilder();
            // disable the line length, header length and header count checks.
            MimeConfig config = MimeConfig.custom()
                    .setMaxLineLen(-1)
                    .setMaxHeaderLen(-1)
                    .setMaxHeaderCount(-1)
                    .build();
            messageBuilder.setMimeEntityConfig(config);
        }

        return messageBuilder;
    }

    /**
     * Sets the local path of the email messages file.
     *
     * @param localPath Local path of the file containing the email messages
     */
    final void setLocalPath(String localPath) {
        this.localPath = localPath;
    }

    /**
     * Gets the local path.
     *
     * @return The value last passed to setLocalPath, or null if it was never
     *         set.
     */
    String getLocalPath() {
        return localPath;
    }

    /**
     * Get a list of the parsing error messages.
     *
     * @return String containing all of the parse error messages, each wrapped
     *         in an html list item element. Empty string is returned if there
     *         are no error messages.
     */
    String getErrors() {
        StringBuilder result = new StringBuilder();
        for (String msg : errorList) {
            result.append("<li>").append(msg).append("</li>");
        }
        return result.toString();
    }

    /**
     * Adds a message to the error message list.
     *
     * @param msg Message to add to the list.
     */
    void addErrorMessage(String msg) {
        errorList.add(msg);
    }

    /**
     * Use the information stored in the given mime4j message to populate an
     * EmailMessage.
     *
     * @param msg          The Message to extract data from.
     * @param localPath    The local path to record on the EmailMessage.
     * @param sourceFileID Id of the source file; used to build unique file
     *                     names for extracted attachments.
     *
     * @return EmailMessage for the Message.
     */
    EmailMessage extractEmail(Message msg, String localPath, long sourceFileID) {
        EmailMessage email = new EmailMessage();
        // Basic Info
        email.setSender(getAddresses(msg.getFrom()));
        email.setRecipients(getAddresses(msg.getTo()));
        email.setBcc(getAddresses(msg.getBcc()));
        email.setCc(getAddresses(msg.getCc()));
        email.setSubject(msg.getSubject());
        email.setSentDate(msg.getDate());
        email.setLocalPath(localPath);
        email.setMessageID(msg.getMessageId());

        setThreadingInfo(email, msg);

        // Body
        Body body = msg.getBody();
        if (msg.isMultipart()) {
            handleMultipart(email, (Multipart) body, sourceFileID);
        } else if (body instanceof TextBody) {
            handleTextBody(email, (TextBody) body, msg.getMimeType(), msg.getHeader().getFields());
        } else {
            // A single part message whose body is not text cannot be cast to
            // TextBody; skip the body instead of failing the whole message.
            logger.log(Level.WARNING, "Skipping non-text body of single part message with mime type {0}", msg.getMimeType()); //NON-NLS
        }

        return email;
    }

    /**
     * Extract the subject, inReplyTo, message-ID and references from the
     * Message object and returns them in a new EmailMessage object.
     *
     * @param msg Message object
     *
     * @return EmailMessage instance with only some of the message information
     */
    EmailMessage extractPartialEmail(Message msg) {
        EmailMessage email = new EmailMessage();
        email.setSubject(msg.getSubject());
        email.setMessageID(msg.getMessageId());

        setThreadingInfo(email, msg);

        return email;
    }

    /**
     * Copies the in-reply-to and references headers of the Message, when
     * present, to the EmailMessage.
     *
     * @param email The EmailMessage to update.
     * @param msg   The Message to read the headers from.
     */
    private static void setThreadingInfo(EmailMessage email, Message msg) {
        String inReplyTo = null;
        Field field = msg.getHeader().getField("in-reply-to"); //NON-NLS
        if (field != null) {
            inReplyTo = field.getBody();
            email.setInReplyToID(inReplyTo);
        }

        field = msg.getHeader().getField("references");
        if (field != null) {
            List<String> references = new ArrayList<>();
            for (String id : field.getBody().split(">")) {
                String trimmedId = id.trim();
                // Whitespace left over after the last '>' is not a message id.
                if (!trimmedId.isEmpty()) {
                    references.add(trimmedId + ">");
                }
            }

            // Only add the in-reply-to id when the message actually has one,
            // otherwise a null entry would end up in the list.
            if (inReplyTo != null && !references.contains(inReplyTo)) {
                references.add(inReplyTo);
            }

            email.setReferences(references);
        }
    }

    /**
     * Handle a multipart mime message. Recursively calls handleMultipart if one
     * of the body parts is another multipart. Otherwise, calls the correct
     * method to extract information out of each part of the body.
     *
     * @param email  The EmailMessage being populated.
     * @param multi  The multipart body to walk.
     * @param fileID Id of the source file, passed on to attachment handling.
     */
    private void handleMultipart(EmailMessage email, Multipart multi, long fileID) {
        List<Entity> entities = multi.getBodyParts();
        for (int index = 0; index < entities.size(); index++) {
            Entity e = entities.get(index);
            if (e.isMultipart()) {
                handleMultipart(email, (Multipart) e.getBody(), fileID);
            } else if (e.getDispositionType() != null
                    && e.getDispositionType().equals(ContentDispositionField.DISPOSITION_TYPE_ATTACHMENT)) {
                handleAttachment(email, e, fileID, index);
            } else if (e.getMimeType().equals(HTML_TYPE)
                    || e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) {
                Body body = e.getBody();
                // Guard the cast so an unexpected body type is ignored
                // rather than raising a ClassCastException.
                if (body instanceof TextBody) {
                    handleTextBody(email, (TextBody) body, e.getMimeType(), e.getHeader().getFields());
                }
            } else {
                // Ignore other types.
            }
        }
    }

    /**
     * Extract text out of a body part of the message.
     *
     * Handles text and html mime types. Throws away all other types. (only
     * other example I've seen is text/calendar)
     *
     * @param email  The EmailMessage being populated.
     * @param tb     The text body to read.
     * @param type   The Mime type of the body.
     * @param fields The header fields to record as the headers of the email.
     */
    private void handleTextBody(EmailMessage email, TextBody tb, String type, List<Field> fields) {
        // try-with-resources so the reader is closed on every path.
        try (BufferedReader r = new BufferedReader(tb.getReader())) {
            StringBuilder bodyString = new StringBuilder();
            String line;
            while ((line = r.readLine()) != null) {
                bodyString.append(line).append("\n");
            }

            StringBuilder headersString = new StringBuilder();
            headersString.append("\n-----HEADERS-----\n");
            for (Field field : fields) {
                String nextLine = field.getName() + ": " + field.getBody();
                headersString.append("\n").append(nextLine);
            }
            headersString.append("\n\n---END HEADERS--\n\n");

            email.setHeaders(headersString.toString());

            switch (type) {
                case ContentTypeField.TYPE_TEXT_PLAIN:
                    email.setTextBody(bodyString.toString());
                    break;
                case HTML_TYPE:
                    email.setHtmlBody(bodyString.toString());
                    break;
                default:
                    // Not interested in other text types.
                    break;
            }
        } catch (IOException ex) {
            logger.log(Level.WARNING, "Error getting text body of mbox message", ex); //NON-NLS
        }
    }

    /**
     * Extract the attachment out of the given entity. Should only be called if
     * e.getDispositionType() == "attachment"
     *
     * The attachment is written XOR1 encoded to the module output directory
     * and, when that succeeds, registered on the EmailMessage. Failures are
     * logged and the attachment is skipped.
     *
     * @param email  The EmailMessage the attachment belongs to.
     * @param e      The entity holding the attachment.
     * @param fileID Id of the source file, used in the output file name.
     * @param index  Index of the entity within its multipart, used in the
     *               output file name.
     */
    @NbBundle.Messages({"MimeJ4MessageParser.handleAttch.noOpenCase.errMsg=Exception while getting open case."})
    private static void handleAttachment(EmailMessage email, Entity e, long fileID, int index) {
        String outputDirPath;
        String relModuleOutputPath;
        try {
            outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator;
            relModuleOutputPath = ThunderbirdMboxFileIngestModule.getRelModuleOutputPath() + File.separator;
        } catch (NoCurrentCaseException ex) {
            logger.log(Level.SEVERE, Bundle.MimeJ4MessageParser_handleAttch_noOpenCase_errMsg(), ex); //NON-NLS
            return;
        }

        // An attachment part is not guaranteed to carry a file name; treat a
        // missing name like an unusable one and generate a random name.
        String rawName = e.getFilename();
        String filename = (rawName == null) ? UUID.randomUUID().toString() : FileUtil.escapeFileName(rawName);

        // also had some crazy long names, so make random one if we get those.
        // also from Japanese image that had encoded name
        if (filename.length() > 64) {
            filename = UUID.randomUUID().toString();
        }

        String uniqueFilename = fileID + "-" + index + "-" + email.getSentDate() + "-" + filename;
        String outPath = outputDirPath + uniqueFilename;
        EncodedFileOutputStream fos;
        BinaryBody bb;
        try {
            fos = new EncodedFileOutputStream(new FileOutputStream(outPath), TskData.EncodingType.XOR1);
        } catch (IOException ex) {
            logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
            return;
        }

        try {
            Body b = e.getBody();
            if (b instanceof BinaryBody) {
                bb = (BinaryBody) b;
                bb.writeTo(fos);
            } else {
                // This could potentially be other types. Only seen this once.
            }
        } catch (IOException ex) {
            logger.log(Level.WARNING, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
            return;
        } finally {
            try {
                fos.close();
            } catch (IOException ex) {
                logger.log(Level.WARNING, "Failed to close file output stream", ex); //NON-NLS
            }
        }

        EmailMessage.Attachment attach = new EmailMessage.Attachment();
        attach.setName(filename);
        attach.setLocalPath(relModuleOutputPath + uniqueFilename);
        // Size of the file as written to disk.
        attach.setSize(new File(outPath).length());
        attach.setEncodingType(TskData.EncodingType.XOR1);
        email.addAttachment(attach);
    }

    /**
     * Get a String representation of the MailboxList (which is a list of email
     * addresses).
     *
     * @param mailboxList The mailboxes to format, may be null.
     *
     * @return String of email addresses, each followed by "; ", or empty
     *         string if no addresses were found.
     */
    private static String getAddresses(MailboxList mailboxList) {
        if (mailboxList == null) {
            return "";
        }
        StringBuilder addresses = new StringBuilder();
        for (Mailbox m : mailboxList) {
            addresses.append(m.toString()).append("; ");
        }
        return addresses.toString();
    }

    /**
     * Get a String representation of the AddressList (which is a list of email
     * addresses).
     *
     * @param addressList The addresses to format, may be null.
     *
     * @return String of email addresses, each followed by "; ", or empty
     *         string if no addresses were found.
     */
    private static String getAddresses(AddressList addressList) {
        return (addressList == null) ? "" : getAddresses(addressList.flatten());
    }
}

View File

@ -29,6 +29,7 @@ import java.util.Set;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.james.mime4j.MimeException;
import org.openide.util.NbBundle; import org.openide.util.NbBundle;
import org.openide.util.NbBundle.Messages; import org.openide.util.NbBundle.Messages;
import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.casemodule.Case;
@ -113,12 +114,15 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
// check its signature // check its signature
boolean isMbox = false; boolean isMbox = false;
boolean isEMLFile = false;
try { try {
byte[] t = new byte[64]; byte[] t = new byte[64];
if (abstractFile.getSize() > 64) { if (abstractFile.getSize() > 64) {
int byteRead = abstractFile.read(t, 0, 64); int byteRead = abstractFile.read(t, 0, 64);
if (byteRead > 0) { if (byteRead > 0) {
isMbox = MboxParser.isValidMimeTypeMbox(t); isMbox = MboxParser.isValidMimeTypeMbox(t);
isEMLFile = EMLParser.isEMLFile(abstractFile, t);
} }
} }
} catch (TskException ex) { } catch (TskException ex) {
@ -129,6 +133,10 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return processMBox(abstractFile); return processMBox(abstractFile);
} }
if (isEMLFile) {
return processEMLFile(abstractFile);
}
if (PstParser.isPstFile(abstractFile)) { if (PstParser.isPstFile(abstractFile)) {
return processPst(abstractFile); return processPst(abstractFile);
} }
@ -310,44 +318,45 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
"ThunderbirdMboxFileIngestModule.errorMessage.outOfDiskSpace=Out of disk space. Cannot copy '{0}' (id={1}) to parse." "ThunderbirdMboxFileIngestModule.errorMessage.outOfDiskSpace=Out of disk space. Cannot copy '{0}' (id={1}) to parse."
}) })
private ProcessResult processVcard(AbstractFile abstractFile) { private ProcessResult processVcard(AbstractFile abstractFile) {
String fileName;
try {
fileName = getTempPath() + File.separator + abstractFile.getName()
+ "-" + String.valueOf(abstractFile.getId());
} catch (NoCurrentCaseException ex) {
logger.log(Level.SEVERE, "Exception while getting open case.", ex); //NON-NLS
return ProcessResult.ERROR;
}
File file = new File(fileName);
long freeSpace = services.getFreeDiskSpace();
if ((freeSpace != IngestMonitor.DISK_FREE_SPACE_UNKNOWN) && (abstractFile.getSize() >= freeSpace)) {
logger.log(Level.WARNING, String.format("Not enough disk space to write file '%s' (id=%d) to disk.",
abstractFile.getName(), abstractFile.getId())); //NON-NLS
IngestMessage msg = IngestMessage.createErrorMessage(EmailParserModuleFactory.getModuleName(), EmailParserModuleFactory.getModuleName(),
Bundle.ThunderbirdMboxFileIngestModule_errorMessage_outOfDiskSpace(abstractFile.getName(), abstractFile.getId()));
services.postMessage(msg);
return ProcessResult.OK;
}
try {
ContentUtils.writeToFile(abstractFile, file, context::fileIngestIsCancelled);
} catch (IOException ex) {
logger.log(Level.WARNING, String.format("Failed writing the vCard file '%s' (id=%d) to disk.",
abstractFile.getName(), abstractFile.getId()), ex); //NON-NLS
return ProcessResult.OK;
}
try { try {
VcardParser parser = new VcardParser(currentCase, context); VcardParser parser = new VcardParser(currentCase, context);
parser.parse(file, abstractFile); parser.parse(abstractFile);
} catch (IOException | NoCurrentCaseException ex) { } catch (IOException | NoCurrentCaseException ex) {
logger.log(Level.WARNING, String.format("Exception while parsing the file '%s' (id=%d).", file.getName(), abstractFile.getId()), ex); //NON-NLS logger.log(Level.WARNING, String.format("Exception while parsing the file '%s' (id=%d).", abstractFile.getName(), abstractFile.getId()), ex); //NON-NLS
return ProcessResult.OK;
}
return ProcessResult.OK; return ProcessResult.OK;
} }
if (file.delete() == false) { private ProcessResult processEMLFile(AbstractFile abstractFile) {
logger.log(Level.INFO, "Failed to delete temp file: {0}", file.getName()); //NON-NLS try {
EmailMessage message = EMLParser.parse(abstractFile);
if (message == null) {
return ProcessResult.OK;
}
List<AbstractFile> derivedFiles = new ArrayList<>();
BlackboardArtifact msgArtifact = addEmailArtifact(message, abstractFile);
if ((msgArtifact != null) && (message.hasAttachment())) {
derivedFiles.addAll(handleAttachments(message.getAttachments(), abstractFile, msgArtifact));
}
if (derivedFiles.isEmpty() == false) {
for (AbstractFile derived : derivedFiles) {
services.fireModuleContentEvent(new ModuleContentEvent(derived));
}
}
context.addFilesToJob(derivedFiles);
} catch (IOException ex) {
logger.log(Level.WARNING, String.format("Error reading eml file %s", abstractFile.getName()), ex);
return ProcessResult.ERROR;
} catch (MimeException ex) {
logger.log(Level.WARNING, String.format("Error reading eml file %s", abstractFile.getName()), ex);
return ProcessResult.ERROR;
} }
return ProcessResult.OK; return ProcessResult.OK;
@ -580,7 +589,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
addArtifactAttribute(((id < 0L) ? NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.notAvail") : String.valueOf(id)), addArtifactAttribute(((id < 0L) ? NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.notAvail") : String.valueOf(id)),
ATTRIBUTE_TYPE.TSK_MSG_ID, bbattributes); ATTRIBUTE_TYPE.TSK_MSG_ID, bbattributes);
addArtifactAttribute(((localPath.isEmpty() == false) ? localPath : "/foo/bar"), addArtifactAttribute(((localPath.isEmpty() == false) ? localPath : ""),
ATTRIBUTE_TYPE.TSK_PATH, bbattributes); ATTRIBUTE_TYPE.TSK_PATH, bbattributes);
addArtifactAttribute(cc, ATTRIBUTE_TYPE.TSK_EMAIL_CC, bbattributes); addArtifactAttribute(cc, ATTRIBUTE_TYPE.TSK_EMAIL_CC, bbattributes);

View File

@ -57,6 +57,7 @@ import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.Content; import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.DataSource; import org.sleuthkit.datamodel.DataSource;
import org.sleuthkit.datamodel.ReadContentInputStream;
import org.sleuthkit.datamodel.Relationship; import org.sleuthkit.datamodel.Relationship;
import org.sleuthkit.datamodel.SleuthkitCase; import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskCoreException;
@ -141,8 +142,8 @@ final class VcardParser {
* file. * file.
* @throws NoCurrentCaseException If there is no open case. * @throws NoCurrentCaseException If there is no open case.
*/ */
void parse(File vcardFile, AbstractFile abstractFile) throws IOException, NoCurrentCaseException { void parse(AbstractFile abstractFile) throws IOException, NoCurrentCaseException {
for (VCard vcard: Ezvcard.parse(vcardFile).all()) { for (VCard vcard: Ezvcard.parse(new ReadContentInputStream(abstractFile)).all()) {
addContactArtifact(vcard, abstractFile); addContactArtifact(vcard, abstractFile);
} }
} }