Cleaned up EmailParser code

This commit is contained in:
Kelly Kelly 2021-01-27 17:13:28 -05:00
parent 4974955ffa
commit 2103ce95f3
2 changed files with 162 additions and 181 deletions

View File

@ -1,7 +1,7 @@
/* /*
* Autopsy Forensic Browser * Autopsy Forensic Browser
* *
* Copyright 2019 Basis Technology Corp. * Copyright 2019-2020 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org * Contact: carrier <at> sleuthkit <dot> org
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
@ -31,7 +31,6 @@ import org.apache.james.mime4j.dom.Entity;
import org.apache.james.mime4j.dom.Message; import org.apache.james.mime4j.dom.Message;
import org.apache.james.mime4j.dom.MessageWriter; import org.apache.james.mime4j.dom.MessageWriter;
import org.apache.james.mime4j.dom.Multipart; import org.apache.james.mime4j.dom.Multipart;
import org.apache.james.mime4j.dom.SingleBody;
import org.apache.james.mime4j.dom.TextBody; import org.apache.james.mime4j.dom.TextBody;
import org.apache.james.mime4j.dom.address.AddressList; import org.apache.james.mime4j.dom.address.AddressList;
import org.apache.james.mime4j.dom.address.Mailbox; import org.apache.james.mime4j.dom.address.Mailbox;
@ -348,8 +347,6 @@ class MimeJ4MessageParser implements AutoCloseable{
logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
} }
} }
} }
/** /**

View File

@ -73,6 +73,7 @@ import org.sleuthkit.datamodel.blackboardutils.attributes.MessageAttachments.Fil
* structure and metadata. * structure and metadata.
*/ */
public final class ThunderbirdMboxFileIngestModule implements FileIngestModule { public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
private static final Logger logger = Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName()); private static final Logger logger = Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName());
private final IngestServices services = IngestServices.getInstance(); private final IngestServices services = IngestServices.getInstance();
private FileManager fileManager; private FileManager fileManager;
@ -90,7 +91,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
} }
@Override @Override
@Messages ({"ThunderbirdMboxFileIngestModule.noOpenCase.errMsg=Exception while getting open case."}) @Messages({"ThunderbirdMboxFileIngestModule.noOpenCase.errMsg=Exception while getting open case."})
public void startUp(IngestJobContext context) throws IngestModuleException { public void startUp(IngestJobContext context) throws IngestModuleException {
this.context = context; this.context = context;
try { try {
@ -113,8 +114,8 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
} }
//skip unalloc //skip unalloc
if ((abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) || if ((abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS))
(abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK))) { || (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK))) {
return ProcessResult.OK; return ProcessResult.OK;
} }
@ -146,7 +147,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return ProcessResult.OK; return ProcessResult.OK;
} }
if (isMbox || isEMLFile || isPstFile || isVcardFile ) { if (isMbox || isEMLFile || isPstFile || isVcardFile) {
try { try {
communicationArtifactsHelper = new CommunicationArtifactsHelper(currentCase.getSleuthkitCase(), communicationArtifactsHelper = new CommunicationArtifactsHelper(currentCase.getSleuthkitCase(),
EmailParserModuleFactory.getModuleName(), abstractFile, Account.Type.EMAIL); EmailParserModuleFactory.getModuleName(), abstractFile, Account.Type.EMAIL);
@ -205,7 +206,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return ProcessResult.OK; return ProcessResult.OK;
} }
try (PstParser parser = new PstParser(services)){ try (PstParser parser = new PstParser(services)) {
try { try {
ContentUtils.writeToFile(abstractFile, file, context::fileIngestIsCancelled); ContentUtils.writeToFile(abstractFile, file, context::fileIngestIsCancelled);
} catch (IOException ex) { } catch (IOException ex) {
@ -215,7 +216,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
PstParser.ParseResult result = parser.open(file, abstractFile.getId()); PstParser.ParseResult result = parser.open(file, abstractFile.getId());
switch( result) { switch (result) {
case OK: case OK:
Iterator<EmailMessage> pstMsgIterator = parser.getEmailMessageIterator(); Iterator<EmailMessage> pstMsgIterator = parser.getEmailMessageIterator();
if (pstMsgIterator != null) { if (pstMsgIterator != null) {
@ -263,7 +264,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
logger.log(Level.INFO, "PSTParser failed to parse {0}", abstractFile.getName()); //NON-NLS logger.log(Level.INFO, "PSTParser failed to parse {0}", abstractFile.getName()); //NON-NLS
return ProcessResult.ERROR; return ProcessResult.ERROR;
} }
} catch(Exception ex) { } catch (Exception ex) {
logger.log(Level.WARNING, String.format("Failed to close temp pst file %s", file.getAbsolutePath())); logger.log(Level.WARNING, String.format("Failed to close temp pst file %s", file.getAbsolutePath()));
} finally { } finally {
file.delete(); file.delete();
@ -322,18 +323,18 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return ProcessResult.OK; return ProcessResult.OK;
} }
try{ try {
processMboxFile(file, abstractFile, emailFolder); processMboxFile(file, abstractFile, emailFolder);
if (context.fileIngestIsCancelled()) { if (context.fileIngestIsCancelled()) {
return ProcessResult.OK; return ProcessResult.OK;
} }
}finally { } finally {
file.delete(); file.delete();
} }
} else { } else {
List<Long> mboxSplitOffsets = new ArrayList<>(); List<Long> mboxSplitOffsets = new ArrayList<>();
try{ try {
mboxSplitOffsets = findMboxSplitOffset(abstractFile, file); mboxSplitOffsets = findMboxSplitOffset(abstractFile, file);
} catch (IOException ex) { } catch (IOException ex) {
logger.log(Level.WARNING, String.format("Failed finding split offsets for mbox file {0}.", fileName), ex); //NON-NLS logger.log(Level.WARNING, String.format("Failed finding split offsets for mbox file {0}.", fileName), ex); //NON-NLS
@ -349,7 +350,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
logger.log(Level.WARNING, "Failed writing split mbox file to disk.", ex); //NON-NLS logger.log(Level.WARNING, "Failed writing split mbox file to disk.", ex); //NON-NLS
return ProcessResult.OK; return ProcessResult.OK;
} }
try{ try {
processMboxFile(splitFile, abstractFile, emailFolder); processMboxFile(splitFile, abstractFile, emailFolder);
startingOffset = mboxSplitOffset; startingOffset = mboxSplitOffset;
} finally { } finally {
@ -375,9 +376,9 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
int len = in.read(buffer); int len = in.read(buffer);
while (len != -1) { while (len != -1) {
len = in.read(buffer); len = in.read(buffer);
if (buffer[0] == 13 && buffer[1] == 10 && buffer[2] == 70 && buffer[3] == 114 && if (buffer[0] == 13 && buffer[1] == 10 && buffer[2] == 70 && buffer[3] == 114
buffer[4] == 111 && buffer[5] == 109 && buffer[6] == 32) { && buffer[4] == 111 && buffer[5] == 109 && buffer[6] == 32) {
mboxSplitOffset.add(in.getCurPosition() - 5 ); mboxSplitOffset.add(in.getCurPosition() - 5);
in.skip(MBOX_SIZE_TO_SPLIT); in.skip(MBOX_SIZE_TO_SPLIT);
} }
} }
@ -386,18 +387,17 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
} }
private void processMboxFile(File file, AbstractFile abstractFile, String emailFolder) { private void processMboxFile(File file, AbstractFile abstractFile, String emailFolder) {
try(MboxParser emailIterator = MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId())) { try (MboxParser emailIterator = MboxParser.getEmailIterator(emailFolder, file, abstractFile.getId())) {
List<EmailMessage> emails = new ArrayList<>(); List<EmailMessage> emails = new ArrayList<>();
if(emailIterator != null) { if (emailIterator != null) {
while(emailIterator.hasNext()) { while (emailIterator.hasNext()) {
if (context.fileIngestIsCancelled()) { if (context.fileIngestIsCancelled()) {
return; return;
} }
EmailMessage emailMessage = emailIterator.next(); EmailMessage emailMessage = emailIterator.next();
if(emailMessage != null) { if (emailMessage != null) {
emails.add(emailMessage); emails.add(emailMessage);
} }
} }
@ -409,8 +409,8 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
abstractFile.getName()), errors); abstractFile.getName()), errors);
} }
} }
processEmails(emails, MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId()), abstractFile); processEmails(emails, MboxParser.getEmailIterator(emailFolder, file, abstractFile.getId()), abstractFile);
} catch(Exception ex) { } catch (Exception ex) {
logger.log(Level.WARNING, String.format("Failed to close mbox temp file %s", file.getAbsolutePath())); logger.log(Level.WARNING, String.format("Failed to close mbox temp file %s", file.getAbsolutePath()));
} }
@ -451,16 +451,9 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
List<AbstractFile> derivedFiles = new ArrayList<>(); List<AbstractFile> derivedFiles = new ArrayList<>();
AccountFileInstanceCache accountFileInstanceCache = new AccountFileInstanceCache(abstractFile, currentCase); AccountFileInstanceCache accountFileInstanceCache = new AccountFileInstanceCache(abstractFile, currentCase);
// BlackboardArtifact msgArtifact = addEmailArtifact(message, abstractFile, accountFileInstanceCache);
createEmailArtifact(message, abstractFile, accountFileInstanceCache, derivedFiles); createEmailArtifact(message, abstractFile, accountFileInstanceCache, derivedFiles);
accountFileInstanceCache.clear(); accountFileInstanceCache.clear();
// if ((msgArtifact != null) && (message.hasAttachment())) {
// derivedFiles.addAll(handleAttachments(message.getAttachments(), abstractFile, msgArtifact));
// }
if (derivedFiles.isEmpty() == false) { if (derivedFiles.isEmpty() == false) {
for (AbstractFile derived : derivedFiles) { for (AbstractFile derived : derivedFiles) {
services.fireModuleContentEvent(new ModuleContentEvent(derived)); services.fireModuleContentEvent(new ModuleContentEvent(derived));
@ -539,41 +532,34 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
// Putting try/catch around this to catch any exception and still allow // Putting try/catch around this to catch any exception and still allow
// the creation of the artifacts to continue. // the creation of the artifacts to continue.
try{ try {
EmailMessageThreader.threadMessages(partialEmailsForThreading); EmailMessageThreader.threadMessages(partialEmailsForThreading);
} catch(Exception ex) { } catch (Exception ex) {
logger.log(Level.WARNING, String.format("Exception thrown parsing emails from %s", abstractFile.getName()), ex); logger.log(Level.WARNING, String.format("Exception thrown parsing emails from %s", abstractFile.getName()), ex);
} }
List<AbstractFile> derivedFiles = new ArrayList<>(); List<AbstractFile> derivedFiles = new ArrayList<>();
int msgCnt = 0; int msgCnt = 0;
while(fullMessageIterator.hasNext()) { while (fullMessageIterator.hasNext()) {
if (context.fileIngestIsCancelled()) { if (context.fileIngestIsCancelled()) {
return; return;
} }
EmailMessage current = fullMessageIterator.next(); EmailMessage current = fullMessageIterator.next();
if(current == null) { if (current == null) {
continue; continue;
} }
if(partialEmailsForThreading.size() > msgCnt) { if (partialEmailsForThreading.size() > msgCnt) {
EmailMessage threaded = partialEmailsForThreading.get(msgCnt++); EmailMessage threaded = partialEmailsForThreading.get(msgCnt++);
if(threaded.getMessageID().equals(current.getMessageID()) && if (threaded.getMessageID().equals(current.getMessageID())
threaded.getSubject().equals(current.getSubject())) { && threaded.getSubject().equals(current.getSubject())) {
current.setMessageThreadID(threaded.getMessageThreadID()); current.setMessageThreadID(threaded.getMessageThreadID());
} }
} }
// BlackboardArtifact msgArtifact = addEmailArtifact(current, abstractFile, accountFileInstanceCache);
//
// if ((msgArtifact != null) && (current.hasAttachment())) {
// derivedFiles.addAll(handleAttachments(current.getAttachments(), abstractFile, msgArtifact ));
// }
createEmailArtifact(current, abstractFile, accountFileInstanceCache, derivedFiles); createEmailArtifact(current, abstractFile, accountFileInstanceCache, derivedFiles);
} }
@ -592,10 +578,10 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
BlackboardArtifact msgArtifact = addEmailArtifact(email, abstractFile, accountFileInstanceCache); BlackboardArtifact msgArtifact = addEmailArtifact(email, abstractFile, accountFileInstanceCache);
if ((msgArtifact != null) && (email.hasAttachment())) { if ((msgArtifact != null) && (email.hasAttachment())) {
derivedFiles.addAll(handleAttachments(email.getAttachments(), abstractFile, msgArtifact )); derivedFiles.addAll(handleAttachments(email.getAttachments(), abstractFile, msgArtifact));
for (EmailMessage.Attachment attach : email.getAttachments()) { for (EmailMessage.Attachment attach : email.getAttachments()) {
if(attach instanceof AttachedEmailMessage) { if (attach instanceof AttachedEmailMessage) {
createEmailArtifact(((AttachedEmailMessage) attach).getEmailMessage(), abstractFile, accountFileInstanceCache, derivedFiles); createEmailArtifact(((AttachedEmailMessage) attach).getEmailMessage(), abstractFile, accountFileInstanceCache, derivedFiles);
} }
} }
@ -614,7 +600,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
*/ */
@NbBundle.Messages({ @NbBundle.Messages({
"ThunderbirdMboxFileIngestModule.handleAttch.addAttachmentsErrorMsg=Failed to add attachments to email message." "ThunderbirdMboxFileIngestModule.handleAttch.addAttachmentsErrorMsg=Failed to add attachments to email message."
}) })
private List<AbstractFile> handleAttachments(List<EmailMessage.Attachment> attachments, AbstractFile abstractFile, BlackboardArtifact messageArtifact) { private List<AbstractFile> handleAttachments(List<EmailMessage.Attachment> attachments, AbstractFile abstractFile, BlackboardArtifact messageArtifact) {
List<AbstractFile> files = new ArrayList<>(); List<AbstractFile> files = new ArrayList<>();
List<FileAttachment> fileAttachments = new ArrayList<>(); List<FileAttachment> fileAttachments = new ArrayList<>();
@ -648,7 +634,6 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
} }
} }
try { try {
communicationArtifactsHelper.addAttachments(messageArtifact, new MessageAttachments(fileAttachments, Collections.emptyList())); communicationArtifactsHelper.addAttachments(messageArtifact, new MessageAttachments(fileAttachments, Collections.emptyList()));
} catch (TskCoreException ex) { } catch (TskCoreException ex) {
@ -675,7 +660,8 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
} }
/** /**
* Finds and returns a set of unique email addresses found in the input string * Finds and returns a set of unique email addresses found in the input
* string
* *
* @param input - input string, like the To/CC line from an email header * @param input - input string, like the To/CC line from an email header
* *
@ -687,7 +673,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
Matcher m = p.matcher(input); Matcher m = p.matcher(input);
Set<String> emailAddresses = new HashSet<>(); Set<String> emailAddresses = new HashSet<>();
while (m.find()) { while (m.find()) {
emailAddresses.add( m.group()); emailAddresses.add(m.group());
} }
return emailAddresses; return emailAddresses;
} }
@ -733,12 +719,10 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
senderAddress = senderAddressList.get(0); senderAddress = senderAddressList.get(0);
try { try {
senderAccountInstance = accountFileInstanceCache.getAccountInstance(senderAddress); senderAccountInstance = accountFileInstanceCache.getAccountInstance(senderAddress);
} } catch (TskCoreException ex) {
catch(TskCoreException ex) {
logger.log(Level.WARNING, "Failed to create account for email address " + senderAddress, ex); //NON-NLS logger.log(Level.WARNING, "Failed to create account for email address " + senderAddress, ex); //NON-NLS
} }
} } else {
else {
logger.log(Level.WARNING, "Failed to find sender address, from = {0}", from); //NON-NLS logger.log(Level.WARNING, "Failed to find sender address, from = {0}", from); //NON-NLS
} }
@ -759,8 +743,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
try { try {
AccountFileInstance recipientAccountInstance = accountFileInstanceCache.getAccountInstance(addr); AccountFileInstance recipientAccountInstance = accountFileInstanceCache.getAccountInstance(addr);
recipientAccountInstances.add(recipientAccountInstance); recipientAccountInstances.add(recipientAccountInstance);
} } catch (TskCoreException ex) {
catch(TskCoreException ex) {
logger.log(Level.WARNING, "Failed to create account for email address " + addr, ex); //NON-NLS logger.log(Level.WARNING, "Failed to create account for email address " + addr, ex); //NON-NLS
} }
} }
@ -786,7 +769,6 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
addArtifactAttribute(rtf, ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_RTF, bbattributes); addArtifactAttribute(rtf, ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_RTF, bbattributes);
addArtifactAttribute(threadID, ATTRIBUTE_TYPE.TSK_THREAD_ID, bbattributes); addArtifactAttribute(threadID, ATTRIBUTE_TYPE.TSK_THREAD_ID, bbattributes);
try { try {
if (context.fileIngestIsCancelled()) { if (context.fileIngestIsCancelled()) {
return null; return null;
@ -800,7 +782,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
} }
// Add account relationships // Add account relationships
currentCase.getSleuthkitCase().getCommunicationsManager().addRelationships(senderAccountInstance, recipientAccountInstances, bbart,Relationship.Type.MESSAGE, dateL); currentCase.getSleuthkitCase().getCommunicationsManager().addRelationships(senderAccountInstance, recipientAccountInstances, bbart, Relationship.Type.MESSAGE, dateL);
if (context.fileIngestIsCancelled()) { if (context.fileIngestIsCancelled()) {
return null; return null;
@ -860,22 +842,24 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
} }
/** /**
* Cache for storing AccountFileInstance. * Cache for storing AccountFileInstance. The idea is that emails will be
* The idea is that emails will be used multiple times in a file and * used multiple times in a file and we shouldn't do a database lookup each
* we shouldn't do a database lookup each time. * time.
*/ */
static private class AccountFileInstanceCache { static private class AccountFileInstanceCache {
private final Map<String, AccountFileInstance> cacheMap; private final Map<String, AccountFileInstance> cacheMap;
private final AbstractFile file; private final AbstractFile file;
private final Case currentCase; private final Case currentCase;
/** /**
* Create a new cache. Caches are linked to a specific file. * Create a new cache. Caches are linked to a specific file.
*
* @param file * @param file
* @param currentCase * @param currentCase
*/ */
AccountFileInstanceCache(AbstractFile file, Case currentCase) { AccountFileInstanceCache(AbstractFile file, Case currentCase) {
cacheMap= new HashMap<>(); cacheMap = new HashMap<>();
this.file = file; this.file = file;
this.currentCase = currentCase; this.currentCase = currentCase;
} }
@ -894,8 +878,8 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return cacheMap.get(email); return cacheMap.get(email);
} }
AccountFileInstance accountInstance = AccountFileInstance accountInstance
currentCase.getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.EMAIL, email, = currentCase.getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.EMAIL, email,
EmailParserModuleFactory.getModuleName(), file); EmailParserModuleFactory.getModuleName(), file);
cacheMap.put(email, accountInstance); cacheMap.put(email, accountInstance);
return accountInstance; return accountInstance;