Cleaned up EmailParser code

This commit is contained in:
Kelly Kelly 2021-01-27 17:13:28 -05:00
parent 4974955ffa
commit 2103ce95f3
2 changed files with 162 additions and 181 deletions

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2019 Basis Technology Corp.
* Copyright 2019-2020 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -31,7 +31,6 @@ import org.apache.james.mime4j.dom.Entity;
import org.apache.james.mime4j.dom.Message;
import org.apache.james.mime4j.dom.MessageWriter;
import org.apache.james.mime4j.dom.Multipart;
import org.apache.james.mime4j.dom.SingleBody;
import org.apache.james.mime4j.dom.TextBody;
import org.apache.james.mime4j.dom.address.AddressList;
import org.apache.james.mime4j.dom.address.Mailbox;
@ -348,8 +347,6 @@ class MimeJ4MessageParser implements AutoCloseable{
logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
}
}
}
/**

View File

@ -73,6 +73,7 @@ import org.sleuthkit.datamodel.blackboardutils.attributes.MessageAttachments.Fil
* structure and metadata.
*/
public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
private static final Logger logger = Logger.getLogger(ThunderbirdMboxFileIngestModule.class.getName());
private final IngestServices services = IngestServices.getInstance();
private FileManager fileManager;
@ -90,7 +91,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
}
@Override
@Messages ({"ThunderbirdMboxFileIngestModule.noOpenCase.errMsg=Exception while getting open case."})
@Messages({"ThunderbirdMboxFileIngestModule.noOpenCase.errMsg=Exception while getting open case."})
public void startUp(IngestJobContext context) throws IngestModuleException {
this.context = context;
try {
@ -113,8 +114,8 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
}
//skip unalloc
if ((abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS)) ||
(abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK))) {
if ((abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS))
|| (abstractFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.SLACK))) {
return ProcessResult.OK;
}
@ -146,7 +147,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return ProcessResult.OK;
}
if (isMbox || isEMLFile || isPstFile || isVcardFile ) {
if (isMbox || isEMLFile || isPstFile || isVcardFile) {
try {
communicationArtifactsHelper = new CommunicationArtifactsHelper(currentCase.getSleuthkitCase(),
EmailParserModuleFactory.getModuleName(), abstractFile, Account.Type.EMAIL);
@ -205,7 +206,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return ProcessResult.OK;
}
try (PstParser parser = new PstParser(services)){
try (PstParser parser = new PstParser(services)) {
try {
ContentUtils.writeToFile(abstractFile, file, context::fileIngestIsCancelled);
} catch (IOException ex) {
@ -215,7 +216,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
PstParser.ParseResult result = parser.open(file, abstractFile.getId());
switch( result) {
switch (result) {
case OK:
Iterator<EmailMessage> pstMsgIterator = parser.getEmailMessageIterator();
if (pstMsgIterator != null) {
@ -263,7 +264,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
logger.log(Level.INFO, "PSTParser failed to parse {0}", abstractFile.getName()); //NON-NLS
return ProcessResult.ERROR;
}
} catch(Exception ex) {
} catch (Exception ex) {
logger.log(Level.WARNING, String.format("Failed to close temp pst file %s", file.getAbsolutePath()));
} finally {
file.delete();
@ -322,18 +323,18 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return ProcessResult.OK;
}
try{
try {
processMboxFile(file, abstractFile, emailFolder);
if (context.fileIngestIsCancelled()) {
return ProcessResult.OK;
}
}finally {
} finally {
file.delete();
}
} else {
List<Long> mboxSplitOffsets = new ArrayList<>();
try{
try {
mboxSplitOffsets = findMboxSplitOffset(abstractFile, file);
} catch (IOException ex) {
logger.log(Level.WARNING, String.format("Failed finding split offsets for mbox file {0}.", fileName), ex); //NON-NLS
@ -349,7 +350,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
logger.log(Level.WARNING, "Failed writing split mbox file to disk.", ex); //NON-NLS
return ProcessResult.OK;
}
try{
try {
processMboxFile(splitFile, abstractFile, emailFolder);
startingOffset = mboxSplitOffset;
} finally {
@ -375,9 +376,9 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
int len = in.read(buffer);
while (len != -1) {
len = in.read(buffer);
if (buffer[0] == 13 && buffer[1] == 10 && buffer[2] == 70 && buffer[3] == 114 &&
buffer[4] == 111 && buffer[5] == 109 && buffer[6] == 32) {
mboxSplitOffset.add(in.getCurPosition() - 5 );
if (buffer[0] == 13 && buffer[1] == 10 && buffer[2] == 70 && buffer[3] == 114
&& buffer[4] == 111 && buffer[5] == 109 && buffer[6] == 32) {
mboxSplitOffset.add(in.getCurPosition() - 5);
in.skip(MBOX_SIZE_TO_SPLIT);
}
}
@ -386,18 +387,17 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
}
private void processMboxFile(File file, AbstractFile abstractFile, String emailFolder) {
try(MboxParser emailIterator = MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId())) {
try (MboxParser emailIterator = MboxParser.getEmailIterator(emailFolder, file, abstractFile.getId())) {
List<EmailMessage> emails = new ArrayList<>();
if(emailIterator != null) {
while(emailIterator.hasNext()) {
if (emailIterator != null) {
while (emailIterator.hasNext()) {
if (context.fileIngestIsCancelled()) {
return;
}
EmailMessage emailMessage = emailIterator.next();
if(emailMessage != null) {
if (emailMessage != null) {
emails.add(emailMessage);
}
}
@ -409,8 +409,8 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
abstractFile.getName()), errors);
}
}
processEmails(emails, MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId()), abstractFile);
} catch(Exception ex) {
processEmails(emails, MboxParser.getEmailIterator(emailFolder, file, abstractFile.getId()), abstractFile);
} catch (Exception ex) {
logger.log(Level.WARNING, String.format("Failed to close mbox temp file %s", file.getAbsolutePath()));
}
@ -451,16 +451,9 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
List<AbstractFile> derivedFiles = new ArrayList<>();
AccountFileInstanceCache accountFileInstanceCache = new AccountFileInstanceCache(abstractFile, currentCase);
// BlackboardArtifact msgArtifact = addEmailArtifact(message, abstractFile, accountFileInstanceCache);
createEmailArtifact(message, abstractFile, accountFileInstanceCache, derivedFiles);
accountFileInstanceCache.clear();
// if ((msgArtifact != null) && (message.hasAttachment())) {
// derivedFiles.addAll(handleAttachments(message.getAttachments(), abstractFile, msgArtifact));
// }
if (derivedFiles.isEmpty() == false) {
for (AbstractFile derived : derivedFiles) {
services.fireModuleContentEvent(new ModuleContentEvent(derived));
@ -539,41 +532,34 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
// Putting try/catch around this to catch any exception and still allow
// the creation of the artifacts to continue.
try{
try {
EmailMessageThreader.threadMessages(partialEmailsForThreading);
} catch(Exception ex) {
} catch (Exception ex) {
logger.log(Level.WARNING, String.format("Exception thrown parsing emails from %s", abstractFile.getName()), ex);
}
List<AbstractFile> derivedFiles = new ArrayList<>();
int msgCnt = 0;
while(fullMessageIterator.hasNext()) {
while (fullMessageIterator.hasNext()) {
if (context.fileIngestIsCancelled()) {
return;
}
EmailMessage current = fullMessageIterator.next();
if(current == null) {
if (current == null) {
continue;
}
if(partialEmailsForThreading.size() > msgCnt) {
if (partialEmailsForThreading.size() > msgCnt) {
EmailMessage threaded = partialEmailsForThreading.get(msgCnt++);
if(threaded.getMessageID().equals(current.getMessageID()) &&
threaded.getSubject().equals(current.getSubject())) {
if (threaded.getMessageID().equals(current.getMessageID())
&& threaded.getSubject().equals(current.getSubject())) {
current.setMessageThreadID(threaded.getMessageThreadID());
}
}
// BlackboardArtifact msgArtifact = addEmailArtifact(current, abstractFile, accountFileInstanceCache);
//
// if ((msgArtifact != null) && (current.hasAttachment())) {
// derivedFiles.addAll(handleAttachments(current.getAttachments(), abstractFile, msgArtifact ));
// }
createEmailArtifact(current, abstractFile, accountFileInstanceCache, derivedFiles);
}
@ -592,10 +578,10 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
BlackboardArtifact msgArtifact = addEmailArtifact(email, abstractFile, accountFileInstanceCache);
if ((msgArtifact != null) && (email.hasAttachment())) {
derivedFiles.addAll(handleAttachments(email.getAttachments(), abstractFile, msgArtifact ));
derivedFiles.addAll(handleAttachments(email.getAttachments(), abstractFile, msgArtifact));
for (EmailMessage.Attachment attach : email.getAttachments()) {
if(attach instanceof AttachedEmailMessage) {
if (attach instanceof AttachedEmailMessage) {
createEmailArtifact(((AttachedEmailMessage) attach).getEmailMessage(), abstractFile, accountFileInstanceCache, derivedFiles);
}
}
@ -614,7 +600,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
*/
@NbBundle.Messages({
"ThunderbirdMboxFileIngestModule.handleAttch.addAttachmentsErrorMsg=Failed to add attachments to email message."
})
})
private List<AbstractFile> handleAttachments(List<EmailMessage.Attachment> attachments, AbstractFile abstractFile, BlackboardArtifact messageArtifact) {
List<AbstractFile> files = new ArrayList<>();
List<FileAttachment> fileAttachments = new ArrayList<>();
@ -648,7 +634,6 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
}
}
try {
communicationArtifactsHelper.addAttachments(messageArtifact, new MessageAttachments(fileAttachments, Collections.emptyList()));
} catch (TskCoreException ex) {
@ -675,7 +660,8 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
}
/**
* Finds and returns a set of unique email addresses found in the input string
* Finds and returns a set of unique email addresses found in the input
* string
*
* @param input - input string, like the To/CC line from an email header
*
@ -687,7 +673,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
Matcher m = p.matcher(input);
Set<String> emailAddresses = new HashSet<>();
while (m.find()) {
emailAddresses.add( m.group());
emailAddresses.add(m.group());
}
return emailAddresses;
}
@ -733,12 +719,10 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
senderAddress = senderAddressList.get(0);
try {
senderAccountInstance = accountFileInstanceCache.getAccountInstance(senderAddress);
}
catch(TskCoreException ex) {
} catch (TskCoreException ex) {
logger.log(Level.WARNING, "Failed to create account for email address " + senderAddress, ex); //NON-NLS
}
}
else {
} else {
logger.log(Level.WARNING, "Failed to find sender address, from = {0}", from); //NON-NLS
}
@ -759,8 +743,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
try {
AccountFileInstance recipientAccountInstance = accountFileInstanceCache.getAccountInstance(addr);
recipientAccountInstances.add(recipientAccountInstance);
}
catch(TskCoreException ex) {
} catch (TskCoreException ex) {
logger.log(Level.WARNING, "Failed to create account for email address " + addr, ex); //NON-NLS
}
}
@ -786,7 +769,6 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
addArtifactAttribute(rtf, ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_RTF, bbattributes);
addArtifactAttribute(threadID, ATTRIBUTE_TYPE.TSK_THREAD_ID, bbattributes);
try {
if (context.fileIngestIsCancelled()) {
return null;
@ -800,7 +782,7 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
}
// Add account relationships
currentCase.getSleuthkitCase().getCommunicationsManager().addRelationships(senderAccountInstance, recipientAccountInstances, bbart,Relationship.Type.MESSAGE, dateL);
currentCase.getSleuthkitCase().getCommunicationsManager().addRelationships(senderAccountInstance, recipientAccountInstances, bbart, Relationship.Type.MESSAGE, dateL);
if (context.fileIngestIsCancelled()) {
return null;
@ -860,22 +842,24 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
}
/**
* Cache for storing AccountFileInstance.
* The idea is that emails will be used multiple times in a file and
* we shouldn't do a database lookup each time.
* Cache for storing AccountFileInstance objects. The idea is that the same
* email address will be used multiple times in a file and we shouldn't do a
* database lookup each time.
*/
static private class AccountFileInstanceCache {
private final Map<String, AccountFileInstance> cacheMap;
private final AbstractFile file;
private final Case currentCase;
/**
* Create a new cache. Caches are linked to a specific file.
*
* @param file
* @param currentCase
*/
AccountFileInstanceCache(AbstractFile file, Case currentCase) {
cacheMap= new HashMap<>();
cacheMap = new HashMap<>();
this.file = file;
this.currentCase = currentCase;
}
@ -894,8 +878,8 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return cacheMap.get(email);
}
AccountFileInstance accountInstance =
currentCase.getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.EMAIL, email,
AccountFileInstance accountInstance
= currentCase.getSleuthkitCase().getCommunicationsManager().createAccountFileInstance(Account.Type.EMAIL, email,
EmailParserModuleFactory.getModuleName(), file);
cacheMap.put(email, accountInstance);
return accountInstance;