Reduced the memory footprint of the email parser

This commit is contained in:
Kelly Kelly 2019-07-02 15:17:45 -04:00
parent 516d987bb8
commit f05cb9590b
6 changed files with 541 additions and 186 deletions

View File

@ -13,5 +13,6 @@
<dependency conf="autopsy->default" org="org.apache.james" name="apache-mime4j-mbox-iterator" rev="0.8.0"/>
<dependency conf="autopsy->default" org="com.googlecode.ez-vcard" name="ez-vcard" rev="0.10.5"/>
<dependency conf="autopsy->default" org="com.github.mangstadt" name="vinnie" rev="2.0.2"/>
<dependency org="com.google.guava" name="guava" rev="19.0"/>
</dependencies>
</ivy-module>

View File

@ -1,13 +1,19 @@
file.reference.apache-mime4j-core-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar
file.reference.apache-mime4j-dom-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar
file.reference.apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar
file.reference.commons-lang3-3.8.1.jar=release/modules/ext/commons-lang3-3.8.1.jar
file.reference.apache-mime4j-core-0.8.0.jar=release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar
file.reference.apache-mime4j-dom-0.8.0.jar=release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar
file.reference.apache-mime4j-mbox-iterator-0.8.0.jar=release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar
file.reference.guava-19.0.jar=release/modules/ext/guava-19.0.jar
file.reference.java-libpst-1.0-SNAPSHOT.jar=release/modules/ext/java-libpst-1.0-SNAPSHOT.jar
file.reference.ez-vcard-0.10.5.jar=release/modules/ext/ez-vcard-0.10.5.jar
file.reference.vinnie-2.0.2.jar=release/modules/ext/vinnie-2.0.2.jar
javac.source=1.8
javac.compilerargs=-Xlint -Xlint:-serial
javadoc.reference.guava-19.0.jar=release/modules/ext/guava-19.0-javadoc.jar
license.file=../LICENSE-2.0.txt
nbm.homepage=http://www.sleuthkit.org/autopsy/
nbm.needs.restart=true
source.reference.guava-19.0.jar=release/modules/ext/guava-19.0-sources.jar
spec.version.base=4.0

View File

@ -54,18 +54,6 @@
<runtime-relative-path>ext/commons-lang3-3.8.1.jar</runtime-relative-path>
<binary-origin>release/modules/ext/commons-lang3-3.8.1.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/java-libpst-1.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/java-libpst-1.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar</binary-origin>
@ -74,6 +62,22 @@
<runtime-relative-path>ext/ez-vcard-0.10.5.jar</runtime-relative-path>
<binary-origin>release/modules/ext/ez-vcard-0.10.5.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/java-libpst-1.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/java-libpst-1.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/guava-19.0.jar</runtime-relative-path>
<binary-origin>release/modules/ext/guava-19.0.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</runtime-relative-path>
<binary-origin>release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/vinnie-2.0.2.jar</runtime-relative-path>
<binary-origin>release/modules/ext/vinnie-2.0.2.jar</binary-origin>

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2014 Basis Technology Corp.
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -33,6 +33,7 @@ import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.logging.Level;
@ -57,22 +58,17 @@ import org.apache.tika.parser.txt.CharsetDetector;
import org.apache.tika.parser.txt.CharsetMatch;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.autopsy.ingest.IngestServices;
import org.sleuthkit.datamodel.TskData;
import org.sleuthkit.datamodel.EncodedFileOutputStream;
/**
* A parser that extracts information about email messages and attachments from
* a mbox file.
*
* @author jwallace
* An Iterator for parsing mbox files. Wraps an instance of MBoxEmailIterator.
*/
class MboxParser {
class MboxParser implements Iterator<EmailMessage>{
private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
private DefaultMessageBuilder messageBuilder;
private IngestServices services;
private StringBuilder errors;
private final DefaultMessageBuilder messageBuilder;
private final StringBuilder errors;
/**
* The mime type string for html text.
@ -83,9 +79,11 @@ class MboxParser {
* The local path of the mbox file.
*/
private String localPath;
private Iterator<EmailMessage> emailIterator = null;
MboxParser(IngestServices services, String localPath) {
this.services = services;
private MboxParser( String localPath) {
this.localPath = localPath;
messageBuilder = new DefaultMessageBuilder();
MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
@ -97,63 +95,76 @@ class MboxParser {
/**
 * Checks whether the given bytes begin with the mbox "From " marker.
 *
 * The comparison is done on the raw bytes, so the result does not depend on
 * the platform default charset and the buffer is never decoded as a whole.
 *
 * @param buffer The leading bytes of the file, may be null
 *
 * @return True if buffer starts with the ASCII bytes of "From ", false
 *         otherwise (including a null or too short buffer)
 */
static boolean isValidMimeTypeMbox(byte[] buffer) {
    final String marker = "From "; //NON-NLS
    if (buffer == null || buffer.length < marker.length()) {
        return false;
    }
    for (int i = 0; i < marker.length(); i++) {
        // marker is pure ASCII, so each char maps to exactly one byte
        if (buffer[i] != (byte) marker.charAt(i)) {
            return false;
        }
    }
    return true;
}
/**
* Parse the mbox file and get the email messages.
*
* @param mboxFile
*
* @return a list of the email messages in the mbox file.
* Returns an instance of MboxParser that will iterate and return EmailMessage
* objects with only the information needed for threading emails.
*
* @param localPath String path to the mboxFile
* @param mboxFile The mboxFile to parse
* @return Instance of MboxParser
*/
List<EmailMessage> parse(File mboxFile, long fileID) {
static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
    final MboxParser threadInfoParser = new MboxParser(localPath);
    // wholeMsg is false so only thread information is extracted; 0 is
    // passed as the file ID argument.
    threadInfoParser.createIterator(mboxFile, 0, false);
    return threadInfoParser;
}
/**
 * Creates an MboxParser whose iteration yields "whole" EmailMessage objects.
 *
 * @param localPath String path to the mbox file
 * @param mboxFile  The mbox file to parse
 * @param fileID    The file ID of the abstract file the mbox file came from
 *
 * @return A new MboxParser instance with its iterator already created
 */
static MboxParser getEmailIterator(String localPath, File mboxFile, long fileID) {
    final MboxParser wholeMessageParser = new MboxParser(localPath);
    // wholeMsg is true so each EmailMessage carries the whole message
    wholeMessageParser.createIterator(mboxFile, fileID, true);
    return wholeMessageParser;
}
/**
* Creates the real Iterator object instance.
*
* @param mboxFile The mboxFile to parse
* @param fileID The fileID of the abstractFile that mboxFile was found
* @param wholeMsg True if EmailMessage should have the whole message,
* not just the thread information.
*/
private void createIterator(File mboxFile, long fileID, boolean wholeMsg) {
// Detect possible charsets
List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
CharsetEncoder theEncoder = null;
Iterable<CharBufferWrapper> mboxIterator = null;
// Loop through the possible encoders and find the first one that works.
// That will usually be one of the first ones.
for (CharsetEncoder encoder : encoders) {
try {
mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
theEncoder = encoder;
Iterable<CharBufferWrapper> mboxIterable = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
if(mboxIterable != null) {
emailIterator = new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
}
break;
} catch (CharConversionException | UnsupportedCharsetException ex) {
// Not the right encoder
} catch (IllegalArgumentException ex) {
// Not the right encoder
} catch (IOException ex) {
logger.log(Level.WARNING, "couldn't find mbox file.", ex); //NON-NLS
logger.log(Level.WARNING, String.format("Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex); //NON-NLS
addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
return new ArrayList<>();
}
}
// If no encoders work, post an error message and return.
if (mboxIterator == null || theEncoder == null) {
addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.couldntFindCharset"));
return new ArrayList<>();
}
List<EmailMessage> emails = new ArrayList<>();
long failCount = 0;
// Parse each message and extract an EmailMessage structure
for (CharBufferWrapper message : mboxIterator) {
try {
Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
emails.add(extractEmail(msg, fileID));
} catch (RuntimeException | IOException ex) {
logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
failCount++;
}
}
if (failCount > 0) {
addErrorMessage(
NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
}
return emails;
}
@Override
public boolean hasNext() {
    // With no wrapped iterator there is nothing to iterate over
    if (emailIterator == null) {
        return false;
    }
    return emailIterator.hasNext();
}
@Override
public EmailMessage next() {
    // With no wrapped iterator, null is returned instead of a message
    if (emailIterator == null) {
        return null;
    }
    return emailIterator.next();
}
String getErrors() {
@ -211,6 +222,44 @@ class MboxParser {
return email;
}
/**
 * Extract the subject, inReplyTo, message-ID and references from the Message
 * object and returns them in a new EmailMessage object.
 *
 * When a "references" header is present, the in-reply-to ID (if the message
 * has one) is appended to the reference list unless it is already listed.
 *
 * @param msg Message object
 *
 * @return EmailMessage instance with only some of the message information
 */
private EmailMessage extractPartialEmail(Message msg) {
    EmailMessage email = new EmailMessage();
    email.setSubject(msg.getSubject());
    email.setMessageID(msg.getMessageId());

    Field field = msg.getHeader().getField("in-reply-to"); //NON-NLS
    String inReplyTo = null;
    if (field != null) {
        inReplyTo = field.getBody();
        email.setInReplyToID(inReplyTo);
    }

    field = msg.getHeader().getField("references"); //NON-NLS
    if (field != null) {
        List<String> references = new ArrayList<>();
        for (String id : field.getBody().split(">")) {
            String trimmedId = id.trim();
            // Skip whitespace-only fragments so a bare ">" is never
            // recorded as a reference.
            if (!trimmedId.isEmpty()) {
                references.add(trimmedId + ">");
            }
        }

        // Without an in-reply-to header inReplyTo is still null; do not
        // add a null entry to the reference list.
        if (inReplyTo != null && !references.contains(inReplyTo)) {
            references.add(inReplyTo);
        }

        email.setReferences(references);
    }

    return email;
}
/**
* Handle a multipart mime message. Recursively calls handleMultipart if one
@ -333,7 +382,7 @@ class MboxParser {
addErrorMessage(
NbBundle.getMessage(this.getClass(),
"MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
logger.log(Level.INFO, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
return;
}
@ -346,14 +395,14 @@ class MboxParser {
// This could potentially be other types. Only seen this once.
}
} catch (IOException ex) {
logger.log(Level.INFO, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
logger.log(Level.WARNING, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.handleAttch.failedWriteToDisk", filename));
return;
} finally {
try {
fos.close();
} catch (IOException ex) {
logger.log(Level.INFO, "Failed to close file output stream", ex); //NON-NLS
logger.log(Level.WARNING, "Failed to close file output stream", ex); //NON-NLS
}
}
@ -441,7 +490,7 @@ class MboxParser {
try {
is.close();
} catch (IOException ex) {
logger.log(Level.INFO, "Failed to close input stream"); //NON-NLS
logger.log(Level.WARNING, "Failed to close input stream"); //NON-NLS
}
}
}
@ -449,4 +498,45 @@ class MboxParser {
private void addErrorMessage(String msg) {
errors.append("<li>").append(msg).append("</li>"); //NON-NLS
}
/**
 * An Iterator for mbox email messages. Each raw message supplied by the
 * wrapped iterator is parsed and converted into an EmailMessage.
 */
final class MBoxEmailIterator implements Iterator<EmailMessage> {

    private final Iterator<CharBufferWrapper> mboxIterator;
    private final CharsetEncoder encoder;
    private final long fileID;
    private final boolean wholeMsg;

    MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
        this.mboxIterator = mboxIter;
        this.encoder = encoder;
        this.fileID = fileID;
        this.wholeMsg = wholeMsg;
    }

    @Override
    public boolean hasNext() {
        // Both the raw iterator and the encoder are required to produce messages
        if (mboxIterator == null || encoder == null) {
            return false;
        }
        return mboxIterator.hasNext();
    }

    @Override
    public EmailMessage next() {
        final CharBufferWrapper rawMessage = mboxIterator.next();
        // Stays null when the message cannot be parsed or extracted
        EmailMessage result = null;
        try {
            final Message parsed = messageBuilder.parseMessage(rawMessage.asInputStream(encoder.charset()));
            result = wholeMsg ? extractEmail(parsed, fileID) : extractPartialEmail(parsed);
        } catch (RuntimeException | IOException ex) {
            logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
        }
        return result;
    }
}
}

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011-2014 Basis Technology Corp.
* Copyright 2011-2019 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -18,6 +18,7 @@
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import com.google.common.collect.Iterables;
import com.pff.PSTAttachment;
import com.pff.PSTException;
import com.pff.PSTFile;
@ -29,6 +30,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Scanner;
import java.util.logging.Level;
@ -55,112 +57,193 @@ class PstParser {
* First four bytes of a pst file.
*/
private static int PST_HEADER = 0x2142444E;
private IngestServices services;
/**
* A map of PSTMessages to their Local path within the file's internal
* directory structure.
*/
private List<EmailMessage> results;
private StringBuilder errors;
private final StringBuilder errors;
private final IngestServices services;
private PSTFile pstFile;
private long fileID;
private int failureCount = 0;
PstParser(IngestServices services) {
results = new ArrayList<>();
this.services = services;
errors = new StringBuilder();
this.services = services;
}
enum ParseResult {
OK, ERROR, ENCRYPT;
}
/**
* Parse and extract email messages from the pst/ost file.
*
* @param file A pst or ost file.
*
* @return ParseResult: OK on success, ERROR on an error, ENCRYPT if failed
* because the file is encrypted.
* Create an instance of PSTFile for the given File object.
*
* The constructor for PSTFile object will throw a generic PSTException if the
* file is encrypted.
* <a href=https://github.com/rjohnsondev/java-libpst/blob/5436a7abc8ac8c1622bf5dba0f4f9428fdbcd634/src/main/java/com/pff/PSTFile.java> PSTFile.java</a>
*
* @param file File to open
* @param fileID File id for use when creating the EmailMessage objects
* @return ParseResult value OK if the PSTFile was successfully created,
* ENCRYPT will be returned for PSTExceptions whose message matches a specific
* message or for IllegalArgumentExceptions
*/
ParseResult parse(File file, long fileID) {
PSTFile pstFile;
long failures;
ParseResult open(File file, long fileID) {
if(file == null) {
return ParseResult.ERROR;
}
try {
pstFile = new PSTFile(file);
failures = processFolder(pstFile.getRootFolder(), "\\", true, fileID);
if (failures > 0) {
addErrorMessage(
NbBundle.getMessage(this.getClass(), "PstParser.parse.errMsg.failedToParseNMsgs", failures));
} catch(PSTException ex) {
// This is the message thrown from the PSTFile constructor if it
// detects that the file is encrypted.
if(ex.getMessage().equals("Only unencrypted and compressable PST files are supported at this time")) { //NON-NLS
logger.log(Level.INFO, "Found encrypted PST file."); //NON-NLS
return ParseResult.ENCRYPT;
}
return ParseResult.OK;
} catch (PSTException | IOException ex) {
String msg = file.getName() + ": Failed to create internal java-libpst PST file to parse:\n" + ex.getMessage(); //NON-NLS
logger.log(Level.WARNING, msg);
logger.log(Level.WARNING, msg, ex);
return ParseResult.ERROR;
} catch (IllegalArgumentException ex) {
} catch (IOException ex) {
String msg = file.getName() + ": Failed to create internal java-libpst PST file to parse:\n" + ex.getMessage(); //NON-NLS
logger.log(Level.WARNING, msg, ex);
return ParseResult.ERROR;
} catch (IllegalArgumentException ex) { // Not sure if this is true, was in previous version of code.
logger.log(Level.INFO, "Found encrypted PST file."); //NON-NLS
return ParseResult.ENCRYPT;
}
return ParseResult.OK;
}
/**
* Get the results of the parsing.
*
* @return
* Creates an EmailMessage iterator for pstFile. These Email objects will be
* complete and with all available information.
*
* @return A instance of an EmailMessage Iterator
*/
List<EmailMessage> getResults() {
return results;
Iterator<EmailMessage> getEmailMessageIterator() {
    // An empty iterator (never null) is returned when there is nothing to
    // iterate, so callers can loop on hasNext() without a null check.
    if (pstFile == null) {
        return java.util.Collections.emptyIterator();
    }

    Iterable<EmailMessage> iterable = null;

    try {
        iterable = getEmaiMessageIterator(pstFile.getRootFolder(), "\\", fileID, true);
    } catch (PSTException | IOException ex) {
        logger.log(Level.WARNING, String.format("Exception thrown while parsing fileID: %d", fileID), ex);
    }

    // No folder produced any messages, or walking the folders failed
    if (iterable == null) {
        return java.util.Collections.emptyIterator();
    }

    return iterable.iterator();
}
/**
 * Collects the EmailMessages that carry only the information needed for
 * threading the emails.
 *
 * @return A list of EmailMessage, or an empty list if none were found.
 */
List<EmailMessage> getPartialEmailMessages() {
    final List<EmailMessage> collected = new ArrayList<>();
    final Iterator<EmailMessage> partials = getPartialEmailMessageIterator();

    // No iterator available: hand back the empty list
    if (partials == null) {
        return collected;
    }

    while (partials.hasNext()) {
        collected.add(partials.next());
    }
    return collected;
}
/**
* Get the error text collected by this parser.
*
* @return The current contents of the errors buffer as a String
*/
String getErrors() {
return errors.toString();
}
/**
* Process this folder and all subfolders, adding every email found to
* results. Accumulates the folder hierarchy path as it navigates the folder
* structure.
*
* @param folder The folder to navigate and process
* @param path The path to the folder within the pst/ost file's directory
* structure
*
* @throws PSTException
* @throws IOException
/**
* Get the number of failures counted so far. The count is incremented each
* time fetching or extracting a message from a PST folder throws a
* PSTException or IOException.
*
* @return The current failure count
*/
int getFailureCount() {
return failureCount;
}
/**
* Get an Iterator which will iterate over the PSTFile, but return EmailMessages
* with only the information needed for putting the emails into threads.
*
* @return An EmailMessage iterator or null if no messages were found
*/
private long processFolder(PSTFolder folder, String path, boolean root, long fileID) {
String newPath = (root ? path : path + "\\" + folder.getDisplayName());
long failCount = 0L; // Number of emails that failed
if (folder.hasSubfolders()) {
List<PSTFolder> subFolders;
try {
subFolders = folder.getSubFolders();
} catch (PSTException | IOException ex) {
subFolders = new ArrayList<>();
logger.log(Level.INFO, "java-libpst exception while getting subfolders: {0}", ex.getMessage()); //NON-NLS
}
for (PSTFolder f : subFolders) {
failCount += processFolder(f, newPath, false, fileID);
}
private Iterator<EmailMessage> getPartialEmailMessageIterator() {
if(pstFile == null) {
return null;
}
if (folder.getContentCount() != 0) {
PSTMessage email;
// A folder's children are always emails, never other folders.
try {
while ((email = (PSTMessage) folder.getNextChild()) != null) {
results.add(extractEmailMessage(email, newPath, fileID));
}
} catch (PSTException | IOException ex) {
failCount++;
logger.log(Level.INFO, "java-libpst exception while getting emails from a folder: {0}", ex.getMessage()); //NON-NLS
}
Iterable<EmailMessage> iterable = null;
try {
iterable = getEmaiMessageIterator(pstFile.getRootFolder(), "\\", fileID, false);
} catch (PSTException | IOException ex) {
logger.log(Level.WARNING, String.format("Exception thrown while parsing fileID: %d", fileID), ex);
}
return failCount;
if(iterable == null) {
return null;
}
return iterable.iterator();
}
/**
 * Builds an Iterable over the email messages of the given folder followed by
 * the messages of its subfolders, visited recursively.
 *
 * @param folder   PSTFolder to process
 * @param path     String path to folder
 * @param fileID   FileID of the AbstractFile folder was found in
 * @param wholeMsg True to fill the EmailMessage with all data, false for
 *                 only the thread information
 *
 * @return An Iterable for iterating email messages, or null if neither the
 *         folder nor any of its subfolders produced an Iterable.
 *
 * @throws PSTException
 * @throws IOException
 */
private Iterable<EmailMessage> getEmaiMessageIterator(PSTFolder folder, String path, long fileID, boolean wholeMsg) throws PSTException, IOException {
    // Messages directly inside this folder, if it has any content
    Iterable<EmailMessage> combined = folder.getContentCount() > 0
            ? new PstEmailIterator(folder, path, fileID, wholeMsg).getIterable()
            : null;

    if (!folder.hasSubfolders()) {
        return combined;
    }

    for (PSTFolder child : folder.getSubFolders()) {
        final String childPath = path + "\\" + child.getDisplayName();
        final Iterable<EmailMessage> childMessages = getEmaiMessageIterator(child, childPath, fileID, wholeMsg);
        if (childMessages != null) {
            // Chain the child's messages after whatever was gathered so far
            combined = (combined == null) ? childMessages : Iterables.concat(combined, childMessages);
        }
    }

    return combined;
}
/**
* Create an EmailMessage from a PSTMessage.
*
@ -214,6 +297,33 @@ class PstParser {
return email;
}
/**
 * Builds an EmailMessage from a PSTMessage, copying only the fields used for
 * threading: subject, id, message-ID, in-reply-to ID and references.
 *
 * @param msg The PSTMessage to read from
 *
 * @return EmailMessage object with only some information, not all of the msg.
 */
private EmailMessage extractPartialEmailMessage(PSTMessage msg) {
    final EmailMessage partial = new EmailMessage();
    partial.setSubject(msg.getSubject());
    partial.setId(msg.getDescriptorNodeId());
    partial.setMessageID(msg.getInternetMessageId());

    final String replyTarget = msg.getInReplyToId();
    partial.setInReplyToID(replyTarget);

    List<String> refs = extractReferences(msg.getTransportMessageHeaders());
    final boolean hasReplyTarget = replyTarget != null && !replyTarget.isEmpty();
    if (hasReplyTarget) {
        // Make sure the in-reply-to ID is part of the reference list
        if (refs == null) {
            refs = new ArrayList<>();
        }
        if (!refs.contains(replyTarget)) {
            refs.add(replyTarget);
        }
    }
    partial.setReferences(refs);

    return partial;
}
/**
* Add the attachments within the PSTMessage to the EmailMessage.
@ -388,5 +498,95 @@ class PstParser {
}
return null;
}
/**
 * An iterator for processing the messages of a single PST folder and
 * returning instances of the EmailMessage object. The iterator reads one
 * message ahead: "next" always holds the message the following call to
 * next() will return, or null when there is none.
 */
private final class PstEmailIterator implements Iterator<EmailMessage> {

    private final PSTFolder folder;
    private EmailMessage next;

    private final String currentPath;
    private final long fileID;
    private final boolean wholeMsg;

    /**
     * Class constructor, initializes the "next" message.
     *
     * @param folder   PSTFolder object to iterate across
     * @param path     String path value to the location of folder
     * @param fileID   Long fileID of the abstract file this PSTFolder was
     *                 found in
     * @param wholeMsg True to extract whole messages, false to extract only
     *                 the thread information
     */
    PstEmailIterator(PSTFolder folder, String path, long fileID, boolean wholeMsg) {
        this.folder = folder;
        this.fileID = fileID;
        this.currentPath = path;
        this.wholeMsg = wholeMsg;

        // Only touch the folder's children when it reports content
        if (folder.getContentCount() > 0) {
            next = readNextMessage();
        }
    }

    @Override
    public boolean hasNext() {
        return next != null;
    }

    @Override
    public EmailMessage next() {
        EmailMessage current = next;
        next = readNextMessage();
        return current;
    }

    /**
     * Reads the folder's next child and converts it to an EmailMessage.
     *
     * @return The extracted message, or null when the folder has no more
     *         children or reading failed (the failure is logged and
     *         counted in failureCount)
     */
    private EmailMessage readNextMessage() {
        try {
            PSTMessage message = (PSTMessage) folder.getNextChild();
            if (message == null) {
                return null;
            }
            return wholeMsg
                    ? extractEmailMessage(message, currentPath, fileID)
                    : extractPartialEmailMessage(message);
        } catch (PSTException | IOException ex) {
            failureCount++;
            logger.log(Level.WARNING, String.format("Unable to extract emails for path: %s file ID: %d ", currentPath, fileID), ex);
            return null;
        }
    }

    /**
     * Get a wrapped Iterable version of PstEmailIterator. Every call to
     * iterator() on the result returns this same iterator instance.
     *
     * @return Iterable wrapping this class
     */
    Iterable<EmailMessage> getIterable() {
        return new Iterable<EmailMessage>() {
            @Override
            public Iterator<EmailMessage> iterator() {
                return PstEmailIterator.this;
            }
        };
    }
}
}

View File

@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
@ -183,51 +184,53 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
}
PstParser parser = new PstParser(services);
PstParser.ParseResult result = parser.parse(file, abstractFile.getId());
PstParser.ParseResult result = parser.open(file, abstractFile.getId());
if (result == PstParser.ParseResult.OK) {
// parse success: Process email and add artifacts
processEmails(parser.getResults(), abstractFile);
} else if (result == PstParser.ParseResult.ENCRYPT) {
// encrypted pst: Add encrypted file artifact
try {
BlackboardArtifact artifact = abstractFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED);
artifact.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME, EmailParserModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.encryptionFileLevel")));
switch( result) {
case OK:
processEmails(parser.getPartialEmailMessages(), parser.getEmailMessageIterator(), abstractFile);
break;
case ENCRYPT:
// encrypted pst: Add encrypted file artifact
try {
// index the artifact for keyword search
blackboard.indexArtifact(artifact);
} catch (Blackboard.BlackboardException ex) {
MessageNotifyUtil.Notify.error(Bundle.ThunderbirdMboxFileIngestModule_processPst_indexError_message(), artifact.getDisplayName());
logger.log(Level.SEVERE, "Unable to index blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
}
BlackboardArtifact artifact = abstractFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED);
artifact.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME, EmailParserModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.encryptionFileLevel")));
services.fireModuleDataEvent(new ModuleDataEvent(EmailParserModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED));
} catch (TskCoreException ex) {
logger.log(Level.INFO, "Failed to add encryption attribute to file: {0}", abstractFile.getName()); //NON-NLS
}
} else {
// parsing error: log message
postErrorMessage(
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processPst.errProcFile.msg",
abstractFile.getName()),
NbBundle.getMessage(this.getClass(),
"ThunderbirdMboxFileIngestModule.processPst.errProcFile.details"));
logger.log(Level.INFO, "PSTParser failed to parse {0}", abstractFile.getName()); //NON-NLS
return ProcessResult.ERROR;
try {
// index the artifact for keyword search
blackboard.indexArtifact(artifact);
} catch (Blackboard.BlackboardException ex) {
MessageNotifyUtil.Notify.error(Bundle.ThunderbirdMboxFileIngestModule_processPst_indexError_message(), artifact.getDisplayName());
logger.log(Level.SEVERE, "Unable to index blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
}
services.fireModuleDataEvent(new ModuleDataEvent(EmailParserModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED));
} catch (TskCoreException ex) {
logger.log(Level.INFO, "Failed to add encryption attribute to file: {0}", abstractFile.getName()); //NON-NLS
}
break;
default:
// parsing error: log message
postErrorMessage(
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processPst.errProcFile.msg",
abstractFile.getName()),
NbBundle.getMessage(this.getClass(),
"ThunderbirdMboxFileIngestModule.processPst.errProcFile.details"));
logger.log(Level.INFO, "PSTParser failed to parse {0}", abstractFile.getName()); //NON-NLS
return ProcessResult.ERROR;
}
if (file.delete() == false) {
logger.log(Level.INFO, "Failed to delete temp file: {0}", file.getName()); //NON-NLS
}
String errors = parser.getErrors();
if (errors.isEmpty() == false) {
postErrorMessage(
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processPst.errProcFile.msg2",
abstractFile.getName()), errors);
}
// String errors = parser.getErrors();
// if (errors.isEmpty() == false) {
// postErrorMessage(
// NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processPst.errProcFile.msg2",
// abstractFile.getName()), errors);
// }
return ProcessResult.OK;
}
@ -281,21 +284,29 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
return ProcessResult.OK;
}
MboxParser parser = new MboxParser(services, emailFolder);
List<EmailMessage> emails = parser.parse(file, abstractFile.getId());
processEmails(emails, abstractFile);
MboxParser emailIterator = MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId());
List<EmailMessage> emails = new ArrayList<>();
if(emailIterator != null) {
while(emailIterator.hasNext()) {
EmailMessage emailMessage = emailIterator.next();
if(emailMessage != null) {
emails.add(emailMessage);
}
}
String errors = emailIterator.getErrors();
if (!errors.isEmpty()) {
postErrorMessage(
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processMBox.errProcFile.msg2",
abstractFile.getName()), errors);
}
}
processEmails(emails, MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId()), abstractFile);
if (file.delete() == false) {
logger.log(Level.INFO, "Failed to delete temp file: {0}", file.getName()); //NON-NLS
}
String errors = parser.getErrors();
if (errors.isEmpty() == false) {
postErrorMessage(
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processMBox.errProcFile.msg2",
abstractFile.getName()), errors);
}
return ProcessResult.OK;
}
@ -434,7 +445,50 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
context.addFilesToJob(derivedFiles);
services.fireModuleDataEvent(new ModuleDataEvent(EmailParserModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
}
/**
 * Threads the partial messages, then walks the full messages creating an
 * email artifact (and derived files for attachments) for each of them.
 *
 * The partial list and the full message iterator are matched by position;
 * a thread ID is copied over only when message ID and subject agree.
 *
 * @param partialEmailsForThreading Messages holding only the information
 *                                  needed for threading
 * @param fullMessageIterator       Iterator over the complete messages, may
 *                                  be null in which case no artifacts are
 *                                  created
 * @param abstractFile              The file the messages were found in
 */
private void processEmails(List<EmailMessage> partialEmailsForThreading, Iterator<EmailMessage> fullMessageIterator, AbstractFile abstractFile) {

    // Putting try/catch around this to catch any exception and still allow
    // the creation of the artifacts to continue.
    try {
        EmailMessageThreader.threadMessages(partialEmailsForThreading, String.format("%d", abstractFile.getId()));
    } catch (Exception ex) {
        logger.log(Level.WARNING, String.format("Exception thrown parsing emails from %s", abstractFile.getName()), ex);
    }

    List<AbstractFile> derivedFiles = new ArrayList<>();

    int msgCnt = 0;
    // The parser may hand back null instead of an iterator; treat that the
    // same as an iterator with no messages.
    while (fullMessageIterator != null && fullMessageIterator.hasNext()) {
        EmailMessage current = fullMessageIterator.next();

        if (current == null) {
            continue;
        }

        if (partialEmailsForThreading.size() > msgCnt) {
            EmailMessage threaded = partialEmailsForThreading.get(msgCnt++);

            // Null-safe comparison: a message may lack an ID or a subject
            if (java.util.Objects.equals(threaded.getMessageID(), current.getMessageID())
                    && java.util.Objects.equals(threaded.getSubject(), current.getSubject())) {
                current.setMessageThreadID(threaded.getMessageThreadID());
            }
        }

        BlackboardArtifact msgArtifact = addEmailArtifact(current, abstractFile);

        if ((msgArtifact != null) && (current.hasAttachment())) {
            derivedFiles.addAll(handleAttachments(current.getAttachments(), abstractFile, msgArtifact));
        }
    }

    for (AbstractFile derived : derivedFiles) {
        services.fireModuleContentEvent(new ModuleContentEvent(derived));
    }

    context.addFilesToJob(derivedFiles);
    services.fireModuleDataEvent(new ModuleDataEvent(EmailParserModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
}
/**
* Add the given attachments as derived files and reschedule them for
* ingest.