mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-06 21:00:22 +00:00
Reduced the memory footprint of the email parser
This commit is contained in:
parent
516d987bb8
commit
f05cb9590b
@ -13,5 +13,6 @@
|
||||
<dependency conf="autopsy->default" org="org.apache.james" name="apache-mime4j-mbox-iterator" rev="0.8.0"/>
|
||||
<dependency conf="autopsy->default" org="com.googlecode.ez-vcard" name="ez-vcard" rev="0.10.5"/>
|
||||
<dependency conf="autopsy->default" org="com.github.mangstadt" name="vinnie" rev="2.0.2"/>
|
||||
<dependency org="com.google.guava" name="guava" rev="19.0"/>
|
||||
</dependencies>
|
||||
</ivy-module>
|
||||
|
@ -1,13 +1,19 @@
|
||||
file.reference.apache-mime4j-core-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar
|
||||
file.reference.apache-mime4j-dom-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar
|
||||
file.reference.apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar
|
||||
file.reference.commons-lang3-3.8.1.jar=release/modules/ext/commons-lang3-3.8.1.jar
|
||||
file.reference.apache-mime4j-core-0.8.0.jar=release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar
|
||||
file.reference.apache-mime4j-dom-0.8.0.jar=release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar
|
||||
file.reference.apache-mime4j-mbox-iterator-0.8.0.jar=release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar
|
||||
file.reference.guava-19.0.jar=release/modules/ext/guava-19.0.jar
|
||||
file.reference.java-libpst-1.0-SNAPSHOT.jar=release/modules/ext/java-libpst-1.0-SNAPSHOT.jar
|
||||
file.reference.ez-vcard-0.10.5.jar=release/modules/ext/ez-vcard-0.10.5.jar
|
||||
file.reference.vinnie-2.0.2.jar=release/modules/ext/vinnie-2.0.2.jar
|
||||
javac.source=1.8
|
||||
javac.compilerargs=-Xlint -Xlint:-serial
|
||||
javadoc.reference.guava-19.0.jar=release/modules/ext/guava-19.0-javadoc.jar
|
||||
license.file=../LICENSE-2.0.txt
|
||||
nbm.homepage=http://www.sleuthkit.org/autopsy/
|
||||
nbm.needs.restart=true
|
||||
source.reference.guava-19.0.jar=release/modules/ext/guava-19.0-sources.jar
|
||||
spec.version.base=4.0
|
||||
|
@ -54,18 +54,6 @@
|
||||
<runtime-relative-path>ext/commons-lang3-3.8.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-lang3-3.8.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/java-libpst-1.0-SNAPSHOT.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/java-libpst-1.0-SNAPSHOT.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar</binary-origin>
|
||||
@ -74,6 +62,22 @@
|
||||
<runtime-relative-path>ext/ez-vcard-0.10.5.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/ez-vcard-0.10.5.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/java-libpst-1.0-SNAPSHOT.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/java-libpst-1.0-SNAPSHOT.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/guava-19.0.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/guava-19.0.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/vinnie-2.0.2.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/vinnie-2.0.2.jar</binary-origin>
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011-2014 Basis Technology Corp.
|
||||
* Copyright 2011-2019 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -33,6 +33,7 @@ import java.nio.charset.IllegalCharsetNameException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.charset.UnsupportedCharsetException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
import java.util.logging.Level;
|
||||
@ -57,22 +58,17 @@ import org.apache.tika.parser.txt.CharsetDetector;
|
||||
import org.apache.tika.parser.txt.CharsetMatch;
|
||||
import org.openide.util.NbBundle;
|
||||
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
|
||||
import org.sleuthkit.autopsy.ingest.IngestServices;
|
||||
import org.sleuthkit.datamodel.TskData;
|
||||
import org.sleuthkit.datamodel.EncodedFileOutputStream;
|
||||
|
||||
/**
|
||||
* A parser that extracts information about email messages and attachments from
|
||||
* a mbox file.
|
||||
*
|
||||
* @author jwallace
|
||||
* An Iterator for parsing mbox files. Wraps an instance of MBoxEmailIterator.
|
||||
*/
|
||||
class MboxParser {
|
||||
class MboxParser implements Iterator<EmailMessage>{
|
||||
|
||||
private static final Logger logger = Logger.getLogger(MboxParser.class.getName());
|
||||
private DefaultMessageBuilder messageBuilder;
|
||||
private IngestServices services;
|
||||
private StringBuilder errors;
|
||||
private final DefaultMessageBuilder messageBuilder;
|
||||
private final StringBuilder errors;
|
||||
|
||||
/**
|
||||
* The mime type string for html text.
|
||||
@ -83,9 +79,11 @@ class MboxParser {
|
||||
* The local path of the mbox file.
|
||||
*/
|
||||
private String localPath;
|
||||
|
||||
private Iterator<EmailMessage> emailIterator = null;
|
||||
|
||||
|
||||
MboxParser(IngestServices services, String localPath) {
|
||||
this.services = services;
|
||||
private MboxParser( String localPath) {
|
||||
this.localPath = localPath;
|
||||
messageBuilder = new DefaultMessageBuilder();
|
||||
MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build();
|
||||
@ -97,63 +95,76 @@ class MboxParser {
|
||||
static boolean isValidMimeTypeMbox(byte[] buffer) {
|
||||
return (new String(buffer)).startsWith("From "); //NON-NLS
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse the mbox file and get the email messages.
|
||||
*
|
||||
* @param mboxFile
|
||||
*
|
||||
* @return a list of the email messages in the mbox file.
|
||||
* Returns an instance of MBoxParser that will iterate and return EMailMessage
|
||||
* objects with only the information needed for threading emails.
|
||||
*
|
||||
* @param localPath String path to the mboxFile
|
||||
* @param mboxFile The mboxFile to parse
|
||||
* @return Instance of MboxParser
|
||||
*/
|
||||
List<EmailMessage> parse(File mboxFile, long fileID) {
|
||||
static MboxParser getThreadInfoIterator(String localPath, File mboxFile) {
|
||||
MboxParser parser = new MboxParser(localPath);
|
||||
parser.createIterator(mboxFile, 0, false);
|
||||
return parser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an instance of MBoxParser that will iterate "whole" EmailMessages.
|
||||
*
|
||||
* @param localPath String path to the mboxFile
|
||||
* @param mboxFile The mboxFile to parse
|
||||
* @param fileID The fileID of the abstractFile that mboxFile was found
|
||||
* @return Instance of MboxParser
|
||||
*/
|
||||
static MboxParser getEmailIterator(String localPath, File mboxFile, long fileID) {
|
||||
MboxParser parser = new MboxParser(localPath);
|
||||
parser.createIterator(mboxFile, fileID, true);
|
||||
|
||||
return parser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the real Iterator object instance.
|
||||
*
|
||||
* @param mboxFile The mboxFile to parse
|
||||
* @param fileID The fileID of the abstractFile that mboxFile was found
|
||||
* @param wholeMsg True if EmailMessage should have the whole message,
|
||||
* not just the thread information.
|
||||
*/
|
||||
private void createIterator(File mboxFile, long fileID, boolean wholeMsg) {
|
||||
// Detect possible charsets
|
||||
List<CharsetEncoder> encoders = getPossibleEncoders(mboxFile);
|
||||
|
||||
CharsetEncoder theEncoder = null;
|
||||
Iterable<CharBufferWrapper> mboxIterator = null;
|
||||
|
||||
// Loop through the possible encoders and find the first one that works.
|
||||
// That will usually be one of the first ones.
|
||||
for (CharsetEncoder encoder : encoders) {
|
||||
try {
|
||||
mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
|
||||
theEncoder = encoder;
|
||||
Iterable<CharBufferWrapper> mboxIterable = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build();
|
||||
if(mboxIterable != null) {
|
||||
emailIterator = new MBoxEmailIterator(mboxIterable.iterator(), encoder, fileID, wholeMsg);
|
||||
}
|
||||
break;
|
||||
} catch (CharConversionException | UnsupportedCharsetException ex) {
|
||||
// Not the right encoder
|
||||
} catch (IllegalArgumentException ex) {
|
||||
// Not the right encoder
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "couldn't find mbox file.", ex); //NON-NLS
|
||||
logger.log(Level.WARNING, String.format("Failed to open mbox file: %s %d", mboxFile.getName(), fileID), ex); //NON-NLS
|
||||
addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToReadFile"));
|
||||
return new ArrayList<>();
|
||||
}
|
||||
}
|
||||
|
||||
// If no encoders work, post an error message and return.
|
||||
if (mboxIterator == null || theEncoder == null) {
|
||||
addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.couldntFindCharset"));
|
||||
return new ArrayList<>();
|
||||
}
|
||||
|
||||
List<EmailMessage> emails = new ArrayList<>();
|
||||
long failCount = 0;
|
||||
|
||||
// Parse each message and extract an EmailMessage structure
|
||||
for (CharBufferWrapper message : mboxIterator) {
|
||||
try {
|
||||
Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset()));
|
||||
emails.add(extractEmail(msg, fileID));
|
||||
} catch (RuntimeException | IOException ex) {
|
||||
logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
|
||||
failCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (failCount > 0) {
|
||||
addErrorMessage(
|
||||
NbBundle.getMessage(this.getClass(), "MboxParser.parse.errMsg.failedToParseNMsgs", failCount));
|
||||
}
|
||||
return emails;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return emailIterator != null ? emailIterator.hasNext() : false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmailMessage next() {
|
||||
return emailIterator != null ? emailIterator.next() : null;
|
||||
}
|
||||
|
||||
String getErrors() {
|
||||
@ -211,6 +222,44 @@ class MboxParser {
|
||||
|
||||
return email;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract the subject, inReplyTo, message-ID and references from the Message
|
||||
* object and returns them in a new EmailMessage object.
|
||||
*
|
||||
* @param msg Message object
|
||||
*
|
||||
* @return EmailMessage instance with only some of the message information
|
||||
*/
|
||||
private EmailMessage extractPartialEmail(Message msg) {
|
||||
EmailMessage email = new EmailMessage();
|
||||
email.setSubject(msg.getSubject());
|
||||
email.setMessageID(msg.getMessageId());
|
||||
|
||||
Field field = msg.getHeader().getField("in-reply-to"); //NON-NLS
|
||||
String inReplyTo = null;
|
||||
|
||||
if (field != null) {
|
||||
inReplyTo = field.getBody();
|
||||
email.setInReplyToID(inReplyTo);
|
||||
}
|
||||
|
||||
field = msg.getHeader().getField("references");
|
||||
if (field != null) {
|
||||
List<String> references = new ArrayList<>();
|
||||
for (String id : field.getBody().split(">")) {
|
||||
references.add(id.trim() + ">");
|
||||
}
|
||||
|
||||
if (!references.contains(inReplyTo)) {
|
||||
references.add(inReplyTo);
|
||||
}
|
||||
|
||||
email.setReferences(references);
|
||||
}
|
||||
|
||||
return email;
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle a multipart mime message. Recursively calls handleMultipart if one
|
||||
@ -333,7 +382,7 @@ class MboxParser {
|
||||
addErrorMessage(
|
||||
NbBundle.getMessage(this.getClass(),
|
||||
"MboxParser.handleAttch.errMsg.failedToCreateOnDisk", outPath));
|
||||
logger.log(Level.INFO, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
|
||||
logger.log(Level.WARNING, "Failed to create file output stream for: " + outPath, ex); //NON-NLS
|
||||
return;
|
||||
}
|
||||
|
||||
@ -346,14 +395,14 @@ class MboxParser {
|
||||
// This could potentially be other types. Only seen this once.
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.INFO, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
|
||||
logger.log(Level.WARNING, "Failed to write mbox email attachment to disk.", ex); //NON-NLS
|
||||
addErrorMessage(NbBundle.getMessage(this.getClass(), "MboxParser.handleAttch.failedWriteToDisk", filename));
|
||||
return;
|
||||
} finally {
|
||||
try {
|
||||
fos.close();
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.INFO, "Failed to close file output stream", ex); //NON-NLS
|
||||
logger.log(Level.WARNING, "Failed to close file output stream", ex); //NON-NLS
|
||||
}
|
||||
}
|
||||
|
||||
@ -441,7 +490,7 @@ class MboxParser {
|
||||
try {
|
||||
is.close();
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.INFO, "Failed to close input stream"); //NON-NLS
|
||||
logger.log(Level.WARNING, "Failed to close input stream"); //NON-NLS
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -449,4 +498,45 @@ class MboxParser {
|
||||
private void addErrorMessage(String msg) {
|
||||
errors.append("<li>").append(msg).append("</li>"); //NON-NLS
|
||||
}
|
||||
|
||||
/**
|
||||
* An Interator for mbox email messages.
|
||||
*/
|
||||
final class MBoxEmailIterator implements Iterator<EmailMessage> {
|
||||
|
||||
private final Iterator<CharBufferWrapper> mboxIterator;
|
||||
private final CharsetEncoder encoder;
|
||||
private final long fileID;
|
||||
private final boolean wholeMsg;
|
||||
|
||||
MBoxEmailIterator(Iterator<CharBufferWrapper> mboxIter, CharsetEncoder encoder, long fileID, boolean wholeMsg) {
|
||||
mboxIterator = mboxIter;
|
||||
this.encoder = encoder;
|
||||
this.fileID = fileID;
|
||||
this.wholeMsg = wholeMsg;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return (mboxIterator != null && encoder != null) ? mboxIterator.hasNext() : false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmailMessage next() {
|
||||
CharBufferWrapper messageBuffer = mboxIterator.next();
|
||||
|
||||
try {
|
||||
Message msg = messageBuilder.parseMessage(messageBuffer.asInputStream(encoder.charset()));
|
||||
if(wholeMsg) {
|
||||
return extractEmail(msg, fileID);
|
||||
} else {
|
||||
return extractPartialEmail(msg);
|
||||
}
|
||||
} catch (RuntimeException | IOException ex) {
|
||||
logger.log(Level.WARNING, "Failed to get message from mbox: {0}", ex.getMessage()); //NON-NLS
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011-2014 Basis Technology Corp.
|
||||
* Copyright 2011-2019 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ -18,6 +18,7 @@
|
||||
*/
|
||||
package org.sleuthkit.autopsy.thunderbirdparser;
|
||||
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.pff.PSTAttachment;
|
||||
import com.pff.PSTException;
|
||||
import com.pff.PSTFile;
|
||||
@ -29,6 +30,7 @@ import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
import java.util.logging.Level;
|
||||
@ -55,112 +57,193 @@ class PstParser {
|
||||
* First four bytes of a pst file.
|
||||
*/
|
||||
private static int PST_HEADER = 0x2142444E;
|
||||
private IngestServices services;
|
||||
/**
|
||||
* A map of PSTMessages to their Local path within the file's internal
|
||||
* directory structure.
|
||||
*/
|
||||
private List<EmailMessage> results;
|
||||
private StringBuilder errors;
|
||||
|
||||
private final StringBuilder errors;
|
||||
|
||||
private final IngestServices services;
|
||||
|
||||
private PSTFile pstFile;
|
||||
private long fileID;
|
||||
|
||||
private int failureCount = 0;
|
||||
|
||||
PstParser(IngestServices services) {
|
||||
results = new ArrayList<>();
|
||||
this.services = services;
|
||||
errors = new StringBuilder();
|
||||
this.services = services;
|
||||
}
|
||||
|
||||
enum ParseResult {
|
||||
|
||||
OK, ERROR, ENCRYPT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse and extract email messages from the pst/ost file.
|
||||
*
|
||||
* @param file A pst or ost file.
|
||||
*
|
||||
* @return ParseResult: OK on success, ERROR on an error, ENCRYPT if failed
|
||||
* because the file is encrypted.
|
||||
* Create an instance of PSTFile for the given File object.
|
||||
*
|
||||
* The constructor for PSTFile object will throw a generic PSTException if the
|
||||
* file is encrypted.
|
||||
* <a href=https://github.com/rjohnsondev/java-libpst/blob/5436a7abc8ac8c1622bf5dba0f4f9428fdbcd634/src/main/java/com/pff/PSTFile.java> PSTFile.java</a>
|
||||
*
|
||||
* @param file File to open
|
||||
* @param fileID File id for use when creating the EmailMessage objects
|
||||
* @return ParserResult value OK if the PSTFile was successfully created,
|
||||
* ENCRYPT will be returned for PSTExceptions that match a specific
|
||||
* message or IllegalArgumentExceptions
|
||||
*/
|
||||
ParseResult parse(File file, long fileID) {
|
||||
PSTFile pstFile;
|
||||
long failures;
|
||||
ParseResult open(File file, long fileID) {
|
||||
if(file == null) {
|
||||
return ParseResult.ERROR;
|
||||
}
|
||||
|
||||
try {
|
||||
pstFile = new PSTFile(file);
|
||||
failures = processFolder(pstFile.getRootFolder(), "\\", true, fileID);
|
||||
if (failures > 0) {
|
||||
addErrorMessage(
|
||||
NbBundle.getMessage(this.getClass(), "PstParser.parse.errMsg.failedToParseNMsgs", failures));
|
||||
} catch(PSTException ex) {
|
||||
// This is the message thrown from the PSTFile constructor if it
|
||||
// detects that the file is encrypted.
|
||||
if(ex.getMessage().equals("Only unencrypted and compressable PST files are supported at this time")) { //NON-NLS
|
||||
logger.log(Level.INFO, "Found encrypted PST file."); //NON-NLS
|
||||
return ParseResult.ENCRYPT;
|
||||
}
|
||||
return ParseResult.OK;
|
||||
} catch (PSTException | IOException ex) {
|
||||
String msg = file.getName() + ": Failed to create internal java-libpst PST file to parse:\n" + ex.getMessage(); //NON-NLS
|
||||
logger.log(Level.WARNING, msg);
|
||||
logger.log(Level.WARNING, msg, ex);
|
||||
return ParseResult.ERROR;
|
||||
} catch (IllegalArgumentException ex) {
|
||||
} catch (IOException ex) {
|
||||
String msg = file.getName() + ": Failed to create internal java-libpst PST file to parse:\n" + ex.getMessage(); //NON-NLS
|
||||
logger.log(Level.WARNING, msg, ex);
|
||||
return ParseResult.ERROR;
|
||||
} catch (IllegalArgumentException ex) { // Not sure if this is true, was in previous version of code.
|
||||
logger.log(Level.INFO, "Found encrypted PST file."); //NON-NLS
|
||||
return ParseResult.ENCRYPT;
|
||||
}
|
||||
|
||||
return ParseResult.OK;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the results of the parsing.
|
||||
*
|
||||
* @return
|
||||
* Creates an EmailMessage iterator for pstFile. These Email objects will be
|
||||
* complete and with all available information.
|
||||
*
|
||||
* @return An instance of an EmailMessage Iterator
|
||||
*/
|
||||
List<EmailMessage> getResults() {
|
||||
return results;
|
||||
Iterator<EmailMessage> getEmailMessageIterator() {
|
||||
if(pstFile == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
Iterable<EmailMessage> iterable = null;
|
||||
|
||||
try {
|
||||
iterable = getEmaiMessageIterator(pstFile.getRootFolder(), "\\", fileID, true);
|
||||
} catch (PSTException | IOException ex) {
|
||||
logger.log(Level.WARNING, String.format("Exception thrown while parsing fileID: %d", fileID), ex);
|
||||
}
|
||||
|
||||
if(iterable == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return iterable.iterator();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get a List of EmailMessages which contain only the information needed for
|
||||
* threading the emails.
|
||||
*
|
||||
* @return A list of EmailMessage or an empty list if non were found.
|
||||
*/
|
||||
List<EmailMessage> getPartialEmailMessages() {
|
||||
List<EmailMessage> messages = new ArrayList<>();
|
||||
Iterator<EmailMessage> iterator = getPartialEmailMessageIterator();
|
||||
if(iterator != null) {
|
||||
while(iterator.hasNext()) {
|
||||
messages.add(iterator.next());
|
||||
}
|
||||
}
|
||||
|
||||
return messages;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
String getErrors() {
|
||||
return errors.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Process this folder and all subfolders, adding every email found to
|
||||
* results. Accumulates the folder hierarchy path as it navigates the folder
|
||||
* structure.
|
||||
*
|
||||
* @param folder The folder to navigate and process
|
||||
* @param path The path to the folder within the pst/ost file's directory
|
||||
* structure
|
||||
*
|
||||
* @throws PSTException
|
||||
* @throws IOException
|
||||
|
||||
int getFailureCount() {
|
||||
return failureCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get an Iterator which will iterate over the PSTFile, but return EmailMessages
|
||||
* with only the information needed for putting the emails into threads.
|
||||
*
|
||||
* @return An EmailMessage iterator or null if no messages were found
|
||||
*/
|
||||
private long processFolder(PSTFolder folder, String path, boolean root, long fileID) {
|
||||
String newPath = (root ? path : path + "\\" + folder.getDisplayName());
|
||||
long failCount = 0L; // Number of emails that failed
|
||||
if (folder.hasSubfolders()) {
|
||||
List<PSTFolder> subFolders;
|
||||
try {
|
||||
subFolders = folder.getSubFolders();
|
||||
} catch (PSTException | IOException ex) {
|
||||
subFolders = new ArrayList<>();
|
||||
logger.log(Level.INFO, "java-libpst exception while getting subfolders: {0}", ex.getMessage()); //NON-NLS
|
||||
}
|
||||
|
||||
for (PSTFolder f : subFolders) {
|
||||
failCount += processFolder(f, newPath, false, fileID);
|
||||
}
|
||||
private Iterator<EmailMessage> getPartialEmailMessageIterator() {
|
||||
if(pstFile == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (folder.getContentCount() != 0) {
|
||||
PSTMessage email;
|
||||
// A folder's children are always emails, never other folders.
|
||||
try {
|
||||
while ((email = (PSTMessage) folder.getNextChild()) != null) {
|
||||
results.add(extractEmailMessage(email, newPath, fileID));
|
||||
}
|
||||
} catch (PSTException | IOException ex) {
|
||||
failCount++;
|
||||
logger.log(Level.INFO, "java-libpst exception while getting emails from a folder: {0}", ex.getMessage()); //NON-NLS
|
||||
}
|
||||
|
||||
Iterable<EmailMessage> iterable = null;
|
||||
|
||||
try {
|
||||
iterable = getEmaiMessageIterator(pstFile.getRootFolder(), "\\", fileID, false);
|
||||
} catch (PSTException | IOException ex) {
|
||||
logger.log(Level.WARNING, String.format("Exception thrown while parsing fileID: %d", fileID), ex);
|
||||
}
|
||||
|
||||
return failCount;
|
||||
|
||||
if(iterable == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return iterable.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an Iterable object of Email messages for the given folder.
|
||||
*
|
||||
* @param folder PSTFolder to process
|
||||
* @param path String path to folder
|
||||
* @param fileID FileID of the AbstractFile folder was found in
|
||||
* @param partialEmail Whether or not fill the EMailMessage with all data
|
||||
*
|
||||
* @return An Iterable for iterating email message, or null if there were no
|
||||
* messages or children in folder.
|
||||
*
|
||||
* @throws PSTException
|
||||
* @throws IOException
|
||||
*/
|
||||
private Iterable<EmailMessage> getEmaiMessageIterator(PSTFolder folder, String path, long fileID, boolean wholeMsg) throws PSTException, IOException {
|
||||
Iterable<EmailMessage> iterable = null;
|
||||
|
||||
if(folder.getContentCount() > 0) {
|
||||
iterable = new PstEmailIterator(folder, path, fileID, wholeMsg).getIterable();
|
||||
}
|
||||
|
||||
if(folder.hasSubfolders()) {
|
||||
List<PSTFolder> subFolders = folder.getSubFolders();
|
||||
for(PSTFolder subFolder: subFolders) {
|
||||
String newpath = path + "\\" + subFolder.getDisplayName();
|
||||
Iterable<EmailMessage> subIterable = getEmaiMessageIterator(subFolder, newpath, fileID, wholeMsg);
|
||||
if(subIterable == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if(iterable != null) {
|
||||
iterable = Iterables.concat(iterable, subIterable);
|
||||
} else {
|
||||
iterable = subIterable;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
return iterable;
|
||||
}
|
||||
/**
|
||||
* Create an EmailMessage from a PSTMessage.
|
||||
*
|
||||
@ -214,6 +297,33 @@ class PstParser {
|
||||
|
||||
return email;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an EmailMessage from a PSTMessage with only the information
|
||||
* needed for threading emails.
|
||||
*
|
||||
* @return EmailMessage object with only some information, not all of the msg.
|
||||
*/
|
||||
private EmailMessage extractPartialEmailMessage(PSTMessage msg) {
|
||||
EmailMessage email = new EmailMessage();
|
||||
email.setSubject(msg.getSubject());
|
||||
email.setId(msg.getDescriptorNodeId());
|
||||
email.setMessageID(msg.getInternetMessageId());
|
||||
String inReplyToID = msg.getInReplyToId();
|
||||
email.setInReplyToID(inReplyToID);
|
||||
List<String> references = extractReferences(msg.getTransportMessageHeaders());
|
||||
if (inReplyToID != null && !inReplyToID.isEmpty()) {
|
||||
if (references == null) {
|
||||
references = new ArrayList<>();
|
||||
references.add(inReplyToID);
|
||||
} else if (!references.contains(inReplyToID)) {
|
||||
references.add(inReplyToID);
|
||||
}
|
||||
}
|
||||
email.setReferences(references);
|
||||
|
||||
return email;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the attachments within the PSTMessage to the EmailMessage.
|
||||
@ -388,5 +498,95 @@ class PstParser {
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* A iterator for processing the PST email folder structure and returning
|
||||
* instances of the EmailMessage object.
|
||||
*/
|
||||
private final class PstEmailIterator implements Iterator<EmailMessage> {
|
||||
|
||||
private final PSTFolder folder;
|
||||
private EmailMessage current;
|
||||
private EmailMessage next;
|
||||
|
||||
private final String currentPath;
|
||||
private final long fileID;
|
||||
private final boolean wholeMsg;
|
||||
|
||||
/**
|
||||
* Class constructor, initializes the "next" message;
|
||||
*
|
||||
* @param folder PSTFolder object to iterate across
|
||||
* @param path String path value to the location of folder
|
||||
* @param fileID Long fileID of the abstract file this PSTFolder was found
|
||||
*/
|
||||
PstEmailIterator(PSTFolder folder, String path, long fileID, boolean wholeMsg) {
|
||||
this.folder = folder;
|
||||
this.fileID = fileID;
|
||||
this.currentPath = path;
|
||||
this.wholeMsg = wholeMsg;
|
||||
|
||||
if(folder.getContentCount() > 0) {
|
||||
try {
|
||||
PSTMessage message = (PSTMessage)folder.getNextChild();
|
||||
if(message != null) {
|
||||
if(wholeMsg) {
|
||||
next = extractEmailMessage(message, currentPath, fileID);
|
||||
} else {
|
||||
next = extractPartialEmailMessage(message);
|
||||
}
|
||||
}
|
||||
} catch (PSTException | IOException ex) {
|
||||
failureCount++;
|
||||
logger.log(Level.WARNING, String.format("Unable to extract emails for path: %s file ID: %d ", path, fileID), ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return next != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmailMessage next() {
|
||||
|
||||
current = next;
|
||||
|
||||
try {
|
||||
PSTMessage message = (PSTMessage)folder.getNextChild();
|
||||
if(message != null) {
|
||||
if(wholeMsg) {
|
||||
next = extractEmailMessage(message, currentPath, fileID);
|
||||
} else {
|
||||
next = extractPartialEmailMessage(message);
|
||||
}
|
||||
} else {
|
||||
next = null;
|
||||
}
|
||||
} catch (PSTException | IOException ex) {
|
||||
logger.log(Level.WARNING, String.format("Unable to extract emails for path: %s file ID: %d ", currentPath, fileID), ex);
|
||||
failureCount++;
|
||||
next = null;
|
||||
}
|
||||
|
||||
return current;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a wrapped Iterable version of PstEmailIterator
|
||||
*
|
||||
* @return Iterable wrapping this class
|
||||
*/
|
||||
Iterable<EmailMessage> getIterable(){
|
||||
return new Iterable<EmailMessage>(){
|
||||
@Override
|
||||
public Iterator<EmailMessage> iterator() {
|
||||
return PstEmailIterator.this;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.logging.Level;
|
||||
@ -183,51 +184,53 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
|
||||
}
|
||||
|
||||
PstParser parser = new PstParser(services);
|
||||
PstParser.ParseResult result = parser.parse(file, abstractFile.getId());
|
||||
PstParser.ParseResult result = parser.open(file, abstractFile.getId());
|
||||
|
||||
if (result == PstParser.ParseResult.OK) {
|
||||
// parse success: Process email and add artifacts
|
||||
processEmails(parser.getResults(), abstractFile);
|
||||
|
||||
} else if (result == PstParser.ParseResult.ENCRYPT) {
|
||||
// encrypted pst: Add encrypted file artifact
|
||||
try {
|
||||
BlackboardArtifact artifact = abstractFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED);
|
||||
artifact.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME, EmailParserModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.encryptionFileLevel")));
|
||||
switch( result) {
|
||||
case OK:
|
||||
processEmails(parser.getPartialEmailMessages(), parser.getEmailMessageIterator(), abstractFile);
|
||||
break;
|
||||
|
||||
case ENCRYPT:
|
||||
// encrypted pst: Add encrypted file artifact
|
||||
try {
|
||||
// index the artifact for keyword search
|
||||
blackboard.indexArtifact(artifact);
|
||||
} catch (Blackboard.BlackboardException ex) {
|
||||
MessageNotifyUtil.Notify.error(Bundle.ThunderbirdMboxFileIngestModule_processPst_indexError_message(), artifact.getDisplayName());
|
||||
logger.log(Level.SEVERE, "Unable to index blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
|
||||
}
|
||||
BlackboardArtifact artifact = abstractFile.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED);
|
||||
artifact.addAttribute(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME, EmailParserModuleFactory.getModuleName(), NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.encryptionFileLevel")));
|
||||
|
||||
services.fireModuleDataEvent(new ModuleDataEvent(EmailParserModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED));
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.INFO, "Failed to add encryption attribute to file: {0}", abstractFile.getName()); //NON-NLS
|
||||
}
|
||||
} else {
|
||||
// parsing error: log message
|
||||
postErrorMessage(
|
||||
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processPst.errProcFile.msg",
|
||||
abstractFile.getName()),
|
||||
NbBundle.getMessage(this.getClass(),
|
||||
"ThunderbirdMboxFileIngestModule.processPst.errProcFile.details"));
|
||||
logger.log(Level.INFO, "PSTParser failed to parse {0}", abstractFile.getName()); //NON-NLS
|
||||
return ProcessResult.ERROR;
|
||||
try {
|
||||
// index the artifact for keyword search
|
||||
blackboard.indexArtifact(artifact);
|
||||
} catch (Blackboard.BlackboardException ex) {
|
||||
MessageNotifyUtil.Notify.error(Bundle.ThunderbirdMboxFileIngestModule_processPst_indexError_message(), artifact.getDisplayName());
|
||||
logger.log(Level.SEVERE, "Unable to index blackboard artifact " + artifact.getArtifactID(), ex); //NON-NLS
|
||||
}
|
||||
|
||||
services.fireModuleDataEvent(new ModuleDataEvent(EmailParserModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_ENCRYPTION_DETECTED));
|
||||
} catch (TskCoreException ex) {
|
||||
logger.log(Level.INFO, "Failed to add encryption attribute to file: {0}", abstractFile.getName()); //NON-NLS
|
||||
}
|
||||
break;
|
||||
default:
|
||||
// parsing error: log message
|
||||
postErrorMessage(
|
||||
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processPst.errProcFile.msg",
|
||||
abstractFile.getName()),
|
||||
NbBundle.getMessage(this.getClass(),
|
||||
"ThunderbirdMboxFileIngestModule.processPst.errProcFile.details"));
|
||||
logger.log(Level.INFO, "PSTParser failed to parse {0}", abstractFile.getName()); //NON-NLS
|
||||
return ProcessResult.ERROR;
|
||||
}
|
||||
|
||||
if (file.delete() == false) {
|
||||
logger.log(Level.INFO, "Failed to delete temp file: {0}", file.getName()); //NON-NLS
|
||||
}
|
||||
|
||||
String errors = parser.getErrors();
|
||||
if (errors.isEmpty() == false) {
|
||||
postErrorMessage(
|
||||
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processPst.errProcFile.msg2",
|
||||
abstractFile.getName()), errors);
|
||||
}
|
||||
// String errors = parser.getErrors();
|
||||
// if (errors.isEmpty() == false) {
|
||||
// postErrorMessage(
|
||||
// NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processPst.errProcFile.msg2",
|
||||
// abstractFile.getName()), errors);
|
||||
// }
|
||||
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
@ -281,21 +284,29 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
MboxParser parser = new MboxParser(services, emailFolder);
|
||||
List<EmailMessage> emails = parser.parse(file, abstractFile.getId());
|
||||
processEmails(emails, abstractFile);
|
||||
MboxParser emailIterator = MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId());
|
||||
List<EmailMessage> emails = new ArrayList<>();
|
||||
if(emailIterator != null) {
|
||||
while(emailIterator.hasNext()) {
|
||||
EmailMessage emailMessage = emailIterator.next();
|
||||
if(emailMessage != null) {
|
||||
emails.add(emailMessage);
|
||||
}
|
||||
}
|
||||
|
||||
String errors = emailIterator.getErrors();
|
||||
if (!errors.isEmpty()) {
|
||||
postErrorMessage(
|
||||
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processMBox.errProcFile.msg2",
|
||||
abstractFile.getName()), errors);
|
||||
}
|
||||
}
|
||||
processEmails(emails, MboxParser.getEmailIterator( emailFolder, file, abstractFile.getId()), abstractFile);
|
||||
|
||||
if (file.delete() == false) {
|
||||
logger.log(Level.INFO, "Failed to delete temp file: {0}", file.getName()); //NON-NLS
|
||||
}
|
||||
|
||||
String errors = parser.getErrors();
|
||||
if (errors.isEmpty() == false) {
|
||||
postErrorMessage(
|
||||
NbBundle.getMessage(this.getClass(), "ThunderbirdMboxFileIngestModule.processMBox.errProcFile.msg2",
|
||||
abstractFile.getName()), errors);
|
||||
}
|
||||
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
|
||||
@ -434,7 +445,50 @@ public final class ThunderbirdMboxFileIngestModule implements FileIngestModule {
|
||||
context.addFilesToJob(derivedFiles);
|
||||
services.fireModuleDataEvent(new ModuleDataEvent(EmailParserModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
|
||||
}
|
||||
|
||||
private void processEmails(List<EmailMessage> partialEmailsForThreading, Iterator<EmailMessage> fullMessageIterator, AbstractFile abstractFile) {
|
||||
// Putting try/catch around this to catch any exception and still allow
|
||||
// the creation of the artifacts to continue.
|
||||
try{
|
||||
EmailMessageThreader.threadMessages(partialEmailsForThreading, String.format("%d", abstractFile.getId()));
|
||||
} catch(Exception ex) {
|
||||
logger.log(Level.WARNING, String.format("Exception thrown parsing emails from %s", abstractFile.getName()), ex);
|
||||
}
|
||||
|
||||
List<AbstractFile> derivedFiles = new ArrayList<>();
|
||||
|
||||
int msgCnt = 0;
|
||||
while(fullMessageIterator.hasNext()) {
|
||||
EmailMessage current = fullMessageIterator.next();
|
||||
|
||||
if(current == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if(partialEmailsForThreading.size() > msgCnt) {
|
||||
EmailMessage threaded = partialEmailsForThreading.get(msgCnt++);
|
||||
|
||||
if(threaded.getMessageID().equals(current.getMessageID()) &&
|
||||
threaded.getSubject().equals(current.getSubject())) {
|
||||
current.setMessageThreadID(threaded.getMessageThreadID());
|
||||
}
|
||||
}
|
||||
|
||||
BlackboardArtifact msgArtifact = addEmailArtifact(current, abstractFile);
|
||||
|
||||
if ((msgArtifact != null) && (current.hasAttachment())) {
|
||||
derivedFiles.addAll(handleAttachments(current.getAttachments(), abstractFile, msgArtifact ));
|
||||
}
|
||||
}
|
||||
|
||||
if (derivedFiles.isEmpty() == false) {
|
||||
for (AbstractFile derived : derivedFiles) {
|
||||
services.fireModuleContentEvent(new ModuleContentEvent(derived));
|
||||
}
|
||||
}
|
||||
context.addFilesToJob(derivedFiles);
|
||||
services.fireModuleDataEvent(new ModuleDataEvent(EmailParserModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
|
||||
}
|
||||
/**
|
||||
* Add the given attachments as derived files and reschedule them for
|
||||
* ingest.
|
||||
|
Loading…
x
Reference in New Issue
Block a user