diff --git a/thunderbirdparser/nbproject/project.properties b/thunderbirdparser/nbproject/project.properties index 22afe673d8..2ce6b9ccc4 100644 --- a/thunderbirdparser/nbproject/project.properties +++ b/thunderbirdparser/nbproject/project.properties @@ -1,3 +1,8 @@ +file.reference.apache-mime4j-core-0.8.0-SNAPSHOT-sources.jar=release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT-sources.jar +file.reference.apache-mime4j-core-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-core-0.8.0-SNAPSHOT.jar +file.reference.apache-mime4j-dom-0.8.0-SNAPSHOT-sources.jar=release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT-sources.jar +file.reference.apache-mime4j-dom-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-dom-0.8.0-SNAPSHOT.jar +file.reference.apache-mime4j-project-0.8.0-SNAPSHOT.jar=release/modules/ext/apache-mime4j-project-0.8.0-SNAPSHOT.jar file.reference.java-libpst-1.0-SNAPSHOT.jar=release/modules/ext/java-libpst-1.0-SNAPSHOT.jar javac.source=1.7 javac.compilerargs=-Xlint -Xlint:-serial diff --git a/thunderbirdparser/nbproject/project.xml b/thunderbirdparser/nbproject/project.xml index e1f2707e66..f15a8276ad 100644 --- a/thunderbirdparser/nbproject/project.xml +++ b/thunderbirdparser/nbproject/project.xml @@ -58,10 +58,6 @@ ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT-sources.jar release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT-sources.jar - - ext/apache-mime4j-project-0.8.0-SNAPSHOT-tests.jar - release/modules/ext/apache-mime4j-project-0.8.0-SNAPSHOT-tests.jar - diff --git a/thunderbirdparser/release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT-sources.jar b/thunderbirdparser/release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT-sources.jar index 3195665280..f7248bd03e 100755 Binary files a/thunderbirdparser/release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT-sources.jar and b/thunderbirdparser/release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT-sources.jar differ diff --git a/thunderbirdparser/release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar b/thunderbirdparser/release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar index 2aa41fbb45..bb7d15da61 100755 Binary files a/thunderbirdparser/release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar and b/thunderbirdparser/release/modules/ext/apache-mime4j-mbox-iterator-0.8.0-SNAPSHOT.jar differ diff --git a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/EmailMessage.java b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/EmailMessage.java index 6352e5a1a2..f015857716 100755 --- a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/EmailMessage.java +++ b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/EmailMessage.java @@ -53,7 +53,9 @@ public class EmailMessage { } void setRecipients(String recipients) { - this.recipients = recipients; + if (recipients != null) { + this.recipients = recipients; + } } String getSender() { @@ -61,7 +63,9 @@ public class EmailMessage { } void setSender(String sender) { - this.sender = sender; + if (sender != null) { + this.sender = sender; + } } String getSubject() { @@ -69,7 +73,9 @@ public class EmailMessage { } void setSubject(String subject) { - this.subject = subject; + if (subject != null) { + this.subject = subject; + } } String getTextBody() { @@ -77,7 +83,9 @@ public class EmailMessage { } void setTextBody(String textBody) { - this.textBody = textBody; + if (textBody != null) { + this.textBody = textBody; + } } String getHtmlBody() { @@ -85,7 +93,9 @@ public class EmailMessage { } void setHtmlBody(String htmlBody) { - this.htmlBody = htmlBody; + if (htmlBody != null) { + this.htmlBody = htmlBody; + } } String getRtfBody() { @@ -93,7 +103,9 @@ public class EmailMessage { } void setRtfBody(String rtfBody) { - this.rtfBody = rtfBody; + if (rtfBody != null) { + this.rtfBody = rtfBody; + } } long getSentDate() { @@ -101,7 +113,9 @@ public class EmailMessage { } void setSentDate(Date sentDate) { - this.sentDate = sentDate.getTime() / 1000; + if (sentDate != null) { + this.sentDate = sentDate.getTime() / 1000; + } } void setSentDate(long sentDate) { @@ -113,7 +127,9 @@ public class EmailMessage { } void setBcc(String bcc) { - this.bcc = bcc; + if (bcc != null) { + this.bcc = bcc; + } } String getCc() { @@ -121,7 +137,9 @@ public class EmailMessage { } void setCc(String cc) { - this.cc = cc; + if (cc != null) { + this.cc = cc; + } } void addAttachment(Attachment a) { @@ -146,7 +164,9 @@ public class EmailMessage { } void setLocalPath(String localPath) { - this.localPath = localPath; + if (localPath != null) { + this.localPath = localPath; + } } } @@ -170,7 +190,9 @@ class Attachment { } void setName(String name) { - this.name = name; + if (name != null) { + this.name = name; + } } String getLocalPath() { @@ -178,7 +200,9 @@ class Attachment { } void setLocalPath(String localPath) { - this.localPath = localPath; + if (localPath != null) { + this.localPath = localPath; + } } long getSize() { @@ -198,7 +222,9 @@ class Attachment { } void setCrTime(Date crTime) { - this.crTime = crTime.getTime() / 1000; + if (crTime != null) { + this.crTime = crTime.getTime() / 1000; + } } long getcTime() { @@ -210,7 +236,9 @@ class Attachment { } void setcTime(Date cTime) { - this.cTime = cTime.getTime() / 1000; + if (cTime != null) { + this.cTime = cTime.getTime() / 1000; + } } long getaTime() { @@ -222,7 +250,9 @@ class Attachment { } void setaTime(Date aTime) { - this.aTime = aTime.getTime() / 1000; + if (aTime != null) { + this.aTime = aTime.getTime() / 1000; + } } long getmTime() { @@ -234,6 +264,8 @@ class Attachment { } void setmTime(Date mTime) { - this.mTime = mTime.getTime() / 1000; + if (mTime != null) { + this.mTime = mTime.getTime() / 1000; + } } } \ No newline at end of file diff --git a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/MboxParser.java b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/MboxParser.java index 5ce7fee704..309b8a3403 100755 --- a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/MboxParser.java +++ b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/MboxParser.java @@ -18,22 +18,29 @@ */ package org.sleuthkit.autopsy.thunderbirdparser; +import java.io.BufferedInputStream; import java.io.BufferedReader; +import java.io.CharConversionException; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; +import java.nio.charset.IllegalCharsetNameException; import java.nio.charset.StandardCharsets; +import java.nio.charset.UnsupportedCharsetException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; -import org.apache.james.mime4j.MimeException; import org.apache.james.mime4j.dom.BinaryBody; +import org.apache.james.mime4j.dom.Body; import org.apache.james.mime4j.dom.Entity; import org.apache.james.mime4j.dom.Message; -import org.apache.james.mime4j.dom.MessageBuilder; import org.apache.james.mime4j.dom.Multipart; import org.apache.james.mime4j.dom.TextBody; import org.apache.james.mime4j.dom.address.AddressList; @@ -44,6 +51,10 @@ import org.apache.james.mime4j.dom.field.ContentTypeField; import org.apache.james.mime4j.mboxiterator.CharBufferWrapper; import org.apache.james.mime4j.mboxiterator.MboxIterator; import org.apache.james.mime4j.message.DefaultMessageBuilder; +import org.apache.james.mime4j.stream.MimeConfig; +import org.apache.tika.parser.txt.CharsetDetector; +import org.apache.tika.parser.txt.CharsetMatch; +import org.sleuthkit.autopsy.ingest.IngestServices; /** * A parser that extracts information about email messages and attachments from @@ -53,7 +64,9 @@ import org.apache.james.mime4j.message.DefaultMessageBuilder; */ public class MboxParser { private static final Logger logger = Logger.getLogger(MboxParser.class.getName()); - private MessageBuilder messageBuilder; + private DefaultMessageBuilder messageBuilder; + private IngestServices services; + /** * The mime type string for html text. */ @@ -64,9 +77,13 @@ public class MboxParser { */ private String localPath; - MboxParser(String localPath) { + MboxParser(IngestServices services, String localPath) { + this.services = services; this.localPath = localPath; messageBuilder = new DefaultMessageBuilder(); + MimeConfig config = MimeConfig.custom().setMaxLineLen(-1).build(); + // disable line length checks. + messageBuilder.setMimeEntityConfig(config); } static boolean isValidMimeTypeMbox(byte[] buffer) { @@ -79,24 +96,50 @@ public class MboxParser { * @return a list of the email messages in the mbox file. */ List parse(File mboxFile) { - //JWTODO: detect charset - CharsetEncoder encoder = StandardCharsets.ISO_8859_1.newEncoder(); - List emails = new ArrayList<>(); - try { - for (CharBufferWrapper message : MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build()) { - try { - Message msg = messageBuilder.parseMessage(message.asInputStream(encoder.charset())); - emails.add(extractEmail(msg)); - } catch (MimeException ex) { - logger.log(Level.WARNING, "Failed to get message from mbox.", ex); - } + // Detect possible charsets + List encoders = getPossibleEncoders(mboxFile); + + CharsetEncoder theEncoder = null; + Iterable mboxIterator = null; + // Loop through the possible encoders and find the first one that works. + // That will usually be one of the first ones. + for (CharsetEncoder encoder : encoders) { + try { + mboxIterator = MboxIterator.fromFile(mboxFile).charset(encoder.charset()).build(); + theEncoder = encoder; + break; + } catch (CharConversionException | UnsupportedCharsetException ex) { + // Not the right encoder + } catch (IllegalArgumentException ex) { + // Not the right encoder + } catch (IOException ex) { + logger.log(Level.WARNING, "couldn't find mbox file.", ex); + //JWTODO: post inbox message + return Collections.EMPTY_LIST; } - } catch (FileNotFoundException ex) { - logger.log(Level.WARNING, "couldn't find mbox file.", ex); - } catch (IOException ex) { - logger.log(Level.WARNING, "Error getting messsages from mbox file."); } + // If no encoders work, post an error message and return. + if (mboxIterator == null || theEncoder == null) { + //JWTODO: post inbox message + return Collections.EMPTY_LIST; + } + + List emails = new ArrayList<>(); + long failCount = 0; + + // Parse each message and extract an EmailMessage structure + for (CharBufferWrapper message : mboxIterator) { + try { + Message msg = messageBuilder.parseMessage(message.asInputStream(theEncoder.charset())); + emails.add(extractEmail(msg)); + } catch (IOException ex) { + logger.log(Level.WARNING, "Failed to get message from mbox: " + ex.getMessage()); + failCount++; + } + } + + //JWTODO: post inbox message w/ fail count return emails; } @@ -133,6 +176,7 @@ public class MboxParser { * Recursively calls handleMultipart if one of the body parts is another * multipart. Otherwise, calls the correct method to extract information out * of each part of the body. + * * @param email * @param multi */ @@ -147,7 +191,7 @@ public class MboxParser { e.getMimeType().equals(ContentTypeField.TYPE_TEXT_PLAIN)) { handleTextBody(email, (TextBody) e.getBody(), e.getMimeType()); } else { - logger.log(Level.INFO, "Found unrecognized entity: " + e); + // Ignore other types. } } } @@ -179,7 +223,8 @@ public class MboxParser { email.setHtmlBody(bodyString.toString()); break; default: - logger.log(Level.INFO, "Found unrecognized mime type: " + type); + // Not interested in other text types. + break; } } catch (IOException ex) { logger.log(Level.WARNING, "Error getting text body of mbox message", ex); @@ -195,21 +240,30 @@ public class MboxParser { private void handleAttachment(EmailMessage email, Entity e) { String outputDirPath = ThunderbirdMboxFileIngestModule.getModuleOutputPath() + File.separator; String filename = e.getFilename(); - String outPath = outputDirPath + filename; + String uniqueFilename = filename + "-" + email.getSentDate(); + String outPath = outputDirPath + uniqueFilename; FileOutputStream fos; BinaryBody bb; try { fos = new FileOutputStream(outPath); } catch (FileNotFoundException ex) { - logger.log(Level.INFO, "", ex); + //JWTODO: post ingest message + logger.log(Level.INFO, "Failed to create file output stream for: " + outPath, ex); return; } try { - bb = (BinaryBody) e.getBody(); - bb.writeTo(fos); + Body b = e.getBody(); + if (b instanceof BinaryBody) { + bb = (BinaryBody) b; + bb.writeTo(fos); + } else { + // This could potentially be other types. Only seen this once. + } + } catch (IOException ex) { - logger.log(Level.INFO, "", ex); + logger.log(Level.INFO, "Failed to write mbox email attachment to disk.", ex); + //JWTODO: post ingest message. return; } finally { try { @@ -222,15 +276,8 @@ public class MboxParser { Attachment attach = new Attachment(); attach.setName(filename); attach.setLocalPath(ThunderbirdMboxFileIngestModule.getRelModuleOutputPath() - + File.separator + filename); - // JWTODO: find appropriate constant or make one. -// ContentDispositionField disposition = (ContentDispositionField) e.getHeader().getField("Content-Disposition"); -// if (disposition != null) { -// attach.setSize(disposition.getSize()); -// attach.setCrTime(disposition.getCreationDate()); -// attach.setmTime(disposition.getModificationDate()); -// attach.setaTime(disposition.getReadDate()); -// } + + File.separator + uniqueFilename); + attach.setSize(new File(outPath).length()); email.addAttachment(attach); } @@ -260,4 +307,52 @@ public class MboxParser { private String getAddresses(AddressList addressList) { return (addressList == null) ? "" : getAddresses(addressList.flatten()); } + + /** + * Get a list of the possible encoders for the given mboxFile using Tika's + * CharsetDetector. At a minimum, returns the standard built in charsets. + * @param mboxFile + * @return + */ + private List getPossibleEncoders(File mboxFile) { + InputStream is; + List possibleEncoders = new ArrayList<>(); + + possibleEncoders.add(StandardCharsets.ISO_8859_1.newEncoder()); + possibleEncoders.add(StandardCharsets.US_ASCII.newEncoder()); + possibleEncoders.add(StandardCharsets.UTF_16.newEncoder()); + possibleEncoders.add(StandardCharsets.UTF_16BE.newEncoder()); + possibleEncoders.add(StandardCharsets.UTF_16LE.newEncoder()); + possibleEncoders.add(StandardCharsets.UTF_8.newEncoder()); + + try { + is = new BufferedInputStream(new FileInputStream(mboxFile)); + } catch (FileNotFoundException ex) { + logger.log(Level.WARNING, "Failed to find mbox file while detecting charset"); + return possibleEncoders; + } + + try { + CharsetDetector detector = new CharsetDetector(); + detector.setText(is); + CharsetMatch[] matches = detector.detectAll(); + for (CharsetMatch match : matches) { + try { + possibleEncoders.add(Charset.forName(match.getName()).newEncoder()); + } catch (UnsupportedCharsetException | IllegalCharsetNameException ex) { + // Don't add unsupported charsets to the list + } + } + return possibleEncoders; + } catch (IOException | IllegalArgumentException ex) { + logger.log(Level.WARNING, "Failed to detect charset of mbox file.", ex); + return possibleEncoders; + } finally { + try { + is.close(); + } catch (IOException ex) { + logger.log(Level.INFO, "Failed to close input stream"); + } + } + } } diff --git a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/PstParser.java b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/PstParser.java index 6876d0c6c9..ce0dd16000 100755 --- a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/PstParser.java +++ b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/PstParser.java @@ -45,14 +45,17 @@ import org.sleuthkit.datamodel.TskCoreException; */ public class PstParser { private static final Logger logger = Logger.getLogger(PstParser.class.getName()); + /** + * First four bytes of a pst file. + */ private static int PST_HEADER = 0x2142444E; - + private IngestServices services; /** * A map of PSTMessages to their Local path within the file's internal * directory structure. */ private List results; - private IngestServices services; + PstParser(IngestServices services) { results = new ArrayList<>(); this.services = services; @@ -70,9 +73,11 @@ public class PstParser { */ ParseResult parse(File file) { PSTFile pstFile; + long failures = 0L; try { pstFile = new PSTFile(file); - processFolder(pstFile.getRootFolder(), "\\", true); + failures = processFolder(pstFile.getRootFolder(), "\\", true); + //JWTODO: post ingest message if failures. return ParseResult.OK; } catch (PSTException | IOException ex) { String msg = file.getName() + ": Failed to create internal java-libpst PST file to parse:\n" + ex.getMessage(); @@ -102,9 +107,9 @@ public class PstParser { * @throws PSTException * @throws IOException */ - private void processFolder(PSTFolder folder, String path, boolean root) { + private long processFolder(PSTFolder folder, String path, boolean root) { String newPath = (root ? path : path + "\\" + folder.getDisplayName()); - + long failCount = 0L; // Number of emails that failed if (folder.hasSubfolders()) { List subFolders; try { @@ -115,7 +120,7 @@ public class PstParser { } for (PSTFolder f : subFolders) { - processFolder(f, newPath, false); + failCount += processFolder(f, newPath, false); } } @@ -127,9 +132,12 @@ public class PstParser { results.add(extractEmailMessage(email, newPath)); } } catch (PSTException | IOException ex) { + failCount++; logger.log(Level.INFO, "java-libpst exception while getting emails from a folder: " + ex.getMessage()); } } + + return failCount; } /** @@ -186,15 +194,15 @@ public class PstParser { if (filename.isEmpty()) { filename = attach.getFilename(); } - filename = msg.getDescriptorNodeId() + "-" + filename; - String outPath = outputDirPath + filename; + String uniqueFilename = msg.getDescriptorNodeId() + "-" + filename; + String outPath = outputDirPath + uniqueFilename; saveAttachmentToDisk(attach, outPath); Attachment attachment = new Attachment(); long crTime = attach.getCreationTime().getTime() / 1000; long mTime = attach.getModificationTime().getTime() / 1000; - String relPath = getRelModuleOutputPath() + File.separator + filename; + String relPath = getRelModuleOutputPath() + File.separator + uniqueFilename; attachment.setName(filename); attachment.setCrTime(crTime); attachment.setmTime(mTime); @@ -202,7 +210,8 @@ public class PstParser { attachment.setSize(attach.getFilesize()); email.addAttachment(attachment); } catch (PSTException | IOException ex) { - logger.log(Level.WARNING, "Failed to extract attachment.", ex); + //JWTODO post ingest message + logger.log(Level.WARNING, "Failed to extract attachment from pst file.", ex); } } } @@ -268,7 +277,7 @@ public class PstParser { ByteBuffer bb = ByteBuffer.wrap(buffer); return bb.getInt() == PST_HEADER; } catch (TskCoreException ex) { - System.out.println("Exception"); + logger.log(Level.WARNING, "Exception while detecting if a file is a pst file."); return false; } } diff --git a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestModule.java b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestModule.java index f798f93f70..339ce01b31 100644 --- a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestModule.java +++ b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestModule.java @@ -215,7 +215,7 @@ public class ThunderbirdMboxFileIngestModule extends IngestModuleAbstractFile { return ProcessResult.OK; } - MboxParser parser = new MboxParser(emailFolder); + MboxParser parser = new MboxParser(services, emailFolder); List emails = parser.parse(file); processEmails(emails, abstractFile, ingestContext);