Rename the mboxparser module to thunderbirdparser and add the Thunderbird parser classes.

This commit is contained in:
Alex Ebadirad 2012-06-18 14:55:23 -07:00
parent 050e48bd5f
commit 88b61d9981
33 changed files with 1158 additions and 1782 deletions

View File

@ -1,8 +1,8 @@
#Updated by build script
#Thu, 07 Jun 2012 13:38:12 -0700
#Mon, 18 Jun 2012 14:35:03 -0700
OpenIDE-Module-Name=CoreUtils
app.name=Autopsy
app.version=20120607
app.version=20120618
build.type=DEVELOPMENT

View File

@ -1,6 +0,0 @@
Manifest-Version: 1.0
OpenIDE-Module: org.sleuthkit.autopsy.mboxparser
OpenIDE-Module-Layer: org/sleuthkit/autopsy/mboxparser/layer.xml
OpenIDE-Module-Localizing-Bundle: org/sleuthkit/autopsy/mboxparser/Bundle.properties
OpenIDE-Module-Specification-Version: 1.0

View File

@ -1,57 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://www.netbeans.org/ns/project/1">
<type>org.netbeans.modules.apisupport.project</type>
<configuration>
<data xmlns="http://www.netbeans.org/ns/nb-module-project/3">
<code-name-base>org.sleuthkit.autopsy.mboxparser</code-name-base>
<suite-component/>
<module-dependencies>
<dependency>
<code-name-base>org.sleuthkit.autopsy.casemodule</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.coreutils</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>0-1</release-version>
<specification-version>0.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.datamodel</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.ingest</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>0-1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
</module-dependencies>
<public-packages/>
<class-path-extension>
<runtime-relative-path>ext/tika-core-1.1.jar</runtime-relative-path>
<binary-origin>release/modules/ext/tika-core-1.1.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/tika-parsers-1.1.jar</runtime-relative-path>
<binary-origin>release/modules/ext/tika-parsers-1.1.jar</binary-origin>
</class-path-extension>
</data>
</configuration>
</project>

View File

@ -1 +0,0 @@
OpenIDE-Module-Name=MboxEmailModule

View File

@ -1,206 +0,0 @@
package org.sleuthkit.autopsy.mboxparser;
import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.mbox.MboxParser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
 * Convenience wrapper that runs Apache Tika's {@link MboxParser} over an mbox
 * stream and exposes the extracted metadata through simple getters.
 * <p>
 * Typical use: construct an instance, call one of the {@code parse} methods,
 * then read the getters. The metadata and content getters dereference state
 * that is only created by {@code parse}; calling them before a parse fails
 * with a {@link NullPointerException}.
 */
public class MboxEmailParser {

    private static final Logger logger = Logger.getLogger(MboxEmailParser.class.getName());

    /** Limit handed to {@link Tika#setMaxStringLength(int)}. */
    private static final int MAX_STRING_LENGTH = 10 * 1024 * 1024;
    /** Pattern of the date string read back from the metadata. */
    private static final String DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'";
    private static final String MBOX_CONTENT_TYPE = "application/mbox";
    private static final String USER_AGENT_KEY = "MboxParser-user-agent";
    /** Leading bytes that isValidMimeTypeMbox looks for. */
    private static final String MBOX_MAGIC = "From ";

    private InputStream stream;
    //Tika object
    private Tika tika;
    private Metadata metadata;
    private ContentHandler contentHandler;
    private String mimeType;
    private Parser parser;
    private ParseContext context;
    // Populated below but not read anywhere in this class.
    private static ArrayList<String> tikaMimeTypes;

    static {
        tikaMimeTypes = new ArrayList<String>();
        tikaMimeTypes.add(MimeTypes.OCTET_STREAM);
        tikaMimeTypes.add(MimeTypes.PLAIN_TEXT);
        tikaMimeTypes.add(MimeTypes.XML);
    }

    /** Creates a parser with no stream; use {@link #parse(InputStream)}. */
    public MboxEmailParser() {
        this.tika = new Tika();
    }

    /**
     * Creates a parser bound to the given stream; use {@link #parse()}.
     *
     * @param inStream stream to parse later
     */
    public MboxEmailParser(InputStream inStream) {
        this.tika = new Tika();
        this.stream = inStream;
    }

    /**
     * Creates a parser bound to a classpath resource. The path is resolved
     * with {@link Class#getResourceAsStream(String)}, not as a file-system
     * path; the stream is {@code null} when the resource does not exist.
     *
     * @param filepath resource name to open
     */
    public MboxEmailParser(String filepath) {
        this.tika = new Tika();
        this.stream = this.getClass().getResourceAsStream(filepath);
    }

    /**
     * Resets the per-parse state (metadata, parser, context, content handler).
     *
     * @throws IOException if MIME detection on the bound stream fails
     */
    private void init() throws IOException {
        this.tika.setMaxStringLength(MAX_STRING_LENGTH);
        this.metadata = new Metadata();
        // Only sniff streams that support mark/reset.
        // NOTE(review): Tika documents that detect() restores the position of
        // mark-capable streams; for other streams the sniffed bytes would
        // presumably be lost before parsing, so detection is skipped there.
        // The detected value is not read anywhere in this class.
        if (this.stream != null && this.stream.markSupported()) {
            this.mimeType = tika.detect(this.stream);
        }
        this.parser = new MboxParser();
        this.context = new ParseContext();
        this.contentHandler = new BodyContentHandler(-1);
        // Setting Metadata.CONTENT_TYPE up front was observed to truncate the
        // metadata output, so it is deliberately left unset.
    }

    /**
     * Parses the stream supplied at construction time.
     *
     * @throws FileNotFoundException if no stream is bound (for example the
     *         classpath resource was not found)
     * @throws IOException on read failure
     * @throws SAXException if the content handler rejects the output
     * @throws TikaException if Tika cannot parse the content
     */
    public void parse() throws FileNotFoundException, IOException, SAXException, TikaException {
        if (this.stream == null) {
            throw new FileNotFoundException("No input stream is available to parse");
        }
        init();
        parser.parse(this.stream, this.contentHandler, this.metadata, context);
    }

    /**
     * Parses the given stream instead of the one bound at construction.
     *
     * @param inStream stream positioned at the start of the mbox data
     * @throws FileNotFoundException declared for callers; not thrown directly here
     * @throws IOException on read failure
     * @throws SAXException if the content handler rejects the output
     * @throws TikaException if Tika cannot parse the content
     */
    public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException {
        init();
        parser.parse(inStream, this.contentHandler, this.metadata, context);
    }

    /** @return the metadata of the last parse, or {@code null} before any parse */
    public Metadata getMetadata() {
        return this.metadata;
    }

    //Returns message content, i.e. plain text or html
    public String getContent() {
        return this.contentHandler.toString();
    }

    /**
     * Detects a media type from the file name alone.
     *
     * @param filepath name or path of the document
     * @return the detected media type
     * @throws IOException declared by the original signature
     */
    public String detectEmailFileFormat(String filepath) throws IOException {
        return this.tika.detect(filepath);
    }

    //Detects the mime type from the first few bytes of the document
    public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName) {
        return this.tika.detect(firstFewBytes, inDocName);
    }

    /**
     * Cheap mbox check: reports whether the buffer starts with {@code "From "}.
     * The bytes are decoded as US-ASCII so the result does not depend on the
     * platform default charset.
     *
     * @param buffer leading bytes of the file; {@code null} or fewer than five
     *        bytes yields {@code false}
     * @return {@code true} if the buffer begins with the mbox marker
     */
    public boolean isValidMimeTypeMbox(byte[] buffer) {
        if (buffer == null || buffer.length < MBOX_MAGIC.length()) {
            return false;
        }
        String head = new String(buffer, 0, MBOX_MAGIC.length(),
                java.nio.charset.Charset.forName("US-ASCII"));
        return MBOX_MAGIC.equals(head);
    }

    //This assumes the file/stream was parsed since we are looking at the metadata
    public boolean isValidMboxType() {
        // Constant-first equals: a missing content type means "not mbox"
        // rather than a NullPointerException.
        return MBOX_CONTENT_TYPE.equals(this.metadata.get(Metadata.CONTENT_TYPE));
    }

    //Get email subject
    public String getSubject() {
        return this.metadata.get(Metadata.SUBJECT);
    }

    public String getTitle() {
        return this.metadata.get(Metadata.TITLE);
    }

    /**
     * Returns the message date as seconds since the Unix epoch.
     *
     * @return epoch seconds, or {@code 0} when the metadata has no date or
     *         the date cannot be parsed
     */
    public Long getDateCreated() {
        String datetime = this.metadata.get(Metadata.DATE);
        if (datetime == null) {
            return Long.valueOf(0L);
        }
        // A fresh formatter per call: SimpleDateFormat is not thread-safe.
        SimpleDateFormat format = new SimpleDateFormat(DATE_PATTERN);
        // The trailing 'Z' is a quoted literal in the pattern, so the time
        // zone must be set explicitly; otherwise the value would be read in
        // the JVM default zone.
        format.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
        try {
            return Long.valueOf(format.parse(datetime).getTime() / 1000L);
        } catch (ParseException ex) {
            logger.log(Level.SEVERE, "Unparsable message date: " + datetime, ex);
            return Long.valueOf(0L);
        }
    }

    /**
     * Names the mail client based on the user-agent header.
     *
     * @return {@code "Thunderbird"} when the header mentions Thunderbird
     *         (case-insensitive), otherwise {@code "Unknown"}, including when
     *         the header is absent
     */
    public String getApplication() {
        String userAgent = this.metadata.get(USER_AGENT_KEY);
        if (userAgent != null && userAgent.matches("(?i).*Thunderbird.*")) {
            return "Thunderbird";
        }
        return "Unknown";
    }

    public String getContenType() {
        return this.metadata.get(Metadata.CONTENT_TYPE);
    }

    public String getContenEncoding() {
        return this.metadata.get(Metadata.CONTENT_ENCODING);
    }

    public String getFrom() {
        return this.metadata.get(Metadata.AUTHOR);
    }

    public String getTo() {
        return this.metadata.get(Metadata.MESSAGE_TO);
    }

    public String getCC() {
        return this.metadata.get(Metadata.MESSAGE_CC);
    }

    public String getBCC() {
        return this.metadata.get(Metadata.MESSAGE_BCC);
    }

    public String getRecipientAddress() {
        return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS);
    }

    // NOTE(review): getType() presumably yields only the top-level type
    // ("application"), not "application/mbox" - confirm this is intended.
    public String getMboxSupportedMediaType() {
        return MediaType.application("mbox").getType();
    }
}

View File

@ -1,195 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.mboxparser;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.exception.TikaException;
import org.sleuthkit.autopsy.ingest.IngestManager;
import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
import org.sleuthkit.autopsy.ingest.IngestMessage;
import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
import org.sleuthkit.autopsy.ingest.IngestServiceAbstract.*;
import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
import org.sleuthkit.autopsy.ingest.ServiceDataEvent;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.ReadContentInputStream;
import org.sleuthkit.datamodel.TskCoreException;
import org.sleuthkit.datamodel.TskException;
import org.xml.sax.SAXException;
public class MboxFileIngestService implements IngestServiceAbstractFile {
private static final Logger logger = Logger.getLogger(MboxFileIngestService.class.getName());
private static MboxFileIngestService instance = null;
private IngestManagerProxy managerProxy;
private static int messageId = 0;
private static final String classname = "Mbox Parser";
public static synchronized MboxFileIngestService getDefault() {
if (instance == null) {
instance = new MboxFileIngestService();
}
return instance;
}
@Override
public ProcessResult process(AbstractFile fsContent) {
MboxEmailParser mbox = new MboxEmailParser();
boolean isMbox = false;
try {
byte[] t = new byte[(int) 128];
int byteRead = fsContent.read(t, 0, 128);
isMbox = mbox.isValidMimeTypeMbox(t);
} catch (TskException ex) {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
}
if (isMbox) {
managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "Processing " + fsContent.getName()));
try {
ReadContentInputStream contentStream = new ReadContentInputStream(fsContent);
mbox.parse(contentStream);
String content = mbox.getContent();
String client = mbox.getApplication();
String from = mbox.getFrom();
String to = mbox.getTo();
Long date = mbox.getDateCreated();
String subject = mbox.getSubject();
String cc = mbox.getCC();
String bcc = mbox.getBCC();
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_TO.getTypeID(), classname, "", to));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CC.getTypeID(), classname, "", cc));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_BCC.getTypeID(), classname, "", bcc));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_FROM.getTypeID(), classname, "", from));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_PLAIN.getTypeID(), classname, "", content));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_HTML.getTypeID(), classname, "", content));
//bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_ID.getTypeID(), classname, "",));
//bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_REPLY_ID.getTypeID(), classname, "",));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID(), classname, "", date));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID(), classname, "", date));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SUBJECT.getTypeID(), classname, "", subject));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID(), classname, "", client));
BlackboardArtifact bbart;
try {
bbart = fsContent.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG);
bbart.addAttributes(bbattributes);
} catch (TskCoreException ex) {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
}
IngestManager.fireServiceDataEvent(new ServiceDataEvent(classname, BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
} catch (FileNotFoundException ex) {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
} catch (SAXException ex) {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
} catch (TikaException ex) {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
}
}
return ProcessResult.OK;
}
@Override
public void complete() {
logger.log(Level.INFO, "complete()");
managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "COMPLETE"));
//service specific cleanup due completion here
}
@Override
public String getName() {
return "Mbox Parser";
}
@Override
public String getDescription() {
return "This class parses through a file to determine if it is an mbox file and if so, populates an email artifact for it in the blackboard.";
}
@Override
public void init(IngestManagerProxy managerProxy) {
logger.log(Level.INFO, "init()");
this.managerProxy = managerProxy;
//service specific initialization here
}
@Override
public void stop() {
logger.log(Level.INFO, "stop()");
//service specific cleanup due interruption here
}
@Override
public ServiceType getType() {
return ServiceType.AbstractFile;
}
@Override
public boolean hasSimpleConfiguration() {
return false;
}
@Override
public boolean hasAdvancedConfiguration() {
return false;
}
@Override
public javax.swing.JPanel getSimpleConfiguration() {
return null;
}
@Override
public javax.swing.JPanel getAdvancedConfiguration() {
return null;
}
@Override
public boolean hasBackgroundJobsRunning() {
return false;
}
@Override
public void saveAdvancedConfiguration() {
}
@Override
public void saveSimpleConfiguration() {
}
}

View File

@ -1,8 +1,8 @@
build.xml.data.CRC32=d23c11ef
build.xml.data.CRC32=d1b02431
build.xml.script.CRC32=bbb1c310
build.xml.stylesheet.CRC32=a56c6a5b@1.46.2
# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
nbproject/build-impl.xml.data.CRC32=d23c11ef
nbproject/build-impl.xml.data.CRC32=d1b02431
nbproject/build-impl.xml.script.CRC32=1562aec2
nbproject/build-impl.xml.stylesheet.CRC32=238281d1@1.46.2

View File

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- You may freely edit this file. See harness/README in the NetBeans platform -->
<!-- for some information on what you could do (e.g. targets to override). -->
<!-- If you delete this file and reopen the project it will be recreated. -->
<project name="org.sleuthkit.autopsy.mboxparser" default="netbeans" basedir=".">
<description>Builds, tests, and runs the project org.sleuthkit.autopsy.mboxparser.</description>
<import file="nbproject/build-impl.xml"/>
</project>

View File

@ -1,6 +0,0 @@
Manifest-Version: 1.0
OpenIDE-Module: org.sleuthkit.autopsy.mboxparser
OpenIDE-Module-Layer: org/sleuthkit/autopsy/mboxparser/layer.xml
OpenIDE-Module-Localizing-Bundle: org/sleuthkit/autopsy/mboxparser/Bundle.properties
OpenIDE-Module-Specification-Version: 1.0

View File

@ -1,45 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
*** GENERATED FROM project.xml - DO NOT EDIT ***
*** EDIT ../build.xml INSTEAD ***
-->
<project name="org.sleuthkit.autopsy.mboxparser-impl" basedir="..">
<fail message="Please build using Ant 1.7.1 or higher.">
<condition>
<not>
<antversion atleast="1.7.1"/>
</not>
</condition>
</fail>
<property file="nbproject/private/suite-private.properties"/>
<property file="nbproject/suite.properties"/>
<fail unless="suite.dir">You must set 'suite.dir' to point to your containing module suite</fail>
<property file="${suite.dir}/nbproject/private/platform-private.properties"/>
<property file="${suite.dir}/nbproject/platform.properties"/>
<macrodef name="property" uri="http://www.netbeans.org/ns/nb-module-project/2">
<attribute name="name"/>
<attribute name="value"/>
<sequential>
<property name="@{name}" value="${@{value}}"/>
</sequential>
</macrodef>
<macrodef name="evalprops" uri="http://www.netbeans.org/ns/nb-module-project/2">
<attribute name="property"/>
<attribute name="value"/>
<sequential>
<property name="@{property}" value="@{value}"/>
</sequential>
</macrodef>
<property file="${user.properties.file}"/>
<nbmproject2:property name="harness.dir" value="nbplatform.${nbplatform.active}.harness.dir" xmlns:nbmproject2="http://www.netbeans.org/ns/nb-module-project/2"/>
<nbmproject2:property name="nbplatform.active.dir" value="nbplatform.${nbplatform.active}.netbeans.dest.dir" xmlns:nbmproject2="http://www.netbeans.org/ns/nb-module-project/2"/>
<nbmproject2:evalprops property="cluster.path.evaluated" value="${cluster.path}" xmlns:nbmproject2="http://www.netbeans.org/ns/nb-module-project/2"/>
<fail message="Path to 'platform' cluster missing in $${cluster.path} property or using corrupt Netbeans Platform (missing harness).">
<condition>
<not>
<contains string="${cluster.path.evaluated}" substring="platform"/>
</not>
</condition>
</fail>
<import file="${harness.dir}/build.xml"/>
</project>

View File

@ -1,100 +0,0 @@
cluster.path=\
${nbplatform.active.dir}/harness:\
${nbplatform.active.dir}/java:\
${nbplatform.active.dir}/platform
disabled.modules=\
org.apache.tools.ant.module,\
org.netbeans.api.debugger.jpda,\
org.netbeans.api.java,\
org.netbeans.libs.cglib,\
org.netbeans.libs.javacapi,\
org.netbeans.libs.javacimpl,\
org.netbeans.libs.jsr223,\
org.netbeans.libs.springframework,\
org.netbeans.modules.ant.browsetask,\
org.netbeans.modules.ant.debugger,\
org.netbeans.modules.ant.freeform,\
org.netbeans.modules.ant.grammar,\
org.netbeans.modules.ant.kit,\
org.netbeans.modules.beans,\
org.netbeans.modules.classfile,\
org.netbeans.modules.dbschema,\
org.netbeans.modules.debugger.jpda,\
org.netbeans.modules.debugger.jpda.ant,\
org.netbeans.modules.debugger.jpda.projects,\
org.netbeans.modules.debugger.jpda.ui,\
org.netbeans.modules.form,\
org.netbeans.modules.form.j2ee,\
org.netbeans.modules.form.kit,\
org.netbeans.modules.hibernate,\
org.netbeans.modules.hibernatelib,\
org.netbeans.modules.hudson.ant,\
org.netbeans.modules.hudson.maven,\
org.netbeans.modules.i18n,\
org.netbeans.modules.i18n.form,\
org.netbeans.modules.j2ee.core.utilities,\
org.netbeans.modules.j2ee.eclipselink,\
org.netbeans.modules.j2ee.eclipselinkmodelgen,\
org.netbeans.modules.j2ee.jpa.refactoring,\
org.netbeans.modules.j2ee.jpa.verification,\
org.netbeans.modules.j2ee.metadata,\
org.netbeans.modules.j2ee.metadata.model.support,\
org.netbeans.modules.j2ee.persistence,\
org.netbeans.modules.j2ee.persistence.kit,\
org.netbeans.modules.j2ee.persistenceapi,\
org.netbeans.modules.j2ee.toplinklib,\
org.netbeans.modules.java.api.common,\
org.netbeans.modules.java.debug,\
org.netbeans.modules.java.editor,\
org.netbeans.modules.java.editor.lib,\
org.netbeans.modules.java.examples,\
org.netbeans.modules.java.freeform,\
org.netbeans.modules.java.guards,\
org.netbeans.modules.java.helpset,\
org.netbeans.modules.java.hints,\
org.netbeans.modules.java.hints.processor,\
org.netbeans.modules.java.j2seplatform,\
org.netbeans.modules.java.j2seproject,\
org.netbeans.modules.java.kit,\
org.netbeans.modules.java.lexer,\
org.netbeans.modules.java.navigation,\
org.netbeans.modules.java.platform,\
org.netbeans.modules.java.preprocessorbridge,\
org.netbeans.modules.java.project,\
org.netbeans.modules.java.source,\
org.netbeans.modules.java.source.ant,\
org.netbeans.modules.java.sourceui,\
org.netbeans.modules.javadoc,\
org.netbeans.modules.javawebstart,\
org.netbeans.modules.jellytools,\
org.netbeans.modules.jellytools.java,\
org.netbeans.modules.junit,\
org.netbeans.modules.maven,\
org.netbeans.modules.maven.coverage,\
org.netbeans.modules.maven.embedder,\
org.netbeans.modules.maven.grammar,\
org.netbeans.modules.maven.graph,\
org.netbeans.modules.maven.hints,\
org.netbeans.modules.maven.indexer,\
org.netbeans.modules.maven.junit,\
org.netbeans.modules.maven.kit,\
org.netbeans.modules.maven.model,\
org.netbeans.modules.maven.osgi,\
org.netbeans.modules.maven.persistence,\
org.netbeans.modules.maven.repository,\
org.netbeans.modules.maven.search,\
org.netbeans.modules.maven.spring,\
org.netbeans.modules.projectimport.eclipse.core,\
org.netbeans.modules.projectimport.eclipse.j2se,\
org.netbeans.modules.refactoring.java,\
org.netbeans.modules.spellchecker.bindings.java,\
org.netbeans.modules.spring.beans,\
org.netbeans.modules.swingapp,\
org.netbeans.modules.websvc.jaxws21,\
org.netbeans.modules.websvc.jaxws21api,\
org.netbeans.modules.websvc.saas.codegen.java,\
org.netbeans.modules.xml.jaxb,\
org.netbeans.modules.xml.tools.java,\
org.openide.compat,\
org.openide.util.enumerations
nbplatform.active=default

View File

@ -1,2 +0,0 @@
javac.source=1.6
javac.compilerargs=-Xlint -Xlint:-serial

View File

@ -1 +0,0 @@
suite.dir=${basedir}/..

View File

@ -1 +0,0 @@
OpenIDE-Module-Name=ThunderbirdMboxEmailModule

View File

@ -1,11 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE filesystem PUBLIC "-//NetBeans//DTD Filesystem 1.2//EN" "http://www.netbeans.org/dtds/filesystem-1_2.dtd">
<!-- Module layer: declares MboxFileIngestService under the Services folder. -->
<filesystem>
<folder name="Services">
<!-- The instance is typed as IngestServiceAbstractFile and is obtained from the
     static MboxFileIngestService.getDefault() factory rather than a constructor. -->
<file name="org-sleuthkit-autopsy-mboxparser-MboxFileIngestService.instance">
<attr name="instanceOf" stringvalue="org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile"/>
<attr name="instanceCreate" methodvalue="org.sleuthkit.autopsy.mboxparser.MboxFileIngestService.getDefault"/>
<!-- Position of this entry within the folder. -->
<attr name="position" intvalue="1100"/>
</file>
</folder>
</filesystem>

View File

@ -30,7 +30,7 @@ modules=\
${project.org.sleuthkit.autopsy.recentactivity}:\
${project.org.sleuthkit.autopsy.report}:\
${project.org.sleuthkit.autopsy.testing}:\
${project.org.sleuthkit.autopsy.mboxparser}
${project.org.sleuthkit.autopsy.thunderbirdparser}
project.org.sleuthkit.autopsy.casemodule=Case
project.org.sleuthkit.autopsy.corecomponentinterfaces=CoreComponentInterfaces
project.org.sleuthkit.autopsy.corecomponents=CoreComponents
@ -40,10 +40,10 @@ project.org.sleuthkit.autopsy.filesearch=FileSearch
project.org.sleuthkit.autopsy.hashdatabase=HashDatabase
project.org.sleuthkit.autopsy.ingest=Ingest
project.org.sleuthkit.autopsy.keywordsearch=KeywordSearch
project.org.sleuthkit.autopsy.mboxparser=MboxEmailModule
project.org.sleuthkit.autopsy.menuactions=MenuActions
project.org.sleuthkit.autopsy.datamodel=DataModel
project.org.sleuthkit.autopsy.recentactivity=RecentActivity
project.org.sleuthkit.autopsy.report=Report
project.org.sleuthkit.autopsy.testing=Testing
project.org.sleuthkit.autopsy.thunderbirdparser=thunderbirdparser

View File

@ -2,7 +2,7 @@
<!-- You may freely edit this file. See harness/README in the NetBeans platform -->
<!-- for some information on what you could do (e.g. targets to override). -->
<!-- If you delete this file and reopen the project it will be recreated. -->
<project name="org.sleuthkit.autopsy.mboxparser" default="netbeans" basedir=".">
<description>Builds, tests, and runs the project org.sleuthkit.autopsy.mboxparser.</description>
<project name="org.sleuthkit.autopsy.thunderbirdparser" default="netbeans" basedir=".">
<description>Builds, tests, and runs the project org.sleuthkit.autopsy.thunderbirdparser.</description>
<import file="nbproject/build-impl.xml"/>
</project>

View File

@ -0,0 +1,6 @@
Manifest-Version: 1.0
OpenIDE-Module: org.sleuthkit.autopsy.thunderbirdparser
OpenIDE-Module-Layer: org/sleuthkit/autopsy/thunderbirdparser/layer.xml
OpenIDE-Module-Localizing-Bundle: org/sleuthkit/autopsy/thunderbirdparser/Bundle.properties
OpenIDE-Module-Specification-Version: 1.0

View File

@ -3,7 +3,7 @@
*** GENERATED FROM project.xml - DO NOT EDIT ***
*** EDIT ../build.xml INSTEAD ***
-->
<project name="org.sleuthkit.autopsy.mboxparser-impl" basedir="..">
<project name="org.sleuthkit.autopsy.thunderbirdparser-impl" basedir="..">
<fail message="Please build using Ant 1.7.1 or higher.">
<condition>
<not>

View File

@ -1,57 +1,65 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://www.netbeans.org/ns/project/1">
<type>org.netbeans.modules.apisupport.project</type>
<configuration>
<data xmlns="http://www.netbeans.org/ns/nb-module-project/3">
<code-name-base>org.sleuthkit.autopsy.mboxparser</code-name-base>
<suite-component/>
<module-dependencies>
<dependency>
<code-name-base>org.sleuthkit.autopsy.casemodule</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.coreutils</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>0-1</release-version>
<specification-version>0.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.datamodel</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.ingest</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>0-1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
</module-dependencies>
<public-packages/>
<class-path-extension>
<runtime-relative-path>ext/tika-core-1.1.jar</runtime-relative-path>
<binary-origin>release/modules/ext/tika-core-1.1.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/tika-parsers-1.1.jar</runtime-relative-path>
<binary-origin>release/modules/ext/tika-parsers-1.1.jar</binary-origin>
</class-path-extension>
</data>
</configuration>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://www.netbeans.org/ns/project/1">
<type>org.netbeans.modules.apisupport.project</type>
<configuration>
<data xmlns="http://www.netbeans.org/ns/nb-module-project/3">
<code-name-base>org.sleuthkit.autopsy.thunderbirdparser</code-name-base>
<suite-component/>
<module-dependencies>
<dependency>
<code-name-base>org.sleuthkit.autopsy.casemodule</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.coreutils</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>0-1</release-version>
<specification-version>0.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.datamodel</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.ingest</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>0-1</release-version>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.sleuthkit.autopsy.recentactivity</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<specification-version>1.0</specification-version>
</run-dependency>
</dependency>
</module-dependencies>
<public-packages/>
<class-path-extension>
<runtime-relative-path>ext/tika-core-1.1.jar</runtime-relative-path>
<binary-origin>release/modules/ext/tika-core-1.1.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/tika-parsers-1.1.jar</runtime-relative-path>
<binary-origin>release/modules/ext/tika-parsers-1.1.jar</binary-origin>
</class-path-extension>
</data>
</configuration>
</project>

View File

@ -0,0 +1 @@
OpenIDE-Module-Name=ThunderbirdParser

View File

@ -1,183 +1,183 @@
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
public class ThunderbirdEmailParser {
private InputStream stream;
//Tika object
private Tika tika;
private ThunderbirdMetadata metadata;
private ContentHandler contentHandler;
private String mimeType;
private ThunderbirdMboxParser parser;
private ParseContext context;
private static ArrayList<String> tikaMimeTypes;
static
{
tikaMimeTypes = new ArrayList<String>();
tikaMimeTypes.add(MimeTypes.OCTET_STREAM);
tikaMimeTypes.add(MimeTypes.PLAIN_TEXT);
tikaMimeTypes.add(MimeTypes.XML);
}
public ThunderbirdEmailParser()
{
this.tika = new Tika();
}
public ThunderbirdEmailParser(InputStream inStream)
{
this.tika = new Tika();
this.stream = inStream;
}
public ThunderbirdEmailParser(String filepath)
{
this.tika = new Tika();
this.stream = this.getClass().getResourceAsStream(filepath);
}
private void init() throws IOException
{
this.tika.setMaxStringLength(10*1024*1024);
this.metadata = new ThunderbirdMetadata();
//Set MIME Type
//this.mimeType = tika.detect(this.stream);
this.parser = new ThunderbirdMboxParser();
this.context = new ParseContext();
this.contentHandler = new BodyContentHandler(10*1024*1024);
}
public void parse() throws FileNotFoundException, IOException, SAXException, TikaException
{
init();
parser.parse(this.stream,this.contentHandler, this.metadata, context);
}
public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException
{
init();
parser.parse(inStream,this.contentHandler, this.metadata, context);
}
public ThunderbirdMetadata getMetadata()
{
return this.metadata;
}
//Returns message content, i.e. plain text or html
public String getContent()
{
return this.contentHandler.toString();
}
public String detectEmailFileFormat(String filepath) throws IOException
{
return this.tika.detect(filepath);
}
//Detects the mime type from the first few bytes of the document
public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName)
{
return this.tika.detect(firstFewBytes, inDocName);
}
public boolean isValidMimeTypeMbox(byte[] buffer)
{
return (new String(buffer)).startsWith("From ");
}
//This assumes the file/stream was parsed since we are looking at the metadata
public boolean isValidMboxType()
{
return this.metadata.get(Metadata.CONTENT_TYPE).equals("application/mbox");
}
//Get email subject
public String getSubject()
{
return this.metadata.get(Metadata.SUBJECT);
}
public String getTitle()
{
return this.metadata.get(Metadata.TITLE);
}
public Long getDateCreated()
{
Long epochtime;
Long ftime = 0L;
try {
String datetime = this.metadata.get(Metadata.DATE);
epochtime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(datetime).getTime();
ftime = epochtime.longValue();
ftime = ftime / 1000;
} catch (ParseException ex) {
Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
}
return ftime;
}
public String getContenType()
{
return this.metadata.get(Metadata.CONTENT_TYPE);
}
public String getContenEncoding()
{
return this.metadata.get(Metadata.CONTENT_ENCODING);
}
public String getFrom()
{
return this.metadata.get(Metadata.MESSAGE_FROM);
}
public String getTo()
{
return this.metadata.get(Metadata.MESSAGE_TO);
}
public String getCC()
{
return this.metadata.get(Metadata.MESSAGE_CC);
}
/** Returns the value stored under Metadata.MESSAGE_BCC. */
public String getBCC()
{
    final String bcc = this.metadata.get(Metadata.MESSAGE_BCC);
    return bcc;
}
/** Returns the value stored under Metadata.MESSAGE_RECIPIENT_ADDRESS. */
public String getRecipientAddress()
{
    final String recipient = this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS);
    return recipient;
}
/**
 * Returns getType() of the media type built by MediaType.application("mbox").
 * NOTE(review): getType() presumably yields only the primary type rather than
 * the full "application/mbox" string - confirm this is what callers expect.
 */
public String getMboxSupportedMediaType()
{
    final MediaType mboxType = MediaType.application("mbox");
    return mboxType.getType();
}
}
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
 * Convenience wrapper around {@link ThunderbirdMboxParser}: parses an mbox
 * stream and exposes the collected header values and body text via getters.
 *
 * The metadata, parser, parse context and content handler are (re)created by
 * init() at the start of every parse call, so the getters that read them are
 * only meaningful after a parse.
 */
public class ThunderbirdEmailParser {
    private static final Logger logger = Logger.getLogger(ThunderbirdEmailParser.class.getName());
    /** ASCII bytes of the mbox record divider "From ". */
    private static final byte[] MBOX_MAGIC = {'F', 'r', 'o', 'm', ' '};
    /** Limit handed to both Tika.setMaxStringLength and the BodyContentHandler. */
    private static final int MAX_CONTENT_LENGTH = 10 * 1024 * 1024;

    private InputStream stream;
    //Tika object
    private Tika tika;
    private ThunderbirdMetadata metadata;
    private ContentHandler contentHandler;
    // Never assigned in this class; only referenced by the commented-out detection in init().
    private String mimeType;
    private ThunderbirdMboxParser parser;
    private ParseContext context;
    // Populated below but not read anywhere in this class.
    private static ArrayList<String> tikaMimeTypes;

    static
    {
        tikaMimeTypes = new ArrayList<String>();
        tikaMimeTypes.add(MimeTypes.OCTET_STREAM);
        tikaMimeTypes.add(MimeTypes.PLAIN_TEXT);
        tikaMimeTypes.add(MimeTypes.XML);
    }

    /** Creates a parser with no stream; use {@link #parse(InputStream)} to supply one. */
    public ThunderbirdEmailParser()
    {
        this.tika = new Tika();
    }

    /** Creates a parser that will read from the given stream when {@link #parse()} is called. */
    public ThunderbirdEmailParser(InputStream inStream)
    {
        this.tika = new Tika();
        this.stream = inStream;
    }

    /**
     * Creates a parser whose stream is obtained with Class.getResourceAsStream(filepath).
     * NOTE(review): this resolves class-path resources, and the stream is null when
     * none matches - confirm that a file-system path was not intended.
     */
    public ThunderbirdEmailParser(String filepath)
    {
        this.tika = new Tika();
        this.stream = this.getClass().getResourceAsStream(filepath);
    }

    /** Resets per-parse state: fresh metadata, parser, parse context and body handler. */
    private void init() throws IOException
    {
        this.tika.setMaxStringLength(MAX_CONTENT_LENGTH);
        this.metadata = new ThunderbirdMetadata();
        //Set MIME Type
        //this.mimeType = tika.detect(this.stream);
        this.parser = new ThunderbirdMboxParser();
        this.context = new ParseContext();
        this.contentHandler = new BodyContentHandler(MAX_CONTENT_LENGTH);
    }

    /** Parses the stream supplied at construction time. */
    public void parse() throws FileNotFoundException, IOException, SAXException, TikaException
    {
        init();
        parser.parse(this.stream, this.contentHandler, this.metadata, context);
    }

    /** Parses the given stream, ignoring any stream supplied at construction time. */
    public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException
    {
        init();
        parser.parse(inStream, this.contentHandler, this.metadata, context);
    }

    /** Returns the metadata object created by the most recent parse (null before any parse). */
    public ThunderbirdMetadata getMetadata()
    {
        return this.metadata;
    }

    //Returns message content, i.e. plain text or html
    public String getContent()
    {
        return this.contentHandler.toString();
    }

    /** Delegates to the Tika facade's detect(String) for the given path. */
    public String detectEmailFileFormat(String filepath) throws IOException
    {
        return this.tika.detect(filepath);
    }

    //Detects the mime type from the first few bytes of the document
    public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName)
    {
        return this.tika.detect(firstFewBytes, inDocName);
    }

    /**
     * Tells whether the buffer begins with the mbox record divider "From ".
     * Compares raw ASCII bytes so the result does not depend on the platform
     * default charset; a null or too-short buffer yields false.
     */
    public boolean isValidMimeTypeMbox(byte[] buffer)
    {
        if (buffer == null || buffer.length < MBOX_MAGIC.length) {
            return false;
        }
        for (int i = 0; i < MBOX_MAGIC.length; i++) {
            if (buffer[i] != MBOX_MAGIC[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reports whether the recorded content type is "application/mbox".
     * Meaningful only after a parse; returns false (instead of throwing) when
     * nothing has been parsed yet or no content type is recorded.
     */
    public boolean isValidMboxType()
    {
        if (this.metadata == null) {
            return false;
        }
        return "application/mbox".equals(this.metadata.get(Metadata.CONTENT_TYPE));
    }

    //Get email subject
    public String getSubject()
    {
        return this.metadata.get(Metadata.SUBJECT);
    }

    public String getTitle()
    {
        return this.metadata.get(Metadata.TITLE);
    }

    /**
     * Converts the stored Metadata.DATE value to seconds since the Unix epoch.
     *
     * @return epoch seconds, or 0 when no date is stored or it cannot be parsed
     */
    public Long getDateCreated()
    {
        Long ftime = 0L;
        String datetime = this.metadata.get(Metadata.DATE);
        if (datetime == null) {
            // No date recorded; SimpleDateFormat.parse(null) would throw NullPointerException.
            return ftime;
        }
        try {
            SimpleDateFormat isoFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
            // The pattern treats the trailing 'Z' as a literal, so the zone must be set
            // explicitly; otherwise the value is read in the JVM default zone.
            // NOTE(review): assumes the metadata serialises dates in UTC - confirm in ThunderbirdMetadata.
            isoFormat.setTimeZone(java.util.TimeZone.getTimeZone("UTC"));
            ftime = isoFormat.parse(datetime).getTime() / 1000;
        } catch (ParseException ex) {
            logger.log(Level.SEVERE, null, ex);
        }
        return ftime;
    }

    public String getContenType()
    {
        return this.metadata.get(Metadata.CONTENT_TYPE);
    }

    public String getContenEncoding()
    {
        return this.metadata.get(Metadata.CONTENT_ENCODING);
    }

    /** Returns the sender, which the mbox parser stores under the AUTHOR key. */
    public String getFrom()
    {
        return this.metadata.get(Metadata.AUTHOR);
    }

    public String getTo()
    {
        return this.metadata.get(Metadata.MESSAGE_TO);
    }

    public String getCC()
    {
        return this.metadata.get(Metadata.MESSAGE_CC);
    }

    public String getBCC()
    {
        return this.metadata.get(Metadata.MESSAGE_BCC);
    }

    public String getRecipientAddress()
    {
        return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS);
    }

    /**
     * Returns getType() of the media type built by MediaType.application("mbox").
     * NOTE(review): getType() presumably yields only the primary type rather than
     * the full "application/mbox" string - confirm this is what callers expect.
     */
    public String getMboxSupportedMediaType()
    {
        return MediaType.application("mbox").getType();
    }
}

View File

@ -1,193 +1,193 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.exception.TikaException;
import org.sleuthkit.autopsy.ingest.IngestManager;
import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
import org.sleuthkit.autopsy.ingest.IngestMessage;
import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
import org.sleuthkit.autopsy.ingest.IngestServiceAbstract.*;
import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
import org.sleuthkit.autopsy.ingest.ServiceDataEvent;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.ReadContentInputStream;
import org.sleuthkit.datamodel.TskCoreException;
import org.sleuthkit.datamodel.TskException;
import org.xml.sax.SAXException;
public class ThunderbirdMboxFileIngestService implements IngestServiceAbstractFile {
private static final Logger logger = Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName());
private static ThunderbirdMboxFileIngestService instance = null;
private IngestManagerProxy managerProxy;
private static int messageId = 0;
private static final String classname = "Mbox Parser";
/** Returns the shared service instance, creating it on the first call. */
public static synchronized ThunderbirdMboxFileIngestService getDefault() {
    if (instance != null) {
        return instance;
    }
    instance = new ThunderbirdMboxFileIngestService();
    return instance;
}
/**
 * Checks whether the file starts with the mbox "From " marker and, if so,
 * parses it and records the extracted headers and body as a TSK_EMAIL_MSG
 * artifact on the file.
 *
 * Failures are logged and never propagated; the method always returns
 * ProcessResult.OK. The content stream is closed when parsing finishes, and the
 * data event is fired only after the artifact was actually stored.
 *
 * @param fsContent file handed over by the ingest manager
 */
@Override
public ProcessResult process(AbstractFile fsContent) {
    ThunderbirdEmailParser mbox = new ThunderbirdEmailParser();
    boolean isMbox = false;
    try {
        // Only the first bytes are needed to recognise the "From " marker.
        byte[] t = new byte[128];
        fsContent.read(t, 0, 128);
        isMbox = mbox.isValidMimeTypeMbox(t);
    } catch (TskException ex) {
        logger.log(Level.SEVERE, null, ex);
    }
    if (!isMbox) {
        return ProcessResult.OK;
    }

    managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "Processing " + fsContent.getName()));
    ReadContentInputStream contentStream = null;
    try {
        contentStream = new ReadContentInputStream(fsContent);
        mbox.parse(contentStream);
        String content = mbox.getContent();
        String from = mbox.getFrom();
        String to = mbox.getTo();
        Long date = mbox.getDateCreated();
        String subject = mbox.getSubject();
        String cc = mbox.getCC();
        String bcc = mbox.getBCC();

        Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_TO.getTypeID(), classname, "", to));
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CC.getTypeID(), classname, "", cc));
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_BCC.getTypeID(), classname, "", bcc));
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_FROM.getTypeID(), classname, "", from));
        // The same extracted text is stored under both content attributes.
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_PLAIN.getTypeID(), classname, "", content));
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_HTML.getTypeID(), classname, "", content));
        //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_ID.getTypeID(), classname, "",));
        //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_REPLY_ID.getTypeID(), classname, "",));
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID(), classname, "", date));
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID(), classname, "", date));
        bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SUBJECT.getTypeID(), classname, "", subject));

        try {
            BlackboardArtifact bbart = fsContent.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG);
            bbart.addAttributes(bbattributes);
            // Announce new data only once the artifact really exists.
            IngestManager.fireServiceDataEvent(new ServiceDataEvent(classname, BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
        } catch (TskCoreException ex) {
            logger.log(Level.SEVERE, null, ex);
        }
    } catch (IOException ex) {
        // Also covers FileNotFoundException, which is an IOException.
        logger.log(Level.SEVERE, null, ex);
    } catch (SAXException ex) {
        logger.log(Level.SEVERE, null, ex);
    } catch (TikaException ex) {
        logger.log(Level.SEVERE, null, ex);
    } finally {
        if (contentStream != null) {
            try {
                contentStream.close();
            } catch (IOException ex) {
                logger.log(Level.WARNING, "Could not close content stream for " + fsContent.getName(), ex);
            }
        }
    }
    return ProcessResult.OK;
}
/** Logs the call and posts a "COMPLETE" info message through the manager proxy. */
@Override
public void complete() {
    logger.log(Level.INFO, "complete()");
    final IngestMessage done = IngestMessage.createMessage(++messageId, MessageType.INFO, this, "COMPLETE");
    managerProxy.postMessage(done);
    //service specific cleanup due completion here
}
/** Returns the fixed service name "Mbox Parser". */
@Override
public String getName() {
return "Mbox Parser";
}
/** Returns a fixed one-sentence description of this service. */
@Override
public String getDescription() {
return "This class parses through a file to determine if it is an mbox file and if so, populates an email artifact for it in the blackboard.";
}
/**
 * Stores the manager proxy used later to post ingest messages, and logs the call.
 *
 * @param managerProxy proxy to keep for postMessage calls
 */
@Override
public void init(IngestManagerProxy managerProxy) {
    this.managerProxy = managerProxy;
    logger.log(Level.INFO, "init()");
    //service specific initialization here
}
/** Logs the call; no other cleanup is performed. */
@Override
public void stop() {
logger.log(Level.INFO, "stop()");
//service specific cleanup due interruption here
}
/** Always returns ServiceType.AbstractFile. */
@Override
public ServiceType getType() {
return ServiceType.AbstractFile;
}
/** Always returns false. */
@Override
public boolean hasSimpleConfiguration() {
return false;
}
/** Always returns false. */
@Override
public boolean hasAdvancedConfiguration() {
return false;
}
/** Always returns null; consistent with hasSimpleConfiguration() returning false. */
@Override
public javax.swing.JPanel getSimpleConfiguration() {
return null;
}
/** Always returns null; consistent with hasAdvancedConfiguration() returning false. */
@Override
public javax.swing.JPanel getAdvancedConfiguration() {
return null;
}
/** Always returns false. */
@Override
public boolean hasBackgroundJobsRunning() {
return false;
}
/** Intentionally empty: nothing is saved. */
@Override
public void saveAdvancedConfiguration() {
}
/** Intentionally empty: nothing is saved. */
@Override
public void saveSimpleConfiguration() {
}
/*
* Autopsy Forensic Browser
*
* Copyright 2011 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.exception.TikaException;
import org.sleuthkit.autopsy.ingest.IngestManager;
import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
import org.sleuthkit.autopsy.ingest.IngestMessage;
import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
import org.sleuthkit.autopsy.ingest.IngestServiceAbstract.*;
import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
import org.sleuthkit.autopsy.ingest.ServiceDataEvent;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.ReadContentInputStream;
import org.sleuthkit.datamodel.TskCoreException;
import org.sleuthkit.datamodel.TskException;
import org.xml.sax.SAXException;
/**
 * File-level ingest service that recognises mbox files by their leading
 * "From " marker, parses them with {@link ThunderbirdEmailParser} and stores the
 * extracted headers and body as a TSK_EMAIL_MSG blackboard artifact.
 */
public class ThunderbirdMboxFileIngestService implements IngestServiceAbstractFile {
    private static final Logger logger = Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName());
    private static ThunderbirdMboxFileIngestService instance = null;
    private IngestManagerProxy managerProxy;
    private static int messageId = 0;
    private static final String classname = "Thunderbird Parser";

    /** Returns the shared service instance, creating it on the first call. */
    public static synchronized ThunderbirdMboxFileIngestService getDefault() {
        if (instance == null) {
            instance = new ThunderbirdMboxFileIngestService();
        }
        return instance;
    }

    /**
     * Checks whether the file starts with the mbox "From " marker and, if so,
     * parses it and records the extracted headers and body as a TSK_EMAIL_MSG
     * artifact on the file.
     *
     * Failures are logged and never propagated; the method always returns
     * ProcessResult.OK. The content stream is closed when parsing finishes, and
     * the data event is fired only after the artifact was actually stored.
     *
     * @param fsContent file handed over by the ingest manager
     */
    @Override
    public ProcessResult process(AbstractFile fsContent) {
        ThunderbirdEmailParser mbox = new ThunderbirdEmailParser();
        boolean isMbox = false;
        try {
            // Only the first bytes are needed to recognise the "From " marker.
            byte[] t = new byte[128];
            fsContent.read(t, 0, 128);
            isMbox = mbox.isValidMimeTypeMbox(t);
        } catch (TskException ex) {
            logger.log(Level.SEVERE, null, ex);
        }
        if (!isMbox) {
            return ProcessResult.OK;
        }

        managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "Processing " + fsContent.getName()));
        ReadContentInputStream contentStream = null;
        try {
            contentStream = new ReadContentInputStream(fsContent);
            mbox.parse(contentStream);
            String content = mbox.getContent();
            String from = mbox.getFrom();
            String to = mbox.getTo();
            Long date = mbox.getDateCreated();
            String subject = mbox.getSubject();
            String cc = mbox.getCC();
            String bcc = mbox.getBCC();

            Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_TO.getTypeID(), classname, "", to));
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CC.getTypeID(), classname, "", cc));
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_BCC.getTypeID(), classname, "", bcc));
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_FROM.getTypeID(), classname, "", from));
            // The same extracted text is stored under both content attributes.
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_PLAIN.getTypeID(), classname, "", content));
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_HTML.getTypeID(), classname, "", content));
            //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_ID.getTypeID(), classname, "",));
            //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_REPLY_ID.getTypeID(), classname, "",));
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID(), classname, "", date));
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID(), classname, "", date));
            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SUBJECT.getTypeID(), classname, "", subject));

            try {
                BlackboardArtifact bbart = fsContent.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG);
                bbart.addAttributes(bbattributes);
                // Announce new data only once the artifact really exists.
                IngestManager.fireServiceDataEvent(new ServiceDataEvent(classname, BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, null, ex);
            }
        } catch (IOException ex) {
            // Also covers FileNotFoundException, which is an IOException.
            logger.log(Level.SEVERE, null, ex);
        } catch (SAXException ex) {
            logger.log(Level.SEVERE, null, ex);
        } catch (TikaException ex) {
            logger.log(Level.SEVERE, null, ex);
        } finally {
            if (contentStream != null) {
                try {
                    contentStream.close();
                } catch (IOException ex) {
                    logger.log(Level.WARNING, "Could not close content stream for " + fsContent.getName(), ex);
                }
            }
        }
        return ProcessResult.OK;
    }

    /** Logs the call and posts a "COMPLETE" info message through the manager proxy. */
    @Override
    public void complete() {
        logger.log(Level.INFO, "complete()");
        managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "COMPLETE"));
        //service specific cleanup due completion here
    }

    /**
     * Returns the fixed service name "Mbox Parser".
     * NOTE(review): this differs from the "Thunderbird Parser" name used for
     * attributes and data events - confirm whether the two should match.
     */
    @Override
    public String getName() {
        return "Mbox Parser";
    }

    /** Returns a fixed one-sentence description of this service. */
    @Override
    public String getDescription() {
        return "This class parses through a file to determine if it is an mbox file and if so, populates an email artifact for it in the blackboard.";
    }

    /** Stores the manager proxy used later to post ingest messages, and logs the call. */
    @Override
    public void init(IngestManagerProxy managerProxy) {
        logger.log(Level.INFO, "init()");
        this.managerProxy = managerProxy;
        //service specific initialization here
    }

    /** Logs the call; no other cleanup is performed. */
    @Override
    public void stop() {
        logger.log(Level.INFO, "stop()");
        //service specific cleanup due interruption here
    }

    /** Always returns ServiceType.AbstractFile. */
    @Override
    public ServiceType getType() {
        return ServiceType.AbstractFile;
    }

    /** Always returns false. */
    @Override
    public boolean hasSimpleConfiguration() {
        return false;
    }

    /** Always returns false. */
    @Override
    public boolean hasAdvancedConfiguration() {
        return false;
    }

    /** Always returns null; consistent with hasSimpleConfiguration() returning false. */
    @Override
    public javax.swing.JPanel getSimpleConfiguration() {
        return null;
    }

    /** Always returns null; consistent with hasAdvancedConfiguration() returning false. */
    @Override
    public javax.swing.JPanel getAdvancedConfiguration() {
        return null;
    }

    /** Always returns false. */
    @Override
    public boolean hasBackgroundJobsRunning() {
        return false;
    }

    /** Intentionally empty: nothing is saved. */
    @Override
    public void saveAdvancedConfiguration() {
    }

    /** Intentionally empty: nothing is saved. */
    @Override
    public void saveSimpleConfiguration() {
    }
}

View File

@ -1,253 +1,253 @@
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
 * Line-oriented parser for mbox files. A message starts at a line beginning
 * with "From "; header lines up to the first blank line are recorded in a
 * {@link ThunderbirdMetadata}, and body lines are emitted as XHTML through a
 * {@link ThunderbirdXHTMLContentHandler}.
 *
 * The XHTML handler is kept in an instance field while parse() runs, so a single
 * instance must not be used for two parses at the same time.
 *
 * @author arivera
 */
public class ThunderbirdMboxParser {
    /** Serial version UID */
    private static final long serialVersionUID = -1762689436731160661L;
    private static final Set<MediaType> SUPPORTED_TYPES =
            Collections.singleton(MediaType.application("mbox"));
    public static final String MBOX_MIME_TYPE = "application/mbox";
    public static final String MBOX_RECORD_DIVIDER = "From ";
    private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");
    private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("<(.*@.*)>");
    private static final String EMAIL_HEADER_METADATA_PREFIX = "MboxParser-";
    private static final String EMAIL_FROMLINE_METADATA = EMAIL_HEADER_METADATA_PREFIX + "from";
    /**
     * Accepted Date header layouts, tried in order. The second one covers dates
     * that omit the optional leading day-of-week.
     */
    private static final String[] DATE_PATTERNS = {
        "EEE, d MMM yyyy HH:mm:ss Z",
        "d MMM yyyy HH:mm:ss Z"
    };
    private ThunderbirdXHTMLContentHandler xhtml = null;

    private enum ParseStates {
        START, IN_HEADER, IN_CONTENT
    }

    /** Returns the single supported media type, application/mbox. */
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Reads the stream line by line as US-ASCII, storing header values in
     * {@code metadata} and writing message bodies to {@code handler} as XHTML.
     *
     * @param stream   mbox data; not closed by this method
     * @param handler  receives the XHTML events
     * @param metadata receives content type, encoding and header values
     * @param context  not used by this method
     */
    public void parse(
            InputStream stream, ContentHandler handler,
            ThunderbirdMetadata metadata, ParseContext context)
            throws IOException, TikaException, SAXException {
        InputStreamReader isr;
        try {
            // Headers are going to be 7-bit ascii
            isr = new InputStreamReader(stream, "US-ASCII");
        } catch (UnsupportedEncodingException e) {
            throw new TikaException("US-ASCII is not supported!", e);
        }
        BufferedReader reader = new BufferedReader(isr);
        metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
        metadata.set(Metadata.CONTENT_ENCODING, "us-ascii");
        xhtml = new ThunderbirdXHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        ParseStates parseState = ParseStates.START;
        // Header line being assembled; continuation lines are appended to it.
        String multiLine = null;
        boolean inQuote = false;
        int numEmails = 0;
        // We're going to scan, line-by-line, for a line that starts with
        // "From "
        for (String curLine = reader.readLine(); curLine != null; curLine = reader.readLine())
        {
            boolean newMessage = curLine.startsWith(MBOX_RECORD_DIVIDER);
            if (newMessage) {
                numEmails += 1;
            }
            switch (parseState) {
                case START:
                    if (newMessage) {
                        parseState = ParseStates.IN_HEADER;
                        newMessage = false;
                        // Fall through to IN_HEADER
                    } else {
                        break;
                    }
                case IN_HEADER:
                    if (newMessage) {
                        saveHeaderInMetadata(numEmails, metadata, multiLine);
                        //saveHeaderInMetadata(numEmails, metadata, curLine);
                        multiLine = curLine;
                    }
                    else if (curLine.length() == 0)
                    {
                        // Blank line is signal that we're transitioning to the content.
                        saveHeaderInMetadata(numEmails, metadata, multiLine);
                        parseState = ParseStates.IN_CONTENT;
                        // Mimic what PackageParser does between entries.
                        xhtml.startElement("div", "class", "email-entry");
                        xhtml.startElement("p");
                        inQuote = false;
                    }
                    else if ((curLine.startsWith(" ") || curLine.startsWith("\t")) )
                    {
                        // Continuation of a folded header line.
                        multiLine += " " + curLine.trim();
                    }
                    else
                    {
                        // A new header begins: flush the one collected so far.
                        saveHeaderInMetadata(numEmails, metadata, multiLine);
                        multiLine = curLine;
                    }
                    break;
                // TODO - use real email parsing support so we can correctly handle
                // things like multipart messages and quoted-printable encoding.
                // We'd also want this for charset handling, where content isn't 7-bit
                // ascii.
                case IN_CONTENT:
                    if (newMessage) {
                        endMessage(inQuote);
                        parseState = ParseStates.IN_HEADER;
                        multiLine = curLine;
                    } else {
                        boolean quoted = curLine.startsWith(">");
                        if (inQuote) {
                            if (!quoted) {
                                xhtml.endElement("q");
                                inQuote = false;
                            }
                        } else if (quoted) {
                            xhtml.startElement("q");
                            inQuote = true;
                        }
                        xhtml.characters(curLine);
                        // For plain text email, each line is a real break position.
                        xhtml.element("br", "");
                    }
            }
        }
        if (parseState == ParseStates.IN_HEADER) {
            saveHeaderInMetadata(numEmails, metadata, multiLine);
        } else if (parseState == ParseStates.IN_CONTENT) {
            endMessage(inQuote);
        }
        xhtml.endDocument();
    }

    /** Closes the elements opened for one message, including an open quote element. */
    private void endMessage(boolean inQuote) throws SAXException {
        if (inQuote) {
            xhtml.endElement("q");
        }
        xhtml.endElement("p");
        xhtml.endElement("div");
    }

    /**
     * Stores one complete header line in the metadata.
     *
     * A null line is ignored, a "From " separator line is stored under the
     * from-line key, and lines that do not look like "Name: value" are skipped.
     *
     * @param numEmails not used by this method
     * @param metadata  target for the header value
     * @param curLine   complete (unfolded) header line, or null
     */
    private void saveHeaderInMetadata(int numEmails, ThunderbirdMetadata metadata, String curLine)
    {
        if (curLine == null) {
            return;
        }
        if (curLine.startsWith(MBOX_RECORD_DIVIDER))
        {
            metadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
            return;
        }
        Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
        if (!headerMatcher.matches()) {
            return; // ignore malformed header lines
        }
        // Fixed locale: the lower-cased tag becomes part of a metadata key and must
        // not vary with the JVM default locale (e.g. Turkish dotless i).
        String headerTag = headerMatcher.group(1).toLowerCase(Locale.US);
        String headerContent = headerMatcher.group(2);
        if (headerTag.equalsIgnoreCase("From")) {
            metadata.add(ThunderbirdMetadata.AUTHOR, headerContent);
            metadata.add(ThunderbirdMetadata.CREATOR, headerContent);
        } else if (headerTag.equalsIgnoreCase("To") ||
                headerTag.equalsIgnoreCase("Cc") ||
                headerTag.equalsIgnoreCase("Bcc")) {
            Matcher address = EMAIL_ADDRESS_PATTERN.matcher(headerContent);
            if(address.find()) {
                metadata.add(ThunderbirdMetadata.MESSAGE_RECIPIENT_ADDRESS, address.group(1));
            } else if(headerContent.indexOf('@') > -1) {
                metadata.add(ThunderbirdMetadata.MESSAGE_RECIPIENT_ADDRESS, headerContent);
            }
            String property = ThunderbirdMetadata.MESSAGE_TO;
            if (headerTag.equalsIgnoreCase("Cc")) {
                property = ThunderbirdMetadata.MESSAGE_CC;
            } else if (headerTag.equalsIgnoreCase("Bcc")) {
                property = ThunderbirdMetadata.MESSAGE_BCC;
            }
            metadata.add(property, headerContent);
        } else if (headerTag.equalsIgnoreCase("Subject")) {
            metadata.add(ThunderbirdMetadata.SUBJECT, headerContent);
            metadata.add(ThunderbirdMetadata.TITLE, headerContent);
        } else if (headerTag.equalsIgnoreCase("Date")) {
            try {
                Date date = parseDate(headerContent);
                metadata.set(ThunderbirdMetadata.DATE, date);
                metadata.set(ThunderbirdMetadata.CREATION_DATE, date);
            } catch (ParseException e) {
                // ignoring date because format was not understood
            }
        } else if (headerTag.equalsIgnoreCase("Message-Id")) {
            metadata.add(ThunderbirdMetadata.IDENTIFIER, headerContent);
        } else if (headerTag.equalsIgnoreCase("In-Reply-To")) {
            metadata.add(ThunderbirdMetadata.RELATION, headerContent);
        } else if (headerTag.equalsIgnoreCase("Content-Type")) {
            // TODO - key off content-type in headers to
            // set mapping to use for content and convert if necessary.
            metadata.add(ThunderbirdMetadata.CONTENT_TYPE, headerContent);
            metadata.add(ThunderbirdMetadata.FORMAT, headerContent);
        } else {
            metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
        }
    }

    /**
     * Parses an e-mail Date header such as "Mon, 18 Jun 2012 14:55:23 -0700".
     * The leading day-of-week is optional.
     *
     * @param headerContent value of the Date header
     * @return the parsed instant
     * @throws ParseException if the value matches none of the accepted layouts
     */
    public static Date parseDate(String headerContent) throws ParseException {
        ParseException firstFailure = null;
        for (String pattern : DATE_PATTERNS) {
            try {
                // SimpleDateFormat is not thread-safe, so a fresh instance is used per attempt.
                return new SimpleDateFormat(pattern, Locale.US).parse(headerContent);
            } catch (ParseException ex) {
                if (firstFailure == null) {
                    firstFailure = ex;
                }
            }
        }
        throw firstFailure;
    }
}
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
*
* @author arivera
*/
public class ThunderbirdMboxParser {
/** Serial version UID */
private static final long serialVersionUID = -1762689436731160661L;
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.application("mbox"));
public static final String MBOX_MIME_TYPE = "application/mbox";
public static final String MBOX_RECORD_DIVIDER = "From ";
private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");
private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("<(.*@.*)>");
private static final String EMAIL_HEADER_METADATA_PREFIX = "MboxParser-";
private static final String EMAIL_FROMLINE_METADATA = EMAIL_HEADER_METADATA_PREFIX + "from";
private ThunderbirdXHTMLContentHandler xhtml = null;
private enum ParseStates {
START, IN_HEADER, IN_CONTENT
}
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
}
public void parse(
InputStream stream, ContentHandler handler,
ThunderbirdMetadata metadata, ParseContext context)
throws IOException, TikaException, SAXException {
InputStreamReader isr;
try {
// Headers are going to be 7-bit ascii
isr = new InputStreamReader(stream, "US-ASCII");
} catch (UnsupportedEncodingException e) {
throw new TikaException("US-ASCII is not supported!", e);
}
BufferedReader reader = new BufferedReader(isr);
metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
metadata.set(Metadata.CONTENT_ENCODING, "us-ascii");
xhtml = new ThunderbirdXHTMLContentHandler(handler, metadata);
xhtml.startDocument();
ThunderbirdMboxParser.ParseStates parseState = ThunderbirdMboxParser.ParseStates.START;
String multiLine = null;
boolean inQuote = false;
int numEmails = 0;
// We're going to scan, line-by-line, for a line that starts with
// "From "
for (String curLine = reader.readLine(); curLine != null; curLine = reader.readLine())
{
boolean newMessage = curLine.startsWith(MBOX_RECORD_DIVIDER);
if (newMessage) {
numEmails += 1;
}
switch (parseState) {
case START:
if (newMessage) {
parseState = ThunderbirdMboxParser.ParseStates.IN_HEADER;
newMessage = false;
// Fall through to IN_HEADER
} else {
break;
}
case IN_HEADER:
if (newMessage) {
saveHeaderInMetadata(numEmails, metadata, multiLine);
//saveHeaderInMetadata(numEmails, metadata, curLine);
multiLine = curLine;
}
//I think this is never going to be true
else if (curLine.length() == 0)
{
// Blank line is signal that we're transitioning to the content.
saveHeaderInMetadata(numEmails, metadata, multiLine);
parseState = ThunderbirdMboxParser.ParseStates.IN_CONTENT;
// Mimic what PackageParser does between entries.
xhtml.startElement("div", "class", "email-entry");
xhtml.startElement("p");
inQuote = false;
}
else if ((curLine.startsWith(" ") || curLine.startsWith("\t")) )
{
multiLine += " " + curLine.trim();
}
else
{
saveHeaderInMetadata(numEmails, metadata, multiLine);
multiLine = curLine;
}
break;
// TODO - use real email parsing support so we can correctly handle
// things like multipart messages and quoted-printable encoding.
// We'd also want this for charset handling, where content isn't 7-bit
// ascii.
case IN_CONTENT:
if (newMessage) {
endMessage(inQuote);
parseState = ThunderbirdMboxParser.ParseStates.IN_HEADER;
multiLine = curLine;
} else {
boolean quoted = curLine.startsWith(">");
if (inQuote) {
if (!quoted) {
xhtml.endElement("q");
inQuote = false;
}
} else if (quoted) {
xhtml.startElement("q");
inQuote = true;
}
xhtml.characters(curLine);
// For plain text email, each line is a real break position.
xhtml.element("br", "");
}
}
}
if (parseState == ThunderbirdMboxParser.ParseStates.IN_HEADER) {
saveHeaderInMetadata(numEmails, metadata, multiLine);
} else if (parseState == ThunderbirdMboxParser.ParseStates.IN_CONTENT) {
endMessage(inQuote);
}
xhtml.endDocument();
}
private void endMessage(boolean inQuote) throws SAXException {
if (inQuote) {
xhtml.endElement("q");
}
xhtml.endElement("p");
xhtml.endElement("div");
}
/**
 * Records one line of an mbox message header in the given metadata.
 * <p>
 * A line that starts with {@code MBOX_RECORD_DIVIDER} is stored, with the
 * divider stripped, under {@code EMAIL_FROMLINE_METADATA}. Any other line
 * must match {@code EMAIL_HEADER_PATTERN}; lines that do not match are
 * silently skipped. Well-known headers (From, To, Cc, Bcc, Subject, Date,
 * Message-Id, In-Reply-To, Content-Type) are mapped to the corresponding
 * {@code ThunderbirdMetadata} properties; every other header is stored under
 * {@code EMAIL_HEADER_METADATA_PREFIX} followed by the lower-cased tag.
 *
 * @param numEmails not used by this method; kept so existing callers compile
 * @param metadata  destination for the extracted values
 * @param curLine   raw header line; {@code null} is ignored
 */
private void saveHeaderInMetadata(int numEmails, ThunderbirdMetadata metadata, String curLine) {
    if (curLine == null) {
        return;
    }
    if (curLine.startsWith(MBOX_RECORD_DIVIDER)) {
        metadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
        return;
    }

    Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
    if (!headerMatcher.matches()) {
        return; // ignore malformed header lines
    }

    // Lower-case with a fixed locale: the tag becomes part of a metadata key
    // below, and the default-locale variant would produce different keys on
    // e.g. Turkish systems ("I" -> dotless "i").
    String headerTag = headerMatcher.group(1).toLowerCase(Locale.US);
    String headerContent = headerMatcher.group(2);

    if (headerTag.equalsIgnoreCase("From")) {
        metadata.add(ThunderbirdMetadata.AUTHOR, headerContent);
        metadata.add(ThunderbirdMetadata.CREATOR, headerContent);
    } else if (headerTag.equalsIgnoreCase("To")
            || headerTag.equalsIgnoreCase("Cc")
            || headerTag.equalsIgnoreCase("Bcc")) {
        // Store the bare address when the pattern finds one; otherwise fall
        // back to the whole header value if it at least contains an '@'.
        Matcher address = EMAIL_ADDRESS_PATTERN.matcher(headerContent);
        if (address.find()) {
            metadata.add(ThunderbirdMetadata.MESSAGE_RECIPIENT_ADDRESS, address.group(1));
        } else if (headerContent.indexOf('@') > -1) {
            metadata.add(ThunderbirdMetadata.MESSAGE_RECIPIENT_ADDRESS, headerContent);
        }

        String property = ThunderbirdMetadata.MESSAGE_TO;
        if (headerTag.equalsIgnoreCase("Cc")) {
            property = ThunderbirdMetadata.MESSAGE_CC;
        } else if (headerTag.equalsIgnoreCase("Bcc")) {
            property = ThunderbirdMetadata.MESSAGE_BCC;
        }
        metadata.add(property, headerContent);
    } else if (headerTag.equalsIgnoreCase("Subject")) {
        metadata.add(ThunderbirdMetadata.SUBJECT, headerContent);
        metadata.add(ThunderbirdMetadata.TITLE, headerContent);
    } else if (headerTag.equalsIgnoreCase("Date")) {
        try {
            Date date = parseDate(headerContent);
            metadata.set(ThunderbirdMetadata.DATE, date);
            metadata.set(ThunderbirdMetadata.CREATION_DATE, date);
        } catch (ParseException ignored) {
            // Best effort: a date in an unrecognized format is simply not recorded.
        }
    } else if (headerTag.equalsIgnoreCase("Message-Id")) {
        metadata.add(ThunderbirdMetadata.IDENTIFIER, headerContent);
    } else if (headerTag.equalsIgnoreCase("In-Reply-To")) {
        metadata.add(ThunderbirdMetadata.RELATION, headerContent);
    } else if (headerTag.equalsIgnoreCase("Content-Type")) {
        // TODO - key off content-type in headers to
        // set mapping to use for content and convert if necessary.
        metadata.add(ThunderbirdMetadata.CONTENT_TYPE, headerContent);
        metadata.add(ThunderbirdMetadata.FORMAT, headerContent);
    } else {
        metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
    }
}
/**
 * Parses the value of an e-mail "Date" header.
 * <p>
 * The value must follow the pattern {@code EEE, d MMM yyyy HH:mm:ss Z}
 * with English (US) day and month names. A new formatter is created on
 * every call, so no formatter state is shared between callers.
 *
 * @param headerContent text of the Date header
 * @return the parsed date
 * @throws ParseException if the text does not match the expected pattern
 */
public static Date parseDate(String headerContent) throws ParseException {
    final String headerDatePattern = "EEE, d MMM yyyy HH:mm:ss Z";
    return new SimpleDateFormat(headerDatePattern, Locale.US).parse(headerContent);
}
}

View File

@ -1,447 +1,447 @@
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.Serializable;
import java.text.DateFormat;
import java.text.DateFormatSymbols;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import org.apache.tika.metadata.*;
/**
 * Multi-valued, string-keyed metadata store used by the Thunderbird parser.
 * <p>
 * Each metadata name maps to an ordered list of string values. The property
 * name constants used by callers (for example {@code AUTHOR} or
 * {@code SUBJECT}) are not declared here; they are inherited from the
 * implemented Tika metadata interfaces. Instances are backed by a plain
 * {@link HashMap} and perform no synchronization of their own.
 *
 * @author arivera
 */
public class ThunderbirdMetadata implements CreativeCommons, DublinCore, Geographic, HttpHeaders,
        IPTC, Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
        Serializable {

    /** Number of calls to {@link #set(String, String)}; written but never read in this class. */
    private int strArrCount = 0;

    /** Serial version UID */
    private static final long serialVersionUID = 5623926545693153182L;

    /**
     * A map of all metadata attributes: metadata name to its list of values.
     */
    private final Map<String, ArrayList<String>> metadata;

    /**
     * The UTC time zone. Not sure if {@link TimeZone#getTimeZone(String)}
     * understands "UTC" in all environments, but it'll fall back to GMT
     * in such cases, which is in practice equivalent to UTC.
     */
    private static final TimeZone UTC = TimeZone.getTimeZone("UTC");

    /**
     * Custom time zone used to interpret date values without a time
     * component in a way that most likely falls within the same day
     * regardless of in which time zone it is later interpreted. For
     * example, the "2012-02-17" date would map to "2012-02-17T12:00:00Z"
     * (instead of the default "2012-02-17T00:00:00Z"), which would still
     * map to "2012-02-17" if interpreted in say Pacific time (while the
     * default mapping would result in "2012-02-16" for UTC-8).
     */
    private static final TimeZone MIDDAY = TimeZone.getTimeZone("GMT-12:00");

    /**
     * Some parsers will have the date as a ISO-8601 string
     * already, and will set that into the Metadata object.
     * So we can return Date objects for these, this is the
     * list (in preference order) of the various ISO-8601
     * variants that we try when processing a date based
     * property.
     */
    private static final DateFormat[] iso8601InputFormats = new DateFormat[] {
        // yyyy-mm-ddThh...
        createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", UTC),   // UTC/Zulu
        createDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", null),    // With timezone
        createDateFormat("yyyy-MM-dd'T'HH:mm:ss", null),     // Without timezone
        // yyyy-mm-dd hh...
        createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", UTC),   // UTC/Zulu
        createDateFormat("yyyy-MM-dd' 'HH:mm:ssZ", null),    // With timezone
        createDateFormat("yyyy-MM-dd' 'HH:mm:ss", null),     // Without timezone
        // Date without time, set to Midday UTC
        createDateFormat("yyyy-MM-dd", MIDDAY),              // Normal date format
        createDateFormat("yyyy:MM:dd", MIDDAY),              // Image (IPTC/EXIF) format
    };

    /**
     * Builds a date format with US date symbols.
     *
     * @param format   {@link SimpleDateFormat} pattern
     * @param timezone time zone to apply, or {@code null} to keep the formatter's default
     * @return the configured format
     */
    private static DateFormat createDateFormat(String format, TimeZone timezone) {
        SimpleDateFormat sdf =
                new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
        if (timezone != null) {
            sdf.setTimeZone(timezone);
        }
        return sdf;
    }

    /**
     * Parses the given date string. This method is synchronized to prevent
     * concurrent access to the thread-unsafe date formats.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date date string
     * @return parsed date, or <code>null</code> if the date can't be parsed
     */
    private static synchronized Date parseDate(String date) {
        // Java doesn't like timezones in the form ss+hh:mm
        // It only likes the hhmm form, without the colon
        int n = date.length();
        // The length guard keeps short strings from triggering a
        // StringIndexOutOfBoundsException; they simply fail to parse below.
        if (n >= 6
                && date.charAt(n - 3) == ':'
                && (date.charAt(n - 6) == '+' || date.charAt(n - 6) == '-')) {
            date = date.substring(0, n - 3) + date.substring(n - 2);
        }

        // Try several different ISO-8601 variants
        for (DateFormat format : iso8601InputFormats) {
            try {
                return format.parse(date);
            } catch (ParseException ignored) {
                // Not this variant; try the next one.
            }
        }
        return null;
    }

    /**
     * Returns a ISO 8601 representation of the given date, expressed in UTC.
     * Only a call-local calendar is used, so no synchronization is required.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date given date
     * @return ISO 8601 date string
     */
    private static String formatDate(Date date) {
        Calendar calendar = GregorianCalendar.getInstance(UTC, Locale.US);
        calendar.setTime(date);
        return String.format(
                "%04d-%02d-%02dT%02d:%02d:%02dZ",
                calendar.get(Calendar.YEAR),
                calendar.get(Calendar.MONTH) + 1, // Calendar months are zero-based
                calendar.get(Calendar.DAY_OF_MONTH),
                calendar.get(Calendar.HOUR_OF_DAY),
                calendar.get(Calendar.MINUTE),
                calendar.get(Calendar.SECOND));
    }

    /**
     * Constructs a new, empty metadata.
     */
    public ThunderbirdMetadata() {
        metadata = new HashMap<String, ArrayList<String>>();
    }

    /**
     * Returns true if named value is multivalued.
     *
     * @param name
     *          name of metadata
     * @return true is named value is multivalued, false if single value or null
     */
    public boolean isMultiValued(final String name) {
        ArrayList<String> values = metadata.get(name);
        return values != null && values.size() > 1;
    }

    /**
     * Returns the names contained in the metadata.
     *
     * @return a new list holding the metadata names; changes to it do not
     *         affect this object
     */
    public ArrayList<String> names() {
        return new ArrayList<String>(metadata.keySet());
    }

    /**
     * Get the value associated to a metadata name. If many values are associated
     * to the specified name, then the first one is returned.
     *
     * @param name
     *          of the metadata.
     * @return the value associated to the specified metadata name, or
     *         <code>null</code> if the name has no values
     */
    public String get(final String name) {
        ArrayList<String> values = metadata.get(name);
        // The list can be empty if a caller cleared the live list obtained
        // from getValues(); treat that the same as "not set".
        if (values == null || values.isEmpty()) {
            return null;
        }
        return values.get(0);
    }

    /**
     * Returns the value (if any) of the identified metadata property.
     *
     * @since Apache Tika 0.7
     * @param property property definition
     * @return property value, or <code>null</code> if the property is not set
     */
    public String get(Property property) {
        return get(property.getName());
    }

    /**
     * Returns the value of the identified Integer based metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple integer property definition
     * @return property value as a Integer, or <code>null</code> if the property is not set, or not a valid Integer
     */
    public Integer getInt(Property property) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            return null;
        }
        if (property.getValueType() != Property.ValueType.INTEGER) {
            return null;
        }

        String v = get(property);
        if (v == null) {
            return null;
        }
        try {
            return Integer.valueOf(v);
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * Returns the value of the identified Date based metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple date property definition
     * @return property value as a Date, or <code>null</code> if the property is not set, or not a valid Date
     */
    public Date getDate(Property property) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            return null;
        }
        if (property.getValueType() != Property.ValueType.DATE) {
            return null;
        }

        String v = get(property);
        return (v != null) ? parseDate(v) : null;
    }

    /**
     * Get the values associated to a metadata name.
     *
     * @param name
     *          of the metadata.
     * @return the stored (live) list of values for the name, or a new empty
     *         list if the name is not present
     */
    public ArrayList<String> getValues(final String name) {
        return _getValues(name);
    }

    /**
     * Looks up the value list for a name without ever returning null.
     *
     * @param name metadata name
     * @return the stored list, or a new empty list that is not added to the map
     */
    private ArrayList<String> _getValues(final String name) {
        ArrayList<String> values = metadata.get(name);
        if (values == null) {
            values = new ArrayList<String>();
        }
        return values;
    }

    /**
     * Add a metadata name/value mapping. Add the specified value to the list of
     * values associated to the specified metadata name.
     *
     * @param name
     *          the metadata name.
     * @param value
     *          the metadata value.
     */
    public void add(final String name, final String value) {
        ArrayList<String> values = metadata.get(name);
        if (values == null) {
            set(name, value);
        } else {
            // The list is already stored in the map, so appending is enough.
            values.add(value);
        }
    }

    /**
     * Copy All key-value pairs from properties. Each property name is mapped
     * to a list holding just that property's value, replacing any values
     * previously stored under the same name.
     *
     * @param properties
     *          properties to copy from
     */
    public void setAll(Properties properties) {
        for (String name : properties.stringPropertyNames()) {
            // One fresh list per name; sharing a single list across names
            // would make every name report every property's value.
            ArrayList<String> single = new ArrayList<String>(1);
            single.add(properties.getProperty(name));
            metadata.put(name, single);
        }
    }

    /**
     * Set metadata name/value. Associate the specified value to the specified
     * metadata name. If some previous values were associated to this name, they
     * are removed.
     *
     * @param name
     *          the metadata name.
     * @param value
     *          the metadata value.
     */
    public void set(String name, String value) {
        // Always install a fresh single-element list so earlier values are
        // really discarded, as documented.
        ArrayList<String> replacement = new ArrayList<String>(1);
        replacement.add(value);
        metadata.put(name, replacement);
        ++strArrCount;
    }

    /**
     * Sets the value of the identified metadata property.
     *
     * @since Apache Tika 0.7
     * @param property property definition
     * @param value    property value
     */
    public void set(Property property, String value) {
        set(property.getName(), value);
    }

    /**
     * Sets the integer value of the identified metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple integer property definition
     * @param value    property value
     * @throws PropertyTypeException if the property is not a simple integer property
     */
    public void set(Property property, int value) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.INTEGER) {
            throw new PropertyTypeException(Property.ValueType.INTEGER, property.getValueType());
        }
        set(property.getName(), Integer.toString(value));
    }

    /**
     * Sets the real or rational value of the identified metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple real or simple rational property definition
     * @param value    property value
     * @throws PropertyTypeException if the property is not a simple real or rational property
     */
    public void set(Property property, double value) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.REAL &&
                property.getValueType() != Property.ValueType.RATIONAL) {
            throw new PropertyTypeException(Property.ValueType.REAL, property.getValueType());
        }
        set(property.getName(), Double.toString(value));
    }

    /**
     * Sets the date value of the identified metadata property. The date is
     * stored as an ISO 8601 string in UTC.
     *
     * @since Apache Tika 0.8
     * @param property simple date property definition
     * @param date     property value
     * @throws PropertyTypeException if the property is not a simple date property
     */
    public void set(Property property, Date date) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.DATE) {
            throw new PropertyTypeException(Property.ValueType.DATE, property.getValueType());
        }
        set(property.getName(), formatDate(date));
    }

    /**
     * Remove a metadata and all its associated values.
     *
     * @param name
     *          metadata name to remove
     */
    public void remove(String name) {
        metadata.remove(name);
    }

    /**
     * Returns the number of metadata names in this metadata.
     *
     * @return number of metadata names
     */
    public int size() {
        return metadata.size();
    }

    /**
     * Two metadata objects are equal when they hold the same names and each
     * name maps to the same values in the same order.
     *
     * @param o object to compare with
     * @return true if {@code o} is a {@code ThunderbirdMetadata} with equal content
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof ThunderbirdMetadata)) {
            return false;
        }
        // Map and list equality are element-wise and null-safe.
        return metadata.equals(((ThunderbirdMetadata) o).metadata);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}.
     *
     * @return hash of the name/value map
     */
    @Override
    public int hashCode() {
        return metadata.hashCode();
    }

    /**
     * Renders every name/value pair as {@code name=value} followed by a
     * single space, in the map's iteration order.
     *
     * @return textual dump of the metadata
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder();
        for (Map.Entry<String, ArrayList<String>> entry : metadata.entrySet()) {
            for (String value : entry.getValue()) {
                buf.append(entry.getKey()).append("=").append(value).append(" ");
            }
        }
        return buf.toString();
    }
}
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.sleuthkit.autopsy.thunderbirdparser;
import java.io.Serializable;
import java.text.DateFormat;
import java.text.DateFormatSymbols;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import org.apache.tika.metadata.*;
/**
 * Multi-valued, string-keyed metadata store used by the Thunderbird parser.
 * <p>
 * Each metadata name maps to an ordered list of string values. The property
 * name constants used by callers (for example {@code AUTHOR} or
 * {@code SUBJECT}) are not declared here; they are inherited from the
 * implemented Tika metadata interfaces. Instances are backed by a plain
 * {@link HashMap} and perform no synchronization of their own.
 *
 * @author arivera
 */
public class ThunderbirdMetadata implements CreativeCommons, DublinCore, Geographic, HttpHeaders,
        IPTC, Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
        Serializable {

    /** Number of calls to {@link #set(String, String)}; written but never read in this class. */
    private int strArrCount = 0;

    /** Serial version UID */
    private static final long serialVersionUID = 5623926545693153182L;

    /**
     * A map of all metadata attributes: metadata name to its list of values.
     */
    private final Map<String, ArrayList<String>> metadata;

    /**
     * The UTC time zone. Not sure if {@link TimeZone#getTimeZone(String)}
     * understands "UTC" in all environments, but it'll fall back to GMT
     * in such cases, which is in practice equivalent to UTC.
     */
    private static final TimeZone UTC = TimeZone.getTimeZone("UTC");

    /**
     * Custom time zone used to interpret date values without a time
     * component in a way that most likely falls within the same day
     * regardless of in which time zone it is later interpreted. For
     * example, the "2012-02-17" date would map to "2012-02-17T12:00:00Z"
     * (instead of the default "2012-02-17T00:00:00Z"), which would still
     * map to "2012-02-17" if interpreted in say Pacific time (while the
     * default mapping would result in "2012-02-16" for UTC-8).
     */
    private static final TimeZone MIDDAY = TimeZone.getTimeZone("GMT-12:00");

    /**
     * Some parsers will have the date as a ISO-8601 string
     * already, and will set that into the Metadata object.
     * So we can return Date objects for these, this is the
     * list (in preference order) of the various ISO-8601
     * variants that we try when processing a date based
     * property.
     */
    private static final DateFormat[] iso8601InputFormats = new DateFormat[] {
        // yyyy-mm-ddThh...
        createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", UTC),   // UTC/Zulu
        createDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", null),    // With timezone
        createDateFormat("yyyy-MM-dd'T'HH:mm:ss", null),     // Without timezone
        // yyyy-mm-dd hh...
        createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", UTC),   // UTC/Zulu
        createDateFormat("yyyy-MM-dd' 'HH:mm:ssZ", null),    // With timezone
        createDateFormat("yyyy-MM-dd' 'HH:mm:ss", null),     // Without timezone
        // Date without time, set to Midday UTC
        createDateFormat("yyyy-MM-dd", MIDDAY),              // Normal date format
        createDateFormat("yyyy:MM:dd", MIDDAY),              // Image (IPTC/EXIF) format
    };

    /**
     * Builds a date format with US date symbols.
     *
     * @param format   {@link SimpleDateFormat} pattern
     * @param timezone time zone to apply, or {@code null} to keep the formatter's default
     * @return the configured format
     */
    private static DateFormat createDateFormat(String format, TimeZone timezone) {
        SimpleDateFormat sdf =
                new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
        if (timezone != null) {
            sdf.setTimeZone(timezone);
        }
        return sdf;
    }

    /**
     * Parses the given date string. This method is synchronized to prevent
     * concurrent access to the thread-unsafe date formats.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date date string
     * @return parsed date, or <code>null</code> if the date can't be parsed
     */
    private static synchronized Date parseDate(String date) {
        // Java doesn't like timezones in the form ss+hh:mm
        // It only likes the hhmm form, without the colon
        int n = date.length();
        // The length guard keeps short strings from triggering a
        // StringIndexOutOfBoundsException; they simply fail to parse below.
        if (n >= 6
                && date.charAt(n - 3) == ':'
                && (date.charAt(n - 6) == '+' || date.charAt(n - 6) == '-')) {
            date = date.substring(0, n - 3) + date.substring(n - 2);
        }

        // Try several different ISO-8601 variants
        for (DateFormat format : iso8601InputFormats) {
            try {
                return format.parse(date);
            } catch (ParseException ignored) {
                // Not this variant; try the next one.
            }
        }
        return null;
    }

    /**
     * Returns a ISO 8601 representation of the given date, expressed in UTC.
     * Only a call-local calendar is used, so no synchronization is required.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date given date
     * @return ISO 8601 date string
     */
    private static String formatDate(Date date) {
        Calendar calendar = GregorianCalendar.getInstance(UTC, Locale.US);
        calendar.setTime(date);
        return String.format(
                "%04d-%02d-%02dT%02d:%02d:%02dZ",
                calendar.get(Calendar.YEAR),
                calendar.get(Calendar.MONTH) + 1, // Calendar months are zero-based
                calendar.get(Calendar.DAY_OF_MONTH),
                calendar.get(Calendar.HOUR_OF_DAY),
                calendar.get(Calendar.MINUTE),
                calendar.get(Calendar.SECOND));
    }

    /**
     * Constructs a new, empty metadata.
     */
    public ThunderbirdMetadata() {
        metadata = new HashMap<String, ArrayList<String>>();
    }

    /**
     * Returns true if named value is multivalued.
     *
     * @param name
     *          name of metadata
     * @return true is named value is multivalued, false if single value or null
     */
    public boolean isMultiValued(final String name) {
        ArrayList<String> values = metadata.get(name);
        return values != null && values.size() > 1;
    }

    /**
     * Returns the names contained in the metadata.
     *
     * @return a new list holding the metadata names; changes to it do not
     *         affect this object
     */
    public ArrayList<String> names() {
        return new ArrayList<String>(metadata.keySet());
    }

    /**
     * Get the value associated to a metadata name. If many values are associated
     * to the specified name, then the first one is returned.
     *
     * @param name
     *          of the metadata.
     * @return the value associated to the specified metadata name, or
     *         <code>null</code> if the name has no values
     */
    public String get(final String name) {
        ArrayList<String> values = metadata.get(name);
        // The list can be empty if a caller cleared the live list obtained
        // from getValues(); treat that the same as "not set".
        if (values == null || values.isEmpty()) {
            return null;
        }
        return values.get(0);
    }

    /**
     * Returns the value (if any) of the identified metadata property.
     *
     * @since Apache Tika 0.7
     * @param property property definition
     * @return property value, or <code>null</code> if the property is not set
     */
    public String get(Property property) {
        return get(property.getName());
    }

    /**
     * Returns the value of the identified Integer based metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple integer property definition
     * @return property value as a Integer, or <code>null</code> if the property is not set, or not a valid Integer
     */
    public Integer getInt(Property property) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            return null;
        }
        if (property.getValueType() != Property.ValueType.INTEGER) {
            return null;
        }

        String v = get(property);
        if (v == null) {
            return null;
        }
        try {
            return Integer.valueOf(v);
        } catch (NumberFormatException e) {
            return null;
        }
    }

    /**
     * Returns the value of the identified Date based metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple date property definition
     * @return property value as a Date, or <code>null</code> if the property is not set, or not a valid Date
     */
    public Date getDate(Property property) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            return null;
        }
        if (property.getValueType() != Property.ValueType.DATE) {
            return null;
        }

        String v = get(property);
        return (v != null) ? parseDate(v) : null;
    }

    /**
     * Get the values associated to a metadata name.
     *
     * @param name
     *          of the metadata.
     * @return the stored (live) list of values for the name, or a new empty
     *         list if the name is not present
     */
    public ArrayList<String> getValues(final String name) {
        return _getValues(name);
    }

    /**
     * Looks up the value list for a name without ever returning null.
     *
     * @param name metadata name
     * @return the stored list, or a new empty list that is not added to the map
     */
    private ArrayList<String> _getValues(final String name) {
        ArrayList<String> values = metadata.get(name);
        if (values == null) {
            values = new ArrayList<String>();
        }
        return values;
    }

    /**
     * Add a metadata name/value mapping. Add the specified value to the list of
     * values associated to the specified metadata name.
     *
     * @param name
     *          the metadata name.
     * @param value
     *          the metadata value.
     */
    public void add(final String name, final String value) {
        ArrayList<String> values = metadata.get(name);
        if (values == null) {
            set(name, value);
        } else {
            // The list is already stored in the map, so appending is enough.
            values.add(value);
        }
    }

    /**
     * Copy All key-value pairs from properties. Each property name is mapped
     * to a list holding just that property's value, replacing any values
     * previously stored under the same name.
     *
     * @param properties
     *          properties to copy from
     */
    public void setAll(Properties properties) {
        for (String name : properties.stringPropertyNames()) {
            // One fresh list per name; sharing a single list across names
            // would make every name report every property's value.
            ArrayList<String> single = new ArrayList<String>(1);
            single.add(properties.getProperty(name));
            metadata.put(name, single);
        }
    }

    /**
     * Set metadata name/value. Associate the specified value to the specified
     * metadata name. If some previous values were associated to this name, they
     * are removed.
     *
     * @param name
     *          the metadata name.
     * @param value
     *          the metadata value.
     */
    public void set(String name, String value) {
        // Always install a fresh single-element list so earlier values are
        // really discarded, as documented.
        ArrayList<String> replacement = new ArrayList<String>(1);
        replacement.add(value);
        metadata.put(name, replacement);
        ++strArrCount;
    }

    /**
     * Sets the value of the identified metadata property.
     *
     * @since Apache Tika 0.7
     * @param property property definition
     * @param value    property value
     */
    public void set(Property property, String value) {
        set(property.getName(), value);
    }

    /**
     * Sets the integer value of the identified metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple integer property definition
     * @param value    property value
     * @throws PropertyTypeException if the property is not a simple integer property
     */
    public void set(Property property, int value) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.INTEGER) {
            throw new PropertyTypeException(Property.ValueType.INTEGER, property.getValueType());
        }
        set(property.getName(), Integer.toString(value));
    }

    /**
     * Sets the real or rational value of the identified metadata property.
     *
     * @since Apache Tika 0.8
     * @param property simple real or simple rational property definition
     * @param value    property value
     * @throws PropertyTypeException if the property is not a simple real or rational property
     */
    public void set(Property property, double value) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.REAL &&
                property.getValueType() != Property.ValueType.RATIONAL) {
            throw new PropertyTypeException(Property.ValueType.REAL, property.getValueType());
        }
        set(property.getName(), Double.toString(value));
    }

    /**
     * Sets the date value of the identified metadata property. The date is
     * stored as an ISO 8601 string in UTC.
     *
     * @since Apache Tika 0.8
     * @param property simple date property definition
     * @param date     property value
     * @throws PropertyTypeException if the property is not a simple date property
     */
    public void set(Property property, Date date) {
        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
        }
        if (property.getValueType() != Property.ValueType.DATE) {
            throw new PropertyTypeException(Property.ValueType.DATE, property.getValueType());
        }
        set(property.getName(), formatDate(date));
    }

    /**
     * Remove a metadata and all its associated values.
     *
     * @param name
     *          metadata name to remove
     */
    public void remove(String name) {
        metadata.remove(name);
    }

    /**
     * Returns the number of metadata names in this metadata.
     *
     * @return number of metadata names
     */
    public int size() {
        return metadata.size();
    }

    /**
     * Two metadata objects are equal when they hold the same names and each
     * name maps to the same values in the same order.
     *
     * @param o object to compare with
     * @return true if {@code o} is a {@code ThunderbirdMetadata} with equal content
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof ThunderbirdMetadata)) {
            return false;
        }
        // Map and list equality are element-wise and null-safe.
        return metadata.equals(((ThunderbirdMetadata) o).metadata);
    }

    /**
     * Hash code consistent with {@link #equals(Object)}.
     *
     * @return hash of the name/value map
     */
    @Override
    public int hashCode() {
        return metadata.hashCode();
    }

    /**
     * Renders every name/value pair as {@code name=value} followed by a
     * single space, in the map's iteration order.
     *
     * @return textual dump of the metadata
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder();
        for (Map.Entry<String, ArrayList<String>> entry : metadata.entrySet()) {
            for (String value : entry.getValue()) {
                buf.append(entry.getKey()).append("=").append(value).append(" ");
            }
        }
        return buf.toString();
    }
}

View File

@ -2,9 +2,9 @@
<!DOCTYPE filesystem PUBLIC "-//NetBeans//DTD Filesystem 1.2//EN" "http://www.netbeans.org/dtds/filesystem-1_2.dtd">
<filesystem>
<folder name="Services">
<file name="org-sleuthkit-autopsy-mboxparser-MboxFileIngestService.instance">
<file name="org-sleuthkit-autopsy-thunderbirdparser-ThunderbirdMboxFilervice.instance">
<attr name="instanceOf" stringvalue="org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile"/>
<attr name="instanceCreate" methodvalue="org.sleuthkit.autopsy.mboxparser.MboxFileIngestService.getDefault"/>
<attr name="instanceCreate" methodvalue="org.sleuthkit.autopsy.thunderbirdparser.ThunderbirdMboxFileIngestService.getDefault"/>
<attr name="position" intvalue="1100"/>
</file>
</folder>