diff --git a/ThunderbirdMboxEmailModule/build.xml b/ThunderbirdMboxEmailModule/build.xml new file mode 100644 index 0000000000..610fee55de --- /dev/null +++ b/ThunderbirdMboxEmailModule/build.xml @@ -0,0 +1,8 @@ + + + + + + Builds, tests, and runs the project org.sleuthkit.autopsy.mboxparser. + + diff --git a/ThunderbirdMboxEmailModule/manifest.mf b/ThunderbirdMboxEmailModule/manifest.mf new file mode 100644 index 0000000000..ce08e338cd --- /dev/null +++ b/ThunderbirdMboxEmailModule/manifest.mf @@ -0,0 +1,6 @@ +Manifest-Version: 1.0 +OpenIDE-Module: org.sleuthkit.autopsy.mboxparser +OpenIDE-Module-Layer: org/sleuthkit/autopsy/mboxparser/layer.xml +OpenIDE-Module-Localizing-Bundle: org/sleuthkit/autopsy/mboxparser/Bundle.properties +OpenIDE-Module-Specification-Version: 1.0 + diff --git a/ThunderbirdMboxEmailModule/nbproject/build-impl.xml b/ThunderbirdMboxEmailModule/nbproject/build-impl.xml new file mode 100644 index 0000000000..445bd74ba3 --- /dev/null +++ b/ThunderbirdMboxEmailModule/nbproject/build-impl.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + You must set 'suite.dir' to point to your containing module suite + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/ThunderbirdMboxEmailModule/nbproject/platform.properties b/ThunderbirdMboxEmailModule/nbproject/platform.properties new file mode 100644 index 0000000000..1d9ac9bcfa --- /dev/null +++ b/ThunderbirdMboxEmailModule/nbproject/platform.properties @@ -0,0 +1,100 @@ +cluster.path=\ + ${nbplatform.active.dir}/harness:\ + ${nbplatform.active.dir}/java:\ + ${nbplatform.active.dir}/platform +disabled.modules=\ + org.apache.tools.ant.module,\ + org.netbeans.api.debugger.jpda,\ + org.netbeans.api.java,\ + org.netbeans.libs.cglib,\ + org.netbeans.libs.javacapi,\ + org.netbeans.libs.javacimpl,\ + org.netbeans.libs.jsr223,\ + org.netbeans.libs.springframework,\ + org.netbeans.modules.ant.browsetask,\ + org.netbeans.modules.ant.debugger,\ + org.netbeans.modules.ant.freeform,\ + 
org.netbeans.modules.ant.grammar,\ + org.netbeans.modules.ant.kit,\ + org.netbeans.modules.beans,\ + org.netbeans.modules.classfile,\ + org.netbeans.modules.dbschema,\ + org.netbeans.modules.debugger.jpda,\ + org.netbeans.modules.debugger.jpda.ant,\ + org.netbeans.modules.debugger.jpda.projects,\ + org.netbeans.modules.debugger.jpda.ui,\ + org.netbeans.modules.form,\ + org.netbeans.modules.form.j2ee,\ + org.netbeans.modules.form.kit,\ + org.netbeans.modules.hibernate,\ + org.netbeans.modules.hibernatelib,\ + org.netbeans.modules.hudson.ant,\ + org.netbeans.modules.hudson.maven,\ + org.netbeans.modules.i18n,\ + org.netbeans.modules.i18n.form,\ + org.netbeans.modules.j2ee.core.utilities,\ + org.netbeans.modules.j2ee.eclipselink,\ + org.netbeans.modules.j2ee.eclipselinkmodelgen,\ + org.netbeans.modules.j2ee.jpa.refactoring,\ + org.netbeans.modules.j2ee.jpa.verification,\ + org.netbeans.modules.j2ee.metadata,\ + org.netbeans.modules.j2ee.metadata.model.support,\ + org.netbeans.modules.j2ee.persistence,\ + org.netbeans.modules.j2ee.persistence.kit,\ + org.netbeans.modules.j2ee.persistenceapi,\ + org.netbeans.modules.j2ee.toplinklib,\ + org.netbeans.modules.java.api.common,\ + org.netbeans.modules.java.debug,\ + org.netbeans.modules.java.editor,\ + org.netbeans.modules.java.editor.lib,\ + org.netbeans.modules.java.examples,\ + org.netbeans.modules.java.freeform,\ + org.netbeans.modules.java.guards,\ + org.netbeans.modules.java.helpset,\ + org.netbeans.modules.java.hints,\ + org.netbeans.modules.java.hints.processor,\ + org.netbeans.modules.java.j2seplatform,\ + org.netbeans.modules.java.j2seproject,\ + org.netbeans.modules.java.kit,\ + org.netbeans.modules.java.lexer,\ + org.netbeans.modules.java.navigation,\ + org.netbeans.modules.java.platform,\ + org.netbeans.modules.java.preprocessorbridge,\ + org.netbeans.modules.java.project,\ + org.netbeans.modules.java.source,\ + org.netbeans.modules.java.source.ant,\ + org.netbeans.modules.java.sourceui,\ + 
org.netbeans.modules.javadoc,\ + org.netbeans.modules.javawebstart,\ + org.netbeans.modules.jellytools,\ + org.netbeans.modules.jellytools.java,\ + org.netbeans.modules.junit,\ + org.netbeans.modules.maven,\ + org.netbeans.modules.maven.coverage,\ + org.netbeans.modules.maven.embedder,\ + org.netbeans.modules.maven.grammar,\ + org.netbeans.modules.maven.graph,\ + org.netbeans.modules.maven.hints,\ + org.netbeans.modules.maven.indexer,\ + org.netbeans.modules.maven.junit,\ + org.netbeans.modules.maven.kit,\ + org.netbeans.modules.maven.model,\ + org.netbeans.modules.maven.osgi,\ + org.netbeans.modules.maven.persistence,\ + org.netbeans.modules.maven.repository,\ + org.netbeans.modules.maven.search,\ + org.netbeans.modules.maven.spring,\ + org.netbeans.modules.projectimport.eclipse.core,\ + org.netbeans.modules.projectimport.eclipse.j2se,\ + org.netbeans.modules.refactoring.java,\ + org.netbeans.modules.spellchecker.bindings.java,\ + org.netbeans.modules.spring.beans,\ + org.netbeans.modules.swingapp,\ + org.netbeans.modules.websvc.jaxws21,\ + org.netbeans.modules.websvc.jaxws21api,\ + org.netbeans.modules.websvc.saas.codegen.java,\ + org.netbeans.modules.xml.jaxb,\ + org.netbeans.modules.xml.tools.java,\ + org.openide.compat,\ + org.openide.util.enumerations +nbplatform.active=default diff --git a/ThunderbirdMboxEmailModule/nbproject/project.properties b/ThunderbirdMboxEmailModule/nbproject/project.properties new file mode 100644 index 0000000000..4b781ec33c --- /dev/null +++ b/ThunderbirdMboxEmailModule/nbproject/project.properties @@ -0,0 +1,2 @@ +javac.source=1.6 +javac.compilerargs=-Xlint -Xlint:-serial diff --git a/ThunderbirdMboxEmailModule/nbproject/project.xml b/ThunderbirdMboxEmailModule/nbproject/project.xml new file mode 100644 index 0000000000..2bb3a3a429 --- /dev/null +++ b/ThunderbirdMboxEmailModule/nbproject/project.xml @@ -0,0 +1,57 @@ + + + org.netbeans.modules.apisupport.project + + + org.sleuthkit.autopsy.mboxparser + + + + 
org.sleuthkit.autopsy.casemodule + + + + 1 + 1.0 + + + + org.sleuthkit.autopsy.coreutils + + + + 0-1 + 0.0 + + + + org.sleuthkit.autopsy.datamodel + + + + 1 + 1.0 + + + + org.sleuthkit.autopsy.ingest + + + + 0-1 + 1.0 + + + + + + ext/tika-core-1.1.jar + release/modules/ext/tika-core-1.1.jar + + + ext/tika-parsers-1.1.jar + release/modules/ext/tika-parsers-1.1.jar + + + + diff --git a/ThunderbirdMboxEmailModule/nbproject/suite.properties b/ThunderbirdMboxEmailModule/nbproject/suite.properties new file mode 100644 index 0000000000..364e160e16 --- /dev/null +++ b/ThunderbirdMboxEmailModule/nbproject/suite.properties @@ -0,0 +1 @@ +suite.dir=${basedir}/.. diff --git a/ThunderbirdMboxEmailModule/release/modules/ext/tika-core-1.1.jar b/ThunderbirdMboxEmailModule/release/modules/ext/tika-core-1.1.jar new file mode 100644 index 0000000000..7ad2be62be Binary files /dev/null and b/ThunderbirdMboxEmailModule/release/modules/ext/tika-core-1.1.jar differ diff --git a/ThunderbirdMboxEmailModule/release/modules/ext/tika-parsers-1.1.jar b/ThunderbirdMboxEmailModule/release/modules/ext/tika-parsers-1.1.jar new file mode 100644 index 0000000000..ad82942ae3 Binary files /dev/null and b/ThunderbirdMboxEmailModule/release/modules/ext/tika-parsers-1.1.jar differ diff --git a/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/Bundle.properties b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/Bundle.properties new file mode 100644 index 0000000000..ce7758f02d --- /dev/null +++ b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/Bundle.properties @@ -0,0 +1 @@ +OpenIDE-Module-Name=ThunderbirdMboxEmailModule diff --git a/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdEmailParser.java b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdEmailParser.java new file mode 100644 index 0000000000..45c7e66029 --- /dev/null +++ 
b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdEmailParser.java
@@ -0,0 +1,183 @@
+package org.sleuthkit.autopsy.thunderbirdparser;
+
+import java.io.*;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Locale;
+import java.util.TimeZone;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Convenience wrapper that runs a {@link ThunderbirdMboxParser} over a stream
+ * and exposes the collected metadata and body text through simple getters.
+ *
+ * One of the {@code parse} methods must be called before any metadata or
+ * content getter is used; until then the metadata and content handler are
+ * unset and the getters fail with a NullPointerException.
+ */
+public class ThunderbirdEmailParser {
+
+    private static final Logger logger = Logger.getLogger(ThunderbirdEmailParser.class.getName());
+
+    /** Limit, in characters, applied to both Tika string extraction and the body handler. */
+    private static final int MAX_CONTENT_LENGTH = 10 * 1024 * 1024;
+
+    /** Bytes of the "From " separator that starts every mbox record. */
+    private static final byte[] MBOX_SIGNATURE = {'F', 'r', 'o', 'm', ' '};
+
+    private InputStream stream;
+    //Tika object
+    private Tika tika;
+    private ThunderbirdMetadata metadata;
+    private ContentHandler contentHandler;
+    private String mimeType;
+    private ThunderbirdMboxParser parser;
+    private ParseContext context;
+
+    private static ArrayList<String> tikaMimeTypes;
+
+    static
+    {
+        tikaMimeTypes = new ArrayList<String>();
+        tikaMimeTypes.add(MimeTypes.OCTET_STREAM);
+        tikaMimeTypes.add(MimeTypes.PLAIN_TEXT);
+        tikaMimeTypes.add(MimeTypes.XML);
+    }
+
+    /** Creates a parser with no stream attached; use {@link #parse(InputStream)}. */
+    public ThunderbirdEmailParser()
+    {
+        this.tika = new Tika();
+    }
+
+    /**
+     * Creates a parser bound to the given stream; use {@link #parse()}.
+     *
+     * @param inStream stream holding the mbox data
+     */
+    public ThunderbirdEmailParser(InputStream inStream)
+    {
+        this.tika = new Tika();
+        this.stream = inStream;
+    }
+
+    /**
+     * Creates a parser bound to a classpath resource.
+     *
+     * @param filepath resource name handed to {@link Class#getResourceAsStream(String)};
+     *                 note that this is a classpath lookup, not a file system path
+     */
+    public ThunderbirdEmailParser(String filepath)
+    {
+        this.tika = new Tika();
+        this.stream = this.getClass().getResourceAsStream(filepath);
+    }
+
+    /** Resets the per-parse state (metadata, parser, context, content handler). */
+    private void init() throws IOException
+    {
+        this.tika.setMaxStringLength(MAX_CONTENT_LENGTH);
+        this.metadata = new ThunderbirdMetadata();
+        //Set MIME Type
+        //this.mimeType = tika.detect(this.stream);
+        this.parser = new ThunderbirdMboxParser();
+        this.context = new ParseContext();
+        this.contentHandler = new BodyContentHandler(MAX_CONTENT_LENGTH);
+    }
+
+    /**
+     * Parses the stream supplied at construction time.
+     *
+     * @throws IllegalStateException if no stream was supplied (or the resource was not found)
+     */
+    public void parse() throws FileNotFoundException, IOException, SAXException, TikaException
+    {
+        if (this.stream == null) {
+            throw new IllegalStateException("No input stream was supplied to this parser");
+        }
+        init();
+        parser.parse(this.stream, this.contentHandler, this.metadata, context);
+    }
+
+    /**
+     * Parses the given stream, replacing any previously parsed state.
+     *
+     * @param inStream stream holding the mbox data; must not be null
+     * @throws IllegalArgumentException if {@code inStream} is null
+     */
+    public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException
+    {
+        if (inStream == null) {
+            throw new IllegalArgumentException("inStream must not be null");
+        }
+        init();
+        parser.parse(inStream, this.contentHandler, this.metadata, context);
+    }
+
+    /** @return the metadata of the last parse, or null if nothing was parsed yet */
+    public ThunderbirdMetadata getMetadata()
+    {
+        return this.metadata;
+    }
+
+
+    //Returns message content, i.e. plain text or html
+    public String getContent()
+    {
+        return this.contentHandler.toString();
+    }
+
+    /** Delegates to {@code Tika.detect(String)} for the given path. */
+    public String detectEmailFileFormat(String filepath) throws IOException
+    {
+        return this.tika.detect(filepath);
+    }
+
+    //Detects the mime type from the first few bytes of the document
+    public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName)
+    {
+        return this.tika.detect(firstFewBytes, inDocName);
+    }
+
+
+    /**
+     * Checks whether the buffer starts with the mbox record separator "From ".
+     * The bytes are compared directly so the result does not depend on the
+     * platform default charset.
+     *
+     * @param buffer leading bytes of the file; may be null or shorter than the signature
+     * @return true only if the first five bytes are exactly "From "
+     */
+    public boolean isValidMimeTypeMbox(byte[] buffer)
+    {
+        if (buffer == null || buffer.length < MBOX_SIGNATURE.length) {
+            return false;
+        }
+        for (int i = 0; i < MBOX_SIGNATURE.length; i++) {
+            if (buffer[i] != MBOX_SIGNATURE[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    //This assumes the file/stream was parsed since we are looking at the metadata
+    public boolean isValidMboxType()
+    {
+        // Constant-first comparison: a missing content type yields false instead of an NPE.
+        return "application/mbox".equals(this.metadata.get(Metadata.CONTENT_TYPE));
+    }
+
+    //Get email subject
+    public String getSubject()
+    {
+        return this.metadata.get(Metadata.SUBJECT);
+    }
+
+    public String getTitle()
+    {
+        return this.metadata.get(Metadata.TITLE);
+    }
+
+    /**
+     * Returns the message date as seconds since the Unix epoch.
+     *
+     * The stored value is expected in the form yyyy-MM-dd'T'HH:mm:ss'Z'. The 'Z'
+     * is a quoted literal in the pattern, so the formatter is pinned to UTC
+     * explicitly; otherwise the value would be read in the JVM default zone.
+     * NOTE(review): this assumes ThunderbirdMetadata stores dates in UTC
+     * (its formatDate helper does) - confirm for the date setter.
+     *
+     * @return epoch seconds, or 0 when no date is present or it cannot be parsed
+     */
+    public Long getDateCreated()
+    {
+        String datetime = this.metadata.get(Metadata.DATE);
+        if (datetime == null) {
+            return 0L;
+        }
+
+        SimpleDateFormat isoUtc = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US);
+        isoUtc.setTimeZone(TimeZone.getTimeZone("UTC"));
+        try {
+            return isoUtc.parse(datetime).getTime() / 1000;
+        } catch (ParseException ex) {
+            logger.log(Level.WARNING, "Unparseable message date: " + datetime, ex);
+            return 0L;
+        }
+    }
+
+    public String getContenType()
+    {
+        return this.metadata.get(Metadata.CONTENT_TYPE);
+    }
+
+    public String getContenEncoding()
+    {
+        return this.metadata.get(Metadata.CONTENT_ENCODING);
+    }
+
+    public String getFrom()
+    {
+        return this.metadata.get(Metadata.MESSAGE_FROM);
+    }
+
+    public String getTo()
+    {
+        return this.metadata.get(Metadata.MESSAGE_TO);
+    }
+
+    public String getCC()
+    {
+        return this.metadata.get(Metadata.MESSAGE_CC);
+    }
+
+    public String getBCC()
+    {
+        return this.metadata.get(Metadata.MESSAGE_BCC);
+    }
+
+    public String getRecipientAddress()
+    {
+        return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS);
+    }
+
+    /**
+     * @return the full media type string "application/mbox"; MediaType.getType()
+     *         alone would only yield the top-level type "application"
+     */
+    public String getMboxSupportedMediaType()
+    {
+        return MediaType.application("mbox").toString();
+    }
+}
diff --git a/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestService.java b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestService.java
new file mode 100644
index 0000000000..b02d899870
--- /dev/null
+++ b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxFileIngestService.java
@@ -0,0 +1,193 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2011 Basis Technology Corp.
+ * Contact: carrier sleuthkit org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.sleuthkit.autopsy.thunderbirdparser;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.apache.tika.exception.TikaException;
+import org.sleuthkit.autopsy.ingest.IngestManager;
+import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
+import org.sleuthkit.autopsy.ingest.IngestMessage;
+import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
+import org.sleuthkit.autopsy.ingest.IngestServiceAbstract.*;
+import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
+import org.sleuthkit.autopsy.ingest.ServiceDataEvent;
+import org.sleuthkit.datamodel.AbstractFile;
+import org.sleuthkit.datamodel.BlackboardArtifact;
+import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
+import org.sleuthkit.datamodel.BlackboardAttribute;
+import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
+import org.sleuthkit.datamodel.ReadContentInputStream;
+import org.sleuthkit.datamodel.TskCoreException;
+import org.sleuthkit.datamodel.TskException;
+import org.xml.sax.SAXException;
+
+/**
+ * File ingest service that checks whether a file starts with the mbox
+ * "From " separator and, if so, parses it and posts a TSK_EMAIL_MSG artifact
+ * with the extracted headers and body to the blackboard.
+ */
+public class ThunderbirdMboxFileIngestService implements IngestServiceAbstractFile {
+
+    private static final Logger logger = Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName());
+    private static ThunderbirdMboxFileIngestService instance = null;
+    private IngestManagerProxy managerProxy;
+    private static int messageId = 0;
+    private static final String classname = "Mbox Parser";
+    /** Number of leading bytes read to look for the mbox signature. */
+    private static final int SIGNATURE_PROBE_SIZE = 128;
+
+    public static synchronized ThunderbirdMboxFileIngestService getDefault() {
+        if (instance == null) {
+            instance = new ThunderbirdMboxFileIngestService();
+        }
+        return instance;
+    }
+
+    /**
+     * Parses the file if it looks like an mbox file and records the result as
+     * an email artifact. Failures are logged and never propagated.
+     *
+     * @param fsContent file handed over by the ingest manager
+     * @return always {@code ProcessResult.OK}
+     */
+    @Override
+    public ProcessResult process(AbstractFile fsContent) {
+        ThunderbirdEmailParser mbox = new ThunderbirdEmailParser();
+
+        if (!startsWithMboxSignature(mbox, fsContent)) {
+            return ProcessResult.OK;
+        }
+
+        managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "Processing " + fsContent.getName()));
+        try {
+            ReadContentInputStream contentStream = new ReadContentInputStream(fsContent);
+            mbox.parse(contentStream);
+            String content = mbox.getContent();
+            String from = mbox.getFrom();
+            String to = mbox.getTo();
+            Long date = mbox.getDateCreated();
+            String subject = mbox.getSubject();
+            String cc = mbox.getCC();
+            String bcc = mbox.getBCC();
+
+            Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_TO.getTypeID(), classname, "", to));
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CC.getTypeID(), classname, "", cc));
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_BCC.getTypeID(), classname, "", bcc));
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_FROM.getTypeID(), classname, "", from));
+            // NOTE(review): the same extracted text is stored as both the plain and the HTML body.
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_PLAIN.getTypeID(), classname, "", content));
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_HTML.getTypeID(), classname, "", content));
+            //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_ID.getTypeID(), classname, "",));
+            //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_REPLY_ID.getTypeID(), classname, "",));
+            // NOTE(review): only one date is extracted, so received and sent carry the same value.
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID(), classname, "", date));
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID(), classname, "", date));
+            bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SUBJECT.getTypeID(), classname, "", subject));
+
+            BlackboardArtifact bbart = fsContent.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG);
+            bbart.addAttributes(bbattributes);
+
+            // Announce new data only after the artifact was really written.
+            IngestManager.fireServiceDataEvent(new ServiceDataEvent(classname, BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
+        } catch (TskCoreException ex) {
+            logger.log(Level.SEVERE, "Could not post the email artifact for " + fsContent.getName(), ex);
+        } catch (IOException ex) {
+            // Also covers FileNotFoundException, which is an IOException.
+            logger.log(Level.SEVERE, "I/O error while parsing " + fsContent.getName(), ex);
+        } catch (SAXException ex) {
+            logger.log(Level.SEVERE, "Content handler error while parsing " + fsContent.getName(), ex);
+        } catch (TikaException ex) {
+            logger.log(Level.SEVERE, "Parser error while parsing " + fsContent.getName(), ex);
+        }
+
+        return ProcessResult.OK;
+    }
+
+    /**
+     * Reads the first bytes of the file and checks them for the mbox signature.
+     * A read failure is logged and treated as "not an mbox file".
+     */
+    private boolean startsWithMboxSignature(ThunderbirdEmailParser mbox, AbstractFile fsContent) {
+        try {
+            byte[] head = new byte[SIGNATURE_PROBE_SIZE];
+            fsContent.read(head, 0, SIGNATURE_PROBE_SIZE);
+            return mbox.isValidMimeTypeMbox(head);
+        } catch (TskException ex) {
+            logger.log(Level.SEVERE, "Could not read the start of " + fsContent.getName(), ex);
+            return false;
+        }
+    }
+
+    @Override
+    public void complete() {
+        logger.log(Level.INFO, "complete()");
+        managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "COMPLETE"));
+
+        //service specific cleanup due completion here
+    }
+
+    @Override
+    public String getName() {
+        return classname;
+    }
+
+    @Override
+    public String getDescription() {
+        return "This class parses through a file to determine if it is an mbox file and if so, populates an email artifact for it in the blackboard.";
+    }
+
+    @Override
+    public void init(IngestManagerProxy managerProxy) {
+        logger.log(Level.INFO, "init()");
+        this.managerProxy = managerProxy;
+
+        //service specific initialization here
+    }
+
+    @Override
+    public void stop() {
+        logger.log(Level.INFO, "stop()");
+
+        //service specific cleanup due interruption here
+    }
+
+    @Override
+    public ServiceType getType() {
+        return ServiceType.AbstractFile;
+    }
+
+    @Override
+    public boolean hasSimpleConfiguration() {
+        return false;
+    }
+
+    @Override
+    public boolean hasAdvancedConfiguration() {
+        return false;
+    }
+
+    @Override
+    public javax.swing.JPanel getSimpleConfiguration() {
+        return null;
+    }
+
+    @Override
+    public javax.swing.JPanel getAdvancedConfiguration() {
+        return null;
+    }
+
+    @Override
+    public boolean hasBackgroundJobsRunning() {
+        return false;
+    }
+
+    @Override
+    public void saveAdvancedConfiguration() {
+    }
+
+    @Override
+    public void saveSimpleConfiguration() {
+    }
+}
\ No newline at end of file
diff --git a/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxParser.java b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxParser.java
new file mode 100644
index 0000000000..1e8ae0db86
--- /dev/null
+++ b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMboxParser.java
@@ -0,0 +1,253 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.sleuthkit.autopsy.thunderbirdparser;
+
+import java.io.*;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Locale;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ *
+ * @author arivera
+ */
+public class ThunderbirdMboxParser {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = -1762689436731160661L;
+
+    private static final Set SUPPORTED_TYPES =
+        Collections.singleton(MediaType.application("mbox"));
+
+    public static final String MBOX_MIME_TYPE = "application/mbox";
+    public static final String MBOX_RECORD_DIVIDER = "From ";
+    private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ 
]+):[ \t]*(.*)");
+    // Each bracketed address is matched on its own; a greedy ".*" would span
+    // from the first '<' to the last '>' when several recipients are listed.
+    private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("<([^<>]*@[^<>]*)>");
+
+    private static final String EMAIL_HEADER_METADATA_PREFIX = "MboxParser-";
+    private static final String EMAIL_FROMLINE_METADATA = EMAIL_HEADER_METADATA_PREFIX + "from";
+
+    /** Date header layouts tried in order; the day-of-week prefix is optional in RFC 822 dates. */
+    private static final String[] DATE_HEADER_PATTERNS = {
+        "EEE, d MMM yyyy HH:mm:ss Z",
+        "d MMM yyyy HH:mm:ss Z"
+    };
+
+    private ThunderbirdXHTMLContentHandler xhtml = null;
+
+    private enum ParseStates {
+        START, IN_HEADER, IN_CONTENT
+    }
+
+    public Set getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Reads the stream line by line as US-ASCII, stores the headers of every
+     * message in {@code metadata} and writes the message bodies to
+     * {@code handler} as XHTML. The stream is not closed here.
+     *
+     * @param stream   mbox data
+     * @param handler  receives the XHTML events for the message bodies
+     * @param metadata receives the header values
+     * @param context  not used by this method
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler,
+            ThunderbirdMetadata metadata, ParseContext context)
+            throws IOException, TikaException, SAXException {
+
+        InputStreamReader isr;
+        try {
+            // Headers are going to be 7-bit ascii
+            isr = new InputStreamReader(stream, "US-ASCII");
+        } catch (UnsupportedEncodingException e) {
+            throw new TikaException("US-ASCII is not supported!", e);
+        }
+
+        BufferedReader reader = new BufferedReader(isr);
+
+        metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
+        metadata.set(Metadata.CONTENT_ENCODING, "us-ascii");
+
+        xhtml = new ThunderbirdXHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+
+        ParseStates parseState = ParseStates.START;
+        // Header line being assembled; folded continuation lines are appended to it.
+        String multiLine = null;
+        boolean inQuote = false;
+        int numEmails = 0;
+
+
+        // We're going to scan, line-by-line, for a line that starts with
+        // "From "
+
+        for (String curLine = reader.readLine(); curLine != null; curLine = reader.readLine())
+        {
+
+            boolean newMessage = curLine.startsWith(MBOX_RECORD_DIVIDER);
+            if (newMessage) {
+                numEmails += 1;
+            }
+
+            switch (parseState) {
+                case START:
+                    if (newMessage) {
+                        parseState = ParseStates.IN_HEADER;
+                        newMessage = false;
+                        // Fall through to IN_HEADER
+                    } else {
+                        break;
+                    }
+
+                case IN_HEADER:
+                    if (newMessage) {
+                        saveHeaderInMetadata(numEmails, metadata, multiLine);
+                        //saveHeaderInMetadata(numEmails, metadata, curLine);
+                        multiLine = curLine;
+                    }
+                    else if (curLine.length() == 0)
+                    {
+                        // Blank line is signal that we're transitioning to the content.
+
+                        saveHeaderInMetadata(numEmails, metadata, multiLine);
+                        parseState = ParseStates.IN_CONTENT;
+
+                        // Mimic what PackageParser does between entries.
+                        xhtml.startElement("div", "class", "email-entry");
+                        xhtml.startElement("p");
+                        inQuote = false;
+                    }
+                    else if ((curLine.startsWith(" ") || curLine.startsWith("\t")) )
+                    {
+                        // Folded header: continuation of the previous header line.
+                        multiLine += " " + curLine.trim();
+                    }
+                    else
+                    {
+                        saveHeaderInMetadata(numEmails, metadata, multiLine);
+                        multiLine = curLine;
+                    }
+
+                    break;
+
+                    // TODO - use real email parsing support so we can correctly handle
+                    // things like multipart messages and quoted-printable encoding.
+                    // We'd also want this for charset handling, where content isn't 7-bit
+                    // ascii.
+                case IN_CONTENT:
+                    if (newMessage) {
+                        endMessage(inQuote);
+                        parseState = ParseStates.IN_HEADER;
+                        multiLine = curLine;
+                    } else {
+                        boolean quoted = curLine.startsWith(">");
+                        if (inQuote) {
+                            if (!quoted) {
+                                xhtml.endElement("q");
+                                inQuote = false;
+                            }
+                        } else if (quoted) {
+                            xhtml.startElement("q");
+                            inQuote = true;
+                        }
+
+                        xhtml.characters(curLine);
+
+                        // For plain text email, each line is a real break position.
+                        xhtml.element("br", "");
+                    }
+            }
+        }
+
+        if (parseState == ParseStates.IN_HEADER) {
+            saveHeaderInMetadata(numEmails, metadata, multiLine);
+        } else if (parseState == ParseStates.IN_CONTENT) {
+            endMessage(inQuote);
+        }
+
+        xhtml.endDocument();
+    }
+
+    /** Closes the elements opened for the current message body. */
+    private void endMessage(boolean inQuote) throws SAXException {
+        if (inQuote) {
+            xhtml.endElement("q");
+        }
+
+        xhtml.endElement("p");
+        xhtml.endElement("div");
+    }
+
+    /**
+     * Stores one complete (unfolded) header line in the metadata.
+     *
+     * @param numEmails number of messages seen so far; currently unused
+     * @param metadata  target for the header values
+     * @param curLine   header line, the "From " separator line, or null (ignored)
+     */
+    private void saveHeaderInMetadata(int numEmails, ThunderbirdMetadata metadata, String curLine)
+    {
+        // Null happens for the very first call, before any header line was collected.
+        if (curLine == null) {
+            return;
+        }
+
+        if (curLine.startsWith(MBOX_RECORD_DIVIDER))
+        {
+            metadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
+            return;
+        }
+
+        Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
+        if (!headerMatcher.matches()) {
+            return; // ignore malformed header lines
+        }
+
+        // Fixed locale: the tag becomes part of a metadata key and must not
+        // depend on the default locale's case rules.
+        String headerTag = headerMatcher.group(1).toLowerCase(Locale.ENGLISH);
+        String headerContent = headerMatcher.group(2);
+
+        if (headerTag.equalsIgnoreCase("From")) {
+            metadata.add(ThunderbirdMetadata.AUTHOR, headerContent);
+            metadata.add(ThunderbirdMetadata.CREATOR, headerContent);
+            // ThunderbirdEmailParser.getFrom() reads MESSAGE_FROM, so store the sender there too.
+            metadata.add(ThunderbirdMetadata.MESSAGE_FROM, headerContent);
+        } else if (headerTag.equalsIgnoreCase("To") ||
+                headerTag.equalsIgnoreCase("Cc") ||
+                headerTag.equalsIgnoreCase("Bcc")) {
+            Matcher address = EMAIL_ADDRESS_PATTERN.matcher(headerContent);
+            boolean foundBracketedAddress = false;
+            while (address.find()) {
+                metadata.add(ThunderbirdMetadata.MESSAGE_RECIPIENT_ADDRESS, address.group(1));
+                foundBracketedAddress = true;
+            }
+            if (!foundBracketedAddress && headerContent.indexOf('@') > -1) {
+                metadata.add(ThunderbirdMetadata.MESSAGE_RECIPIENT_ADDRESS, headerContent);
+            }
+
+            String property = ThunderbirdMetadata.MESSAGE_TO;
+            if (headerTag.equalsIgnoreCase("Cc")) {
+                property = ThunderbirdMetadata.MESSAGE_CC;
+            } else if (headerTag.equalsIgnoreCase("Bcc")) {
+                property = ThunderbirdMetadata.MESSAGE_BCC;
+            }
+            metadata.add(property, headerContent);
+        } else if (headerTag.equalsIgnoreCase("Subject")) {
+            metadata.add(ThunderbirdMetadata.SUBJECT, headerContent);
+            metadata.add(ThunderbirdMetadata.TITLE, headerContent);
+        } else if (headerTag.equalsIgnoreCase("Date")) {
+            try {
+                Date date = parseDate(headerContent);
+                metadata.set(ThunderbirdMetadata.DATE, date);
+                metadata.set(ThunderbirdMetadata.CREATION_DATE, date);
+            } catch (ParseException e) {
+                // ignoring date because format was not understood
+            }
+        } else if (headerTag.equalsIgnoreCase("Message-Id")) {
+            metadata.add(ThunderbirdMetadata.IDENTIFIER, headerContent);
+        } else if (headerTag.equalsIgnoreCase("In-Reply-To")) {
+            metadata.add(ThunderbirdMetadata.RELATION, headerContent);
+        } else if (headerTag.equalsIgnoreCase("Content-Type")) {
+            // TODO - key off content-type in headers to
+            // set mapping to use for content and convert if necessary.
+
+            metadata.add(ThunderbirdMetadata.CONTENT_TYPE, headerContent);
+            metadata.add(ThunderbirdMetadata.FORMAT, headerContent);
+        } else {
+            metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
+        }
+    }
+
+    /**
+     * Parses the value of a Date header, with or without the leading day of week.
+     * A new formatter is created per call, so the method is safe to call from
+     * several threads.
+     *
+     * @param headerContent value of the Date header
+     * @return the parsed date
+     * @throws ParseException if the value matches none of the known layouts
+     */
+    public static Date parseDate(String headerContent) throws ParseException {
+        ParseException firstFailure = null;
+        for (String pattern : DATE_HEADER_PATTERNS) {
+            try {
+                return new SimpleDateFormat(pattern, Locale.US).parse(headerContent);
+            } catch (ParseException e) {
+                if (firstFailure == null) {
+                    firstFailure = e;
+                }
+            }
+        }
+        throw firstFailure;
+    }
+
+}
diff --git a/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMetadata.java b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMetadata.java
new file mode 100644
index 0000000000..e0093dac45
--- /dev/null
+++ b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMetadata.java
@@ -0,0 +1,447 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package org.sleuthkit.autopsy.thunderbirdparser;
+
+import java.io.Serializable;
+import java.text.DateFormat;
+import java.text.DateFormatSymbols;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.*;
+import org.apache.tika.metadata.*;
+
+/**
+ * A multi-valued, string-keyed metadata store: every metadata name maps to an
+ * ordered list of string values. Typed accessors convert single values to and
+ * from integers, reals and ISO 8601 dates.
+ *
+ * <p>No synchronization is performed on the underlying map, so an instance
+ * must not be mutated from several threads at once.
+ *
+ * @author arivera
+ */
+public class ThunderbirdMetadata implements CreativeCommons, DublinCore, Geographic, HttpHeaders,
+        IPTC, Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
+        Serializable {
+
+    /** Serial version UID */
+    private static final long serialVersionUID = 5623926545693153182L;
+
+    /**
+     * A map of all metadata attributes: name to the ordered list of its values.
+     */
+    private final Map<String, ArrayList<String>> metadata;
+
+    /**
+     * The UTC time zone. Not sure if {@link TimeZone#getTimeZone(String)}
+     * understands "UTC" in all environments, but it'll fall back to GMT
+     * in such cases, which is in practice equivalent to UTC.
+     */
+    private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
+
+    /**
+     * Custom time zone used to interpret date values without a time
+     * component in a way that most likely falls within the same day
+     * regardless of in which time zone it is later interpreted. For
+     * example, the "2012-02-17" date would map to "2012-02-17T12:00:00Z"
+     * (instead of the default "2012-02-17T00:00:00Z"), which would still
+     * map to "2012-02-17" if interpreted in say Pacific time (while the
+     * default mapping would result in "2012-02-16" for UTC-8).
+     */
+    private static final TimeZone MIDDAY = TimeZone.getTimeZone("GMT-12:00");
+
+    /**
+     * Some parsers will have the date as a ISO-8601 string
+     * already, and will set that into the Metadata object.
+     * So we can return Date objects for these, this is the
+     * list (in preference order) of the various ISO-8601
+     * variants that we try when processing a date based
+     * property.
+     */
+    private static final DateFormat[] iso8601InputFormats = new DateFormat[] {
+        // yyyy-mm-ddThh...
+        createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", UTC),   // UTC/Zulu
+        createDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", null),    // With timezone
+        createDateFormat("yyyy-MM-dd'T'HH:mm:ss", null),     // Without timezone
+        // yyyy-mm-dd hh...
+        createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", UTC),   // UTC/Zulu
+        createDateFormat("yyyy-MM-dd' 'HH:mm:ssZ", null),    // With timezone
+        createDateFormat("yyyy-MM-dd' 'HH:mm:ss", null),     // Without timezone
+        // Date without time, set to Midday UTC
+        createDateFormat("yyyy-MM-dd", MIDDAY),              // Normal date format
+        createDateFormat("yyyy:MM:dd", MIDDAY),              // Image (IPTC/EXIF) format
+    };
+
+    /**
+     * Builds a US-locale date format for the given pattern.
+     *
+     * @param format   {@link SimpleDateFormat} pattern
+     * @param timezone time zone to apply, or null to keep the format's default
+     * @return the configured (thread-unsafe) date format
+     */
+    private static DateFormat createDateFormat(String format, TimeZone timezone) {
+        SimpleDateFormat sdf =
+            new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
+        if (timezone != null) {
+            sdf.setTimeZone(timezone);
+        }
+        return sdf;
+    }
+
+    /**
+     * Parses the given date string. This method is synchronized to prevent
+     * concurrent access to the thread-unsafe date formats.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
+     * @param date date string
+     * @return parsed date, or null if the date can't be parsed
+     */
+    private static synchronized Date parseDate(String date) {
+        if (date == null) {
+            return null;
+        }
+
+        // Java doesn't like timezones in the form ss+hh:mm
+        // It only likes the hhmm form, without the colon.
+        // The length guard keeps short inputs ("", "2012") from indexing
+        // before the start of the string.
+        int n = date.length();
+        if (n >= 6
+                && date.charAt(n - 3) == ':'
+                && (date.charAt(n - 6) == '+' || date.charAt(n - 6) == '-')) {
+            date = date.substring(0, n - 3) + date.substring(n - 2);
+        }
+
+        // Try several different ISO-8601 variants
+        for (DateFormat format : iso8601InputFormats) {
+            try {
+                return format.parse(date);
+            } catch (ParseException ignored) {
+                // Not this variant; try the next one.
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns a ISO 8601 representation of the given date, expressed in UTC.
+     * The method works on a calendar created per call and shares no mutable
+     * state, so it needs no synchronization.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
+     * @param date given date
+     * @return ISO 8601 date string
+     */
+    private static String formatDate(Date date) {
+        Calendar calendar = Calendar.getInstance(UTC, Locale.US);
+        calendar.setTime(date);
+        // Explicit locale: the default locale may render non-ASCII digits.
+        return String.format(
+                Locale.US,
+                "%04d-%02d-%02dT%02d:%02d:%02dZ",
+                calendar.get(Calendar.YEAR),
+                calendar.get(Calendar.MONTH) + 1,
+                calendar.get(Calendar.DAY_OF_MONTH),
+                calendar.get(Calendar.HOUR_OF_DAY),
+                calendar.get(Calendar.MINUTE),
+                calendar.get(Calendar.SECOND));
+    }
+
+    /**
+     * Constructs a new, empty metadata.
+     */
+    public ThunderbirdMetadata() {
+        metadata = new HashMap<String, ArrayList<String>>();
+    }
+
+    /**
+     * Returns true if named value is multivalued.
+     *
+     * @param name
+     *          name of metadata
+     * @return true is named value is multivalued, false if single value or null
+     */
+    public boolean isMultiValued(final String name) {
+        ArrayList<String> values = metadata.get(name);
+        return values != null && values.size() > 1;
+    }
+
+    /**
+     * Returns a list of the names contained in the metadata.
+     *
+     * @return a new list holding a snapshot of the metadata names
+     */
+    public ArrayList<String> names() {
+        return new ArrayList<String>(metadata.keySet());
+    }
+
+    /**
+     * Get the value associated to a metadata name. If many values are associated
+     * to the specified name, then the first one is returned.
+     *
+     * @param name
+     *          of the metadata.
+     * @return the value associated to the specified metadata name, or null
+     *         if the name is not set.
+     */
+    public String get(final String name) {
+        ArrayList<String> values = metadata.get(name);
+        if (values == null || values.isEmpty()) {
+            return null;
+        }
+        return values.get(0);
+    }
+
+    /**
+     * Returns the value (if any) of the identified metadata property.
+     *
+     * @since Apache Tika 0.7
+     * @param property property definition
+     * @return property value, or <code>null</code> if the property is not set
+     */
+    public String get(Property property) {
+        return get(property.getName());
+    }
+
+    /**
+     * Returns the value of the identified Integer based metadata property.
+     *
+     * @since Apache Tika 0.8
+     * @param property simple integer property definition
+     * @return property value as a Integer, or <code>null</code> if the property
+     *         is not a simple integer property, is not set, or is not a valid Integer
+     */
+    public Integer getInt(Property property) {
+        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
+            return null;
+        }
+        if (property.getValueType() != Property.ValueType.INTEGER) {
+            return null;
+        }
+
+        String v = get(property);
+        if (v == null) {
+            return null;
+        }
+        try {
+            return Integer.valueOf(v);
+        } catch (NumberFormatException e) {
+            return null;
+        }
+    }
+
+    /**
+     * Returns the value of the identified Date based metadata property.
+     *
+     * @since Apache Tika 0.8
+     * @param property simple date property definition
+     * @return property value as a Date, or <code>null</code> if the property
+     *         is not a simple date property, is not set, or is not a valid Date
+     */
+    public Date getDate(Property property) {
+        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
+            return null;
+        }
+        if (property.getValueType() != Property.ValueType.DATE) {
+            return null;
+        }
+
+        String v = get(property);
+        return (v != null) ? parseDate(v) : null;
+    }
+
+    /**
+     * Get the values associated to a metadata name.
+     *
+     * @param name
+     *          of the metadata.
+     * @return the list stored for the name (changes to it are visible in this
+     *         object), or a new empty list if the name is not set.
+     */
+    public ArrayList<String> getValues(final String name) {
+        return _getValues(name);
+    }
+
+    /**
+     * Looks up the stored value list, substituting an empty list for a
+     * missing name so callers never see null.
+     */
+    private ArrayList<String> _getValues(final String name) {
+        ArrayList<String> values = metadata.get(name);
+        if (values == null) {
+            values = new ArrayList<String>();
+        }
+        return values;
+    }
+
+    /**
+     * Add a metadata name/value mapping. Add the specified value to the list of
+     * values associated to the specified metadata name.
+     *
+     * @param name
+     *          the metadata name.
+     * @param value
+     *          the metadata value.
+     */
+    public void add(final String name, final String value) {
+        ArrayList<String> values = metadata.get(name);
+        if (values == null) {
+            set(name, value);
+        } else {
+            // The list is already registered in the map; appending is enough.
+            values.add(value);
+        }
+    }
+
+    /**
+     * Copy All key-value pairs from properties. Each property name ends up
+     * with exactly one value, replacing anything stored for it before.
+     *
+     * @param properties
+     *          properties to copy from
+     */
+    public void setAll(Properties properties) {
+        Enumeration<?> names = properties.propertyNames();
+        while (names.hasMoreElements()) {
+            String name = (String) names.nextElement();
+            // A fresh list per name: sharing one list would leak every
+            // property value into every name.
+            set(name, properties.getProperty(name));
+        }
+    }
+
+    /**
+     * Set metadata name/value. Associate the specified value to the specified
+     * metadata name. If some previous values were associated to this name, they
+     * are removed.
+     *
+     * @param name
+     *          the metadata name.
+     * @param value
+     *          the metadata value.
+     */
+    public void set(String name, String value) {
+        ArrayList<String> values = new ArrayList<String>();
+        values.add(value);
+        metadata.put(name, values);
+    }
+
+    /**
+     * Sets the value of the identified metadata property.
+     *
+     * @since Apache Tika 0.7
+     * @param property property definition
+     * @param value    property value
+     */
+    public void set(Property property, String value) {
+        set(property.getName(), value);
+    }
+
+    /**
+     * Sets the integer value of the identified metadata property.
+     *
+     * @since Apache Tika 0.8
+     * @param property simple integer property definition
+     * @param value    property value
+     * @throws PropertyTypeException if the property is not a simple integer property
+     */
+    public void set(Property property, int value) {
+        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
+            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
+        }
+        if (property.getValueType() != Property.ValueType.INTEGER) {
+            throw new PropertyTypeException(Property.ValueType.INTEGER, property.getValueType());
+        }
+        set(property.getName(), Integer.toString(value));
+    }
+
+    /**
+     * Sets the real or rational value of the identified metadata property.
+     *
+     * @since Apache Tika 0.8
+     * @param property simple real or simple rational property definition
+     * @param value    property value
+     * @throws PropertyTypeException if the property is not a simple real or
+     *         rational property
+     */
+    public void set(Property property, double value) {
+        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
+            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
+        }
+        if (property.getValueType() != Property.ValueType.REAL &&
+                property.getValueType() != Property.ValueType.RATIONAL) {
+            throw new PropertyTypeException(Property.ValueType.REAL, property.getValueType());
+        }
+        set(property.getName(), Double.toString(value));
+    }
+
+    /**
+     * Sets the date value of the identified metadata property. The date is
+     * stored as an ISO 8601 string in UTC.
+     *
+     * @since Apache Tika 0.8
+     * @param property simple date property definition
+     * @param date     property value
+     * @throws PropertyTypeException if the property is not a simple date property
+     */
+    public void set(Property property, Date date) {
+        if (property.getPropertyType() != Property.PropertyType.SIMPLE) {
+            throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
+        }
+        if (property.getValueType() != Property.ValueType.DATE) {
+            throw new PropertyTypeException(Property.ValueType.DATE, property.getValueType());
+        }
+        set(property.getName(), formatDate(date));
+    }
+
+    /**
+     * Remove a metadata and all its associated values.
+     *
+     * @param name
+     *          metadata name to remove
+     */
+    public void remove(String name) {
+        metadata.remove(name);
+    }
+
+    /**
+     * Returns the number of metadata names in this metadata.
+     *
+     * @return number of metadata names
+     */
+    public int size() {
+        return metadata.size();
+    }
+
+    /**
+     * Two metadata objects are equal when they hold the same names and, for
+     * every name, the same values in the same order.
+     *
+     * @param o object to compare with
+     * @return true if <code>o</code> is a ThunderbirdMetadata with equal content
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof ThunderbirdMetadata)) {
+            return false;
+        }
+        // Map/List equality compares names and ordered values, and copes
+        // with null values.
+        return metadata.equals(((ThunderbirdMetadata) o).metadata);
+    }
+
+    /**
+     * Hash code consistent with {@link #equals(Object)}.
+     *
+     * @return hash of the name/values map
+     */
+    @Override
+    public int hashCode() {
+        return metadata.hashCode();
+    }
+
+    /**
+     * Renders every name/value pair as <code>name=value</code> followed by a
+     * single space.
+     *
+     * @return all pairs, in map iteration order
+     */
+    @Override
+    public String toString() {
+        StringBuilder buf = new StringBuilder();
+        for (Map.Entry<String, ArrayList<String>> entry : metadata.entrySet()) {
+            for (String value : entry.getValue()) {
+                buf.append(entry.getKey()).append("=").append(value).append(" ");
+            }
+        }
+        return buf.toString();
+    }
+}
diff --git a/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdXHTMLContentHandler.java b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdXHTMLContentHandler.java
new file mode 100644
index 0000000000..de7bd049b7
--- /dev/null
+++ b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdXHTMLContentHandler.java
@@ -0,0 +1,320 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.sleuthkit.autopsy.thunderbirdparser;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.sax.SafeContentHandler;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Content handler decorator that simplifies the task of producing XHTML
+ * events for Tika content parsers. The document skeleton (html, head, meta,
+ * title, body/frameset) is emitted lazily from a {@link ThunderbirdMetadata}
+ * instance the first time real content arrives.
+ */
+public class ThunderbirdXHTMLContentHandler extends SafeContentHandler {
+
+    /**
+     * The XHTML namespace URI
+     */
+    public static final String XHTML = "http://www.w3.org/1999/xhtml";
+
+    /**
+     * The newline character that gets inserted after block elements.
+     */
+    private static final char[] NL = new char[] { '\n' };
+
+    /**
+     * The tab character gets inserted before table cells and list items.
+     */
+    private static final char[] TAB = new char[] { '\t' };
+
+    /**
+     * The elements that are in the &lt;head&gt; section.
+     */
+    private static final Set<String> HEAD =
+        unmodifiableSet("title", "link", "base", "meta");
+
+    /**
+     * The elements that are automatically emitted by lazyStartHead, so
+     * skip them if they get sent to startElement/endElement by mistake.
+     */
+    private static final Set<String> AUTO =
+        unmodifiableSet("html", "head", "body", "frameset");
+
+    /**
+     * The elements that get prepended with the {@link #TAB} character.
+     */
+    private static final Set<String> INDENT =
+        unmodifiableSet("li", "dd", "dt", "td", "th", "frame");
+
+    /**
+     * The elements that get appended with the {@link #NL} character.
+     */
+    public static final Set<String> ENDLINE = unmodifiableSet(
+            "p", "h1", "h2", "h3", "h4", "h5", "h6", "div", "ul", "ol", "dl",
+            "pre", "hr", "blockquote", "address", "fieldset", "table", "form",
+            "noscript", "li", "dt", "dd", "noframes", "br", "tr", "select", "option");
+
+    /** Shared attribute list for elements emitted without attributes. */
+    private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
+
+    /**
+     * Wraps the given element names in an unmodifiable set.
+     *
+     * @param elements element names
+     * @return read-only set containing the names
+     */
+    private static Set<String> unmodifiableSet(String... elements) {
+        return Collections.unmodifiableSet(
+                new HashSet<String>(Arrays.asList(elements)));
+    }
+
+    /**
+     * Metadata associated with the document. Used to fill in the
+     * &lt;head/&gt; section.
+     */
+    private final ThunderbirdMetadata metadata;
+
+    /**
+     * Flags to indicate whether the document head element has been started/ended.
+     */
+    private boolean headStarted = false;
+    private boolean headEnded = false;
+    /** Whether the document body was opened as a frameset instead of a body. */
+    private boolean useFrameset = false;
+
+    /**
+     * Creates a handler that forwards XHTML events to the given handler.
+     *
+     * @param handler  downstream content handler
+     * @param metadata metadata used to populate the document head
+     */
+    public ThunderbirdXHTMLContentHandler(ContentHandler handler, ThunderbirdMetadata metadata) {
+        super(handler);
+        this.metadata = metadata;
+    }
+
+    /**
+     * Starts an XHTML document by setting up the namespace mappings.
+     * The standard XHTML prefix is generated lazily when the first
+     * element is started.
+     */
+    @Override
+    public void startDocument() throws SAXException {
+        super.startDocument();
+        startPrefixMapping("", XHTML);
+    }
+
+    /**
+     * Generates the following XHTML prefix when called for the first time:
+     * <pre>
+     * &lt;html&gt;
+     *   &lt;head&gt;
+     * </pre>
+     * Later calls do nothing.
+     */
+    private void lazyStartHead() throws SAXException {
+        if (!headStarted) {
+            headStarted = true;
+
+            // Call directly, so we don't go through our startElement(), which will
+            // ignore these elements.
+            super.startElement(XHTML, "html", "html", EMPTY_ATTRIBUTES);
+            newline();
+            super.startElement(XHTML, "head", "head", EMPTY_ATTRIBUTES);
+            newline();
+        }
+    }
+
+    /**
+     * Generates the following XHTML prefix when called for the first time:
+     * <pre>
+     * &lt;html&gt;
+     *   &lt;head&gt;
+     *     &lt;meta name="..." content="..."/&gt;
+     *     &lt;title&gt;...&lt;/title&gt;
+     *   &lt;/head&gt;
+     *   &lt;body&gt; (or &lt;frameset&gt;)
+     * </pre>
+     * Later calls do nothing.
+     *
+     * @param isFrameset true to open a frameset instead of a body
+     */
+    private void lazyEndHead(boolean isFrameset) throws SAXException {
+        lazyStartHead();
+
+        if (!headEnded) {
+            headEnded = true;
+            useFrameset = isFrameset;
+
+            // TIKA-478: Emit all metadata values (other than title). We have to call
+            // startElement() and characters() directly to avoid recursive problems.
+            for (String name : metadata.names()) {
+                if (name.equals("title")) {
+                    continue;
+                }
+
+                for (String value : metadata.getValues(name)) {
+                    // Putting null values into attributes causes problems, but is
+                    // allowed by Metadata, so guard against that.
+                    if (value != null) {
+                        AttributesImpl attributes = new AttributesImpl();
+                        attributes.addAttribute("", "name", "name", "CDATA", name);
+                        attributes.addAttribute("", "content", "content", "CDATA", value);
+                        super.startElement(XHTML, "meta", "meta", attributes);
+                        super.endElement(XHTML, "meta", "meta");
+                        newline();
+                    }
+                }
+            }
+
+            super.startElement(XHTML, "title", "title", EMPTY_ATTRIBUTES);
+            String title = metadata.get(Metadata.TITLE);
+            if (title != null && title.length() > 0) {
+                char[] titleChars = title.toCharArray();
+                super.characters(titleChars, 0, titleChars.length);
+            } else {
+                // TIKA-725: Prefer <title></title> over <title/>
+                super.characters(new char[0], 0, 0);
+            }
+            super.endElement(XHTML, "title", "title");
+            newline();
+
+            super.endElement(XHTML, "head", "head");
+            newline();
+
+            if (useFrameset) {
+                super.startElement(XHTML, "frameset", "frameset", EMPTY_ATTRIBUTES);
+            } else {
+                super.startElement(XHTML, "body", "body", EMPTY_ATTRIBUTES);
+            }
+        }
+    }
+
+    /**
+     * Ends the XHTML document by writing the following footer and
+     * clearing the namespace mappings:
+     * <pre>
+     *   &lt;/body&gt;
+     * &lt;/html&gt;
+     * </pre>
+     */
+    @Override
+    public void endDocument() throws SAXException {
+        // Make sure the head and body were opened even for an empty document.
+        lazyEndHead(useFrameset);
+
+        if (useFrameset) {
+            super.endElement(XHTML, "frameset", "frameset");
+        } else {
+            super.endElement(XHTML, "body", "body");
+        }
+
+        super.endElement(XHTML, "html", "html");
+
+        endPrefixMapping("");
+        super.endDocument();
+    }
+
+    /**
+     * Starts the given element. Table cells and list items are automatically
+     * indented by emitting a tab character as ignorable whitespace.
+     */
+    @Override
+    public void startElement(
+            String uri, String local, String name, Attributes attributes)
+            throws SAXException {
+
+        if (name.equals("frameset")) {
+            lazyEndHead(true);
+        } else if (!AUTO.contains(name)) {
+            if (HEAD.contains(name)) {
+                lazyStartHead();
+            } else {
+                lazyEndHead(false);
+            }
+
+            if (XHTML.equals(uri) && INDENT.contains(name)) {
+                ignorableWhitespace(TAB, 0, TAB.length);
+            }
+
+            super.startElement(uri, local, name, attributes);
+        }
+    }
+
+    /**
+     * Ends the given element. Block elements are automatically followed
+     * by a newline character.
+     */
+    @Override
+    public void endElement(String uri, String local, String name) throws SAXException {
+        if (!AUTO.contains(name)) {
+            super.endElement(uri, local, name);
+            if (XHTML.equals(uri) && ENDLINE.contains(name)) {
+                newline();
+            }
+        }
+    }
+
+    /**
+     * Forwards character data, first making sure the document head has been
+     * closed and the body (or frameset) opened.
+     *
+     * @see <a href="https://issues.apache.org/jira/browse/TIKA-210">TIKA-210</a>
+     */
+    @Override
+    public void characters(char[] ch, int start, int length) throws SAXException {
+        lazyEndHead(useFrameset);
+        super.characters(ch, start, length);
+    }
+
+    //------------------------------------------< public convenience methods >
+
+    /**
+     * Starts an XHTML element without attributes.
+     *
+     * @param name XHTML element name
+     * @throws SAXException if the element could not be started
+     */
+    public void startElement(String name) throws SAXException {
+        startElement(XHTML, name, name, EMPTY_ATTRIBUTES);
+    }
+
+    /**
+     * Starts an XHTML element carrying a single CDATA attribute.
+     *
+     * @param name      XHTML element name
+     * @param attribute attribute name
+     * @param value     attribute value
+     * @throws SAXException if the element could not be started
+     */
+    public void startElement(String name, String attribute, String value)
+            throws SAXException {
+        AttributesImpl attributes = new AttributesImpl();
+        attributes.addAttribute("", attribute, attribute, "CDATA", value);
+        startElement(XHTML, name, name, attributes);
+    }
+
+    /**
+     * Starts an XHTML element with the given attributes.
+     *
+     * @param name       XHTML element name
+     * @param attributes element attributes
+     * @throws SAXException if the element could not be started
+     */
+    public void startElement(String name, AttributesImpl attributes)
+            throws SAXException {
+        startElement(XHTML, name, name, attributes);
+    }
+
+    /**
+     * Ends an XHTML element.
+     *
+     * @param name XHTML element name
+     * @throws SAXException if the element could not be ended
+     */
+    public void endElement(String name) throws SAXException {
+        endElement(XHTML, name, name);
+    }
+
+    /**
+     * Emits the given text as character data; null or empty text is skipped.
+     *
+     * @param characters text to emit, possibly <code>null</code>
+     * @throws SAXException if the text could not be written
+     */
+    public void characters(String characters) throws SAXException {
+        if (characters != null && characters.length() > 0) {
+            characters(characters.toCharArray(), 0, characters.length());
+        }
+    }
+
+    /**
+     * Emits a newline as ignorable whitespace.
+     *
+     * @throws SAXException if the whitespace could not be written
+     */
+    public void newline() throws SAXException {
+        ignorableWhitespace(NL, 0, NL.length);
+    }
+
+    /**
+     * Emits an XHTML element with the given text content. If the given
+     * text value is null or empty, then the element is not written.
+     *
+     * @param name XHTML element name
+     * @param value element value, possibly <code>null</code>
+     * @throws SAXException if the content element could not be written
+     */
+    public void element(String name, String value) throws SAXException {
+        if (value != null && value.length() > 0) {
+            startElement(name);
+            characters(value);
+            endElement(name);
+        }
+    }
+
+}
diff --git a/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/layer.xml b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/layer.xml
new file mode 100644
index 0000000000..6d26e34fd2
--- /dev/null
+++ b/ThunderbirdMboxEmailModule/src/org/sleuthkit/autopsy/thunderbirdparser/layer.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE filesystem PUBLIC "-//NetBeans//DTD Filesystem 1.2//EN" "http://www.netbeans.org/dtds/filesystem-1_2.dtd">
+<filesystem>
+    <folder name="Services">
+        <file name="org-sleuthkit-autopsy-mboxparser-MboxFileIngestService.instance">
+            <attr name="instanceOf" stringvalue="org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile"/>
+            <attr name="instanceCreate" methodvalue="org.sleuthkit.autopsy.mboxparser.MboxFileIngestService.getDefault"/>
+            <attr name="position" intvalue="1100"/>
+        </file>
+    </folder>
+ </filesystem>