mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-14 17:06:16 +00:00
.
This commit is contained in:
parent
4cdab24917
commit
7fdf34fa95
8
ThunderbirdMboxEmailModule/build.xml
Normal file
8
ThunderbirdMboxEmailModule/build.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!-- You may freely edit this file. See harness/README in the NetBeans platform -->
|
||||||
|
<!-- for some information on what you could do (e.g. targets to override). -->
|
||||||
|
<!-- If you delete this file and reopen the project it will be recreated. -->
|
||||||
|
<project name="org.sleuthkit.autopsy.mboxparser" default="netbeans" basedir=".">
|
||||||
|
<description>Builds, tests, and runs the project org.sleuthkit.autopsy.mboxparser.</description>
|
||||||
|
<import file="nbproject/build-impl.xml"/>
|
||||||
|
</project>
|
6
ThunderbirdMboxEmailModule/manifest.mf
Normal file
6
ThunderbirdMboxEmailModule/manifest.mf
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
Manifest-Version: 1.0
|
||||||
|
OpenIDE-Module: org.sleuthkit.autopsy.mboxparser
|
||||||
|
OpenIDE-Module-Layer: org/sleuthkit/autopsy/mboxparser/layer.xml
|
||||||
|
OpenIDE-Module-Localizing-Bundle: org/sleuthkit/autopsy/mboxparser/Bundle.properties
|
||||||
|
OpenIDE-Module-Specification-Version: 1.0
|
||||||
|
|
45
ThunderbirdMboxEmailModule/nbproject/build-impl.xml
Normal file
45
ThunderbirdMboxEmailModule/nbproject/build-impl.xml
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<!--
|
||||||
|
*** GENERATED FROM project.xml - DO NOT EDIT ***
|
||||||
|
*** EDIT ../build.xml INSTEAD ***
|
||||||
|
-->
|
||||||
|
<project name="org.sleuthkit.autopsy.mboxparser-impl" basedir="..">
|
||||||
|
<fail message="Please build using Ant 1.7.1 or higher.">
|
||||||
|
<condition>
|
||||||
|
<not>
|
||||||
|
<antversion atleast="1.7.1"/>
|
||||||
|
</not>
|
||||||
|
</condition>
|
||||||
|
</fail>
|
||||||
|
<property file="nbproject/private/suite-private.properties"/>
|
||||||
|
<property file="nbproject/suite.properties"/>
|
||||||
|
<fail unless="suite.dir">You must set 'suite.dir' to point to your containing module suite</fail>
|
||||||
|
<property file="${suite.dir}/nbproject/private/platform-private.properties"/>
|
||||||
|
<property file="${suite.dir}/nbproject/platform.properties"/>
|
||||||
|
<macrodef name="property" uri="http://www.netbeans.org/ns/nb-module-project/2">
|
||||||
|
<attribute name="name"/>
|
||||||
|
<attribute name="value"/>
|
||||||
|
<sequential>
|
||||||
|
<property name="@{name}" value="${@{value}}"/>
|
||||||
|
</sequential>
|
||||||
|
</macrodef>
|
||||||
|
<macrodef name="evalprops" uri="http://www.netbeans.org/ns/nb-module-project/2">
|
||||||
|
<attribute name="property"/>
|
||||||
|
<attribute name="value"/>
|
||||||
|
<sequential>
|
||||||
|
<property name="@{property}" value="@{value}"/>
|
||||||
|
</sequential>
|
||||||
|
</macrodef>
|
||||||
|
<property file="${user.properties.file}"/>
|
||||||
|
<nbmproject2:property name="harness.dir" value="nbplatform.${nbplatform.active}.harness.dir" xmlns:nbmproject2="http://www.netbeans.org/ns/nb-module-project/2"/>
|
||||||
|
<nbmproject2:property name="nbplatform.active.dir" value="nbplatform.${nbplatform.active}.netbeans.dest.dir" xmlns:nbmproject2="http://www.netbeans.org/ns/nb-module-project/2"/>
|
||||||
|
<nbmproject2:evalprops property="cluster.path.evaluated" value="${cluster.path}" xmlns:nbmproject2="http://www.netbeans.org/ns/nb-module-project/2"/>
|
||||||
|
<fail message="Path to 'platform' cluster missing in $${cluster.path} property or using corrupt Netbeans Platform (missing harness).">
|
||||||
|
<condition>
|
||||||
|
<not>
|
||||||
|
<contains string="${cluster.path.evaluated}" substring="platform"/>
|
||||||
|
</not>
|
||||||
|
</condition>
|
||||||
|
</fail>
|
||||||
|
<import file="${harness.dir}/build.xml"/>
|
||||||
|
</project>
|
100
ThunderbirdMboxEmailModule/nbproject/platform.properties
Normal file
100
ThunderbirdMboxEmailModule/nbproject/platform.properties
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
cluster.path=\
|
||||||
|
${nbplatform.active.dir}/harness:\
|
||||||
|
${nbplatform.active.dir}/java:\
|
||||||
|
${nbplatform.active.dir}/platform
|
||||||
|
disabled.modules=\
|
||||||
|
org.apache.tools.ant.module,\
|
||||||
|
org.netbeans.api.debugger.jpda,\
|
||||||
|
org.netbeans.api.java,\
|
||||||
|
org.netbeans.libs.cglib,\
|
||||||
|
org.netbeans.libs.javacapi,\
|
||||||
|
org.netbeans.libs.javacimpl,\
|
||||||
|
org.netbeans.libs.jsr223,\
|
||||||
|
org.netbeans.libs.springframework,\
|
||||||
|
org.netbeans.modules.ant.browsetask,\
|
||||||
|
org.netbeans.modules.ant.debugger,\
|
||||||
|
org.netbeans.modules.ant.freeform,\
|
||||||
|
org.netbeans.modules.ant.grammar,\
|
||||||
|
org.netbeans.modules.ant.kit,\
|
||||||
|
org.netbeans.modules.beans,\
|
||||||
|
org.netbeans.modules.classfile,\
|
||||||
|
org.netbeans.modules.dbschema,\
|
||||||
|
org.netbeans.modules.debugger.jpda,\
|
||||||
|
org.netbeans.modules.debugger.jpda.ant,\
|
||||||
|
org.netbeans.modules.debugger.jpda.projects,\
|
||||||
|
org.netbeans.modules.debugger.jpda.ui,\
|
||||||
|
org.netbeans.modules.form,\
|
||||||
|
org.netbeans.modules.form.j2ee,\
|
||||||
|
org.netbeans.modules.form.kit,\
|
||||||
|
org.netbeans.modules.hibernate,\
|
||||||
|
org.netbeans.modules.hibernatelib,\
|
||||||
|
org.netbeans.modules.hudson.ant,\
|
||||||
|
org.netbeans.modules.hudson.maven,\
|
||||||
|
org.netbeans.modules.i18n,\
|
||||||
|
org.netbeans.modules.i18n.form,\
|
||||||
|
org.netbeans.modules.j2ee.core.utilities,\
|
||||||
|
org.netbeans.modules.j2ee.eclipselink,\
|
||||||
|
org.netbeans.modules.j2ee.eclipselinkmodelgen,\
|
||||||
|
org.netbeans.modules.j2ee.jpa.refactoring,\
|
||||||
|
org.netbeans.modules.j2ee.jpa.verification,\
|
||||||
|
org.netbeans.modules.j2ee.metadata,\
|
||||||
|
org.netbeans.modules.j2ee.metadata.model.support,\
|
||||||
|
org.netbeans.modules.j2ee.persistence,\
|
||||||
|
org.netbeans.modules.j2ee.persistence.kit,\
|
||||||
|
org.netbeans.modules.j2ee.persistenceapi,\
|
||||||
|
org.netbeans.modules.j2ee.toplinklib,\
|
||||||
|
org.netbeans.modules.java.api.common,\
|
||||||
|
org.netbeans.modules.java.debug,\
|
||||||
|
org.netbeans.modules.java.editor,\
|
||||||
|
org.netbeans.modules.java.editor.lib,\
|
||||||
|
org.netbeans.modules.java.examples,\
|
||||||
|
org.netbeans.modules.java.freeform,\
|
||||||
|
org.netbeans.modules.java.guards,\
|
||||||
|
org.netbeans.modules.java.helpset,\
|
||||||
|
org.netbeans.modules.java.hints,\
|
||||||
|
org.netbeans.modules.java.hints.processor,\
|
||||||
|
org.netbeans.modules.java.j2seplatform,\
|
||||||
|
org.netbeans.modules.java.j2seproject,\
|
||||||
|
org.netbeans.modules.java.kit,\
|
||||||
|
org.netbeans.modules.java.lexer,\
|
||||||
|
org.netbeans.modules.java.navigation,\
|
||||||
|
org.netbeans.modules.java.platform,\
|
||||||
|
org.netbeans.modules.java.preprocessorbridge,\
|
||||||
|
org.netbeans.modules.java.project,\
|
||||||
|
org.netbeans.modules.java.source,\
|
||||||
|
org.netbeans.modules.java.source.ant,\
|
||||||
|
org.netbeans.modules.java.sourceui,\
|
||||||
|
org.netbeans.modules.javadoc,\
|
||||||
|
org.netbeans.modules.javawebstart,\
|
||||||
|
org.netbeans.modules.jellytools,\
|
||||||
|
org.netbeans.modules.jellytools.java,\
|
||||||
|
org.netbeans.modules.junit,\
|
||||||
|
org.netbeans.modules.maven,\
|
||||||
|
org.netbeans.modules.maven.coverage,\
|
||||||
|
org.netbeans.modules.maven.embedder,\
|
||||||
|
org.netbeans.modules.maven.grammar,\
|
||||||
|
org.netbeans.modules.maven.graph,\
|
||||||
|
org.netbeans.modules.maven.hints,\
|
||||||
|
org.netbeans.modules.maven.indexer,\
|
||||||
|
org.netbeans.modules.maven.junit,\
|
||||||
|
org.netbeans.modules.maven.kit,\
|
||||||
|
org.netbeans.modules.maven.model,\
|
||||||
|
org.netbeans.modules.maven.osgi,\
|
||||||
|
org.netbeans.modules.maven.persistence,\
|
||||||
|
org.netbeans.modules.maven.repository,\
|
||||||
|
org.netbeans.modules.maven.search,\
|
||||||
|
org.netbeans.modules.maven.spring,\
|
||||||
|
org.netbeans.modules.projectimport.eclipse.core,\
|
||||||
|
org.netbeans.modules.projectimport.eclipse.j2se,\
|
||||||
|
org.netbeans.modules.refactoring.java,\
|
||||||
|
org.netbeans.modules.spellchecker.bindings.java,\
|
||||||
|
org.netbeans.modules.spring.beans,\
|
||||||
|
org.netbeans.modules.swingapp,\
|
||||||
|
org.netbeans.modules.websvc.jaxws21,\
|
||||||
|
org.netbeans.modules.websvc.jaxws21api,\
|
||||||
|
org.netbeans.modules.websvc.saas.codegen.java,\
|
||||||
|
org.netbeans.modules.xml.jaxb,\
|
||||||
|
org.netbeans.modules.xml.tools.java,\
|
||||||
|
org.openide.compat,\
|
||||||
|
org.openide.util.enumerations
|
||||||
|
nbplatform.active=default
|
2
ThunderbirdMboxEmailModule/nbproject/project.properties
Normal file
2
ThunderbirdMboxEmailModule/nbproject/project.properties
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
javac.source=1.6
|
||||||
|
javac.compilerargs=-Xlint -Xlint:-serial
|
57
ThunderbirdMboxEmailModule/nbproject/project.xml
Normal file
57
ThunderbirdMboxEmailModule/nbproject/project.xml
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://www.netbeans.org/ns/project/1">
|
||||||
|
<type>org.netbeans.modules.apisupport.project</type>
|
||||||
|
<configuration>
|
||||||
|
<data xmlns="http://www.netbeans.org/ns/nb-module-project/3">
|
||||||
|
<code-name-base>org.sleuthkit.autopsy.mboxparser</code-name-base>
|
||||||
|
<suite-component/>
|
||||||
|
<module-dependencies>
|
||||||
|
<dependency>
|
||||||
|
<code-name-base>org.sleuthkit.autopsy.casemodule</code-name-base>
|
||||||
|
<build-prerequisite/>
|
||||||
|
<compile-dependency/>
|
||||||
|
<run-dependency>
|
||||||
|
<release-version>1</release-version>
|
||||||
|
<specification-version>1.0</specification-version>
|
||||||
|
</run-dependency>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<code-name-base>org.sleuthkit.autopsy.coreutils</code-name-base>
|
||||||
|
<build-prerequisite/>
|
||||||
|
<compile-dependency/>
|
||||||
|
<run-dependency>
|
||||||
|
<release-version>0-1</release-version>
|
||||||
|
<specification-version>0.0</specification-version>
|
||||||
|
</run-dependency>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<code-name-base>org.sleuthkit.autopsy.datamodel</code-name-base>
|
||||||
|
<build-prerequisite/>
|
||||||
|
<compile-dependency/>
|
||||||
|
<run-dependency>
|
||||||
|
<release-version>1</release-version>
|
||||||
|
<specification-version>1.0</specification-version>
|
||||||
|
</run-dependency>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<code-name-base>org.sleuthkit.autopsy.ingest</code-name-base>
|
||||||
|
<build-prerequisite/>
|
||||||
|
<compile-dependency/>
|
||||||
|
<run-dependency>
|
||||||
|
<release-version>0-1</release-version>
|
||||||
|
<specification-version>1.0</specification-version>
|
||||||
|
</run-dependency>
|
||||||
|
</dependency>
|
||||||
|
</module-dependencies>
|
||||||
|
<public-packages/>
|
||||||
|
<class-path-extension>
|
||||||
|
<runtime-relative-path>ext/tika-core-1.1.jar</runtime-relative-path>
|
||||||
|
<binary-origin>release/modules/ext/tika-core-1.1.jar</binary-origin>
|
||||||
|
</class-path-extension>
|
||||||
|
<class-path-extension>
|
||||||
|
<runtime-relative-path>ext/tika-parsers-1.1.jar</runtime-relative-path>
|
||||||
|
<binary-origin>release/modules/ext/tika-parsers-1.1.jar</binary-origin>
|
||||||
|
</class-path-extension>
|
||||||
|
</data>
|
||||||
|
</configuration>
|
||||||
|
</project>
|
1
ThunderbirdMboxEmailModule/nbproject/suite.properties
Normal file
1
ThunderbirdMboxEmailModule/nbproject/suite.properties
Normal file
@ -0,0 +1 @@
|
|||||||
|
suite.dir=${basedir}/..
|
BIN
ThunderbirdMboxEmailModule/release/modules/ext/tika-core-1.1.jar
Normal file
BIN
ThunderbirdMboxEmailModule/release/modules/ext/tika-core-1.1.jar
Normal file
Binary file not shown.
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
OpenIDE-Module-Name=ThunderbirdMboxEmailModule
|
@ -0,0 +1,183 @@
|
|||||||
|
package org.sleuthkit.autopsy.thunderbirdparser;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
import org.apache.tika.Tika;
|
||||||
|
import org.apache.tika.exception.TikaException;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.mime.MimeTypes;
|
||||||
|
import org.apache.tika.mime.MediaType;
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
|
import org.apache.tika.sax.BodyContentHandler;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
public class ThunderbirdEmailParser {
|
||||||
|
|
||||||
|
|
||||||
|
private InputStream stream;
|
||||||
|
//Tika object
|
||||||
|
private Tika tika;
|
||||||
|
private ThunderbirdMetadata metadata;
|
||||||
|
private ContentHandler contentHandler;
|
||||||
|
private String mimeType;
|
||||||
|
private ThunderbirdMboxParser parser;
|
||||||
|
private ParseContext context;
|
||||||
|
|
||||||
|
private static ArrayList<String> tikaMimeTypes;
|
||||||
|
|
||||||
|
static
|
||||||
|
{
|
||||||
|
tikaMimeTypes = new ArrayList<String>();
|
||||||
|
tikaMimeTypes.add(MimeTypes.OCTET_STREAM);
|
||||||
|
tikaMimeTypes.add(MimeTypes.PLAIN_TEXT);
|
||||||
|
tikaMimeTypes.add(MimeTypes.XML);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ThunderbirdEmailParser()
|
||||||
|
{
|
||||||
|
this.tika = new Tika();
|
||||||
|
}
|
||||||
|
|
||||||
|
public ThunderbirdEmailParser(InputStream inStream)
|
||||||
|
{
|
||||||
|
this.tika = new Tika();
|
||||||
|
this.stream = inStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ThunderbirdEmailParser(String filepath)
|
||||||
|
{
|
||||||
|
this.tika = new Tika();
|
||||||
|
this.stream = this.getClass().getResourceAsStream(filepath);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void init() throws IOException
|
||||||
|
{
|
||||||
|
this.tika.setMaxStringLength(10*1024*1024);
|
||||||
|
this.metadata = new ThunderbirdMetadata();
|
||||||
|
//Set MIME Type
|
||||||
|
//this.mimeType = tika.detect(this.stream);
|
||||||
|
this.parser = new ThunderbirdMboxParser();
|
||||||
|
this.context = new ParseContext();
|
||||||
|
this.contentHandler = new BodyContentHandler(10*1024*1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parse() throws FileNotFoundException, IOException, SAXException, TikaException
|
||||||
|
{
|
||||||
|
init();
|
||||||
|
parser.parse(this.stream,this.contentHandler, this.metadata, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException
|
||||||
|
{
|
||||||
|
init();
|
||||||
|
parser.parse(inStream,this.contentHandler, this.metadata, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ThunderbirdMetadata getMetadata()
|
||||||
|
{
|
||||||
|
return this.metadata;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Returns message content, i.e. plain text or html
|
||||||
|
public String getContent()
|
||||||
|
{
|
||||||
|
return this.contentHandler.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String detectEmailFileFormat(String filepath) throws IOException
|
||||||
|
{
|
||||||
|
return this.tika.detect(filepath);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Detects the mime type from the first few bytes of the document
|
||||||
|
public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName)
|
||||||
|
{
|
||||||
|
return this.tika.detect(firstFewBytes, inDocName);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean isValidMimeTypeMbox(byte[] buffer)
|
||||||
|
{
|
||||||
|
return (new String(buffer)).startsWith("From ");
|
||||||
|
}
|
||||||
|
|
||||||
|
//This assumes the file/stream was parsed since we are looking at the metadata
|
||||||
|
public boolean isValidMboxType()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.CONTENT_TYPE).equals("application/mbox");
|
||||||
|
}
|
||||||
|
|
||||||
|
//Get email subject
|
||||||
|
public String getSubject()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.SUBJECT);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTitle()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.TITLE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long getDateCreated()
|
||||||
|
{
|
||||||
|
Long epochtime;
|
||||||
|
Long ftime = 0L;
|
||||||
|
|
||||||
|
try {
|
||||||
|
String datetime = this.metadata.get(Metadata.DATE);
|
||||||
|
epochtime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(datetime).getTime();
|
||||||
|
ftime = epochtime.longValue();
|
||||||
|
ftime = ftime / 1000;
|
||||||
|
} catch (ParseException ex) {
|
||||||
|
Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ftime;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getContenType()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.CONTENT_TYPE);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getContenEncoding()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.CONTENT_ENCODING);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFrom()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.MESSAGE_FROM);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTo()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.MESSAGE_TO);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCC()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.MESSAGE_CC);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getBCC()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.MESSAGE_BCC);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getRecipientAddress()
|
||||||
|
{
|
||||||
|
return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getMboxSupportedMediaType()
|
||||||
|
{
|
||||||
|
return MediaType.application("mbox").getType();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,193 @@
|
|||||||
|
/*
|
||||||
|
* Autopsy Forensic Browser
|
||||||
|
*
|
||||||
|
* Copyright 2011 Basis Technology Corp.
|
||||||
|
* Contact: carrier <at> sleuthkit <dot> org
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.sleuthkit.autopsy.thunderbirdparser;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import java.util.logging.Logger;
|
||||||
|
import org.apache.tika.exception.TikaException;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestManager;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestMessage;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestServiceAbstract.*;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
|
||||||
|
import org.sleuthkit.autopsy.ingest.ServiceDataEvent;
|
||||||
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
|
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||||
|
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
|
||||||
|
import org.sleuthkit.datamodel.BlackboardAttribute;
|
||||||
|
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
|
||||||
|
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||||
|
import org.sleuthkit.datamodel.TskCoreException;
|
||||||
|
import org.sleuthkit.datamodel.TskException;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
public class ThunderbirdMboxFileIngestService implements IngestServiceAbstractFile {
|
||||||
|
|
||||||
|
private static final Logger logger = Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName());
|
||||||
|
private static ThunderbirdMboxFileIngestService instance = null;
|
||||||
|
private IngestManagerProxy managerProxy;
|
||||||
|
private static int messageId = 0;
|
||||||
|
private static final String classname = "Mbox Parser";
|
||||||
|
|
||||||
|
public static synchronized ThunderbirdMboxFileIngestService getDefault() {
|
||||||
|
if (instance == null) {
|
||||||
|
instance = new ThunderbirdMboxFileIngestService();
|
||||||
|
}
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ProcessResult process(AbstractFile fsContent) {
|
||||||
|
ThunderbirdEmailParser mbox = new ThunderbirdEmailParser();
|
||||||
|
boolean isMbox = false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
byte[] t = new byte[(int) 128];
|
||||||
|
int byteRead = fsContent.read(t, 0, 128);
|
||||||
|
isMbox = mbox.isValidMimeTypeMbox(t);
|
||||||
|
} catch (TskException ex) {
|
||||||
|
Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (isMbox) {
|
||||||
|
managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "Processing " + fsContent.getName()));
|
||||||
|
try {
|
||||||
|
ReadContentInputStream contentStream = new ReadContentInputStream(fsContent);
|
||||||
|
mbox.parse(contentStream);
|
||||||
|
String content = mbox.getContent();
|
||||||
|
String from = mbox.getFrom();
|
||||||
|
String to = mbox.getTo();
|
||||||
|
Long date = mbox.getDateCreated();
|
||||||
|
String subject = mbox.getSubject();
|
||||||
|
String cc = mbox.getCC();
|
||||||
|
String bcc = mbox.getBCC();
|
||||||
|
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_TO.getTypeID(), classname, "", to));
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CC.getTypeID(), classname, "", cc));
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_BCC.getTypeID(), classname, "", bcc));
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_FROM.getTypeID(), classname, "", from));
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_PLAIN.getTypeID(), classname, "", content));
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_EMAIL_CONTENT_HTML.getTypeID(), classname, "", content));
|
||||||
|
//bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_ID.getTypeID(), classname, "",));
|
||||||
|
//bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_MSG_REPLY_ID.getTypeID(), classname, "",));
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_RCVD.getTypeID(), classname, "", date));
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_SENT.getTypeID(), classname, "", date));
|
||||||
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_SUBJECT.getTypeID(), classname, "", subject));
|
||||||
|
BlackboardArtifact bbart;
|
||||||
|
try {
|
||||||
|
bbart = fsContent.newArtifact(BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG);
|
||||||
|
bbart.addAttributes(bbattributes);
|
||||||
|
} catch (TskCoreException ex) {
|
||||||
|
Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
IngestManager.fireServiceDataEvent(new ServiceDataEvent(classname, BlackboardArtifact.ARTIFACT_TYPE.TSK_EMAIL_MSG));
|
||||||
|
} catch (FileNotFoundException ex) {
|
||||||
|
Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
|
} catch (SAXException ex) {
|
||||||
|
Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
|
} catch (TikaException ex) {
|
||||||
|
Logger.getLogger(ThunderbirdMboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ProcessResult.OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void complete() {
|
||||||
|
logger.log(Level.INFO, "complete()");
|
||||||
|
managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "COMPLETE"));
|
||||||
|
|
||||||
|
//service specific cleanup due completion here
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getName() {
|
||||||
|
return "Mbox Parser";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getDescription() {
|
||||||
|
return "This class parses through a file to determine if it is an mbox file and if so, populates an email artifact for it in the blackboard.";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void init(IngestManagerProxy managerProxy) {
|
||||||
|
logger.log(Level.INFO, "init()");
|
||||||
|
this.managerProxy = managerProxy;
|
||||||
|
|
||||||
|
//service specific initialization here
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void stop() {
|
||||||
|
logger.log(Level.INFO, "stop()");
|
||||||
|
|
||||||
|
//service specific cleanup due interruption here
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ServiceType getType() {
|
||||||
|
return ServiceType.AbstractFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasSimpleConfiguration() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasAdvancedConfiguration() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public javax.swing.JPanel getSimpleConfiguration() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public javax.swing.JPanel getAdvancedConfiguration() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasBackgroundJobsRunning() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void saveAdvancedConfiguration() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void saveSimpleConfiguration() {
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,253 @@
|
|||||||
|
/*
|
||||||
|
* To change this template, choose Tools | Templates
|
||||||
|
* and open the template in the editor.
|
||||||
|
*/
|
||||||
|
package org.sleuthkit.autopsy.thunderbirdparser;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import org.apache.tika.exception.TikaException;
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.mime.MediaType;
|
||||||
|
import org.apache.tika.parser.AbstractParser;
|
||||||
|
import org.apache.tika.parser.ParseContext;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author arivera
|
||||||
|
*/
|
||||||
|
public class ThunderbirdMboxParser {
|
||||||
|
|
||||||
|
/** Serial version UID */
|
||||||
|
private static final long serialVersionUID = -1762689436731160661L;
|
||||||
|
|
||||||
|
private static final Set<MediaType> SUPPORTED_TYPES =
|
||||||
|
Collections.singleton(MediaType.application("mbox"));
|
||||||
|
|
||||||
|
public static final String MBOX_MIME_TYPE = "application/mbox";
|
||||||
|
public static final String MBOX_RECORD_DIVIDER = "From ";
|
||||||
|
private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");
|
||||||
|
private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("<(.*@.*)>");
|
||||||
|
|
||||||
|
private static final String EMAIL_HEADER_METADATA_PREFIX = "MboxParser-";
|
||||||
|
private static final String EMAIL_FROMLINE_METADATA = EMAIL_HEADER_METADATA_PREFIX + "from";
|
||||||
|
|
||||||
|
private ThunderbirdXHTMLContentHandler xhtml = null;
|
||||||
|
|
||||||
|
private enum ParseStates {
|
||||||
|
START, IN_HEADER, IN_CONTENT
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<MediaType> getSupportedTypes(ParseContext context) {
|
||||||
|
return SUPPORTED_TYPES;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void parse(
|
||||||
|
InputStream stream, ContentHandler handler,
|
||||||
|
ThunderbirdMetadata metadata, ParseContext context)
|
||||||
|
throws IOException, TikaException, SAXException {
|
||||||
|
|
||||||
|
InputStreamReader isr;
|
||||||
|
try {
|
||||||
|
// Headers are going to be 7-bit ascii
|
||||||
|
isr = new InputStreamReader(stream, "US-ASCII");
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
throw new TikaException("US-ASCII is not supported!", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
BufferedReader reader = new BufferedReader(isr);
|
||||||
|
|
||||||
|
metadata.set(Metadata.CONTENT_TYPE, MBOX_MIME_TYPE);
|
||||||
|
metadata.set(Metadata.CONTENT_ENCODING, "us-ascii");
|
||||||
|
|
||||||
|
xhtml = new ThunderbirdXHTMLContentHandler(handler, metadata);
|
||||||
|
xhtml.startDocument();
|
||||||
|
|
||||||
|
ThunderbirdMboxParser.ParseStates parseState = ThunderbirdMboxParser.ParseStates.START;
|
||||||
|
String multiLine = null;
|
||||||
|
boolean inQuote = false;
|
||||||
|
int numEmails = 0;
|
||||||
|
|
||||||
|
|
||||||
|
// We're going to scan, line-by-line, for a line that starts with
|
||||||
|
// "From "
|
||||||
|
|
||||||
|
for (String curLine = reader.readLine(); curLine != null; curLine = reader.readLine())
|
||||||
|
{
|
||||||
|
|
||||||
|
boolean newMessage = curLine.startsWith(MBOX_RECORD_DIVIDER);
|
||||||
|
if (newMessage) {
|
||||||
|
numEmails += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (parseState) {
|
||||||
|
case START:
|
||||||
|
if (newMessage) {
|
||||||
|
parseState = ThunderbirdMboxParser.ParseStates.IN_HEADER;
|
||||||
|
newMessage = false;
|
||||||
|
// Fall through to IN_HEADER
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
case IN_HEADER:
|
||||||
|
if (newMessage) {
|
||||||
|
saveHeaderInMetadata(numEmails, metadata, multiLine);
|
||||||
|
//saveHeaderInMetadata(numEmails, metadata, curLine);
|
||||||
|
multiLine = curLine;
|
||||||
|
}
|
||||||
|
//I think this is never going to be true
|
||||||
|
else if (curLine.length() == 0)
|
||||||
|
{
|
||||||
|
// Blank line is signal that we're transitioning to the content.
|
||||||
|
|
||||||
|
saveHeaderInMetadata(numEmails, metadata, multiLine);
|
||||||
|
parseState = ThunderbirdMboxParser.ParseStates.IN_CONTENT;
|
||||||
|
|
||||||
|
// Mimic what PackageParser does between entries.
|
||||||
|
xhtml.startElement("div", "class", "email-entry");
|
||||||
|
xhtml.startElement("p");
|
||||||
|
inQuote = false;
|
||||||
|
}
|
||||||
|
else if ((curLine.startsWith(" ") || curLine.startsWith("\t")) )
|
||||||
|
{
|
||||||
|
multiLine += " " + curLine.trim();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
saveHeaderInMetadata(numEmails, metadata, multiLine);
|
||||||
|
multiLine = curLine;
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
// TODO - use real email parsing support so we can correctly handle
|
||||||
|
// things like multipart messages and quoted-printable encoding.
|
||||||
|
// We'd also want this for charset handling, where content isn't 7-bit
|
||||||
|
// ascii.
|
||||||
|
case IN_CONTENT:
|
||||||
|
if (newMessage) {
|
||||||
|
endMessage(inQuote);
|
||||||
|
parseState = ThunderbirdMboxParser.ParseStates.IN_HEADER;
|
||||||
|
multiLine = curLine;
|
||||||
|
} else {
|
||||||
|
boolean quoted = curLine.startsWith(">");
|
||||||
|
if (inQuote) {
|
||||||
|
if (!quoted) {
|
||||||
|
xhtml.endElement("q");
|
||||||
|
inQuote = false;
|
||||||
|
}
|
||||||
|
} else if (quoted) {
|
||||||
|
xhtml.startElement("q");
|
||||||
|
inQuote = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
xhtml.characters(curLine);
|
||||||
|
|
||||||
|
// For plain text email, each line is a real break position.
|
||||||
|
xhtml.element("br", "");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parseState == ThunderbirdMboxParser.ParseStates.IN_HEADER) {
|
||||||
|
saveHeaderInMetadata(numEmails, metadata, multiLine);
|
||||||
|
} else if (parseState == ThunderbirdMboxParser.ParseStates.IN_CONTENT) {
|
||||||
|
endMessage(inQuote);
|
||||||
|
}
|
||||||
|
|
||||||
|
xhtml.endDocument();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void endMessage(boolean inQuote) throws SAXException {
|
||||||
|
if (inQuote) {
|
||||||
|
xhtml.endElement("q");
|
||||||
|
}
|
||||||
|
|
||||||
|
xhtml.endElement("p");
|
||||||
|
xhtml.endElement("div");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void saveHeaderInMetadata(int numEmails, ThunderbirdMetadata metadata, String curLine)
|
||||||
|
{
|
||||||
|
|
||||||
|
//if ((curLine != null) && curLine.startsWith(MBOX_RECORD_DIVIDER) && (numEmails >= 1)) n
|
||||||
|
//At this point, the current line we are feeding should never be null!!!
|
||||||
|
if ((curLine != null) && curLine.startsWith(MBOX_RECORD_DIVIDER))
|
||||||
|
{
|
||||||
|
metadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
else if ((curLine == null)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
|
||||||
|
if (!headerMatcher.matches()) {
|
||||||
|
return; // ignore malformed header lines
|
||||||
|
}
|
||||||
|
|
||||||
|
String headerTag = headerMatcher.group(1).toLowerCase();
|
||||||
|
String headerContent = headerMatcher.group(2);
|
||||||
|
|
||||||
|
if (headerTag.equalsIgnoreCase("From")) {
|
||||||
|
metadata.add(ThunderbirdMetadata.AUTHOR, headerContent);
|
||||||
|
metadata.add(ThunderbirdMetadata.CREATOR, headerContent);
|
||||||
|
} else if (headerTag.equalsIgnoreCase("To") ||
|
||||||
|
headerTag.equalsIgnoreCase("Cc") ||
|
||||||
|
headerTag.equalsIgnoreCase("Bcc")) {
|
||||||
|
Matcher address = EMAIL_ADDRESS_PATTERN.matcher(headerContent);
|
||||||
|
if(address.find()) {
|
||||||
|
metadata.add(ThunderbirdMetadata.MESSAGE_RECIPIENT_ADDRESS, address.group(1));
|
||||||
|
} else if(headerContent.indexOf('@') > -1) {
|
||||||
|
metadata.add(ThunderbirdMetadata.MESSAGE_RECIPIENT_ADDRESS, headerContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
String property = ThunderbirdMetadata.MESSAGE_TO;
|
||||||
|
if (headerTag.equalsIgnoreCase("Cc")) {
|
||||||
|
property = ThunderbirdMetadata.MESSAGE_CC;
|
||||||
|
} else if (headerTag.equalsIgnoreCase("Bcc")) {
|
||||||
|
property = ThunderbirdMetadata.MESSAGE_BCC;
|
||||||
|
}
|
||||||
|
metadata.add(property, headerContent);
|
||||||
|
} else if (headerTag.equalsIgnoreCase("Subject")) {
|
||||||
|
metadata.add(ThunderbirdMetadata.SUBJECT, headerContent);
|
||||||
|
metadata.add(ThunderbirdMetadata.TITLE, headerContent);
|
||||||
|
} else if (headerTag.equalsIgnoreCase("Date")) {
|
||||||
|
try {
|
||||||
|
Date date = parseDate(headerContent);
|
||||||
|
metadata.set(ThunderbirdMetadata.DATE, date);
|
||||||
|
metadata.set(ThunderbirdMetadata.CREATION_DATE, date);
|
||||||
|
} catch (ParseException e) {
|
||||||
|
// ignoring date because format was not understood
|
||||||
|
}
|
||||||
|
} else if (headerTag.equalsIgnoreCase("Message-Id")) {
|
||||||
|
metadata.add(ThunderbirdMetadata.IDENTIFIER, headerContent);
|
||||||
|
} else if (headerTag.equalsIgnoreCase("In-Reply-To")) {
|
||||||
|
metadata.add(ThunderbirdMetadata.RELATION, headerContent);
|
||||||
|
} else if (headerTag.equalsIgnoreCase("Content-Type")) {
|
||||||
|
// TODO - key off content-type in headers to
|
||||||
|
// set mapping to use for content and convert if necessary.
|
||||||
|
|
||||||
|
metadata.add(ThunderbirdMetadata.CONTENT_TYPE, headerContent);
|
||||||
|
metadata.add(ThunderbirdMetadata.FORMAT, headerContent);
|
||||||
|
} else {
|
||||||
|
metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Date parseDate(String headerContent) throws ParseException {
|
||||||
|
SimpleDateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.US);
|
||||||
|
return dateFormat.parse(headerContent);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,447 @@
|
|||||||
|
/*
|
||||||
|
* To change this template, choose Tools | Templates
|
||||||
|
* and open the template in the editor.
|
||||||
|
*/
|
||||||
|
package org.sleuthkit.autopsy.thunderbirdparser;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.text.DateFormat;
|
||||||
|
import java.text.DateFormatSymbols;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.text.SimpleDateFormat;
|
||||||
|
import java.util.*;
|
||||||
|
import org.apache.tika.metadata.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author arivera
|
||||||
|
*/
|
||||||
|
public class ThunderbirdMetadata implements CreativeCommons, DublinCore, Geographic, HttpHeaders,
|
||||||
|
IPTC, Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
|
||||||
|
Serializable {
|
||||||
|
|
||||||
|
private int strArrCount = 0;
|
||||||
|
|
||||||
|
/** Serial version UID */
|
||||||
|
private static final long serialVersionUID = 5623926545693153182L;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A map of all metadata attributes.
|
||||||
|
*/
|
||||||
|
private Map<String, ArrayList<String>> metadata = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The UTC time zone. Not sure if {@link TimeZone#getTimeZone(String)}
|
||||||
|
* understands "UTC" in all environments, but it'll fall back to GMT
|
||||||
|
* in such cases, which is in practice equivalent to UTC.
|
||||||
|
*/
|
||||||
|
private static final TimeZone UTC = TimeZone.getTimeZone("UTC");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Custom time zone used to interpret date values without a time
|
||||||
|
* component in a way that most likely falls within the same day
|
||||||
|
* regardless of in which time zone it is later interpreted. For
|
||||||
|
* example, the "2012-02-17" date would map to "2012-02-17T12:00:00Z"
|
||||||
|
* (instead of the default "2012-02-17T00:00:00Z"), which would still
|
||||||
|
* map to "2012-02-17" if interpreted in say Pacific time (while the
|
||||||
|
* default mapping would result in "2012-02-16" for UTC-8).
|
||||||
|
*/
|
||||||
|
private static final TimeZone MIDDAY = TimeZone.getTimeZone("GMT-12:00");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Some parsers will have the date as a ISO-8601 string
|
||||||
|
* already, and will set that into the Metadata object.
|
||||||
|
* So we can return Date objects for these, this is the
|
||||||
|
* list (in preference order) of the various ISO-8601
|
||||||
|
* variants that we try when processing a date based
|
||||||
|
* property.
|
||||||
|
*/
|
||||||
|
private static final DateFormat[] iso8601InputFormats = new DateFormat[] {
|
||||||
|
// yyyy-mm-ddThh...
|
||||||
|
createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", UTC), // UTC/Zulu
|
||||||
|
createDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", null), // With timezone
|
||||||
|
createDateFormat("yyyy-MM-dd'T'HH:mm:ss", null), // Without timezone
|
||||||
|
// yyyy-mm-dd hh...
|
||||||
|
createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", UTC), // UTC/Zulu
|
||||||
|
createDateFormat("yyyy-MM-dd' 'HH:mm:ssZ", null), // With timezone
|
||||||
|
createDateFormat("yyyy-MM-dd' 'HH:mm:ss", null), // Without timezone
|
||||||
|
// Date without time, set to Midday UTC
|
||||||
|
createDateFormat("yyyy-MM-dd", MIDDAY), // Normal date format
|
||||||
|
createDateFormat("yyyy:MM:dd", MIDDAY), // Image (IPTC/EXIF) format
|
||||||
|
};
|
||||||
|
|
||||||
|
private static DateFormat createDateFormat(String format, TimeZone timezone) {
|
||||||
|
SimpleDateFormat sdf =
|
||||||
|
new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
|
||||||
|
if (timezone != null) {
|
||||||
|
sdf.setTimeZone(timezone);
|
||||||
|
}
|
||||||
|
return sdf;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses the given date string. This method is synchronized to prevent
|
||||||
|
* concurrent access to the thread-unsafe date formats.
|
||||||
|
*
|
||||||
|
* @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
|
||||||
|
* @param date date string
|
||||||
|
* @return parsed date, or <code>null</code> if the date can't be parsed
|
||||||
|
*/
|
||||||
|
private static synchronized Date parseDate(String date) {
|
||||||
|
// Java doesn't like timezones in the form ss+hh:mm
|
||||||
|
// It only likes the hhmm form, without the colon
|
||||||
|
int n = date.length();
|
||||||
|
if (date.charAt(n - 3) == ':'
|
||||||
|
&& (date.charAt(n - 6) == '+' || date.charAt(n - 6) == '-')) {
|
||||||
|
date = date.substring(0, n - 3) + date.substring(n - 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try several different ISO-8601 variants
|
||||||
|
for (DateFormat format : iso8601InputFormats) {
|
||||||
|
try {
|
||||||
|
return format.parse(date);
|
||||||
|
} catch (ParseException ignore) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a ISO 8601 representation of the given date. This method is
|
||||||
|
* synchronized to prevent concurrent access to the thread-unsafe date
|
||||||
|
* formats.
|
||||||
|
*
|
||||||
|
* @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
|
||||||
|
* @param date given date
|
||||||
|
* @return ISO 8601 date string
|
||||||
|
*/
|
||||||
|
private static String formatDate(Date date) {
|
||||||
|
Calendar calendar = GregorianCalendar.getInstance(UTC, Locale.US);
|
||||||
|
calendar.setTime(date);
|
||||||
|
return String.format(
|
||||||
|
"%04d-%02d-%02dT%02d:%02d:%02dZ",
|
||||||
|
calendar.get(Calendar.YEAR),
|
||||||
|
calendar.get(Calendar.MONTH) + 1,
|
||||||
|
calendar.get(Calendar.DAY_OF_MONTH),
|
||||||
|
calendar.get(Calendar.HOUR_OF_DAY),
|
||||||
|
calendar.get(Calendar.MINUTE),
|
||||||
|
calendar.get(Calendar.SECOND));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a metadata object that holds no names and no values.
 */
public ThunderbirdMetadata() {
    this.metadata = new HashMap<String, ArrayList<String>>();
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if named value is multivalued.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* name of metadata
|
||||||
|
* @return true is named value is multivalued, false if single value or null
|
||||||
|
*/
|
||||||
|
public boolean isMultiValued(final String name) {
|
||||||
|
return metadata.get(name) != null && metadata.get(name).size() > 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns an array of the names contained in the metadata.
|
||||||
|
*
|
||||||
|
* @return Metadata names
|
||||||
|
*/
|
||||||
|
public ArrayList<String> names() {
|
||||||
|
return new ArrayList<String>(metadata.keySet());//.toArray(new String[metadata.keySet().size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the value associated to a metadata name. If many values are assiociated
|
||||||
|
* to the specified name, then the first one is returned.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* of the metadata.
|
||||||
|
* @return the value associated to the specified metadata name.
|
||||||
|
*/
|
||||||
|
public String get(final String name) {
|
||||||
|
ArrayList<String> values = metadata.get(name);
|
||||||
|
if (values == null) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return values.get(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the value (if any) of the identified metadata property.
|
||||||
|
*
|
||||||
|
* @since Apache Tika 0.7
|
||||||
|
* @param property property definition
|
||||||
|
* @return property value, or <code>null</code> if the property is not set
|
||||||
|
*/
|
||||||
|
public String get(Property property) {
|
||||||
|
return get(property.getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the value of the identified Integer based metadata property.
|
||||||
|
*
|
||||||
|
* @since Apache Tika 0.8
|
||||||
|
* @param property simple integer property definition
|
||||||
|
* @return property value as a Integer, or <code>null</code> if the property is not set, or not a valid Integer
|
||||||
|
*/
|
||||||
|
public Integer getInt(Property property) {
|
||||||
|
if(property.getPropertyType() != Property.PropertyType.SIMPLE) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if(property.getValueType() != Property.ValueType.INTEGER) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
String v = get(property);
|
||||||
|
if(v == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
return Integer.valueOf(v);
|
||||||
|
} catch(NumberFormatException e) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the value of the identified Date based metadata property.
|
||||||
|
*
|
||||||
|
* @since Apache Tika 0.8
|
||||||
|
* @param property simple date property definition
|
||||||
|
* @return property value as a Date, or <code>null</code> if the property is not set, or not a valid Date
|
||||||
|
*/
|
||||||
|
public Date getDate(Property property) {
|
||||||
|
if(property.getPropertyType() != Property.PropertyType.SIMPLE) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if(property.getValueType() != Property.ValueType.DATE) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
String v = get(property);
|
||||||
|
if (v != null) {
|
||||||
|
return parseDate(v);
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the values associated to a metadata name.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* of the metadata.
|
||||||
|
* @return the values associated to a metadata name.
|
||||||
|
*/
|
||||||
|
public ArrayList<String> getValues(final String name) {
|
||||||
|
return _getValues(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
private ArrayList<String> _getValues(final String name) {
|
||||||
|
ArrayList<String> values = metadata.get(name);
|
||||||
|
if (values == null) {
|
||||||
|
values = new ArrayList<String>();
|
||||||
|
}
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a metadata name/value mapping. Add the specified value to the list of
|
||||||
|
* values associated to the specified metadata name.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* the metadata name.
|
||||||
|
* @param value
|
||||||
|
* the metadata value.
|
||||||
|
*/
|
||||||
|
public void add(final String name, final String value) {
|
||||||
|
ArrayList<String> values = metadata.get(name);
|
||||||
|
if (values == null) {
|
||||||
|
set(name, value);
|
||||||
|
} else {
|
||||||
|
//ArrayList<String> newValues = new ArrayList<String>();//new String[values.size() + 1];
|
||||||
|
//System.arraycopy(values, 0, newValues, 0, values.size());
|
||||||
|
//newValues[newValues.length - 1] = value;
|
||||||
|
values.add(value);
|
||||||
|
metadata.put(name, values);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy All key-value pairs from properties.
|
||||||
|
*
|
||||||
|
* @param properties
|
||||||
|
* properties to copy from
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public void setAll(Properties properties) {
|
||||||
|
ArrayList<String> propArr = new ArrayList<String>();
|
||||||
|
Enumeration<String> names =
|
||||||
|
(Enumeration<String>) properties.propertyNames();
|
||||||
|
while (names.hasMoreElements()) {
|
||||||
|
String name = names.nextElement();
|
||||||
|
propArr.add(properties.getProperty(name) );
|
||||||
|
metadata.put(name, propArr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set metadata name/value. Associate the specified value to the specified
|
||||||
|
* metadata name. If some previous values were associated to this name, they
|
||||||
|
* are removed.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* the metadata name.
|
||||||
|
* @param value
|
||||||
|
* the metadata value.
|
||||||
|
*/
|
||||||
|
public void set(String name, String value) {
|
||||||
|
ArrayList<String> strArr = this.metadata.get(name);
|
||||||
|
|
||||||
|
if(strArr != null)
|
||||||
|
{
|
||||||
|
metadata.put(name, strArr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
strArr = new ArrayList<String>();
|
||||||
|
strArr.add(value);
|
||||||
|
metadata.put(name,strArr);
|
||||||
|
}
|
||||||
|
++strArrCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the value of the identified metadata property.
|
||||||
|
*
|
||||||
|
* @since Apache Tika 0.7
|
||||||
|
* @param property property definition
|
||||||
|
* @param value property value
|
||||||
|
*/
|
||||||
|
public void set(Property property, String value) {
|
||||||
|
set(property.getName(), value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the integer value of the identified metadata property.
|
||||||
|
*
|
||||||
|
* @since Apache Tika 0.8
|
||||||
|
* @param property simple integer property definition
|
||||||
|
* @param value property value
|
||||||
|
*/
|
||||||
|
public void set(Property property, int value) {
|
||||||
|
if(property.getPropertyType() != Property.PropertyType.SIMPLE) {
|
||||||
|
throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
|
||||||
|
}
|
||||||
|
if(property.getValueType() != Property.ValueType.INTEGER) {
|
||||||
|
throw new PropertyTypeException(Property.ValueType.INTEGER, property.getValueType());
|
||||||
|
}
|
||||||
|
set(property.getName(), Integer.toString(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the real or rational value of the identified metadata property.
|
||||||
|
*
|
||||||
|
* @since Apache Tika 0.8
|
||||||
|
* @param property simple real or simple rational property definition
|
||||||
|
* @param value property value
|
||||||
|
*/
|
||||||
|
public void set(Property property, double value) {
|
||||||
|
if(property.getPropertyType() != Property.PropertyType.SIMPLE) {
|
||||||
|
throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
|
||||||
|
}
|
||||||
|
if(property.getValueType() != Property.ValueType.REAL &&
|
||||||
|
property.getValueType() != Property.ValueType.RATIONAL) {
|
||||||
|
throw new PropertyTypeException(Property.ValueType.REAL, property.getValueType());
|
||||||
|
}
|
||||||
|
set(property.getName(), Double.toString(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the date value of the identified metadata property.
|
||||||
|
*
|
||||||
|
* @since Apache Tika 0.8
|
||||||
|
* @param property simple integer property definition
|
||||||
|
* @param date property value
|
||||||
|
*/
|
||||||
|
public void set(Property property, Date date) {
|
||||||
|
if(property.getPropertyType() != Property.PropertyType.SIMPLE) {
|
||||||
|
throw new PropertyTypeException(Property.PropertyType.SIMPLE, property.getPropertyType());
|
||||||
|
}
|
||||||
|
if(property.getValueType() != Property.ValueType.DATE) {
|
||||||
|
throw new PropertyTypeException(Property.ValueType.DATE, property.getValueType());
|
||||||
|
}
|
||||||
|
set(property.getName(), formatDate(date));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove a metadata and all its associated values.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* metadata name to remove
|
||||||
|
*/
|
||||||
|
public void remove(String name) {
|
||||||
|
metadata.remove(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the number of metadata names in this metadata.
|
||||||
|
*
|
||||||
|
* @return number of metadata names
|
||||||
|
*/
|
||||||
|
public int size() {
|
||||||
|
return metadata.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
|
||||||
|
if (o == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ThunderbirdMetadata other = null;
|
||||||
|
try {
|
||||||
|
other = (ThunderbirdMetadata) o;
|
||||||
|
} catch (ClassCastException cce) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (other.size() != size()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
ArrayList<String> names = names();
|
||||||
|
for (String str : names)
|
||||||
|
{//int i = 0; i < names.length; i++) {
|
||||||
|
ArrayList<String> otherValues = other._getValues(str);
|
||||||
|
ArrayList<String> thisValues = _getValues(str);
|
||||||
|
if (otherValues.size() != thisValues.size()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (int j = 0; j < otherValues.size(); j++) {
|
||||||
|
if (!otherValues.get(j).equals(thisValues.get(j))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer buf = new StringBuffer();
|
||||||
|
ArrayList<String> names = names();
|
||||||
|
for (int i = 0; i < names.size(); i++) {
|
||||||
|
ArrayList<String> values = _getValues(names.get(i));
|
||||||
|
for (int j = 0; j < values.size(); j++) {
|
||||||
|
buf.append(names.get(i)).append("=").append(values.get(j)).append(" ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return buf.toString();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,320 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.sleuthkit.autopsy.thunderbirdparser;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.sax.SafeContentHandler;
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.ContentHandler;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
import org.xml.sax.helpers.AttributesImpl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Content handler decorator that simplifies the task of producing XHTML
|
||||||
|
* events for Tika content parsers.
|
||||||
|
*/
|
||||||
|
public class ThunderbirdXHTMLContentHandler extends SafeContentHandler {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The XHTML namespace URI
|
||||||
|
*/
|
||||||
|
public static final String XHTML = "http://www.w3.org/1999/xhtml";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The newline character that gets inserted after block elements.
|
||||||
|
*/
|
||||||
|
private static final char[] NL = new char[] { '\n' };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The tab character gets inserted before table cells and list items.
|
||||||
|
*/
|
||||||
|
private static final char[] TAB = new char[] { '\t' };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The elements that are in the <head> section.
|
||||||
|
*/
|
||||||
|
private static final Set<String> HEAD =
|
||||||
|
unmodifiableSet("title", "link", "base", "meta");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The elements that are automatically emitted by lazyStartHead, so
|
||||||
|
* skip them if they get sent to startElement/endElement by mistake.
|
||||||
|
*/
|
||||||
|
private static final Set<String> AUTO =
|
||||||
|
unmodifiableSet("html", "head", "body", "frameset");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The elements that get prepended with the {@link #TAB} character.
|
||||||
|
*/
|
||||||
|
private static final Set<String> INDENT =
|
||||||
|
unmodifiableSet("li", "dd", "dt", "td", "th", "frame");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The elements that get appended with the {@link #NL} character.
|
||||||
|
*/
|
||||||
|
public static final Set<String> ENDLINE = unmodifiableSet(
|
||||||
|
"p", "h1", "h2", "h3", "h4", "h5", "h6", "div", "ul", "ol", "dl",
|
||||||
|
"pre", "hr", "blockquote", "address", "fieldset", "table", "form",
|
||||||
|
"noscript", "li", "dt", "dd", "noframes", "br", "tr", "select", "option");
|
||||||
|
|
||||||
|
private static final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
|
||||||
|
|
||||||
|
private static Set<String> unmodifiableSet(String... elements) {
|
||||||
|
return Collections.unmodifiableSet(
|
||||||
|
new HashSet<String>(Arrays.asList(elements)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Metadata associated with the document. Used to fill in the
|
||||||
|
* <head/> section.
|
||||||
|
*/
|
||||||
|
private final ThunderbirdMetadata metadata;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flags to indicate whether the document head element has been started/ended.
|
||||||
|
*/
|
||||||
|
private boolean headStarted = false;
|
||||||
|
private boolean headEnded = false;
|
||||||
|
private boolean useFrameset = false;
|
||||||
|
|
||||||
|
/**
 * Creates a handler that forwards events to the given content handler and
 * uses the given metadata when the head section is written.
 *
 * @param handler  content handler that receives the generated events
 * @param metadata metadata associated with the document
 */
public ThunderbirdXHTMLContentHandler(ContentHandler handler, ThunderbirdMetadata metadata) {
    super(handler);
    this.metadata = metadata;
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts an XHTML document by setting up the namespace mappings.
|
||||||
|
* The standard XHTML prefix is generated lazily when the first
|
||||||
|
* element is started.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void startDocument() throws SAXException {
|
||||||
|
super.startDocument();
|
||||||
|
startPrefixMapping("", XHTML);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates the following XHTML prefix when called for the first time:
|
||||||
|
* <pre>
|
||||||
|
* <html>
|
||||||
|
* <head>
|
||||||
|
* <title>...</title>
|
||||||
|
* </head>
|
||||||
|
* <body>
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
private void lazyStartHead() throws SAXException {
|
||||||
|
if (!headStarted) {
|
||||||
|
headStarted = true;
|
||||||
|
|
||||||
|
// Call directly, so we don't go through our startElement(), which will
|
||||||
|
// ignore these elements.
|
||||||
|
super.startElement(XHTML, "html", "html", EMPTY_ATTRIBUTES);
|
||||||
|
newline();
|
||||||
|
super.startElement(XHTML, "head", "head", EMPTY_ATTRIBUTES);
|
||||||
|
newline();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates the following XHTML prefix when called for the first time:
|
||||||
|
* <pre>
|
||||||
|
* <html>
|
||||||
|
* <head>
|
||||||
|
* <title>...</title>
|
||||||
|
* </head>
|
||||||
|
* <body> (or <frameset>
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
private void lazyEndHead(boolean isFrameset) throws SAXException {
|
||||||
|
lazyStartHead();
|
||||||
|
|
||||||
|
if (!headEnded) {
|
||||||
|
headEnded = true;
|
||||||
|
useFrameset = isFrameset;
|
||||||
|
|
||||||
|
// TIKA-478: Emit all metadata values (other than title). We have to call
|
||||||
|
// startElement() and characters() directly to avoid recursive problems.
|
||||||
|
for (String name : metadata.names()) {
|
||||||
|
if (name.equals("title")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String value : metadata.getValues(name)) {
|
||||||
|
// Putting null values into attributes causes problems, but is
|
||||||
|
// allowed by Metadata, so guard against that.
|
||||||
|
if (value != null) {
|
||||||
|
AttributesImpl attributes = new AttributesImpl();
|
||||||
|
attributes.addAttribute("", "name", "name", "CDATA", name);
|
||||||
|
attributes.addAttribute("", "content", "content", "CDATA", value);
|
||||||
|
super.startElement(XHTML, "meta", "meta", attributes);
|
||||||
|
super.endElement(XHTML, "meta", "meta");
|
||||||
|
newline();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
super.startElement(XHTML, "title", "title", EMPTY_ATTRIBUTES);
|
||||||
|
String title = metadata.get(Metadata.TITLE);
|
||||||
|
if (title != null && title.length() > 0) {
|
||||||
|
char[] titleChars = title.toCharArray();
|
||||||
|
super.characters(titleChars, 0, titleChars.length);
|
||||||
|
} else {
|
||||||
|
// TIKA-725: Prefer <title></title> over <title/>
|
||||||
|
super.characters(new char[0], 0, 0);
|
||||||
|
}
|
||||||
|
super.endElement(XHTML, "title", "title");
|
||||||
|
newline();
|
||||||
|
|
||||||
|
super.endElement(XHTML, "head", "head");
|
||||||
|
newline();
|
||||||
|
|
||||||
|
if (useFrameset) {
|
||||||
|
super.startElement(XHTML, "frameset", "frameset", EMPTY_ATTRIBUTES);
|
||||||
|
} else {
|
||||||
|
super.startElement(XHTML, "body", "body", EMPTY_ATTRIBUTES);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ends the XHTML document by writing the following footer and
|
||||||
|
* clearing the namespace mappings:
|
||||||
|
* <pre>
|
||||||
|
* </body>
|
||||||
|
* </html>
|
||||||
|
* </pre>
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void endDocument() throws SAXException {
|
||||||
|
lazyEndHead(useFrameset);
|
||||||
|
|
||||||
|
if (useFrameset) {
|
||||||
|
super.endElement(XHTML, "frameset", "frameset");
|
||||||
|
} else {
|
||||||
|
super.endElement(XHTML, "body", "body");
|
||||||
|
}
|
||||||
|
|
||||||
|
super.endElement(XHTML, "html", "html");
|
||||||
|
|
||||||
|
endPrefixMapping("");
|
||||||
|
super.endDocument();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Starts the given element. Table cells and list items are automatically
|
||||||
|
* indented by emitting a tab character as ignorable whitespace.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void startElement(
|
||||||
|
String uri, String local, String name, Attributes attributes)
|
||||||
|
throws SAXException {
|
||||||
|
|
||||||
|
if (name.equals("frameset")) {
|
||||||
|
lazyEndHead(true);
|
||||||
|
} else if (!AUTO.contains(name)) {
|
||||||
|
if (HEAD.contains(name)) {
|
||||||
|
lazyStartHead();
|
||||||
|
} else {
|
||||||
|
lazyEndHead(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (XHTML.equals(uri) && INDENT.contains(name)) {
|
||||||
|
ignorableWhitespace(TAB, 0, TAB.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
super.startElement(uri, local, name, attributes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ends the given element. Block elements are automatically followed
|
||||||
|
* by a newline character.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void endElement(String uri, String local, String name) throws SAXException {
|
||||||
|
if (!AUTO.contains(name)) {
|
||||||
|
super.endElement(uri, local, name);
|
||||||
|
if (XHTML.equals(uri) && ENDLINE.contains(name)) {
|
||||||
|
newline();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see <a href="https://issues.apache.org/jira/browse/TIKA-210">TIKA-210</a>
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void characters(char[] ch, int start, int length) throws SAXException {
|
||||||
|
lazyEndHead(useFrameset);
|
||||||
|
super.characters(ch, start, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
//------------------------------------------< public convenience methods >
|
||||||
|
|
||||||
|
public void startElement(String name) throws SAXException {
|
||||||
|
startElement(XHTML, name, name, EMPTY_ATTRIBUTES);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void startElement(String name, String attribute, String value)
|
||||||
|
throws SAXException {
|
||||||
|
AttributesImpl attributes = new AttributesImpl();
|
||||||
|
attributes.addAttribute("", attribute, attribute, "CDATA", value);
|
||||||
|
startElement(XHTML, name, name, attributes);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void startElement(String name, AttributesImpl attributes)
|
||||||
|
throws SAXException {
|
||||||
|
startElement(XHTML, name, name, attributes);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void endElement(String name) throws SAXException {
|
||||||
|
endElement(XHTML, name, name);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void characters(String characters) throws SAXException {
|
||||||
|
if (characters != null && characters.length() > 0) {
|
||||||
|
characters(characters.toCharArray(), 0, characters.length());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void newline() throws SAXException {
|
||||||
|
ignorableWhitespace(NL, 0, NL.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Emits an XHTML element with the given text content. If the given
|
||||||
|
* text value is null or empty, then the element is not written.
|
||||||
|
*
|
||||||
|
* @param name XHTML element name
|
||||||
|
* @param value element value, possibly <code>null</code>
|
||||||
|
* @throws SAXException if the content element could not be written
|
||||||
|
*/
|
||||||
|
public void element(String name, String value) throws SAXException {
|
||||||
|
if (value != null && value.length() > 0) {
|
||||||
|
startElement(name);
|
||||||
|
characters(value);
|
||||||
|
endElement(name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,11 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE filesystem PUBLIC "-//NetBeans//DTD Filesystem 1.2//EN" "http://www.netbeans.org/dtds/filesystem-1_2.dtd">
<!-- Module layer: declares the mbox file ingest service instance under the "Services" folder. -->
<filesystem>
    <folder name="Services">
        <file name="org-sleuthkit-autopsy-mboxparser-MboxFileIngestService.instance">
            <!-- Type under which the instance is registered. -->
            <attr name="instanceOf" stringvalue="org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile"/>
            <!-- Method used to obtain the instance. -->
            <attr name="instanceCreate" methodvalue="org.sleuthkit.autopsy.mboxparser.MboxFileIngestService.getDefault"/>
            <!-- Presumably orders this entry relative to other registered services; confirm against the other layers. -->
            <attr name="position" intvalue="1100"/>
        </file>
    </folder>
</filesystem>
|
Loading…
x
Reference in New Issue
Block a user