mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 18:17:43 +00:00
207 lines
5.6 KiB
Java
207 lines
5.6 KiB
Java
package org.sleuthkit.autopsy.mboxparser;
|
|
|
|
import java.io.*;
|
|
import java.text.ParseException;
|
|
import java.text.SimpleDateFormat;
|
|
import java.util.ArrayList;
|
|
import java.util.logging.Level;
|
|
import java.util.logging.Logger;
|
|
import org.apache.tika.Tika;
|
|
import org.apache.tika.exception.TikaException;
|
|
import org.apache.tika.metadata.Metadata;
|
|
import org.apache.tika.mime.MimeTypes;
|
|
import org.apache.tika.mime.MediaType;
|
|
import org.apache.tika.parser.ParseContext;
|
|
import org.apache.tika.parser.Parser;
|
|
import org.apache.tika.parser.mbox.MboxParser;
|
|
import org.apache.tika.sax.BodyContentHandler;
|
|
import org.xml.sax.ContentHandler;
|
|
import org.xml.sax.SAXException;
|
|
|
|
public class MboxEmailParser {
|
|
|
|
|
|
private InputStream stream;
|
|
//Tika object
|
|
private Tika tika;
|
|
private Metadata metadata;
|
|
private ContentHandler contentHandler;
|
|
private String mimeType;
|
|
private Parser parser;
|
|
private ParseContext context;
|
|
|
|
private static ArrayList<String> tikaMimeTypes;
|
|
|
|
static
|
|
{
|
|
tikaMimeTypes = new ArrayList<String>();
|
|
tikaMimeTypes.add(MimeTypes.OCTET_STREAM);
|
|
tikaMimeTypes.add(MimeTypes.PLAIN_TEXT);
|
|
tikaMimeTypes.add(MimeTypes.XML);
|
|
}
|
|
|
|
public MboxEmailParser()
|
|
{
|
|
this.tika = new Tika();
|
|
}
|
|
|
|
public MboxEmailParser(InputStream inStream)
|
|
{
|
|
this.tika = new Tika();
|
|
this.stream = inStream;
|
|
}
|
|
|
|
public MboxEmailParser(String filepath)
|
|
{
|
|
this.tika = new Tika();
|
|
this.stream = this.getClass().getResourceAsStream(filepath);
|
|
}
|
|
|
|
private void init() throws IOException
|
|
{
|
|
this.tika.setMaxStringLength(10*1024*1024);
|
|
this.metadata = new Metadata();
|
|
//Set MIME Type
|
|
this.mimeType = tika.detect(this.stream);
|
|
this.parser = new MboxParser();
|
|
this.context = new ParseContext();
|
|
|
|
this.contentHandler = new BodyContentHandler(-1);
|
|
//Seems like setting this causes the metadata not to output all of it.
|
|
// this.metadata.set(Metadata.CONTENT_TYPE, this.mimeType);
|
|
}
|
|
|
|
public void parse() throws FileNotFoundException, IOException, SAXException, TikaException
|
|
{
|
|
init();
|
|
// this.metadata = new Metadata();
|
|
//String mimeType = tika.detect(this.stream);
|
|
parser.parse(this.stream,this.contentHandler, this.metadata, context);
|
|
}
|
|
|
|
public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException
|
|
{
|
|
init();
|
|
parser.parse(inStream,this.contentHandler, this.metadata, context);
|
|
String blbha = "stop";
|
|
}
|
|
|
|
public Metadata getMetadata()
|
|
{
|
|
return this.metadata;
|
|
}
|
|
|
|
|
|
//Returns message content, i.e. plain text or html
|
|
public String getContent()
|
|
{
|
|
return this.contentHandler.toString();
|
|
}
|
|
|
|
public String detectEmailFileFormat(String filepath) throws IOException
|
|
{
|
|
return this.tika.detect(filepath);
|
|
}
|
|
|
|
//Detects the mime type from the first few bytes of the document
|
|
public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName)
|
|
{
|
|
return this.tika.detect(firstFewBytes, inDocName);
|
|
}
|
|
|
|
|
|
public boolean isValidMimeTypeMbox(byte[] buffer)
|
|
{
|
|
return (new String(buffer)).startsWith("From ");
|
|
}
|
|
|
|
//This assumes the file/stream was parsed since we are looking at the metadata
|
|
public boolean isValidMboxType()
|
|
{
|
|
return this.metadata.get(Metadata.CONTENT_TYPE).equals("application/mbox");
|
|
}
|
|
|
|
//Get email subject
|
|
public String getSubject()
|
|
{
|
|
return this.metadata.get(Metadata.SUBJECT);
|
|
}
|
|
|
|
public String getTitle()
|
|
{
|
|
return this.metadata.get(Metadata.TITLE);
|
|
}
|
|
|
|
public Long getDateCreated()
|
|
{
|
|
Long epochtime;
|
|
Long ftime = (long) 0;
|
|
|
|
try {
|
|
String datetime = this.metadata.get(Metadata.DATE);
|
|
epochtime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(datetime).getTime();
|
|
ftime = epochtime.longValue();
|
|
ftime = ftime / 1000;
|
|
} catch (ParseException ex) {
|
|
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
|
|
}
|
|
|
|
return ftime;
|
|
}
|
|
|
|
public String getApplication()
|
|
{
|
|
String client;
|
|
String userAgent = "";
|
|
userAgent = this.metadata.get("MboxParser-user-agent");
|
|
if(userAgent.matches("(?i).*Thunderbird.*"))
|
|
{
|
|
client = "Thunderbird";
|
|
}
|
|
else{
|
|
client = "Unknown";
|
|
}
|
|
return client;
|
|
}
|
|
|
|
public String getContenType()
|
|
{
|
|
return this.metadata.get(Metadata.CONTENT_TYPE);
|
|
}
|
|
|
|
public String getContenEncoding()
|
|
{
|
|
return this.metadata.get(Metadata.CONTENT_ENCODING);
|
|
}
|
|
|
|
public String getFrom()
|
|
{
|
|
return this.metadata.get(Metadata.AUTHOR);
|
|
}
|
|
|
|
public String getTo()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_TO);
|
|
}
|
|
|
|
public String getCC()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_CC);
|
|
}
|
|
|
|
public String getBCC()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_BCC);
|
|
}
|
|
|
|
public String getRecipientAddress()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS);
|
|
}
|
|
|
|
public String getMboxSupportedMediaType()
|
|
{
|
|
return MediaType.application("mbox").getType();
|
|
}
|
|
}
|