mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-19 02:57:44 +00:00
169 lines
4.6 KiB
Java
169 lines
4.6 KiB
Java
package org.sleuthkit.autopsy.mboxparser;
|
|
|
|
import java.io.*;
|
|
import java.util.ArrayList;
|
|
import org.apache.tika.Tika;
|
|
import org.apache.tika.exception.TikaException;
|
|
import org.apache.tika.io.TikaInputStream;
|
|
import org.apache.tika.metadata.Metadata;
|
|
import org.apache.tika.mime.MimeTypes;
|
|
import org.apache.tika.parser.ParseContext;
|
|
import org.apache.tika.parser.Parser;
|
|
import org.apache.tika.parser.mbox.MboxParser;
|
|
import org.apache.tika.sax.BodyContentHandler;
|
|
import org.xml.sax.ContentHandler;
|
|
import org.xml.sax.SAXException;
|
|
|
|
public class MboxEmailParser {
|
|
|
|
|
|
private InputStream stream;
|
|
//Tika object
|
|
private Tika tika;
|
|
private Metadata metadata;
|
|
private ContentHandler contentHandler;
|
|
private String mimeType;
|
|
private Parser parser;
|
|
private ParseContext context;
|
|
|
|
private static ArrayList<String> tikaMimeTypes;
|
|
|
|
static
|
|
{
|
|
tikaMimeTypes = new ArrayList<String>();
|
|
tikaMimeTypes.add(MimeTypes.OCTET_STREAM);
|
|
tikaMimeTypes.add(MimeTypes.PLAIN_TEXT);
|
|
tikaMimeTypes.add(MimeTypes.XML);
|
|
}
|
|
|
|
public MboxEmailParser()
|
|
{
|
|
this.tika = new Tika();
|
|
}
|
|
|
|
public MboxEmailParser(InputStream inStream)
|
|
{
|
|
this.tika = new Tika();
|
|
this.stream = inStream;
|
|
}
|
|
|
|
public MboxEmailParser(String filepath)
|
|
{
|
|
this.tika = new Tika();
|
|
this.stream = this.getClass().getResourceAsStream(filepath);
|
|
}
|
|
|
|
private void init() throws IOException
|
|
{
|
|
this.metadata = new Metadata();
|
|
//Set MIME Type
|
|
this.mimeType = tika.detect(this.stream);
|
|
this.parser = new MboxParser();
|
|
this.context = new ParseContext();
|
|
this.contentHandler = new BodyContentHandler();
|
|
//Seems like setting this causes the metadata not to output all of it.
|
|
this.metadata.set(Metadata.CONTENT_TYPE, this.mimeType);
|
|
}
|
|
|
|
public void parse() throws FileNotFoundException, IOException, SAXException, TikaException
|
|
{
|
|
init();
|
|
// this.metadata = new Metadata();
|
|
//String mimeType = tika.detect(this.stream);
|
|
parser.parse(this.stream,this.contentHandler, this.metadata, context);
|
|
}
|
|
|
|
public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException
|
|
{
|
|
init();
|
|
parser.parse(inStream,this.contentHandler, this.metadata, context);
|
|
}
|
|
|
|
public Metadata getMetadata()
|
|
{
|
|
return this.metadata;
|
|
}
|
|
|
|
//Returns message content, i.e. plain text or html
|
|
public String getContent()
|
|
{
|
|
return this.contentHandler.toString();
|
|
}
|
|
|
|
public String detectEmailFileFormat(String filepath) throws IOException
|
|
{
|
|
return this.tika.detect(filepath);
|
|
}
|
|
|
|
//Detects the mime type from the first few bytes of the document
|
|
public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName)
|
|
{
|
|
return this.tika.detect(firstFewBytes, inDocName);
|
|
}
|
|
|
|
|
|
public boolean isValidMimeTypeMbox(byte[] buffer)
|
|
{
|
|
String outMimeType = this.tika.detect(buffer);
|
|
|
|
return outMimeType.equals(MimeTypes.OCTET_STREAM) ? true : (outMimeType.equals(MimeTypes.PLAIN_TEXT) ? true : outMimeType.equals(MimeTypes.XML));
|
|
}
|
|
|
|
//This assumes the file/stream was parsed since we are looking at the metadata
|
|
public boolean isValidMboxType()
|
|
{
|
|
return this.metadata.get(Metadata.DATE_CREATED).equals("application/mbox");
|
|
}
|
|
|
|
//Get email subject
|
|
public String getSubject()
|
|
{
|
|
return this.metadata.get(Metadata.SUBJECT);
|
|
}
|
|
|
|
public String getTitle()
|
|
{
|
|
return this.metadata.get(Metadata.TITLE);
|
|
}
|
|
|
|
public String getDateCreated()
|
|
{
|
|
return this.metadata.get(Metadata.DATE_CREATED);
|
|
}
|
|
|
|
public String getContenType()
|
|
{
|
|
return this.metadata.get(Metadata.CONTENT_TYPE);
|
|
}
|
|
|
|
public String getContenEncoding()
|
|
{
|
|
return this.metadata.get(Metadata.CONTENT_ENCODING);
|
|
}
|
|
|
|
public String getFrom()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_FROM);
|
|
}
|
|
|
|
public String getTo()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_TO);
|
|
}
|
|
|
|
public String getCC()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_CC);
|
|
}
|
|
|
|
public String getBCC()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_BCC);
|
|
}
|
|
|
|
public String getRecipientAddress()
|
|
{
|
|
return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS);
|
|
}
|
|
}
|