package org.sleuthkit.autopsy.mboxparser; import java.io.*; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.logging.Level; import java.util.logging.Logger; import org.apache.tika.Tika; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.mime.MimeTypes; import org.apache.tika.mime.MediaType; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.parser.mbox.MboxParser; import org.apache.tika.sax.BodyContentHandler; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; public class MboxEmailParser { private InputStream stream; //Tika object private Tika tika; private Metadata metadata; private ContentHandler contentHandler; private String mimeType; private Parser parser; private ParseContext context; private static ArrayList tikaMimeTypes; static { tikaMimeTypes = new ArrayList(); tikaMimeTypes.add(MimeTypes.OCTET_STREAM); tikaMimeTypes.add(MimeTypes.PLAIN_TEXT); tikaMimeTypes.add(MimeTypes.XML); } public MboxEmailParser() { this.tika = new Tika(); } public MboxEmailParser(InputStream inStream) { this.tika = new Tika(); this.stream = inStream; } public MboxEmailParser(String filepath) { this.tika = new Tika(); this.stream = this.getClass().getResourceAsStream(filepath); } private void init() throws IOException { this.tika.setMaxStringLength(10*1024*1024); this.metadata = new Metadata(); //Set MIME Type this.mimeType = tika.detect(this.stream); this.parser = new MboxParser(); this.context = new ParseContext(); this.contentHandler = new BodyContentHandler(-1); //Seems like setting this causes the metadata not to output all of it. // this.metadata.set(Metadata.CONTENT_TYPE, this.mimeType); } public void parse() throws FileNotFoundException, IOException, SAXException, TikaException { init(); // this.metadata = new Metadata(); //String mimeType = tika.detect(this.stream); parser.parse(this.stream,this.contentHandler, this.metadata, context); } public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException { init(); parser.parse(inStream,this.contentHandler, this.metadata, context); String blbha = "stop"; } public Metadata getMetadata() { return this.metadata; } //Returns message content, i.e. plain text or html public String getContent() { return this.contentHandler.toString(); } public String detectEmailFileFormat(String filepath) throws IOException { return this.tika.detect(filepath); } //Detects the mime type from the first few bytes of the document public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName) { return this.tika.detect(firstFewBytes, inDocName); } public boolean isValidMimeTypeMbox(byte[] buffer) { return (new String(buffer)).startsWith("From "); } //This assumes the file/stream was parsed since we are looking at the metadata public boolean isValidMboxType() { return this.metadata.get(Metadata.CONTENT_TYPE).equals("application/mbox"); } //Get email subject public String getSubject() { return this.metadata.get(Metadata.SUBJECT); } public String getTitle() { return this.metadata.get(Metadata.TITLE); } public Long getDateCreated() { Long epochtime; Long ftime = (long) 0; try { String datetime = this.metadata.get(Metadata.DATE); epochtime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'").parse(datetime).getTime(); ftime = epochtime.longValue(); ftime = ftime / 1000; } catch (ParseException ex) { Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex); } return ftime; } public String getApplication() { String client; String userAgent = ""; userAgent = this.metadata.get("MboxParser-user-agent"); if(userAgent.matches("(?i).*Thunderbird.*")) { client = "Thunderbird"; } else{ client = "Unknown"; } return client; } public String getContenType() { return this.metadata.get(Metadata.CONTENT_TYPE); } public String getContenEncoding() { return this.metadata.get(Metadata.CONTENT_ENCODING); } public String getFrom() { return this.metadata.get(Metadata.AUTHOR); } public String getTo() { return this.metadata.get(Metadata.MESSAGE_TO); } public String getCC() { return this.metadata.get(Metadata.MESSAGE_CC); } public String getBCC() { return this.metadata.get(Metadata.MESSAGE_BCC); } public String getRecipientAddress() { return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS); } public String getMboxSupportedMediaType() { return MediaType.application("mbox").getType(); } }