Tweak to make ingest service work.

This commit is contained in:
Alex Ebadirad 2012-06-08 15:41:41 -07:00
parent 6e03006ea4
commit aa21661299
2 changed files with 132 additions and 123 deletions

View File

@ -1,10 +1,13 @@
package org.sleuthkit.autopsy.mboxparser; package org.sleuthkit.autopsy.mboxparser;
import java.io.*; import java.io.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.tika.Tika; import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException; import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MimeTypes; import org.apache.tika.mime.MimeTypes;
import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.ParseContext;
@ -15,154 +18,139 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
public class MboxEmailParser { public class MboxEmailParser {
private InputStream stream; private InputStream stream;
//Tika object //Tika object
private Tika tika; private Tika tika;
private Metadata metadata; private Metadata metadata;
private ContentHandler contentHandler; private ContentHandler contentHandler;
private String mimeType; private String mimeType;
private Parser parser; private Parser parser;
private ParseContext context; private ParseContext context;
private static ArrayList<String> tikaMimeTypes; private static ArrayList<String> tikaMimeTypes;
static static {
{ tikaMimeTypes = new ArrayList<String>();
tikaMimeTypes = new ArrayList<String>();
tikaMimeTypes.add(MimeTypes.OCTET_STREAM); tikaMimeTypes.add(MimeTypes.OCTET_STREAM);
tikaMimeTypes.add(MimeTypes.PLAIN_TEXT); tikaMimeTypes.add(MimeTypes.PLAIN_TEXT);
tikaMimeTypes.add(MimeTypes.XML); tikaMimeTypes.add(MimeTypes.XML);
} }
public MboxEmailParser() public MboxEmailParser() {
{
this.tika = new Tika(); this.tika = new Tika();
} }
public MboxEmailParser(InputStream inStream) public MboxEmailParser(InputStream inStream) {
{
this.tika = new Tika(); this.tika = new Tika();
this.stream = inStream; this.stream = inStream;
} }
public MboxEmailParser(String filepath) public MboxEmailParser(String filepath) {
{
this.tika = new Tika(); this.tika = new Tika();
this.stream = this.getClass().getResourceAsStream(filepath); this.stream = this.getClass().getResourceAsStream(filepath);
} }
private void init() throws IOException private void init() throws IOException {
{ this.metadata = new Metadata();
this.metadata = new Metadata();
//Set MIME Type //Set MIME Type
this.mimeType = tika.detect(this.stream); this.mimeType = tika.detect(this.stream);
this.parser = new MboxParser(); this.parser = new MboxParser();
this.context = new ParseContext(); this.context = new ParseContext();
this.contentHandler = new BodyContentHandler(); this.contentHandler = new BodyContentHandler();
//Seems like setting this causes the metadata not to output all of it. //Seems like setting this causes the metadata not to output all of it.
this.metadata.set(Metadata.CONTENT_TYPE, this.mimeType); this.metadata.set(Metadata.CONTENT_TYPE, this.mimeType);
} }
public void parse() throws FileNotFoundException, IOException, SAXException, TikaException public void parse() throws FileNotFoundException, IOException, SAXException, TikaException {
{
init(); init();
// this.metadata = new Metadata(); // this.metadata = new Metadata();
//String mimeType = tika.detect(this.stream); //String mimeType = tika.detect(this.stream);
parser.parse(this.stream,this.contentHandler, this.metadata, context); parser.parse(this.stream, this.contentHandler, this.metadata, context);
} }
public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException public void parse(InputStream inStream) throws FileNotFoundException, IOException, SAXException, TikaException {
{ init();
init(); parser.parse(inStream, this.contentHandler, this.metadata, context);
parser.parse(inStream,this.contentHandler, this.metadata, context);
} }
public Metadata getMetadata() public Metadata getMetadata() {
{
return this.metadata; return this.metadata;
} }
//Returns message content, i.e. plain text or html //Returns message content, i.e. plain text or html
public String getContent() public String getContent() {
{
return this.contentHandler.toString(); return this.contentHandler.toString();
} }
public String detectEmailFileFormat(String filepath) throws IOException public String detectEmailFileFormat(String filepath) throws IOException {
{
return this.tika.detect(filepath); return this.tika.detect(filepath);
} }
//Detects the mime type from the first few bytes of the document //Detects the mime type from the first few bytes of the document
public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName) public String detectMediaTypeFromBytes(byte[] firstFewBytes, String inDocName) {
{
return this.tika.detect(firstFewBytes, inDocName); return this.tika.detect(firstFewBytes, inDocName);
} }
public boolean isValidMimeTypeMbox(byte[] buffer) {
public boolean isValidMimeTypeMbox(byte[] buffer)
{
String outMimeType = this.tika.detect(buffer); String outMimeType = this.tika.detect(buffer);
return outMimeType.equals(MimeTypes.OCTET_STREAM) ? true : (outMimeType.equals(MimeTypes.PLAIN_TEXT) ? true : outMimeType.equals(MimeTypes.XML)); return outMimeType.equals(MimeTypes.OCTET_STREAM) ? true : (outMimeType.equals(MimeTypes.PLAIN_TEXT) ? true : outMimeType.equals(MimeTypes.XML));
} }
//This assumes the file/stream was parsed since we are looking at the metadata //This assumes the file/stream was parsed since we are looking at the metadata
public boolean isValidMboxType() public boolean isValidMboxType() {
{
return this.metadata.get(Metadata.DATE_CREATED).equals("application/mbox"); return this.metadata.get(Metadata.DATE_CREATED).equals("application/mbox");
} }
//Get email subject //Get email subject
public String getSubject() public String getSubject() {
{
return this.metadata.get(Metadata.SUBJECT); return this.metadata.get(Metadata.SUBJECT);
} }
public String getTitle() public String getTitle() {
{
return this.metadata.get(Metadata.TITLE); return this.metadata.get(Metadata.TITLE);
} }
public String getDateCreated() public Long getDateCreated() {
{ Long epochtime;
return this.metadata.get(Metadata.DATE_CREATED); Long ftime = (long) 0;
try {
epochtime = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'").parse(this.metadata.get(Metadata.DATE_CREATED)).getTime();
ftime = epochtime.longValue();
ftime = ftime / 1000;
} catch (ParseException ex) {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
}
return ftime;
} }
public String getContenType() public String getContenType() {
{
return this.metadata.get(Metadata.CONTENT_TYPE); return this.metadata.get(Metadata.CONTENT_TYPE);
} }
public String getContenEncoding() public String getContenEncoding() {
{
return this.metadata.get(Metadata.CONTENT_ENCODING); return this.metadata.get(Metadata.CONTENT_ENCODING);
} }
public String getFrom() public String getFrom() {
{
return this.metadata.get(Metadata.MESSAGE_FROM); return this.metadata.get(Metadata.MESSAGE_FROM);
} }
public String getTo() public String getTo() {
{
return this.metadata.get(Metadata.MESSAGE_TO); return this.metadata.get(Metadata.MESSAGE_TO);
} }
public String getCC() public String getCC() {
{
return this.metadata.get(Metadata.MESSAGE_CC); return this.metadata.get(Metadata.MESSAGE_CC);
} }
public String getBCC() public String getBCC() {
{
return this.metadata.get(Metadata.MESSAGE_BCC); return this.metadata.get(Metadata.MESSAGE_BCC);
} }
public String getRecipientAddress() public String getRecipientAddress() {
{
return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS); return this.metadata.get(Metadata.MESSAGE_RECIPIENT_ADDRESS);
} }
} }

View File

@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.mboxparser;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.Collection;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import org.apache.tika.exception.TikaException; import org.apache.tika.exception.TikaException;
@ -29,6 +30,9 @@ import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
import org.sleuthkit.autopsy.ingest.IngestServiceAbstract.*; import org.sleuthkit.autopsy.ingest.IngestServiceAbstract.*;
import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile; import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.FsContent;
import org.sleuthkit.datamodel.ReadContentInputStream; import org.sleuthkit.datamodel.ReadContentInputStream;
import org.sleuthkit.datamodel.TskException; import org.sleuthkit.datamodel.TskException;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
@ -49,7 +53,7 @@ public class MboxFileIngestService implements IngestServiceAbstractFile {
@Override @Override
public ProcessResult process(AbstractFile fsContent) { public ProcessResult process(AbstractFile fsContent) {
managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "Processing " + fsContent.getName()));
MboxEmailParser mbox = new MboxEmailParser(); MboxEmailParser mbox = new MboxEmailParser();
boolean isMbox = false; boolean isMbox = false;
@ -63,11 +67,19 @@ public class MboxFileIngestService implements IngestServiceAbstractFile {
if (isMbox) { if (isMbox) {
managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "Processing " + fsContent.getName()));
try { try {
ReadContentInputStream contentStream = new ReadContentInputStream(fsContent); ReadContentInputStream contentStream = new ReadContentInputStream(fsContent);
mbox.parse(contentStream); mbox.parse(contentStream);
String content = mbox.getContent(); String content = mbox.getContent();
String blah = new String(); String subject = mbox.getSubject();
String from = mbox.getFrom();
String to = mbox.getTo();
String cc = mbox.getCC();
String bcc = mbox.getBCC();
String ctype = mbox.getContenType();
Long datetime = mbox.getDateCreated();
} catch (FileNotFoundException ex) { } catch (FileNotFoundException ex) {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex); Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) { } catch (IOException ex) {
@ -78,11 +90,22 @@ public class MboxFileIngestService implements IngestServiceAbstractFile {
Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex); Logger.getLogger(MboxFileIngestService.class.getName()).log(Level.SEVERE, null, ex);
} }
} }
return ProcessResult.OK;
}
return ProcessResult.OK ; public void addArtifact(BlackboardArtifact.ARTIFACT_TYPE type, FsContent content, Collection<BlackboardAttribute> bbattributes) {
}
@Override try {
public void complete() { BlackboardArtifact bbart = content.newArtifact(type);
bbart.addAttributes(bbattributes);
} catch (TskException ex) {
logger.log(Level.WARNING, "Error while trying to add an artifact: " + ex);
}
}
@Override
public void complete() {
logger.log(Level.INFO, "complete()"); logger.log(Level.INFO, "complete()");
managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "COMPLETE")); managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "COMPLETE"));
@ -90,68 +113,66 @@ public class MboxFileIngestService implements IngestServiceAbstractFile {
} }
@Override @Override
public String getName() { public String getName() {
return "Mbox Parser"; return "Mbox Parser";
} }
@Override @Override
public String getDescription() { public String getDescription() {
return "This class parses through a file to determine if it is an mbox file and if so, populates an email artifact for it in the blackboard."; return "This class parses through a file to determine if it is an mbox file and if so, populates an email artifact for it in the blackboard.";
} }
@Override @Override
public void init(IngestManagerProxy managerProxy) { public void init(IngestManagerProxy managerProxy) {
logger.log(Level.INFO, "init()"); logger.log(Level.INFO, "init()");
this.managerProxy = managerProxy; this.managerProxy = managerProxy;
this.managerProxy.postMessage(IngestMessage.createMessage(++messageId, IngestMessage.MessageType.INFO, this, "Started"));
//service specific initialization here //service specific initialization here
} }
@Override @Override
public void stop() { public void stop() {
logger.log(Level.INFO, "stop()"); logger.log(Level.INFO, "stop()");
//service specific cleanup due to interruption here //service specific cleanup due to interruption here
} }
@Override @Override
public ServiceType getType() { public ServiceType getType() {
return ServiceType.Image; return ServiceType.AbstractFile;
} }
@Override @Override
public boolean hasSimpleConfiguration() { public boolean hasSimpleConfiguration() {
return false;
}
@Override
public boolean hasAdvancedConfiguration() {
return false; return false;
} }
@Override @Override
public javax.swing.JPanel getSimpleConfiguration() { public boolean hasAdvancedConfiguration() {
return null;
}
@Override
public javax.swing.JPanel getAdvancedConfiguration() {
return null;
}
@Override
public boolean hasBackgroundJobsRunning() {
return false; return false;
} }
@Override @Override
public void saveAdvancedConfiguration() { public javax.swing.JPanel getSimpleConfiguration() {
return null;
} }
@Override @Override
public void saveSimpleConfiguration() { public javax.swing.JPanel getAdvancedConfiguration() {
return null;
}
@Override
public boolean hasBackgroundJobsRunning() {
return false;
}
@Override
public void saveAdvancedConfiguration() {
}
@Override
public void saveSimpleConfiguration() {
} }
} }