diff --git a/KeywordSearch/ivy.xml b/KeywordSearch/ivy.xml index 3d0a84af01..fe66fa16ed 100644 --- a/KeywordSearch/ivy.xml +++ b/KeywordSearch/ivy.xml @@ -16,6 +16,7 @@ + diff --git a/KeywordSearch/nbproject/project.xml b/KeywordSearch/nbproject/project.xml index f32685fca2..2d4f8391e4 100644 --- a/KeywordSearch/nbproject/project.xml +++ b/KeywordSearch/nbproject/project.xml @@ -160,6 +160,10 @@ ext/commons-httpclient-3.1.jar release/modules/ext/commons-httpclient-3.1.jar + + ext/tika-core-0.10.jar + release/modules/ext/tika-core-0.10.jar + ext/commons-codec-1.5.jar release/modules/ext/commons-codec-1.5.jar @@ -168,6 +172,10 @@ ext/commons-lang-2.4.jar release/modules/ext/commons-lang-2.4.jar + + ext/tika-parsers-0.10.jar + release/modules/ext/tika-parsers-0.10.jar + ext/jcl-over-slf4j-1.6.1.jar release/modules/ext/jcl-over-slf4j-1.6.1.jar diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileChunk.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileChunk.java new file mode 100644 index 0000000000..a52be7e39d --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileChunk.java @@ -0,0 +1,66 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2012 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.sleuthkit.autopsy.keywordsearch; + +import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException; + +/** + * Represents a single chunk of text to be indexed, derived from its parent AbstractFileExtract source file + */ +class AbstractFileChunk { + private int chunkID; + private AbstractFileExtract parent; + + AbstractFileChunk(AbstractFileExtract parent, int chunkID) { + this.parent = parent; + this.chunkID = chunkID; + } + + public AbstractFileExtract getParent() { + return parent; + } + + public int getChunkId() { + return chunkID; + } + + /** + * Return the String representation of the absolute id (parent and child) + * + * @return the chunk id string + */ + public String getIdString() { + return Server.getChunkIdString(this.parent.getSourceFile().getId(), this.chunkID); + } + + public boolean index(Ingester ingester, byte[] content, long contentSize, ByteContentStream.Encoding encoding) throws IngesterException { + boolean success = true; + ByteContentStream bcs = new ByteContentStream(content, contentSize, parent.getSourceFile(), encoding); + try { + ingester.ingest(this, bcs); + //logger.log(Level.INFO, "Ingesting string chunk: " + this.getName() + ": " + chunkID); + } catch (Exception ingEx) { + success = false; + throw new IngesterException("Problem ingesting file string chunk: " + parent.getSourceFile().getId() + ", chunk: " + chunkID, ingEx); + } + return success; + } + +} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileExtract.java new file mode 100644 index 0000000000..e1501a8d34 --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileExtract.java @@ -0,0 +1,48 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2012 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.sleuthkit.autopsy.keywordsearch; + +import org.sleuthkit.datamodel.AbstractFile; + +/** + * Common methods for utilities that extract text and content and divide into + * chunks + */ +interface AbstractFileExtract { + + /** + * Get number of chunks resulted from extracting this AbstractFile + * @return the number of chunks produced + */ + int getNumChunks(); + + /** + * Get the source file associated with this extraction + * @return the source AbstractFile + */ + AbstractFile getSourceFile(); + + /** + * Index the Abstract File + * @return true if indexed successfully, false otherwise + * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException + */ + boolean index() throws Ingester.IngesterException; +} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringContentStream.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringContentStream.java index 5f8e1f7cd5..d67543ecb0 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringContentStream.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringContentStream.java @@ -24,8 +24,7 @@ import java.io.InputStreamReader; import java.io.Reader; import java.util.logging.Logger; import org.apache.solr.common.util.ContentStream; -import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream; -import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream.Encoding; +import org.sleuthkit.autopsy.keywordsearch.ByteContentStream.Encoding; import org.sleuthkit.datamodel.AbstractContent; import org.sleuthkit.datamodel.AbstractFile; @@ -43,7 +42,7 @@ public class AbstractFileStringContentStream implements ContentStream { private AbstractFileStringStream stream; private static Logger logger = Logger.getLogger(AbstractFileStringContentStream.class.getName()); - public AbstractFileStringContentStream(AbstractFile content, Encoding encoding) { + public AbstractFileStringContentStream(AbstractFile content, ByteContentStream.Encoding encoding) { this.content = content; this.encoding = encoding; this.stream = new AbstractFileStringStream(content, encoding); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java new file mode 100644 index 0000000000..72b30e49d7 --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java @@ -0,0 +1,128 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2011 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.sleuthkit.autopsy.keywordsearch; + +import java.io.IOException; +import java.io.InputStream; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException; +import org.sleuthkit.datamodel.AbstractFile; + + + + + +/** + * Takes an AbstractFile, extracts strings from it, converts them into chunks (associated with the original + * source file) of up to 1MB each, and indexes the chunks as text with Solr + */ +class AbstractFileStringExtract implements AbstractFileExtract { + + private KeywordSearchIngestService service; + private Ingester ingester; + private int numChunks; + private static final Logger logger = Logger.getLogger(AbstractFileStringExtract.class.getName()); + static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L; + private AbstractFile aFile; + //single static buffer for all extractions. Safe, indexing can only happen in one thread + private static final byte[] STRING_CHUNK_BUF = new byte[(int) MAX_STRING_CHUNK_SIZE]; + private static final int BOM_LEN = 3; + + static { + //prepend UTF-8 BOM to start of the buffer + STRING_CHUNK_BUF[0] = (byte) 0xEF; + STRING_CHUNK_BUF[1] = (byte) 0xBB; + STRING_CHUNK_BUF[2] = (byte) 0xBF; + } + + public AbstractFileStringExtract(AbstractFile aFile) { + this.aFile = aFile; + numChunks = 0; //unknown until indexing is done + this.service = KeywordSearchIngestService.getDefault(); + Server solrServer = KeywordSearch.getServer(); + ingester = solrServer.getIngester(); + } + + @Override + public int getNumChunks() { + return this.numChunks; + } + + @Override + public AbstractFile getSourceFile() { + return aFile; + } + + @Override + public boolean index() throws IngesterException { + boolean success = false; + + //construct stream that extracts text as we read it + final InputStream stringStream = new AbstractFileStringStream(aFile, ByteContentStream.Encoding.UTF8); + + try { + success = true; + //break input stream into chunks + + long readSize = 0; + while ((readSize = stringStream.read(STRING_CHUNK_BUF, BOM_LEN, (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) { + //FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks+1)); + //debug.write(STRING_CHUNK_BUF, 0, (int)readSize); + + AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1); + + try { + chunk.index(ingester, STRING_CHUNK_BUF, readSize + BOM_LEN, ByteContentStream.Encoding.UTF8); + ++this.numChunks; + } catch (IngesterException ingEx) { + success = false; + logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ingEx); + throw ingEx; //need to rethrow/return to signal error and move on + } + + //check if need invoke commit/search between chunks + //not to delay commit if timer has gone off + service.checkRunCommitSearch(); + + //debug.close(); + } + + + //after all chunks, ingest the parent file without content itself, and store numChunks + ingester.ingest(this); + + } catch (IOException ex) { + logger.log(Level.WARNING, "Unable to read input stream to divide and send to Solr, file: " + aFile.getName(), ex); + success = false; + } finally { + try { + stringStream.close(); + } catch (IOException ex) { + logger.log(Level.WARNING, "Error closing input stream, file: " + aFile.getName(), ex); + } + } + + + return success; + } + + +} + diff --git a/DataModel/src/org/sleuthkit/autopsy/datamodel/AbstractFileStringStream.java 
b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringStream.java similarity index 96% rename from DataModel/src/org/sleuthkit/autopsy/datamodel/AbstractFileStringStream.java rename to KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringStream.java index ec0a5330cc..a12ee984e5 100644 --- a/DataModel/src/org/sleuthkit/autopsy/datamodel/AbstractFileStringStream.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringStream.java @@ -16,13 +16,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.sleuthkit.autopsy.datamodel; +package org.sleuthkit.autopsy.keywordsearch; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.util.logging.Level; import java.util.logging.Logger; +import org.sleuthkit.autopsy.datamodel.DataConversion; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.TskException; @@ -33,16 +34,6 @@ import org.sleuthkit.datamodel.TskException; */ public class AbstractFileStringStream extends InputStream { - public static enum Encoding { - - UTF8 { - - @Override - public String toString() { - return "UTF-8"; - } - }, - }; //args private AbstractFile content; @@ -73,7 +64,7 @@ public class AbstractFileStringStream extends InputStream { * @param encoding target encoding, currently UTF-8 * @param preserveOnBuffBoundary whether to preserve or split string on a buffer boundary. If false, will pack into read buffer up to max. possible, potentially splitting a string. If false, the string will be preserved for next read. */ - public AbstractFileStringStream(AbstractFile content, Encoding encoding, boolean preserveOnBuffBoundary) { + public AbstractFileStringStream(AbstractFile content, ByteContentStream.Encoding encoding, boolean preserveOnBuffBoundary) { this.content = content; this.encoding = encoding.toString(); //this.preserveOnBuffBoundary = preserveOnBuffBoundary; @@ -87,7 +78,7 @@ public class AbstractFileStringStream extends InputStream { * @param content to extract strings from * @param encoding target encoding, currently UTF-8 */ - public AbstractFileStringStream(AbstractFile content, Encoding encoding) { + public AbstractFileStringStream(AbstractFile content, ByteContentStream.Encoding encoding) { this(content, encoding, false); } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java new file mode 100644 index 0000000000..df3ba7cb1a --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java @@ -0,0 +1,149 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2012 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.sleuthkit.autopsy.keywordsearch; + +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.io.Reader; +import java.nio.charset.Charset; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile; +import org.sleuthkit.datamodel.AbstractFile; +import org.sleuthkit.datamodel.ReadContentInputStream; +import org.apache.tika.Tika; +import org.sleuthkit.autopsy.keywordsearch.ByteContentStream.Encoding; + +/** + * Extractor of text from Tika-supported AbstractFile content. Extracted text is + * divided into chunks and indexed with Solr. + * + * This is especially useful for large content of a supported type that is to be + * divided into text chunks and indexed as such. + * + */ +public class AbstractFileTikaTextExtract implements AbstractFileExtract { + + private static final Logger logger = Logger.getLogger(AbstractFileTikaTextExtract.class.getName()); + private static final Encoding ENCODING = Encoding.UTF8; + static final Charset charset = Charset.forName(ENCODING.toString()); + static final int MAX_EXTR_TEXT_CHUNK_SIZE = 1 * 1024 * 1024; + private static final char[] TEXT_CHUNK_BUF = new char[MAX_EXTR_TEXT_CHUNK_SIZE]; + private static final Tika tika = new Tika(); + private KeywordSearchIngestService service; + private Ingester ingester; + private AbstractFile sourceFile; + private int numChunks = 0; + private static final String UTF16BOM = "\uFEFF"; + + AbstractFileTikaTextExtract(AbstractFile sourceFile) { + this.sourceFile = sourceFile; + this.service = KeywordSearchIngestService.getDefault(); + Server solrServer = KeywordSearch.getServer(); + ingester = solrServer.getIngester(); + } + + @Override + public int getNumChunks() { + return numChunks; + } + + @Override + public AbstractFile getSourceFile() { + return sourceFile; + } + + @Override + public boolean index() throws Ingester.IngesterException { + boolean success = false; + Reader reader = null; + try { + success = true; + reader = tika.parse(new ReadContentInputStream(sourceFile)); + long readSize; + while ((readSize = reader.read(TEXT_CHUNK_BUF, 0, MAX_EXTR_TEXT_CHUNK_SIZE)) != -1) { + + //encode to bytes to index as byte stream + String extracted; + if (readSize < MAX_EXTR_TEXT_CHUNK_SIZE) { + //trim the 0 bytes + StringBuilder sb = new StringBuilder((int) readSize + 5); + //inject BOM here (saves byte buffer realloc), will be converted to specific encoding BOM + sb.append(UTF16BOM); + sb.append(TEXT_CHUNK_BUF, 0, (int) readSize); + extracted = sb.toString(); + + } else { + StringBuilder sb = new StringBuilder((int) readSize + 5); + //inject BOM here (saves byte buffer realloc), will be converted to specific encoding BOM + sb.append(UTF16BOM); + sb.append(TEXT_CHUNK_BUF); + extracted = sb.toString(); + } + //converts BOM automatically to charSet encoding + byte[] encodedBytes = extracted.getBytes(charset); + + + //PrintStream s = new PrintStream("c:\\temp\\ps.txt"); + //for (byte b : encodedBytes) { + // s.format("%02x ", b); + //} + //s.close(); + + //debug + //FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks + 1)); + //debug.write(encodedBytes, 0, encodedBytes.length); + //debug.close(); + + AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1); + + try { + chunk.index(ingester, encodedBytes, encodedBytes.length, ENCODING); + ++this.numChunks; + } catch (Ingester.IngesterException ingEx) { + success = false; + 
logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '" + + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx); + throw ingEx; //need to rethrow/return to signal error and move on + } + + //check if need invoke commit/search between chunks + //not to delay commit if timer has gone off + service.checkRunCommitSearch(); + + } + + } catch (IOException ex) { + logger.log(Level.WARNING, "Unable to read content stream from " + sourceFile.getId(), ex); + } finally { + try { + reader.close(); + } catch (IOException ex) { + logger.log(Level.WARNING, "Unable to close content stream from " + sourceFile.getId(), ex); + } + } + + //after all chunks, ingest the parent file without content itself, and store numChunks + ingester.ingest(this); + + return success; + + } +} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ByteContentStream.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ByteContentStream.java index e0d23992cb..994ced04db 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ByteContentStream.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ByteContentStream.java @@ -25,15 +25,32 @@ import java.io.InputStreamReader; import java.io.Reader; import java.util.logging.Logger; import org.apache.solr.common.util.ContentStream; -import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream.Encoding; import org.sleuthkit.datamodel.AbstractContent; -import org.sleuthkit.datamodel.FsContent; /** * Stream of bytes representing string with specified encoding * to feed into Solr as ContentStream */ public class ByteContentStream implements ContentStream { + + public static enum Encoding { + + UTF8 { + + @Override + public String toString() { + return "UTF-8"; + } + }, + UTF16 { + + @Override + public String toString() { + return "UTF-16"; + } + }, + }; + //input private byte[] content; //extracted subcontent private long contentSize; diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileExtract.java deleted file mode 100644 index ecb86aa97c..0000000000 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileExtract.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2011 Basis Technology Corp. - * Contact: carrier sleuthkit org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.sleuthkit.autopsy.keywordsearch; - -import java.io.IOException; -import java.util.logging.Level; -import java.util.logging.Logger; -import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream; -import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException; -import org.sleuthkit.datamodel.AbstractFile; - - -/** - * Utility to extract strings and index a file with string content as chunks - * associated with the original parent file - */ -class FileExtract { - - KeywordSearchIngestService service; - private int numChunks; - private static final Logger logger = Logger.getLogger(FileExtract.class.getName()); - static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L; - private AbstractFile sourceFile; - - //single static buffer for all extractions. Safe, indexing can only happen in one thread - private static final byte[] STRING_CHUNK_BUF = new byte[(int) MAX_STRING_CHUNK_SIZE]; - private static final int BOM_LEN = 3; - static { - //prepend UTF-8 BOM to start of the buffer - STRING_CHUNK_BUF[0] = (byte)0xEF; - STRING_CHUNK_BUF[1] = (byte)0xBB; - STRING_CHUNK_BUF[2] = (byte)0xBF; - } - - public FileExtract(KeywordSearchIngestService service, AbstractFile sourceFile) { - this.service = service; - this.sourceFile = sourceFile; - numChunks = 0; //unknown until indexing is done - } - - public int getNumChunks() { - return this.numChunks; - } - - public AbstractFile getSourceFile() { - return sourceFile; - } - - - public boolean index(Ingester ingester) throws IngesterException { - boolean success = false; - - AbstractFileStringStream stringStream = null; - try { - success = true; - //break string into chunks - //Note: could use DataConversion.toString() since we are operating on fixed chunks - //but FsContentStringStream handles string boundary case better - stringStream = new AbstractFileStringStream(sourceFile, AbstractFileStringStream.Encoding.UTF8); - long readSize = 0; - - while ((readSize = stringStream.read(STRING_CHUNK_BUF, BOM_LEN, (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) { - //FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks+1)); - //debug.write(STRING_CHUNK_BUF, 0, (int)readSize); - - FileExtractedChild chunk = new FileExtractedChild(this, this.numChunks + 1); - - try { - chunk.index(ingester, STRING_CHUNK_BUF, readSize + BOM_LEN); - ++this.numChunks; - } catch (IngesterException ingEx) { - success = false; - logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '" + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx); - throw ingEx; //need to rethrow/return to signal error and move on - } - - //check if need invoke commit/search between chunks - //not to delay commit if timer has gone off - service.checkRunCommitSearch(); - - //debug.close(); - } - - - //after all chunks, ingest the parent file without content itself, and store numChunks - ingester.ingest(this); - - } catch (IOException ex) { - logger.log(Level.WARNING, "Unable to read string stream and send to Solr, file: " + sourceFile.getName(), ex); - success = false; - } finally { - if (stringStream != null) { - try { - stringStream.close(); - } catch (IOException ex) { - logger.log(Level.WARNING, "Error closing string stream, file: " + sourceFile.getName(), ex); - } - } - } - - - return success; - } -} -/** - * Represents each string chunk to be indexed, a child of FileExtracted file - */ -class FileExtractedChild { - - private int chunkID; - private FileExtract parent; - - 
FileExtractedChild(FileExtract parent, int chunkID) { - this.parent = parent; - this.chunkID = chunkID; - } - - public FileExtract getParentFile() { - return parent; - } - - public int getChunkId() { - return chunkID; - } - - /** - * return String representation of the absolute id (parent and child) - * @return - */ - public String getIdString() { - return getFileExtractChildId(this.parent.getSourceFile().getId(), this.chunkID); - } - - - public boolean index(Ingester ingester, byte[] content, long contentSize) throws IngesterException { - boolean success = true; - ByteContentStream bcs = new ByteContentStream(content, contentSize, parent.getSourceFile(), AbstractFileStringStream.Encoding.UTF8); - try { - ingester.ingest(this, bcs); - //logger.log(Level.INFO, "Ingesting string chunk: " + this.getName() + ": " + chunkID); - - } catch (Exception ingEx) { - success = false; - throw new IngesterException("Problem ingesting file string chunk: " + parent.getSourceFile().getId() + ", chunk: " + chunkID, ingEx); - } - return success; - } - - public static String getFileExtractChildId(long parentID, int childID) { - return Long.toString(parentID) + Server.ID_CHUNK_SEP + Integer.toString(childID); - } -} diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index d95a8f6649..2492288779 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -73,6 +73,17 @@ public class Ingester { "pst", "xml", "class", "dwg", "eml", "emlx", "mbox", "mht"}; + private static Ingester instance; + + private Ingester() { + + } + + public static synchronized Ingester getDefault() { + if (instance == null) + instance = new Ingester(); + return instance; + } @Override @SuppressWarnings("FinalizeDeclaration") @@ -99,16 +110,16 @@ public class Ingester { } /** - * Sends a FileExtract to Solr to have its content extracted and added to the + * Sends a AbstractFileExtract to Solr to have its content extracted and added to the * index. commit() should be called once you're done ingesting files. * FileExtract represents a parent of extracted file with actual content. - * The parent itself has no content, only meta data and is used to associate the extracted FileExtractedChild + * The parent itself has no content, only meta data and is used to associate the extracted AbstractFileChunk * - * @param fe FileExtract to ingest + * @param fe AbstractFileExtract to ingest * @throws IngesterException if there was an error processing a specific * file, but the Solr server is probably fine. */ - void ingest(FileExtract fe) throws IngesterException { + void ingest(AbstractFileExtract fe) throws IngesterException { Map params = getContentFields(fe.getSourceFile()); params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks())); @@ -117,23 +128,23 @@ public class Ingester { } /** - * Sends a FileExtractedChild to Solr and its extracted content stream to be added to the + * Sends a AbstractFileChunk to Solr and its extracted content stream to be added to the * index. commit() should be called once you're done ingesting files. - * FileExtractedChild represents a file chunk and its chunk content. + * AbstractFileChunk represents a file chunk and its chunk content. 
* - * @param fec FileExtractedChild to ingest + * @param fec AbstractFileChunk to ingest * @throws IngesterException if there was an error processing a specific * file, but the Solr server is probably fine. */ - void ingest(FileExtractedChild fec, ByteContentStream bcs) throws IngesterException { + void ingest(AbstractFileChunk fec, ByteContentStream bcs) throws IngesterException { AbstractContent sourceContent = bcs.getSourceContent(); Map params = getContentFields(sourceContent); //overwrite id with the chunk id params.put(Server.Schema.ID.toString(), - FileExtractedChild.getFileExtractChildId(sourceContent.getId(), fec.getChunkId())); + Server.getChunkIdString(sourceContent.getId(), fec.getChunkId())); - ingest(bcs, params, FileExtract.MAX_STRING_CHUNK_SIZE); + ingest(bcs, params, AbstractFileStringExtract.MAX_STRING_CHUNK_SIZE); } /** @@ -448,8 +459,9 @@ public class Ingester { */ static boolean isIngestible(AbstractFile aFile) { TSK_DB_FILES_TYPE_ENUM aType = aFile.getType(); - if (! aType.equals(TSK_DB_FILES_TYPE_ENUM.FS) ) + if (! aType.equals(TSK_DB_FILES_TYPE_ENUM.FS) ) { return false; + } FsContent fsContent = (FsContent) aFile; diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestService.java index d3ff1b367b..5ad5c234bf 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestService.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestService.java @@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; +import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -40,7 +41,6 @@ import org.netbeans.api.progress.ProgressHandleFactory; import org.openide.util.Cancellable; import org.openide.util.Exceptions; import org.sleuthkit.autopsy.casemodule.Case; -import org.sleuthkit.autopsy.ingest.IngestManager; import org.sleuthkit.autopsy.ingest.IngestManagerProxy; import org.sleuthkit.autopsy.ingest.IngestMessage; import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType; @@ -95,7 +95,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi private final String hashDBServiceName = "Hash Lookup"; //NOTE this needs to match the HashDB service getName() private SleuthkitCase caseHandle = null; private boolean skipKnown = true; - boolean initialized = false; + private boolean initialized = false; private enum IngestStatus { @@ -200,6 +200,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi managerProxy.postMessage(IngestMessage.createMessage(++messageID, MessageType.INFO, this, "Completed")); } + //postSummary(); } @@ -224,6 +225,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi runSearcher = false; finalSearcherDone = true; + //commit uncommited files, don't search again commit(); @@ -498,16 +500,27 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi private final Logger logger = Logger.getLogger(Indexer.class.getName()); - private boolean extractAndIngest(AbstractFile aFile) { - boolean indexed = false; - final FileExtract fe = new FileExtract(KeywordSearchIngestService.this, aFile); - try { - indexed = fe.index(ingester); - } catch (IngesterException ex) { - logger.log(Level.WARNING, "Error extracting strings and indexing file: " + 
aFile.getName(), ex); - indexed = false; + /** + * Extract strings, or text with Tika (by streaming), from the file. Divide + * the file into chunks and index the chunks + * + * @param aFile file to extract strings from, divide into chunks and + * index + * @param stringsOnly true to use string extraction, false to use the Tika + * text extractor + * @return true if the file was indexed, false otherwise + */ + private boolean extractIndex(AbstractFile aFile, boolean stringsOnly) throws IngesterException { + AbstractFileExtract fileExtract; + + if (stringsOnly) { + fileExtract = new AbstractFileStringExtract(aFile); + } else { + fileExtract = new AbstractFileTikaTextExtract(aFile); } - return indexed; + + //divide into chunks and index + return fileExtract.index(); } private void indexFile(AbstractFile aFile, boolean indexContent) { @@ -537,9 +550,8 @@ boolean ingestibleFile = Ingester.isIngestible(aFile); final long size = aFile.getSize(); - //if fs file, limit size of entire file, do not limit strings - if (fsContent != null && (size == 0 || (ingestibleFile && size > MAX_INDEX_SIZE))) { - //if fs file, index meta only, otherwise if unalloc, skip + //if fs file with no content (size is 0), index meta-data only + if (fsContent != null && size == 0) { try { ingester.ingest(fsContent, false); //meta-data only ingestStatus.put(aFile.getId(), IngestStatus.INGESTED_META); @@ -548,15 +560,21 @@ logger.log(Level.WARNING, "Unable to index meta-data for fsContent: " + fsContent.getId(), ex); } - return; - } - - if (fsContent != null && ingestibleFile == true) { - //we know it's an allocated fs file (FsContent) with supported content + } else if (fsContent != null && ingestibleFile == true) { + //we know it's an allocated fs file (FsContent) with supported content + //extract text with Tika, divide into chunks and index with Solr try { //logger.log(Level.INFO, "indexing: " + fsContent.getName()); - ingester.ingest(fsContent, true); - ingestStatus.put(fsContent.getId(), IngestStatus.INGESTED); + //ingester.ingest(fsContent, true); + if (!extractIndex(aFile, false)) { + logger.log(Level.WARNING, "Failed to extract Tika text and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ")."); + ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED); + + } else { + ingestStatus.put(aFile.getId(), IngestStatus.INGESTED); + + } + } catch (IngesterException e) { ingestStatus.put(fsContent.getId(), IngestStatus.SKIPPED); //try to extract strings, if a file @@ -578,13 +596,19 @@ } private boolean processNonIngestible(AbstractFile aFile) { - if (!extractAndIngest(aFile)) { - logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ")."); + try { + if (!extractIndex(aFile, true)) { + logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ")."); + ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED); + return false; + } else { + ingestStatus.put(aFile.getId(), IngestStatus.EXTRACTED_INGESTED); + return true; + } + } catch (IngesterException ex) { + logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex); + ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED); return false; - 
} else { - ingestStatus.put(aFile.getId(), IngestStatus.EXTRACTED_INGESTED); - return true; } } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java index c22ea8a945..f29f5a8150 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java @@ -326,7 +326,7 @@ public class LuceneQuery implements KeywordSearchQuery { if (chunkID == 0) { contentIDStr = Long.toString(contentID); } else { - contentIDStr = FileExtractedChild.getFileExtractChildId(contentID, chunkID); + contentIDStr = Server.getChunkIdString(contentID, chunkID); } String idQuery = Server.Schema.ID.toString() + ":" + contentIDStr; diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java index 4b72868166..b107d682fb 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java @@ -60,74 +60,63 @@ class Server { public static enum Schema { ID { - @Override public String toString() { return "id"; } }, CONTENT { - @Override public String toString() { return "content"; } }, CONTENT_WS { - @Override public String toString() { return "content_ws"; } }, FILE_NAME { - @Override public String toString() { return "file_name"; } }, CTIME { - @Override public String toString() { return "ctime"; } }, ATIME { - @Override public String toString() { return "atime"; } }, MTIME { - @Override public String toString() { return "mtime"; } }, CRTIME { - @Override public String toString() { return "crtime"; } }, NUM_CHUNKS { - @Override public String toString() { return "num_chunks"; } - },}; - + }, + }; public static final String HL_ANALYZE_CHARS_UNLIMITED = "-1"; - //max content size we can send to Solr public static final long MAX_CONTENT_SIZE = 1L * 1024 * 1024 * 1024; - private static final Logger logger = Logger.getLogger(Server.class.getName()); private static final String DEFAULT_CORE_NAME = "coreCase"; // TODO: DEFAULT_CORE_NAME needs to be replaced with unique names to support multiple open cases @@ -136,6 +125,8 @@ class Server { private String javaPath = "java"; private static final int MAX_SOLR_MEM_MB = 512; //TODO set dynamically based on avail. 
system resources private Process curSolrProcess = null; + + private static Ingester ingester = null; public enum CORE_EVT_STATES { @@ -148,6 +139,7 @@ class Server { /** * New instance for the server at the given URL + * * @param url should be something like "http://localhost:8983/solr/" */ Server(String url) { @@ -220,7 +212,7 @@ class Server { bw.newLine(); if (Version.getBuildType() == Version.Type.DEVELOPMENT) { //flush buffers if dev version for debugging - bw.flush(); + bw.flush(); } } } catch (IOException ex) { @@ -237,7 +229,7 @@ class Server { void start() { logger.log(Level.INFO, "Starting Solr server from: " + solrFolder.getAbsolutePath()); try { - final String MAX_SOLR_MEM_MB_PAR = " -Xmx" + Integer.toString(MAX_SOLR_MEM_MB) + "m"; + final String MAX_SOLR_MEM_MB_PAR = " -Xmx" + Integer.toString(MAX_SOLR_MEM_MB) + "m"; final String SOLR_START_CMD = javaPath + MAX_SOLR_MEM_MB_PAR + " -DSTOP.PORT=8079 -DSTOP.KEY=mysecret -jar start.jar"; logger.log(Level.INFO, "Starting Solr using: " + SOLR_START_CMD); curSolrProcess = Runtime.getRuntime().exec(SOLR_START_CMD, null, solrFolder); @@ -259,9 +251,8 @@ class Server { /** * Tries to stop a Solr instance. - * - * Waits for the stop command to finish - * before returning. + * + * Waits for the stop command to finish before returning. */ synchronized void stop() { try { @@ -283,8 +274,11 @@ class Server { } /** - * Tests if there's a Solr server running by sending it a core-status request. - * @return false if the request failed with a connection error, otherwise true + * Tests if there's a Solr server running by sending it a core-status + * request. + * + * @return false if the request failed with a connection error, otherwise + * true */ synchronized boolean isRunning() { @@ -311,7 +305,9 @@ class Server { return true; } - /**** Convenience methods for use while we only open one case at a time ****/ + /** + * ** Convenience methods for use while we only open one case at a time *** + */ private volatile Core currentCore = null; synchronized void openCore() { @@ -331,11 +327,14 @@ class Server { serverAction.putValue(CORE_EVT, CORE_EVT_STATES.STOPPED); } - /**** end single-case specific methods ****/ + /** + * ** end single-case specific methods *** + */ /** * Open a core for the given case + * * @param c - * @return + * @return */ synchronized Core openCore(Case c) { String sep = File.separator; @@ -345,6 +344,7 @@ class Server { /** * commit current core if it exists + * * @throws SolrServerException, NoOpenCoreException */ synchronized void commit() throws SolrServerException, NoOpenCoreException { @@ -362,10 +362,12 @@ class Server { } /** - * Execute query that gets only number of all Solr files indexed - * without actually returning the files. The result does not include chunks, only number of actual files. + * Execute query that gets only number of all Solr files indexed without + * actually returning the files. The result does not include chunks, only + * number of actual files. 
+ * * @return int representing number of indexed files - * @throws SolrServerException + * @throws SolrServerException */ public int queryNumIndexedFiles() throws SolrServerException, NoOpenCoreException { if (currentCore == null) { @@ -374,12 +376,13 @@ class Server { return currentCore.queryNumIndexedFiles(); } - - /** - * Execute query that gets only number of all Solr documents indexed (files and chunks) - * without actually returning the documents + + /** + * Execute query that gets only number of all Solr documents indexed (files + * and chunks) without actually returning the documents + * * @return int representing number of indexed files (files and chunks) - * @throws SolrServerException + * @throws SolrServerException */ public int queryNumIndexedDocuments() throws SolrServerException, NoOpenCoreException { if (currentCore == null) { @@ -391,6 +394,7 @@ class Server { /** * Return true if the file is indexed (either as a whole as a chunk) + * * @param contentID * @return true if it is indexed * @throws SolrServerException, NoOpenCoreException @@ -405,9 +409,11 @@ class Server { /** * Execute query that gets number of indexed file chunks for a file + * * @param fileID file id of the original file broken into chunks and indexed - * @return int representing number of indexed file chunks, 0 if there is no chunks - * @throws SolrServerException + * @return int representing number of indexed file chunks, 0 if there is no + * chunks + * @throws SolrServerException */ public int queryNumFileChunks(long fileID) throws SolrServerException, NoOpenCoreException { if (currentCore == null) { @@ -419,10 +425,11 @@ class Server { /** * Execute solr query + * * @param sq query * @return query response * @throws SolrServerException - * @throws NoOpenCoreException + * @throws NoOpenCoreException */ public QueryResponse query(SolrQuery sq) throws SolrServerException, NoOpenCoreException { if (currentCore == null) { @@ -433,11 +440,12 @@ class Server { /** * Execute solr query + * * @param sq the query * @param method http method to use * @return query response * @throws SolrServerException - * @throws NoOpenCoreException + * @throws NoOpenCoreException */ public QueryResponse query(SolrQuery sq, SolrRequest.METHOD method) throws SolrServerException, NoOpenCoreException { if (currentCore == null) { @@ -448,10 +456,11 @@ class Server { /** * Execute Solr terms query + * * @param sq the query * @return terms response * @throws SolrServerException - * @throws NoOpenCoreException + * @throws NoOpenCoreException */ public TermsResponse queryTerms(SolrQuery sq) throws SolrServerException, NoOpenCoreException { if (currentCore == null) { @@ -462,10 +471,11 @@ class Server { /** * Execute Solr query to get content text + * * @param content to get the text for * @return content text string * @throws SolrServerException - * @throws NoOpenCoreException + * @throws NoOpenCoreException */ public String getSolrContent(final Content content) throws SolrServerException, NoOpenCoreException { if (currentCore == null) { @@ -473,14 +483,16 @@ class Server { } return currentCore.getSolrContent(content.getId(), 0); } - + /** * Execute Solr query to get content text from content chunk + * * @param content to get the text for - * @param chunkID chunk number to query (starting at 1), or 0 if there is no chunks for that content + * @param chunkID chunk number to query (starting at 1), or 0 if there is no + * chunks for that content * @return content text string * @throws SolrServerException - * @throws NoOpenCoreException + * 
@throws NoOpenCoreException */ public String getSolrContent(final Content content, int chunkID) throws SolrServerException, NoOpenCoreException { if (currentCore == null) { @@ -490,15 +502,28 @@ class Server { } /** - * factory method to create ingester - * @return ingester + * Method to return ingester instance + * + * @return ingester instance */ - public Ingester getIngester() { - return new Ingester(); + public static Ingester getIngester() { + return Ingester.getDefault(); + } + + /** + * Given file parent id and child chunk ID, return the ID string of the chunk + * as stored in Solr, e.g. FILEID_CHUNKID + * @param parentID the parent file id (id of the source content) + * @param childID the child chunk id + * @return formatted string id + */ + public static String getChunkIdString(long parentID, int childID) { + return Long.toString(parentID) + Server.ID_CHUNK_SEP + Integer.toString(childID); } /** * Open a new core + * * @param coreName name to refer to the core by in Solr * @param dataDir directory to load/store the core data from/to * @return new core @@ -574,13 +599,13 @@ class Server { } } - - private String getSolrContent(long contentID, int chunkID) { + private String getSolrContent(long contentID, int chunkID) { final SolrQuery q = new SolrQuery(); q.setQuery("*:*"); String filterQuery = Schema.ID.toString() + ":" + contentID; - if (chunkID != 0) + if (chunkID != 0) { filterQuery = filterQuery + Server.ID_CHUNK_SEP + chunkID; + } q.addFilterQuery(filterQuery); q.setFields(Schema.CONTENT.toString()); try { @@ -602,11 +627,12 @@ class Server { } /** - * Execute query that gets only number of all Solr files (not chunks) indexed - * without actually returning the files - * - * @return int representing number of indexed files (entire files, not chunks) - * @throws SolrServerException + * Execute query that gets only number of all Solr files (not chunks) + * indexed without actually returning the files + * + * @return int representing number of indexed files (entire files, not + * chunks) + * @throws SolrServerException */ private int queryNumIndexedFiles() throws SolrServerException { SolrQuery q = new SolrQuery(Server.Schema.ID + ":*" + Server.ID_CHUNK_SEP + "*"); @@ -614,14 +640,15 @@ class Server { int numChunks = (int) query(q).getResults().getNumFound(); return queryNumIndexedDocuments() - numChunks; } - + /** * Execute query that gets only number of all Solr documents indexed - * without actually returning the documents. Documents include entire indexed files - * as well as chunks, which are treated as documents. - * - * @return int representing number of indexed documents (entire files and chunks) - * @throws SolrServerException + * without actually returning the documents. Documents include entire + * indexed files as well as chunks, which are treated as documents. 
+ * + * @return int representing number of indexed documents (entire files + * and chunks) + * @throws SolrServerException */ private int queryNumIndexedDocuments() throws SolrServerException { SolrQuery q = new SolrQuery("*:*"); @@ -631,9 +658,10 @@ class Server { /** * Return true if the file is indexed (either as a whole as a chunk) + * * @param contentID * @return true if it is indexed - * @throws SolrServerException + * @throws SolrServerException */ private boolean queryIsIndexed(long contentID) throws SolrServerException { SolrQuery q = new SolrQuery("*:*"); @@ -645,12 +673,15 @@ class Server { /** * Execute query that gets number of indexed file chunks for a file - * @param contentID file id of the original file broken into chunks and indexed - * @return int representing number of indexed file chunks, 0 if there is no chunks - * @throws SolrServerException + * + * @param contentID file id of the original file broken into chunks and + * indexed + * @return int representing number of indexed file chunks, 0 if there is + * no chunks + * @throws SolrServerException */ private int queryNumFileChunks(long contentID) throws SolrServerException { - final SolrQuery q = + final SolrQuery q = new SolrQuery(Server.Schema.ID + ":" + Long.toString(contentID) + Server.ID_CHUNK_SEP + "*"); q.setRows(0); return (int) query(q).getResults().getNumFound(); diff --git a/thunderbirdparser/nbproject/project.properties b/thunderbirdparser/nbproject/project.properties index ba51ec0265..83174ac5f0 100644 --- a/thunderbirdparser/nbproject/project.properties +++ b/thunderbirdparser/nbproject/project.properties @@ -1,5 +1,3 @@ file.reference.commons-lang-2.4.jar=release/modules/ext/commons-lang-2.4.jar -file.reference.tika-core-1.1.jar=release/modules/ext/tika-core-1.1.jar -file.reference.tika-parsers-1.1.jar=release/modules/ext/tika-parsers-1.1.jar javac.source=1.6 javac.compilerargs=-Xlint -Xlint:-serial diff --git a/thunderbirdparser/nbproject/project.xml b/thunderbirdparser/nbproject/project.xml index ea2c04cf47..63116d7987 100644 --- a/thunderbirdparser/nbproject/project.xml +++ b/thunderbirdparser/nbproject/project.xml @@ -53,17 +53,13 @@ - ext/tika-core-1.1.jar - release/modules/ext/tika-core-1.1.jar + ext/tika-core-0.10.jar + release/modules/ext/tika-core-0.10.jar ext/commons-lang-2.4.jar release/modules/ext/commons-lang-2.4.jar - - ext/tika-parsers-1.1.jar - release/modules/ext/tika-parsers-1.1.jar - diff --git a/thunderbirdparser/release/modules/ext/tika-core-0.10.jar b/thunderbirdparser/release/modules/ext/tika-core-0.10.jar new file mode 100644 index 0000000000..78087e1381 Binary files /dev/null and b/thunderbirdparser/release/modules/ext/tika-core-0.10.jar differ diff --git a/thunderbirdparser/release/modules/ext/tika-core-1.1.jar b/thunderbirdparser/release/modules/ext/tika-core-1.1.jar deleted file mode 100644 index 7ad2be62be..0000000000 Binary files a/thunderbirdparser/release/modules/ext/tika-core-1.1.jar and /dev/null differ diff --git a/thunderbirdparser/release/modules/ext/tika-parsers-1.1.jar b/thunderbirdparser/release/modules/ext/tika-parsers-1.1.jar deleted file mode 100644 index ad82942ae3..0000000000 Binary files a/thunderbirdparser/release/modules/ext/tika-parsers-1.1.jar and /dev/null differ diff --git a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMetadata.java b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMetadata.java index 0ecb09215e..133e333140 100644 --- 
a/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMetadata.java +++ b/thunderbirdparser/src/org/sleuthkit/autopsy/thunderbirdparser/ThunderbirdMetadata.java @@ -17,7 +17,7 @@ import org.apache.tika.metadata.*; * @author arivera */ public class ThunderbirdMetadata implements CreativeCommons, DublinCore, Geographic, HttpHeaders, - IPTC, Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys, + Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys, Serializable { private int strArrCount = 0;
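Note on the chunking scheme introduced above (not part of the patch): each chunk is stored as its own Solr document whose ID combines the parent file ID and a 1-based chunk number (built by Server.getChunkIdString(); the FILEID_CHUNKID example in its javadoc suggests an underscore separator), and the string extractor fills a fixed 1 MB buffer whose first three bytes hold a UTF-8 BOM. The sketch below is a minimal, self-contained illustration of that read-into-chunks loop using only JDK classes; ChunkingSketch, demo.txt, and the printed messages are hypothetical stand-ins, not Autopsy, Solr, or Tika APIs.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

public class ChunkingSketch {

    // Assumed to mirror MAX_STRING_CHUNK_SIZE and BOM_LEN from the patch.
    private static final int MAX_CHUNK_SIZE = 1024 * 1024;
    private static final int BOM_LEN = 3;
    private static final byte[] CHUNK_BUF = new byte[MAX_CHUNK_SIZE];

    static {
        // UTF-8 BOM is written once; only the remainder of the buffer is refilled per chunk.
        CHUNK_BUF[0] = (byte) 0xEF;
        CHUNK_BUF[1] = (byte) 0xBB;
        CHUNK_BUF[2] = (byte) 0xBF;
    }

    // Same convention as Server.getChunkIdString(): "<parentId>_<chunkNumber>".
    static String chunkId(long parentId, int chunkNumber) {
        return parentId + "_" + chunkNumber;
    }

    public static void main(String[] args) throws IOException {
        long fileId = 42L;   // hypothetical source file id
        int numChunks = 0;   // unknown until the stream is exhausted
        try (InputStream in = new FileInputStream("demo.txt")) { // hypothetical input
            int readSize;
            while ((readSize = in.read(CHUNK_BUF, BOM_LEN, MAX_CHUNK_SIZE - BOM_LEN)) != -1) {
                ++numChunks;
                // The patch ingests an AbstractFileChunk here; this sketch only reports
                // the document id and payload size that would be sent to Solr.
                System.out.println("would index " + chunkId(fileId, numChunks)
                        + " (" + (readSize + BOM_LEN) + " bytes incl. BOM)");
            }
        }
        System.out.println("total chunks: " + numChunks);
    }
}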