Mirror of https://github.com/overcuriousity/autopsy-flatpak.git (synced 2025-07-06 21:00:22 +00:00)

TSK-519 Add support for files of known filetypes > 100 MB (first take) - also fix thunderbird module deps so they work with keyword search module

This commit is contained in:
parent fe402d2019
commit 8f26cda926
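The hunks below replace whole-file indexing with chunked indexing: extracted text is read into a fixed ~1 MB buffer whose first three bytes are a UTF-8 BOM, and each filled buffer is sent to Solr as its own document. A minimal standalone sketch of that loop follows, with hypothetical class and variable names (the real implementations are the new AbstractFileStringExtract and AbstractFileTikaTextExtract classes in this diff):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

public class ChunkedIndexSketch {

    static final int MAX_CHUNK_SIZE = 1024 * 1024; // same 1 MB bound as the new extractors
    static final int BOM_LEN = 3;

    public static void main(String[] args) throws IOException {
        byte[] buf = new byte[MAX_CHUNK_SIZE];
        buf[0] = (byte) 0xEF; // UTF-8 BOM, written once and reused for every chunk
        buf[1] = (byte) 0xBB;
        buf[2] = (byte) 0xBF;

        // stand-in for the stream of extracted text read from a file
        InputStream in = new ByteArrayInputStream(new byte[5 * 1024 * 1024]);
        int chunkId = 0;
        int readSize;
        while ((readSize = in.read(buf, BOM_LEN, MAX_CHUNK_SIZE - BOM_LEN)) != -1) {
            ++chunkId;
            // in the real code this is ingester.ingest(chunk, byteContentStream)
            System.out.println("would index chunk " + chunkId + " of " + (readSize + BOM_LEN) + " bytes");
        }
    }
}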
@@ -16,6 +16,7 @@
         <dependency conf="solr-war->default" org="org.apache.solr" name="solr" rev="3.5.0" transitive="false" /> <!-- the war file -->
         <dependency conf="autopsy->*" org="org.apache.solr" name="solr-solrj" rev="3.5.0"/>
         <dependency conf="autopsy->*" org="commons-lang" name="commons-lang" rev="2.4"/>
+        <dependency conf="autopsy->*" org="org.apache.tika" name="tika-parsers" rev="0.10"/>
         <dependency conf="start-solr->default" org="org.mortbay.jetty" name="start" rev="6.1.26"/>
         <dependency conf="jetty-libs->default" org="org.mortbay.jetty" name="jetty" rev="6.1.26"/>
         <dependency conf="jetty-libs->default" org="org.mortbay.jetty" name="jsp-2.1" rev="6.1.14"/>
@@ -160,6 +160,10 @@
             <runtime-relative-path>ext/commons-httpclient-3.1.jar</runtime-relative-path>
             <binary-origin>release/modules/ext/commons-httpclient-3.1.jar</binary-origin>
         </class-path-extension>
+        <class-path-extension>
+            <runtime-relative-path>ext/tika-core-0.10.jar</runtime-relative-path>
+            <binary-origin>release/modules/ext/tika-core-0.10.jar</binary-origin>
+        </class-path-extension>
         <class-path-extension>
             <runtime-relative-path>ext/commons-codec-1.5.jar</runtime-relative-path>
             <binary-origin>release/modules/ext/commons-codec-1.5.jar</binary-origin>
@@ -168,6 +172,10 @@
             <runtime-relative-path>ext/commons-lang-2.4.jar</runtime-relative-path>
             <binary-origin>release/modules/ext/commons-lang-2.4.jar</binary-origin>
         </class-path-extension>
+        <class-path-extension>
+            <runtime-relative-path>ext/tika-parsers-0.10.jar</runtime-relative-path>
+            <binary-origin>release/modules/ext/tika-parsers-0.10.jar</binary-origin>
+        </class-path-extension>
         <class-path-extension>
             <runtime-relative-path>ext/jcl-over-slf4j-1.6.1.jar</runtime-relative-path>
             <binary-origin>release/modules/ext/jcl-over-slf4j-1.6.1.jar</binary-origin>
@@ -0,0 +1,66 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2012 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.sleuthkit.autopsy.keywordsearch;
+
+import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
+
+/**
+ * Represents each string chunk to be indexed, a derivative of AbstractFileExtract file
+ */
+class AbstractFileChunk {
+
+    private int chunkID;
+    private AbstractFileExtract parent;
+
+    AbstractFileChunk(AbstractFileExtract parent, int chunkID) {
+        this.parent = parent;
+        this.chunkID = chunkID;
+    }
+
+    public AbstractFileExtract getParent() {
+        return parent;
+    }
+
+    public int getChunkId() {
+        return chunkID;
+    }
+
+    /**
+     * return String representation of the absolute id (parent and child)
+     *
+     * @return
+     */
+    public String getIdString() {
+        return Server.getChunkIdString(this.parent.getSourceFile().getId(), this.chunkID);
+    }
+
+    public boolean index(Ingester ingester, byte[] content, long contentSize, ByteContentStream.Encoding encoding) throws IngesterException {
+        boolean success = true;
+        ByteContentStream bcs = new ByteContentStream(content, contentSize, parent.getSourceFile(), encoding);
+        try {
+            ingester.ingest(this, bcs);
+            //logger.log(Level.INFO, "Ingesting string chunk: " + this.getName() + ": " + chunkID);
+        } catch (Exception ingEx) {
+            success = false;
+            throw new IngesterException("Problem ingesting file string chunk: " + parent.getSourceFile().getId() + ", chunk: " + chunkID, ingEx);
+        }
+        return success;
+    }
+}
@@ -0,0 +1,48 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2012 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.sleuthkit.autopsy.keywordsearch;
+
+import org.sleuthkit.datamodel.AbstractFile;
+
+/**
+ * Common methods for utilities that extract text and content and divide into
+ * chunks
+ */
+interface AbstractFileExtract {
+
+    /**
+     * Get number of chunks resulted from extracting this AbstractFile
+     * @return the number of chunks produced
+     */
+    int getNumChunks();
+
+    /**
+     * Get the source file associated with this extraction
+     * @return the source AbstractFile
+     */
+    AbstractFile getSourceFile();
+
+    /**
+     * Index the Abstract File
+     * @return true if indexed successfully, false otherwise
+     * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
+     */
+    boolean index() throws Ingester.IngesterException;
+}
@@ -24,8 +24,7 @@ import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.logging.Logger;
 import org.apache.solr.common.util.ContentStream;
-import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream;
-import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream.Encoding;
+import org.sleuthkit.autopsy.keywordsearch.ByteContentStream.Encoding;
 import org.sleuthkit.datamodel.AbstractContent;
 import org.sleuthkit.datamodel.AbstractFile;
 
@@ -43,7 +42,7 @@ public class AbstractFileStringContentStream implements ContentStream {
     private AbstractFileStringStream stream;
     private static Logger logger = Logger.getLogger(AbstractFileStringContentStream.class.getName());
 
-    public AbstractFileStringContentStream(AbstractFile content, Encoding encoding) {
+    public AbstractFileStringContentStream(AbstractFile content, ByteContentStream.Encoding encoding) {
         this.content = content;
         this.encoding = encoding;
         this.stream = new AbstractFileStringStream(content, encoding);
@@ -0,0 +1,128 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2011 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.sleuthkit.autopsy.keywordsearch;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
+import org.sleuthkit.datamodel.AbstractFile;
+
+/**
+ * Takes an AbstractFile, extract strings, converts into chunks (associated with the original
+ * source file) up to 1MB then and indexes chunks as text with Solr
+ */
+class AbstractFileStringExtract implements AbstractFileExtract {
+
+    private KeywordSearchIngestService service;
+    private Ingester ingester;
+    private int numChunks;
+    private static final Logger logger = Logger.getLogger(AbstractFileStringExtract.class.getName());
+    static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
+    private AbstractFile aFile;
+    //single static buffer for all extractions. Safe, indexing can only happen in one thread
+    private static final byte[] STRING_CHUNK_BUF = new byte[(int) MAX_STRING_CHUNK_SIZE];
+    private static final int BOM_LEN = 3;
+
+    static {
+        //prepend UTF-8 BOM to start of the buffer
+        STRING_CHUNK_BUF[0] = (byte) 0xEF;
+        STRING_CHUNK_BUF[1] = (byte) 0xBB;
+        STRING_CHUNK_BUF[2] = (byte) 0xBF;
+    }
+
+    public AbstractFileStringExtract(AbstractFile aFile) {
+        this.aFile = aFile;
+        numChunks = 0; //unknown until indexing is done
+        this.service = KeywordSearchIngestService.getDefault();
+        Server solrServer = KeywordSearch.getServer();
+        ingester = solrServer.getIngester();
+    }
+
+    @Override
+    public int getNumChunks() {
+        return this.numChunks;
+    }
+
+    @Override
+    public AbstractFile getSourceFile() {
+        return aFile;
+    }
+
+    @Override
+    public boolean index() throws IngesterException {
+        boolean success = false;
+
+        //construct stream that extracts text as we read it
+        final InputStream stringStream = new AbstractFileStringStream(aFile, ByteContentStream.Encoding.UTF8);
+
+        try {
+            success = true;
+            //break input stream into chunks
+
+            long readSize = 0;
+            while ((readSize = stringStream.read(STRING_CHUNK_BUF, BOM_LEN, (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) {
+                //FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks+1));
+                //debug.write(STRING_CHUNK_BUF, 0, (int)readSize);
+
+                AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1);
+
+                try {
+                    chunk.index(ingester, STRING_CHUNK_BUF, readSize + BOM_LEN, ByteContentStream.Encoding.UTF8);
+                    ++this.numChunks;
+                } catch (IngesterException ingEx) {
+                    success = false;
+                    logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ingEx);
+                    throw ingEx; //need to rethrow/return to signal error and move on
+                }
+
+                //check if need invoke commit/search between chunks
+                //not to delay commit if timer has gone off
+                service.checkRunCommitSearch();
+
+                //debug.close();
+            }
+
+            //after all chunks, ingest the parent file without content itself, and store numChunks
+            ingester.ingest(this);
+
+        } catch (IOException ex) {
+            logger.log(Level.WARNING, "Unable to read input stream to divide and send to Solr, file: " + aFile.getName(), ex);
+            success = false;
+        } finally {
+            try {
+                stringStream.close();
+            } catch (IOException ex) {
+                logger.log(Level.WARNING, "Error closing input stream stream, file: " + aFile.getName(), ex);
+            }
+        }
+
+        return success;
+    }
+}
@@ -16,13 +16,14 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.sleuthkit.autopsy.datamodel;
+package org.sleuthkit.autopsy.keywordsearch;
 
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.UnsupportedEncodingException;
 import java.util.logging.Level;
 import java.util.logging.Logger;
+import org.sleuthkit.autopsy.datamodel.DataConversion;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.TskException;
 
@@ -33,16 +34,6 @@ import org.sleuthkit.datamodel.TskException;
  */
 public class AbstractFileStringStream extends InputStream {
 
-    public static enum Encoding {
-
-        UTF8 {
-
-            @Override
-            public String toString() {
-                return "UTF-8";
-            }
-        },
-    };
 
     //args
     private AbstractFile content;
@@ -73,7 +64,7 @@ public class AbstractFileStringStream extends InputStream {
     * @param encoding target encoding, currently UTF-8
     * @param preserveOnBuffBoundary whether to preserve or split string on a buffer boundary. If false, will pack into read buffer up to max. possible, potentially splitting a string. If false, the string will be preserved for next read.
     */
-    public AbstractFileStringStream(AbstractFile content, Encoding encoding, boolean preserveOnBuffBoundary) {
+    public AbstractFileStringStream(AbstractFile content, ByteContentStream.Encoding encoding, boolean preserveOnBuffBoundary) {
        this.content = content;
        this.encoding = encoding.toString();
        //this.preserveOnBuffBoundary = preserveOnBuffBoundary;
@@ -87,7 +78,7 @@ public class AbstractFileStringStream extends InputStream {
     * @param content to extract strings from
     * @param encoding target encoding, currently UTF-8
     */
-    public AbstractFileStringStream(AbstractFile content, Encoding encoding) {
+    public AbstractFileStringStream(AbstractFile content, ByteContentStream.Encoding encoding) {
        this(content, encoding, false);
    }
 
@@ -0,0 +1,149 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2012 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.sleuthkit.autopsy.keywordsearch;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
+import org.sleuthkit.datamodel.AbstractFile;
+import org.sleuthkit.datamodel.ReadContentInputStream;
+import org.apache.tika.Tika;
+import org.sleuthkit.autopsy.keywordsearch.ByteContentStream.Encoding;
+
+/**
+ * Extractor of text from TIKA supported AbstractFile content. Extracted text is
+ * divided into chunks and indexed with Solr.
+ *
+ * This is especially useful for large content of supported type that is to be
+ * divided into text chunks and indexed as such.
+ *
+ */
+public class AbstractFileTikaTextExtract implements AbstractFileExtract {
+
+    private static final Logger logger = Logger.getLogger(IngestServiceAbstractFile.class.getName());
+    private static final Encoding ENCODING = Encoding.UTF8;
+    static final Charset charset = Charset.forName(ENCODING.toString());
+    static final int MAX_EXTR_TEXT_CHUNK_SIZE = 1 * 1024 * 1024;
+    private static final char[] TEXT_CHUNK_BUF = new char[MAX_EXTR_TEXT_CHUNK_SIZE];
+    private static final Tika tika = new Tika();
+    private KeywordSearchIngestService service;
+    private Ingester ingester;
+    private AbstractFile sourceFile;
+    private int numChunks = 0;
+    private static final String UTF16BOM = "\uFEFF";
+
+    AbstractFileTikaTextExtract(AbstractFile sourceFile) {
+        this.sourceFile = sourceFile;
+        this.service = KeywordSearchIngestService.getDefault();
+        Server solrServer = KeywordSearch.getServer();
+        ingester = solrServer.getIngester();
+    }
+
+    @Override
+    public int getNumChunks() {
+        return numChunks;
+    }
+
+    @Override
+    public AbstractFile getSourceFile() {
+        return sourceFile;
+    }
+
+    @Override
+    public boolean index() throws Ingester.IngesterException {
+        boolean success = false;
+        Reader reader = null;
+        try {
+            success = true;
+            reader = tika.parse(new ReadContentInputStream(sourceFile));
+            long readSize;
+            while ((readSize = reader.read(TEXT_CHUNK_BUF, 0, MAX_EXTR_TEXT_CHUNK_SIZE)) != -1) {
+
+                //encode to bytes to index as byte stream
+                String extracted;
+                if (readSize < MAX_EXTR_TEXT_CHUNK_SIZE) {
+                    //trim the 0 bytes
+                    StringBuilder sb = new StringBuilder((int) readSize + 5);
+                    //inject BOM here (saves byte buffer realloc), will be converted to specific encoding BOM
+                    sb.append(UTF16BOM);
+                    sb.append(TEXT_CHUNK_BUF, 0, (int) readSize);
+                    extracted = sb.toString();
+                } else {
+                    StringBuilder sb = new StringBuilder((int) readSize + 5);
+                    //inject BOM here (saves byte buffer realloc), will be converted to specific encoding BOM
+                    sb.append(UTF16BOM);
+                    sb.append(TEXT_CHUNK_BUF);
+                    extracted = sb.toString();
+                }
+                //converts BOM automatically to charSet encoding
+                byte[] encodedBytes = extracted.getBytes(charset);
+
+                //PrintStream s = new PrintStream("c:\\temp\\ps.txt");
+                //for (byte b : encodedBytes) {
+                //    s.format("%02x ", b);
+                //}
+                //s.close();
+
+                //debug
+                //FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks + 1));
+                //debug.write(encodedBytes, 0, encodedBytes.length);
+                //debug.close();
+
+                AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1);
+
+                try {
+                    chunk.index(ingester, encodedBytes, encodedBytes.length, ENCODING);
+                    ++this.numChunks;
+                } catch (Ingester.IngesterException ingEx) {
+                    success = false;
+                    logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '"
+                            + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);
+                    throw ingEx; //need to rethrow/return to signal error and move on
+                }
+
+                //check if need invoke commit/search between chunks
+                //not to delay commit if timer has gone off
+                service.checkRunCommitSearch();
+
+            }
+
+        } catch (IOException ex) {
+            logger.log(Level.WARNING, "Unable to read content stream from " + sourceFile.getId(), ex);
+        } finally {
+            try {
+                reader.close();
+            } catch (IOException ex) {
+                logger.log(Level.WARNING, "Unable to close content stream from " + sourceFile.getId(), ex);
+            }
+        }
+
+        //after all chunks, ingest the parent file without content itself, and store numChunks
+        ingester.ingest(this);
+
+        return success;
+    }
+}
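The new Tika-based extractor above leans on Tika's streaming API: Tika.parse(InputStream) returns a Reader that yields extracted text incrementally, so even very large files never have to be held in memory at once. A hedged, self-contained sketch of that call outside Autopsy, assuming tika-core/tika-parsers 0.10 on the classpath and using a placeholder file path:

import java.io.FileInputStream;
import java.io.IOException;
import java.io.Reader;
import org.apache.tika.Tika;

public class TikaStreamingSketch {
    public static void main(String[] args) throws IOException {
        Tika tika = new Tika();
        char[] buf = new char[64 * 1024];
        // "sample.pdf" is only a placeholder input file
        try (Reader reader = tika.parse(new FileInputStream("sample.pdf"))) {
            int n;
            while ((n = reader.read(buf, 0, buf.length)) != -1) {
                // each block of extracted characters would become one indexed chunk
                System.out.println("read " + n + " chars of extracted text");
            }
        }
    }
}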
@@ -25,15 +25,32 @@ import java.io.InputStreamReader;
 import java.io.Reader;
 import java.util.logging.Logger;
 import org.apache.solr.common.util.ContentStream;
-import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream.Encoding;
 import org.sleuthkit.datamodel.AbstractContent;
-import org.sleuthkit.datamodel.FsContent;
 
 /**
  * Stream of bytes representing string with specified encoding
  * to feed into Solr as ContentStream
  */
 public class ByteContentStream implements ContentStream {
 
+    public static enum Encoding {
+
+        UTF8 {
+
+            @Override
+            public String toString() {
+                return "UTF-8";
+            }
+        },
+        UTF16 {
+
+            @Override
+            public String toString() {
+                return "UTF-16";
+            }
+        },
+    };
+
     //input
     private byte[] content; //extracted subcontent
     private long contentSize;
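A brief note on why the relocated Encoding enum overrides toString(): the string form is a valid java.nio charset name, so the same constant can label the Solr content stream and drive byte encoding (as AbstractFileTikaTextExtract does with Charset.forName). A small illustrative sketch, not part of the commit:

import java.nio.charset.Charset;

public class EncodingNameSketch {
    enum Encoding {
        UTF8 { @Override public String toString() { return "UTF-8"; } },
        UTF16 { @Override public String toString() { return "UTF-16"; } };
    }

    public static void main(String[] args) {
        Charset cs = Charset.forName(Encoding.UTF8.toString());
        // the BOM character is converted into the target charset's byte-order mark
        byte[] bytes = "\uFEFFhello".getBytes(cs);
        System.out.println(cs + " -> " + bytes.length + " bytes");
    }
}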
@@ -1,170 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2011 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.sleuthkit.autopsy.keywordsearch;
-
-import java.io.IOException;
-import java.util.logging.Level;
-import java.util.logging.Logger;
-import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream;
-import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
-import org.sleuthkit.datamodel.AbstractFile;
-
-/**
- * Utility to extract strings and index a file with string content as chunks
- * associated with the original parent file
- */
-class FileExtract {
-
-    KeywordSearchIngestService service;
-    private int numChunks;
-    private static final Logger logger = Logger.getLogger(FileExtract.class.getName());
-    static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
-    private AbstractFile sourceFile;
-
-    //single static buffer for all extractions. Safe, indexing can only happen in one thread
-    private static final byte[] STRING_CHUNK_BUF = new byte[(int) MAX_STRING_CHUNK_SIZE];
-    private static final int BOM_LEN = 3;
-    static {
-        //prepend UTF-8 BOM to start of the buffer
-        STRING_CHUNK_BUF[0] = (byte)0xEF;
-        STRING_CHUNK_BUF[1] = (byte)0xBB;
-        STRING_CHUNK_BUF[2] = (byte)0xBF;
-    }
-
-    public FileExtract(KeywordSearchIngestService service, AbstractFile sourceFile) {
-        this.service = service;
-        this.sourceFile = sourceFile;
-        numChunks = 0; //unknown until indexing is done
-    }
-
-    public int getNumChunks() {
-        return this.numChunks;
-    }
-
-    public AbstractFile getSourceFile() {
-        return sourceFile;
-    }
-
-    public boolean index(Ingester ingester) throws IngesterException {
-        boolean success = false;
-
-        AbstractFileStringStream stringStream = null;
-        try {
-            success = true;
-            //break string into chunks
-            //Note: could use DataConversion.toString() since we are operating on fixed chunks
-            //but FsContentStringStream handles string boundary case better
-            stringStream = new AbstractFileStringStream(sourceFile, AbstractFileStringStream.Encoding.UTF8);
-            long readSize = 0;
-
-            while ((readSize = stringStream.read(STRING_CHUNK_BUF, BOM_LEN, (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) {
-                //FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks+1));
-                //debug.write(STRING_CHUNK_BUF, 0, (int)readSize);
-
-                FileExtractedChild chunk = new FileExtractedChild(this, this.numChunks + 1);
-
-                try {
-                    chunk.index(ingester, STRING_CHUNK_BUF, readSize + BOM_LEN);
-                    ++this.numChunks;
-                } catch (IngesterException ingEx) {
-                    success = false;
-                    logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '" + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);
-                    throw ingEx; //need to rethrow/return to signal error and move on
-                }
-
-                //check if need invoke commit/search between chunks
-                //not to delay commit if timer has gone off
-                service.checkRunCommitSearch();
-
-                //debug.close();
-            }
-
-            //after all chunks, ingest the parent file without content itself, and store numChunks
-            ingester.ingest(this);
-
-        } catch (IOException ex) {
-            logger.log(Level.WARNING, "Unable to read string stream and send to Solr, file: " + sourceFile.getName(), ex);
-            success = false;
-        } finally {
-            if (stringStream != null) {
-                try {
-                    stringStream.close();
-                } catch (IOException ex) {
-                    logger.log(Level.WARNING, "Error closing string stream, file: " + sourceFile.getName(), ex);
-                }
-            }
-        }
-
-        return success;
-    }
-}
-
-/**
- * Represents each string chunk to be indexed, a child of FileExtracted file
- */
-class FileExtractedChild {
-
-    private int chunkID;
-    private FileExtract parent;
-
-    FileExtractedChild(FileExtract parent, int chunkID) {
-        this.parent = parent;
-        this.chunkID = chunkID;
-    }
-
-    public FileExtract getParentFile() {
-        return parent;
-    }
-
-    public int getChunkId() {
-        return chunkID;
-    }
-
-    /**
-     * return String representation of the absolute id (parent and child)
-     * @return
-     */
-    public String getIdString() {
-        return getFileExtractChildId(this.parent.getSourceFile().getId(), this.chunkID);
-    }
-
-    public boolean index(Ingester ingester, byte[] content, long contentSize) throws IngesterException {
-        boolean success = true;
-        ByteContentStream bcs = new ByteContentStream(content, contentSize, parent.getSourceFile(), AbstractFileStringStream.Encoding.UTF8);
-        try {
-            ingester.ingest(this, bcs);
-            //logger.log(Level.INFO, "Ingesting string chunk: " + this.getName() + ": " + chunkID);
-
-        } catch (Exception ingEx) {
-            success = false;
-            throw new IngesterException("Problem ingesting file string chunk: " + parent.getSourceFile().getId() + ", chunk: " + chunkID, ingEx);
-        }
-        return success;
-    }
-
-    public static String getFileExtractChildId(long parentID, int childID) {
-        return Long.toString(parentID) + Server.ID_CHUNK_SEP + Integer.toString(childID);
-    }
-}
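The chunk-id convention visible in the removed FileExtractedChild code (and now centralized in Server.getChunkIdString, used by the Ingester and LuceneQuery changes below) is: parent file id, a separator, then the 1-based chunk number. A small hedged sketch; the real separator value lives in Server.ID_CHUNK_SEP and is not shown in this diff, so "_" here is only an assumption for illustration:

public class ChunkIdSketch {
    static final String ID_CHUNK_SEP = "_"; // placeholder for Server.ID_CHUNK_SEP

    static String getChunkIdString(long parentId, int chunkId) {
        return Long.toString(parentId) + ID_CHUNK_SEP + Integer.toString(chunkId);
    }

    public static void main(String[] args) {
        // file 42, third chunk; a chunkID of 0 means "the whole file", as LuceneQuery checks
        System.out.println(getChunkIdString(42L, 3));
    }
}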
@@ -73,6 +73,17 @@ public class Ingester {
         "pst", "xml", "class", "dwg", "eml", "emlx", "mbox", "mht"};
 
 
+    private static Ingester instance;
+
+    private Ingester() {
+
+    }
+
+    public static synchronized Ingester getDefault() {
+        if (instance == null)
+            instance = new Ingester();
+        return instance;
+    }
 
     @Override
     @SuppressWarnings("FinalizeDeclaration")
@@ -99,16 +110,16 @@ public class Ingester {
     }
 
     /**
-     * Sends a FileExtract to Solr to have its content extracted and added to the
+     * Sends a AbstractFileExtract to Solr to have its content extracted and added to the
      * index. commit() should be called once you're done ingesting files.
      * FileExtract represents a parent of extracted file with actual content.
-     * The parent itself has no content, only meta data and is used to associate the extracted FileExtractedChild
+     * The parent itself has no content, only meta data and is used to associate the extracted AbstractFileChunk
      *
-     * @param fe FileExtract to ingest
+     * @param fe AbstractFileExtract to ingest
      * @throws IngesterException if there was an error processing a specific
      * file, but the Solr server is probably fine.
      */
-    void ingest(FileExtract fe) throws IngesterException {
+    void ingest(AbstractFileExtract fe) throws IngesterException {
         Map<String, String> params = getContentFields(fe.getSourceFile());
 
         params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
@@ -117,23 +128,23 @@ public class Ingester {
     }
 
     /**
-     * Sends a FileExtractedChild to Solr and its extracted content stream to be added to the
+     * Sends a AbstractFileChunk to Solr and its extracted content stream to be added to the
      * index. commit() should be called once you're done ingesting files.
-     * FileExtractedChild represents a file chunk and its chunk content.
+     * AbstractFileChunk represents a file chunk and its chunk content.
      *
-     * @param fec FileExtractedChild to ingest
+     * @param fec AbstractFileChunk to ingest
      * @throws IngesterException if there was an error processing a specific
     * file, but the Solr server is probably fine.
     */
-    void ingest(FileExtractedChild fec, ByteContentStream bcs) throws IngesterException {
+    void ingest(AbstractFileChunk fec, ByteContentStream bcs) throws IngesterException {
         AbstractContent sourceContent = bcs.getSourceContent();
         Map<String, String> params = getContentFields(sourceContent);
 
         //overwrite id with the chunk id
         params.put(Server.Schema.ID.toString(),
-                FileExtractedChild.getFileExtractChildId(sourceContent.getId(), fec.getChunkId()));
+                Server.getChunkIdString(sourceContent.getId(), fec.getChunkId()));
 
-        ingest(bcs, params, FileExtract.MAX_STRING_CHUNK_SIZE);
+        ingest(bcs, params, AbstractFileStringExtract.MAX_STRING_CHUNK_SIZE);
     }
 
     /**
@@ -448,8 +459,9 @@ public class Ingester {
     */
    static boolean isIngestible(AbstractFile aFile) {
        TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
-        if (! aType.equals(TSK_DB_FILES_TYPE_ENUM.FS) )
+        if (! aType.equals(TSK_DB_FILES_TYPE_ENUM.FS) ) {
            return false;
+        }
 
        FsContent fsContent = (FsContent) aFile;
 
@@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch;
 
 import java.awt.event.ActionEvent;
 import java.awt.event.ActionListener;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -40,7 +41,6 @@ import org.netbeans.api.progress.ProgressHandleFactory;
 import org.openide.util.Cancellable;
 import org.openide.util.Exceptions;
 import org.sleuthkit.autopsy.casemodule.Case;
-import org.sleuthkit.autopsy.ingest.IngestManager;
 import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
 import org.sleuthkit.autopsy.ingest.IngestMessage;
 import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
@@ -95,7 +95,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
     private final String hashDBServiceName = "Hash Lookup"; //NOTE this needs to match the HashDB service getName()
     private SleuthkitCase caseHandle = null;
     private boolean skipKnown = true;
-    boolean initialized = false;
+    private boolean initialized = false;
 
     private enum IngestStatus {
 
@@ -200,6 +200,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
             managerProxy.postMessage(IngestMessage.createMessage(++messageID, MessageType.INFO, this, "Completed"));
         }
 
+
         //postSummary();
     }
 
@@ -224,6 +225,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
         runSearcher = false;
         finalSearcherDone = true;
 
+
         //commit uncommited files, don't search again
         commit();
 
@@ -498,16 +500,27 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
 
         private final Logger logger = Logger.getLogger(Indexer.class.getName());
 
-        private boolean extractAndIngest(AbstractFile aFile) {
-            boolean indexed = false;
-            final FileExtract fe = new FileExtract(KeywordSearchIngestService.this, aFile);
-            try {
-                indexed = fe.index(ingester);
-            } catch (IngesterException ex) {
-                logger.log(Level.WARNING, "Error extracting strings and indexing file: " + aFile.getName(), ex);
-                indexed = false;
-            }
-            return indexed;
+        /**
+         * Extract strings or text with Tika (by streaming) from the file. Divide
+         * the file into chunks and index the chunks
+         *
+         * @param aFile file to extract strings from, divide into chunks and
+         * index
+         * @param stringsOnly true if use string extraction, false if use Tika
+         * text extractor
+         * @return true if the file was indexed, false otherwise
+         */
+        private boolean extractIndex(AbstractFile aFile, boolean stringsOnly) throws IngesterException {
+            AbstractFileExtract fileExtract;
+
+            if (stringsOnly) {
+                fileExtract = new AbstractFileStringExtract(aFile);
+            } else {
+                fileExtract = new AbstractFileTikaTextExtract(aFile);
+            }
+
+            //divide into chunks and index
+            return fileExtract.index();
         }
 
         private void indexFile(AbstractFile aFile, boolean indexContent) {
@@ -537,9 +550,8 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
             boolean ingestibleFile = Ingester.isIngestible(aFile);
 
             final long size = aFile.getSize();
-            //if fs file, limit size of entire file, do not limit strings
-            if (fsContent != null && (size == 0 || (ingestibleFile && size > MAX_INDEX_SIZE))) {
-                //if fs file, index meta only, otherwise if unalloc, skip
+            //if fs file with no content (size is 0), index meta-data only
+            if (fsContent != null && size == 0) {
                 try {
                     ingester.ingest(fsContent, false); //meta-data only
                     ingestStatus.put(aFile.getId(), IngestStatus.INGESTED_META);
@@ -548,15 +560,21 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
                     logger.log(Level.WARNING, "Unable to index meta-data for fsContent: " + fsContent.getId(), ex);
                 }
 
-                return;
-            }
-
-            if (fsContent != null && ingestibleFile == true) {
-                //we know it's an allocated fs file (FsContent) with supported content
+            } else if (fsContent != null && ingestibleFile == true) {
+                //we know it's an allocated fs file (FsContent) with supported content
+                //extract text with Tika, divide into chunks and index with Solr
                 try {
                     //logger.log(Level.INFO, "indexing: " + fsContent.getName());
-                    ingester.ingest(fsContent, true);
-                    ingestStatus.put(fsContent.getId(), IngestStatus.INGESTED);
+                    //ingester.ingest(fsContent, true);
+                    if (!extractIndex(aFile, false)) {
+                        logger.log(Level.WARNING, "Failed to extract Tika text and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").");
+                        ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
+
+                    } else {
+                        ingestStatus.put(aFile.getId(), IngestStatus.INGESTED);
+
+                    }
+
                 } catch (IngesterException e) {
                     ingestStatus.put(fsContent.getId(), IngestStatus.SKIPPED);
                     //try to extract strings, if a file
@@ -578,13 +596,19 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
         }
 
         private boolean processNonIngestible(AbstractFile aFile) {
-            if (!extractAndIngest(aFile)) {
-                logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").");
+            try {
+                if (!extractIndex(aFile, true)) {
+                    logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").");
+                    ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
+                    return false;
+                } else {
+                    ingestStatus.put(aFile.getId(), IngestStatus.EXTRACTED_INGESTED);
+                    return true;
+                }
+            } catch (IngesterException ex) {
+                logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex);
                 ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
                 return false;
-            } else {
-                ingestStatus.put(aFile.getId(), IngestStatus.EXTRACTED_INGESTED);
-                return true;
             }
         }
     }
@@ -326,7 +326,7 @@ public class LuceneQuery implements KeywordSearchQuery {
         if (chunkID == 0) {
             contentIDStr = Long.toString(contentID);
         } else {
-            contentIDStr = FileExtractedChild.getFileExtractChildId(contentID, chunkID);
+            contentIDStr = Server.getChunkIdString(contentID, chunkID);
         }
 
         String idQuery = Server.Schema.ID.toString() + ":" + contentIDStr;
@ -60,74 +60,63 @@ class Server {
|
|||||||
public static enum Schema {
|
public static enum Schema {
|
||||||
|
|
||||||
ID {
|
ID {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "id";
|
return "id";
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
CONTENT {
|
CONTENT {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "content";
|
return "content";
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
CONTENT_WS {
|
CONTENT_WS {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "content_ws";
|
return "content_ws";
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
FILE_NAME {
|
FILE_NAME {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "file_name";
|
return "file_name";
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
CTIME {
|
CTIME {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "ctime";
|
return "ctime";
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
ATIME {
|
ATIME {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "atime";
|
return "atime";
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
MTIME {
|
MTIME {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "mtime";
|
return "mtime";
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
CRTIME {
|
CRTIME {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "crtime";
|
return "crtime";
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
NUM_CHUNKS {
|
NUM_CHUNKS {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "num_chunks";
|
return "num_chunks";
|
||||||
}
|
}
|
||||||
},};
|
},
|
||||||
|
};
|
||||||
public static final String HL_ANALYZE_CHARS_UNLIMITED = "-1";
|
public static final String HL_ANALYZE_CHARS_UNLIMITED = "-1";
|
||||||
|
|
||||||
//max content size we can send to Solr
|
//max content size we can send to Solr
|
||||||
public static final long MAX_CONTENT_SIZE = 1L * 1024 * 1024 * 1024;
|
public static final long MAX_CONTENT_SIZE = 1L * 1024 * 1024 * 1024;
|
||||||
|
|
||||||
private static final Logger logger = Logger.getLogger(Server.class.getName());
|
private static final Logger logger = Logger.getLogger(Server.class.getName());
|
||||||
private static final String DEFAULT_CORE_NAME = "coreCase";
|
private static final String DEFAULT_CORE_NAME = "coreCase";
|
||||||
// TODO: DEFAULT_CORE_NAME needs to be replaced with unique names to support multiple open cases
|
// TODO: DEFAULT_CORE_NAME needs to be replaced with unique names to support multiple open cases
|
||||||
@ -136,6 +125,8 @@ class Server {
|
|||||||
private String javaPath = "java";
|
private String javaPath = "java";
|
||||||
private static final int MAX_SOLR_MEM_MB = 512; //TODO set dynamically based on avail. system resources
|
private static final int MAX_SOLR_MEM_MB = 512; //TODO set dynamically based on avail. system resources
|
||||||
private Process curSolrProcess = null;
|
private Process curSolrProcess = null;
|
||||||
|
|
||||||
|
private static Ingester ingester = null;
|
||||||
|
|
||||||
public enum CORE_EVT_STATES {
|
public enum CORE_EVT_STATES {
|
||||||
|
|
||||||
@ -148,6 +139,7 @@ class Server {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* New instance for the server at the given URL
|
* New instance for the server at the given URL
|
||||||
|
*
|
||||||
* @param url should be something like "http://localhost:8983/solr/"
|
* @param url should be something like "http://localhost:8983/solr/"
|
||||||
*/
|
*/
|
||||||
Server(String url) {
|
Server(String url) {
|
||||||
@ -220,7 +212,7 @@ class Server {
|
|||||||
bw.newLine();
|
bw.newLine();
|
||||||
if (Version.getBuildType() == Version.Type.DEVELOPMENT) {
|
if (Version.getBuildType() == Version.Type.DEVELOPMENT) {
|
||||||
//flush buffers if dev version for debugging
|
//flush buffers if dev version for debugging
|
||||||
bw.flush();
|
bw.flush();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
@ -237,7 +229,7 @@ class Server {
|
|||||||
void start() {
|
void start() {
|
||||||
logger.log(Level.INFO, "Starting Solr server from: " + solrFolder.getAbsolutePath());
|
logger.log(Level.INFO, "Starting Solr server from: " + solrFolder.getAbsolutePath());
|
||||||
try {
|
try {
|
||||||
final String MAX_SOLR_MEM_MB_PAR = " -Xmx" + Integer.toString(MAX_SOLR_MEM_MB) + "m";
|
final String MAX_SOLR_MEM_MB_PAR = " -Xmx" + Integer.toString(MAX_SOLR_MEM_MB) + "m";
|
||||||
final String SOLR_START_CMD = javaPath + MAX_SOLR_MEM_MB_PAR + " -DSTOP.PORT=8079 -DSTOP.KEY=mysecret -jar start.jar";
|
final String SOLR_START_CMD = javaPath + MAX_SOLR_MEM_MB_PAR + " -DSTOP.PORT=8079 -DSTOP.KEY=mysecret -jar start.jar";
|
||||||
logger.log(Level.INFO, "Starting Solr using: " + SOLR_START_CMD);
|
logger.log(Level.INFO, "Starting Solr using: " + SOLR_START_CMD);
|
||||||
curSolrProcess = Runtime.getRuntime().exec(SOLR_START_CMD, null, solrFolder);
|
curSolrProcess = Runtime.getRuntime().exec(SOLR_START_CMD, null, solrFolder);
|
||||||
@ -259,9 +251,8 @@ class Server {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Tries to stop a Solr instance.
|
* Tries to stop a Solr instance.
|
||||||
*
|
*
|
||||||
* Waits for the stop command to finish
|
* Waits for the stop command to finish before returning.
|
||||||
* before returning.
|
|
||||||
*/
|
*/
|
||||||
synchronized void stop() {
|
synchronized void stop() {
|
||||||
try {
|
try {
|
||||||
@ -283,8 +274,11 @@ class Server {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests if there's a Solr server running by sending it a core-status request.
|
* Tests if there's a Solr server running by sending it a core-status
|
||||||
* @return false if the request failed with a connection error, otherwise true
|
* request.
|
||||||
|
*
|
||||||
|
* @return false if the request failed with a connection error, otherwise
|
||||||
|
* true
|
||||||
*/
|
*/
|
||||||
synchronized boolean isRunning() {
|
synchronized boolean isRunning() {
|
||||||
|
|
||||||
@@ -311,7 +305,9 @@ class Server {
 
         return true;
     }
-    /**** Convenience methods for use while we only open one case at a time ****/
+    /**
+     * ** Convenience methods for use while we only open one case at a time ***
+     */
     private volatile Core currentCore = null;
 
     synchronized void openCore() {
@@ -331,11 +327,14 @@ class Server {
         serverAction.putValue(CORE_EVT, CORE_EVT_STATES.STOPPED);
     }
 
-    /**** end single-case specific methods ****/
+    /**
+     * ** end single-case specific methods ***
+     */
     /**
      * Open a core for the given case
+     *
      * @param c
      * @return
      */
     synchronized Core openCore(Case c) {
         String sep = File.separator;
@@ -345,6 +344,7 @@ class Server {
 
     /**
      * commit current core if it exists
+     *
      * @throws SolrServerException, NoOpenCoreException
      */
     synchronized void commit() throws SolrServerException, NoOpenCoreException {
@@ -362,10 +362,12 @@ class Server {
     }
 
     /**
-     * Execute query that gets only number of all Solr files indexed
-     * without actually returning the files. The result does not include chunks, only number of actual files.
+     * Execute query that gets only number of all Solr files indexed without
+     * actually returning the files. The result does not include chunks, only
+     * number of actual files.
+     *
      * @return int representing number of indexed files
      * @throws SolrServerException
      */
     public int queryNumIndexedFiles() throws SolrServerException, NoOpenCoreException {
         if (currentCore == null) {
@@ -374,12 +376,13 @@ class Server {
 
         return currentCore.queryNumIndexedFiles();
     }
 
     /**
-     * Execute query that gets only number of all Solr documents indexed (files and chunks)
-     * without actually returning the documents
+     * Execute query that gets only number of all Solr documents indexed (files
+     * and chunks) without actually returning the documents
+     *
      * @return int representing number of indexed files (files and chunks)
      * @throws SolrServerException
      */
     public int queryNumIndexedDocuments() throws SolrServerException, NoOpenCoreException {
         if (currentCore == null) {
@@ -391,6 +394,7 @@ class Server {
 
     /**
      * Return true if the file is indexed (either as a whole as a chunk)
+     *
      * @param contentID
      * @return true if it is indexed
      * @throws SolrServerException, NoOpenCoreException
@@ -405,9 +409,11 @@ class Server {
 
     /**
      * Execute query that gets number of indexed file chunks for a file
+     *
      * @param fileID file id of the original file broken into chunks and indexed
-     * @return int representing number of indexed file chunks, 0 if there is no chunks
-     * @throws SolrServerException
+     * @return int representing number of indexed file chunks, 0 if there is no
+     * chunks
+     * @throws SolrServerException
      */
     public int queryNumFileChunks(long fileID) throws SolrServerException, NoOpenCoreException {
         if (currentCore == null) {
@@ -419,10 +425,11 @@ class Server {
 
     /**
      * Execute solr query
+     *
      * @param sq query
      * @return query response
      * @throws SolrServerException
      * @throws NoOpenCoreException
      */
     public QueryResponse query(SolrQuery sq) throws SolrServerException, NoOpenCoreException {
         if (currentCore == null) {
@@ -433,11 +440,12 @@ class Server {
 
     /**
      * Execute solr query
+     *
      * @param sq the query
      * @param method http method to use
      * @return query response
      * @throws SolrServerException
      * @throws NoOpenCoreException
      */
     public QueryResponse query(SolrQuery sq, SolrRequest.METHOD method) throws SolrServerException, NoOpenCoreException {
         if (currentCore == null) {
@@ -448,10 +456,11 @@ class Server {
 
     /**
      * Execute Solr terms query
+     *
      * @param sq the query
      * @return terms response
      * @throws SolrServerException
      * @throws NoOpenCoreException
      */
     public TermsResponse queryTerms(SolrQuery sq) throws SolrServerException, NoOpenCoreException {
         if (currentCore == null) {
@@ -462,10 +471,11 @@ class Server {
 
     /**
      * Execute Solr query to get content text
+     *
      * @param content to get the text for
      * @return content text string
      * @throws SolrServerException
      * @throws NoOpenCoreException
      */
     public String getSolrContent(final Content content) throws SolrServerException, NoOpenCoreException {
         if (currentCore == null) {
@@ -473,14 +483,16 @@ class Server {
         }
         return currentCore.getSolrContent(content.getId(), 0);
     }
 
     /**
      * Execute Solr query to get content text from content chunk
+     *
      * @param content to get the text for
-     * @param chunkID chunk number to query (starting at 1), or 0 if there is no chunks for that content
+     * @param chunkID chunk number to query (starting at 1), or 0 if there is no
+     * chunks for that content
      * @return content text string
      * @throws SolrServerException
      * @throws NoOpenCoreException
      */
     public String getSolrContent(final Content content, int chunkID) throws SolrServerException, NoOpenCoreException {
         if (currentCore == null) {
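
A short, hypothetical usage example of the two getSolrContent() overloads documented above; here server stands for an open Server instance, file for an ingested Content object, and the chunk number is made up.

String wholeFileText = server.getSolrContent(file);     // un-chunked file (chunk 0 under the hood)
String thirdChunk    = server.getSolrContent(file, 3);  // chunk 3 of a file that was split during indexing
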
@@ -490,15 +502,28 @@ class Server {
     }
 
     /**
-     * factory method to create ingester
-     * @return ingester
+     * Method to return ingester instance
+     *
+     * @return ingester instance
      */
-    public Ingester getIngester() {
-        return new Ingester();
+    public static Ingester getIngester() {
+        return Ingester.getDefault();
+    }
+
+    /**
+     * Given file parent id and child chunk ID, return the ID string of the chunk
+     * as stored in Solr, e.g. FILEID_CHUNKID
+     * @param parentID the parent file id (id of the source content)
+     * @param childID the child chunk id
+     * @return formatted string id
+     */
+    public static String getChunkIdString(long parentID, int childID) {
+        return Long.toString(parentID) + Server.ID_CHUNK_SEP + Integer.toString(childID);
     }
 
     /**
      * Open a new core
+     *
      * @param coreName name to refer to the core by in Solr
      * @param dataDir directory to load/store the core data from/to
      * @return new core
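
For illustration, and assuming ID_CHUNK_SEP is an underscore (the separator constant itself is defined elsewhere in Server), the new getChunkIdString() helper would format chunk document IDs like this:

String chunkId = Server.getChunkIdString(42L, 3);   // "42_3": document ID for chunk 3 of content 42
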
@@ -574,13 +599,13 @@ class Server {
         }
     }
 
-    private String getSolrContent(long contentID, int chunkID) {
+    private String getSolrContent(long contentID, int chunkID) {
         final SolrQuery q = new SolrQuery();
         q.setQuery("*:*");
         String filterQuery = Schema.ID.toString() + ":" + contentID;
-        if (chunkID != 0)
+        if (chunkID != 0) {
             filterQuery = filterQuery + Server.ID_CHUNK_SEP + chunkID;
+        }
         q.addFilterQuery(filterQuery);
         q.setFields(Schema.CONTENT.toString());
         try {
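
Spelled out with concrete values, the private getSolrContent() above builds a query like the following sketch; the literal field names and the "_" separator are assumptions drawn from this diff, not verified against the Solr schema.

SolrQuery q = new SolrQuery();
q.setQuery("*:*");                 // match everything...
q.addFilterQuery("id:42_3");       // ...then filter to content 42, chunk 3 ("id:42" when chunkID == 0)
q.setFields("content");            // return only the indexed text field
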
@@ -602,11 +627,12 @@ class Server {
     }
 
     /**
-     * Execute query that gets only number of all Solr files (not chunks) indexed
-     * without actually returning the files
+     * Execute query that gets only number of all Solr files (not chunks)
+     * indexed without actually returning the files
      *
-     * @return int representing number of indexed files (entire files, not chunks)
-     * @throws SolrServerException
+     * @return int representing number of indexed files (entire files, not
+     * chunks)
+     * @throws SolrServerException
      */
     private int queryNumIndexedFiles() throws SolrServerException {
         SolrQuery q = new SolrQuery(Server.Schema.ID + ":*" + Server.ID_CHUNK_SEP + "*");
@@ -614,14 +640,15 @@ class Server {
         int numChunks = (int) query(q).getResults().getNumFound();
         return queryNumIndexedDocuments() - numChunks;
     }
 
     /**
      * Execute query that gets only number of all Solr documents indexed
-     * without actually returning the documents. Documents include entire indexed files
-     * as well as chunks, which are treated as documents.
+     * without actually returning the documents. Documents include entire
+     * indexed files as well as chunks, which are treated as documents.
      *
-     * @return int representing number of indexed documents (entire files and chunks)
-     * @throws SolrServerException
+     * @return int representing number of indexed documents (entire files
+     * and chunks)
+     * @throws SolrServerException
      */
     private int queryNumIndexedDocuments() throws SolrServerException {
         SolrQuery q = new SolrQuery("*:*");
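
Taken together, the two private counters above rely on the chunk ID convention: chunk documents match the wildcard ID pattern, so subtracting the chunk count from the total document count leaves the number of whole files. A worked example with made-up numbers (and the assumption, consistent with this code, that every indexed file keeps its own parent document even when chunked):

// Hypothetical index: 10 files, two of which were split into 5 chunks each
queryNumIndexedDocuments()   // 10 file documents + 10 chunk documents = 20
numChunks                    // documents matching "id:*_*"            = 10
queryNumIndexedFiles()       // 20 - 10                                = 10
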
@@ -631,9 +658,10 @@ class Server {
 
     /**
      * Return true if the file is indexed (either as a whole as a chunk)
+     *
      * @param contentID
      * @return true if it is indexed
      * @throws SolrServerException
      */
     private boolean queryIsIndexed(long contentID) throws SolrServerException {
         SolrQuery q = new SolrQuery("*:*");
@@ -645,12 +673,15 @@ class Server {
 
     /**
      * Execute query that gets number of indexed file chunks for a file
-     * @param contentID file id of the original file broken into chunks and indexed
-     * @return int representing number of indexed file chunks, 0 if there is no chunks
-     * @throws SolrServerException
+     *
+     * @param contentID file id of the original file broken into chunks and
+     * indexed
+     * @return int representing number of indexed file chunks, 0 if there is
+     * no chunks
+     * @throws SolrServerException
      */
     private int queryNumFileChunks(long contentID) throws SolrServerException {
         final SolrQuery q =
                 new SolrQuery(Server.Schema.ID + ":" + Long.toString(contentID) + Server.ID_CHUNK_SEP + "*");
         q.setRows(0);
         return (int) query(q).getResults().getNumFound();
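
The chunk-count query above uses the same ID convention. Written out against a plain SolrServer handle (solrServer is a hypothetical variable, and the "_" separator and "id" field name are assumptions), it amounts to:

SolrQuery q = new SolrQuery("id:42_*");                           // every chunk of content 42
q.setRows(0);                                                     // no documents needed, just the hit count
long numChunks = solrServer.query(q).getResults().getNumFound();
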
@@ -1,5 +1,3 @@
 file.reference.commons-lang-2.4.jar=release/modules/ext/commons-lang-2.4.jar
-file.reference.tika-core-1.1.jar=release/modules/ext/tika-core-1.1.jar
-file.reference.tika-parsers-1.1.jar=release/modules/ext/tika-parsers-1.1.jar
 javac.source=1.6
 javac.compilerargs=-Xlint -Xlint:-serial

@@ -53,17 +53,13 @@
         </module-dependencies>
         <public-packages/>
         <class-path-extension>
-            <runtime-relative-path>ext/tika-core-1.1.jar</runtime-relative-path>
-            <binary-origin>release/modules/ext/tika-core-1.1.jar</binary-origin>
+            <runtime-relative-path>ext/tika-core-0.10.jar</runtime-relative-path>
+            <binary-origin>release/modules/ext/tika-core-0.10.jar</binary-origin>
         </class-path-extension>
         <class-path-extension>
             <runtime-relative-path>ext/commons-lang-2.4.jar</runtime-relative-path>
             <binary-origin>release/modules/ext/commons-lang-2.4.jar</binary-origin>
         </class-path-extension>
-        <class-path-extension>
-            <runtime-relative-path>ext/tika-parsers-1.1.jar</runtime-relative-path>
-            <binary-origin>release/modules/ext/tika-parsers-1.1.jar</binary-origin>
-        </class-path-extension>
         </data>
     </configuration>
</project>
BIN  thunderbirdparser/release/modules/ext/tika-core-0.10.jar (new file)
Binary file not shown.
@@ -17,7 +17,7 @@ import org.apache.tika.metadata.*;
  * @author arivera
  */
 public class ThunderbirdMetadata implements CreativeCommons, DublinCore, Geographic, HttpHeaders,
-        IPTC, Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
+        Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
         Serializable {
 
     private int strArrCount = 0;