Merge latest

2025-07-17 18:17:43 +00:00 · 2017-01-11 12:20:16 -05:00 · 2017-01-11 12:20:16 -05:00 · 8518a208ff
commit 8518a208ff
parent 87e2ab623a c6adff9c59
12 changed files with 471 additions and 250 deletions
--- a/Core/src/org/sleuthkit/autopsy/datamodel/ImageNode.java
+++ b/Core/src/org/sleuthkit/autopsy/datamodel/ImageNode.java
@ -19,6 +19,8 @@
 package org.sleuthkit.autopsy.datamodel;
 import java.awt.event.ActionEvent;
 import java.beans.PropertyChangeEvent;
 import java.beans.PropertyChangeListener;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.ArrayList;
@ -27,6 +29,7 @@ import java.util.List;
 import java.util.logging.Level;
 import javax.swing.AbstractAction;
 import javax.swing.Action;
 import org.openide.nodes.Children;
 import org.openide.nodes.Sheet;
 import org.openide.util.NbBundle;
 import org.openide.util.NbBundle.Messages;
@ -35,11 +38,14 @@ import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.directorytree.ExplorerNodeActionVisitor;
 import org.sleuthkit.autopsy.directorytree.FileSearchAction;
 import org.sleuthkit.autopsy.directorytree.NewWindowViewAction;
 import org.sleuthkit.autopsy.ingest.IngestManager;
 import org.sleuthkit.autopsy.ingest.ModuleContentEvent;
 import org.sleuthkit.autopsy.ingest.RunIngestModulesDialog;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.Image;
 import org.sleuthkit.datamodel.SleuthkitCase.CaseDbQuery;
 import org.sleuthkit.datamodel.TskCoreException;
 import org.sleuthkit.datamodel.VirtualDirectory;
 /**
 * This class is used to represent the "Node" for the image. The children of
@ -71,6 +77,16 @@ public class ImageNode extends AbstractContentNode<Image> {
        String imgName = nameForImage(img);
        this.setDisplayName(imgName);
        this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/hard-drive-icon.jpg"); //NON-NLS
        // Listen for ingest events so that we can detect new added files (e.g. carved)
        IngestManager.getInstance().addIngestModuleEventListener(pcl);        
        // Listen for case events so that we can detect when case is closed
        Case.addPropertyChangeListener(pcl);
    }
    /**
     * Unregisters this node's listener from the two event sources it is
     * registered with: ingest module events and case property changes.
     */
    private void removeListeners() {
        final IngestManager ingestManager = IngestManager.getInstance();
        ingestManager.removeIngestModuleEventListener(pcl);
        Case.removePropertyChangeListener(pcl);
    }
    /**
@ -199,4 +215,46 @@ public class ImageNode extends AbstractContentNode<Image> {
    public String getItemType() {
        // The item type is simply the fully qualified name of the runtime class.
        final Class<?> nodeClass = getClass();
        return nodeClass.getName();
    }
    /**
     * Listener shared between the ingest manager and the case.
     *
     * On a CONTENT_CHANGED ingest event it refreshes this node's children when
     * the changed content sits in a carved-files virtual directory whose parent
     * is this node's image. On a CURRENT_CASE event with a null new value (the
     * case was closed) it unregisters itself.
     */
    private final PropertyChangeListener pcl = (PropertyChangeEvent evt) -> {
        // The property name may be null, so compare constant-first.
        String eventType = evt.getPropertyName();
        if (IngestManager.IngestModuleEvent.CONTENT_CHANGED.toString().equals(eventType)) {
            // The module content event is delivered in the "old value" slot.
            if ((evt.getOldValue() instanceof ModuleContentEvent) == false) {
                return;
            }
            ModuleContentEvent moduleContentEvent = (ModuleContentEvent) evt.getOldValue();
            if ((moduleContentEvent.getSource() instanceof Content) == false) {
                return;
            }
            Content newContent = (Content) moduleContentEvent.getSource();

            try {
                // Is this a new carved file?
                Content parent = newContent.getParent();
                if (parent == null || !VirtualDirectory.NAME_CARVED.equals(parent.getName())) {
                    return;
                }
                // Was this new carved file produced from this image?
                Content grandParent = parent.getParent();
                if (grandParent == null || grandParent.getId() != getContent().getId()) {
                    return;
                }
                Children children = getChildren();
                if (children instanceof ContentChildren) {
                    ((ContentChildren) children).refreshChildren();
                    // NOTE(review): result is discarded; presumably called to
                    // force the child nodes to be recomputed - confirm.
                    children.getNodesCount();
                }
            } catch (TskCoreException ex) {
                // Best effort only: a failed lookup just means no refresh happens.
                Logger.getLogger(ImageNode.class.getName()).log(Level.WARNING,
                        "Failed to look up the parent of changed content; children not refreshed", ex); //NON-NLS
            }
        } else if (Case.Events.CURRENT_CASE.toString().equals(eventType)) {
            if (evt.getNewValue() == null) {
                // case was closed. Remove listeners so that we don't get called with a stale case handle
                removeListeners();
            }
        }
    };
 }
--- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/Bundle.properties
+++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/Bundle.properties
@ -82,7 +82,13 @@ ConfirmationDialog.Exit=Exit
 ConfirmationDialog.DoNotExit=Do Not Exit
 ConfirmationDialog.ConfirmExit=All incomplete copy jobs will be cancelled. Are you sure?
 ConfirmationDialog.ConfirmExitHeader=Confirm Exit
 OpenIDE-Module-Long-Description=\
    This module contains features that are being developed by Basis Technology and are not part of the default Autopsy distribution.  \
    You can enable this module to use the new features.  \
    The features should be stable, but their exact behavior and API are subject to change.  \n\n\
    We make no guarantee that the API of this module will not change, so developers should be careful when relying on it.
 OpenIDE-Module-Name=Experimental
 OpenIDE-Module-Short-Description=This module contains features that are being developed by Basis Technology and are not part of the default Autopsy distribution.
 ReviewModeCasePanel.bnRefresh.text=&Refresh
 ReviewModeCasePanel.bnOpen.text=&Open
 ReviewModeCasePanel.rbGroupLabel.text=Show Last 10:
--- a/KeywordSearch/release/solr/solr/configsets/AutopsyConfig/conf/schema.xml
+++ b/KeywordSearch/release/solr/solr/configsets/AutopsyConfig/conf/schema.xml
@ -524,6 +524,7 @@
   <!-- file chunk-specific fields (optional for others) -->
   <!-- for a parent file with no content, number of chunks are specified -->
   <field name="num_chunks" type="int" indexed="true" stored="true" required="false" />
   <field name="chunk_size" type="int" indexed="true" stored="true" required="false" />
   <!-- Common metadata fields, named specifically to match up with
     SolrCell metadata when parsing rich documents such as Word, PDF.
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ArtifactTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ArtifactTextExtractor.java
@ -38,7 +38,7 @@ import org.sleuthkit.datamodel.TskCoreException;
 * Extracts text from artifacts by concatenating the values of all of the
 * artifact's attributes.
 */
-public class ArtifactTextExtractor extends TextExtractor<BlackboardArtifact> {
+class ArtifactTextExtractor implements TextExtractor<BlackboardArtifact> {
    static final private Logger logger = Logger.getLogger(ArtifactTextExtractor.class.getName());
    /**
@ -82,13 +82,16 @@ public class ArtifactTextExtractor extends TextExtractor<BlackboardArtifact> {
    }
    @Override
-    boolean isDisabled() {
+     public boolean isDisabled() {
        return false;
     }
     /**
      * Writes the given message and exception to this class's logger at
      * WARNING level.
      *
      * @param msg The message to log.
      * @param ex  The exception to log with the message.
      */
     @Override
     public void logWarning(final String msg, Exception ex) {
        final Level severity = Level.WARNING;
        logger.log(severity, msg, ex); //NON-NLS
    }
-
+    private InputStream getInputStream(BlackboardArtifact artifact) {
    @Override
    InputStream getInputStream(BlackboardArtifact artifact) {
        // Concatenate the string values of all attributes into a single
        // "content" string to be indexed.
        StringBuilder artifactContents = new StringBuilder();
@ -127,17 +130,17 @@ public class ArtifactTextExtractor extends TextExtractor<BlackboardArtifact> {
    }
    @Override
-    Reader getReader(InputStream stream, BlackboardArtifact source) throws Ingester.IngesterException {
+    public Reader getReader(BlackboardArtifact source) throws Ingester.IngesterException {
-        return new InputStreamReader(stream, StandardCharsets.UTF_8);
+        return new InputStreamReader(getInputStream(source), StandardCharsets.UTF_8);
    }
    @Override
-    long getID(BlackboardArtifact source) {
+    public long getID(BlackboardArtifact source) {
        return source.getArtifactID();
    }
    @Override
-    String getName(BlackboardArtifact source) {
+    public String getName(BlackboardArtifact source) {
        return source.getDisplayName() + "_" + source.getArtifactID();
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/FileTextExtractor.java
@ -18,7 +18,6 @@
 */
 package org.sleuthkit.autopsy.keywordsearch;
 import java.io.InputStream;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.List;
@ -28,7 +27,7 @@ import org.sleuthkit.datamodel.AbstractFile;
 * Common methods for utilities that extract text and content and divide into
 * chunks
 */
-abstract class FileTextExtractor extends TextExtractor< AbstractFile> {
+abstract class FileTextExtractor implements TextExtractor< AbstractFile> {
    static final List<String> BLOB_MIME_TYPES
@ -96,17 +95,16 @@ abstract class FileTextExtractor extends TextExtractor< AbstractFile> {
    abstract boolean isSupported(AbstractFile file, String detectedFormat);
    @Override
-    abstract Reader getReader(InputStream stream, AbstractFile source) throws Ingester.IngesterException;
+    public abstract Reader getReader(AbstractFile source) throws Ingester.IngesterException;
    @Override
-    long getID(AbstractFile source) {
+    public long getID(AbstractFile source) {
        return source.getId();
    }
    @Override
-    String getName(AbstractFile source) {
+    public String getName(AbstractFile source) {
        return source.getName();
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HtmlTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HtmlTextExtractor.java
@ -19,16 +19,17 @@
 package org.sleuthkit.autopsy.keywordsearch;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Arrays;
 import java.util.List;
 import java.util.logging.Level;
 import net.htmlparser.jericho.Attributes;
 import net.htmlparser.jericho.Renderer;
 import net.htmlparser.jericho.Source;
 import net.htmlparser.jericho.StartTag;
 import net.htmlparser.jericho.StartTagType;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.ReadContentInputStream;
@ -37,6 +38,7 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
 */
 class HtmlTextExtractor extends FileTextExtractor {
    static final private Logger logger = Logger.getLogger(HtmlTextExtractor.class.getName());
    private static final int MAX_SIZE = 50_000_000; //50MB
    static final List<String> WEB_MIME_TYPES = Arrays.asList(
@ -61,7 +63,9 @@ class HtmlTextExtractor extends FileTextExtractor {
    }
    @Override
-    Reader getReader(InputStream in, AbstractFile sourceFile) throws Ingester.IngesterException {
+    public Reader getReader(AbstractFile sourceFile) throws Ingester.IngesterException {
        ReadContentInputStream stream = new ReadContentInputStream(sourceFile);
        //Parse the stream with Jericho and put the results in a Reader
        try {
            StringBuilder scripts = new StringBuilder();
@ -75,7 +79,7 @@ class HtmlTextExtractor extends FileTextExtractor {
            int numComments = 0;
            int numOthers = 0;
-            Source source = new Source(in);
+            Source source = new Source(stream);
            source.fullSequentialParse();
            Renderer renderer = source.getRenderer();
            renderer.setNewLine("\n");
@ -158,12 +162,11 @@ class HtmlTextExtractor extends FileTextExtractor {
    }
    @Override
-    InputStream getInputStream(AbstractFile sourceFile1) {
+    public boolean isDisabled() {
        return new ReadContentInputStream(sourceFile1);
    }
    @Override
    boolean isDisabled() {
        return false;
    }
    /**
     * Writes the given message and exception to this class's logger at
     * WARNING level.
     *
     * @param msg The message to log.
     * @param ex  The exception to log with the message.
     */
    @Override
    public void logWarning(final String msg, Exception ex) {
        logger.log(Level.WARNING, msg, ex); //NON-NLS
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
@ -18,12 +18,15 @@
 */
 package org.sleuthkit.autopsy.keywordsearch;
 import com.google.common.base.Utf8;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.logging.Level;
 import javax.annotation.concurrent.NotThreadSafe;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.common.SolrInputDocument;
 import org.openide.util.NbBundle;
@ -54,9 +57,7 @@ class Ingester {
    private final Server solrServer = KeywordSearch.getServer();
    private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
    private static Ingester instance;
-    private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024; //chars
+    private static final int SINGLE_READ_CHARS = 512;
    private static final int SINGLE_READ_CHARS = 1024;
    private static final int EXTRA_CHARS = 128;
    private Ingester() {
    }
@ -120,6 +121,136 @@ class Ingester {
        return item.accept(SOLR_FIELDS_VISITOR);
    }
    /**
     * Use the given TextExtractor to extract text from the given source. The
     * text will be chunked and each chunk passed to Solr to add to the index.
     * After chunking stops (normally, on cancellation, or on a read error) a
     * final document holding only the metadata of the source, including the
     * number of chunks indexed, is added under the source's own id.
     *
     * @param <T>       A subclass of SleuthkitVisitableItem.
     * @param extractor The TextExtractor that will be used to extract text from
     *                  the given source.
     * @param source    The source from which text will be extracted, chunked,
     *                  and indexed.
     * @param context   The ingest job context that can be used to cancel this
     *                  process. May be null, in which case no cancellation
     *                  check is made.
     *
     * @return True if this method executed normally (including when the
     *         extractor is disabled or the ingest was cancelled), or False if
     *         there was an unexpected exception. //JMTODO: This policy needs to
     *         be reviewed.
     *
     * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
     *         if the final metadata document could not be indexed.
     */
    < T extends SleuthkitVisitableItem> boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
        final long sourceID = extractor.getID(source);
        final String sourceName = extractor.getName(source);
        int numChunks = 0; //unknown until chunking is done
        if (extractor.isDisabled()) {
            /* some Extractors, notably the strings extractor, have options
             * which can be configured such that no extraction should be done */
            return true;
        }
        Map<String, String> fields = getContentFields(source);
        //Get a reader for the content of the given source
        try (BufferedReader reader = new BufferedReader(extractor.getReader(source));) {
            Chunker chunker = new Chunker(reader);
            for (Chunk chunk : chunker) {
                if (context != null && context.fileIngestIsCancelled()) {
                    //stop chunking; the finally block still records the chunks indexed so far
                    return true;
                }
                String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
                fields.put(Server.Schema.ID.toString(), chunkId);
                fields.put(Server.Schema.CHUNK_SIZE.toString(), String.valueOf(chunk.getBaseChunkLength()));
                try {
                    //add the chunk text to Solr index
                    indexChunk(chunk.toString(), sourceName, fields);
                    numChunks++;
                } catch (Ingester.IngesterException ingEx) {
                    extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
                            + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
                    // NOTE(review): if IngesterException is an Exception subclass,
                    // the outer catch (Exception) below intercepts this rethrow
                    // and the method returns false - confirm this is intended.
                    throw ingEx; //need to rethrow to signal error and move on
                } catch (Exception ex) {
                    throw new IngesterException(String.format("Error ingesting (indexing) file chunk: %s", chunkId), ex);
                }
            }
        } catch (IOException ex) {
            extractor.logWarning("Unable to read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
            return false;
        } catch (Exception ex) {
            extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
            return false;
        } finally {
            //after all chunks, index just the meta data, including the  numChunks, of the parent file
            fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
            fields.put(Server.Schema.ID.toString(), Long.toString(sourceID)); //reset id field to base document id
            indexChunk(null, sourceName, fields);
        }
        return true;
    }
    /**
     * Adds one chunk to the Solr index as a separate Solr document.
     *
     * TODO see if can use a byte or string streaming way to add content to
     * /update handler e.g. with XMLUpdateRequestHandler (deprecated in Solr
     * 4.0.0), see if possible to stream with UpdateRequestHandler
     *
     * @param chunk      The chunk content as a string. May be null; indexText
     *                   passes null when it indexes only the metadata document.
     * @param sourceName The name of the source, used in error messages.
     * @param fields     The field names and values to copy onto the document.
     *
     * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
     *         if the fields contain no image id, or if adding the document to
     *         the Solr server fails.
     */
    private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
        final String imageIdKey = Server.Schema.IMAGE_ID.toString();
        if (fields.get(imageIdKey) == null) {
            //JMTODO: actually if the we couldn't get the image id it is set to -1,
            // but does this really mean we don't want to index it?
            //skip the file, image id unknown
            //JMTODO: does this need to be internationalized?
            final String unknownImageMsg = NbBundle.getMessage(Ingester.class,
                    "Ingester.ingest.exception.unknownImgId.msg", sourceName);
            logger.log(Level.SEVERE, unknownImageMsg);
            throw new IngesterException(unknownImageMsg);
        }

        //Copy every entry of the field map onto a new SolrInputDocument
        final SolrInputDocument updateDoc = new SolrInputDocument();
        for (Map.Entry<String, String> field : fields.entrySet()) {
            updateDoc.addField(field.getKey(), field.getValue());
        }
        //then add the content itself
        //JMTODO: can we just add it to the field map before passing that in?
        final String contentKey = Server.Schema.CONTENT.toString();
        updateDoc.addField(contentKey, chunk);

        try {
            //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
            solrServer.addDocument(updateDoc);
            uncommitedIngests = true;
        } catch (KeywordSearchModuleException ex) {
            //JMTODO: does this need to be internationalized?
            final String addFailedMsg
                    = NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName);
            throw new IngesterException(addFailedMsg, ex);
        }
    }
    /**
     * Asks Solr to commit, which is necessary before ingested files will
     * appear in searches. A failed commit is logged as a warning and the
     * uncommitted-ingests flag is left unchanged.
     */
    void commit() {
        try {
            solrServer.commit();
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
            return;
        }
        uncommitedIngests = false;
    }
    /**
     * Visitor used to create fields to send to SOLR index.
     */
@ -221,192 +352,6 @@ class Ingester {
        }
    }
    /**
     * Use the given TextExtractor to extract text from the given source. The
     * text will be chunked and each chunk passed to Solr to add to the index.
     * After chunking stops (normally, on cancellation, or on a read error) a
     * final document holding only the metadata of the source, including the
     * number of chunks indexed, is added under the source's own id.
     *
     * @param <T>       A subclass of SleuthkitVisitableItem.
     * @param extractor The TextExtractor that will be used to extract text from
     *                  the given source.
     * @param source    The source from which text will be extracted, chunked,
     *                  and indexed.
     * @param context   The ingest job context that can be used to cancel this
     *                  process. May be null, in which case no cancellation
     *                  check is made.
     *
     * @return True if this method executed normally (including when the
     *         extractor is disabled or the ingest was cancelled), or False if
     *         there was an unexpected exception. //JMTODO: This policy needs to
     *         be reviewed.
     *
     * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
     *         if the final metadata document could not be indexed.
     */
    < T extends SleuthkitVisitableItem> boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
        final long sourceID = extractor.getID(source);
        final String sourceName = extractor.getName(source);
        int numChunks = 0; //unknown until chunking is done
        if (extractor.isDisabled()) {
            /* some Extractors, notably the strings extractor, have options
             * which can be configured such that no extraction should be done */
            return true;
        }
        Map<String, String> fields = getContentFields(source);
        //Get a stream and a reader for that stream
        try (final InputStream stream = extractor.getInputStream(source);
                Reader reader = extractor.getReader(stream, source);) {
            //we read at most SINGLE_READ_CHARS at a time, this seems to max what some Readers would return
            char[] textChunkBuf = new char[MAX_EXTR_TEXT_CHARS];
            boolean eof = false;  //have we read until the end of the file yet
            while (!eof) {
                int chunkSizeInChars = 0;  // the size in chars of the chunk (so far)
                if (context != null && context.fileIngestIsCancelled()) {
                    return true;
                }
                int charsRead = 0;  // number of chars read in the most recent read operation
                //consume chars to fill entire chunk (but leave EXTRA_CHARS to end the word)
                while ((chunkSizeInChars < MAX_EXTR_TEXT_CHARS - SINGLE_READ_CHARS - EXTRA_CHARS)
                        && (charsRead = reader.read(textChunkBuf, chunkSizeInChars, SINGLE_READ_CHARS)) != -1) {
                    chunkSizeInChars += charsRead;
                }
                if (charsRead == -1) {
                    //this is the last chunk
                    eof = true;
                } else {
                    //The last bulk read was already counted inside the loop
                    //above; counting it again here would pull stale buffer
                    //contents into the chunk and could exceed the buffer size.

                    //if we haven't reached the end of the file,
                    //try to read char-by-char until whitespace to not break words
                    while ((chunkSizeInChars < MAX_EXTR_TEXT_CHARS - 1)
                            && (Character.isWhitespace(textChunkBuf[chunkSizeInChars - 1]) == false)
                            && (charsRead = reader.read(textChunkBuf, chunkSizeInChars, 1)) != -1) {
                        chunkSizeInChars += charsRead;
                    }
                    if (charsRead == -1) {
                        //this is the last chunk
                        eof = true;
                    }
                }

                StringBuilder sb = new StringBuilder(chunkSizeInChars)
                        .append(textChunkBuf, 0, chunkSizeInChars);

                sanitizeToUTF8(sb);   //replace non UTF8 chars with '^'

                String chunkId = Server.getChunkIdString(sourceID, numChunks + 1);
                fields.put(Server.Schema.ID.toString(), chunkId);
                try {
                    //pass the chunk to method that adds it to Solr index
                    indexChunk(sb.toString(), sourceName, fields);
                    numChunks++;
                } catch (Ingester.IngesterException ingEx) {
                    extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
                            + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
                    // NOTE(review): if IngesterException is an Exception subclass,
                    // the outer catch (Exception) below intercepts this rethrow
                    // and the method returns false - confirm this is intended.
                    throw ingEx; //need to rethrow to signal error and move on
                } catch (Exception ex) {
                    throw new IngesterException(String.format("Error ingesting (indexing) file chunk: %s", chunkId), ex);
                }
            }
        } catch (IOException ex) {
            extractor.logWarning("Unable to read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
            return false;
        } catch (Exception ex) {
            extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
            return false;
        } finally {
            //after all chunks, index just the meta data, including the  numChunks, of the parent file
            fields.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(numChunks));
            fields.put(Server.Schema.ID.toString(), Long.toString(sourceID)); //reset id field to base document id
            indexChunk(null, sourceName, fields);
        }
        return true;
    }
    /**
     * Overwrites, in place, every character of the given StringBuilder that
     * TextUtil.isValidSolrUTF8 rejects with a caret '^'. The length of the
     * builder is not changed.
     *
     * @param sb the StringBuilder to sanitize
     *
     * //JMTODO: use Charsequence.chars() or codePoints() and then a mapping
     * function?
     */
    private static void sanitizeToUTF8(StringBuilder sb) {
        final int end = sb.length();
        int index = 0;
        while (index < end) {
            final char current = sb.charAt(index);
            if (!TextUtil.isValidSolrUTF8(current)) {
                sb.setCharAt(index, '^');
            }
            index++;
        }
    }
    /**
     * Adds one chunk to the Solr index as a separate Solr document.
     *
     * TODO see if can use a byte or string streaming way to add content to
     * /update handler e.g. with XMLUpdateRequestHandler (deprecated in Solr
     * 4.0.0), see if possible to stream with UpdateRequestHandler
     *
     * @param chunk      The chunk content as a string. May be null; indexText
     *                   passes null when it indexes only the metadata document.
     * @param sourceName The name of the source, used in error messages.
     * @param fields     The field names and values to copy onto the document.
     *
     * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
     *         if the fields contain no image id, or if adding the document to
     *         the Solr server fails.
     */
    private void indexChunk(String chunk, String sourceName, Map<String, String> fields) throws IngesterException {
        final String imageIdKey = Server.Schema.IMAGE_ID.toString();
        if (fields.get(imageIdKey) == null) {
            //JMTODO: actually if the we couldn't get the image id it is set to -1,
            // but does this really mean we don't want to index it?
            //skip the file, image id unknown
            //JMTODO: does this need to be internationalized?
            final String unknownImageMsg = NbBundle.getMessage(Ingester.class,
                    "Ingester.ingest.exception.unknownImgId.msg", sourceName);
            logger.log(Level.SEVERE, unknownImageMsg);
            throw new IngesterException(unknownImageMsg);
        }

        //Copy every entry of the field map onto a new SolrInputDocument
        final SolrInputDocument updateDoc = new SolrInputDocument();
        for (Map.Entry<String, String> field : fields.entrySet()) {
            updateDoc.addField(field.getKey(), field.getValue());
        }
        //then add the content itself
        //JMTODO: can we just add it to the field map before passing that in?
        final String contentKey = Server.Schema.CONTENT.toString();
        updateDoc.addField(contentKey, chunk);

        try {
            //TODO: consider timeout thread, or vary socket timeout based on size of indexed content
            solrServer.addDocument(updateDoc);
            uncommitedIngests = true;
        } catch (KeywordSearchModuleException ex) {
            //JMTODO: does this need to be internationalized?
            final String addFailedMsg
                    = NbBundle.getMessage(Ingester.class, "Ingester.ingest.exception.err.msg", sourceName);
            throw new IngesterException(addFailedMsg, ex);
        }
    }
    /**
     * Asks Solr to commit, which is necessary before ingested files will
     * appear in searches. A failed commit is logged as a warning and the
     * uncommitted-ingests flag is left unchanged.
     */
    void commit() {
        try {
            solrServer.commit();
        } catch (NoOpenCoreException | SolrServerException ex) {
            logger.log(Level.WARNING, "Error commiting index", ex); //NON-NLS
            return;
        }
        uncommitedIngests = false;
    }
    /**
     * Indicates that there was an error with the specific ingest operation, but
     * it's still okay to continue ingesting files.
@ -424,3 +369,211 @@ class Ingester {
        }
    }
 }
 /**
 * Encapsulates the content chunking algorithm in an implementation of the
 * Iterator interface. Also implements Iterable so it can be used directly in a
 * for loop. The base chunk is the part of the chunk before the overlapping
 * window. The window will be included at the end of the current chunk as well
 * as at the beginning of the next chunk.
 *
 * All size limits are expressed in UTF-8 bytes. Because a Unicode code point
 * (including a UTF-16 surrogate pair) is never split between two reads or two
 * chunks, a limit may be exceeded by at most 3 bytes.
 */
@NotThreadSafe
 class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
    //Chunking algorithm parameters-------------------------------------//
    /** The maximum size of a chunk, including the window. */
    private static final int MAX_TOTAL_CHUNK_SIZE = 32766; //bytes
    /** The minimum to read before we start the process of looking for
     * whitespace to break at and creating an overlapping window. */
    private static final int MINIMUM_BASE_CHUNK_SIZE = 30 * 1024; //bytes
    /** The maximum size of the chunk, before the overlapping window, even if we
     * couldn't find whitespace to break at. */
    private static final int MAXIMUM_BASE_CHUNK_SIZE = 31 * 1024; //bytes
    /** The amount of text we will read through before we give up on finding
     * whitespace to break the chunk/window at. */
    private static final int WHITE_SPACE_BUFFER_SIZE = 512; //bytes
    /** The number of characters to read in one go from the Reader. */
    private static final int READ_CHARS_BUFFER_SIZE = 512; //chars
    /** The number of bytes each of the two window reading phases (block read,
     * then whitespace search) may add to the chunk. */
    private static final int WINDOW_PHASE_SIZE = 1024; //bytes
    /** The most UTF-8 bytes a single Java char can need (a surrogate pair
     * needs 4 bytes for 2 chars, i.e. less per char). */
    private static final int MAX_UTF8_BYTES_PER_CHAR = 3;
    /** Upper bound on the chars read while building the window: each phase
     * adds at most WINDOW_PHASE_SIZE bytes plus one code point of overshoot,
     * and every char accounts for at least one byte. Used as the mark limit so
     * that reset() can always rewind to the start of the window. */
    private static final int WINDOW_READ_AHEAD_LIMIT = 2 * (WINDOW_PHASE_SIZE + MAX_UTF8_BYTES_PER_CHAR + 1); //chars
    ////chunker state--------------------------------------------///
    /** The Reader that this chunk reads from, and divides into chunks. It must
     * be a buffered reader to ensure that mark/reset are supported. */
    private final BufferedReader reader;
    /** The local buffer of characters read from the Reader. One extra slot is
     * reserved for the low surrogate that completes a pair cut by a read. */
    private final char[] tempChunkBuf = new char[READ_CHARS_BUFFER_SIZE + 1];
    /** The text of the current chunk (so far). */
    private StringBuilder currentChunk;
    /** the size in bytes of the chunk (so far). */
    private int chunkSizeBytes = 0;
    /** the size in chars of the (base) chunk (so far). */
    private int baseChunkSizeChars;
    /** has the chunker found whitespace to break on? */
    private boolean whitespaceFound = false;
    /** has the chunker reached the end of the Reader? If so, there are no more
     * chunks, and the current chunk does not need a window. */
    private boolean endOfReaderReached = false;
    /**
     * Create a Chunker that will chunk the content of the given Reader.
     *
     * @param reader The content to chunk.
     */
    Chunker(BufferedReader reader) {
        this.reader = reader;
    }
    /**
     * Returns this object, so the Chunker can be used in a for-each loop. The
     * Chunker is its own (single use) iterator.
     *
     * @return this Chunker.
     */
    @Override
    public Iterator<Chunk> iterator() {
        return this;
    }
    /**
     * Are there more chunks? This stays true until a read has actually hit the
     * end of the Reader, so an empty Reader still produces one (empty) chunk.
     *
     * @return true if the end of the Reader has not been reached yet.
     */
    @Override
    public boolean hasNext() {
        return endOfReaderReached == false;
    }
    /**
     * Sanitize the given StringBuilder by replacing non-UTF-8 characters with
     * caret '^'. The StringBuilder is modified in place.
     *
     * @param sb the StringBuilder to sanitize
     *
     * @return the same StringBuilder instance that was passed in.
     *
     * //JMTODO: use Charsequence.chars() or codePoints() and then a mapping
     * function?
     */
    private static StringBuilder sanitizeToUTF8(StringBuilder sb) {
        final int length = sb.length();
        for (int i = 0; i < length; i++) {
            if (TextUtil.isValidSolrUTF8(sb.charAt(i)) == false) {
                sb.setCharAt(i, '^');
            }
        }
        return sb;
    }
    /**
     * Count the bytes needed to encode the given chars as UTF-8. Unlike a
     * strict encoder this never throws: an unpaired surrogate is counted with
     * the worst case of 3 bytes, so the result is an upper bound for ill-formed
     * text and exact for well-formed text.
     *
     * @param chars  The buffer holding the chars to measure.
     * @param length The number of chars, from the start of the buffer, to
     *               measure.
     *
     * @return The number of UTF-8 bytes.
     */
    private static int utf8Length(char[] chars, int length) {
        int bytes = 0;
        for (int i = 0; i < length; i++) {
            final char c = chars[i];
            if (c < 0x80) {
                bytes += 1;
            } else if (c < 0x800) {
                bytes += 2;
            } else if (Character.isHighSurrogate(c)
                    && (i + 1 < length)
                    && Character.isLowSurrogate(chars[i + 1])) {
                bytes += 4;
                i++; //the low surrogate was counted as part of the pair
            } else {
                bytes += 3;
            }
        }
        return bytes;
    }
    /**
     * Read the next chunk: a base chunk followed by an overlapping window. The
     * reader is rewound to the start of the window afterwards, so the next
     * chunk begins with the text of this chunk's window.
     *
     * @return The next Chunk, with its text sanitized and the length of its
     *         base chunk in chars.
     *
     * @throws NoSuchElementException if the end of the Reader was already
     *                                reached.
     * @throws RuntimeException       wrapping any IOException from the Reader.
     */
    @Override
    public Chunk next() {
        if (endOfReaderReached) {
            throw new NoSuchElementException("There are no more chunks.");
        }
        //reset state for the next chunk
        currentChunk = new StringBuilder();
        chunkSizeBytes = 0;
        baseChunkSizeChars = 0;
        try {
            readBaseChunk();
            baseChunkSizeChars = currentChunk.length();
            //mark the reader so we can rewind the reader here to begin the next chunk
            reader.mark(WINDOW_READ_AHEAD_LIMIT);
            readWindow();
        } catch (IOException ioEx) {
            throw new RuntimeException("IOException while reading chunk.", ioEx);
        }
        try {
            reader.reset(); //reset the reader the so the next chunk can begin at the position marked above
        } catch (IOException ex) {
            throw new RuntimeException("IOException while resetting chunk reader.", ex);
        }
        if (endOfReaderReached) {
            /* if we have reached the end of the content,we won't make another
             * overlapping chunk, so the base chunk can be extended to the end. */
            baseChunkSizeChars = currentChunk.length();
        }
        //sanitize the text and return a Chunk object, that includes the base chunk length.
        return new Chunk(sanitizeToUTF8(currentChunk), baseChunkSizeChars);
    }
    /**
     * Read the base chunk from the reader, and attempt to break at whitespace.
     *
     * @throws IOException if there is a problem reading from the reader.
     */
    private void readBaseChunk() throws IOException {
        //read the chunk until the minimum base chunk size
        readHelper(MINIMUM_BASE_CHUNK_SIZE, false);
        //keep reading until the maximum base chunk size or white space is reached.
        whitespaceFound = false;
        readHelper(MAXIMUM_BASE_CHUNK_SIZE, true);
    }
    /**
     * Read the window from the reader, and attempt to break at whitespace.
     *
     * @throws IOException if there is a problem reading from the reader.
     */
    private void readWindow() throws IOException {
        //read the window, leaving some room to look for white space to break at.
        int windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, chunkSizeBytes + WINDOW_PHASE_SIZE);
        readHelper(windowEnd, false);
        whitespaceFound = false;
        //keep reading until the max chunk size, or until whitespace is reached.
        windowEnd = Math.min(MAX_TOTAL_CHUNK_SIZE, chunkSizeBytes + WINDOW_PHASE_SIZE);
        readHelper(windowEnd, true);
    }
    /** Helper method that implements reading in a loop.
     *
     * Block reads are sized so that, even if every char needs the maximum
     * number of UTF-8 bytes, the read cannot run far past maxBytes. A read that
     * ends on a high surrogate pulls in one more char so that a surrogate pair
     * is never split across reads or chunks.
     *
     * @param maxBytes           The max cumulative length of the content, in
     *                           bytes, to read from the Reader. That is, when
     *                           chunkSizeBytes >= maxBytes stop reading.
     * @param inWhiteSpaceBuffer Should the current read stop once whitespace is
     *                           found?
     *
     * @throws IOException If there is a problem reading from the Reader.
     */
    private void readHelper(int maxBytes, boolean inWhiteSpaceBuffer) throws IOException {
        //read chars up to maxBytes, whitespaceFound if also inWhiteSpaceBuffer, or we reach the end of the reader.
        while ((chunkSizeBytes < maxBytes)
                && (false == (inWhiteSpaceBuffer && whitespaceFound))
                && (endOfReaderReached == false)) {
            final int readSize;
            if (inWhiteSpaceBuffer) {
                //only read one character at a time if we are looking for whitespace.
                readSize = 1;
            } else {
                //never request more chars than are guaranteed to fit in the remaining bytes.
                final int worstCaseFit = (maxBytes - chunkSizeBytes) / MAX_UTF8_BYTES_PER_CHAR;
                readSize = Math.max(1, Math.min(READ_CHARS_BUFFER_SIZE, worstCaseFit));
            }
            int charsRead = reader.read(tempChunkBuf, 0, readSize);
            if (-1 == charsRead) {
                //this is the last chunk
                endOfReaderReached = true;
            } else if (charsRead > 0) {
                if (Character.isHighSurrogate(tempChunkBuf[charsRead - 1])) {
                    //the read stopped in the middle of a surrogate pair: fetch the other half.
                    final int nextChar = reader.read();
                    if (-1 == nextChar) {
                        //the text ends with an unpaired surrogate; this is the last chunk
                        endOfReaderReached = true;
                    } else {
                        tempChunkBuf[charsRead++] = (char) nextChar;
                    }
                }
                if (inWhiteSpaceBuffer) {
                    //check for whitespace.
                    whitespaceFound = Character.isWhitespace(tempChunkBuf[0]);
                }
                //add read chars to the chunk and update the length.
                chunkSizeBytes += utf8Length(tempChunkBuf, charsRead);
                currentChunk.append(tempChunkBuf, 0, charsRead);
            }
        }
    }
 }
 /**
 * One chunk produced by the chunker: the chunk's text, together with the
 * length (in chars) of its base part, i.e. the part that precedes the
 * overlapping window.
 *
 * The text is held by reference; the StringBuilder handed to the constructor
 * is not copied.
 */
 class Chunk {
    /** The text of the whole chunk. */
    private final StringBuilder text;
    /** The length, in chars, of the base chunk. */
    private final int baseLength;
    /**
     * Create a chunk.
     *
     * @param sb              The text of the chunk.
     * @param baseChunkLength The length of the base chunk, in chars.
     */
    Chunk(StringBuilder sb, int baseChunkLength) {
        text = sb;
        baseLength = baseChunkLength;
    }
    /**
     * Get the length of the base chunk.
     *
     * @return The length of the base chunk, in chars.
     */
    int getBaseChunkLength() {
        return baseLength;
    }
    /**
     * Get the text of the chunk.
     *
     * @return The current content of the wrapped StringBuilder.
     */
    @Override
    public String toString() {
        return text.toString();
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
@ -158,6 +158,12 @@ public class Server {
                return "num_chunks"; //NON-NLS
            }
        },
        /**
         * Schema field whose string form is "chunk_size".
         * NOTE(review): presumably holds the size of an indexed chunk; the
         * unit and exact meaning are not visible here - confirm against the
         * Solr schema and the code that populates this field.
         */
        CHUNK_SIZE {
            @Override
            public String toString() {
                return "chunk_size"; //NON-NLS
            }
        }
    };
    public static final String HL_ANALYZE_CHARS_UNLIMITED = "500000"; //max 1MB in a chunk. use -1 for unlimited, but -1 option may not be supported (not documented)
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java
@ -19,20 +19,20 @@
 package org.sleuthkit.autopsy.keywordsearch;
 import java.io.IOException;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.openide.util.NbBundle;
 import java.net.InetAddress;
 import java.util.List;
 import java.util.MissingResourceException;
-import org.sleuthkit.autopsy.core.RuntimeProperties;
+import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.openide.util.NbBundle;
 import org.openide.util.lookup.ServiceProvider;
 import org.openide.util.lookup.ServiceProviders;
 import org.sleuthkit.autopsy.core.RuntimeProperties;
 import org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyService;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.TskCoreException;
 import org.sleuthkit.autopsy.corecomponentinterfaces.AutopsyService;
 /**
 * An implementation of the KeywordSearchService interface that uses Solr for
@ -47,7 +47,7 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService
    private static final String BAD_IP_ADDRESS_FORMAT = "ioexception occurred when talking to server"; //NON-NLS
    private static final String SERVER_REFUSED_CONNECTION = "server refused connection"; //NON-NLS
    private static final int IS_REACHABLE_TIMEOUT_MS = 1000;
-    private static final String SERVICE_NAME = "Solr Search Service";
+    private static final String SERVICE_NAME = "Solr Keyword Search Service";
    ArtifactTextExtractor extractor = new ArtifactTextExtractor();
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/StringsTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/StringsTextExtractor.java
@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.logging.Level;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.StringExtract;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
@ -37,6 +38,8 @@ import org.sleuthkit.datamodel.TskException;
 */
 class StringsTextExtractor extends FileTextExtractor {
    static final private Logger logger = Logger.getLogger(StringsTextExtractor.class.getName());
    /**
     * Options for this extractor
     */
@ -92,7 +95,12 @@ class StringsTextExtractor extends FileTextExtractor {
    }
    @Override
-    boolean isDisabled() {
+    public void logWarning(final String msg, Exception ex) {
        logger.log(Level.WARNING, msg, ex); //NON-NLS  }
    }
    @Override
    public boolean isDisabled() {
        boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
        boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
@ -100,11 +108,11 @@ class StringsTextExtractor extends FileTextExtractor {
    }
    @Override
-    InputStreamReader getReader(final InputStream stringStream, AbstractFile sourceFile) throws Ingester.IngesterException {
+    public InputStreamReader getReader(AbstractFile sourceFile) throws Ingester.IngesterException {
        InputStream stringStream = getInputStream(sourceFile);
        return new InputStreamReader(stringStream, Server.DEFAULT_INDEXED_TEXT_CHARSET);
    }
    @Override
    InputStream getInputStream(AbstractFile sourceFile) {
        //check which extract stream to use
        if (extractScripts.size() == 1 && extractScripts.get(0).equals(SCRIPT.LATIN_1)) {
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextExtractor.java
@ -18,10 +18,7 @@
 */
 package org.sleuthkit.autopsy.keywordsearch;
 import java.io.InputStream;
 import java.io.Reader;
 import java.util.logging.Level;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.datamodel.SleuthkitVisitableItem;
 /**
@ -31,9 +28,8 @@ import org.sleuthkit.datamodel.SleuthkitVisitableItem;
 * @param <TextSource> The subtype of SleuthkitVisitableItem an implementation
 *                     is able to process.
 */
-abstract class TextExtractor< TextSource extends SleuthkitVisitableItem> {
+interface TextExtractor< TextSource extends SleuthkitVisitableItem> {
    static final private Logger logger = Logger.getLogger(TextExtractor.class.getName());
    /**
     * Is this extractor configured such that no extraction will/should be done?
@ -48,18 +44,8 @@ abstract class TextExtractor< TextSource extends SleuthkitVisitableItem> {
     * @param msg
     * @param ex
     */
-    void logWarning(String msg, Exception ex) {
+    abstract void logWarning(String msg, Exception ex);
        logger.log(Level.WARNING, msg, ex); //NON-NLS  }
    }
    /**
     * Get an input stream over the content of the given source.
     *
     * @param source
     *
     * @return
     */
    abstract InputStream getInputStream(TextSource source);
    /**
     * Get a reader that over the text extracted from the given source.
@ -71,7 +57,7 @@ abstract class TextExtractor< TextSource extends SleuthkitVisitableItem> {
     *
     * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
     */
-    abstract Reader getReader(InputStream stream, TextSource source) throws Ingester.IngesterException;
+    abstract Reader getReader(TextSource source) throws Ingester.IngesterException;
    /**
     * Get the 'object' id of the given source.
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java
@ -20,7 +20,6 @@ package org.sleuthkit.autopsy.keywordsearch;
 import com.google.common.io.CharSource;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 import java.util.List;
 import java.util.MissingResourceException;
@ -36,6 +35,7 @@ import org.apache.tika.Tika;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.ParseContext;
 import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.ReadContentInputStream;
@ -46,6 +46,7 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
 */
 class TikaTextExtractor extends FileTextExtractor {
    static final private Logger logger = Logger.getLogger(TikaTextExtractor.class.getName());
    private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
    private static final List<String> TIKA_SUPPORTED_TYPES
@ -55,13 +56,15 @@ class TikaTextExtractor extends FileTextExtractor {
            .collect(Collectors.toList());
    @Override
-    void logWarning(final String msg, Exception ex) {
+    public void logWarning(final String msg, Exception ex) {
        KeywordSearch.getTikaLogger().log(Level.WARNING, msg, ex);
-        super.logWarning(msg, ex);
+        logger.log(Level.WARNING, msg, ex); //NON-NLS  }
    }
    @Override
-    Reader getReader(final InputStream stream, AbstractFile sourceFile) throws IngesterException, MissingResourceException {
+    public Reader getReader(AbstractFile sourceFile) throws IngesterException, MissingResourceException {
        ReadContentInputStream stream = new ReadContentInputStream(sourceFile);
        Metadata metadata = new Metadata();
        //Parse the file in a task, a convenient way to have a timeout...
        final Future<Reader> future = tikaParseExecutor.submit(() -> new Tika().parse(stream, metadata));
@ -117,13 +120,9 @@ class TikaTextExtractor extends FileTextExtractor {
        return TIKA_SUPPORTED_TYPES.contains(detectedFormat);
    }
    /**
     * Get an input stream for the given file, backed by a new
     * ReadContentInputStream.
     *
     * @param sourceFile1 The file to open a stream on.
     *
     * @return A new ReadContentInputStream constructed from the given file.
     */
    @Override
    InputStream getInputStream(AbstractFile sourceFile1) {
        final InputStream contentStream = new ReadContentInputStream(sourceFile1);
        return contentStream;
    }
    @Override
-    boolean isDisabled() {
+    public boolean isDisabled() {
        return false;
    }