Merge pull request #4330 from dannysmyda/4425-text-abstraction-impl

4425 - Move TextExtractors out of KWS and into Core.
2025-07-14 17:06:16 +00:00 · 2018-12-10 16:49:27 -05:00 · 2018-12-10 16:49:27 -05:00 · 14d6c52e09
commit 14d6c52e09
parent 7bd473c6d5 32a6066a75
23 changed files with 1233 additions and 754 deletions
--- a/Core/ivy.xml
+++ b/Core/ivy.xml
@ -35,6 +35,11 @@
 		<dependency conf="core->default" org="com.fasterxml.jackson.core" name="jackson-core" rev="2.9.7"/>
        <dependency conf="core->default" org="commons-validator" name="commons-validator" rev="1.6"/>
        <dependency conf="core->default" org="net.htmlparser.jericho" name="jericho-html" rev="3.3"/>
        <!-- Tika 1.14 seems to declare a (transitive?) dependency on cleartk-util 3.2.2, but the most recent
        version available is 2.0.0. Overriding the version to 2.0.0 works around this. -->
        <override  org="org.cleartk" module="cleartk-util" rev="2.0.0"/>
    </dependencies>
 </ivy-module>
--- a/Core/nbproject/project.properties
+++ b/Core/nbproject/project.properties
@ -1,26 +1,59 @@
 file.reference.activemq-all-5.11.1.jar=release/modules/ext/activemq-all-5.11.1.jar
 file.reference.apache-mime4j-core-0.8.1.jar=release/modules/ext/apache-mime4j-core-0.8.1.jar
 file.reference.apache-mime4j-dom-0.8.1.jar=release/modules/ext/apache-mime4j-dom-0.8.1.jar
 file.reference.asm-5.0.4.jar=release/modules/ext/asm-5.0.4.jar
 file.reference.bcmail-jdk15on-1.54.jar=release/modules/ext/bcmail-jdk15on-1.54.jar
 file.reference.bcprov-jdk15on-1.54.jar=release/modules/ext/bcprov-jdk15on-1.54.jar
 file.reference.boilerpipe-1.1.0.jar=release/modules/ext/boilerpipe-1.1.0.jar
 file.reference.c3p0-0.9.5.jar=release/modules/ext/c3p0-0.9.5.jar
 file.reference.cdm-4.5.5.jar=release/modules/ext/cdm-4.5.5.jar
 file.reference.commons-codec-1.6.jar=release/modules/ext/commons-codec-1.6.jar
 file.reference.commons-compress-1.14.jar=release/modules/ext/commons-compress-1.14.jar
-file.reference.commons-dbcp2-2.1.1.jar=release\\modules\\ext\\commons-dbcp2-2.1.1.jar
+file.reference.commons-dbcp2-2.1.1.jar=release/modules/ext/commons-dbcp2-2.1.1.jar
-file.reference.commons-pool2-2.4.2.jar=release\\modules\\ext\\commons-pool2-2.4.2.jar
+file.reference.commons-io-2.5.jar=release/modules/ext/commons-io-2.5.jar
 file.reference.commons-pool2-2.4.2.jar=release/modules/ext/commons-pool2-2.4.2.jar
 file.reference.dd-plist-1.20.jar=release/modules/ext/dd-plist-1.20.jar
 file.reference.geoapi-3.0.0.jar=release/modules/ext/geoapi-3.0.0.jar
 file.reference.grib-4.5.5.jar=release/modules/ext/grib-4.5.5.jar
 file.reference.gson-2.8.1.jar=release/modules/ext/gson-2.8.1.jar
 file.reference.httpservices-4.5.5.jar=release/modules/ext/httpservices-4.5.5.jar
 file.reference.isoparser-1.1.18.jar=release/modules/ext/isoparser-1.1.18.jar
 file.reference.jackcess-2.2.0.jar=release/modules/ext/jackcess-2.2.0.jar
 file.reference.jackcess-encrypt-2.1.4.jar=release/modules/ext/jackcess-encrypt-2.1.4.jar
 file.reference.java-libpst-0.8.1.jar=release/modules/ext/java-libpst-0.8.1.jar
 file.reference.jcl-over-slf4j-1.7.24.jar=release/modules/ext/jcl-over-slf4j-1.7.24.jar
 file.reference.jackson-core-2.9.7.jar=release/modules/ext/jackson-core-2.9.7.jar
 file.reference.jdom-2.0.5-contrib.jar=release/modules/ext/jdom-2.0.5-contrib.jar
 file.reference.jdom-2.0.5.jar=release/modules/ext/jdom-2.0.5.jar
 file.reference.jericho-html-3.3.jar=release/modules/ext/jericho-html-3.3.jar
 file.reference.jgraphx-v3.8.0.jar=release/modules/ext/jgraphx-v3.8.0.jar
 file.reference.jhighlight-1.0.2.jar=release/modules/ext/jhighlight-1.0.2.jar
 file.reference.jmatio-1.2.jar=release/modules/ext/jmatio-1.2.jar
 file.reference.json-1.8.jar=release/modules/ext/json-1.8.jar
 file.reference.json-simple-1.1.1.jar=release/modules/ext/json-simple-1.1.1.jar
 file.reference.jsoup-1.10.3.jar=release/modules/ext/jsoup-1.10.3.jar
 file.reference.jul-to-slf4j-1.7.24.jar=release/modules/ext/jul-to-slf4j-1.7.24.jar
 file.reference.juniversalchardet-1.0.3.jar=release/modules/ext/juniversalchardet-1.0.3.jar
 file.reference.junrar-0.7.jar=release/modules/ext/junrar-0.7.jar
 file.reference.jython-standalone-2.7.0.jar=release/modules/ext/jython-standalone-2.7.0.jar
 file.reference.mchange-commons-java-0.2.9.jar=release/modules/ext/mchange-commons-java-0.2.9.jar
 file.reference.metadata-extractor-2.10.1.jar=release/modules/ext/metadata-extractor-2.10.1.jar
 file.reference.netcdf4-4.5.5.jar=release/modules/ext/netcdf4-4.5.5.jar
 file.reference.opennlp-tools-1.8.3.jar=release/modules/ext/opennlp-tools-1.8.3.jar
 file.reference.poi-3.17.jar=release/modules/ext/poi-3.17.jar
 file.reference.poi-ooxml-3.17.jar=release/modules/ext/poi-ooxml-3.17.jar
 file.reference.poi-scratchpad-3.17.jar=release/modules/ext/poi-scratchpad-3.17.jar
 file.reference.postgresql-9.4.1211.jre7.jar=release/modules/ext/postgresql-9.4.1211.jre7.jar
 file.reference.Rejistry-1.0-SNAPSHOT.jar=release/modules/ext/Rejistry-1.0-SNAPSHOT.jar
 file.reference.rome-1.5.1.jar=release/modules/ext/rome-1.5.1.jar
 file.reference.sevenzipjbinding-AllPlatforms.jar=release/modules/ext/sevenzipjbinding-AllPlatforms.jar
 file.reference.sevenzipjbinding.jar=release/modules/ext/sevenzipjbinding.jar
-file.reference.sqlite-jdbc-3.8.11.jar=release\\modules\\ext\\sqlite-jdbc-3.8.11.jar
+file.reference.sis-metadata-0.6.jar=release/modules/ext/sis-metadata-0.6.jar
 file.reference.sis-netcdf-0.6.jar=release/modules/ext/sis-netcdf-0.6.jar
 file.reference.sis-utility-0.6.jar=release/modules/ext/sis-utility-0.6.jar
 file.reference.slf4j-api-1.7.24.jar=release/modules/ext/slf4j-api-1.7.24.jar
 file.reference.sqlite-jdbc-3.8.11.jar=release/modules/ext/sqlite-jdbc-3.8.11.jar
 file.reference.StixLib.jar=release/modules/ext/StixLib.jar
 file.reference.bcprov-jdk15on-1.54.jar=release/modules/ext/bcprov-jdk15on-1.54.jar
 file.reference.jackcess-2.2.0.jar=release/modules/ext/jackcess-2.2.0.jar
 file.reference.jackcess-encrypt-2.1.4.jar=release/modules/ext/jackcess-encrypt-2.1.4.jar
 file.reference.jempbox-1.8.13.jar=release/modules/ext/jempbox-1.8.13.jar
 file.reference.javax.ws.rs-api-2.0.1.jar=release/modules/ext/javax.ws.rs-api-2.0.1.jar
 file.reference.cxf-core-3.0.16.jar=release/modules/ext/cxf-core-3.0.16.jar
@ -31,11 +64,14 @@ file.reference.fontbox-2.0.8.jar=release/modules/ext/fontbox-2.0.8.jar
 file.reference.pdfbox-2.0.8.jar=release/modules/ext/pdfbox-2.0.8.jar
 file.reference.pdfbox-tools-2.0.8.jar=release/modules/ext/pdfbox-tools-2.0.8.jar
 file.reference.sleuthkit-postgresql-4.6.4.jar=release/modules/ext/sleuthkit-postgresql-4.6.4.jar
 file.reference.tagsoup-1.2.1.jar=release/modules/ext/tagsoup-1.2.1.jar
 file.reference.tika-core-1.17.jar=release/modules/ext/tika-core-1.17.jar
 file.reference.tika-parsers-1.17.jar=release/modules/ext/tika-parsers-1.17.jar
 file.reference.curator-client-2.8.0.jar=release/modules/ext/curator-client-2.8.0.jar
 file.reference.curator-framework-2.8.0.jar=release/modules/ext/curator-framework-2.8.0.jar
 file.reference.curator-recipes-2.8.0.jar=release/modules/ext/curator-recipes-2.8.0.jar
 file.reference.vorbis-java-core-0.8.jar=release/modules/ext/vorbis-java-core-0.8.jar
 file.reference.vorbis-java-tika-0.8.jar=release/modules/ext/vorbis-java-tika-0.8.jar
 file.reference.xmpcore-5.1.3.jar=release/modules/ext/xmpcore-5.1.3.jar
 file.reference.xz-1.6.jar=release/modules/ext/xz-1.6.jar
 file.reference.zookeeper-3.4.6.jar=release/modules/ext/zookeeper-3.4.6.jar
--- a/Core/nbproject/project.xml
+++ b/Core/nbproject/project.xml
@ -338,81 +338,59 @@
                <package>org.sleuthkit.autopsy.modules.vmextractor</package>
                <package>org.sleuthkit.autopsy.progress</package>
                <package>org.sleuthkit.autopsy.report</package>
                <package>org.sleuthkit.autopsy.textextractors</package>
                <package>org.sleuthkit.autopsy.textextractors.extractionconfigs</package>
                <package>org.sleuthkit.autopsy.texttranslation</package>
                <package>org.sleuthkit.datamodel</package>
            </public-packages>
            <class-path-extension>
                <runtime-relative-path>ext/apache-mime4j-dom-0.8.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/apache-mime4j-dom-0.8.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jackcess-2.2.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jackcess-2.2.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/zookeeper-3.4.6.jar</runtime-relative-path>
+                <runtime-relative-path>ext/jericho-html-3.3.jar</runtime-relative-path>
-                <binary-origin>release/modules/ext/zookeeper-3.4.6.jar</binary-origin>
+                <binary-origin>release/modules/ext/jericho-html-3.3.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/jdom-2.0.5.jar</runtime-relative-path>
+                <runtime-relative-path>ext/cdm-4.5.5.jar</runtime-relative-path>
-                <binary-origin>release/modules/ext/jdom-2.0.5.jar</binary-origin>
+                <binary-origin>release/modules/ext/cdm-4.5.5.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/cxf-rt-transports-http-3.0.16.jar</runtime-relative-path>
+                <runtime-relative-path>ext/httpservices-4.5.5.jar</runtime-relative-path>
-                <binary-origin>release/modules/ext/cxf-rt-transports-http-3.0.16.jar</binary-origin>
+                <binary-origin>release/modules/ext/httpservices-4.5.5.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/commons-validator-1.6.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/commons-validator-1.6.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/curator-framework-2.8.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/curator-framework-2.8.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/bcprov-jdk15on-1.54.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/bcprov-jdk15on-1.54.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/commons-compress-1.14.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/commons-compress-1.14.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/fontbox-2.0.8.jar</runtime-relative-path>
+                <runtime-relative-path>ext/geoapi-3.0.0.jar</runtime-relative-path>
-                <binary-origin>release/modules/ext/fontbox-2.0.8.jar</binary-origin>
+                <binary-origin>release/modules/ext/geoapi-3.0.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/commons-dbcp2-2.1.1.jar</runtime-relative-path>
+                <runtime-relative-path>ext/boilerpipe-1.1.0.jar</runtime-relative-path>
-                <binary-origin>release\modules\ext\commons-dbcp2-2.1.1.jar</binary-origin>
+                <binary-origin>release/modules/ext/boilerpipe-1.1.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jgraphx-v3.8.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jgraphx-v3.8.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jython-standalone-2.7.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jython-standalone-2.7.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/sevenzipjbinding.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/sevenzipjbinding.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/sleuthkit-postgresql-4.6.4.jar</runtime-relative-path>
+                <runtime-relative-path>ext/bcmail-jdk15on-1.54.jar</runtime-relative-path>
-                <binary-origin>release/modules/ext/sleuthkit-postgresql-4.6.4.jar</binary-origin>
+                <binary-origin>release/modules/ext/bcmail-jdk15on-1.54.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/mchange-commons-java-0.2.9.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/mchange-commons-java-0.2.9.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/cxf-core-3.0.16.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/cxf-core-3.0.16.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/javax.ws.rs-api-2.0.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/javax.ws.rs-api-2.0.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/postgresql-9.4.1211.jre7.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/postgresql-9.4.1211.jre7.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/curator-recipes-2.8.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/curator-recipes-2.8.0.jar</binary-origin>
@ -421,6 +399,14 @@
                <runtime-relative-path>ext/metadata-extractor-2.10.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/metadata-extractor-2.10.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/apache-mime4j-core-0.8.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/apache-mime4j-core-0.8.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/tagsoup-1.2.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/tagsoup-1.2.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/tika-core-1.17.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/tika-core-1.17.jar</binary-origin>
@ -429,45 +415,37 @@
                <runtime-relative-path>ext/StixLib.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/StixLib.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/curator-client-2.8.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/curator-client-2.8.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jackson-core-2.9.7.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jackson-core-2.9.7.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/cxf-rt-frontend-jaxrs-3.0.16.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/cxf-rt-frontend-jaxrs-3.0.16.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/pdfbox-tools-2.0.8.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/pdfbox-tools-2.0.8.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/asm-5.0.4.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/asm-5.0.4.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jcl-over-slf4j-1.7.24.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jcl-over-slf4j-1.7.24.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/tika-parsers-1.17.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/tika-parsers-1.17.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/sqlite-jdbc-3.8.11.jar</runtime-relative-path>
-                <binary-origin>release\modules\ext\sqlite-jdbc-3.8.11.jar</binary-origin>
+                <binary-origin>release/modules/ext/sqlite-jdbc-3.8.11.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/activemq-all-5.11.1.jar</runtime-relative-path>
+                <runtime-relative-path>ext/json-simple-1.1.1.jar</runtime-relative-path>
-                <binary-origin>release/modules/ext/activemq-all-5.11.1.jar</binary-origin>
+                <binary-origin>release/modules/ext/json-simple-1.1.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/xz-1.6.jar</runtime-relative-path>
+                <runtime-relative-path>ext/sis-utility-0.6.jar</runtime-relative-path>
-                <binary-origin>release/modules/ext/xz-1.6.jar</binary-origin>
+                <binary-origin>release/modules/ext/sis-utility-0.6.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
-                <runtime-relative-path>ext/Rejistry-1.0-SNAPSHOT.jar</runtime-relative-path>
+                <runtime-relative-path>ext/jhighlight-1.0.2.jar</runtime-relative-path>
-                <binary-origin>release/modules/ext/Rejistry-1.0-SNAPSHOT.jar</binary-origin>
+                <binary-origin>release/modules/ext/jhighlight-1.0.2.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/dd-plist-1.20.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/dd-plist-1.20.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jempbox-1.8.13.jar</runtime-relative-path>
@ -477,21 +455,9 @@
                <runtime-relative-path>ext/cxf-rt-rs-client-3.0.16.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/cxf-rt-rs-client-3.0.16.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/sevenzipjbinding-AllPlatforms.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/sevenzipjbinding-AllPlatforms.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/commons-pool2-2.4.2.jar</runtime-relative-path>
-                <binary-origin>release\modules\ext\commons-pool2-2.4.2.jar</binary-origin>
+                <binary-origin>release/modules/ext/commons-pool2-2.4.2.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jackcess-encrypt-2.1.4.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jackcess-encrypt-2.1.4.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jsoup-1.10.3.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jsoup-1.10.3.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jdom-2.0.5-contrib.jar</runtime-relative-path>
@ -513,6 +479,190 @@
                <runtime-relative-path>ext/xmpcore-5.1.3.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/xmpcore-5.1.3.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/zookeeper-3.4.6.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/zookeeper-3.4.6.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jdom-2.0.5.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jdom-2.0.5.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/cxf-rt-transports-http-3.0.16.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/cxf-rt-transports-http-3.0.16.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/sis-metadata-0.6.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/sis-metadata-0.6.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/isoparser-1.1.18.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/isoparser-1.1.18.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/sleuthkit-postgresql-4.6.4.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/sleuthkit-postgresql-4.6.4.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/vorbis-java-core-0.8.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/vorbis-java-core-0.8.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/commons-codec-1.6.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/commons-codec-1.6.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/netcdf4-4.5.5.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/netcdf4-4.5.5.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/slf4j-api-1.7.24.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/slf4j-api-1.7.24.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/java-libpst-0.8.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/java-libpst-0.8.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jul-to-slf4j-1.7.24.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jul-to-slf4j-1.7.24.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/gson-2.8.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/gson-2.8.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/poi-3.17.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/poi-3.17.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/poi-scratchpad-3.17.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/poi-scratchpad-3.17.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/sis-netcdf-0.6.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/sis-netcdf-0.6.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/commons-io-2.5.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/commons-io-2.5.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/curator-framework-2.8.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/curator-framework-2.8.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/bcprov-jdk15on-1.54.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/bcprov-jdk15on-1.54.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/fontbox-2.0.8.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/fontbox-2.0.8.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/commons-dbcp2-2.1.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/commons-dbcp2-2.1.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jgraphx-v3.8.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jgraphx-v3.8.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/juniversalchardet-1.0.3.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/juniversalchardet-1.0.3.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jython-standalone-2.7.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jython-standalone-2.7.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jackcess-encrypt-2.1.4.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jackcess-encrypt-2.1.4.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/cxf-core-3.0.16.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/cxf-core-3.0.16.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/javax.ws.rs-api-2.0.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/javax.ws.rs-api-2.0.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/opennlp-tools-1.8.3.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/opennlp-tools-1.8.3.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/junrar-0.7.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/junrar-0.7.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/postgresql-9.4.1211.jre7.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/postgresql-9.4.1211.jre7.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/poi-ooxml-3.17.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/poi-ooxml-3.17.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/curator-client-2.8.0.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/curator-client-2.8.0.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jackson-core-2.9.7.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jackson-core-2.9.7.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/cxf-rt-frontend-jaxrs-3.0.16.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/cxf-rt-frontend-jaxrs-3.0.16.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/grib-4.5.5.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/grib-4.5.5.jar</binary-origin>
            </class-path-extension>
            <!-- NOTE(review): jackson-core-2.9.7.jar is also registered as a class-path-extension in this
                 file, and the ivy.xml / project.properties entries visible in this change reference only
                 2.9.7. This 2.9.2 entry looks stale; confirm whether the jar is still shipped. -->
            <class-path-extension>
                <runtime-relative-path>ext/jackson-core-2.9.2.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jackson-core-2.9.2.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/activemq-all-5.11.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/activemq-all-5.11.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/xz-1.6.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/xz-1.6.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/Rejistry-1.0-SNAPSHOT.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/Rejistry-1.0-SNAPSHOT.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/dd-plist-1.20.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/dd-plist-1.20.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/rome-1.5.1.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/rome-1.5.1.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/sevenzipjbinding-AllPlatforms.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/sevenzipjbinding-AllPlatforms.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jmatio-1.2.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jmatio-1.2.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/jsoup-1.10.3.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/jsoup-1.10.3.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/vorbis-java-tika-0.8.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/vorbis-java-tika-0.8.jar</binary-origin>
            </class-path-extension>
            <class-path-extension>
                <runtime-relative-path>ext/json-1.8.jar</runtime-relative-path>
                <binary-origin>release/modules/ext/json-1.8.jar</binary-origin>
            </class-path-extension>
        </data>
    </configuration>
 </project>
--- a/Core/src/org/sleuthkit/autopsy/textextractors/ArtifactTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/ArtifactTextExtractor.java
@ -0,0 +1,89 @@
 /*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.sleuthkit.autopsy.textextractors;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 import org.apache.commons.io.IOUtils;
 import org.sleuthkit.autopsy.datamodel.ContentUtils;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardAttribute;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.TskCoreException;
 /**
 * Extracts text from artifacts by concatenating the values of all of the
 * artifact's attributes.
 */
 class ArtifactTextExtractor extends TextExtractor {
    // The artifact whose attribute values are rendered as text.
    private final BlackboardArtifact artifact;
    /**
     * Creates an extractor for the given artifact.
     *
     * @param artifact Content instance that is cast to a BlackboardArtifact
     */
    public ArtifactTextExtractor(Content artifact) {
        this.artifact = (BlackboardArtifact) artifact;
    }
    /**
     * Builds one line of text per attribute, in the form
     * "display name : value", and exposes the result as a UTF-8 Reader.
     *
     * @return A reader over the concatenated attribute text
     *
     * @throws ExtractionException If the data source or the attributes of
     *                             the artifact cannot be obtained
     */
    @Override
    public Reader getReader() throws ExtractionException {
        // The data source is resolved first; it is needed to render
        // DATETIME attributes.
        final Content source = lookupDataSource();
        final StringBuilder text = new StringBuilder();
        try {
            for (BlackboardAttribute attr : artifact.getAttributes()) {
                text.append(attr.getAttributeType().getDisplayName())
                        .append(" : ")
                        .append(renderValue(attr, source))
                        .append(System.lineSeparator());
            }
        } catch (TskCoreException ex) {
            throw new ExtractionException("Unable to get attributes for artifact: " + artifact.toString(), ex);
        }
        return new InputStreamReader(
                IOUtils.toInputStream(text, StandardCharsets.UTF_8),
                StandardCharsets.UTF_8);
    }
    /**
     * Fetches the data source of the artifact.
     *
     * @return The data source, never null
     *
     * @throws ExtractionException If the lookup fails or yields null
     */
    private Content lookupDataSource() throws ExtractionException {
        final Content source;
        try {
            source = artifact.getDataSource();
        } catch (TskCoreException ex) {
            throw new ExtractionException("Unable to get datasource for artifact: " + artifact.toString(), ex);
        }
        if (source != null) {
            return source;
        }
        throw new ExtractionException("Datasource was null for artifact: " + artifact.toString());
    }
    /**
     * Produces the text for a single attribute value.
     *
     * DATETIME values are formatted through ContentUtils using the data
     * source; every other value type uses the attribute's display string.
     *
     * @param attr   Attribute to render
     * @param source Data source handed to ContentUtils for DATETIME values
     *
     * @return The rendered value
     *
     * @throws TskCoreException Declared so that any checked failure from
     *                          the attribute accessors reaches the caller
     */
    private String renderValue(BlackboardAttribute attr, Content source) throws TskCoreException {
        switch (attr.getValueType()) {
            case DATETIME:
                return ContentUtils.getStringTime(attr.getValueLong(), source);
            default:
                return attr.getDisplayString();
        }
    }
    /**
     * Reports support for any input.
     *
     * @param file           Ignored
     * @param detectedFormat Ignored
     *
     * @return Always true
     */
    @Override
    public boolean isSupported(Content file, String detectedFormat) {
        return true;
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HtmlTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HtmlTextExtractor.java
@ -16,7 +16,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.keywordsearch;
+package org.sleuthkit.autopsy.textextractors;
 import java.io.IOException;
 import java.io.Reader;
@ -38,10 +38,11 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
 /**
 * Extracts text from HTML content.
 */
-class HtmlTextExtractor extends ContentTextExtractor {
+final class HtmlTextExtractor extends TextExtractor {
    static final private Logger logger = Logger.getLogger(HtmlTextExtractor.class.getName());
-    private static final int MAX_SIZE = 50_000_000; //50MB
+    private final int MAX_SIZE;
    private final Content file;
    static final List<String> WEB_MIME_TYPES = Arrays.asList(
            "application/javascript", //NON-NLS
@ -51,27 +52,51 @@ class HtmlTextExtractor extends ContentTextExtractor {
            "text/html", //NON-NLS NON-NLS
            "text/javascript" //NON-NLS
    );
-    
+
    static {
        // Disable Jericho HTML Parser log messages.
        Config.LoggerProvider = LoggerProvider.DISABLED;
    }
-    @Override
+    /**
-    boolean isContentTypeSpecific() {
+     * Creates a default instance of the HtmlTextExtractor. Supported file size
-        return true;
+     * is 50MB.
     */
    public HtmlTextExtractor(Content file) {
        //Set default to be 50 MB.
        MAX_SIZE = 50_000_000;
        this.file = file;
    }
    /**
     * Determines if this content type is supported by this extractor.
     *
     * @param content        Content instance to be analyzed
     * @param detectedFormat Mimetype of content instance
     *
     * @return flag indicating support
     */
    @Override
-    boolean isSupported(Content content, String detectedFormat) {
+    public boolean isSupported(Content content, String detectedFormat) {
        return detectedFormat != null
                && WEB_MIME_TYPES.contains(detectedFormat)
                && content.getSize() <= MAX_SIZE;
    }
    /**
     * Returns a reader that will iterate over the text of an HTML document.
     *
     * @param content Html document source
     *
     * @return A reader instance containing the document source text
     *
     * @throws ExtractionException if the HTML content cannot be read
     */
    @Override
-    public Reader getReader(Content content) throws TextExtractorException {
+    public Reader getReader() throws ExtractionException {
-        ReadContentInputStream stream = new ReadContentInputStream(content);
+        //TODO JIRA-4467, there is only harm in excluding HTML documents greater
        //than 50MB due to our troubled approach of extraction.
        ReadContentInputStream stream = new ReadContentInputStream(file);
        //Parse the stream with Jericho and put the results in a Reader
        try {
@ -164,17 +189,8 @@ class HtmlTextExtractor extends ContentTextExtractor {
            // All done, now make it a reader
            return new StringReader(stringBuilder.toString());
        } catch (IOException ex) {
-            throw new TextExtractorException("Error extracting HTML from content.", ex);
+            logger.log(Level.WARNING, "Error extracting HTML from content.", ex);
            throw new ExtractionException("Error extracting HTML from content.", ex);
        }
    }
    @Override
    public boolean isDisabled() {
        return false;
    }
    @Override
    public void logWarning(final String msg, Exception ex) {
        logger.log(Level.WARNING, msg, ex); //NON-NLS  }
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SqliteTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SqliteTextExtractor.java
@ -1,24 +1,23 @@
-/*	
+/*
- * Autopsy Forensic Browser	
+ * Autopsy Forensic Browser
- *	
+ *
- * Copyright 2018-2018 Basis Technology Corp.	
+ * Copyright 2018-2018 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org	
+ * Contact: carrier <at> sleuthkit <dot> org
- *	
+ *
- * Licensed under the Apache License, Version 2.0 (the "License");	
+ * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.	
+ * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at	
+ * You may obtain a copy of the License at
- *	
+ *
- *     http://www.apache.org/licenses/LICENSE-2.0	
+ *     http://www.apache.org/licenses/LICENSE-2.0
- *	
+ *
- * Unless required by applicable law or agreed to in writing, software	
+ * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,	
+ * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.	
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and	
+ * See the License for the specific language governing permissions and
- * limitations under the License.	
+ * limitations under the License.
 */
-package org.sleuthkit.autopsy.keywordsearch;
+package org.sleuthkit.autopsy.textextractors;
 import com.google.common.io.CharSource;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.Iterator;
@ -28,37 +27,27 @@ import java.util.logging.Level;
 import org.sleuthkit.autopsy.coreutils.SQLiteTableReaderException;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.SQLiteTableReader;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.Content;
 /**
- * Dedicated SqliteTextExtractor to solve the problems associated with Tika's
+ * Extracts text from SQLite database files.
 * Sqlite parser.
 *
- * Tika problems: 1) Tika fails to open virtual tables 2) Tika fails to open
+ * This is a dedicated solution to address the problems associated with 
- * tables with spaces in table name 3) Tika fails to include the table names in
+ * Tika's sqlite parser (version 1.17), which include the following:
- * output (except for the first table it parses)
+ *  1) Virtual tables cause the parser to bail
 *  2) Tables that contain spaces in their name are not extracted
 *  3) Table names are not included in its output text
 */
-class SqliteTextExtractor extends ContentTextExtractor {
+final class SqliteTextExtractor extends TextExtractor {
    private static final String SQLITE_MIMETYPE = "application/x-sqlite3";
    private static final Logger logger = Logger.getLogger(SqliteTextExtractor.class.getName());
    private final AbstractFile file;
-    @Override
+    public SqliteTextExtractor(Content file) {
-    boolean isContentTypeSpecific() {
+        this.file = (AbstractFile) file;
        return true;
    }
    @Override
    public boolean isDisabled() {
        return false;
    }
    @Override
    public void logWarning(String msg, Exception exception) {
        logger.log(Level.WARNING, msg, exception); //NON-NLS
    }
    /**
     * Supports only the sqlite mimetypes
     *
@ -68,44 +57,34 @@ class SqliteTextExtractor extends ContentTextExtractor {
     * @return true if x-sqlite3
     */
    @Override
-    boolean isSupported(Content file, String detectedFormat) {
+    public boolean isSupported(Content file, String detectedFormat) {
        return SQLITE_MIMETYPE.equals(detectedFormat);
    }
    /**
-     * Returns a stream that will read from a sqlite database.
+     * Returns a reader that will iterate over the text of a sqlite database.
     *
     * @param source Content file
     *
-     * @return An InputStream that reads from a Sqlite database.
+     * @return An InputStream that reads from a Sqlite database
     *
-     * @throws
+     * @throws TextExtractorException
     * org.sleuthkit.autopsy.keywordsearch.TextExtractor.TextExtractorException
     */
    @Override
-    public Reader getReader(Content source) throws TextExtractorException {
+    public Reader getReader() throws ExtractionException {
-        //Firewall for any content that is not an AbstractFile
+        return new SQLiteStreamReader(file);
        if (!AbstractFile.class.isInstance(source)) {
            try {
                return CharSource.wrap("").openStream();
            } catch (IOException ex) {
                throw new TextExtractorException("", ex);
            }
        }
        return new SQLiteStreamReader((AbstractFile) source);
    }
-
+    
    /**
     * Produces a continuous stream of characters from a database file. To
     * achieve this, all table names are queued up and a SQLiteTableReader is
     * used to do the actual queries and table iteration.
     */
-    public class SQLiteStreamReader extends Reader {
+    private class SQLiteStreamReader extends Reader {
        private final SQLiteTableReader reader;
        private final AbstractFile file;
-        
+
        private Iterator<String> tableNames;
        private String currentTableName;
@ -217,9 +196,10 @@ class SqliteTextExtractor extends ContentTextExtractor {
        }
        /**
-         * Reads database values into the buffer. This function is responsible for 
+         * Reads database values into the buffer. This function is responsible
-         * getting the next table in the queue, initiating calls to the SQLiteTableReader,
+         * for getting the next table in the queue, initiating calls to the
-         * and filling in any excess bytes that are lingering from the previous call.
+         * SQLiteTableReader, and filling in any excess bytes that are lingering
         * from the previous call.
         *
         * @throws IOException
         */
@ -255,9 +235,9 @@ class SqliteTextExtractor extends ContentTextExtractor {
                            reader.read(currentTableName, () -> bufIndex == len);
                        } catch (SQLiteTableReaderException ex) {
                            logger.log(Level.WARNING, String.format(
-                                "Error attempting to read file table: [%s]" //NON-NLS
+                                    "Error attempting to read file table: [%s]" //NON-NLS
-                                + " for file: [%s] (id=%d).", currentTableName, //NON-NLS
+                                    + " for file: [%s] (id=%d).", currentTableName, //NON-NLS
-                                file.getName(), file.getId()), ex.getMessage());
+                                    file.getName(), file.getId()), ex.getMessage());
                        }
                    } else {
                        if (bufIndex == off) {
@ -290,8 +270,8 @@ class SqliteTextExtractor extends ContentTextExtractor {
        }
        /**
-         * Wrapper that holds the excess bytes that were left over from the previous
+         * Wrapper that holds the excess bytes that were left over from the
-         * call to read().
+         * previous call to read().
         */
        private class ExcessBytes {
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/StringsTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/StringsTextExtractor.java
@ -16,19 +16,19 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.keywordsearch;
+package org.sleuthkit.autopsy.textextractors;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
+import java.util.Objects;
-import java.util.logging.Level;
+import org.openide.util.Lookup;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.StringExtract;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
 import org.sleuthkit.autopsy.textextractors.extractionconfigs.DefaultExtractionConfig;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.TskCoreException;
 import org.sleuthkit.datamodel.TskException;
@ -36,24 +36,25 @@ import org.sleuthkit.datamodel.TskException;
 /**
 * Extracts raw strings from content.
 */
-class StringsTextExtractor extends ContentTextExtractor {
+final class StringsTextExtractor extends TextExtractor {
-    static final private Logger logger = Logger.getLogger(StringsTextExtractor.class.getName());
+    private boolean extractUTF8;
-
+    private boolean extractUTF16;
-    /**
+    private final Content content;
-     * Options for this extractor
+    private final static String DEFAULT_INDEXED_TEXT_CHARSET = "UTF-8";
     */
    enum ExtractOptions {
        EXTRACT_UTF16, ///< extract UTF16 text, true/false
        EXTRACT_UTF8, ///< extract UTF8 text, true/false
    };
    private final List<SCRIPT> extractScripts = new ArrayList<>();
    private Map<String, String> extractOptions = new HashMap<>();
-    public StringsTextExtractor() {
+    /**
     * Creates a default StringsTextExtractor instance. The instance will be
     * configured to run only LATIN_2 as its default extraction script and UTF-8
     * as its default encoding.
     */
    public StringsTextExtractor(Content content) {
        //LATIN_2 is the default script
        extractScripts.add(SCRIPT.LATIN_2);
        extractUTF8 = true;
        this.content = content;
    }
    /**
@ -61,56 +62,29 @@ class StringsTextExtractor extends ContentTextExtractor {
     *
     * @param extractScripts scripts to use
     */
-    public void setScripts(List<SCRIPT> extractScripts) {
+    public final void setScripts(List<SCRIPT> extractScripts) {
        if (extractScripts == null) {
            return;
        }
        this.extractScripts.clear();
        this.extractScripts.addAll(extractScripts);
    }
    /**
-     * Get the currently used scripts for extraction
+     * Returns a reader that will iterate over the text of the content source.
     *
-     * @return scripts currently used or null if not supported
+     * @param content Content source of any type
     */
    public List<SCRIPT> getScripts() {
        return new ArrayList<>(extractScripts);
    }
    /**
     * Get current options
     *
-     * @return currently used, extractor specific options, or null of not
+     * @return A reader instance that content text can be obtained from
     *         supported
     */
    public Map<String, String> getOptions() {
        return extractOptions;
    }
    /**
     * Set extractor specific options
     *
-     * @param options options to use
+     * @throws
     * org.sleuthkit.autopsy.textextractors.TextExtractor.TextExtractorException
     */
    public void setOptions(Map<String, String> options) {
        this.extractOptions = options;
    }
    @Override
-    public void logWarning(final String msg, Exception ex) {
+    public InputStreamReader getReader() {
        logger.log(Level.WARNING, msg, ex); //NON-NLS  }
    }
    @Override
    public boolean isDisabled() {
        boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
        boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
        return extractUTF8 == false && extractUTF16 == false;
    }
    @Override
    public InputStreamReader getReader(Content content) throws TextExtractorException {
        InputStream stringStream = getInputStream(content);
-        return new InputStreamReader(stringStream, Server.DEFAULT_INDEXED_TEXT_CHARSET);
+        return new InputStreamReader(stringStream, Charset.forName(DEFAULT_INDEXED_TEXT_CHARSET));
    }
    InputStream getInputStream(Content content) {
@ -118,27 +92,55 @@ class StringsTextExtractor extends ContentTextExtractor {
        if (extractScripts.size() == 1 && extractScripts.get(0).equals(SCRIPT.LATIN_1)) {
            return new EnglishOnlyStream(content);//optimal for english, english only
        } else {
            boolean extractUTF8 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF8.toString()));
            boolean extractUTF16 = Boolean.parseBoolean(extractOptions.get(ExtractOptions.EXTRACT_UTF16.toString()));
            return new InternationalStream(content, extractScripts, extractUTF8, extractUTF16);
        }
    }
    /**
     * Determines how the extraction process will proceed given the settings
     * stored in this context instance.
     *
     * See the DefaultExtractionConfig class in the extractionconfigs package
     * for available settings.
     *
     * @param context Lookup instance containing config classes
     */
    @Override
-    public boolean isContentTypeSpecific() {
+    public void setExtractionSettings(Lookup context) {
-        return false;
+        if (context != null) {
-    }
+            DefaultExtractionConfig configInstance = context.lookup(DefaultExtractionConfig.class);
-
+            if (configInstance == null) {
-    @Override
+                return;
-    public boolean isSupported(Content content, String detectedFormat) {
+            }
-        // strings can be run on anything. 
+            if (Objects.nonNull(configInstance.getExtractUTF8())) {
-        return true;
+                extractUTF8 = configInstance.getExtractUTF8();
            }
            if (Objects.nonNull(configInstance.getExtractUTF16())) {
                extractUTF16 = configInstance.getExtractUTF16();
            }
            if (Objects.nonNull(configInstance.getExtractScripts())) {
                setScripts(configInstance.getExtractScripts());
            }
        }
    }
    /**
-     * Content input string stream reader/converter - given Content,
+     *
-     * extract strings from it and return encoded bytes via read()
+     * @return
     */
    @Override
    public boolean isEnabled() {
        return extractUTF8 || extractUTF16;
    }
    @Override
    boolean isSupported(Content file, String detectedFormat) {
        throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
    }
    /**
     * Content input string stream reader/converter - given Content, extract
     * strings from it and return encoded bytes via read()
     *
     * Note: the utility supports extraction of only LATIN script and UTF8,
     * UTF16LE, UTF16BE encodings and uses a brute force encoding detection -
@ -150,7 +152,6 @@ class StringsTextExtractor extends ContentTextExtractor {
     */
    private static class EnglishOnlyStream extends InputStream {
        private static final Logger logger = Logger.getLogger(EnglishOnlyStream.class.getName());
        private static final String NLS = Character.toString((char) 10); //new line
        private static final int READ_BUF_SIZE = 65536;
        private static final int MIN_PRINTABLE_CHARS = 4; //num. of chars needed to qualify as a char string
@ -244,12 +245,7 @@ class StringsTextExtractor extends ContentTextExtractor {
                }
                //get char from cur read buf
                char c = (char) curReadBuf[readBufOffset++];
-                if (c == 0 && singleConsecZero == false) {
+                singleConsecZero = c == 0 && singleConsecZero == false; //preserve the current sequence if max consec. 1 zero char
                    //preserve the current sequence if max consec. 1 zero char
                    singleConsecZero = true;
                } else {
                    singleConsecZero = false;
                }
                if (StringExtract.isPrintableAscii(c)) {
                    tempString.append(c);
                    ++tempStringLen;
@ -328,7 +324,7 @@ class StringsTextExtractor extends ContentTextExtractor {
        private int copyToReturn(byte[] b, int off, long len) {
            final String curStringS = curString.toString();
            //logger.log(Level.INFO, curStringS);
-            byte[] stringBytes = curStringS.getBytes(Server.DEFAULT_INDEXED_TEXT_CHARSET);
+            byte[] stringBytes = curStringS.getBytes(Charset.forName(DEFAULT_INDEXED_TEXT_CHARSET));
            System.arraycopy(stringBytes, 0, b, off, Math.min(curStringLen, (int) len));
            //logger.log(Level.INFO, curStringS);
            //copied all string, reset
@ -370,7 +366,6 @@ class StringsTextExtractor extends ContentTextExtractor {
     */
    private static class InternationalStream extends InputStream {
        private static final Logger logger = Logger.getLogger(InternationalStream.class.getName());
        private static final int FILE_BUF_SIZE = 1024 * 1024;
        private final Content content;
        private final byte[] oneCharBuf = new byte[1];
@ -499,7 +494,7 @@ class StringsTextExtractor extends ContentTextExtractor {
         */
        private void convert(int numBytes) {
            lastExtractResult = stringExtractor.extract(fileReadBuff, numBytes, 0);
-            convertBuff = lastExtractResult.getText().getBytes(Server.DEFAULT_INDEXED_TEXT_CHARSET);
+            convertBuff = lastExtractResult.getText().getBytes(Charset.forName(DEFAULT_INDEXED_TEXT_CHARSET));
            //reset tracking vars
            if (lastExtractResult.getNumBytes() == 0) {
                bytesInConvertBuff = 0;
--- a/Core/src/org/sleuthkit/autopsy/textextractors/TextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/TextExtractor.java
@ -0,0 +1,103 @@
 /*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-18 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.sleuthkit.autopsy.textextractors;
 import java.io.Reader;
 import org.openide.util.Lookup;
 import org.sleuthkit.datamodel.Content;
 /**
 * Extracts the text out of {@link org.sleuthkit.datamodel.Content} instances
 * and exposes them as a {@link java.io.Reader}. Concrete implementations can be
 * obtained from
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory#getExtractor(org.sleuthkit.datamodel.Content)}
 * or
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory#getExtractor(org.sleuthkit.datamodel.Content, org.openide.util.Lookup)}.
 *
 * @see org.sleuthkit.autopsy.textextractors.TextExtractorFactory
 */
 public abstract class TextExtractor {
    /**
     * Determines if the file content is supported by the extractor.
     *
     * @param file           to test if its content should be supported
     * @param detectedFormat mime-type with detected format (such as text/plain)
     *                       or null if not detected
     *
     * @return true if the file content is supported, false otherwise
     */
    abstract boolean isSupported(Content file, String detectedFormat);
    /**
     * Determines if the TextExtractor instance is enabled to read content.
     *
     * @return true by default; subclasses may override this to report that
     *         they are currently disabled
     */
    boolean isEnabled() {
        return true;
    }
    /**
     * Get a {@link java.io.Reader} that will iterate over the text extracted
     * from the {@link org.sleuthkit.datamodel.Content} passed into
     * {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory}.
     *
     * @return {@link java.io.Reader} that contains the text of the underlying
     *         {@link org.sleuthkit.datamodel.Content}
     *
     * @throws ExtractionException if the text of the underlying content
     *                             could not be extracted
     *
     * @see org.sleuthkit.autopsy.textextractors.TextExtractorFactory
     *
     */
    public abstract Reader getReader() throws ExtractionException;
    /**
     * Determines how the extraction process will proceed given the settings
     * stored in the context instance.
     *
     * @param context Instance containing file config classes
     */
    void setExtractionSettings(Lookup context) {
        //no-op by default
    }
    /**
     * Exception encountered during
     * {@link org.sleuthkit.autopsy.textextractors.TextExtractor#getReader()}.
     * This indicates that there was an internal parsing error that occurred
     * during the reading of Content text.
     *
     * Declared static so that an exception instance does not hold a hidden
     * reference to the TextExtractor that created it.
     */
    public static class ExtractionException extends Exception {
        private static final long serialVersionUID = 1L;
        /**
         * @param msg description of the failure
         * @param ex  underlying cause
         */
        public ExtractionException(String msg, Throwable ex) {
            super(msg, ex);
        }
        /**
         * @param ex underlying cause
         */
        public ExtractionException(Throwable ex) {
            super(ex);
        }
        /**
         * @param msg description of the failure
         */
        public ExtractionException(String msg) {
            super(msg);
        }
    }
 }
--- a/Core/src/org/sleuthkit/autopsy/textextractors/TextExtractorFactory.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/TextExtractorFactory.java
@ -0,0 +1,157 @@
 /*
 * Autopsy Forensic Browser
 *
 * Copyright 2018-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.sleuthkit.autopsy.textextractors;
 import java.util.Arrays;
 import java.util.List;
 import org.openide.util.Lookup;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.Report;
 /**
 * Factory for creating
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractor}'s given a
 * {@link org.sleuthkit.datamodel.Content} instance
 *
 * See {@link org.sleuthkit.autopsy.textextractors.extractionconfigs} for
 * available {@link org.sleuthkit.autopsy.textextractors.TextExtractor}
 * configuration options.
 *
 * @see org.openide.util.Lookup
 */
 public class TextExtractorFactory {
    /**
     * Auto detects the correct
     * {@link org.sleuthkit.autopsy.textextractors.TextExtractor} given the
     * {@link org.sleuthkit.datamodel.Content}.
     *
     * See {@link org.sleuthkit.autopsy.textextractors.extractionconfigs} for
     * available {@link org.sleuthkit.autopsy.textextractors.TextExtractor}
     * configuration options.
     *
     * @param content Content source that will be read from
     * @param context Contains extraction configurations for certain file
     *                types, may be null
     *
     * @return A TextExtractor that supports the given content. File text can be
     *         obtained from
     *         {@link org.sleuthkit.autopsy.textextractors.TextExtractor#getReader()}.
     *
     * @throws NoTextExtractorFound Thrown when no TextExtractor was found for
     *                              the given content type. Use {@link
     *                              TextExtractorFactory#getDefaultExtractor(org.sleuthkit.datamodel.Content,
     *                              org.openide.util.Lookup)} as a fallback.
     *
     * @see org.openide.util.Lookup
     */
    public static TextExtractor getExtractor(Content content,
            Lookup context) throws NoTextExtractorFound {
        if (content instanceof AbstractFile) {
            String mimeType = ((AbstractFile) content).getMIMEType();
            // Candidates in priority order. Constructors are listed instead of
            // instances so that an extractor is only built when it is actually
            // tried; a match on an earlier candidate no longer pays for the
            // construction of the later ones.
            List<java.util.function.Function<Content, TextExtractor>> candidates = Arrays.asList(
                    HtmlTextExtractor::new,
                    SqliteTextExtractor::new,
                    TikaTextExtractor::new);
            for (java.util.function.Function<Content, TextExtractor> candidate : candidates) {
                TextExtractor extractor = candidate.apply(content);
                // Settings are applied before the checks, as the checks may
                // depend on them.
                extractor.setExtractionSettings(context);
                if (extractor.isEnabled() && extractor.isSupported(content, mimeType)) {
                    return extractor;
                }
            }
        } else if (content instanceof BlackboardArtifact) {
            TextExtractor artifactExtractor = new ArtifactTextExtractor((BlackboardArtifact) content);
            artifactExtractor.setExtractionSettings(context);
            return artifactExtractor;
        } else if (content instanceof Report) {
            TextExtractor reportExtractor = new TikaTextExtractor(content);
            reportExtractor.setExtractionSettings(context);
            return reportExtractor;
        }
        throw new NoTextExtractorFound(
                String.format("Could not find a suitable extractor for "
                        + "content with name [%s] and id=[%d]. Try using the default, "
                        + "non content specific extractor as an alternative.",
                        content.getName(), content.getId())
        );
    }
    /**
     * Auto detects the correct
     * {@link org.sleuthkit.autopsy.textextractors.TextExtractor} given the
     * {@link org.sleuthkit.datamodel.Content}. No extraction configuration is
     * supplied to the extractor.
     *
     * @param content Content instance that will be read from
     *
     * @return A TextExtractor that supports the given content. File text can be
     *         obtained from {@link TextExtractor#getReader()}.
     *
     * @throws NoTextExtractorFound Thrown when no TextExtractor was found for
     *                              the given content type. Use {@link
     *                              TextExtractorFactory#getDefaultExtractor(org.sleuthkit.datamodel.Content,
     *                              org.openide.util.Lookup)} as a fallback.
     */
    public static TextExtractor getExtractor(Content content)
            throws NoTextExtractorFound {
        return getExtractor(content, null);
    }
    /**
     * Returns the default extractor that can be run on any content type. This
     * extractor should be used as a backup in the event that no extractor was
     * found using
     * {@link TextExtractorFactory#getExtractor(org.sleuthkit.datamodel.Content, org.openide.util.Lookup)}
     * or
     * {@link TextExtractorFactory#getExtractor(org.sleuthkit.datamodel.Content)}.
     *
     * @param content Content source to read from
     * @param context Contains extraction configurations for certain file types
     *
     * @return A DefaultExtractor instance. File text can be obtained from
     *         {@link TextExtractor#getReader()}.
     *
     * @see org.openide.util.Lookup
     */
    public static TextExtractor getDefaultExtractor(Content content, Lookup context) {
        TextExtractor stringsInstance = new StringsTextExtractor(content);
        stringsInstance.setExtractionSettings(context);
        return stringsInstance;
    }
    /**
     * System level exception for handling content types that have no specific
     * strategy defined for extracting their text.
     *
     * @see
     * org.sleuthkit.autopsy.textextractors.TextExtractorFactory#getExtractor(org.sleuthkit.datamodel.Content)
     * @see
     * org.sleuthkit.autopsy.textextractors.TextExtractorFactory#getDefaultExtractor(org.sleuthkit.datamodel.Content,
     * org.openide.util.Lookup)
     */
    public static class NoTextExtractorFound extends Exception {

        private static final long serialVersionUID = 1L;

        /**
         * Creates an exception with a description of the unsupported content.
         *
         * @param msg Description of why no extractor could be found
         */
        public NoTextExtractorFound(String msg) {
            super(msg);
        }

        /**
         * Creates an exception that wraps an underlying cause.
         *
         * @param ex The error that prevented an extractor from being found
         */
        public NoTextExtractorFound(Throwable ex) {
            super(ex);
        }
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TikaTextExtractor.java
@ -16,15 +16,19 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.keywordsearch;
+package org.sleuthkit.autopsy.textextractors;
 import com.google.common.collect.ImmutableList;
 import com.google.common.io.CharSource;
 import java.io.File;
 import java.io.IOException;
 import java.io.PushbackReader;
 import java.io.Reader;
 import java.nio.file.Paths;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Objects;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
@ -33,6 +37,7 @@ import java.util.concurrent.TimeoutException;
 import java.util.logging.Level;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.tika.Tika;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
@ -44,26 +49,78 @@ import org.apache.tika.parser.ocr.TesseractOCRConfig;
 import org.apache.tika.parser.pdf.PDFParserConfig;
 import org.openide.util.NbBundle;
 import org.openide.modules.InstalledFileLocator;
-import org.sleuthkit.autopsy.coreutils.Logger;
+import org.openide.util.Lookup;
 import org.sleuthkit.autopsy.coreutils.PlatformUtil;
 import org.sleuthkit.autopsy.textextractors.extractionconfigs.ImageFileExtractionConfig;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.ReadContentInputStream;
 /**
- * Extracts text from Tika supported content. Protects against Tika
+ * Extracts text from Tika supported content. Protects against Tika parser hangs
- * parser hangs (for unexpected/corrupt content) using a timeout mechanism.
+ * (for unexpected/corrupt content) using a timeout mechanism.
 */
-class TikaTextExtractor extends ContentTextExtractor {
+final class TikaTextExtractor extends TextExtractor {
    //Mimetype groups to assist extractor implementations in ignoring binary and
    //archive files.
    private static final List<String> BINARY_MIME_TYPES
            = ImmutableList.of(
                    //ignore binary blob data, for which string extraction will be used
                    "application/octet-stream", //NON-NLS
                    "application/x-msdownload"); //NON-NLS
    /**
     * generally text extractors should ignore archives and let unpacking
     * modules take care of them
     */
    private static final List<String> ARCHIVE_MIME_TYPES
            = ImmutableList.of(
                    //ignore unstructured binary and compressed data, for which string extraction or unzipper works better
                    "application/x-7z-compressed", //NON-NLS
                    "application/x-ace-compressed", //NON-NLS
                    "application/x-alz-compressed", //NON-NLS
                    "application/x-arj", //NON-NLS
                    "application/vnd.ms-cab-compressed", //NON-NLS
                    "application/x-cfs-compressed", //NON-NLS
                    "application/x-dgc-compressed", //NON-NLS
                    "application/x-apple-diskimage", //NON-NLS
                    "application/x-gca-compressed", //NON-NLS
                    "application/x-dar", //NON-NLS
                    "application/x-lzx", //NON-NLS
                    "application/x-lzh", //NON-NLS
                    "application/x-rar-compressed", //NON-NLS
                    "application/x-stuffit", //NON-NLS
                    "application/x-stuffitx", //NON-NLS
                    "application/x-gtar", //NON-NLS
                    "application/x-archive", //NON-NLS
                    "application/x-executable", //NON-NLS
                    "application/x-gzip", //NON-NLS
                    "application/zip", //NON-NLS
                    "application/x-zoo", //NON-NLS
                    "application/x-cpio", //NON-NLS
                    "application/x-shar", //NON-NLS
                    "application/x-tar", //NON-NLS
                    "application/x-bzip", //NON-NLS
                    "application/x-bzip2", //NON-NLS
                    "application/x-lzip", //NON-NLS
                    "application/x-lzma", //NON-NLS
                    "application/x-lzop", //NON-NLS
                    "application/x-z", //NON-NLS
                    "application/x-compress"); //NON-NLS
    private static final java.util.logging.Logger tikaLogger = java.util.logging.Logger.getLogger("Tika"); //NON-NLS
    static final private Logger logger = Logger.getLogger(TikaTextExtractor.class.getName());
    private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
    private static final String SQLITE_MIMETYPE = "application/x-sqlite3";
    private final AutoDetectParser parser = new AutoDetectParser();
-    
+    private final Content content;
    private boolean tesseractOCREnabled;
    private static final String TESSERACT_DIR_NAME = "Tesseract-OCR"; //NON-NLS
    private static final String TESSERACT_EXECUTABLE = "tesseract.exe"; //NON-NLS
    private static final File TESSERACT_PATH = locateTesseractExecutable();
    private static final String LANGUAGE_PACKS = getLanguagePacks();
    private static final List<String> TIKA_SUPPORTED_TYPES
            = new Tika().getParser().getSupportedTypes(new ParseContext())
@ -71,13 +128,23 @@ class TikaTextExtractor extends ContentTextExtractor {
                    .map(mt -> mt.getType() + "/" + mt.getSubtype())
                    .collect(Collectors.toList());
-    @Override
+    public TikaTextExtractor(Content content) {
-    public void logWarning(final String msg, Exception ex) {
+        this.content = content;
        KeywordSearch.getTikaLogger().log(Level.WARNING, msg, ex);
    }
    /**
     * Returns a reader that will iterate over the text extracted by Apache
     * Tika from the content supplied to the constructor.
     *
     * @return Reader that contains Apache Tika extracted text
     *
     * @throws
     * org.sleuthkit.autopsy.textextractors.TextExtractor.ExtractionException
     */
    @Override
-    public Reader getReader(Content content) throws TextExtractorException {
+    public Reader getReader() throws ExtractionException {
        ReadContentInputStream stream = new ReadContentInputStream(content);
        Metadata metadata = new Metadata();
@ -90,28 +157,28 @@ class TikaTextExtractor extends ContentTextExtractor {
        officeParserConfig.setUseSAXPptxExtractor(true);
        officeParserConfig.setUseSAXDocxExtractor(true);
        parseContext.set(OfficeParserConfig.class, officeParserConfig);
-        
+
        // configure OCR if it is enabled in KWS settings and installed on the machine
-        if (TESSERACT_PATH != null && KeywordSearchSettings.getOcrOption() && PlatformUtil.isWindowsOS() == true) {
+        if (TESSERACT_PATH != null && tesseractOCREnabled && PlatformUtil.isWindowsOS() == true) {
-            
+
            // configure PDFParser. 
            PDFParserConfig pdfConfig = new PDFParserConfig();
-            
+
            // Extracting the inline images and letting Tesseract run on each inline image.
            // https://wiki.apache.org/tika/PDFParser%20%28Apache%20PDFBox%29
            // https://tika.apache.org/1.7/api/org/apache/tika/parser/pdf/PDFParserConfig.html
-            pdfConfig.setExtractInlineImages(true); 
+            pdfConfig.setExtractInlineImages(true);
            // Multiple pages within a PDF file might refer to the same underlying image.
-            pdfConfig.setExtractUniqueInlineImagesOnly(true);            
+            pdfConfig.setExtractUniqueInlineImagesOnly(true);
            parseContext.set(PDFParserConfig.class, pdfConfig);
-            
+
            // Configure Tesseract parser to perform OCR
            TesseractOCRConfig ocrConfig = new TesseractOCRConfig();
            String tesseractFolder = TESSERACT_PATH.getParent();
            ocrConfig.setTesseractPath(tesseractFolder);
            // Tesseract expects language data packs to be in a subdirectory of tesseractFolder, in a folder called "tessdata".
            // If they are stored somewhere else, use ocrConfig.setTessdataPath(String tessdataPath) to point to them
-            ocrConfig.setLanguage("eng");
+            ocrConfig.setLanguage(LANGUAGE_PACKS);
            parseContext.set(TesseractOCRConfig.class, ocrConfig);
        }
@ -124,7 +191,7 @@ class TikaTextExtractor extends ContentTextExtractor {
            PushbackReader pushbackReader = new PushbackReader(tikaReader);
            int read = pushbackReader.read();
            if (read == -1) {
-                throw new TextExtractorException("Unable to extract text: Tika returned empty reader for " + content);
+                throw new ExtractionException("Unable to extract text: Tika returned empty reader for " + content);
            }
            pushbackReader.unread(read);
@ -133,15 +200,13 @@ class TikaTextExtractor extends ContentTextExtractor {
            return CharSource.concat(new ReaderCharSource(pushbackReader), metaDataCharSource).openStream();
        } catch (TimeoutException te) {
            final String msg = NbBundle.getMessage(this.getClass(), "AbstractFileTikaTextExtract.index.tikaParseTimeout.text", content.getId(), content.getName());
-            logWarning(msg, te);
+            throw new ExtractionException(msg, te);
-            throw new TextExtractorException(msg, te);
+        } catch (ExtractionException ex) {
        } catch (TextExtractorException ex) {
            throw ex;
        } catch (Exception ex) {
-            KeywordSearch.getTikaLogger().log(Level.WARNING, "Exception: Unable to Tika parse the content" + content.getId() + ": " + content.getName(), ex.getCause()); //NON-NLS
+            tikaLogger.log(Level.WARNING, "Exception: Unable to Tika parse the content" + content.getId() + ": " + content.getName(), ex.getCause()); //NON-NLS
            final String msg = NbBundle.getMessage(this.getClass(), "AbstractFileTikaTextExtract.index.exception.tikaParse.msg", content.getId(), content.getName());
-            logWarning(msg, ex);
+            throw new ExtractionException(msg, ex);
            throw new TextExtractorException(msg, ex);
        } finally {
            future.cancel(true);
        }
@ -187,16 +252,19 @@ class TikaTextExtractor extends ContentTextExtractor {
                        ));
    }
-    @Override
+    /**
-    public boolean isContentTypeSpecific() {
+     * Determines if Tika is supported for this content type and mimetype.
-        return true;
+     *
-    }
+     * @param content        Source content to read
-
+     * @param detectedFormat Mimetype of content
     *
     * @return Flag indicating support for reading content type
     */
    @Override
    public boolean isSupported(Content content, String detectedFormat) {
        if (detectedFormat == null
-                || ContentTextExtractor.BINARY_MIME_TYPES.contains(detectedFormat) //any binary unstructured blobs (string extraction will be used)
+                || BINARY_MIME_TYPES.contains(detectedFormat) //any binary unstructured blobs (string extraction will be used)
-                || ContentTextExtractor.ARCHIVE_MIME_TYPES.contains(detectedFormat)
+                || ARCHIVE_MIME_TYPES.contains(detectedFormat)
                || (detectedFormat.startsWith("video/") && !detectedFormat.equals("video/x-flv")) //skip video other than flv (tika supports flv only) //NON-NLS
                || detectedFormat.equals(SQLITE_MIMETYPE) //Skip sqlite files, Tika cannot handle virtual tables and will fail with an exception. //NON-NLS
                ) {
@ -205,9 +273,34 @@ class TikaTextExtractor extends ContentTextExtractor {
        return TIKA_SUPPORTED_TYPES.contains(detectedFormat);
    }
-    @Override
+    /**
-    public boolean isDisabled() {
+     * Retrieves all of the installed language packs from their designated
-        return false;
+     * directory location to be used to configure Tesseract OCR.
     *
     * @return String of all language packs available for Tesseract to use
     */
    private static String getLanguagePacks() {
        // This method runs from a static initializer, and TESSERACT_PATH is
        // null when no Tesseract executable was located. Dereferencing it
        // there would make the whole class fail to load, so report "no
        // language packs" instead.
        if (TESSERACT_PATH == null) {
            return "";
        }
        File languagePackRootDir = new File(TESSERACT_PATH.getParent(), "tessdata");
        //Acceptable extensions for Tesseract-OCR version 3.05 language packs.
        //All extensions other than traineddata are associated with cube files that
        //have been made obsolete since version 4.0.
        List<String> acceptableExtensions = Arrays.asList("traineddata", "params",
                "lm", "fold", "bigrams", "nn", "word-freq", "size",
                "user-patterns", "user-words");
        //Pull out only unique languagePacks
        HashSet<String> languagePacks = new HashSet<>();
        // listFiles() returns null when the path is not a directory or cannot
        // be read, which also covers the directory not existing at all.
        File[] languagePackFiles = languagePackRootDir.listFiles();
        if (languagePackFiles != null) {
            for (File languagePack : languagePackFiles) {
                if (languagePack.isDirectory() || !acceptableExtensions.contains(
                        FilenameUtils.getExtension(languagePack.getName()))) {
                    continue;
                }
                //The first three characters of the file name are taken as the
                //language code; the set collapses the several files that
                //belong to one language into a single entry.
                String threeLetterPackageName = languagePack.getName().substring(0, 3);
                languagePacks.add(threeLetterPackageName);
            }
        }
        return String.join("+", languagePacks);
    }
    /**
@ -233,6 +326,28 @@ class TikaTextExtractor extends ContentTextExtractor {
    }
    /**
     * Determines how the extraction process will proceed given the settings
     * stored in this context instance. When the context is null or holds no
     * ImageFileExtractionConfig, the current OCR setting is left untouched.
     *
     * See the ImageFileExtractionConfig class in the extractionconfigs package
     * for available settings.
     *
     * @param context Instance containing config classes, may be null
     */
    @Override
    public void setExtractionSettings(Lookup context) {
        if (context == null) {
            return;
        }
        ImageFileExtractionConfig configInstance = context.lookup(ImageFileExtractionConfig.class);
        if (configInstance == null) {
            return;
        }
        // getOCREnabled() returns a primitive boolean, so a null check on its
        // result can never fail; the value is read once and applied directly.
        this.tesseractOCREnabled = configInstance.getOCREnabled();
    }
    /**
     * An implementation of CharSource that just wraps an existing reader and
     * returns it in openStream().
--- a/Core/src/org/sleuthkit/autopsy/textextractors/extractionconfigs/DefaultExtractionConfig.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/extractionconfigs/DefaultExtractionConfig.java
@ -0,0 +1,100 @@
 /*
 * Autopsy Forensic Browser
 *
 * Copyright 2018-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.sleuthkit.autopsy.textextractors.extractionconfigs;
 import java.util.List;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
 /**
 * Configuration holder for the
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractor} obtained from
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory#getDefaultExtractor(org.sleuthkit.datamodel.Content, org.openide.util.Lookup)}.
 *
 * The default extractor reads strings from the Content instance. This class
 * carries the encodings and language scripts that should be used while doing
 * so. Every setting starts out unset (null) until its setter is called.
 *
 * @see org.sleuthkit.autopsy.textextractors.TextExtractorFactory
 * @see
 * org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT
 * @see org.openide.util.Lookup
 */
 public class DefaultExtractionConfig {
    private List<SCRIPT> extractScripts;
    private Boolean extractUTF16;
    private Boolean extractUTF8;
    /**
     * Reports whether UTF-8 extraction was requested.
     *
     * @return The UTF-8 flag as last set, or null if it was never set
     */
    public Boolean getExtractUTF8() {
        return this.extractUTF8;
    }
    /**
     * Turns extraction with UTF-8 encoding on or off.
     *
     * @param enabled Flag indicating if UTF-8 should be turned on
     */
    public void setExtractUTF8(boolean enabled) {
        extractUTF8 = enabled;
    }
    /**
     * Reports whether UTF-16 extraction was requested.
     *
     * @return The UTF-16 flag as last set, or null if it was never set
     */
    public Boolean getExtractUTF16() {
        return this.extractUTF16;
    }
    /**
     * Turns extraction with UTF-16 encoding on or off.
     *
     * @param enabled Flag indicating if UTF-16 should be turned on
     */
    public void setExtractUTF16(boolean enabled) {
        extractUTF16 = enabled;
    }
    /**
     * Returns the scripts chosen for this extraction.
     *
     * @return The list passed to the setter, or null if none was set
     */
    public List<SCRIPT> getExtractScripts() {
        return extractScripts;
    }
    /**
     * Chooses the language scripts to use during this extraction. See
     * {@link org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT}
     * for more information about available scripts. The list is stored as
     * given, without copying.
     *
     * @param scripts Desired set of scripts to be used during extraction
     */
    public void setExtractScripts(List<SCRIPT> scripts) {
        extractScripts = scripts;
    }
 }
--- a/Core/src/org/sleuthkit/autopsy/textextractors/extractionconfigs/ImageFileExtractionConfig.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/extractionconfigs/ImageFileExtractionConfig.java
@ -0,0 +1,54 @@
 /*
 * Autopsy Forensic Browser
 *
 * Copyright 2018-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.sleuthkit.autopsy.textextractors.extractionconfigs;
 /**
 * Allows for configuration of OCR on image files.
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractor}'s that use
 * ImageFileExtractionConfig can be obtained through
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory#getExtractor(org.sleuthkit.datamodel.Content)}
 * or
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory#getDefaultExtractor(org.sleuthkit.datamodel.Content, org.openide.util.Lookup)}.
 *
 * @see org.sleuthkit.autopsy.textextractors.TextExtractorFactory
 * @see org.openide.util.Lookup
 */
 public class ImageFileExtractionConfig {
    // A primitive is used so that the flag is false until explicitly set. A
    // boxed Boolean would be null by default and make the getter throw a
    // NullPointerException when unboxing.
    private boolean ocrEnabled;
    /**
     * Enables OCR to be run on the text extractor responsible for handling
     * image files.
     *
     * @param enabled Flag indicating if OCR is enabled.
     */
    public void setOCREnabled(boolean enabled) {
        this.ocrEnabled = enabled;
    }
    /**
     * Gets the OCR flag that has been set. By default this flag is turned off.
     *
     * @return Flag indicating if OCR is enabled.
     */
    public boolean getOCREnabled() {
        return this.ocrEnabled;
    }
 }
--- a/Core/src/org/sleuthkit/autopsy/texttranslation/TranslationException.java
+++ b/Core/src/org/sleuthkit/autopsy/texttranslation/TranslationException.java
@ -19,7 +19,7 @@
 package org.sleuthkit.autopsy.texttranslation;
 /**
- * Provides a system exception for the Text Translation errors
+ * Provides a system exception for Text Translation errors
 */
 public class TranslationException extends Exception {
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ArtifactTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ArtifactTextExtractor.java
@ -1,150 +0,0 @@
 /*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.sleuthkit.autopsy.keywordsearch;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 import java.util.logging.Level;
 import org.apache.commons.io.IOUtils;
 import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.datamodel.ContentUtils;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardAttribute;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.SleuthkitCase;
 import org.sleuthkit.datamodel.TskCoreException;
 /**
 * Text extractor for blackboard artifacts. The text of an artifact is the
 * concatenation of the values of all of its attributes, one per line.
 */
 class ArtifactTextExtractor implements TextExtractor<BlackboardArtifact> {
    static final private Logger logger = Logger.getLogger(ArtifactTextExtractor.class.getName());
    /**
     * Looks up the Content that is the data source for the given artifact.
     * //JMTODO: is there a preexisting method to do this?
     *
     * @param artifact Artifact whose data source is wanted
     *
     * @return The data source for the given artifact as a Content object, or
     *         null if it could not be found or no case is open.
     *
     * @throws TskCoreException if there is a problem accessing the case db.
     */
    static Content getDataSource(BlackboardArtifact artifact) throws TskCoreException {
        final Case openCase;
        try {
            openCase = Case.getCurrentCaseThrows();
        } catch (NoCurrentCaseException ignore) {
            // there is no open case, so there is nothing to look up
            return null;
        }
        final SleuthkitCase caseDb = openCase.getSleuthkitCase();
        if (caseDb == null) {
            return null;
        }
        // When the artifact's source object is a file, use that file's data
        // source; otherwise fall back to the source object itself.
        AbstractFile sourceFile = caseDb.getAbstractFileById(artifact.getObjectID());
        if (sourceFile == null) {
            return caseDb.getContentById(artifact.getObjectID());
        }
        return sourceFile.getDataSource();
    }
    @Override
    public boolean isDisabled() {
        return false;
    }
    @Override
    public void logWarning(final String msg, Exception ex) {
        logger.log(Level.WARNING, msg, ex); //NON-NLS
    }
    /**
     * Builds a UTF-8 stream over the artifact's attributes, written as
     * "display name : value" lines.
     *
     * @param artifact Artifact to turn into text
     *
     * @return Stream over the concatenated attribute text
     *
     * @throws TextExtractorException if the data source or the attributes of
     *                                the artifact could not be obtained
     */
    private InputStream getInputStream(BlackboardArtifact artifact) throws TextExtractorException {
        final Content dataSource;
        try {
            dataSource = getDataSource(artifact);
        } catch (TskCoreException tskCoreException) {
            throw new TextExtractorException("Unable to get datasource for artifact: " + artifact.toString(), tskCoreException);
        }
        if (dataSource == null) {
            throw new TextExtractorException("Datasource was null for artifact: " + artifact.toString());
        }
        // All attribute values are concatenated into a single "content"
        // string to be indexed.
        final StringBuilder text = new StringBuilder();
        try {
            for (BlackboardAttribute attribute : artifact.getAttributes()) {
                text.append(attribute.getAttributeType().getDisplayName()).append(" : ");
                // Datetime values are formatted here rather than in
                // BlackboardAttribute.getDisplayString() because the
                // attribute lives in the Sleuthkit data model while the
                // timezone utility is in ContentUtils in the Autopsy
                // datamodel.
                switch (attribute.getValueType()) {
                    case DATETIME:
                        text.append(ContentUtils.getStringTime(attribute.getValueLong(), dataSource));
                        break;
                    default:
                        text.append(attribute.getDisplayString());
                }
                text.append(System.lineSeparator());
            }
        } catch (TskCoreException tskCoreException) {
            throw new TextExtractorException("Unable to get attributes for artifact: " + artifact.toString(), tskCoreException);
        }
        return IOUtils.toInputStream(text, StandardCharsets.UTF_8);
    }
    @Override
    public Reader getReader(BlackboardArtifact source) throws TextExtractorException {
        InputStream attributeStream = getInputStream(source);
        return new InputStreamReader(attributeStream, StandardCharsets.UTF_8);
    }
    @Override
    public long getID(BlackboardArtifact source) {
        return source.getArtifactID();
    }
    @Override
    public String getName(BlackboardArtifact source) {
        return source.getDisplayName() + "_" + source.getArtifactID();
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ContentTextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ContentTextExtractor.java
@ -1,110 +0,0 @@
 /*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-2018 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.sleuthkit.autopsy.keywordsearch;
 import java.io.Reader;
 import java.util.Arrays;
 import java.util.List;
 import org.sleuthkit.datamodel.Content;
 /**
 * Base class for text extractors whose source is a Content object. It holds
 * the MIME-type lists that extractors consult to skip binary and archive
 * content, declares the support checks each extractor must implement, and
 * supplies ID and name lookups backed by the Content itself.
 */
 abstract class ContentTextExtractor implements TextExtractor<Content> {
    /** MIME types of binary blob data, which is left to string extraction. */
    static final List<String> BINARY_MIME_TYPES
            = Arrays.asList(
                    //ignore binary blob data, for which string extraction will be used
                    "application/octet-stream", //NON-NLS
                    "application/x-msdownload"); //NON-NLS
    /** generally text extractors should ignore archives and let unpacking
     * modules take care of them */
    static final List<String> ARCHIVE_MIME_TYPES
            = Arrays.asList(
                    //ignore unstructured binary and compressed data, for which string extraction or unzipper works better
                    "application/x-7z-compressed", //NON-NLS
                    "application/x-ace-compressed", //NON-NLS
                    "application/x-alz-compressed", //NON-NLS
                    "application/x-arj", //NON-NLS
                    "application/vnd.ms-cab-compressed", //NON-NLS
                    "application/x-cfs-compressed", //NON-NLS
                    "application/x-dgc-compressed", //NON-NLS
                    "application/x-apple-diskimage", //NON-NLS
                    "application/x-gca-compressed", //NON-NLS
                    "application/x-dar", //NON-NLS
                    "application/x-lzx", //NON-NLS
                    "application/x-lzh", //NON-NLS
                    "application/x-rar-compressed", //NON-NLS
                    "application/x-stuffit", //NON-NLS
                    "application/x-stuffitx", //NON-NLS
                    "application/x-gtar", //NON-NLS
                    "application/x-archive", //NON-NLS
                    "application/x-executable", //NON-NLS
                    "application/x-gzip", //NON-NLS
                    "application/zip", //NON-NLS
                    "application/x-zoo", //NON-NLS
                    "application/x-cpio", //NON-NLS
                    "application/x-shar", //NON-NLS
                    "application/x-tar", //NON-NLS
                    "application/x-bzip", //NON-NLS
                    "application/x-bzip2", //NON-NLS
                    "application/x-lzip", //NON-NLS
                    "application/x-lzma", //NON-NLS
                    "application/x-lzop", //NON-NLS
                    "application/x-z", //NON-NLS
                    "application/x-compress"); //NON-NLS
    /**
     * Determines whether the extractor works only for specific content types
     * (those accepted by isSupported()) or is a generic content extractor
     * (such as the strings extractor).
     *
     * @return true if the extractor is limited to specific content types,
     *         false if it is a generic extractor
     */
    abstract boolean isContentTypeSpecific();
    /**
     * Determines if the file content is supported by the extractor if
     * isContentTypeSpecific() returns true.
     *
     * @param file           the content to test for support
     * @param detectedFormat mime-type with detected format (such as text/plain)
     *                       or null if not detected
     *
     * @return true if the file content is supported, false otherwise
     */
    abstract boolean isSupported(Content file, String detectedFormat);
    /**
     * Gets a Reader over the text extracted from the given content.
     *
     * @param source the content to extract text from
     *
     * @return a Reader exposing the extracted text
     *
     * @throws TextExtractorException declared for implementations to signal
     *                                that extraction failed
     */
    @Override
    public abstract Reader getReader(Content source) throws TextExtractorException;
    /**
     * Gets the ID of the given content, as reported by Content.getId().
     *
     * @param source the content to identify
     *
     * @return the content's ID
     */
    @Override
    public long getID(Content source) {
        return source.getId();
    }
    /**
     * Gets the name of the given content, as reported by Content.getName().
     *
     * @param source the content to name
     *
     * @return the content's name
     */
    @Override
    public String getName(Content source) {
        return source.getName();
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
@ -19,6 +19,7 @@
 package org.sleuthkit.autopsy.keywordsearch;
 import java.io.BufferedReader;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.logging.Level;
@ -58,7 +59,6 @@ class Ingester {
    private final Server solrServer = KeywordSearch.getServer();
    private static final SolrFieldsVisitor SOLR_FIELDS_VISITOR = new SolrFieldsVisitor();
    private static Ingester instance;
    private static final int SINGLE_READ_CHARS = 512;
    private Ingester() {
    }
@ -106,8 +106,8 @@ class Ingester {
     * @throws IngesterException if there was an error processing a specific
     *                           artifact, but the Solr server is probably fine.
     */
-    void indexMetaDataOnly(BlackboardArtifact artifact) throws IngesterException {
+    void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException {
-        indexChunk("", new ArtifactTextExtractor().getName(artifact), getContentFields(artifact));
+        indexChunk("", sourceName, getContentFields(artifact));
    }
    /**
@ -142,23 +142,12 @@ class Ingester {
     * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
     */
    // TODO (JIRA-3118): Cancelled text indexing does not propagate cancellation to clients 
-    < T extends SleuthkitVisitableItem> boolean indexText(TextExtractor< T> extractor, T source, IngestJobContext context) throws Ingester.IngesterException {
+    < T extends SleuthkitVisitableItem> boolean indexText(Reader sourceReader, long sourceID, String sourceName, T source, IngestJobContext context) throws Ingester.IngesterException {
        final long sourceID = extractor.getID(source);
        final String sourceName = extractor.getName(source);
        int numChunks = 0; //unknown until chunking is done
-
+        
        if (extractor.isDisabled()) {
            /*
             * some Extractors, notably the strings extractor, have options
             * which can be configured such that no extraction should be done
             */
            return true;
        }
        Map<String, String> fields = getContentFields(source);
        //Get a reader for the content of the given source
-        try (BufferedReader reader = new BufferedReader(extractor.getReader(source));) {
+        try (BufferedReader reader = new BufferedReader(sourceReader)) {
            Chunker chunker = new Chunker(reader);
            for (Chunk chunk : chunker) {
                if (context != null && context.fileIngestIsCancelled()) {
@ -173,18 +162,18 @@ class Ingester {
                    indexChunk(chunk.toString(), sourceName, fields);
                    numChunks++;
                } catch (Ingester.IngesterException ingEx) {
-                    extractor.logWarning("Ingester had a problem with extracted string from file '" //NON-NLS
+                    logger.log(Level.WARNING, "Ingester had a problem with extracted string from file '" //NON-NLS
                            + sourceName + "' (id: " + sourceID + ").", ingEx);//NON-NLS
                    throw ingEx; //need to rethrow to signal error and move on
                }
            }
            if (chunker.hasException()) {
-                extractor.logWarning("Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
+                logger.log(Level.WARNING, "Error chunking content from " + sourceID + ": " + sourceName, chunker.getException());
                return false;
            }
        } catch (Exception ex) {
-            extractor.logWarning("Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
+            logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceID + ": " + sourceName, ex);//NON-NLS
            return false;
        } finally {
            if (context != null && context.fileIngestIsCancelled()) {
@ -371,7 +360,7 @@ class Ingester {
            Map<String, String> params = new HashMap<>();
            params.put(Server.Schema.ID.toString(), Long.toString(artifact.getArtifactID()));
            try {
-                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(ArtifactTextExtractor.getDataSource(artifact).getId()));
+                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(artifact.getDataSource().getId()));
            } catch (TskCoreException ex) {
                logger.log(Level.SEVERE, "Could not get data source id to properly index the artifact " + artifact.getArtifactID(), ex); //NON-NLS
                params.put(Server.Schema.IMAGE_ID.toString(), Long.toString(-1));
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalLanguageSettingsPanel.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchGlobalLanguageSettingsPanel.java
@ -35,6 +35,7 @@ import org.sleuthkit.autopsy.coreutils.PlatformUtil;
 import org.sleuthkit.autopsy.coreutils.StringExtract;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
 import org.sleuthkit.autopsy.ingest.IngestManager;
 import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.StringsExtractOptions;
 /**
 * Child panel of the global settings panel (Languages tab).
@ -45,7 +46,7 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
    private final Map<String, StringExtract.StringExtractUnicodeTable.SCRIPT> scripts = new HashMap<>();
    private ActionListener updateLanguagesAction;
    private List<SCRIPT> toUpdate;
-
+    
    KeywordSearchGlobalLanguageSettingsPanel() {
        initComponents();
        customizeComponents();
@ -125,12 +126,12 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
    private void reloadScriptsCheckBoxes() {
        boolean utf16
-                = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
+                = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsExtractOptions.EXTRACT_UTF16.toString()));
        enableUTF16Checkbox.setSelected(utf16);
        boolean utf8
-                = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
+                = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsExtractOptions.EXTRACT_UTF8.toString()));
        enableUTF8Checkbox.setSelected(utf8);
        boolean ocr = KeywordSearchSettings.getOcrOption();
@ -152,12 +153,12 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
        reloadScriptsCheckBoxes();
        boolean utf16
-                = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString()));
+                = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsExtractOptions.EXTRACT_UTF16.toString()));
        enableUTF16Checkbox.setSelected(utf16);
        boolean utf8
-                = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString()));
+                = Boolean.parseBoolean(KeywordSearchSettings.getStringExtractOption(StringsExtractOptions.EXTRACT_UTF8.toString()));
        enableUTF8Checkbox.setSelected(utf8);
        final boolean extractEnabled = utf16 || utf8;
@ -316,9 +317,9 @@ class KeywordSearchGlobalLanguageSettingsPanel extends javax.swing.JPanel implem
    @Override
    public void store() {
-        KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString(),
+        KeywordSearchSettings.setStringExtractOption(StringsExtractOptions.EXTRACT_UTF8.toString(),
                Boolean.toString(enableUTF8Checkbox.isSelected()));
-        KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(),
+        KeywordSearchSettings.setStringExtractOption(StringsExtractOptions.EXTRACT_UTF16.toString(),
                Boolean.toString(enableUTF16Checkbox.isSelected()));
        KeywordSearchSettings.setOcrOption(enableOcrCheckbox.isSelected());
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
@ -18,14 +18,18 @@
 */
 package org.sleuthkit.autopsy.keywordsearch;
-import java.util.ArrayList;
+import com.google.common.collect.ImmutableList;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.logging.Level;
 import org.openide.util.Exceptions;
 import org.openide.util.Lookup;
 import org.openide.util.NbBundle;
 import org.openide.util.NbBundle.Messages;
 import org.openide.util.lookup.Lookups;
 import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
 import org.sleuthkit.autopsy.coreutils.Logger;
@ -37,9 +41,15 @@ import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
 import org.sleuthkit.autopsy.ingest.IngestModuleReferenceCounter;
 import org.sleuthkit.autopsy.ingest.IngestServices;
 import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
 import org.sleuthkit.autopsy.keywordsearch.TextFileExtractor.TextFileExtractorException;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
 import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
 import org.sleuthkit.autopsy.textextractors.TextExtractor;
 import org.sleuthkit.autopsy.textextractors.TextExtractor.ExtractionException;
 import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
 import org.sleuthkit.autopsy.textextractors.extractionconfigs.ImageFileExtractionConfig;
 import org.sleuthkit.autopsy.textextractors.extractionconfigs.DefaultExtractionConfig;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.TskData;
 import org.sleuthkit.datamodel.TskData.FileKnown;
@ -61,6 +71,52 @@ import org.sleuthkit.datamodel.TskData.FileKnown;
    "CannotRunFileTypeDetection=Unable to run file type detection."
 })
 public final class KeywordSearchIngestModule implements FileIngestModule {
    /** generally text extractors should ignore archives and let unpacking
     * modules take care of them */
    public static final List<String> ARCHIVE_MIME_TYPES
            = ImmutableList.of(
                    //ignore unstructured binary and compressed data, for which string extraction or unzipper works better
                    "application/x-7z-compressed", //NON-NLS
                    "application/x-ace-compressed", //NON-NLS
                    "application/x-alz-compressed", //NON-NLS
                    "application/x-arj", //NON-NLS
                    "application/vnd.ms-cab-compressed", //NON-NLS
                    "application/x-cfs-compressed", //NON-NLS
                    "application/x-dgc-compressed", //NON-NLS
                    "application/x-apple-diskimage", //NON-NLS
                    "application/x-gca-compressed", //NON-NLS
                    "application/x-dar", //NON-NLS
                    "application/x-lzx", //NON-NLS
                    "application/x-lzh", //NON-NLS
                    "application/x-rar-compressed", //NON-NLS
                    "application/x-stuffit", //NON-NLS
                    "application/x-stuffitx", //NON-NLS
                    "application/x-gtar", //NON-NLS
                    "application/x-archive", //NON-NLS
                    "application/x-executable", //NON-NLS
                    "application/x-gzip", //NON-NLS
                    "application/zip", //NON-NLS
                    "application/x-zoo", //NON-NLS
                    "application/x-cpio", //NON-NLS
                    "application/x-shar", //NON-NLS
                    "application/x-tar", //NON-NLS
                    "application/x-bzip", //NON-NLS
                    "application/x-bzip2", //NON-NLS
                    "application/x-lzip", //NON-NLS
                    "application/x-lzma", //NON-NLS
                    "application/x-lzop", //NON-NLS
                    "application/x-z", //NON-NLS
                    "application/x-compress"); //NON-NLS
    /**
     * Names of the strings-extraction options. Each constant's toString()
     * value is used as the key under which KeywordSearchSettings stores a
     * "true"/"false" string for that option.
     */
    enum StringsExtractOptions {
        EXTRACT_UTF16, ///< whether to extract UTF16 text, stored as true/false
        EXTRACT_UTF8, ///< whether to extract UTF8 text, stored as true/false
    };
    enum UpdateFrequency {
@ -89,13 +145,10 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
    //accessed read-only by searcher thread
    private boolean startedSearching = false;
-    private List<ContentTextExtractor> textExtractors;
+    private Lookup stringsExtractionContext;
    private StringsTextExtractor stringExtractor;
    private TextFileExtractor txtFileExtractor;
    private final KeywordSearchJobSettings settings;
    private boolean initialized = false;
    private long jobId;
    private long dataSourceId;
    private static final AtomicInteger instanceCount = new AtomicInteger(0); //just used for logging
    private int instanceNum = 0;
    private static final IngestModuleReferenceCounter refCounter = new IngestModuleReferenceCounter();
@ -152,7 +205,6 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
    public void startUp(IngestJobContext context) throws IngestModuleException {
        initialized = false;
        jobId = context.getJobId();
        dataSourceId = context.getDataSource().getId();
        Server server = KeywordSearch.getServer();
        if (server.coreIsOpen() == false) {
@ -238,22 +290,15 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
                }
            }
        }
-
+        
-        //initialize extractors
+        DefaultExtractionConfig stringsConfig = new DefaultExtractionConfig();
-        stringExtractor = new StringsTextExtractor();
+        Map<String, String> stringsOptions = KeywordSearchSettings.getStringExtractOptions();
-        stringExtractor.setScripts(KeywordSearchSettings.getStringExtractScripts());
+        stringsConfig.setExtractUTF8(Boolean.parseBoolean(stringsOptions.get(StringsExtractOptions.EXTRACT_UTF8.toString())));
-        stringExtractor.setOptions(KeywordSearchSettings.getStringExtractOptions());
+        stringsConfig.setExtractUTF16(Boolean.parseBoolean(stringsOptions.get(StringsExtractOptions.EXTRACT_UTF16.toString())));
-
+        stringsConfig.setExtractScripts(KeywordSearchSettings.getStringExtractScripts());
-        txtFileExtractor = new TextFileExtractor();
+        
-
+        stringsExtractionContext = Lookups.fixed(stringsConfig);
-        textExtractors = new ArrayList<>();
+        
        //order matters, more specific extractors first
        textExtractors.add(new HtmlTextExtractor());
        //Add sqlite text extractor to be default for sqlite files, since tika struggles 
        //with them. See SqliteTextExtractor class for specifics
        textExtractors.add(new SqliteTextExtractor());
        textExtractors.add(new TikaTextExtractor());
        indexer = new Indexer();
        initialized = true;
    }
@ -345,10 +390,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
     * Common cleanup code when module stops or final searcher completes
     */
    private void cleanup() {
-        textExtractors.clear();
+        stringsExtractionContext = null;
        textExtractors = null;
        stringExtractor = null;
        txtFileExtractor = null;
        initialized = false;
    }
@ -436,24 +478,18 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
         * @throws IngesterException exception thrown if indexing failed
         */
        private boolean extractTextAndIndex(AbstractFile aFile, String detectedFormat) throws IngesterException {
-            ContentTextExtractor extractor = null;
+            ImageFileExtractionConfig imageConfig = new ImageFileExtractionConfig();
-
+            imageConfig.setOCREnabled(KeywordSearchSettings.getOcrOption());
-            //go over available text extractors in order, and pick the first one (most specific one)
+            Lookup extractionContext = Lookups.fixed(imageConfig);
-            for (ContentTextExtractor fe : textExtractors) {
+            
-                if (fe.isSupported(aFile, detectedFormat)) {
+            try {
-                    extractor = fe;
+                Reader specializedReader = TextExtractorFactory.getExtractor(aFile,extractionContext).getReader();
-                    break;
+                //divide into chunks and index
-                }
+                return Ingester.getDefault().indexText(specializedReader,aFile.getId(),aFile.getName(), aFile, context);
-            }
+            } catch (TextExtractorFactory.NoTextExtractorFound | ExtractionException ex) {
-
+                //No text extractor found... run the default instead
            if (extractor == null) {
                // No text extractor found.
                return false;
            }
            //logger.log(Level.INFO, "Extractor: " + fileExtract + ", file: " + aFile.getName());
            //divide into chunks and index
            return Ingester.getDefault().indexText(extractor, aFile, context);
        }
        /**
@ -469,7 +505,8 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
                if (context.fileIngestIsCancelled()) {
                    return true;
                }
-                if (Ingester.getDefault().indexText(stringExtractor, aFile, KeywordSearchIngestModule.this.context)) {
+                Reader stringsReader = TextExtractorFactory.getDefaultExtractor(aFile, stringsExtractionContext).getReader();
                if (Ingester.getDefault().indexText(stringsReader,aFile.getId(),aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) {
                    putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
                    return true;
                } else {
@ -477,7 +514,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
                    putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
                    return false;
                }
-            } catch (IngesterException ex) {
+            } catch (IngesterException | ExtractionException ex) {
                logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex);  //NON-NLS
                putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
                return false;
@ -529,7 +566,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
            // we skip archive formats that are opened by the archive module. 
            // @@@ We could have a check here to see if the archive module was enabled though...
-            if (ContentTextExtractor.ARCHIVE_MIME_TYPES.contains(fileType)) {
+            if (ARCHIVE_MIME_TYPES.contains(fileType)) {
                try {
                    if (context.fileIngestIsCancelled()) {
                        return;
@ -577,11 +614,13 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
                //Carved Files should be the only type of unallocated files capable of a txt extension and 
                //should be ignored by the TextFileExtractor because they may contain more than one text encoding
                try {
-                    if (Ingester.getDefault().indexText(txtFileExtractor, aFile, context)) {
+                    TextFileExtractor textFileExtractor = new TextFileExtractor();
                    Reader textReader = textFileExtractor.getReader(aFile);
                    if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
                        putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
                        wasTextAdded = true;
                    }
-                } catch (IngesterException ex) {
+                } catch (IngesterException | TextFileExtractorException ex) {
                    logger.log(Level.WARNING, "Unable to index as unicode", ex);
                }
            }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchJobSettingsPanel.java
@ -31,6 +31,7 @@ import javax.swing.table.TableColumn;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
 import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettings;
 import org.sleuthkit.autopsy.ingest.IngestModuleIngestJobSettingsPanel;
 import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.StringsExtractOptions;
 /**
 * Ingest job settings panel for keyword search file ingest modules.
@ -102,8 +103,8 @@ public final class KeywordSearchJobSettingsPanel extends IngestModuleIngestJobSe
    }
    private void displayEncodings() {
-        String utf8 = KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString());
+        String utf8 = KeywordSearchSettings.getStringExtractOption(StringsExtractOptions.EXTRACT_UTF8.toString());
-        String utf16 = KeywordSearchSettings.getStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString());
+        String utf16 = KeywordSearchSettings.getStringExtractOption(StringsExtractOptions.EXTRACT_UTF16.toString());
        ArrayList<String> encodingsList = new ArrayList<>();
        if (utf8 == null || Boolean.parseBoolean(utf8)) {
            encodingsList.add("UTF8");
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchSettings.java
@ -28,6 +28,7 @@ import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.ModuleSettings;
 import org.sleuthkit.autopsy.coreutils.StringExtract;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
 import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.StringsExtractOptions;
 import org.sleuthkit.autopsy.keywordsearch.KeywordSearchIngestModule.UpdateFrequency;
 //This file contains constants and settings for KeywordSearch
@ -234,14 +235,14 @@ class KeywordSearchSettings {
            KeywordSearchSettings.setUpdateFrequency(UpdateFrequency.DEFAULT);
        }
        //setting default Extract UTF8
-        if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString())) {
+        if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, StringsExtractOptions.EXTRACT_UTF8.toString())) {
            logger.log(Level.INFO, "No configuration for UTF8 found, generating default..."); //NON-NLS
-            KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF8.toString(), Boolean.TRUE.toString());
+            KeywordSearchSettings.setStringExtractOption(StringsExtractOptions.EXTRACT_UTF8.toString(), Boolean.TRUE.toString());
        }
        //setting default Extract UTF16
-        if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString())) {
+        if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, StringsExtractOptions.EXTRACT_UTF16.toString())) {
            logger.log(Level.INFO, "No configuration for UTF16 found, generating defaults..."); //NON-NLS
-            KeywordSearchSettings.setStringExtractOption(StringsTextExtractor.ExtractOptions.EXTRACT_UTF16.toString(), Boolean.TRUE.toString());
+            KeywordSearchSettings.setStringExtractOption(StringsExtractOptions.EXTRACT_UTF16.toString(), Boolean.TRUE.toString());
        }
        //setting OCR default (disabled by default)
        if (!ModuleSettings.settingExists(KeywordSearchSettings.PROPERTIES_OPTIONS, OCR_ENABLED)) {
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java
@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch;
 import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
 import java.lang.reflect.InvocationTargetException;
 import java.net.InetAddress;
 import java.util.ArrayList;
@ -45,6 +46,9 @@ import org.sleuthkit.autopsy.appservices.AutopsyService;
 import org.sleuthkit.autopsy.progress.ProgressIndicator;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
 import org.sleuthkit.autopsy.textextractors.TextExtractor.ExtractionException;
 import org.sleuthkit.autopsy.textextractors.TextExtractor;
 import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.TskCoreException;
@ -112,19 +116,24 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
                return;
            }
            try {
-                ingester.indexMetaDataOnly(artifact);
+                Reader blackboardReader = TextExtractorFactory
-                ingester.indexText(new ArtifactTextExtractor(), artifact, null);
+                        .getExtractor(content, null).getReader();
-            } catch (Ingester.IngesterException ex) {
+                String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
                ingester.indexMetaDataOnly(artifact, sourceName);
                ingester.indexText(blackboardReader, artifact.getArtifactID(), sourceName, content, null);
            } catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | ExtractionException ex) {
                throw new TskCoreException(ex.getCause().getMessage(), ex);
            }
        } else {
            try {
-                ingester.indexText(new TikaTextExtractor(), content, null);
+                Reader contentReader = TextExtractorFactory
-            } catch (Ingester.IngesterException ex) {
+                        .getExtractor(content, null).getReader();
                ingester.indexText(contentReader, content.getId(), content.getName(), content, null);
            } catch (TextExtractorFactory.NoTextExtractorFound | ExtractionException | Ingester.IngesterException ex) {
                try {
                    // Try the StringsTextExtractor if Tika extractions fails.
-                    ingester.indexText(new StringsTextExtractor(), content, null);
+                    ingester.indexText(TextExtractorFactory.getDefaultExtractor(content, null).getReader(),content.getId(),content.getName(), content, null);
-                } catch (Ingester.IngesterException ex1) {
+                } catch (Ingester.IngesterException | ExtractionException ex1) {
                    throw new TskCoreException(ex.getCause().getMessage(), ex1);
                }
            }
@ -437,9 +446,12 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
        final Ingester ingester = Ingester.getDefault();
        try {
-            ingester.indexMetaDataOnly(artifact);
+            String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
-            ingester.indexText(new ArtifactTextExtractor(), artifact, null);
+            Reader contentSpecificReader = 
-        } catch (Ingester.IngesterException ex) {
+                    TextExtractorFactory.getExtractor((Content) artifact, null).getReader();
            ingester.indexMetaDataOnly(artifact, sourceName);
            ingester.indexText(contentSpecificReader, artifact.getId(), sourceName, artifact, null);
        } catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | ExtractionException ex) {
            throw new TskCoreException(ex.getCause().getMessage(), ex);
        }
    }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextExtractor.java
@ -1,88 +0,0 @@
 /*
 * Autopsy Forensic Browser
 *
 * Copyright 2011-16 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.sleuthkit.autopsy.keywordsearch;
 import java.io.Reader;
 import org.sleuthkit.datamodel.SleuthkitVisitableItem;
 /**
 * Extracts text out of a SleuthkitVisitableItem, and exposes it as a Reader.
 * This Reader is given to the Ingester to chunk and index in Solr.
 *
 * @param <TextSource> The subtype of SleuthkitVisitableItem an implementation
 *                     is able to process.
 */
 interface TextExtractor< TextSource extends SleuthkitVisitableItem> {
    /**
     * Is this extractor configured such that no extraction will/should be done?
     *
     * @return True if this extractor will/should not perform any extraction.
     */
    boolean isDisabled();
    /**
     * Log the given message and exception as a warning.
     *
     * @param msg The warning message to log.
     * @param ex  The exception to log along with the message.
     */
    void logWarning(String msg, Exception ex);
    /**
     * Get a reader over the text extracted from the given source.
     *
     * @param source The item to extract text from.
     *
     * @return A Reader over the extracted text.
     *
     * @throws TextExtractorException Declared for implementations to signal
     *                                that text could not be extracted from
     *                                the source.
     */
    Reader getReader(TextSource source) throws TextExtractorException;
    /**
     * Get the 'object' id of the given source.
     *
     * @param source The item whose id is wanted.
     *
     * @return The 'object' id of the source.
     */
    long getID(TextSource source);
    /**
     * Get a human readable name for the given source.
     *
     * @param source The item whose name is wanted.
     *
     * @return A human readable name for the source.
     */
    String getName(TextSource source);
    /**
     * Checked exception declared by getReader. As a member of an interface
     * this class is implicitly public and static.
     */
    class TextExtractorException extends Exception {
        /**
         * @param message Description of the failure.
         */
        public TextExtractorException(String message) {
            super(message);
        }
        /**
         * @param message Description of the failure.
         * @param cause   The underlying exception, preserved as the cause.
         */
        public TextExtractorException(String message, Throwable cause) {
            super(message, cause);
        }
    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextFileExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextFileExtractor.java
@ -21,17 +21,15 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.BufferedInputStream;
 import java.io.Reader;
 import java.util.logging.Level;
 import org.apache.tika.parser.txt.CharsetDetector;
 import org.apache.tika.parser.txt.CharsetMatch;
-import org.sleuthkit.autopsy.coreutils.Logger;
+import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.ReadContentInputStream;
 /**
 * Extract text from .txt files
 */
-final class TextFileExtractor extends ContentTextExtractor {
+final class TextFileExtractor {
    //Set a Minimum confidence value to reject matches that may not have a valid text encoding
    //Values of valid text encodings were generally 100, xml code sometimes had a value around 50, 
@ -39,44 +37,30 @@ final class TextFileExtractor extends ContentTextExtractor {
    //This limited information was used to select the current value as one that would filter out clearly non-text 
    //files while hopefully working on all files with a valid text encoding
    static final private int MIN_MATCH_CONFIDENCE = 20;
    static final private Logger logger = Logger.getLogger(TextFileExtractor.class.getName());
-    @Override
+    public Reader getReader(AbstractFile source) throws TextFileExtractorException {
    boolean isContentTypeSpecific() {
        return true;
    }
    @Override
    boolean isSupported(Content file, String detectedFormat) {
        return true;
    }
    @Override
    public Reader getReader(Content source) throws TextExtractorException {
        CharsetDetector detector = new CharsetDetector();
        //wrap stream in a BufferedInputStream so that it supports the mark/reset methods necessary for the CharsetDetector
        InputStream stream = new BufferedInputStream(new ReadContentInputStream(source));
        try {
            detector.setText(stream);
        } catch (IOException ex) {
-            throw new TextExtractorException("Unable to get string from detected text in TextFileExtractor", ex);
+            throw new TextFileExtractorException("Unable to get string from detected text in TextFileExtractor", ex);
        }
        CharsetMatch match = detector.detect();
        if (match.getConfidence() < MIN_MATCH_CONFIDENCE) {
-            throw new TextExtractorException("Text does not match any character set with a high enough confidence for TextFileExtractor");
+            throw new TextFileExtractorException("Text does not match any character set with a high enough confidence for TextFileExtractor");
        }
        return match.getReader();
    }
-
+    
-    @Override
+    public class TextFileExtractorException extends Exception {
-    public boolean isDisabled() {
+        public TextFileExtractorException(String msg, Throwable ex) {
-        return false;
+            super(msg, ex);
        }
        public TextFileExtractorException(String msg) {
            super(msg);
        }
    }
    @Override
    public void logWarning(String msg, Exception ex) {
        logger.log(Level.WARNING, msg, ex);
    }
 }