Renamed TextExtractor public API

2025-07-17 18:17:43 +00:00 · 2018-12-19 14:06:26 -05:00 · 2018-12-19 14:06:26 -05:00 · 1e1e166f55
commit 1e1e166f55
parent 55f7003246
15 changed files with 251 additions and 244 deletions
--- a/Core/nbproject/project.xml
+++ b/Core/nbproject/project.xml
@ -338,8 +338,8 @@
                <package>org.sleuthkit.autopsy.modules.vmextractor</package>
                <package>org.sleuthkit.autopsy.progress</package>
                <package>org.sleuthkit.autopsy.report</package>
-                <package>org.sleuthkit.autopsy.textreaders</package>
-                <package>org.sleuthkit.autopsy.textreaders.textreaderconfigs</package>
+                <package>org.sleuthkit.autopsy.textextractors</package>
+                <package>org.sleuthkit.autopsy.textextractors.textextractorconfigs</package>
                <package>org.sleuthkit.autopsy.texttranslation</package>
                <package>org.sleuthkit.datamodel</package>
            </public-packages>
--- a/Core/src/org/sleuthkit/autopsy/textextractors/ArtifactTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/ArtifactTextExtractor.java
@ -16,7 +16,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.textreaders;
+package org.sleuthkit.autopsy.textextractors;

 import java.io.InputStreamReader;
 import java.io.Reader;
@ -83,7 +83,7 @@ class ArtifactTextExtractor extends TextExtractor {
    }

    @Override
-    public boolean isSupported(Content file, String detectedFormat) {
+    public boolean isSupported() {
        return true;
    }
 }
--- a/Core/src/org/sleuthkit/autopsy/textextractors/Bundle.properties
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/Bundle.properties
--- a/Core/src/org/sleuthkit/autopsy/textextractors/Bundle_ja.properties
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/Bundle_ja.properties
--- a/Core/src/org/sleuthkit/autopsy/textextractors/HtmlTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/HtmlTextExtractor.java
@ -16,7 +16,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.textreaders;
+package org.sleuthkit.autopsy.textextractors;

 import java.io.IOException;
 import java.io.Reader;
@ -32,6 +32,7 @@ import net.htmlparser.jericho.Source;
 import net.htmlparser.jericho.StartTag;
 import net.htmlparser.jericho.StartTagType;
 import org.sleuthkit.autopsy.coreutils.Logger;
+import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.ReadContentInputStream;

@ -42,7 +43,7 @@ final class HtmlTextExtractor extends TextExtractor {

    static final private Logger logger = Logger.getLogger(HtmlTextExtractor.class.getName());
    private final int MAX_SIZE;
-    private final Content file;
+    private final AbstractFile file;

    static final List<String> WEB_MIME_TYPES = Arrays.asList(
            "application/javascript", //NON-NLS
@ -62,7 +63,7 @@ final class HtmlTextExtractor extends TextExtractor {
     * Creates a default instance of the HtmlTextExtractor. Supported file size
     * is 50MB.
     */
-    public HtmlTextExtractor(Content file) {
+    public HtmlTextExtractor(AbstractFile file) {
        //Set default to be 50 MB.
        MAX_SIZE = 50_000_000;
        this.file = file;
@ -77,10 +78,10 @@ final class HtmlTextExtractor extends TextExtractor {
     * @return flag indicating support
     */
    @Override
-    public boolean isSupported(Content content, String detectedFormat) {
-        return detectedFormat != null
-                && WEB_MIME_TYPES.contains(detectedFormat)
-                && content.getSize() <= MAX_SIZE;
+    public boolean isSupported() {
+        return file.getMIMEType() != null
+                && WEB_MIME_TYPES.contains(file.getMIMEType())
+                && file.getSize() <= MAX_SIZE;
    }

    /**
--- a/Core/src/org/sleuthkit/autopsy/textextractors/SqliteTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/SqliteTextExtractor.java
@ -16,7 +16,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.textreaders;
+package org.sleuthkit.autopsy.textextractors;

 import java.io.IOException;
 import java.io.Reader;
@ -28,7 +28,6 @@ import org.sleuthkit.autopsy.coreutils.SQLiteTableReaderException;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.SQLiteTableReader;
 import org.sleuthkit.datamodel.AbstractFile;
-import org.sleuthkit.datamodel.Content;

 /**
 * Extracts text from SQLite database files.
@ -45,8 +44,8 @@ final class SqliteTextExtractor extends TextExtractor {
    private static final Logger logger = Logger.getLogger(SqliteTextExtractor.class.getName());
    private final AbstractFile file;

-    public SqliteTextExtractor(Content file) {
-        this.file = (AbstractFile) file;
+    public SqliteTextExtractor(AbstractFile file) {
+        this.file = file;
    }
    /**
     * Supports only the sqlite mimetypes
@ -57,8 +56,8 @@ final class SqliteTextExtractor extends TextExtractor {
     * @return true if x-sqlite3
     */
    @Override
-    public boolean isSupported(Content file, String detectedFormat) {
-        return SQLITE_MIMETYPE.equals(detectedFormat);
+    public boolean isSupported() {
+        return SQLITE_MIMETYPE.equals(file.getMIMEType());
    }

    /**
--- a/Core/src/org/sleuthkit/autopsy/textextractors/StringsTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/StringsTextExtractor.java
@ -16,7 +16,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.textreaders;
+package org.sleuthkit.autopsy.textextractors;

 import java.io.IOException;
 import java.io.InputStream;
@ -28,7 +28,7 @@ import java.util.Objects;
 import org.openide.util.Lookup;
 import org.sleuthkit.autopsy.coreutils.StringExtract;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
-import org.sleuthkit.autopsy.textreaders.textreaderconfigs.StringsConfig;
+import org.sleuthkit.autopsy.textextractors.textextractorconfigs.StringsConfig;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.TskCoreException;
 import org.sleuthkit.datamodel.TskException;
@ -36,7 +36,7 @@ import org.sleuthkit.datamodel.TskException;
 /**
 * Extracts raw strings from content.
 */
-final class StringsTextExtractor {
+final class StringsTextExtractor extends TextExtractor {

    private boolean extractUTF8;
    private boolean extractUTF16;
@ -81,6 +81,7 @@ final class StringsTextExtractor {
     * @throws
     * org.sleuthkit.autopsy.textextractors.TextExtractor.TextExtractorException
     */
+    @Override
    public InputStreamReader getReader() {
        InputStream stringStream = getInputStream(content);
        return new InputStreamReader(stringStream, Charset.forName(DEFAULT_INDEXED_TEXT_CHARSET));
@ -104,6 +105,7 @@ final class StringsTextExtractor {
     *
     * @param context Lookup instance containing config classes
     */
+    @Override
    public void setExtractionSettings(Lookup context) {
        if (context != null) {
            StringsConfig configInstance = context.lookup(StringsConfig.class);
@ -126,14 +128,11 @@ final class StringsTextExtractor {
     *
     * @return
     */
-    public boolean isEnabled() {
+    @Override
+    public boolean isSupported() {
        return extractUTF8 || extractUTF16;
    }
-
-    boolean isSupported(Content file, String detectedFormat) {
-        throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
-    }
-
+    
    /**
     * Content input string stream reader/converter - given Content, extract
     * strings from it and return encoded bytes via read()
--- a/Core/src/org/sleuthkit/autopsy/textextractors/TextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/TextExtractor.java
@ -16,38 +16,26 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.textreaders;
+package org.sleuthkit.autopsy.textextractors;

 import java.io.Reader;
 import org.openide.util.Lookup;
-import org.sleuthkit.datamodel.Content;

 /**
 * Extracts the text out of Content instances and exposes them as a Reader.
 * Concrete implementations can be obtained from
 * {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory}
 */
-abstract class TextExtractor {
+public abstract class TextExtractor {

    /**
-     * Determines if the file content is supported by the extractor.
+     * Determines if this extractor supports the given Content and
+     * configurations passed into it in
+     * {@link org.sleuthkit.autopsy.textextractors.TextExtractorFactory}.
     *
-     * @param file           to test if its content should be supported
-     * @param detectedFormat mime-type with detected format (such as text/plain)
-     *                       or null if not detected
-     *
-     * @return true if the file content is supported, false otherwise
+     * @return true if content is supported, false otherwise
     */
-    abstract boolean isSupported(Content file, String detectedFormat);
-
-    /**
-     * Determines if the TextExtractor instance is enabled to read content.
-     *
-     * @return
-     */
-    boolean isEnabled() {
-        return true;
-    }
+    abstract boolean isSupported();

    /**
     * Get a Reader that will iterate over the text extracted from the Content
@ -75,8 +63,7 @@ abstract class TextExtractor {
    }

    /**
-     * Exception encountered during
-     * {@link org.sleuthkit.autopsy.textextractors.TextExtractor#getReader()}.
+     * Exception encountered during TextExtractor.getReader().
     * This indicates that there was an internal parsing error that occurred
     * during the reading of Content text.
     */
--- a/Core/src/org/sleuthkit/autopsy/textextractors/TextExtractorFactory.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/TextExtractorFactory.java
@ -0,0 +1,160 @@
+/*
+ * Autopsy Forensic Browser
+ *
+ * Copyright 2018-2018 Basis Technology Corp.
+ * Contact: carrier <at> sleuthkit <dot> org
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.sleuthkit.autopsy.textextractors;
+
+import java.util.Arrays;
+import java.util.List;
+import org.openide.util.Lookup;
+import org.sleuthkit.datamodel.AbstractFile;
+import org.sleuthkit.datamodel.BlackboardArtifact;
+import org.sleuthkit.datamodel.Content;
+import org.sleuthkit.datamodel.Report;
+
+/**
+ * Factory for creating TextExtractors given a Content instance
+ *
+ * See {@link org.sleuthkit.autopsy.textextractors.textextractorconfigs} for
+ * available extractor configuration options.
+ *
+ * @see org.openide.util.Lookup
+ */
+public class TextExtractorFactory {
+
+    /**
+     * Returns a TextExtractor containing the Content text. Configuration files
+     * can be added to the Lookup.
+     *
+     * See {@link org.sleuthkit.autopsy.textextractors.textextractorconfigs} for
+     * available extractor configuration options.
+     *
+     * @param content Content source that will be read from
+     * @param context Contains extraction configurations for certain file types
+     *
+     * @return TextExtractor containing file text
+     *
+     * @throws NoTextExtractorFound Encountered when no TextExtractor supports
+     *                              the given content; the reader itself is
+     *                              created later by TextExtractor.getReader().
+     *
+     * @see org.openide.util.Lookup
+     */
+    public static TextExtractor getExtractor(Content content, Lookup context) throws NoTextExtractorFound {
+        if (content instanceof AbstractFile) {
+            for (TextExtractor extractor : getFileExtractors((AbstractFile) content, context)) {
+                if (extractor.isSupported()) {
+                    return extractor;
+                }
+            }
+        } else if (content instanceof BlackboardArtifact) {
+            TextExtractor artifactExtractor = new ArtifactTextExtractor((BlackboardArtifact) content);
+            artifactExtractor.setExtractionSettings(context);
+            return artifactExtractor;
+        } else if (content instanceof Report) {
+            TextExtractor reportExtractor = new TikaTextExtractor(content);
+            reportExtractor.setExtractionSettings(context);
+            return reportExtractor;
+        }
+
+        throw new NoTextExtractorFound(
+                String.format("Could not find a suitable reader for "
+                        + "content with name [%s] and id=[%d]. Try using "
+                        + "the strings extractor instead.",
+                        content.getName(), content.getId())
+        );
+    }
+
+    /**
+     * Initializes, orders, and returns all file extractors that can read
+     * AbstractFile instances.
+     *
+     * @param content AbstractFile content
+     * @param context Lookup containing extractor configurations
+     *
+     * @return The file extractors, in priority order, configured with context
+     */
+    private static List<TextExtractor> getFileExtractors(AbstractFile content, Lookup context) {
+        List<TextExtractor> fileExtractors = Arrays.asList(
+                new HtmlTextExtractor(content),
+                new SqliteTextExtractor(content),
+                new TikaTextExtractor(content));
+
+        fileExtractors.forEach((fileExtractor) -> {
+            fileExtractor.setExtractionSettings(context);
+        });
+
+        return fileExtractors;
+    }
+
+    /**
+     * Returns a TextExtractor containing the Content text.
+     *
+     * @param content Content instance that will be read from
+     *
+     * @return TextExtractor containing file text
+     *
+     * @throws NoTextExtractorFound Encountered when no extractor was found
+     *                              for the given content type. Use the
+     *                              getStringsExtractor(Content,Lookup) method
+     *                              instead.
+     */
+    public static TextExtractor getExtractor(Content content) throws NoTextExtractorFound {
+        return TextExtractorFactory.getExtractor(content, null);
+    }
+
+    /**
+     * Returns a TextExtractor containing the Content strings. This method
+     * supports all content types. This method should be used as a backup in the
+     * event that no extractor was found using getExtractor(Content) or
+     * getExtractor(Content, Lookup).
+     *
+     * Configure this extractor with the StringsConfig in
+     * {@link org.sleuthkit.autopsy.textextractors.textextractorconfigs}
+     *
+     * @param content Content source to read from
+     * @param context Contains extraction configurations for certain file types
+     *
+     * @return TextExtractor containing file text
+     *
+     * @see org.openide.util.Lookup
+     */
+    public static TextExtractor getStringsExtractor(Content content, Lookup context) {
+        StringsTextExtractor stringsInstance = new StringsTextExtractor(content);
+        stringsInstance.setExtractionSettings(context);
+        return stringsInstance;
+    }
+
+    /**
+     * System level exception for handling content types that have no specific
+     * strategy defined for extracting their text.
+     */
+    public static class NoTextExtractorFound extends Exception {
+
+        public NoTextExtractorFound(String msg) {
+            super(msg);
+        }
+
+        public NoTextExtractorFound(Throwable ex) {
+            super(ex);
+        }
+
+        private NoTextExtractorFound(String msg, Throwable ex) {
+            super(msg, ex);
+        }
+    }
+}
--- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
@ -16,7 +16,7 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.textreaders;
+package org.sleuthkit.autopsy.textextractors;

 import com.google.common.collect.ImmutableList;
 import com.google.common.io.CharSource;
@ -61,7 +61,7 @@ import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
 import org.sleuthkit.autopsy.coreutils.ExecUtil;
 import org.sleuthkit.autopsy.coreutils.ExecUtil.ProcessTerminator;
 import org.sleuthkit.autopsy.coreutils.PlatformUtil;
-import org.sleuthkit.autopsy.textreaders.textreaderconfigs.ImageConfig;
+import org.sleuthkit.autopsy.textextractors.textextractorconfigs.ImageConfig;
 import org.sleuthkit.autopsy.datamodel.ContentUtils;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.Content;
@ -318,7 +318,7 @@ final class TikaTextExtractor extends TextExtractor {
            }
        }
    }
-
+    
    /**
     * Wraps the creation of a TikaReader into a Future so that it can be
     * cancelled.
@ -422,24 +422,27 @@ final class TikaTextExtractor extends TextExtractor {
    }

    /**
-     * Determines if Tika is supported for this content type and mimetype.
-     *
-     * @param content        Source content to read
-     * @param detectedFormat Mimetype of content
+     * Determines if Tika is enabled for this content
     *
     * @return Flag indicating support for reading content type
     */
    @Override
-    public boolean isSupported(Content content, String detectedFormat) {
-        if (detectedFormat == null
-                || BINARY_MIME_TYPES.contains(detectedFormat) //any binary unstructured blobs (string extraction will be used)
-                || ARCHIVE_MIME_TYPES.contains(detectedFormat)
-                || (detectedFormat.startsWith("video/") && !detectedFormat.equals("video/x-flv")) //skip video other than flv (tika supports flv only) //NON-NLS
-                || detectedFormat.equals(SQLITE_MIMETYPE) //Skip sqlite files, Tika cannot handle virtual tables and will fail with an exception. //NON-NLS
+    public boolean isSupported() {
+        if(!(content instanceof AbstractFile)) {
+            return false;
+        }
+        
+        String detectedType = ((AbstractFile)content).getMIMEType();
+        if (detectedType == null
+                || BINARY_MIME_TYPES.contains(detectedType) //any binary unstructured blobs (string extraction will be used)
+                || ARCHIVE_MIME_TYPES.contains(detectedType)
+                || (detectedType.startsWith("video/") && !detectedType.equals("video/x-flv")) //skip video other than flv (tika supports flv only) //NON-NLS
+                || detectedType.equals(SQLITE_MIMETYPE) //Skip sqlite files, Tika cannot handle virtual tables and will fail with an exception. //NON-NLS
                ) {
            return false;
        }
-        return TIKA_SUPPORTED_TYPES.contains(detectedFormat);
+        
+        return TIKA_SUPPORTED_TYPES.contains(detectedType);
    }

    /**
--- a/Core/src/org/sleuthkit/autopsy/textextractors/textextractorconfigs/ImageConfig.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/textextractorconfigs/ImageConfig.java
@ -16,11 +16,11 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.textreaders.textreaderconfigs;
+package org.sleuthkit.autopsy.textextractors.textextractorconfigs;

 /**
- * Allows for configuration of OCR on image files. Readers that use ImageConfig
- * can be obtained through {@link org.sleuthkit.autopsy.textreaders.TextReaders}
+ * Allows for configuration of OCR on image files. Extractors that use ImageConfig
+ * can be obtained through TextExtractorFactory.getExtractor().
 *
 * @see org.openide.util.Lookup
 */
--- a/Core/src/org/sleuthkit/autopsy/textextractors/textextractorconfigs/StringsConfig.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/textextractorconfigs/StringsConfig.java
@ -16,20 +16,20 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.sleuthkit.autopsy.textreaders.textreaderconfigs;
+package org.sleuthkit.autopsy.textextractors.textextractorconfigs;

 import java.util.List;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;

 /**
- * Allows for configuration of the Reader obtained from
- * {@link org.sleuthkit.autopsy.textreaders.TextReader#getStringsReader(Content, Lookup)}.
+ * Allows for configuration of the TextExtractor obtained from
+ * TextExtractorFactory.getStringsExtractor().
 *
- * The strings reader will read strings from the Content instance. This class
+ * The strings extractor will extract strings from the Content instance. This class
 * allows for the configuration of the encoding and language scripts used during
 * reading.
 *
- * @see org.sleuthkit.autopsy.textreaders.TextReaders
+ * @see org.sleuthkit.autopsy.textextractors.TextExtractorFactory
 * @see
 * org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT
 * @see org.openide.util.Lookup
@ -77,8 +77,8 @@ public class StringsConfig {
    }

    /**
-     * Sets the type of language scripts that will be used during this
-     * reading. See
+     * Sets the type of language scripts that will be used during this reading.
+     * See
     * {@link org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT}
     * for more information about available scripts.
     *
--- a/Core/src/org/sleuthkit/autopsy/textreaders/TextReaders.java
+++ b/Core/src/org/sleuthkit/autopsy/textreaders/TextReaders.java
@ -1,152 +0,0 @@
-/*
- * Autopsy Forensic Browser
- *
- * Copyright 2018-2018 Basis Technology Corp.
- * Contact: carrier <at> sleuthkit <dot> org
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.sleuthkit.autopsy.textreaders;
-
-import java.io.Reader;
-import java.util.Arrays;
-import java.util.List;
-import org.openide.util.Lookup;
-import org.sleuthkit.autopsy.textreaders.TextExtractor.ExtractionException;
-import org.sleuthkit.datamodel.AbstractFile;
-import org.sleuthkit.datamodel.BlackboardArtifact;
-import org.sleuthkit.datamodel.Content;
-import org.sleuthkit.datamodel.Report;
-
-/**
- * Factory for creating Readers given a Content instance
- *
- * See {@link org.sleuthkit.autopsy.textreaders.textreaderconfigs} for available
- * Reader configuration options.
- *
- * @see org.openide.util.Lookup
- */
-public class TextReaders {
-
-    /**
-     * Returns a reader containing the Content text. Configuration files can be
-     * added to the Lookup.
-     *
-     * See {@link org.sleuthkit.autopsy.textreaders.textreaderconfigs} for
-     * available Reader configuration options.
-     *
-     * @param content Content source that will be read from
-     * @param context Contains extraction configurations for certain file types
-     *
-     * @return Reader containing file text
-     *
-     * @throws NoTextReaderFound Encountered when there is no Reader found for
-     *                           the given content type or there was an error
-     *                           while creating the reader.
-     *
-     * @see org.openide.util.Lookup
-     */
-    public static Reader getReader(Content content,
-            Lookup context) throws NoTextReaderFound {
-        try {
-            if (content instanceof AbstractFile) {
-                String mimeType = ((AbstractFile) content).getMIMEType();
-                List<TextExtractor> extractors = Arrays.asList(
-                        new HtmlTextExtractor(content),
-                        new SqliteTextExtractor(content),
-                        new TikaTextExtractor(content));
-                for (TextExtractor extractor : extractors) {
-                    extractor.setExtractionSettings(context);
-                    if (extractor.isEnabled() && extractor.isSupported(content, mimeType)) {
-                        return extractor.getReader();
-                    }
-                }
-            } else if (content instanceof BlackboardArtifact) {
-                TextExtractor artifactExtractor = new ArtifactTextExtractor((BlackboardArtifact) content);
-                artifactExtractor.setExtractionSettings(context);
-                return artifactExtractor.getReader();
-            } else if (content instanceof Report) {
-                TextExtractor reportExtractor = new TikaTextExtractor(content);
-                reportExtractor.setExtractionSettings(context);
-                return reportExtractor.getReader();
-            }
-        } catch (ExtractionException ex) {
-            throw new NoTextReaderFound("Error while getting reader", ex);
-        }
-
-        throw new NoTextReaderFound(
-                String.format("Could not find a suitable reader for "
-                        + "content with name [%s] and id=[%d]. Try using "
-                        + "the default reader instead.",
-                        content.getName(), content.getId())
-        );
-    }
-
-    /**
-     * Returns a reader containing the Content text.
-     *
-     * @param content Content instance that will be read from
-     *
-     * @return Reader containing file text
-     *
-     * @throws NoTextReaderFound Encountered when there is no Reader was found
-     *                           for the given content type. Use
-     *                           getStringsReader(Content,Lookup) method
-     *                           instead.
-     */
-    public static Reader getReader(Content content)
-            throws NoTextReaderFound {
-        return TextReaders.getReader(content, null);
-    }
-
-    /**
-     * Returns a Reader containing the Content strings. This method supports all
-     * content types. This method should be used as a backup in the event that
-     * no reader was found using getReader(Content) or getReader(Content,
-     * Lookup).
-     *
-     * Configure this reader with the StringsConfig in
-     * {@link org.sleuthkit.autopsy.textreaders.textreaderconfigs}
-     *
-     * @param content Content source to read from
-     * @param context Contains extraction configurations for certain file types
-     *
-     * @return Reader containing file text
-     *
-     * @see org.openide.util.Lookup
-     */
-    public static Reader getStringsReader(Content content, Lookup context) {
-        StringsTextExtractor stringsInstance = new StringsTextExtractor(content);
-        stringsInstance.setExtractionSettings(context);
-        return stringsInstance.getReader();
-    }
-
-    /**
-     * System level exception for handling content types that have no specific
-     * strategy defined for extracting their text.
-     */
-    public static class NoTextReaderFound extends Exception {
-
-        public NoTextReaderFound(String msg) {
-            super(msg);
-        }
-
-        public NoTextReaderFound(Throwable ex) {
-            super(ex);
-        }
-
-        private NoTextReaderFound(String msg, Throwable ex) {
-            super(msg, ex);
-        }
-    }
-}
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
@ -25,6 +25,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.logging.Level;
+import org.openide.util.Exceptions;
 import org.openide.util.Lookup;
 import org.openide.util.NbBundle;
 import org.openide.util.NbBundle.Messages;
@ -44,9 +45,10 @@ import org.sleuthkit.autopsy.keywordsearch.TextFileExtractor.TextFileExtractorEx
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
 import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector;
-import org.sleuthkit.autopsy.textreaders.TextReaders;
-import org.sleuthkit.autopsy.textreaders.textreaderconfigs.ImageConfig;
-import org.sleuthkit.autopsy.textreaders.textreaderconfigs.StringsConfig;
+import org.sleuthkit.autopsy.textextractors.TextExtractor;
+import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
+import org.sleuthkit.autopsy.textextractors.textextractorconfigs.ImageConfig;
+import org.sleuthkit.autopsy.textextractors.textextractorconfigs.StringsConfig;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.TskData;
 import org.sleuthkit.datamodel.TskData.FileKnown;
@ -480,10 +482,11 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
            Lookup extractionContext = Lookups.fixed(imageConfig);
            
            try {
-                Reader specializedReader = TextReaders.getReader(aFile,extractionContext);
+                TextExtractor extractor = TextExtractorFactory.getExtractor(aFile,extractionContext);
+                Reader extractedTextReader = extractor.getReader();
                //divide into chunks and index
-                return Ingester.getDefault().indexText(specializedReader,aFile.getId(),aFile.getName(), aFile, context);
-            } catch (TextReaders.NoTextReaderFound ex) {
+                return Ingester.getDefault().indexText(extractedTextReader,aFile.getId(),aFile.getName(), aFile, context);
+            } catch (TextExtractorFactory.NoTextExtractorFound | TextExtractor.ExtractionException ex) {
                //No text extractor found... run the default instead
                return false;
            }
@ -502,8 +505,9 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
                if (context.fileIngestIsCancelled()) {
                    return true;
                }
-                Reader stringsReader = TextReaders.getStringsReader(aFile, stringsExtractionContext);
-                if (Ingester.getDefault().indexText(stringsReader,aFile.getId(),aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) {
+                TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(aFile, stringsExtractionContext);
+                Reader extractedTextReader = stringsExtractor.getReader();
+                if (Ingester.getDefault().indexText(extractedTextReader,aFile.getId(),aFile.getName(), aFile, KeywordSearchIngestModule.this.context)) {
                    putIngestStatus(jobId, aFile.getId(), IngestStatus.STRINGS_INGESTED);
                    return true;
                } else {
@ -511,7 +515,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
                    putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
                    return false;
                }
-            } catch (IngesterException ex) {
+            } catch (IngesterException | TextExtractor.ExtractionException ex) {
                logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex);  //NON-NLS
                putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_INDEXING);
                return false;
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SolrSearchService.java
@ -46,7 +46,8 @@ import org.sleuthkit.autopsy.appservices.AutopsyService;
 import org.sleuthkit.autopsy.progress.ProgressIndicator;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchService;
 import org.sleuthkit.autopsy.keywordsearchservice.KeywordSearchServiceException;
-import org.sleuthkit.autopsy.textreaders.TextReaders;
+import org.sleuthkit.autopsy.textextractors.TextExtractor;
+import org.sleuthkit.autopsy.textextractors.TextExtractorFactory;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.TskCoreException;
@ -114,22 +115,26 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {
                return;
            }
            try {
-                Reader blackboardReader = TextReaders.getReader(content, null);
+                TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor(content, null);
+                Reader blackboardExtractedTextReader = blackboardExtractor.getReader();
                String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
                ingester.indexMetaDataOnly(artifact, sourceName);
-                ingester.indexText(blackboardReader, artifact.getArtifactID(), sourceName, content, null);
-            } catch (Ingester.IngesterException | TextReaders.NoTextReaderFound ex) {
+                ingester.indexText(blackboardExtractedTextReader, artifact.getArtifactID(), sourceName, content, null);
+            } catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.ExtractionException ex) {
                throw new TskCoreException(ex.getCause().getMessage(), ex);
            }
        } else {
            try {
-                Reader contentReader = TextReaders.getReader(content, null);
-                ingester.indexText(contentReader, content.getId(), content.getName(), content, null);
-            } catch (TextReaders.NoTextReaderFound | Ingester.IngesterException ex) {
+                TextExtractor contentExtractor = TextExtractorFactory.getExtractor(content, null);
+                Reader contentExtractedTextReader = contentExtractor.getReader();
+                ingester.indexText(contentExtractedTextReader, content.getId(), content.getName(), content, null);
+            } catch (TextExtractorFactory.NoTextExtractorFound | Ingester.IngesterException | TextExtractor.ExtractionException ex) {
                try {
                    // Try the StringsTextExtractor if Tika extractions fails.
-                    ingester.indexText(TextReaders.getStringsReader(content, null),content.getId(),content.getName(), content, null);
-                } catch (Ingester.IngesterException ex1) {
+                    TextExtractor stringsExtractor = TextExtractorFactory.getStringsExtractor(content, null);
+                    Reader stringsExtractedTextReader = stringsExtractor.getReader();
+                    ingester.indexText(stringsExtractedTextReader,content.getId(),content.getName(), content, null);
+                } catch (Ingester.IngesterException | TextExtractor.ExtractionException ex1) {
                    throw new TskCoreException(ex.getCause().getMessage(), ex1);
                }
            }
@ -443,10 +448,11 @@ public class SolrSearchService implements KeywordSearchService, AutopsyService {

        try {
            String sourceName = artifact.getDisplayName() + "_" + artifact.getArtifactID();
-            Reader contentSpecificReader = TextReaders.getReader((Content) artifact, null);
+            TextExtractor blackboardExtractor = TextExtractorFactory.getExtractor((Content) artifact, null);
+            Reader blackboardExtractedTextReader = blackboardExtractor.getReader();
            ingester.indexMetaDataOnly(artifact, sourceName);
-            ingester.indexText(contentSpecificReader, artifact.getId(), sourceName, artifact, null);
-        } catch (Ingester.IngesterException | TextReaders.NoTextReaderFound ex) {
+            ingester.indexText(blackboardExtractedTextReader, artifact.getId(), sourceName, artifact, null);
+        } catch (Ingester.IngesterException | TextExtractorFactory.NoTextExtractorFound | TextExtractor.ExtractionException ex) {
            throw new TskCoreException(ex.getCause().getMessage(), ex);
        }
    }