From 5d46793d45572d81d07ee0c755f7ac6f3b42a27f Mon Sep 17 00:00:00 2001
From: Ethan Roseman <eroseman@basistech.com>
Date: Wed, 14 Aug 2019 17:32:35 -0400
Subject: [PATCH] Detect text charset once and pass it to TextFileExtractor

---
 .../KeywordSearchIngestModule.java              | 17 ++++++++---------
 .../keywordsearch/TextFileExtractor.java        | 11 ++++++++---
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
index 119deb8a71..131145943d 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java
@@ -628,6 +628,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
 
             boolean wasTextAdded = false;
 
+            Charset decodetectCharset = null;
             //extract text with one of the extractors, divide into chunks and index with Solr
             try {
                 //logger.log(Level.INFO, "indexing: " + aFile.getName());
@@ -638,12 +639,10 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
                     extractStringsAndIndex(aFile);
                     return;
                 }
-                if (fileType.equals(MimeTypes.PLAIN_TEXT)) {
-                    Charset detectedCharset = TextExtractor.getDecodetectCharset(aFile);
-                    if (detectedCharset != null) {
-                        indexTextFile(aFile);
-                        return;
-                    }
+                decodetectCharset = TextExtractor.getDecodetectCharset(aFile);
+                if (fileType.equals(MimeTypes.PLAIN_TEXT) && decodetectCharset != null) {
+                    indexTextFile(aFile, decodetectCharset);
+                    return;
                 }
                 if (!extractTextAndIndex(aFile)) {
                     // Text extractor not found for file. Extract string only.
@@ -666,7 +665,7 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
             if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
                 //Carved Files should be the only type of unallocated files capable of a txt extension and 
                 //should be ignored by the TextFileExtractor because they may contain more than one text encoding
-                wasTextAdded = indexTextFile(aFile);
+                wasTextAdded = indexTextFile(aFile, decodetectCharset);
             }
 
             // if it wasn't supported or had an error, default to strings
@@ -675,9 +674,9 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
             }
         }
 
-        private boolean indexTextFile(AbstractFile aFile) {
+        private boolean indexTextFile(AbstractFile aFile, Charset detectedCharset) {
             try {
-                TextFileExtractor textFileExtractor = new TextFileExtractor();
+                TextFileExtractor textFileExtractor = new TextFileExtractor(detectedCharset);
                 Reader textReader = textFileExtractor.getReader(aFile);
                 if (textReader == null) {
                     logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextFileExtractor.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextFileExtractor.java
index 1ea5e4d3ed..139645dab7 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextFileExtractor.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TextFileExtractor.java
@@ -42,12 +42,17 @@ final class TextFileExtractor {
     //files while hopefully working on all files with a valid text encoding
     static final private int MIN_MATCH_CONFIDENCE = 20;
 
+    private final Charset detectedCharset;
+
+    TextFileExtractor(Charset detectedCharset) {
+        this.detectedCharset = detectedCharset;
+    }
+
     public Reader getReader(AbstractFile source) throws TextFileExtractorException {
         String mimeType = source.getMIMEType();
         if (mimeType.equals(MimeTypes.PLAIN_TEXT)) {
-            Charset decodetectCharset = TextExtractor.getDecodetectCharset(source);
-            if (decodetectCharset != null) {
-                return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(source)), decodetectCharset);
+            if (detectedCharset != null) {
+                return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(source)), detectedCharset);
             }
         }