Revert "Optimization and Bug fix - Make Tika use AbstractFile mimetype instead of recomputing"

2025-07-19 19:14:55 +00:00 · 2019-02-13 15:43:04 -05:00 · 2019-02-13 15:43:04 -05:00 · 3c3d92f03c
commit 3c3d92f03c
parent 417659bb84
1 changed files with 14 additions and 26 deletions
--- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
+++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java
@@ -50,7 +50,6 @@ import org.apache.tika.parser.ParsingReader;
 import org.apache.tika.parser.microsoft.OfficeParserConfig;
 import org.apache.tika.parser.ocr.TesseractOCRConfig;
 import org.apache.tika.parser.pdf.PDFParserConfig;
-import org.apache.tika.mime.MediaType;
 import org.openide.util.NbBundle;
 import org.openide.modules.InstalledFileLocator;
 import org.openide.util.Lookup;
@@ -126,7 +125,7 @@ final class TikaTextExtractor implements TextExtractor {
    private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory);
    private static final String SQLITE_MIMETYPE = "application/x-sqlite3";
-    private final AutoDetectParser parser;
+    private final AutoDetectParser parser = new AutoDetectParser();
    private final Content content;
    private boolean tesseractOCREnabled;
@@ -146,23 +145,12 @@ final class TikaTextExtractor implements TextExtractor {
    public TikaTextExtractor(Content content) {
        this.content = content;
-        parser = new AutoDetectParser();
-        if (content instanceof AbstractFile) {
-            AbstractFile file = (AbstractFile) content;
-            if (file.getMIMEType() != null && !file.getMIMEType().isEmpty()) {
-                //Force Tika to use our pre-computed mime type during detection
-                parser.setDetector((InputStream inStream, Metadata metaData)
-                        -> MediaType.parse(file.getMIMEType()));
-            }
-        }
    }
    /**
     * If Tesseract has been installed and is set to be used through
-     * configuration, then ocr is enabled. OCR can only currently be run on 64
+     * configuration, then ocr is enabled. OCR can only currently be run on
-     * bit Windows OS.
+     * 64 bit Windows OS.
     *
     * @return Flag indicating if OCR is set to be used.
     */