mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-19 19:14:55 +00:00
Revert "Optimization and Bug fix - Make Tika use AbstractFile mimetype instead of recomputing"
This commit is contained in:
parent
417659bb84
commit
3c3d92f03c
@ -50,7 +50,6 @@ import org.apache.tika.parser.ParsingReader;
|
|||||||
import org.apache.tika.parser.microsoft.OfficeParserConfig;
|
import org.apache.tika.parser.microsoft.OfficeParserConfig;
|
||||||
import org.apache.tika.parser.ocr.TesseractOCRConfig;
|
import org.apache.tika.parser.ocr.TesseractOCRConfig;
|
||||||
import org.apache.tika.parser.pdf.PDFParserConfig;
|
import org.apache.tika.parser.pdf.PDFParserConfig;
|
||||||
import org.apache.tika.mime.MediaType;
|
|
||||||
import org.openide.util.NbBundle;
|
import org.openide.util.NbBundle;
|
||||||
import org.openide.modules.InstalledFileLocator;
|
import org.openide.modules.InstalledFileLocator;
|
||||||
import org.openide.util.Lookup;
|
import org.openide.util.Lookup;
|
||||||
@ -126,7 +125,7 @@ final class TikaTextExtractor implements TextExtractor {
|
|||||||
private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory);
|
private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory);
|
||||||
private static final String SQLITE_MIMETYPE = "application/x-sqlite3";
|
private static final String SQLITE_MIMETYPE = "application/x-sqlite3";
|
||||||
|
|
||||||
private final AutoDetectParser parser;
|
private final AutoDetectParser parser = new AutoDetectParser();
|
||||||
private final Content content;
|
private final Content content;
|
||||||
|
|
||||||
private boolean tesseractOCREnabled;
|
private boolean tesseractOCREnabled;
|
||||||
@ -146,23 +145,12 @@ final class TikaTextExtractor implements TextExtractor {
|
|||||||
|
|
||||||
public TikaTextExtractor(Content content) {
|
public TikaTextExtractor(Content content) {
|
||||||
this.content = content;
|
this.content = content;
|
||||||
|
|
||||||
parser = new AutoDetectParser();
|
|
||||||
|
|
||||||
if (content instanceof AbstractFile) {
|
|
||||||
AbstractFile file = (AbstractFile) content;
|
|
||||||
if (file.getMIMEType() != null && !file.getMIMEType().isEmpty()) {
|
|
||||||
//Force Tika to use our pre-computed mime type during detection
|
|
||||||
parser.setDetector((InputStream inStream, Metadata metaData)
|
|
||||||
-> MediaType.parse(file.getMIMEType()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If Tesseract has been installed and is set to be used through
|
* If Tesseract has been installed and is set to be used through
|
||||||
* configuration, then ocr is enabled. OCR can only currently be run on 64
|
* configuration, then ocr is enabled. OCR can only currently be run on
|
||||||
* bit Windows OS.
|
* 64 bit Windows OS.
|
||||||
*
|
*
|
||||||
* @return Flag indicating if OCR is set to be used.
|
* @return Flag indicating if OCR is set to be used.
|
||||||
*/
|
*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user