mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 18:17:43 +00:00
Add tika timeout to protect against tika spinning bugs
Uses single thread thread pool to minimize impact on performance/memory
This commit is contained in:
parent
a57d441a36
commit
29893c5dae
@ -27,6 +27,9 @@ import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
|
||||
@ -35,6 +38,7 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||
import org.apache.tika.Tika;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.sleuthkit.autopsy.coreutils.StringExtract;
|
||||
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
|
||||
|
||||
/**
|
||||
* Extractor of text from TIKA supported AbstractFile content. Extracted text is
|
||||
@ -107,7 +111,8 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
|
||||
final InputStream stream = new ReadContentInputStream(sourceFile);
|
||||
try {
|
||||
Metadata meta = new Metadata();
|
||||
/* Tika parse request with timeout -- disabled for now
|
||||
//Tika parse request with timeout
|
||||
final Tika tika = new Tika(); //new tika instance for every file, to workaround tika memory issues
|
||||
ParseRequestTask parseTask = new ParseRequestTask(tika, stream, meta, sourceFile);
|
||||
final Future<?> future = tikaParseExecutor.submit(parseTask);
|
||||
try {
|
||||
@ -116,25 +121,23 @@ public class AbstractFileTikaTextExtract implements AbstractFileExtract {
|
||||
final String msg = "Tika parse timeout for content: " + sourceFile.getId() + ", " + sourceFile.getName();
|
||||
logger.log(Level.WARNING, msg);
|
||||
throw new IngesterException(msg);
|
||||
}
|
||||
catch (Exception ex) {
|
||||
} catch (Exception ex) {
|
||||
final String msg = "Unexpected exception from Tika parse task execution for file: " + sourceFile.getId() + ", " + sourceFile.getName();
|
||||
logger.log(Level.WARNING, msg, ex);
|
||||
throw new IngesterException(msg);
|
||||
}
|
||||
|
||||
reader = parseTask.getReader();
|
||||
*/
|
||||
|
||||
/*
|
||||
try {
|
||||
//Use new Tika instance for every file
|
||||
//it does seem to protect against memory errors in Tika
|
||||
//in contrast when reusing the same instance for many files
|
||||
//new tika instance for every file, to workaround tika memory issues
|
||||
Tika tika = new Tika();
|
||||
reader = tika.parse(stream, meta);
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Unable to Tika parse the content" + sourceFile.getId() + ": " + sourceFile.getName(), ex);
|
||||
reader = null;
|
||||
}
|
||||
}*/
|
||||
|
||||
if (reader == null) {
|
||||
//likely due to exception in parse()
|
||||
|
Loading…
x
Reference in New Issue
Block a user