mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-13 00:16:16 +00:00
Minor adjustments to keyword search: add the htm extension to the list of extensions supported by Solr, and set the content type of extracted text to a more compliant value
This commit is contained in:
parent
2e26827371
commit
8fbad4fe56
@ -91,7 +91,7 @@ public class FsContentStringStream implements ContentStream {
|
||||
|
||||
@Override
|
||||
public String getContentType() {
|
||||
return encoding.toString();
|
||||
return "text/plain; charset = " + encoding.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -43,13 +43,8 @@ import org.sleuthkit.datamodel.TskData;
|
||||
class GetIngestableFilesContentVisitor extends GetFilesContentVisitor {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(GetIngestableFilesContentVisitor.class.getName());
|
||||
// TODO: use a more robust method than checking file extension to determine
|
||||
// whether to try a file
|
||||
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
|
||||
private static final String[] supportedExtensions = {"tar", "jar", "zip", "bzip2",
|
||||
"gz", "tgz", "doc", "xls", "ppt", "rtf", "pdf", "html", "xhtml", "txt",
|
||||
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
||||
"pst", "xml", "class"};
|
||||
|
||||
private static final String[] supportedExtensions = KeywordSearchIngestService.ingestibleExtensions;
|
||||
// the full predicate of a SQLite statement to match supported extensions
|
||||
private static final String extensionsLikePredicate;
|
||||
|
||||
|
@ -73,8 +73,12 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
private volatile int messageID = 0;
|
||||
private volatile boolean finalRun = false;
|
||||
private SleuthkitCase caseHandle = null;
|
||||
private static final String[] ingestibleExtensions = {"tar", "jar", "zip", "bzip2",
|
||||
"gz", "tgz", "doc", "xls", "ppt", "rtf", "pdf", "html", "xhtml", "txt",
|
||||
|
||||
// TODO: use a more robust method than checking file extension to determine
|
||||
// whether to try a file
|
||||
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
|
||||
static final String[] ingestibleExtensions = {"tar", "jar", "zip", "bzip2",
|
||||
"gz", "tgz", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt",
|
||||
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
||||
"pst", "xml", "class"};
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user