mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-13 08:26:15 +00:00
Minor adjustments to keyword search: add the "htm" extension to the list of extensions supported by Solr, and set the content type of extracted text to a more compliant value
This commit is contained in:
parent
2e26827371
commit
8fbad4fe56
@ -91,7 +91,7 @@ public class FsContentStringStream implements ContentStream {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getContentType() {
|
public String getContentType() {
|
||||||
return encoding.toString();
|
return "text/plain; charset = " + encoding.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -43,13 +43,8 @@ import org.sleuthkit.datamodel.TskData;
|
|||||||
class GetIngestableFilesContentVisitor extends GetFilesContentVisitor {
|
class GetIngestableFilesContentVisitor extends GetFilesContentVisitor {
|
||||||
|
|
||||||
private static final Logger logger = Logger.getLogger(GetIngestableFilesContentVisitor.class.getName());
|
private static final Logger logger = Logger.getLogger(GetIngestableFilesContentVisitor.class.getName());
|
||||||
// TODO: use a more robust method than checking file extension to determine
|
|
||||||
// whether to try a file
|
private static final String[] supportedExtensions = KeywordSearchIngestService.ingestibleExtensions;
|
||||||
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
|
|
||||||
private static final String[] supportedExtensions = {"tar", "jar", "zip", "bzip2",
|
|
||||||
"gz", "tgz", "doc", "xls", "ppt", "rtf", "pdf", "html", "xhtml", "txt",
|
|
||||||
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
|
||||||
"pst", "xml", "class"};
|
|
||||||
// the full predicate of a SQLite statement to match supported extensions
|
// the full predicate of a SQLite statement to match supported extensions
|
||||||
private static final String extensionsLikePredicate;
|
private static final String extensionsLikePredicate;
|
||||||
|
|
||||||
|
@ -73,8 +73,12 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
private volatile int messageID = 0;
|
private volatile int messageID = 0;
|
||||||
private volatile boolean finalRun = false;
|
private volatile boolean finalRun = false;
|
||||||
private SleuthkitCase caseHandle = null;
|
private SleuthkitCase caseHandle = null;
|
||||||
private static final String[] ingestibleExtensions = {"tar", "jar", "zip", "bzip2",
|
|
||||||
"gz", "tgz", "doc", "xls", "ppt", "rtf", "pdf", "html", "xhtml", "txt",
|
// TODO: use a more robust method than checking file extension to determine
|
||||||
|
// whether to try a file
|
||||||
|
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
|
||||||
|
static final String[] ingestibleExtensions = {"tar", "jar", "zip", "bzip2",
|
||||||
|
"gz", "tgz", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt",
|
||||||
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
||||||
"pst", "xml", "class"};
|
"pst", "xml", "class"};
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user