Add manifest file to ingestible extensions.

Add a function that abstracts out the ingestible check.
This commit is contained in:
adam-m 2012-04-30 17:35:05 -04:00
parent 96838e4915
commit e60c3d65e2
2 changed files with 27 additions and 14 deletions

View File

@ -49,6 +49,12 @@ class Ingester {
// Starts out false. Presumably tracks whether ingested content is still awaiting a commit -- TODO confirm against its uses (not visible here).
private boolean uncommitedIngests = false;
// Single-threaded executor; judging by the name it runs update requests -- confirm against its uses.
private final ExecutorService upRequestExecutor = Executors.newSingleThreadExecutor();
// Server handle obtained from KeywordSearch.getServer().
private final Server solrServer = KeywordSearch.getServer();
// Lower-case file-name suffixes that isIngestible() accepts as ingestible.
// Entries are written without a leading dot and are compared with
// String.endsWith() against the lower-cased file name.
// TODO: use a more robust method than checking file extension
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
static final String[] ingestibleExtensions = {"tar", "jar", "zip", "gzip", "bzip2",
"gz", "tgz", "odf", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt", "log", "manifest",
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
"pst", "xml", "class", "dwg"};
/**
 * Package-private constructor. The body is empty, so construction performs
 * no work beyond the field initializers.
 */
Ingester() {
}
@ -293,4 +299,23 @@ class Ingester {
super(message);
}
}
/**
 * Determine whether the given file is ingestible/indexable by keyword search.
 * <p>
 * The check is purely name-based: the file name is lower-cased and accepted
 * if it ends with any entry of {@link #ingestibleExtensions}. The file's
 * content is not inspected.
 *
 * @param fsContent the file to check; its name is read via {@code getName()}
 * @return true if the file name ends with one of the ingestible extensions,
 *         false otherwise
 */
static boolean isIngestible(FsContent fsContent) {
    // Lower-case once, outside the loop, with a locale-independent mapping.
    // The default-locale toLowerCase() maps 'I' to a dotless i under a
    // Turkish locale, so names such as "PHOTO.GIF" or "A.ZIP" would fail to
    // match the ASCII entries "gif" / "zip".
    final String lowerCaseName = fsContent.getName().toLowerCase(java.util.Locale.ROOT);

    // NOTE(review): this is a plain suffix match with no "." separator, so a
    // name like "catalog" matches "log". Kept as-is to avoid dropping files
    // that are indexed today; tighten once a content-based check exists.
    for (String ext : ingestibleExtensions) {
        if (lowerCaseName.endsWith(ext)) {
            return true;
        }
    }
    return false;
}
}

View File

@ -78,13 +78,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
private final String hashDBServiceName = "Hash Lookup";
private SleuthkitCase caseHandle = null;
boolean initialized = false;
// TODO: use a more robust method than checking file extension to determine
// whether to try a file
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
static final String[] ingestibleExtensions = {"tar", "jar", "zip", "gzip", "bzip2",
"gz", "tgz", "odf", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt", "log",
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
"pst", "xml", "class", "dwg"};
public enum IngestStatus {
@ -447,14 +441,8 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
return;
}
boolean ingestible = false;
boolean ingestible = Ingester.isIngestible(fsContent);
final String fileName = fsContent.getName();
for (String ext : ingestibleExtensions) {
if (fileName.toLowerCase().endsWith(ext)) {
ingestible = true;
break;
}
}
String deletedMessage = "";
if ((fsContent.getMeta_flags() & (TskData.TSK_FS_META_FLAG_ENUM.ORPHAN.getMetaFlag() | TskData.TSK_FS_META_FLAG_ENUM.UNALLOC.getMetaFlag())) != 0) {