diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
index 01973a5c35..4cc7b82721 100755
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java
@@ -49,6 +49,12 @@ class Ingester {
     private boolean uncommitedIngests = false;
     private final ExecutorService upRequestExecutor = Executors.newSingleThreadExecutor();
     private final Server solrServer = KeywordSearch.getServer();
+    // TODO: use a more robust method than checking file extension
+    // supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
+    static final String[] ingestibleExtensions = {"tar", "jar", "zip", "gzip", "bzip2",
+        "gz", "tgz", "odf", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt", "log", "manifest",
+        "bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
+        "pst", "xml", "class", "dwg"};
 
     Ingester() {
     }
@@ -293,4 +299,26 @@ class Ingester {
             super(message);
         }
     }
+
+
+    /**
+     * Determine if the fscontent is ingestible/indexable by keyword search.
+     * Note: currently only checks whether the lower-cased file name ends with
+     * one of the entries in ingestibleExtensions (a plain suffix match, no
+     * '.' separator is required); could be a more robust check.
+     *
+     * @param fsContent content whose name is checked
+     * @return true if it is ingestible, false otherwise
+     */
+    static boolean isIngestible(FsContent fsContent) {
+        // Lower-case once, outside the loop, and with a fixed locale: under a
+        // Turkish default locale "GIF" lower-cases with a dotless i and never matches.
+        final String fileName = fsContent.getName().toLowerCase(java.util.Locale.ROOT);
+        for (String ext : ingestibleExtensions) {
+            if (fileName.endsWith(ext)) {
+                return true;
+            }
+        }
+        return false;
+    }
 }
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestService.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestService.java
index 8b6ce940d1..b67232d7ba 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestService.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestService.java
@@ -78,13 +78,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
     private final String hashDBServiceName = "Hash Lookup";
     private SleuthkitCase caseHandle = null;
     boolean initialized = false;
-    // TODO: use a more robust method than checking file extension to determine
-    // whether to try a file
-    // supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
-    static final String[] ingestibleExtensions = {"tar", "jar", "zip", "gzip", "bzip2",
-        "gz", "tgz", "odf", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt", "log",
-        "bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
-        "pst", "xml", "class", "dwg"};
+
 
     public enum IngestStatus {
 
@@ -447,14 +441,8 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
             return;
         }
 
-        boolean ingestible = false;
+        boolean ingestible = Ingester.isIngestible(fsContent);
         final String fileName = fsContent.getName();
-        for (String ext : ingestibleExtensions) {
-            if (fileName.toLowerCase().endsWith(ext)) {
-                ingestible = true;
-                break;
-            }
-        }
         String deletedMessage = "";
         if ((fsContent.getMeta_flags() & 
(TskData.TSK_FS_META_FLAG_ENUM.ORPHAN.getMetaFlag() | TskData.TSK_FS_META_FLAG_ENUM.UNALLOC.getMetaFlag())) != 0) {