diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index fa4841f395..b494e1fbcb 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -82,6 +82,9 @@ final class RegexQuery implements KeywordSearchQuery { private boolean escaped; private String escapedQuery; + private final int MIN_EMAIL_ADDR_LENGTH = 8; + private final Pattern INVALID_EMAIL_PATTERN = Pattern.compile(".*\\.(dll|txt|exe|jpg|xml)$"); + private final ListMultimap hitsMultiMap = ArrayListMultimap.create(); // Lucene regular expressions do not support the following Java predefined @@ -256,6 +259,14 @@ final class RegexQuery implements KeywordSearchQuery { offset = hitMatcher.end(); + if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_EMAIL) { + // Reduce false positives by eliminating email address hits that are either + // too short or end with well-known file extensions. + if (hit.length() < MIN_EMAIL_ADDR_LENGTH || INVALID_EMAIL_PATTERN.matcher(hit).matches()) { + break; + } + } + /* * If searching for credit card account numbers, do a Luhn check * on the term and discard it if it does not pass.