mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 18:17:43 +00:00
Use domain validator instead of list of file extensions to reduce email false positives.
This commit is contained in:
parent
da9b27d170
commit
5225a0e47d
@ -32,6 +32,7 @@ import java.util.logging.Level;
|
|||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.commons.validator.routines.DomainValidator;
|
||||||
import org.apache.solr.client.solrj.SolrQuery;
|
import org.apache.solr.client.solrj.SolrQuery;
|
||||||
import org.apache.solr.client.solrj.SolrQuery.SortClause;
|
import org.apache.solr.client.solrj.SolrQuery.SortClause;
|
||||||
import org.apache.solr.client.solrj.SolrRequest;
|
import org.apache.solr.client.solrj.SolrRequest;
|
||||||
@ -83,7 +84,6 @@ final class RegexQuery implements KeywordSearchQuery {
|
|||||||
private String escapedQuery;
|
private String escapedQuery;
|
||||||
|
|
||||||
private final int MIN_EMAIL_ADDR_LENGTH = 8;
|
private final int MIN_EMAIL_ADDR_LENGTH = 8;
|
||||||
private final Pattern INVALID_EMAIL_PATTERN = Pattern.compile(".*\\.(dll|txt|exe|jpg|xml)$");
|
|
||||||
|
|
||||||
private final ListMultimap<Keyword, KeywordHit> hitsMultiMap = ArrayListMultimap.create();
|
private final ListMultimap<Keyword, KeywordHit> hitsMultiMap = ArrayListMultimap.create();
|
||||||
|
|
||||||
@ -281,9 +281,10 @@ final class RegexQuery implements KeywordSearchQuery {
|
|||||||
|
|
||||||
if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_EMAIL) {
|
if (originalKeyword.getArtifactAttributeType() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_EMAIL) {
|
||||||
// Reduce false positives by eliminating email address hits that are either
|
// Reduce false positives by eliminating email address hits that are either
|
||||||
// too short or end with well-known file extensions.
|
// too short or do not end in a valid top-level domain.
|
||||||
if (hit.length() < MIN_EMAIL_ADDR_LENGTH || INVALID_EMAIL_PATTERN.matcher(hit).matches()) {
|
if (hit.length() < MIN_EMAIL_ADDR_LENGTH
|
||||||
break;
|
|| !DomainValidator.getInstance(true).isValidTld(hit.substring(hit.lastIndexOf('.')))) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user