mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-13 00:16:16 +00:00
fixes for domain rules
This commit is contained in:
parent
7736629de1
commit
1e270cc0e3
@ -22,6 +22,7 @@ import java.io.BufferedReader;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
@ -56,9 +57,13 @@ class DomainTokenizer {
|
|||||||
private static final String JOINER = ".";
|
private static final String JOINER = ".";
|
||||||
// delimiter when used with regex
|
// delimiter when used with regex
|
||||||
private static final String DELIMITER = "\\" + JOINER;
|
private static final String DELIMITER = "\\" + JOINER;
|
||||||
|
|
||||||
|
private static final String WILDCARD = "*";
|
||||||
|
private static final String EXCEPTION_PREFIX = "!";
|
||||||
|
|
||||||
// taken from https://publicsuffix.org/list/public_suffix_list.dat
|
// taken from https://publicsuffix.org/list/public_suffix_list.dat
|
||||||
// file containing line-separated suffixes
|
// file containing line-separated suffixes
|
||||||
|
// rules for parsing can be found here: https://publicsuffix.org/list/
|
||||||
private static final String DOMAIN_LIST = "public_suffix_list.dat";
|
private static final String DOMAIN_LIST = "public_suffix_list.dat";
|
||||||
|
|
||||||
// token for comments
|
// token for comments
|
||||||
@ -89,7 +94,7 @@ class DomainTokenizer {
|
|||||||
*/
|
*/
|
||||||
private static DomainTokenizer load() throws IOException {
|
private static DomainTokenizer load() throws IOException {
|
||||||
try (InputStream is = DomainTokenizer.class.getResourceAsStream(DOMAIN_LIST);
|
try (InputStream is = DomainTokenizer.class.getResourceAsStream(DOMAIN_LIST);
|
||||||
InputStreamReader isReader = new InputStreamReader(is);
|
InputStreamReader isReader = new InputStreamReader(is, StandardCharsets.UTF_8);
|
||||||
BufferedReader reader = new BufferedReader(isReader)) {
|
BufferedReader reader = new BufferedReader(isReader)) {
|
||||||
|
|
||||||
DomainTokenizer categorizer = new DomainTokenizer();
|
DomainTokenizer categorizer = new DomainTokenizer();
|
||||||
@ -122,7 +127,7 @@ class DomainTokenizer {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] tokens = domainSuffix.split(DELIMITER);
|
String[] tokens = domainSuffix.trim().split(DELIMITER);
|
||||||
|
|
||||||
DomainCategory cat = trie;
|
DomainCategory cat = trie;
|
||||||
for (int i = tokens.length - 1; i >= 0; i--) {
|
for (int i = tokens.length - 1; i >= 0; i--) {
|
||||||
@ -130,7 +135,7 @@ class DomainTokenizer {
|
|||||||
if (StringUtils.isBlank(token)) {
|
if (StringUtils.isBlank(token)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
cat = cat.getOrAddChild(tokens[i]);
|
cat = cat.getOrAddChild(tokens[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -158,10 +163,26 @@ class DomainTokenizer {
|
|||||||
DomainCategory cat = trie;
|
DomainCategory cat = trie;
|
||||||
|
|
||||||
for (; idx >= 0; idx--) {
|
for (; idx >= 0; idx--) {
|
||||||
cat = cat.get(tokens.get(idx));
|
// an exception rule must be at the beginning of a suffix, and, in
|
||||||
if (cat == null) {
|
// practice, indicates a domain that would otherwise be a further
|
||||||
|
// suffix with a wildcard rule per: https://publicsuffix.org/list/
|
||||||
|
if (cat.get(EXCEPTION_PREFIX + tokens.get(idx)) != null) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DomainCategory newCat = cat.get(tokens.get(idx));
|
||||||
|
|
||||||
|
// if no matching token can be found, look for wildcard token
|
||||||
|
if (newCat == null) {
|
||||||
|
// if no wildcard token can be found, the portion found
|
||||||
|
// so far is the suffix.
|
||||||
|
newCat = cat.get(WILDCARD);
|
||||||
|
if (newCat == null) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat = newCat;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if first suffix cannot be found, return the whole domain
|
// if first suffix cannot be found, return the whole domain
|
||||||
|
Loading…
x
Reference in New Issue
Block a user