mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 10:17:41 +00:00
priority categorizer
This commit is contained in:
parent
994590ea8e
commit
349d4f82be
@ -53,8 +53,7 @@ import org.sleuthkit.autopsy.url.analytics.DomainCategory;
|
||||
* https://bugs.openjdk.java.net/browse/JDK-8155591,
|
||||
* https://bugs.eclipse.org/bugs/show_bug.cgi?id=350279.
|
||||
*/
|
||||
@SuppressWarnings("try")
|
||||
public class DefaultDomainCategorizer implements DomainCategorizer {
|
||||
class DefaultDomainCategorizer implements DomainCategorizer {
|
||||
|
||||
private static final String CSV_DELIMITER = ",";
|
||||
private static final String DOMAIN_TYPE_CSV = "default_domain_categories.csv"; //NON-NLS
|
||||
@ -162,7 +161,7 @@ public class DefaultDomainCategorizer implements DomainCategorizer {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws Exception {
|
||||
public void close() throws IOException {
|
||||
// clear out the mapping to release resources
|
||||
mapping = null;
|
||||
}
|
||||
|
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2020 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.recentactivity;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.openide.util.NbBundle.Messages;
|
||||
import org.sleuthkit.autopsy.url.analytics.DomainCategorizer;
|
||||
import org.sleuthkit.autopsy.url.analytics.DomainCategorizerException;
|
||||
import org.sleuthkit.autopsy.url.analytics.DomainCategory;
|
||||
|
||||
/**
|
||||
* The autopsy provided domain category provider that overrides all domain
|
||||
* category providers except the custom web domain categorizations.
|
||||
*/
|
||||
@Messages({
|
||||
"DefaultPriorityDomainCategorizer_searchEngineCategory=Search Engine"
|
||||
})
|
||||
public class DefaultPriorityDomainCategorizer implements DomainCategorizer {
|
||||
|
||||
// taken from https://www.google.com/supported_domains
|
||||
private static final List<String> GOOGLE_DOMAINS = Arrays.asList("google.com", "google.ad", "google.ae", "google.com.af", "google.com.ag", "google.com.ai", "google.al", "google.am", "google.co.ao", "google.com.ar", "google.as", "google.at", "google.com.au", "google.az", "google.ba", "google.com.bd", "google.be", "google.bf", "google.bg", "google.com.bh", "google.bi", "google.bj", "google.com.bn", "google.com.bo", "google.com.br", "google.bs", "google.bt", "google.co.bw", "google.by", "google.com.bz", "google.ca", "google.cd", "google.cf", "google.cg", "google.ch", "google.ci", "google.co.ck", "google.cl", "google.cm", "google.cn", "google.com.co", "google.co.cr", "google.com.cu", "google.cv", "google.com.cy", "google.cz", "google.de", "google.dj", "google.dk", "google.dm", "google.com.do", "google.dz", "google.com.ec", "google.ee", "google.com.eg", "google.es", "google.com.et", "google.fi", "google.com.fj", "google.fm", "google.fr", "google.ga", "google.ge", "google.gg", "google.com.gh", "google.com.gi", "google.gl", "google.gm", "google.gr", "google.com.gt", "google.gy", "google.com.hk", "google.hn", "google.hr", "google.ht", "google.hu", "google.co.id", "google.ie", "google.co.il", "google.im", "google.co.in", "google.iq", "google.is", "google.it", "google.je", "google.com.jm", "google.jo", "google.co.jp", "google.co.ke", "google.com.kh", "google.ki", "google.kg", "google.co.kr", "google.com.kw", "google.kz", "google.la", "google.com.lb", "google.li", "google.lk", "google.co.ls", "google.lt", "google.lu", "google.lv", "google.com.ly", "google.co.ma", "google.md", "google.me", "google.mg", "google.mk", "google.ml", "google.com.mm", "google.mn", "google.ms", "google.com.mt", "google.mu", "google.mv", "google.mw", "google.com.mx", "google.com.my", "google.co.mz", "google.com.na", "google.com.ng", "google.com.ni", "google.ne", "google.nl", "google.no", "google.com.np", "google.nr", "google.nu", "google.co.nz", "google.com.om", "google.com.pa", "google.com.pe", "google.com.pg", "google.com.ph", "google.com.pk", "google.pl", "google.pn", "google.com.pr", "google.ps", "google.pt", "google.com.py", "google.com.qa", "google.ro", "google.ru", "google.rw", "google.com.sa", "google.com.sb", "google.sc", "google.se", "google.com.sg", "google.sh", "google.si", "google.sk", "google.com.sl", "google.sn", "google.so", "google.sm", "google.sr", "google.st", "google.com.sv", "google.td", "google.tg", "google.co.th", "google.com.tj", "google.tl", "google.tm", "google.tn", "google.to", "google.com.tr", "google.tt", "google.com.tw", "google.co.tz", "google.com.ua", "google.co.ug", "google.co.uk", "google.com.uy", "google.co.uz", "google.com.vc", "google.co.ve", "google.vg", "google.co.vi", "google.com.vn", "google.vu", "google.ws", "google.rs", "google.co.za", "google.co.zm", "google.co.zw", "google.cat");
|
||||
|
||||
// taken from https://www.yahoo.com/everything/world
|
||||
private static final List<String> YAHOO_DOMAINS = Arrays.asList("espanol.yahoo.com", "au.yahoo.com", "be.yahoo.com", "fr-be.yahoo.com", "br.yahoo.com", "ca.yahoo.com", "espanol.yahoo.com", "espanol.yahoo.com", "de.yahoo.com", "es.yahoo.com", "espanol.yahoo.com", "fr.yahoo.com", "in.yahoo.com", "id.yahoo.com", "ie.yahoo.com", "it.yahoo.com", "en-maktoob.yahoo.com", "malaysia.yahoo.com", "espanol.yahoo.com", "nz.yahoo.com", "espanol.yahoo.com", "ph.yahoo.com", "qc.yahoo.com", "ro.yahoo.com", "sg.yahoo.com", "za.yahoo.com", "se.yahoo.com", "uk.yahoo.com", "yahoo.com", "espanol.yahoo.com", "vn.yahoo.com", "gr.yahoo.com", "maktoob.yahoo.com", "yahoo.com", "hk.yahoo.com", "tw.yahoo.com", "yahoo.co.jp");
|
||||
|
||||
private static final List<String> OTHER_SEARCH_ENGINES = Arrays.asList(
|
||||
"bing.com",
|
||||
"baidu.com",
|
||||
"sogou.com",
|
||||
"soso.com",
|
||||
"duckduckgo.com",
|
||||
"swisscows.com",
|
||||
"gibiru.com",
|
||||
"cutestat.com",
|
||||
"youdao.com",
|
||||
"biglobe.ne.jp",
|
||||
"givewater.com",
|
||||
"ekoru.org",
|
||||
"ecosia.org",
|
||||
// according to https://en.wikipedia.org/wiki/Yandex
|
||||
"yandex.ru",
|
||||
"yandex.com"
|
||||
);
|
||||
|
||||
private static final String WWW_PREFIX = "www";
|
||||
|
||||
private static final Map<String, String> DOMAIN_LOOKUP
|
||||
= Stream.of(GOOGLE_DOMAINS, YAHOO_DOMAINS, OTHER_SEARCH_ENGINES)
|
||||
.flatMap((lst) -> lst.stream())
|
||||
.collect(Collectors.toMap((k) -> k, (k) -> Bundle.DefaultPriorityDomainCategorizer_searchEngineCategory(), (v1, v2) -> v1));
|
||||
|
||||
@Override
|
||||
public void initialize() throws DomainCategorizerException {
|
||||
}
|
||||
|
||||
@Override
|
||||
public DomainCategory getCategory(String domain, String host) throws DomainCategorizerException {
|
||||
|
||||
String hostToUse = StringUtils.isBlank(host) ? domain : host;
|
||||
|
||||
if (StringUtils.isBlank(hostToUse)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<String> domainWords = Stream.of(hostToUse.toLowerCase().split("\\."))
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.map(String::trim)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
String sanitizedDomain = domainWords.stream()
|
||||
// skip first word segment if 'www'
|
||||
.skip(domainWords.size() > 0 && WWW_PREFIX.equals(domainWords.get(0)) ? 1 : 0)
|
||||
.collect(Collectors.joining("."));
|
||||
|
||||
String category = DOMAIN_LOOKUP.get(sanitizedDomain);
|
||||
return category == null ? null : new DomainCategory(sanitizedDomain, category);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
}
|
||||
}
|
@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.recentactivity;
|
||||
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
@ -453,33 +454,45 @@ class DomainCategoryRunner extends Extract {
|
||||
@Override
|
||||
void configExtractor() throws IngestModule.IngestModuleException {
|
||||
// lookup all providers, filter null providers, and sort providers
|
||||
Collection<? extends DomainCategorizer> lookupList = Lookup.getDefault().lookupAll(DomainCategorizer.class);
|
||||
if (lookupList == null) {
|
||||
lookupList = Collections.emptyList();
|
||||
}
|
||||
|
||||
List<DomainCategorizer> foundProviders = lookupList.stream()
|
||||
.filter(provider -> provider != null)
|
||||
.sorted((a, b) -> {
|
||||
boolean aIsCustom = a.getClass().getName().contains(CUSTOM_CATEGORIZER_PATH);
|
||||
boolean bIsCustom = b.getClass().getName().contains(CUSTOM_CATEGORIZER_PATH);
|
||||
if (aIsCustom != bIsCustom) {
|
||||
// push custom categorizer to top
|
||||
return -Boolean.compare(aIsCustom, bIsCustom);
|
||||
}
|
||||
|
||||
return a.getClass().getName().compareToIgnoreCase(b.getClass().getName());
|
||||
Collection<? extends DomainCategorizer> lookupCollection = Lookup.getDefault().lookupAll(DomainCategorizer.class);
|
||||
Collection<? extends DomainCategorizer> lookupList = (lookupCollection == null) ?
|
||||
Collections.emptyList() :
|
||||
lookupCollection;
|
||||
|
||||
// this will be the class instance of the foundProviders
|
||||
List<DomainCategorizer> foundProviders = new ArrayList<>();
|
||||
|
||||
// find the custom domain categories provider if present and add it first to the list
|
||||
lookupList.stream()
|
||||
.filter(categorizer -> categorizer.getClass().getName().contains(CUSTOM_CATEGORIZER_PATH))
|
||||
.findFirst()
|
||||
.ifPresent((provider) -> foundProviders.add(provider));
|
||||
|
||||
// add the default priority categorizer
|
||||
foundProviders.add(new DefaultPriorityDomainCategorizer());
|
||||
|
||||
// add all others except for the custom web domain categorizer, the default priority
|
||||
// categorizer and the default categorizer
|
||||
lookupList.stream()
|
||||
.filter(categorizer -> categorizer != null)
|
||||
.filter(categorizer -> {
|
||||
String className = categorizer.getClass().getName();
|
||||
return !className.contains(CUSTOM_CATEGORIZER_PATH) &&
|
||||
!className.equals(DefaultPriorityDomainCategorizer.class.getName()) &&
|
||||
!className.equals(DefaultDomainCategorizer.class.getName());
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// add the default categorizer last as a last resort
|
||||
.sorted((a, b) -> a.getClass().getName().compareToIgnoreCase(b.getClass().getName()))
|
||||
.forEach(foundProviders::add);
|
||||
|
||||
// add the default categorizer last
|
||||
foundProviders.add(new DefaultDomainCategorizer());
|
||||
|
||||
|
||||
for (DomainCategorizer provider : foundProviders) {
|
||||
try {
|
||||
provider.initialize();
|
||||
} catch (DomainCategorizerException ex) {
|
||||
throw new IngestModule.IngestModuleException("There was an error instantiating the provider: " + provider.getClass().getSimpleName(), ex);
|
||||
throw new IngestModule.IngestModuleException("There was an error instantiating the provider: " +
|
||||
provider.getClass().getSimpleName(), ex);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user