From 349d4f82be3a9b79dd34559b95086548892e7f6b Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 31 Mar 2021 20:17:38 -0400 Subject: [PATCH 1/3] priority categorizer --- .../DefaultDomainCategorizer.java | 5 +- .../DefaultPriorityDomainCategorizer.java | 104 ++++++++++++++++++ .../recentactivity/DomainCategoryRunner.java | 55 +++++---- 3 files changed, 140 insertions(+), 24 deletions(-) create mode 100644 RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultPriorityDomainCategorizer.java diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategorizer.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategorizer.java index 0d64661f6c..d055e132da 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategorizer.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategorizer.java @@ -53,8 +53,7 @@ import org.sleuthkit.autopsy.url.analytics.DomainCategory; * https://bugs.openjdk.java.net/browse/JDK-8155591, * https://bugs.eclipse.org/bugs/show_bug.cgi?id=350279. */ -@SuppressWarnings("try") -public class DefaultDomainCategorizer implements DomainCategorizer { +class DefaultDomainCategorizer implements DomainCategorizer { private static final String CSV_DELIMITER = ","; private static final String DOMAIN_TYPE_CSV = "default_domain_categories.csv"; //NON-NLS @@ -162,7 +161,7 @@ public class DefaultDomainCategorizer implements DomainCategorizer { } @Override - public void close() throws Exception { + public void close() throws IOException { // clear out the mapping to release resources mapping = null; } diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultPriorityDomainCategorizer.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultPriorityDomainCategorizer.java new file mode 100644 index 0000000000..9b5714b9a5 --- /dev/null +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultPriorityDomainCategorizer.java @@ -0,0 +1,104 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2020 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.sleuthkit.autopsy.recentactivity; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.apache.commons.lang.StringUtils; +import org.openide.util.NbBundle.Messages; +import org.sleuthkit.autopsy.url.analytics.DomainCategorizer; +import org.sleuthkit.autopsy.url.analytics.DomainCategorizerException; +import org.sleuthkit.autopsy.url.analytics.DomainCategory; + +/** + * The autopsy provided domain category provider that overrides all domain + * category providers except the custom web domain categorizations. + */ +@Messages({ + "DefaultPriorityDomainCategorizer_searchEngineCategory=Search Engine" +}) +public class DefaultPriorityDomainCategorizer implements DomainCategorizer { + + // taken from https://www.google.com/supported_domains + private static final List GOOGLE_DOMAINS = Arrays.asList("google.com", "google.ad", "google.ae", "google.com.af", "google.com.ag", "google.com.ai", "google.al", "google.am", "google.co.ao", "google.com.ar", "google.as", "google.at", "google.com.au", "google.az", "google.ba", "google.com.bd", "google.be", "google.bf", "google.bg", "google.com.bh", "google.bi", "google.bj", "google.com.bn", "google.com.bo", "google.com.br", "google.bs", "google.bt", "google.co.bw", "google.by", "google.com.bz", "google.ca", "google.cd", "google.cf", "google.cg", "google.ch", "google.ci", "google.co.ck", "google.cl", "google.cm", "google.cn", "google.com.co", "google.co.cr", "google.com.cu", "google.cv", "google.com.cy", "google.cz", "google.de", "google.dj", "google.dk", "google.dm", "google.com.do", "google.dz", "google.com.ec", "google.ee", "google.com.eg", "google.es", "google.com.et", "google.fi", "google.com.fj", "google.fm", "google.fr", "google.ga", "google.ge", "google.gg", "google.com.gh", "google.com.gi", "google.gl", "google.gm", "google.gr", "google.com.gt", "google.gy", "google.com.hk", "google.hn", "google.hr", "google.ht", "google.hu", "google.co.id", "google.ie", "google.co.il", "google.im", "google.co.in", "google.iq", "google.is", "google.it", "google.je", "google.com.jm", "google.jo", "google.co.jp", "google.co.ke", "google.com.kh", "google.ki", "google.kg", "google.co.kr", "google.com.kw", "google.kz", "google.la", "google.com.lb", "google.li", "google.lk", "google.co.ls", "google.lt", "google.lu", "google.lv", "google.com.ly", "google.co.ma", "google.md", "google.me", "google.mg", "google.mk", "google.ml", "google.com.mm", "google.mn", "google.ms", "google.com.mt", "google.mu", "google.mv", "google.mw", "google.com.mx", "google.com.my", "google.co.mz", "google.com.na", "google.com.ng", "google.com.ni", "google.ne", "google.nl", "google.no", "google.com.np", "google.nr", "google.nu", "google.co.nz", "google.com.om", "google.com.pa", "google.com.pe", "google.com.pg", "google.com.ph", "google.com.pk", "google.pl", "google.pn", "google.com.pr", "google.ps", "google.pt", "google.com.py", "google.com.qa", "google.ro", "google.ru", "google.rw", "google.com.sa", "google.com.sb", "google.sc", "google.se", "google.com.sg", "google.sh", "google.si", "google.sk", "google.com.sl", "google.sn", "google.so", "google.sm", "google.sr", "google.st", "google.com.sv", "google.td", "google.tg", "google.co.th", "google.com.tj", "google.tl", "google.tm", "google.tn", "google.to", "google.com.tr", "google.tt", "google.com.tw", "google.co.tz", "google.com.ua", "google.co.ug", "google.co.uk", "google.com.uy", "google.co.uz", "google.com.vc", "google.co.ve", "google.vg", "google.co.vi", "google.com.vn", "google.vu", "google.ws", "google.rs", "google.co.za", "google.co.zm", "google.co.zw", "google.cat"); + + // taken from https://www.yahoo.com/everything/world + private static final List YAHOO_DOMAINS = Arrays.asList("espanol.yahoo.com", "au.yahoo.com", "be.yahoo.com", "fr-be.yahoo.com", "br.yahoo.com", "ca.yahoo.com", "espanol.yahoo.com", "espanol.yahoo.com", "de.yahoo.com", "es.yahoo.com", "espanol.yahoo.com", "fr.yahoo.com", "in.yahoo.com", "id.yahoo.com", "ie.yahoo.com", "it.yahoo.com", "en-maktoob.yahoo.com", "malaysia.yahoo.com", "espanol.yahoo.com", "nz.yahoo.com", "espanol.yahoo.com", "ph.yahoo.com", "qc.yahoo.com", "ro.yahoo.com", "sg.yahoo.com", "za.yahoo.com", "se.yahoo.com", "uk.yahoo.com", "yahoo.com", "espanol.yahoo.com", "vn.yahoo.com", "gr.yahoo.com", "maktoob.yahoo.com", "yahoo.com", "hk.yahoo.com", "tw.yahoo.com", "yahoo.co.jp"); + + private static final List OTHER_SEARCH_ENGINES = Arrays.asList( + "bing.com", + "baidu.com", + "sogou.com", + "soso.com", + "duckduckgo.com", + "swisscows.com", + "gibiru.com", + "cutestat.com", + "youdao.com", + "biglobe.ne.jp", + "givewater.com", + "ekoru.org", + "ecosia.org", + // according to https://en.wikipedia.org/wiki/Yandex + "yandex.ru", + "yandex.com" + ); + + private static final String WWW_PREFIX = "www"; + + private static final Map DOMAIN_LOOKUP + = Stream.of(GOOGLE_DOMAINS, YAHOO_DOMAINS, OTHER_SEARCH_ENGINES) + .flatMap((lst) -> lst.stream()) + .collect(Collectors.toMap((k) -> k, (k) -> Bundle.DefaultPriorityDomainCategorizer_searchEngineCategory(), (v1, v2) -> v1)); + + @Override + public void initialize() throws DomainCategorizerException { + } + + @Override + public DomainCategory getCategory(String domain, String host) throws DomainCategorizerException { + + String hostToUse = StringUtils.isBlank(host) ? domain : host; + + if (StringUtils.isBlank(hostToUse)) { + return null; + } + + List domainWords = Stream.of(hostToUse.toLowerCase().split("\\.")) + .filter(StringUtils::isNotBlank) + .map(String::trim) + .collect(Collectors.toList()); + + String sanitizedDomain = domainWords.stream() + // skip first word segment if 'www' + .skip(domainWords.size() > 0 && WWW_PREFIX.equals(domainWords.get(0)) ? 1 : 0) + .collect(Collectors.joining(".")); + + String category = DOMAIN_LOOKUP.get(sanitizedDomain); + return category == null ? null : new DomainCategory(sanitizedDomain, category); + } + + @Override + public void close() throws IOException { + } +} diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DomainCategoryRunner.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DomainCategoryRunner.java index 0102f6e868..d24a031a48 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DomainCategoryRunner.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DomainCategoryRunner.java @@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.recentactivity; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; @@ -453,33 +454,45 @@ class DomainCategoryRunner extends Extract { @Override void configExtractor() throws IngestModule.IngestModuleException { // lookup all providers, filter null providers, and sort providers - Collection lookupList = Lookup.getDefault().lookupAll(DomainCategorizer.class); - if (lookupList == null) { - lookupList = Collections.emptyList(); - } - - List foundProviders = lookupList.stream() - .filter(provider -> provider != null) - .sorted((a, b) -> { - boolean aIsCustom = a.getClass().getName().contains(CUSTOM_CATEGORIZER_PATH); - boolean bIsCustom = b.getClass().getName().contains(CUSTOM_CATEGORIZER_PATH); - if (aIsCustom != bIsCustom) { - // push custom categorizer to top - return -Boolean.compare(aIsCustom, bIsCustom); - } - - return a.getClass().getName().compareToIgnoreCase(b.getClass().getName()); + Collection lookupCollection = Lookup.getDefault().lookupAll(DomainCategorizer.class); + Collection lookupList = (lookupCollection == null) ? + Collections.emptyList() : + lookupCollection; + + // this will be the class instance of the foundProviders + List foundProviders = new ArrayList<>(); + + // find the custom domain categories provider if present and add it first to the list + lookupList.stream() + .filter(categorizer -> categorizer.getClass().getName().contains(CUSTOM_CATEGORIZER_PATH)) + .findFirst() + .ifPresent((provider) -> foundProviders.add(provider)); + + // add the default priority categorizer + foundProviders.add(new DefaultPriorityDomainCategorizer()); + + // add all others except for the custom web domain categorizer, the default priority + // categorizer and the default categorizer + lookupList.stream() + .filter(categorizer -> categorizer != null) + .filter(categorizer -> { + String className = categorizer.getClass().getName(); + return !className.contains(CUSTOM_CATEGORIZER_PATH) && + !className.equals(DefaultPriorityDomainCategorizer.class.getName()) && + !className.equals(DefaultDomainCategorizer.class.getName()); }) - .collect(Collectors.toList()); - - // add the default categorizer last as a last resort + .sorted((a, b) -> a.getClass().getName().compareToIgnoreCase(b.getClass().getName())) + .forEach(foundProviders::add); + + // add the default categorizer last foundProviders.add(new DefaultDomainCategorizer()); - + for (DomainCategorizer provider : foundProviders) { try { provider.initialize(); } catch (DomainCategorizerException ex) { - throw new IngestModule.IngestModuleException("There was an error instantiating the provider: " + provider.getClass().getSimpleName(), ex); + throw new IngestModule.IngestModuleException("There was an error instantiating the provider: " + + provider.getClass().getSimpleName(), ex); } } From 05b9421ee864289facced08cdcb1de23b990f55b Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 1 Apr 2021 09:08:08 -0400 Subject: [PATCH 2/3] bundle changes --- .../autopsy/recentactivity/Bundle.properties-MERGED | 1 + .../autopsy/recentactivity/DefaultDomainCategorizer.java | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Bundle.properties-MERGED b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Bundle.properties-MERGED index 547b90ca6a..b796a16d26 100755 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Bundle.properties-MERGED +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Bundle.properties-MERGED @@ -16,6 +16,7 @@ DataSourceUsage_FlashDrive=Flash Drive # {0} - OS name DataSourceUsageAnalyzer.customVolume.label=OS Drive ({0}) DataSourceUsageAnalyzer.parentModuleName=Recent Activity +DefaultPriorityDomainCategorizer_searchEngineCategory=Search Engine DomainCategoryRunner_moduleName_text=DomainCategoryRunner DomainCategoryRunner_parentModuleName=Recent Activity DomainCategoryRunner_Progress_Message_Domain_Types=Finding Domain Types diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategorizer.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategorizer.java index d055e132da..0d64661f6c 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategorizer.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategorizer.java @@ -53,7 +53,8 @@ import org.sleuthkit.autopsy.url.analytics.DomainCategory; * https://bugs.openjdk.java.net/browse/JDK-8155591, * https://bugs.eclipse.org/bugs/show_bug.cgi?id=350279. */ -class DefaultDomainCategorizer implements DomainCategorizer { +@SuppressWarnings("try") +public class DefaultDomainCategorizer implements DomainCategorizer { private static final String CSV_DELIMITER = ","; private static final String DOMAIN_TYPE_CSV = "default_domain_categories.csv"; //NON-NLS @@ -161,7 +162,7 @@ class DefaultDomainCategorizer implements DomainCategorizer { } @Override - public void close() throws IOException { + public void close() throws Exception { // clear out the mapping to release resources mapping = null; } From d6460338f3b31316a373e98ecb56c8b07aca54e5 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 1 Apr 2021 09:41:56 -0400 Subject: [PATCH 3/3] license update --- .../recentactivity/DefaultPriorityDomainCategorizer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultPriorityDomainCategorizer.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultPriorityDomainCategorizer.java index 9b5714b9a5..da84660cc2 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultPriorityDomainCategorizer.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultPriorityDomainCategorizer.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2020 Basis Technology Corp. + * Copyright 2021 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License");