diff --git a/Core/src/org/sleuthkit/autopsy/url/analytics/DefaultDomainCategoryResult.java b/Core/src/org/sleuthkit/autopsy/url/analytics/DefaultDomainCategoryResult.java index e0792cd375..9a1663482b 100644 --- a/Core/src/org/sleuthkit/autopsy/url/analytics/DefaultDomainCategoryResult.java +++ b/Core/src/org/sleuthkit/autopsy/url/analytics/DefaultDomainCategoryResult.java @@ -28,30 +28,16 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult { private final String hostSuffix; private final String category; - private final boolean morePrefixes; /** - * Default constructor assuming default for hasMorePrefixes of true. + * Default constructor. * @param hostSuffix The portion of the suffix from the host or domain that was a * match (i.e. 'mail.google.com' or 'hotmail.com'). * @param category The category (i.e. 'Web Email'). */ public DefaultDomainCategoryResult(String hostSuffix, String category) { - this(hostSuffix, category, true); - } - - /** - * Main constructor. - * @param hostSuffix The portion of the suffix from the host or domain that was a - * match (i.e. 'mail.google.com' or 'hotmail.com'). - * @param category The category (i.e. 'Web Email'). - * @param morePrefixes In the event that there would be different matches for additional - * prefixes, this can be true. - */ - public DefaultDomainCategoryResult(String hostSuffix, String category, boolean morePrefixes) { this.hostSuffix = hostSuffix; this.category = category; - this.morePrefixes = morePrefixes; } @Override @@ -63,10 +49,4 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult { public String getCategory() { return category; } - - @Override - public boolean hasMorePrefixes() { - return morePrefixes; - } - } diff --git a/Core/src/org/sleuthkit/autopsy/url/analytics/DomainCategoryResult.java b/Core/src/org/sleuthkit/autopsy/url/analytics/DomainCategoryResult.java index c52780a587..269efa9852 100644 --- a/Core/src/org/sleuthkit/autopsy/url/analytics/DomainCategoryResult.java +++ b/Core/src/org/sleuthkit/autopsy/url/analytics/DomainCategoryResult.java @@ -35,16 +35,4 @@ public interface DomainCategoryResult { * @return The category (i.e. 'Web Email'). */ String getCategory(); - - /** - * @return In the event that there would be different matches for additional - * prefixes, this can return true. For instance, if there was an entry for - * 'mail.google.com' and 'chatenabled.mail.google.com', a search for - * 'mail.google.com' would return the host suffix: 'mail.google.com' and - * 'true' for hasMorePrefixes since an additional category could be added - * for the 'chatenabled' prefix. - */ - default boolean hasMorePrefixes() { - return true; - } } diff --git a/Core/src/org/sleuthkit/autopsy/url/analytics/DomainSuffixTrie.java b/Core/src/org/sleuthkit/autopsy/url/analytics/DomainSuffixTrie.java deleted file mode 100644 index 47633df46b..0000000000 --- a/Core/src/org/sleuthkit/autopsy/url/analytics/DomainSuffixTrie.java +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2020 Basis Technology Corp. - * Contact: carrier sleuthkit org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.url.analytics; - -import com.google.common.annotations.Beta; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import org.apache.commons.lang.StringUtils; -import org.sleuthkit.autopsy.url.analytics.Trie.TrieResult; - -@Beta -public class DomainSuffixTrie { - - private static Iterable getSuffixIter(String host) { - // parse the tokens splitting on delimiter - List tokens = Stream.of(host.toLowerCase().split(DELIMITER)) - .filter(StringUtils::isNotBlank) - .collect(Collectors.toList()); - - Collections.reverse(tokens); - return tokens; - } - - //private void Node get - // Character for joining domain segments. - private static final String JOINER = "."; - // delimiter when used with regex for domains - private static final String DELIMITER = "\\" + JOINER; - - private final Trie trie = new Trie<>(); - - /** - * - * @param suffix - * @param leaf - */ - public void add(String suffix, String leaf) { - this.trie.add(getSuffixIter(suffix), leaf); - } - - /** - * Determines if the host is a known type of host. If so, returns the - * portion of the host suffix that signifies the domain type (i.e. - * "hotmail.com" or "mail.google.com") and the domain type. Also returned in - * the DomainCategoryResult is whether or not any children of the found node - * in the trie and consequently, whether or not - * - * @param host The host. - * @return The DomainCategoryResult if a portion of the suffix was found - * - * - * A pair of the host suffix and domain type for that suffix if - * found. Otherwise, returns null. - */ - public DomainCategoryResult findHostCategory(String host) { - // if no host, return none. - if (StringUtils.isBlank(host)) { - return null; - } - - TrieResult result = this.trie.getDeepest(getSuffixIter(host)); - List keys = new ArrayList<>(result.getKeys()); - Collections.reverse(keys); - String suffix = String.join(JOINER, keys); - return new DefaultDomainCategoryResult(suffix, result.getValue(), result.hasChildren()); - } -} diff --git a/Core/src/org/sleuthkit/autopsy/url/analytics/Trie.java b/Core/src/org/sleuthkit/autopsy/url/analytics/Trie.java deleted file mode 100644 index af7f9307b2..0000000000 --- a/Core/src/org/sleuthkit/autopsy/url/analytics/Trie.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package org.sleuthkit.autopsy.url.analytics; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.commons.collections4.MapUtils; - -class Trie { - - private class Node { - - private final Map> children = new HashMap<>(); - private V leafValue = null; - - Node getOrAddChild(K childKey) { - Node child = children.get(childKey); - if (child == null) { - child = new Node(); - children.put(childKey, child); - } - - return child; - } - - Node getChild(K childKey) { - return children.get(childKey); - } - - V getLeafValue() { - return leafValue; - } - - void setLeafValue(V leafValue) { - this.leafValue = leafValue; - } - - } - - static class TrieResult { - - private final V value; - private final List keys; - private final boolean hasChildren; - - TrieResult(V value, List keys, boolean hasChildren) { - this.value = value; - this.keys = keys; - this.hasChildren = hasChildren; - } - - V getValue() { - return value; - } - - List getKeys() { - return keys; - } - - boolean hasChildren() { - return hasChildren; - } - } - - - private Node root = new Node<>(); - - void add(Iterable keyTokens, V leafValue) { - Node node = root; - for (K key : keyTokens) { - node = node.getOrAddChild(key); - } - - node.setLeafValue(leafValue); - } - - V getExact(Iterable keys) { - Node node = root; - for (K key : keys) { - node = node.getChild(key); - if (node == null) { - return null; - } - } - - return node.getLeafValue(); - } - - TrieResult getDeepest(Iterable keys) { - Node node = root; - List visited = new ArrayList<>(); - TrieResult bestMatch = null; - for (K key : keys) { - if (node == null) { - break; - } - - if (node.getLeafValue() != null) { - bestMatch = new TrieResult(node.getLeafValue(), visited, MapUtils.isNotEmpty(node.children)); - } - - node = node.getChild(key); - visited.add(key); - } - - return bestMatch; - } - - TrieResult getFirst(Iterable keys) { - Node node = root; - List visited = new ArrayList<>(); - for (K key : keys) { - if (node == null) { - break; - } - - if (node.getLeafValue() != null) { - return new TrieResult(node.getLeafValue(), visited, MapUtils.isNotEmpty(node.children)); - } - - node = node.getChild(key); - visited.add(key); - } - - return null; - } -} diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategoryProvider.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategoryProvider.java index cea846dd8b..6fcd9300b3 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategoryProvider.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DefaultDomainCategoryProvider.java @@ -18,17 +18,21 @@ */ package org.sleuthkit.autopsy.recentactivity; -import org.sleuthkit.autopsy.url.analytics.DomainSuffixTrie; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.logging.Level; import org.apache.commons.lang.StringUtils; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.ingest.IngestModule; import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException; +import org.sleuthkit.autopsy.url.analytics.DefaultDomainCategoryResult; import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider; import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult; @@ -44,38 +48,38 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider { private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName()); /** - * Loads the trie of suffixes from the csv resource file. + * Loads the domain suffixes from the csv resource file. * - * @return The root trie node. + * @return The mapping. * @throws IOException */ - private static DomainSuffixTrie loadTrie() throws IOException { + private static Map loadMapping() throws IOException { try (InputStream is = DomainCategorizer.class.getResourceAsStream(DOMAIN_TYPE_CSV); InputStreamReader isReader = new InputStreamReader(is, StandardCharsets.UTF_8); BufferedReader reader = new BufferedReader(isReader)) { - DomainSuffixTrie trie = new DomainSuffixTrie(); + Map mapping = new HashMap<>(); int lineNum = 1; while (reader.ready()) { String line = reader.readLine(); if (!StringUtils.isBlank(line)) { - addItem(trie, line.trim(), lineNum); + addItem(mapping, line.trim(), lineNum); lineNum++; } } - return trie; + return mapping; } } /** - * Adds a trie node based on the csv line. + * Adds a mapping based on the csv line. * - * @param trie The root trie node. + * @param mapping The suffix to category mapping. * @param line The line to be parsed. * @param lineNumber The line number of this csv line. */ - private static void addItem(DomainSuffixTrie trie, String line, int lineNumber) { + private static void addItem(Map mapping, String line, int lineNumber) { // make sure this isn't a blank line. if (StringUtils.isBlank(line)) { return; @@ -102,17 +106,17 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider { return; } - trie.add(hostSuffix, domainTypeStr); + mapping.put(hostSuffix, domainTypeStr); } - // the root node for the trie containing suffixes for domain categories. - private DomainSuffixTrie trie = null; + // the host suffix to category mapping. + private Map mapping = null; @Override public void initialize() throws IngestModuleException { - if (this.trie == null) { + if (this.mapping == null) { try { - this.trie = loadTrie(); + this.mapping = loadMapping(); } catch (IOException ex) { throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex); } @@ -121,6 +125,21 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider { @Override public DomainCategoryResult getCategory(String domain, String host) { - return trie.findHostCategory(host); + String hostToUse = StringUtils.isBlank(host) ? domain : host; + + if (StringUtils.isBlank(hostToUse)) { + return null; + } + + List tokens = Arrays.asList(hostToUse.split("\\.")); + for (int i = 0; i < tokens.size(); i++) { + String searchString = String.join(".", tokens.subList(i, tokens.size())); + String category = mapping.get(searchString); + if (StringUtils.isNotBlank(category)) { + return new DefaultDomainCategoryResult(searchString, category); + } + } + + return null; } } diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DomainCategorizer.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DomainCategorizer.java index c4857c7c6d..8413ca257d 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DomainCategorizer.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/DomainCategorizer.java @@ -122,8 +122,7 @@ class DomainCategorizer extends Extract { return host; } - - + private DomainCategoryResult findCategory(String domain, String host) { List safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders; for (DomainCategoryProvider provider : safeProviders) { @@ -132,7 +131,7 @@ class DomainCategorizer extends Extract { return result; } } - + return null; } @@ -177,16 +176,16 @@ class DomainCategorizer extends Extract { // atempt to get the host from the url provided. String host = getHost(urlString); - + // get the url string from the artifact BlackboardAttribute domainAttr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN)); String domainString = domainAttr.getValueString(); - - // make sure we have at least one of host or domain - if (StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) { + + // make sure we have at least one of host or domain, and the host hasn't been seen before + if ((StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) || (domainSuffixesSeen.contains(host))) { continue; } - + // if we reached this point, we are at least analyzing this item artifactsAnalyzed++; @@ -266,9 +265,9 @@ class DomainCategorizer extends Extract { provider.initialize(); } - this.domainProviders = foundProviders == null ? - Collections.emptyList() : - foundProviders; + this.domainProviders = foundProviders == null + ? Collections.emptyList() + : foundProviders; } @Override