working through domain to sqlite

This commit is contained in:
Greg DiCristofaro 2020-12-08 16:23:32 -05:00
parent fd5d759d2a
commit 6793bd7dc9
6 changed files with 46 additions and 278 deletions

View File

@ -28,30 +28,16 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult {
private final String hostSuffix;
private final String category;
private final boolean morePrefixes;
/**
* Default constructor assuming default for hasMorePrefixes of true.
* Default constructor.
* @param hostSuffix The portion of the suffix from the host or domain that was a
* match (i.e. 'mail.google.com' or 'hotmail.com').
* @param category The category (i.e. 'Web Email').
*/
public DefaultDomainCategoryResult(String hostSuffix, String category) {
this(hostSuffix, category, true);
}
/**
* Main constructor.
* @param hostSuffix The portion of the suffix from the host or domain that was a
* match (i.e. 'mail.google.com' or 'hotmail.com').
* @param category The category (i.e. 'Web Email').
* @param morePrefixes In the event that there would be different matches for additional
* prefixes, this can be true.
*/
public DefaultDomainCategoryResult(String hostSuffix, String category, boolean morePrefixes) {
this.hostSuffix = hostSuffix;
this.category = category;
this.morePrefixes = morePrefixes;
}
@Override
@ -63,10 +49,4 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult {
public String getCategory() {
return category;
}
/**
 * @return The morePrefixes flag that was supplied to the constructor.
 */
@Override
public boolean hasMorePrefixes() {
return morePrefixes;
}
}

View File

@ -35,16 +35,4 @@ public interface DomainCategoryResult {
* @return The category (i.e. 'Web Email').
*/
String getCategory();
/**
 * Indicates whether a longer host (one with additional prefixes) could map
 * to a different category than the one in this result.
 *
 * For instance, if there was an entry for 'mail.google.com' and
 * 'chatenabled.mail.google.com', a search for 'mail.google.com' would
 * return the host suffix 'mail.google.com' and 'true' for hasMorePrefixes,
 * since an additional category could be added for the 'chatenabled' prefix.
 *
 * @return True if there could be different matches for additional
 * prefixes. This default implementation always returns true.
 */
default boolean hasMorePrefixes() {
return true;
}
}

View File

@ -1,86 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2020 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.url.analytics;
import com.google.common.annotations.Beta;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang.StringUtils;
import org.sleuthkit.autopsy.url.analytics.Trie.TrieResult;
/**
 * Maps host suffixes (i.e. 'hotmail.com' or 'mail.google.com') to a category.
 * Hosts are lower-cased, split on '.', stripped of blank segments and stored in
 * a trie in reverse segment order so that lookups walk from the top level
 * domain towards the most specific prefix.
 */
@Beta
public class DomainSuffixTrie {

    // Character for joining domain segments.
    private static final String JOINER = ".";

    // delimiter when used with regex for domains
    private static final String DELIMITER = "\\" + JOINER;

    private final Trie<String, String> trie = new Trie<>();

    /**
     * Converts a host into its non-blank, lower-cased segments in reverse
     * order (i.e. 'Mail.Google.com' becomes ['com', 'google', 'mail']).
     *
     * @param host The host or host suffix; must not be null.
     * @return The reversed segments.
     */
    private static Iterable<String> getSuffixIter(String host) {
        // Locale.ROOT keeps the case folding independent of the default locale
        // (i.e. a Turkish locale would otherwise map 'I' to a dotless 'i').
        List<String> tokens = Stream.of(host.toLowerCase(java.util.Locale.ROOT).split(DELIMITER))
                .filter(StringUtils::isNotBlank)
                .collect(Collectors.toList());

        Collections.reverse(tokens);
        return tokens;
    }

    /**
     * Registers a host suffix along with the value to return when a host
     * ending in that suffix is looked up.
     *
     * @param suffix The host suffix (i.e. 'hotmail.com'); must not be null.
     * @param leaf The value to associate with the suffix (i.e. 'Web Email').
     */
    public void add(String suffix, String leaf) {
        this.trie.add(getSuffixIter(suffix), leaf);
    }

    /**
     * Determines if the host is a known type of host. If so, returns the
     * portion of the host suffix that signifies the domain type (i.e.
     * "hotmail.com" or "mail.google.com") and the domain type. The result also
     * carries whether or not the matching trie node has any children.
     *
     * @param host The host.
     * @return The DomainCategoryResult holding the matched host suffix and
     * the category for that suffix if found. Otherwise, returns null.
     */
    public DomainCategoryResult findHostCategory(String host) {
        // if no host, return none.
        if (StringUtils.isBlank(host)) {
            return null;
        }

        TrieResult<String, String> result = this.trie.getDeepest(getSuffixIter(host));

        // the trie returns null when no registered suffix matches the host.
        if (result == null) {
            return null;
        }

        // keys come back in trie order (top level domain first); copy before
        // reversing so the list owned by the result is left untouched.
        List<String> keys = new ArrayList<>(result.getKeys());
        Collections.reverse(keys);
        String suffix = String.join(JOINER, keys);
        return new DefaultDomainCategoryResult(suffix, result.getValue(), result.hasChildren());
    }
}

View File

@ -1,132 +0,0 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package org.sleuthkit.autopsy.url.analytics;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.collections4.MapUtils;
/**
 * A minimal trie mapping a sequence of keys to a value.
 *
 * @param <K> The type of each key token along a path.
 * @param <V> The type of value stored at the end of a path.
 */
class Trie<K, V> {

    /**
     * A single node: child nodes by key plus an optional value. A null leaf
     * value means no entry ends at this node.
     */
    private static final class Node<K, V> {

        private final Map<K, Node<K, V>> children = new HashMap<>();
        private V leafValue = null;

        Node<K, V> getOrAddChild(K childKey) {
            Node<K, V> child = children.get(childKey);
            if (child == null) {
                child = new Node<>();
                children.put(childKey, child);
            }
            return child;
        }

        Node<K, V> getChild(K childKey) {
            return children.get(childKey);
        }

        V getLeafValue() {
            return leafValue;
        }

        void setLeafValue(V leafValue) {
            this.leafValue = leafValue;
        }

        boolean hasChildren() {
            return !children.isEmpty();
        }
    }

    /**
     * The outcome of a lookup: the value found, the keys leading to the node
     * holding that value and whether that node has any children.
     */
    static class TrieResult<K, V> {

        private final V value;
        private final List<K> keys;
        private final boolean hasChildren;

        TrieResult(V value, List<K> keys, boolean hasChildren) {
            this.value = value;
            this.keys = keys;
            this.hasChildren = hasChildren;
        }

        V getValue() {
            return value;
        }

        List<K> getKeys() {
            return keys;
        }

        boolean hasChildren() {
            return hasChildren;
        }
    }

    private final Node<K, V> root = new Node<>();

    /**
     * Stores a value at the end of the path described by the key tokens,
     * creating intermediate nodes as needed and replacing any value already
     * stored at that path.
     *
     * @param keyTokens The keys describing the path from the root.
     * @param leafValue The value to store.
     */
    void add(Iterable<K> keyTokens, V leafValue) {
        Node<K, V> node = root;
        for (K key : keyTokens) {
            node = node.getOrAddChild(key);
        }

        node.setLeafValue(leafValue);
    }

    /**
     * Returns the value stored at exactly the given path.
     *
     * @param keys The keys describing the path from the root.
     * @return The value at that path or null if the path does not exist or
     * holds no value.
     */
    V getExact(Iterable<K> keys) {
        Node<K, V> node = root;
        for (K key : keys) {
            node = node.getChild(key);
            if (node == null) {
                return null;
            }
        }

        return node.getLeafValue();
    }

    /**
     * Follows the keys as far as the trie allows and returns the value held
     * by the deepest node on that path that has one.
     *
     * @param keys The keys describing the path from the root.
     * @return The deepest match or null if no node on the path holds a value.
     */
    TrieResult<K, V> getDeepest(Iterable<K> keys) {
        return findMatch(keys, false);
    }

    /**
     * Follows the keys as far as the trie allows and returns the value held
     * by the first (shallowest) node on that path that has one.
     *
     * @param keys The keys describing the path from the root.
     * @return The first match or null if no node on the path holds a value.
     */
    TrieResult<K, V> getFirst(Iterable<K> keys) {
        return findMatch(keys, true);
    }

    /**
     * Walks the path described by the keys recording nodes that hold a value.
     *
     * @param keys The keys describing the path from the root.
     * @param stopAtFirst If true, the first node holding a value is returned;
     * otherwise the deepest one is.
     * @return The match or null if no node on the path holds a value.
     */
    private TrieResult<K, V> findMatch(Iterable<K> keys, boolean stopAtFirst) {
        Node<K, V> node = root;
        List<K> visited = new ArrayList<>();
        TrieResult<K, V> bestMatch = null;

        for (K key : keys) {
            if (node == null) {
                break;
            }

            if (node.getLeafValue() != null) {
                bestMatch = toResult(node, visited);
                if (stopAtFirst) {
                    return bestMatch;
                }
            }

            visited.add(key);
            node = node.getChild(key);
        }

        // the node reached by the final key has not been inspected by the
        // loop; without this check a path that ends exactly on a stored entry
        // would never match.
        if (node != null && node.getLeafValue() != null) {
            bestMatch = toResult(node, visited);
        }

        return bestMatch;
    }

    /**
     * Creates a result for the node, snapshotting the keys visited so far so
     * that later traversal does not alter the keys of the result.
     */
    private TrieResult<K, V> toResult(Node<K, V> node, List<K> visited) {
        return new TrieResult<>(node.getLeafValue(), new ArrayList<>(visited), node.hasChildren());
    }
}

View File

@ -18,17 +18,21 @@
*/
package org.sleuthkit.autopsy.recentactivity;
import org.sleuthkit.autopsy.url.analytics.DomainSuffixTrie;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import org.apache.commons.lang.StringUtils;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.ingest.IngestModule;
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
import org.sleuthkit.autopsy.url.analytics.DefaultDomainCategoryResult;
import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider;
import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult;
@ -44,38 +48,38 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName());
/**
* Loads the trie of suffixes from the csv resource file.
* Loads the domain suffixes from the csv resource file.
*
* @return The root trie node.
* @return The mapping.
* @throws IOException
*/
private static DomainSuffixTrie loadTrie() throws IOException {
private static Map<String, String> loadMapping() throws IOException {
try (InputStream is = DomainCategorizer.class.getResourceAsStream(DOMAIN_TYPE_CSV);
InputStreamReader isReader = new InputStreamReader(is, StandardCharsets.UTF_8);
BufferedReader reader = new BufferedReader(isReader)) {
DomainSuffixTrie trie = new DomainSuffixTrie();
Map<String, String> mapping = new HashMap<>();
int lineNum = 1;
while (reader.ready()) {
String line = reader.readLine();
if (!StringUtils.isBlank(line)) {
addItem(trie, line.trim(), lineNum);
addItem(mapping, line.trim(), lineNum);
lineNum++;
}
}
return trie;
return mapping;
}
}
/**
* Adds a trie node based on the csv line.
* Adds a mapping based on the csv line.
*
* @param trie The root trie node.
* @param mapping The suffix to category mapping.
* @param line The line to be parsed.
* @param lineNumber The line number of this csv line.
*/
private static void addItem(DomainSuffixTrie trie, String line, int lineNumber) {
private static void addItem(Map<String, String> mapping, String line, int lineNumber) {
// make sure this isn't a blank line.
if (StringUtils.isBlank(line)) {
return;
@ -102,17 +106,17 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
return;
}
trie.add(hostSuffix, domainTypeStr);
mapping.put(hostSuffix, domainTypeStr);
}
// the root node for the trie containing suffixes for domain categories.
private DomainSuffixTrie trie = null;
// the host suffix to category mapping.
private Map<String, String> mapping = null;
@Override
public void initialize() throws IngestModuleException {
if (this.trie == null) {
if (this.mapping == null) {
try {
this.trie = loadTrie();
this.mapping = loadMapping();
} catch (IOException ex) {
throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex);
}
@ -121,6 +125,21 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
@Override
/**
 * Finds the category for the longest suffix of the host (or of the domain when
 * no host is given) that is present in the suffix to category mapping.
 *
 * @param domain The domain; used only when the host is blank.
 * @param host The host.
 * @return The matched suffix and its category or null if the host and domain
 * are both blank or no suffix is present in the mapping.
 */
public DomainCategoryResult getCategory(String domain, String host) {
    // prefer the host and fall back to the domain.
    String hostToUse = StringUtils.isBlank(host) ? domain : host;

    if (StringUtils.isBlank(hostToUse)) {
        return null;
    }

    // Hosts are case insensitive; fold case independent of the default locale.
    // NOTE(review): assumes suffix keys in the mapping are stored in lower
    // case -- confirm against the csv loading code.
    List<String> tokens = Arrays.asList(hostToUse.toLowerCase(java.util.Locale.ROOT).split("\\."));

    // try the full host first and then drop one leading segment at a time so
    // that the most specific suffix wins.
    for (int i = 0; i < tokens.size(); i++) {
        String searchString = String.join(".", tokens.subList(i, tokens.size()));
        String category = mapping.get(searchString);
        if (StringUtils.isNotBlank(category)) {
            return new DefaultDomainCategoryResult(searchString, category);
        }
    }

    return null;
}
}

View File

@ -123,7 +123,6 @@ class DomainCategorizer extends Extract {
return host;
}
private DomainCategoryResult findCategory(String domain, String host) {
List<DomainCategoryProvider> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
for (DomainCategoryProvider provider : safeProviders) {
@ -182,8 +181,8 @@ class DomainCategorizer extends Extract {
BlackboardAttribute domainAttr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
String domainString = domainAttr.getValueString();
// make sure we have at least one of host or domain
if (StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) {
// make sure we have at least one of host or domain, and the host hasn't been seen before
if ((StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) || (domainSuffixesSeen.contains(host))) {
continue;
}
@ -266,9 +265,9 @@ class DomainCategorizer extends Extract {
provider.initialize();
}
this.domainProviders = foundProviders == null ?
Collections.emptyList() :
foundProviders;
this.domainProviders = foundProviders == null
? Collections.emptyList()
: foundProviders;
}
@Override