working through domain to sqlite

This commit is contained in:
Greg DiCristofaro 2020-12-08 16:23:32 -05:00
parent fd5d759d2a
commit 6793bd7dc9
6 changed files with 46 additions and 278 deletions

View File

@ -28,30 +28,16 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult {
private final String hostSuffix; private final String hostSuffix;
private final String category; private final String category;
private final boolean morePrefixes;
/** /**
* Default constructor assuming default for hasMorePrefixes of true. * Default constructor.
* @param hostSuffix The portion of the suffix from the host or domain that was a * @param hostSuffix The portion of the suffix from the host or domain that was a
* match (i.e. 'mail.google.com' or 'hotmail.com'). * match (i.e. 'mail.google.com' or 'hotmail.com').
* @param category The category (i.e. 'Web Email'). * @param category The category (i.e. 'Web Email').
*/ */
public DefaultDomainCategoryResult(String hostSuffix, String category) { public DefaultDomainCategoryResult(String hostSuffix, String category) {
this(hostSuffix, category, true);
}
/**
* Main constructor.
* @param hostSuffix The portion of the suffix from the host or domain that was a
* match (i.e. 'mail.google.com' or 'hotmail.com').
* @param category The category (i.e. 'Web Email').
* @param morePrefixes In the event that there would be different matches for additional
* prefixes, this can be true.
*/
public DefaultDomainCategoryResult(String hostSuffix, String category, boolean morePrefixes) {
this.hostSuffix = hostSuffix; this.hostSuffix = hostSuffix;
this.category = category; this.category = category;
this.morePrefixes = morePrefixes;
} }
@Override @Override
@ -63,10 +49,4 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult {
public String getCategory() { public String getCategory() {
return category; return category;
} }
@Override
public boolean hasMorePrefixes() {
return morePrefixes;
}
} }

View File

@ -35,16 +35,4 @@ public interface DomainCategoryResult {
* @return The category (i.e. 'Web Email'). * @return The category (i.e. 'Web Email').
*/ */
String getCategory(); String getCategory();
/**
* @return In the event that there would be different matches for additional
* prefixes, this can return true. For instance, if there was an entry for
* 'mail.google.com' and 'chatenabled.mail.google.com', a search for
* 'mail.google.com' would return the host suffix: 'mail.google.com' and
* 'true' for hasMorePrefixes since an additional category could be added
* for the 'chatenabled' prefix.
*/
default boolean hasMorePrefixes() {
return true;
}
} }

View File

@ -1,86 +0,0 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2020 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.url.analytics;
import com.google.common.annotations.Beta;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang.StringUtils;
import org.sleuthkit.autopsy.url.analytics.Trie.TrieResult;
/**
 * A trie of host name suffixes. Host names are split on '.', lower-cased and
 * stored from the right-most token (e.g. 'com') down to the left-most token,
 * so that hosts sharing a suffix share a path in the underlying trie. Each
 * stored suffix carries a String value (the domain category).
 */
@Beta
public class DomainSuffixTrie {

    // Character for joining domain segments.
    private static final String JOINER = ".";

    // Delimiter when used with regex for domains (the joiner, escaped).
    private static final String DELIMITER = "\\" + JOINER;

    // Backing trie keyed by host tokens in reverse order.
    private final Trie<String, String> trie = new Trie<>();

    /**
     * Converts a host into the token sequence used as the trie key: the host
     * is lower-cased, split on '.', blank tokens are dropped and the remaining
     * tokens are reversed (i.e. 'mail.google.com' becomes 'com', 'google',
     * 'mail').
     *
     * @param host The host or host suffix. Must not be null.
     * @return The reversed, lower-cased, non-blank tokens of the host.
     */
    private static Iterable<String> getSuffixIter(String host) {
        // Locale.ROOT keeps lower-casing independent of the default locale
        // (e.g. the Turkish dotless 'i'), since host names are not locale text.
        List<String> tokens = Stream.of(host.toLowerCase(java.util.Locale.ROOT).split(DELIMITER))
                .filter(StringUtils::isNotBlank)
                .collect(Collectors.toList());

        Collections.reverse(tokens);
        return tokens;
    }

    /**
     * Adds a host suffix and the value associated with it to the trie.
     *
     * @param suffix The host suffix (i.e. 'mail.google.com' or 'hotmail.com').
     * @param leaf The value to store for that suffix (i.e. the category).
     */
    public void add(String suffix, String leaf) {
        this.trie.add(getSuffixIter(suffix), leaf);
    }

    /**
     * Determines if the host is a known type of host. If so, returns the
     * portion of the host suffix that signifies the domain type (i.e.
     * "hotmail.com" or "mail.google.com") and the domain type. Also returned
     * in the DomainCategoryResult is whether or not the node found in the trie
     * has any children.
     *
     * @param host The host.
     * @return The DomainCategoryResult holding the matched host suffix and the
     * category for that suffix if a match was found. Returns null if the host
     * is blank or no suffix of the host is in the trie.
     */
    public DomainCategoryResult findHostCategory(String host) {
        // if no host, return none.
        if (StringUtils.isBlank(host)) {
            return null;
        }

        TrieResult<String, String> result = this.trie.getDeepest(getSuffixIter(host));

        // the trie returns null when nothing matched; pass that on to the
        // caller instead of dereferencing it.
        if (result == null) {
            return null;
        }

        // keys are in trie order (right-most token first); flip them back to
        // host order before joining.
        List<String> keys = new ArrayList<>(result.getKeys());
        Collections.reverse(keys);
        String suffix = String.join(JOINER, keys);
        return new DefaultDomainCategoryResult(suffix, result.getValue(), result.hasChildren());
    }
}

View File

@ -1,132 +0,0 @@
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package org.sleuthkit.autopsy.url.analytics;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.collections4.MapUtils;
/**
 * A simple generic trie. Each entry is stored under a sequence of key tokens;
 * every token in the sequence corresponds to one level of the tree, and the
 * value is stored on the node reached by the last token.
 *
 * @param <K> The type of a single key token.
 * @param <V> The type of the value stored at a node.
 */
class Trie<K, V> {

    /**
     * A node of the trie: a map of child nodes keyed by token and an optional
     * value (null when no entry ends at this node).
     */
    private static class Node<K, V> {

        private final Map<K, Node<K, V>> children = new HashMap<>();
        private V leafValue = null;

        /**
         * @param childKey The token of the child.
         * @return The existing child for the token, or a newly added one.
         */
        Node<K, V> getOrAddChild(K childKey) {
            return children.computeIfAbsent(childKey, key -> new Node<>());
        }

        /**
         * @param childKey The token of the child.
         * @return The child for the token or null if there is none.
         */
        Node<K, V> getChild(K childKey) {
            return children.get(childKey);
        }

        /**
         * @return True if this node has at least one child.
         */
        boolean hasChildren() {
            return !children.isEmpty();
        }

        V getLeafValue() {
            return leafValue;
        }

        void setLeafValue(V leafValue) {
            this.leafValue = leafValue;
        }
    }

    /**
     * The outcome of a trie search: the value found, the key tokens leading to
     * the node holding that value, and whether that node has children.
     */
    static class TrieResult<K, V> {

        private final V value;
        private final List<K> keys;
        private final boolean hasChildren;

        TrieResult(V value, List<K> keys, boolean hasChildren) {
            this.value = value;
            this.keys = keys;
            this.hasChildren = hasChildren;
        }

        V getValue() {
            return value;
        }

        List<K> getKeys() {
            return keys;
        }

        boolean hasChildren() {
            return hasChildren;
        }
    }

    private final Node<K, V> root = new Node<>();

    /**
     * Stores a value under the given token sequence, creating nodes as needed
     * and replacing any value previously stored for the same sequence.
     *
     * @param keyTokens The tokens identifying the entry.
     * @param leafValue The value to store.
     */
    void add(Iterable<K> keyTokens, V leafValue) {
        Node<K, V> node = root;
        for (K key : keyTokens) {
            node = node.getOrAddChild(key);
        }

        node.setLeafValue(leafValue);
    }

    /**
     * Returns the value stored under exactly the given token sequence.
     *
     * @param keys The tokens identifying the entry.
     * @return The stored value, or null if the path does not exist or no value
     * is stored at its end.
     */
    V getExact(Iterable<K> keys) {
        Node<K, V> node = root;
        for (K key : keys) {
            node = node.getChild(key);
            if (node == null) {
                return null;
            }
        }

        return node.getLeafValue();
    }

    /**
     * Follows the given tokens as far as the trie allows and returns the
     * deepest node on that path that holds a value.
     *
     * @param keys The tokens to follow.
     * @return The deepest match on the path or null if no node on the path
     * holds a value.
     */
    TrieResult<K, V> getDeepest(Iterable<K> keys) {
        return search(keys, false);
    }

    /**
     * Follows the given tokens as far as the trie allows and returns the
     * first (shallowest) node on that path that holds a value.
     *
     * @param keys The tokens to follow.
     * @return The first match on the path or null if no node on the path
     * holds a value.
     */
    TrieResult<K, V> getFirst(Iterable<K> keys) {
        return search(keys, true);
    }

    /**
     * Walks the path described by the tokens, checking every node that is
     * reached (including the one reached by the last token) for a value.
     *
     * @param keys The tokens to follow.
     * @param stopAtFirst If true, return as soon as a node with a value is
     * found; otherwise keep going and return the deepest such node.
     * @return The match or null if no node on the path holds a value.
     */
    private TrieResult<K, V> search(Iterable<K> keys, boolean stopAtFirst) {
        Node<K, V> node = root;
        List<K> visited = new ArrayList<>();

        TrieResult<K, V> bestMatch = toResult(node, visited);
        if (stopAtFirst && bestMatch != null) {
            return bestMatch;
        }

        for (K key : keys) {
            node = node.getChild(key);
            if (node == null) {
                // the trie has no entry for the remaining tokens.
                break;
            }

            visited.add(key);
            TrieResult<K, V> match = toResult(node, visited);
            if (match != null) {
                if (stopAtFirst) {
                    return match;
                }
                bestMatch = match;
            }
        }

        return bestMatch;
    }

    /**
     * Creates a result for the node if it holds a value.
     *
     * @param node The node.
     * @param path The tokens leading to the node.
     * @return The result or null if the node holds no value.
     */
    private TrieResult<K, V> toResult(Node<K, V> node, List<K> path) {
        V value = node.getLeafValue();
        if (value == null) {
            return null;
        }

        // copy the path: the caller keeps appending to its list while it
        // continues down the trie.
        return new TrieResult<>(value, new ArrayList<>(path), node.hasChildren());
    }
}

View File

@ -18,17 +18,21 @@
*/ */
package org.sleuthkit.autopsy.recentactivity; package org.sleuthkit.autopsy.recentactivity;
import org.sleuthkit.autopsy.url.analytics.DomainSuffixTrie;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level; import java.util.logging.Level;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.ingest.IngestModule; import org.sleuthkit.autopsy.ingest.IngestModule;
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException; import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
import org.sleuthkit.autopsy.url.analytics.DefaultDomainCategoryResult;
import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider; import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider;
import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult; import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult;
@ -44,38 +48,38 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName()); private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName());
/** /**
* Loads the trie of suffixes from the csv resource file. * Loads the domain suffixes from the csv resource file.
* *
* @return The root trie node. * @return The mapping.
* @throws IOException * @throws IOException
*/ */
private static DomainSuffixTrie loadTrie() throws IOException { private static Map<String, String> loadMapping() throws IOException {
try (InputStream is = DomainCategorizer.class.getResourceAsStream(DOMAIN_TYPE_CSV); try (InputStream is = DomainCategorizer.class.getResourceAsStream(DOMAIN_TYPE_CSV);
InputStreamReader isReader = new InputStreamReader(is, StandardCharsets.UTF_8); InputStreamReader isReader = new InputStreamReader(is, StandardCharsets.UTF_8);
BufferedReader reader = new BufferedReader(isReader)) { BufferedReader reader = new BufferedReader(isReader)) {
DomainSuffixTrie trie = new DomainSuffixTrie(); Map<String, String> mapping = new HashMap<>();
int lineNum = 1; int lineNum = 1;
while (reader.ready()) { while (reader.ready()) {
String line = reader.readLine(); String line = reader.readLine();
if (!StringUtils.isBlank(line)) { if (!StringUtils.isBlank(line)) {
addItem(trie, line.trim(), lineNum); addItem(mapping, line.trim(), lineNum);
lineNum++; lineNum++;
} }
} }
return trie; return mapping;
} }
} }
/** /**
* Adds a trie node based on the csv line. * Adds a mapping based on the csv line.
* *
* @param trie The root trie node. * @param mapping The suffix to category mapping.
* @param line The line to be parsed. * @param line The line to be parsed.
* @param lineNumber The line number of this csv line. * @param lineNumber The line number of this csv line.
*/ */
private static void addItem(DomainSuffixTrie trie, String line, int lineNumber) { private static void addItem(Map<String, String> mapping, String line, int lineNumber) {
// make sure this isn't a blank line. // make sure this isn't a blank line.
if (StringUtils.isBlank(line)) { if (StringUtils.isBlank(line)) {
return; return;
@ -102,17 +106,17 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
return; return;
} }
trie.add(hostSuffix, domainTypeStr); mapping.put(hostSuffix, domainTypeStr);
} }
// the root node for the trie containing suffixes for domain categories. // the host suffix to category mapping.
private DomainSuffixTrie trie = null; private Map<String, String> mapping = null;
@Override @Override
public void initialize() throws IngestModuleException { public void initialize() throws IngestModuleException {
if (this.trie == null) { if (this.mapping == null) {
try { try {
this.trie = loadTrie(); this.mapping = loadMapping();
} catch (IOException ex) { } catch (IOException ex) {
throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex); throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex);
} }
@ -121,6 +125,21 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
@Override @Override
public DomainCategoryResult getCategory(String domain, String host) { public DomainCategoryResult getCategory(String domain, String host) {
return trie.findHostCategory(host); String hostToUse = StringUtils.isBlank(host) ? domain : host;
if (StringUtils.isBlank(hostToUse)) {
return null;
}
List<String> tokens = Arrays.asList(hostToUse.split("\\."));
for (int i = 0; i < tokens.size(); i++) {
String searchString = String.join(".", tokens.subList(i, tokens.size()));
String category = mapping.get(searchString);
if (StringUtils.isNotBlank(category)) {
return new DefaultDomainCategoryResult(searchString, category);
}
}
return null;
} }
} }

View File

@ -123,7 +123,6 @@ class DomainCategorizer extends Extract {
return host; return host;
} }
private DomainCategoryResult findCategory(String domain, String host) { private DomainCategoryResult findCategory(String domain, String host) {
List<DomainCategoryProvider> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders; List<DomainCategoryProvider> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
for (DomainCategoryProvider provider : safeProviders) { for (DomainCategoryProvider provider : safeProviders) {
@ -182,8 +181,8 @@ class DomainCategorizer extends Extract {
BlackboardAttribute domainAttr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN)); BlackboardAttribute domainAttr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
String domainString = domainAttr.getValueString(); String domainString = domainAttr.getValueString();
// make sure we have at least one of host or domain // make sure we have at least one of host or domain, and the host hasn't been seen before
if (StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) { if ((StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) || (domainSuffixesSeen.contains(host))) {
continue; continue;
} }
@ -266,9 +265,9 @@ class DomainCategorizer extends Extract {
provider.initialize(); provider.initialize();
} }
this.domainProviders = foundProviders == null ? this.domainProviders = foundProviders == null
Collections.emptyList() : ? Collections.emptyList()
foundProviders; : foundProviders;
} }
@Override @Override