mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-13 00:16:16 +00:00
working through domain to sqlite
This commit is contained in:
parent
fd5d759d2a
commit
6793bd7dc9
@ -28,30 +28,16 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult {
|
|||||||
|
|
||||||
private final String hostSuffix;
|
private final String hostSuffix;
|
||||||
private final String category;
|
private final String category;
|
||||||
private final boolean morePrefixes;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Default constructor assuming default for hasMorePrefixes of true.
|
* Default constructor.
|
||||||
* @param hostSuffix The portion of the suffix from the host or domain that was a
|
* @param hostSuffix The portion of the suffix from the host or domain that was a
|
||||||
* match (i.e. 'mail.google.com' or 'hotmail.com').
|
* match (i.e. 'mail.google.com' or 'hotmail.com').
|
||||||
* @param category The category (i.e. 'Web Email').
|
* @param category The category (i.e. 'Web Email').
|
||||||
*/
|
*/
|
||||||
public DefaultDomainCategoryResult(String hostSuffix, String category) {
|
public DefaultDomainCategoryResult(String hostSuffix, String category) {
|
||||||
this(hostSuffix, category, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Main constructor.
|
|
||||||
* @param hostSuffix The portion of the suffix from the host or domain that was a
|
|
||||||
* match (i.e. 'mail.google.com' or 'hotmail.com').
|
|
||||||
* @param category The category (i.e. 'Web Email').
|
|
||||||
* @param morePrefixes In the event that there would be different matches for additional
|
|
||||||
* prefixes, this can be true.
|
|
||||||
*/
|
|
||||||
public DefaultDomainCategoryResult(String hostSuffix, String category, boolean morePrefixes) {
|
|
||||||
this.hostSuffix = hostSuffix;
|
this.hostSuffix = hostSuffix;
|
||||||
this.category = category;
|
this.category = category;
|
||||||
this.morePrefixes = morePrefixes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -63,10 +49,4 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult {
|
|||||||
public String getCategory() {
|
public String getCategory() {
|
||||||
return category;
|
return category;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasMorePrefixes() {
|
|
||||||
return morePrefixes;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -35,16 +35,4 @@ public interface DomainCategoryResult {
|
|||||||
* @return The category (i.e. 'Web Email').
|
* @return The category (i.e. 'Web Email').
|
||||||
*/
|
*/
|
||||||
String getCategory();
|
String getCategory();
|
||||||
|
|
||||||
/**
|
|
||||||
* @return In the event that there would be different matches for additional
|
|
||||||
* prefixes, this can return true. For instance, if there was an entry for
|
|
||||||
* 'mail.google.com' and 'chatenabled.mail.google.com', a search for
|
|
||||||
* 'mail.google.com' would return the host suffix: 'mail.google.com' and
|
|
||||||
* 'true' for hasMorePrefixes since an additional category could be added
|
|
||||||
* for the 'chatenabled' prefix.
|
|
||||||
*/
|
|
||||||
default boolean hasMorePrefixes() {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -1,86 +0,0 @@
|
|||||||
/*
|
|
||||||
* Autopsy Forensic Browser
|
|
||||||
*
|
|
||||||
* Copyright 2020 Basis Technology Corp.
|
|
||||||
* Contact: carrier <at> sleuthkit <dot> org
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.sleuthkit.autopsy.url.analytics;
|
|
||||||
|
|
||||||
import com.google.common.annotations.Beta;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
|
||||||
import org.sleuthkit.autopsy.url.analytics.Trie.TrieResult;
|
|
||||||
|
|
||||||
@Beta
|
|
||||||
public class DomainSuffixTrie {
|
|
||||||
|
|
||||||
private static Iterable<String> getSuffixIter(String host) {
|
|
||||||
// parse the tokens splitting on delimiter
|
|
||||||
List<String> tokens = Stream.of(host.toLowerCase().split(DELIMITER))
|
|
||||||
.filter(StringUtils::isNotBlank)
|
|
||||||
.collect(Collectors.toList());
|
|
||||||
|
|
||||||
Collections.reverse(tokens);
|
|
||||||
return tokens;
|
|
||||||
}
|
|
||||||
|
|
||||||
//private void Node get
|
|
||||||
// Character for joining domain segments.
|
|
||||||
private static final String JOINER = ".";
|
|
||||||
// delimiter when used with regex for domains
|
|
||||||
private static final String DELIMITER = "\\" + JOINER;
|
|
||||||
|
|
||||||
private final Trie<String, String> trie = new Trie<>();
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param suffix
|
|
||||||
* @param leaf
|
|
||||||
*/
|
|
||||||
public void add(String suffix, String leaf) {
|
|
||||||
this.trie.add(getSuffixIter(suffix), leaf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Determines if the host is a known type of host. If so, returns the
|
|
||||||
* portion of the host suffix that signifies the domain type (i.e.
|
|
||||||
* "hotmail.com" or "mail.google.com") and the domain type. Also returned in
|
|
||||||
* the DomainCategoryResult is whether or not any children of the found node
|
|
||||||
* in the trie and consequently, whether or not
|
|
||||||
*
|
|
||||||
* @param host The host.
|
|
||||||
* @return The DomainCategoryResult if a portion of the suffix was found
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* A pair of the host suffix and domain type for that suffix if
|
|
||||||
* found. Otherwise, returns null.
|
|
||||||
*/
|
|
||||||
public DomainCategoryResult findHostCategory(String host) {
|
|
||||||
// if no host, return none.
|
|
||||||
if (StringUtils.isBlank(host)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
TrieResult<String, String> result = this.trie.getDeepest(getSuffixIter(host));
|
|
||||||
List<String> keys = new ArrayList<>(result.getKeys());
|
|
||||||
Collections.reverse(keys);
|
|
||||||
String suffix = String.join(JOINER, keys);
|
|
||||||
return new DefaultDomainCategoryResult(suffix, result.getValue(), result.hasChildren());
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,132 +0,0 @@
|
|||||||
/*
|
|
||||||
* To change this license header, choose License Headers in Project Properties.
|
|
||||||
* To change this template file, choose Tools | Templates
|
|
||||||
* and open the template in the editor.
|
|
||||||
*/
|
|
||||||
package org.sleuthkit.autopsy.url.analytics;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import org.apache.commons.collections4.MapUtils;
|
|
||||||
|
|
||||||
/**
 * A generic trie (prefix tree) whose edges are keyed by tokens of type K and
 * whose nodes may carry a leaf value of type V. A node without a leaf value
 * (null) is only an intermediate step on the way to deeper entries.
 *
 * @param <K> The type of the key tokens.
 * @param <V> The type of the leaf values.
 */
class Trie<K, V> {

    /**
     * A single node in the trie. Static so that it does not hold a reference
     * to the enclosing trie and does not shadow its type parameters.
     */
    private static final class Node<K, V> {

        private final Map<K, Node<K, V>> children = new HashMap<>();
        private V leafValue = null;

        /**
         * @param childKey The key of the child.
         *
         * @return The existing child for the key or a newly added one.
         */
        Node<K, V> getOrAddChild(K childKey) {
            return children.computeIfAbsent(childKey, unused -> new Node<>());
        }

        /**
         * @param childKey The key of the child.
         *
         * @return The child for the key or null if there is none.
         */
        Node<K, V> getChild(K childKey) {
            return children.get(childKey);
        }

        /**
         * @return True if this node has at least one child.
         */
        boolean hasChildren() {
            return !children.isEmpty();
        }

        V getLeafValue() {
            return leafValue;
        }

        void setLeafValue(V leafValue) {
            this.leafValue = leafValue;
        }
    }

    /**
     * The outcome of a trie lookup: the leaf value found, the key tokens
     * leading to it and whether the matched node has further children.
     */
    static class TrieResult<K, V> {

        private final V value;
        private final List<K> keys;
        private final boolean hasChildren;

        /**
         * Main constructor.
         *
         * @param value       The leaf value of the matched node.
         * @param keys        The key tokens leading from the root to the
         *                    matched node.
         * @param hasChildren Whether or not the matched node has children.
         */
        TrieResult(V value, List<K> keys, boolean hasChildren) {
            this.value = value;
            this.keys = keys;
            this.hasChildren = hasChildren;
        }

        V getValue() {
            return value;
        }

        List<K> getKeys() {
            return keys;
        }

        boolean hasChildren() {
            return hasChildren;
        }
    }

    private final Node<K, V> root = new Node<>();

    /**
     * Stores a leaf value at the node reached by the key tokens, creating any
     * missing nodes on the way. An existing leaf value is overwritten.
     *
     * @param keyTokens The key tokens in order from the root.
     * @param leafValue The value to store.
     */
    void add(Iterable<K> keyTokens, V leafValue) {
        Node<K, V> node = root;
        for (K key : keyTokens) {
            node = node.getOrAddChild(key);
        }

        node.setLeafValue(leafValue);
    }

    /**
     * Returns the leaf value stored at exactly the given key path.
     *
     * @param keys The key tokens in order from the root.
     *
     * @return The leaf value or null if the path does not exist or carries no
     *         value.
     */
    V getExact(Iterable<K> keys) {
        Node<K, V> node = root;
        for (K key : keys) {
            node = node.getChild(key);
            if (node == null) {
                return null;
            }
        }

        return node.getLeafValue();
    }

    /**
     * Follows the keys as far as the trie allows and returns the deepest node
     * on that path that carries a leaf value, including the node reached by
     * the last key.
     *
     * @param keys The key tokens in order from the root.
     *
     * @return The deepest match or null if no node on the path has a value.
     */
    TrieResult<K, V> getDeepest(Iterable<K> keys) {
        return find(keys, false);
    }

    /**
     * Follows the keys as far as the trie allows and returns the first
     * (shallowest) node on that path that carries a leaf value, including the
     * node reached by the last key.
     *
     * @param keys The key tokens in order from the root.
     *
     * @return The first match or null if no node on the path has a value.
     */
    TrieResult<K, V> getFirst(Iterable<K> keys) {
        return find(keys, true);
    }

    /**
     * Walks the key path and collects the first or the deepest node carrying
     * a leaf value.
     *
     * @param keys        The key tokens in order from the root.
     * @param stopAtFirst If true, the first match is returned; otherwise the
     *                    deepest one.
     *
     * @return The match or null if there is none.
     */
    private TrieResult<K, V> find(Iterable<K> keys, boolean stopAtFirst) {
        Node<K, V> node = root;
        List<K> visited = new ArrayList<>();
        TrieResult<K, V> bestMatch = toResult(node, visited);
        if (stopAtFirst && bestMatch != null) {
            return bestMatch;
        }

        for (K key : keys) {
            node = node.getChild(key);
            if (node == null) {
                // the path ends here; keys that were not matched are not
                // part of any result.
                break;
            }

            visited.add(key);

            // checking after the descent ensures the node reached by the
            // last key is examined as well.
            TrieResult<K, V> match = toResult(node, visited);
            if (match != null) {
                if (stopAtFirst) {
                    return match;
                }
                bestMatch = match;
            }
        }

        return bestMatch;
    }

    /**
     * Creates a result for the node if it carries a leaf value.
     *
     * @param node    The node to examine.
     * @param visited The key tokens leading to the node.
     *
     * @return The result or null if the node has no leaf value.
     */
    private TrieResult<K, V> toResult(Node<K, V> node, List<K> visited) {
        V value = node.getLeafValue();
        if (value == null) {
            return null;
        }

        // snapshot the keys; the walk keeps appending to the visited list.
        return new TrieResult<>(value, new ArrayList<>(visited), node.hasChildren());
    }
}
|
|
@ -18,17 +18,21 @@
|
|||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.recentactivity;
|
package org.sleuthkit.autopsy.recentactivity;
|
||||||
|
|
||||||
import org.sleuthkit.autopsy.url.analytics.DomainSuffixTrie;
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModule;
|
import org.sleuthkit.autopsy.ingest.IngestModule;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
|
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
|
||||||
|
import org.sleuthkit.autopsy.url.analytics.DefaultDomainCategoryResult;
|
||||||
import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider;
|
import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider;
|
||||||
import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult;
|
import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult;
|
||||||
|
|
||||||
@ -44,38 +48,38 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
|
|||||||
private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName());
|
private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName());
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads the trie of suffixes from the csv resource file.
|
* Loads the domain suffixes from the csv resource file.
|
||||||
*
|
*
|
||||||
* @return The root trie node.
|
* @return The mapping.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
private static DomainSuffixTrie loadTrie() throws IOException {
|
private static Map<String, String> loadMapping() throws IOException {
|
||||||
try (InputStream is = DomainCategorizer.class.getResourceAsStream(DOMAIN_TYPE_CSV);
|
try (InputStream is = DomainCategorizer.class.getResourceAsStream(DOMAIN_TYPE_CSV);
|
||||||
InputStreamReader isReader = new InputStreamReader(is, StandardCharsets.UTF_8);
|
InputStreamReader isReader = new InputStreamReader(is, StandardCharsets.UTF_8);
|
||||||
BufferedReader reader = new BufferedReader(isReader)) {
|
BufferedReader reader = new BufferedReader(isReader)) {
|
||||||
|
|
||||||
DomainSuffixTrie trie = new DomainSuffixTrie();
|
Map<String, String> mapping = new HashMap<>();
|
||||||
int lineNum = 1;
|
int lineNum = 1;
|
||||||
while (reader.ready()) {
|
while (reader.ready()) {
|
||||||
String line = reader.readLine();
|
String line = reader.readLine();
|
||||||
if (!StringUtils.isBlank(line)) {
|
if (!StringUtils.isBlank(line)) {
|
||||||
addItem(trie, line.trim(), lineNum);
|
addItem(mapping, line.trim(), lineNum);
|
||||||
lineNum++;
|
lineNum++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return trie;
|
return mapping;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a trie node based on the csv line.
|
* Adds a mapping based on the csv line.
|
||||||
*
|
*
|
||||||
* @param trie The root trie node.
|
* @param mapping The suffix to category mapping.
|
||||||
* @param line The line to be parsed.
|
* @param line The line to be parsed.
|
||||||
* @param lineNumber The line number of this csv line.
|
* @param lineNumber The line number of this csv line.
|
||||||
*/
|
*/
|
||||||
private static void addItem(DomainSuffixTrie trie, String line, int lineNumber) {
|
private static void addItem(Map<String, String> mapping, String line, int lineNumber) {
|
||||||
// make sure this isn't a blank line.
|
// make sure this isn't a blank line.
|
||||||
if (StringUtils.isBlank(line)) {
|
if (StringUtils.isBlank(line)) {
|
||||||
return;
|
return;
|
||||||
@ -102,17 +106,17 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
trie.add(hostSuffix, domainTypeStr);
|
mapping.put(hostSuffix, domainTypeStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// the root node for the trie containing suffixes for domain categories.
|
// the host suffix to category mapping.
|
||||||
private DomainSuffixTrie trie = null;
|
private Map<String, String> mapping = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize() throws IngestModuleException {
|
public void initialize() throws IngestModuleException {
|
||||||
if (this.trie == null) {
|
if (this.mapping == null) {
|
||||||
try {
|
try {
|
||||||
this.trie = loadTrie();
|
this.mapping = loadMapping();
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex);
|
throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex);
|
||||||
}
|
}
|
||||||
@ -121,6 +125,21 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DomainCategoryResult getCategory(String domain, String host) {
|
public DomainCategoryResult getCategory(String domain, String host) {
|
||||||
return trie.findHostCategory(host);
|
String hostToUse = StringUtils.isBlank(host) ? domain : host;
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(hostToUse)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> tokens = Arrays.asList(hostToUse.split("\\."));
|
||||||
|
for (int i = 0; i < tokens.size(); i++) {
|
||||||
|
String searchString = String.join(".", tokens.subList(i, tokens.size()));
|
||||||
|
String category = mapping.get(searchString);
|
||||||
|
if (StringUtils.isNotBlank(category)) {
|
||||||
|
return new DefaultDomainCategoryResult(searchString, category);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -122,8 +122,7 @@ class DomainCategorizer extends Extract {
|
|||||||
|
|
||||||
return host;
|
return host;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private DomainCategoryResult findCategory(String domain, String host) {
|
private DomainCategoryResult findCategory(String domain, String host) {
|
||||||
List<DomainCategoryProvider> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
|
List<DomainCategoryProvider> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
|
||||||
for (DomainCategoryProvider provider : safeProviders) {
|
for (DomainCategoryProvider provider : safeProviders) {
|
||||||
@ -132,7 +131,7 @@ class DomainCategorizer extends Extract {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -177,16 +176,16 @@ class DomainCategorizer extends Extract {
|
|||||||
|
|
||||||
// attempt to get the host from the url provided.
|
// attempt to get the host from the url provided.
|
||||||
String host = getHost(urlString);
|
String host = getHost(urlString);
|
||||||
|
|
||||||
// get the domain string from the artifact
|
// get the domain string from the artifact
|
||||||
BlackboardAttribute domainAttr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
|
BlackboardAttribute domainAttr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
|
||||||
String domainString = domainAttr.getValueString();
|
String domainString = domainAttr.getValueString();
|
||||||
|
|
||||||
// make sure we have at least one of host or domain
|
// make sure we have at least one of host or domain, and the host hasn't been seen before
|
||||||
if (StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) {
|
if ((StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) || (domainSuffixesSeen.contains(host))) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we reached this point, we are at least analyzing this item
|
// if we reached this point, we are at least analyzing this item
|
||||||
artifactsAnalyzed++;
|
artifactsAnalyzed++;
|
||||||
|
|
||||||
@ -266,9 +265,9 @@ class DomainCategorizer extends Extract {
|
|||||||
provider.initialize();
|
provider.initialize();
|
||||||
}
|
}
|
||||||
|
|
||||||
this.domainProviders = foundProviders == null ?
|
this.domainProviders = foundProviders == null
|
||||||
Collections.emptyList() :
|
? Collections.emptyList()
|
||||||
foundProviders;
|
: foundProviders;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
Loading…
x
Reference in New Issue
Block a user