mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-13 00:16:16 +00:00
working through domain to sqlite
This commit is contained in:
parent
fd5d759d2a
commit
6793bd7dc9
@ -28,30 +28,16 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult {
|
||||
|
||||
private final String hostSuffix;
|
||||
private final String category;
|
||||
private final boolean morePrefixes;
|
||||
|
||||
/**
|
||||
* Default constructor assuming default for hasMorePrefixes of true.
|
||||
* Default constructor.
|
||||
* @param hostSuffix The portion of the suffix from the host or domain that was a
|
||||
* match (i.e. 'mail.google.com' or 'hotmail.com').
|
||||
* @param category The category (i.e. 'Web Email').
|
||||
*/
|
||||
public DefaultDomainCategoryResult(String hostSuffix, String category) {
|
||||
this(hostSuffix, category, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Main constructor.
|
||||
* @param hostSuffix The portion of the suffix from the host or domain that was a
|
||||
* match (i.e. 'mail.google.com' or 'hotmail.com').
|
||||
* @param category The category (i.e. 'Web Email').
|
||||
* @param morePrefixes In the event that there would be different matches for additional
|
||||
* prefixes, this can be true.
|
||||
*/
|
||||
public DefaultDomainCategoryResult(String hostSuffix, String category, boolean morePrefixes) {
|
||||
this.hostSuffix = hostSuffix;
|
||||
this.category = category;
|
||||
this.morePrefixes = morePrefixes;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -63,10 +49,4 @@ public class DefaultDomainCategoryResult implements DomainCategoryResult {
|
||||
public String getCategory() {
|
||||
return category;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasMorePrefixes() {
|
||||
return morePrefixes;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -35,16 +35,4 @@ public interface DomainCategoryResult {
|
||||
* @return The category (i.e. 'Web Email').
|
||||
*/
|
||||
String getCategory();
|
||||
|
||||
/**
|
||||
* @return In the event that there would be different matches for additional
|
||||
* prefixes, this can return true. For instance, if there was an entry for
|
||||
* 'mail.google.com' and 'chatenabled.mail.google.com', a search for
|
||||
* 'mail.google.com' would return the host suffix: 'mail.google.com' and
|
||||
* 'true' for hasMorePrefixes since an additional category could be added
|
||||
* for the 'chatenabled' prefix.
|
||||
*/
|
||||
default boolean hasMorePrefixes() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1,86 +0,0 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2020 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.url.analytics;
|
||||
|
||||
import com.google.common.annotations.Beta;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.sleuthkit.autopsy.url.analytics.Trie.TrieResult;
|
||||
|
||||
@Beta
|
||||
public class DomainSuffixTrie {
|
||||
|
||||
private static Iterable<String> getSuffixIter(String host) {
|
||||
// parse the tokens splitting on delimiter
|
||||
List<String> tokens = Stream.of(host.toLowerCase().split(DELIMITER))
|
||||
.filter(StringUtils::isNotBlank)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
Collections.reverse(tokens);
|
||||
return tokens;
|
||||
}
|
||||
|
||||
//private void Node get
|
||||
// Character for joining domain segments.
|
||||
private static final String JOINER = ".";
|
||||
// delimiter when used with regex for domains
|
||||
private static final String DELIMITER = "\\" + JOINER;
|
||||
|
||||
private final Trie<String, String> trie = new Trie<>();
|
||||
|
||||
/**
|
||||
*
|
||||
* @param suffix
|
||||
* @param leaf
|
||||
*/
|
||||
public void add(String suffix, String leaf) {
|
||||
this.trie.add(getSuffixIter(suffix), leaf);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the host is a known type of host. If so, returns the
|
||||
* portion of the host suffix that signifies the domain type (i.e.
|
||||
* "hotmail.com" or "mail.google.com") and the domain type. Also returned in
|
||||
* the DomainCategoryResult is whether or not any children of the found node
|
||||
* in the trie and consequently, whether or not
|
||||
*
|
||||
* @param host The host.
|
||||
* @return The DomainCategoryResult if a portion of the suffix was found
|
||||
*
|
||||
*
|
||||
* A pair of the host suffix and domain type for that suffix if
|
||||
* found. Otherwise, returns null.
|
||||
*/
|
||||
public DomainCategoryResult findHostCategory(String host) {
|
||||
// if no host, return none.
|
||||
if (StringUtils.isBlank(host)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TrieResult<String, String> result = this.trie.getDeepest(getSuffixIter(host));
|
||||
List<String> keys = new ArrayList<>(result.getKeys());
|
||||
Collections.reverse(keys);
|
||||
String suffix = String.join(JOINER, keys);
|
||||
return new DefaultDomainCategoryResult(suffix, result.getValue(), result.hasChildren());
|
||||
}
|
||||
}
|
@ -1,132 +0,0 @@
|
||||
/*
|
||||
* To change this license header, choose License Headers in Project Properties.
|
||||
* To change this template file, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.url.analytics;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.commons.collections4.MapUtils;
|
||||
|
||||
/**
 * A generic trie (prefix tree). A path of key tokens leads from the root to
 * a node, and any node may carry a leaf value.
 *
 * @param <K> The type of a single key token along a path.
 * @param <V> The type of the value stored at a node.
 */
class Trie<K, V> {

    /**
     * A single trie node: its children by key token and an optional value.
     * Declared static so it neither captures the enclosing trie instance nor
     * shadows the trie's own type parameters.
     */
    private static final class Node<K, V> {

        private final Map<K, Node<K, V>> children = new HashMap<>();
        private V leafValue = null;

        /**
         * @param childKey The key token of the child.
         *
         * @return The existing child for the key, or a newly attached one.
         */
        Node<K, V> getOrAddChild(K childKey) {
            return children.computeIfAbsent(childKey, unused -> new Node<>());
        }

        /**
         * @param childKey The key token of the child.
         *
         * @return The child for the key, or null if there is none.
         */
        Node<K, V> getChild(K childKey) {
            return children.get(childKey);
        }

        /**
         * @return The value stored at this node, or null if none was set.
         */
        V getLeafValue() {
            return leafValue;
        }

        /**
         * @param leafValue The value to store at this node.
         */
        void setLeafValue(V leafValue) {
            this.leafValue = leafValue;
        }

        /**
         * @return True if at least one child is attached to this node.
         */
        boolean hasChildren() {
            return !children.isEmpty();
        }
    }

    /**
     * The outcome of a trie search: the value found, the key path that led
     * to it and whether the matched node has children.
     */
    static class TrieResult<K, V> {

        private final V value;
        private final List<K> keys;
        private final boolean hasChildren;

        /**
         * @param value       The value stored at the matched node.
         * @param keys        The key tokens from the root to the matched
         *                    node, in traversal order.
         * @param hasChildren Whether the matched node has children.
         */
        TrieResult(V value, List<K> keys, boolean hasChildren) {
            this.value = value;
            this.keys = keys;
            this.hasChildren = hasChildren;
        }

        /**
         * @return The value stored at the matched node.
         */
        V getValue() {
            return value;
        }

        /**
         * @return The key tokens from the root to the matched node.
         */
        List<K> getKeys() {
            return keys;
        }

        /**
         * @return Whether the matched node has children.
         */
        boolean hasChildren() {
            return hasChildren;
        }
    }

    private final Node<K, V> root = new Node<>();

    /**
     * Stores a value at the node reached by the key tokens, creating any
     * missing nodes on the way. A value already stored there is replaced.
     *
     * @param keyTokens The key tokens, root first.
     * @param leafValue The value to store.
     */
    void add(Iterable<K> keyTokens, V leafValue) {
        Node<K, V> node = root;
        for (K key : keyTokens) {
            node = node.getOrAddChild(key);
        }

        node.setLeafValue(leafValue);
    }

    /**
     * Looks up the value stored at exactly the given key path.
     *
     * @param keys The key tokens, root first.
     *
     * @return The value at that node, or null if the path does not exist or
     *         the node holds no value.
     */
    V getExact(Iterable<K> keys) {
        Node<K, V> node = root;
        for (K key : keys) {
            node = node.getChild(key);
            if (node == null) {
                return null;
            }
        }

        return node.getLeafValue();
    }

    /**
     * Walks the key path as far as the trie allows and returns the match
     * furthest from the root among the nodes visited that hold a value,
     * including the node reached by the final key.
     *
     * @param keys The key tokens, root first.
     *
     * @return The deepest match, or null if no visited node holds a value.
     */
    TrieResult<K, V> getDeepest(Iterable<K> keys) {
        Node<K, V> node = root;
        List<K> visited = new ArrayList<>();
        TrieResult<K, V> bestMatch = toResult(node, visited);

        for (K key : keys) {
            node = node.getChild(key);
            if (node == null) {
                // The path leaves the trie here; this key is not part of any match.
                break;
            }

            visited.add(key);
            TrieResult<K, V> candidate = toResult(node, visited);
            if (candidate != null) {
                bestMatch = candidate;
            }
        }

        return bestMatch;
    }

    /**
     * Walks the key path and returns the match closest to the root among the
     * nodes visited that hold a value, including the node reached by the
     * final key.
     *
     * @param keys The key tokens, root first.
     *
     * @return The first match, or null if no visited node holds a value.
     */
    TrieResult<K, V> getFirst(Iterable<K> keys) {
        Node<K, V> node = root;
        List<K> visited = new ArrayList<>();
        TrieResult<K, V> match = toResult(node, visited);
        if (match != null) {
            return match;
        }

        for (K key : keys) {
            node = node.getChild(key);
            if (node == null) {
                return null;
            }

            visited.add(key);
            match = toResult(node, visited);
            if (match != null) {
                return match;
            }
        }

        return null;
    }

    /**
     * Builds a result for the node if it holds a value.
     *
     * @param node    The node to inspect.
     * @param visited The key tokens consumed to reach the node.
     *
     * @return The result, or null if the node holds no value.
     */
    private TrieResult<K, V> toResult(Node<K, V> node, List<K> visited) {
        V value = node.getLeafValue();
        if (value == null) {
            return null;
        }

        // Snapshot the path: the caller keeps appending to 'visited' while it
        // continues the walk, and the result must not change afterwards.
        return new TrieResult<>(value, new ArrayList<>(visited), node.hasChildren());
    }
}
|
@ -18,17 +18,21 @@
|
||||
*/
|
||||
package org.sleuthkit.autopsy.recentactivity;
|
||||
|
||||
import org.sleuthkit.autopsy.url.analytics.DomainSuffixTrie;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||
import org.sleuthkit.autopsy.ingest.IngestModule;
|
||||
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
|
||||
import org.sleuthkit.autopsy.url.analytics.DefaultDomainCategoryResult;
|
||||
import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider;
|
||||
import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult;
|
||||
|
||||
@ -44,38 +48,38 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
|
||||
private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName());
|
||||
|
||||
/**
|
||||
* Loads the trie of suffixes from the csv resource file.
|
||||
* Loads the domain suffixes from the csv resource file.
|
||||
*
|
||||
* @return The root trie node.
|
||||
* @return The mapping.
|
||||
* @throws IOException
|
||||
*/
|
||||
private static DomainSuffixTrie loadTrie() throws IOException {
|
||||
private static Map<String, String> loadMapping() throws IOException {
|
||||
try (InputStream is = DomainCategorizer.class.getResourceAsStream(DOMAIN_TYPE_CSV);
|
||||
InputStreamReader isReader = new InputStreamReader(is, StandardCharsets.UTF_8);
|
||||
BufferedReader reader = new BufferedReader(isReader)) {
|
||||
|
||||
DomainSuffixTrie trie = new DomainSuffixTrie();
|
||||
Map<String, String> mapping = new HashMap<>();
|
||||
int lineNum = 1;
|
||||
while (reader.ready()) {
|
||||
String line = reader.readLine();
|
||||
if (!StringUtils.isBlank(line)) {
|
||||
addItem(trie, line.trim(), lineNum);
|
||||
addItem(mapping, line.trim(), lineNum);
|
||||
lineNum++;
|
||||
}
|
||||
}
|
||||
|
||||
return trie;
|
||||
return mapping;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a trie node based on the csv line.
|
||||
* Adds a mapping based on the csv line.
|
||||
*
|
||||
* @param trie The root trie node.
|
||||
* @param mapping The suffix to category mapping.
|
||||
* @param line The line to be parsed.
|
||||
* @param lineNumber The line number of this csv line.
|
||||
*/
|
||||
private static void addItem(DomainSuffixTrie trie, String line, int lineNumber) {
|
||||
private static void addItem(Map<String, String> mapping, String line, int lineNumber) {
|
||||
// make sure this isn't a blank line.
|
||||
if (StringUtils.isBlank(line)) {
|
||||
return;
|
||||
@ -102,17 +106,17 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
|
||||
return;
|
||||
}
|
||||
|
||||
trie.add(hostSuffix, domainTypeStr);
|
||||
mapping.put(hostSuffix, domainTypeStr);
|
||||
}
|
||||
|
||||
// the root node for the trie containing suffixes for domain categories.
|
||||
private DomainSuffixTrie trie = null;
|
||||
// the host suffix to category mapping.
|
||||
private Map<String, String> mapping = null;
|
||||
|
||||
@Override
|
||||
public void initialize() throws IngestModuleException {
|
||||
if (this.trie == null) {
|
||||
if (this.mapping == null) {
|
||||
try {
|
||||
this.trie = loadTrie();
|
||||
this.mapping = loadMapping();
|
||||
} catch (IOException ex) {
|
||||
throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex);
|
||||
}
|
||||
@ -121,6 +125,21 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
|
||||
|
||||
@Override
|
||||
public DomainCategoryResult getCategory(String domain, String host) {
|
||||
return trie.findHostCategory(host);
|
||||
String hostToUse = StringUtils.isBlank(host) ? domain : host;
|
||||
|
||||
if (StringUtils.isBlank(hostToUse)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
List<String> tokens = Arrays.asList(hostToUse.split("\\."));
|
||||
for (int i = 0; i < tokens.size(); i++) {
|
||||
String searchString = String.join(".", tokens.subList(i, tokens.size()));
|
||||
String category = mapping.get(searchString);
|
||||
if (StringUtils.isNotBlank(category)) {
|
||||
return new DefaultDomainCategoryResult(searchString, category);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@ -123,7 +123,6 @@ class DomainCategorizer extends Extract {
|
||||
return host;
|
||||
}
|
||||
|
||||
|
||||
private DomainCategoryResult findCategory(String domain, String host) {
|
||||
List<DomainCategoryProvider> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
|
||||
for (DomainCategoryProvider provider : safeProviders) {
|
||||
@ -182,8 +181,8 @@ class DomainCategorizer extends Extract {
|
||||
BlackboardAttribute domainAttr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
|
||||
String domainString = domainAttr.getValueString();
|
||||
|
||||
// make sure we have at least one of host or domain
|
||||
if (StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) {
|
||||
// make sure we have at least one of host or domain, and the host hasn't been seen before
|
||||
if ((StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) || (domainSuffixesSeen.contains(host))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -266,9 +265,9 @@ class DomainCategorizer extends Extract {
|
||||
provider.initialize();
|
||||
}
|
||||
|
||||
this.domainProviders = foundProviders == null ?
|
||||
Collections.emptyList() :
|
||||
foundProviders;
|
||||
this.domainProviders = foundProviders == null
|
||||
? Collections.emptyList()
|
||||
: foundProviders;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
Loading…
x
Reference in New Issue
Block a user