mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-08 22:29:33 +00:00
public Core package
This commit is contained in:
parent
8c7155c3a1
commit
fd5d759d2a
@ -336,6 +336,7 @@
|
|||||||
<package>org.sleuthkit.autopsy.textextractors.configs</package>
|
<package>org.sleuthkit.autopsy.textextractors.configs</package>
|
||||||
<package>org.sleuthkit.autopsy.textsummarizer</package>
|
<package>org.sleuthkit.autopsy.textsummarizer</package>
|
||||||
<package>org.sleuthkit.autopsy.texttranslation</package>
|
<package>org.sleuthkit.autopsy.texttranslation</package>
|
||||||
|
<package>org.sleuthkit.autopsy.url.analytics</package>
|
||||||
<package>org.sleuthkit.datamodel</package>
|
<package>org.sleuthkit.datamodel</package>
|
||||||
<package>org.sleuthkit.datamodel.blackboardutils</package>
|
<package>org.sleuthkit.datamodel.blackboardutils</package>
|
||||||
<package>org.sleuthkit.datamodel.blackboardutils.attributes</package>
|
<package>org.sleuthkit.datamodel.blackboardutils.attributes</package>
|
||||||
|
@ -0,0 +1,72 @@
|
|||||||
|
/*
|
||||||
|
* Autopsy Forensic Browser
|
||||||
|
*
|
||||||
|
* Copyright 2020 Basis Technology Corp.
|
||||||
|
* Contact: carrier <at> sleuthkit <dot> org
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.sleuthkit.autopsy.url.analytics;
|
||||||
|
|
||||||
|
import com.google.common.annotations.Beta;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default implementation of the DomainCategoryResult.
|
||||||
|
*/
|
||||||
|
@Beta
|
||||||
|
public class DefaultDomainCategoryResult implements DomainCategoryResult {
|
||||||
|
|
||||||
|
private final String hostSuffix;
|
||||||
|
private final String category;
|
||||||
|
private final boolean morePrefixes;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default constructor assuming default for hasMorePrefixes of true.
|
||||||
|
* @param hostSuffix The portion of the suffix from the host or domain that was a
|
||||||
|
* match (i.e. 'mail.google.com' or 'hotmail.com').
|
||||||
|
* @param category The category (i.e. 'Web Email').
|
||||||
|
*/
|
||||||
|
public DefaultDomainCategoryResult(String hostSuffix, String category) {
|
||||||
|
this(hostSuffix, category, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main constructor.
|
||||||
|
* @param hostSuffix The portion of the suffix from the host or domain that was a
|
||||||
|
* match (i.e. 'mail.google.com' or 'hotmail.com').
|
||||||
|
* @param category The category (i.e. 'Web Email').
|
||||||
|
* @param morePrefixes In the event that there would be different matches for additional
|
||||||
|
* prefixes, this can be true.
|
||||||
|
*/
|
||||||
|
public DefaultDomainCategoryResult(String hostSuffix, String category, boolean morePrefixes) {
|
||||||
|
this.hostSuffix = hostSuffix;
|
||||||
|
this.category = category;
|
||||||
|
this.morePrefixes = morePrefixes;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHostSuffix() {
|
||||||
|
return hostSuffix;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getCategory() {
|
||||||
|
return category;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasMorePrefixes() {
|
||||||
|
return morePrefixes;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -16,38 +16,19 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.recentactivity;
|
package org.sleuthkit.autopsy.url.analytics;
|
||||||
|
|
||||||
|
import com.google.common.annotations.Beta;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModule;
|
import org.sleuthkit.autopsy.ingest.IngestModule;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interface providing the category of a domain for creating
|
* Interface providing the category of a domain for the purposes of creating
|
||||||
* TSK_WEB_CATEGORIZATION artifacts.
|
* TSK_WEB_CATEGORIZATION artifacts. These implementations are used in
|
||||||
|
* RecentActivity as a part of the ingest process. Implementers of this class
|
||||||
|
* should have a no-argument constructor in order to be properly instantiated.
|
||||||
*/
|
*/
|
||||||
|
@Beta
|
||||||
public interface DomainCategoryProvider {
|
public interface DomainCategoryProvider {
|
||||||
public static class DomainCategoryResult {
|
|
||||||
private final String hostSuffix;
|
|
||||||
private final String category;
|
|
||||||
private final boolean hasChildren;
|
|
||||||
|
|
||||||
public DomainCategoryResult(String hostSuffix, String category, boolean hasChildren) {
|
|
||||||
this.hostSuffix = hostSuffix;
|
|
||||||
this.category = category;
|
|
||||||
this.hasChildren = hasChildren;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getHostSuffix() {
|
|
||||||
return hostSuffix;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getCategory() {
|
|
||||||
return category;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasChildren() {
|
|
||||||
return hasChildren;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provides the DomainCategory for a given domain/host or null if none can
|
* Provides the DomainCategory for a given domain/host or null if none can
|
||||||
@ -59,6 +40,14 @@ public interface DomainCategoryProvider {
|
|||||||
* null if not.
|
* null if not.
|
||||||
*/
|
*/
|
||||||
DomainCategoryResult getCategory(String domain, String host);
|
DomainCategoryResult getCategory(String domain, String host);
|
||||||
|
|
||||||
void initialize() throws IngestModule.IngestModuleException;
|
/**
|
||||||
|
* Initializes this provider in preparation to handle 'getCategory' requests
|
||||||
|
* during ingest. Conceivably, the same instance of this class may have this
|
||||||
|
* called multiple times and should handle that possibility gracefully.
|
||||||
|
*
|
||||||
|
* @throws IngestModule.IngestModuleException
|
||||||
|
*/
|
||||||
|
default void initialize() throws IngestModule.IngestModuleException {
|
||||||
|
}
|
||||||
}
|
}
|
@ -0,0 +1,50 @@
|
|||||||
|
/*
|
||||||
|
* Autopsy Forensic Browser
|
||||||
|
*
|
||||||
|
* Copyright 2020 Basis Technology Corp.
|
||||||
|
* Contact: carrier <at> sleuthkit <dot> org
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.sleuthkit.autopsy.url.analytics;
|
||||||
|
|
||||||
|
import com.google.common.annotations.Beta;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The result of finding a match for the host or domain provided as an argument.
|
||||||
|
*/
|
||||||
|
@Beta
|
||||||
|
public interface DomainCategoryResult {
|
||||||
|
/**
|
||||||
|
* @return The portion of the suffix from the host or domain that was a
|
||||||
|
* match (i.e. 'mail.google.com' or 'hotmail.com').
|
||||||
|
*/
|
||||||
|
String getHostSuffix();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The category (i.e. 'Web Email').
|
||||||
|
*/
|
||||||
|
String getCategory();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return In the event that there would be different matches for additional
|
||||||
|
* prefixes, this can return true. For instance, if there was an entry for
|
||||||
|
* 'mail.google.com' and 'chatenabled.mail.google.com', a search for
|
||||||
|
* 'mail.google.com' would return the host suffix: 'mail.google.com' and
|
||||||
|
* 'true' for hasMorePrefixes since an additional category could be added
|
||||||
|
* for the 'chatenabled' prefix.
|
||||||
|
*/
|
||||||
|
default boolean hasMorePrefixes() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
@ -16,18 +16,20 @@
|
|||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.recentactivity;
|
package org.sleuthkit.autopsy.url.analytics;
|
||||||
|
|
||||||
|
import com.google.common.annotations.Beta;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.sleuthkit.autopsy.recentactivity.DomainCategoryProvider.DomainCategoryResult;
|
import org.sleuthkit.autopsy.url.analytics.Trie.TrieResult;
|
||||||
import org.sleuthkit.autopsy.recentactivity.Trie.TrieResult;
|
|
||||||
|
|
||||||
|
@Beta
|
||||||
public class DomainSuffixTrie {
|
public class DomainSuffixTrie {
|
||||||
|
|
||||||
private static Iterable<String> getSuffixIter(String host) {
|
private static Iterable<String> getSuffixIter(String host) {
|
||||||
// parse the tokens splitting on delimiter
|
// parse the tokens splitting on delimiter
|
||||||
List<String> tokens = Stream.of(host.toLowerCase().split(DELIMITER))
|
List<String> tokens = Stream.of(host.toLowerCase().split(DELIMITER))
|
||||||
@ -44,24 +46,32 @@ public class DomainSuffixTrie {
|
|||||||
// delimiter when used with regex for domains
|
// delimiter when used with regex for domains
|
||||||
private static final String DELIMITER = "\\" + JOINER;
|
private static final String DELIMITER = "\\" + JOINER;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private final Trie<String, String> trie = new Trie<>();
|
private final Trie<String, String> trie = new Trie<>();
|
||||||
|
|
||||||
void add(String suffix, String leaf) {
|
/**
|
||||||
|
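* Adds a host suffix and the value associated with it to the trie.
|
||||||
|
* @param suffix The host suffix (i.e. 'hotmail.com' or 'mail.google.com').
|
||||||
|
* @param leaf The value stored for that suffix (i.e. the domain type).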
|
||||||
|
*/
|
||||||
|
public void add(String suffix, String leaf) {
|
||||||
this.trie.add(getSuffixIter(suffix), leaf);
|
this.trie.add(getSuffixIter(suffix), leaf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines if the host is a known type of host. If so, returns the
|
* Determines if the host is a known type of host. If so, returns the
|
||||||
* portion of the host suffix that signifies the domain type (i.e.
|
* portion of the host suffix that signifies the domain type (i.e.
|
||||||
* "hotmail.com" or "mail.google.com") and the domain type.
|
* "hotmail.com" or "mail.google.com") and the domain type. Also returned in
|
||||||
|
* the DomainCategoryResult is whether or not the found node in the trie has
|
||||||
|
* any children and, consequently, whether or not more prefixes may exist.
|
||||||
*
|
*
|
||||||
* @param host The host.
|
* @param host The host.
|
||||||
* @return A pair of the host suffix and domain type for that suffix if
|
* @return The DomainCategoryResult (the matched host suffix, the domain
|
||||||
|
* type for that suffix, and whether the matched trie node
|
||||||
|
* has children) if a portion of the suffix was
|
||||||
* found. Otherwise, returns null.
|
* found. Otherwise, returns null.
|
||||||
*/
|
*/
|
||||||
DomainCategoryResult findHostCategory(String host) {
|
public DomainCategoryResult findHostCategory(String host) {
|
||||||
// if no host, return none.
|
// if no host, return none.
|
||||||
if (StringUtils.isBlank(host)) {
|
if (StringUtils.isBlank(host)) {
|
||||||
return null;
|
return null;
|
||||||
@ -71,6 +81,6 @@ public class DomainSuffixTrie {
|
|||||||
List<String> keys = new ArrayList<>(result.getKeys());
|
List<String> keys = new ArrayList<>(result.getKeys());
|
||||||
Collections.reverse(keys);
|
Collections.reverse(keys);
|
||||||
String suffix = String.join(JOINER, keys);
|
String suffix = String.join(JOINER, keys);
|
||||||
return new DomainCategoryResult(suffix, result.getValue(), result.hasChildren());
|
return new DefaultDomainCategoryResult(suffix, result.getValue(), result.hasChildren());
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -3,7 +3,7 @@
|
|||||||
* To change this template file, choose Tools | Templates
|
* To change this template file, choose Tools | Templates
|
||||||
* and open the template in the editor.
|
* and open the template in the editor.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.recentactivity;
|
package org.sleuthkit.autopsy.url.analytics;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -11,15 +11,15 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.apache.commons.collections4.MapUtils;
|
import org.apache.commons.collections4.MapUtils;
|
||||||
|
|
||||||
public class Trie<K, V> {
|
class Trie<K, V> {
|
||||||
|
|
||||||
private class Node<K, V> {
|
private class Node<K, V> {
|
||||||
|
|
||||||
private final Map<K, Node> children = new HashMap<>();
|
private final Map<K, Node<K, V>> children = new HashMap<>();
|
||||||
private V leafValue = null;
|
private V leafValue = null;
|
||||||
|
|
||||||
Node getOrAddChild(K childKey) {
|
Node<K, V> getOrAddChild(K childKey) {
|
||||||
Node child = children.get(childKey);
|
Node<K, V> child = children.get(childKey);
|
||||||
if (child == null) {
|
if (child == null) {
|
||||||
child = new Node();
|
child = new Node();
|
||||||
children.put(childKey, child);
|
children.put(childKey, child);
|
||||||
@ -28,7 +28,7 @@ public class Trie<K, V> {
|
|||||||
return child;
|
return child;
|
||||||
}
|
}
|
||||||
|
|
||||||
Node getChild(K childKey) {
|
Node<K, V> getChild(K childKey) {
|
||||||
return children.get(childKey);
|
return children.get(childKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -42,35 +42,36 @@ public class Trie<K, V> {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class TrieResult<K, V> {
|
static class TrieResult<K, V> {
|
||||||
|
|
||||||
private final V value;
|
private final V value;
|
||||||
private final List<K> keys;
|
private final List<K> keys;
|
||||||
private final boolean hasChildren;
|
private final boolean hasChildren;
|
||||||
|
|
||||||
public TrieResult(V value, List<K> keys, boolean hasChildren) {
|
TrieResult(V value, List<K> keys, boolean hasChildren) {
|
||||||
this.value = value;
|
this.value = value;
|
||||||
this.keys = keys;
|
this.keys = keys;
|
||||||
this.hasChildren = hasChildren;
|
this.hasChildren = hasChildren;
|
||||||
}
|
}
|
||||||
|
|
||||||
public V getValue() {
|
V getValue() {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<K> getKeys() {
|
List<K> getKeys() {
|
||||||
return keys;
|
return keys;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasChildren() {
|
boolean hasChildren() {
|
||||||
return hasChildren;
|
return hasChildren;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Node<K, V> root = new Node<>();
|
private Node<K, V> root = new Node<>();
|
||||||
|
|
||||||
public void add(Iterable<K> keyTokens, V leafValue) {
|
void add(Iterable<K> keyTokens, V leafValue) {
|
||||||
Node node = root;
|
Node<K, V> node = root;
|
||||||
for (K key : keyTokens) {
|
for (K key : keyTokens) {
|
||||||
node = node.getOrAddChild(key);
|
node = node.getOrAddChild(key);
|
||||||
}
|
}
|
||||||
@ -78,7 +79,7 @@ public class Trie<K, V> {
|
|||||||
node.setLeafValue(leafValue);
|
node.setLeafValue(leafValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
public V getExact(Iterable<K> keys) {
|
V getExact(Iterable<K> keys) {
|
||||||
Node<K, V> node = root;
|
Node<K, V> node = root;
|
||||||
for (K key : keys) {
|
for (K key : keys) {
|
||||||
node = node.getChild(key);
|
node = node.getChild(key);
|
||||||
@ -90,7 +91,7 @@ public class Trie<K, V> {
|
|||||||
return node.getLeafValue();
|
return node.getLeafValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
public TrieResult<K, V> getDeepest(Iterable<K> keys) {
|
TrieResult<K, V> getDeepest(Iterable<K> keys) {
|
||||||
Node<K, V> node = root;
|
Node<K, V> node = root;
|
||||||
List<K> visited = new ArrayList<>();
|
List<K> visited = new ArrayList<>();
|
||||||
TrieResult<K, V> bestMatch = null;
|
TrieResult<K, V> bestMatch = null;
|
||||||
@ -110,7 +111,7 @@ public class Trie<K, V> {
|
|||||||
return bestMatch;
|
return bestMatch;
|
||||||
}
|
}
|
||||||
|
|
||||||
public TrieResult<K, V> getFirst(Iterable<K> keys) {
|
TrieResult<K, V> getFirst(Iterable<K> keys) {
|
||||||
Node<K, V> node = root;
|
Node<K, V> node = root;
|
||||||
List<K> visited = new ArrayList<>();
|
List<K> visited = new ArrayList<>();
|
||||||
for (K key : keys) {
|
for (K key : keys) {
|
@ -18,6 +18,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.recentactivity;
|
package org.sleuthkit.autopsy.recentactivity;
|
||||||
|
|
||||||
|
import org.sleuthkit.autopsy.url.analytics.DomainSuffixTrie;
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
@ -28,6 +29,8 @@ import org.apache.commons.lang.StringUtils;
|
|||||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModule;
|
import org.sleuthkit.autopsy.ingest.IngestModule;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
|
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
|
||||||
|
import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider;
|
||||||
|
import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The default domain category provider that makes use of the default csv
|
* The default domain category provider that makes use of the default csv
|
||||||
@ -40,7 +43,6 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
|
|||||||
private static final String DOMAIN_TYPE_CSV = "default_domain_categories.csv"; //NON-NLS
|
private static final String DOMAIN_TYPE_CSV = "default_domain_categories.csv"; //NON-NLS
|
||||||
private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName());
|
private static final Logger logger = Logger.getLogger(DefaultDomainCategoryProvider.class.getName());
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads the trie of suffixes from the csv resource file.
|
* Loads the trie of suffixes from the csv resource file.
|
||||||
*
|
*
|
||||||
@ -99,19 +101,21 @@ class DefaultDomainCategoryProvider implements DomainCategoryProvider {
|
|||||||
logger.log(Level.WARNING, String.format("Could not determine host suffix for this line: \"%s\" at line %d", line, lineNumber));
|
logger.log(Level.WARNING, String.format("Could not determine host suffix for this line: \"%s\" at line %d", line, lineNumber));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
trie.add(hostSuffix, domainTypeStr);
|
trie.add(hostSuffix, domainTypeStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// the root node for the trie containing suffixes for domain categories.
|
// the root node for the trie containing suffixes for domain categories.
|
||||||
private DomainSuffixTrie trie = null;
|
private DomainSuffixTrie trie = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize() throws IngestModuleException {
|
public void initialize() throws IngestModuleException {
|
||||||
try {
|
if (this.trie == null) {
|
||||||
this.trie = loadTrie();
|
try {
|
||||||
} catch (IOException ex) {
|
this.trie = loadTrie();
|
||||||
throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex);
|
} catch (IOException ex) {
|
||||||
|
throw new IngestModule.IngestModuleException("Unable to load domain type csv for domain category analysis", ex);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,20 +22,25 @@ import java.net.MalformedURLException;
|
|||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.http.conn.util.DomainType;
|
import org.openide.util.Lookup;
|
||||||
import org.openide.util.NbBundle.Messages;
|
import org.openide.util.NbBundle.Messages;
|
||||||
import org.sleuthkit.autopsy.coreutils.Logger;
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
import org.sleuthkit.autopsy.coreutils.NetworkUtils;
|
import org.sleuthkit.autopsy.coreutils.NetworkUtils;
|
||||||
import org.sleuthkit.autopsy.ingest.DataSourceIngestModuleProgress;
|
import org.sleuthkit.autopsy.ingest.DataSourceIngestModuleProgress;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestJobContext;
|
import org.sleuthkit.autopsy.ingest.IngestJobContext;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModule;
|
import org.sleuthkit.autopsy.ingest.IngestModule;
|
||||||
import org.sleuthkit.autopsy.recentactivity.DomainCategoryProvider.DomainCategoryResult;
|
import org.sleuthkit.autopsy.url.analytics.DomainCategoryProvider;
|
||||||
|
import org.sleuthkit.autopsy.url.analytics.DomainCategoryResult;
|
||||||
import org.sleuthkit.datamodel.AbstractFile;
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
import org.sleuthkit.datamodel.BlackboardArtifact;
|
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||||
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
|
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
|
||||||
@ -45,12 +50,12 @@ import org.sleuthkit.datamodel.Content;
|
|||||||
import org.sleuthkit.datamodel.TskCoreException;
|
import org.sleuthkit.datamodel.TskCoreException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Analyzes a URL to determine if the url host is one of a certain kind of category
|
* Analyzes a URL to determine if the url host is one of a certain kind of
|
||||||
* (i.e. webmail, disposable mail). If found, a web category artifact is
|
* category (i.e. webmail, disposable mail). If found, a web category artifact
|
||||||
* created.
|
* is created.
|
||||||
*
|
*
|
||||||
* CSV entries describing these domain types are compiled from sources.
|
* CSV entries describing these domain types are compiled from sources. webmail:
|
||||||
* webmail: https://github.com/mailcheck/mailcheck/wiki/List-of-Popular-Domains
|
* https://github.com/mailcheck/mailcheck/wiki/List-of-Popular-Domains
|
||||||
* disposable mail: https://www.npmjs.com/package/disposable-email-domains
|
* disposable mail: https://www.npmjs.com/package/disposable-email-domains
|
||||||
*/
|
*/
|
||||||
@Messages({
|
@Messages({
|
||||||
@ -60,7 +65,6 @@ import org.sleuthkit.datamodel.TskCoreException;
|
|||||||
})
|
})
|
||||||
class DomainCategorizer extends Extract {
|
class DomainCategorizer extends Extract {
|
||||||
|
|
||||||
|
|
||||||
// The url regex is based on the regex provided in https://tools.ietf.org/html/rfc3986#appendix-B
|
// The url regex is based on the regex provided in https://tools.ietf.org/html/rfc3986#appendix-B
|
||||||
// but expanded to be a little more flexible, and also properly parses user info and port in a url
|
// but expanded to be a little more flexible, and also properly parses user info and port in a url
|
||||||
// this item has optional colon since some urls were coming through without the colon
|
// this item has optional colon since some urls were coming through without the colon
|
||||||
@ -80,6 +84,7 @@ class DomainCategorizer extends Extract {
|
|||||||
|
|
||||||
private Content dataSource;
|
private Content dataSource;
|
||||||
private IngestJobContext context;
|
private IngestJobContext context;
|
||||||
|
private List<DomainCategoryProvider> domainProviders = Collections.emptyList();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main constructor.
|
* Main constructor.
|
||||||
@ -117,8 +122,20 @@ class DomainCategorizer extends Extract {
|
|||||||
|
|
||||||
return host;
|
return host;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private DomainCategoryResult findCategory(String domain, String host) {
|
||||||
|
List<DomainCategoryProvider> safeProviders = domainProviders == null ? Collections.emptyList() : domainProviders;
|
||||||
|
for (DomainCategoryProvider provider : safeProviders) {
|
||||||
|
DomainCategoryResult result = provider.getCategory(domain, host);
|
||||||
|
if (result != null) {
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Goes through web history artifacts and attempts to determine any hosts of
|
* Goes through web history artifacts and attempts to determine any hosts of
|
||||||
* a domain type. If any are found, a TSK_WEB_CATEGORIZATION artifact is
|
* a domain type. If any are found, a TSK_WEB_CATEGORIZATION artifact is
|
||||||
@ -160,15 +177,21 @@ class DomainCategorizer extends Extract {
|
|||||||
|
|
||||||
// attempt to get the host from the url provided.
|
// attempt to get the host from the url provided.
|
||||||
String host = getHost(urlString);
|
String host = getHost(urlString);
|
||||||
if (StringUtils.isBlank(host)) {
|
|
||||||
|
// get the domain string from the artifact
|
||||||
|
BlackboardAttribute domainAttr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN));
|
||||||
|
String domainString = domainAttr.getValueString();
|
||||||
|
|
||||||
|
// make sure we have at least one of host or domain
|
||||||
|
if (StringUtils.isBlank(host) && StringUtils.isBlank(domainString)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if we reached this point, we are at least analyzing this item
|
// if we reached this point, we are at least analyzing this item
|
||||||
artifactsAnalyzed++;
|
artifactsAnalyzed++;
|
||||||
|
|
||||||
// attempt to get the domain type for the host using the suffix trie
|
// attempt to get the domain type for the host using the suffix trie
|
||||||
DomainCategoryResult domainEntryFound = findHostSuffix(host);
|
DomainCategoryResult domainEntryFound = findCategory(host, domainString);
|
||||||
if (domainEntryFound == null) {
|
if (domainEntryFound == null) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -216,9 +239,36 @@ class DomainCategorizer extends Extract {
|
|||||||
this.findDomainTypes();
|
this.findDomainTypes();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final Comparator<DomainCategoryProvider> PROVIDER_COMPARATOR
|
||||||
|
= (a, b) -> {
|
||||||
|
// if exactly one of the two items is the DefaultDomainCategoryProvider, compare based on that (the default sorts first).
|
||||||
|
int isDefaultCompare = Integer.compare(
|
||||||
|
a instanceof DefaultDomainCategoryProvider ? 0 : 1,
|
||||||
|
b instanceof DefaultDomainCategoryProvider ? 0 : 1);
|
||||||
|
|
||||||
|
if (isDefaultCompare != 0) {
|
||||||
|
return isDefaultCompare;
|
||||||
|
}
|
||||||
|
|
||||||
|
// otherwise, sort by the name of the fully qualified class for deterministic results.
|
||||||
|
return a.getClass().getName().compareToIgnoreCase(b.getClass().getName());
|
||||||
|
};
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
void configExtractor() throws IngestModule.IngestModuleException {
|
void configExtractor() throws IngestModule.IngestModuleException {
|
||||||
// TODO lookup needs to go here
|
List<DomainCategoryProvider> foundProviders
|
||||||
|
= Lookup.getDefault().lookupAll(DomainCategoryProvider.class).stream()
|
||||||
|
.filter(provider -> provider != null)
|
||||||
|
.sorted(PROVIDER_COMPARATOR)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
for (DomainCategoryProvider provider : foundProviders) {
|
||||||
|
provider.initialize();
|
||||||
|
}
|
||||||
|
|
||||||
|
this.domainProviders = foundProviders == null ?
|
||||||
|
Collections.emptyList() :
|
||||||
|
foundProviders;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
Loading…
x
Reference in New Issue
Block a user