diff --git a/Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java b/Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java index af41bc1980..78547f8370 100644 --- a/Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java +++ b/Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2012-2015 Basis Technology Corp. + * Copyright 2012-2018 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,14 +18,22 @@ */ package org.sleuthkit.autopsy.coreutils; +import java.net.MalformedURLException; +import java.net.URL; import java.net.UnknownHostException; +import java.util.StringTokenizer; public class NetworkUtils { + + private NetworkUtils() { + } /** * Set the host name variable. Sometimes the network can be finicky, so the * answer returned by getHostName() could throw an exception or be null. * Have it read the environment variable if getHostName() is unsuccessful. + * + * @return the local host name */ public static String getLocalHostName() { String hostName = ""; @@ -41,4 +49,78 @@ public class NetworkUtils { } return hostName; } + + /** + * Attempt to manually extract the domain from a URL. + * + * @param url + * @return empty string if no domain could be found + */ + private static String getBaseDomain(String url) { + String host = null; + + //strip protocol + String cleanUrl = url.replaceFirst(".*:\\/\\/", ""); + + //strip after slashes + String dirToks[] = cleanUrl.split("\\/"); + if (dirToks.length > 0) { + host = dirToks[0]; + } else { + host = cleanUrl; + } + + //get the domain part from host (last 2) + StringTokenizer tok = new StringTokenizer(host, "."); + StringBuilder hostB = new StringBuilder(); + int toks = tok.countTokens(); + + for (int count = 0; count < toks; ++count) { + String part = tok.nextToken(); + int diff = toks - count; + if (diff < 3) { + hostB.append(part); + } + if (diff == 2) { + hostB.append("."); + } + } + + + String base = hostB.toString(); + // verify there are no special characters in there + if (base.matches(".*[~`!@#$%^&\\*\\(\\)\\+={}\\[\\];:\\?<>,/ ].*")) { + return ""; + } + return base; + } + + /** + * Attempt to extract the domain from a URL. + * Will start by using the built-in URL class, and if that fails will + * try to extract it manually. + * + * @param urlString The URL to extract the domain from + * @return empty string if no domain name was found + */ + public static String extractDomain(String urlString) { + if (urlString == null) { + return ""; + } + String result = ""; + + try { + URL url = new URL(urlString); + result = url.getHost(); + } catch (MalformedURLException ex) { + //do not log if not a valid URL - we will try to extract it ourselves + } + + //was not a valid URL, try a less picky method + if (result == null || result.trim().isEmpty()) { + return getBaseDomain(urlString); + } + return result; + } + } diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Chrome.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Chrome.java index 0f9a98cd88..69381d8a13 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Chrome.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Chrome.java @@ -39,6 +39,7 @@ import java.io.FileReader; import java.io.IOException; import org.sleuthkit.autopsy.casemodule.services.FileManager; import org.sleuthkit.autopsy.coreutils.Logger; +import org.sleuthkit.autopsy.coreutils.NetworkUtils; import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.ingest.ModuleDataEvent; import org.sleuthkit.datamodel.AbstractFile; @@ -163,7 +164,7 @@ class Chrome extends Extract { NbBundle.getMessage(this.getClass(), "Chrome.moduleName"))); bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN, NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"), - (Util.extractDomain((result.get("url").toString() != null) ? result.get("url").toString() : "")))); //NON-NLS + (NetworkUtils.extractDomain((result.get("url").toString() != null) ? result.get("url").toString() : "")))); //NON-NLS BlackboardArtifact bbart = this.addArtifact(ARTIFACT_TYPE.TSK_WEB_HISTORY, historyFile, bbattributes); if (bbart != null) { @@ -286,7 +287,7 @@ class Chrome extends Extract { } else { date = Long.valueOf(0); } - String domain = Util.extractDomain(url); + String domain = NetworkUtils.extractDomain(url); try { BlackboardArtifact bbart = bookmarkFile.newArtifact(ARTIFACT_TYPE.TSK_WEB_BOOKMARK); Collection bbattributes = new ArrayList<>(); @@ -496,7 +497,7 @@ class Chrome extends Extract { //bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_LAST_ACCESSED.getTypeID(), "Recent Activity", "Last Visited", time)); bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED, NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"), time)); - String domain = Util.extractDomain((result.get("url").toString() != null) ? result.get("url").toString() : ""); //NON-NLS + String domain = NetworkUtils.extractDomain((result.get("url").toString() != null) ? result.get("url").toString() : ""); //NON-NLS bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN, NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"), domain)); bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME, @@ -590,7 +591,7 @@ class Chrome extends Extract { NbBundle.getMessage(this.getClass(), "Chrome.moduleName"))); bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL_DECODED, NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"), - (Util.extractDomain((result.get("origin_url").toString() != null) ? result.get("url").toString() : "")))); //NON-NLS + (NetworkUtils.extractDomain((result.get("origin_url").toString() != null) ? result.get("url").toString() : "")))); //NON-NLS bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_USER_NAME, NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"), ((result.get("username_value").toString() != null) ? result.get("username_value").toString().replaceAll("'", "''") : ""))); //NON-NLS diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractIE.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractIE.java index 90a0a4d7fa..29844b0827 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractIE.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractIE.java @@ -26,6 +26,7 @@ import java.io.BufferedReader; import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.ExecUtil; +import org.sleuthkit.autopsy.coreutils.NetworkUtils; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -609,6 +610,6 @@ class ExtractIE extends Extract { return null; } - return Util.extractDomain(url); + return NetworkUtils.extractDomain(url); } } diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Firefox.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Firefox.java index d1874d4010..6cb51795f3 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Firefox.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Firefox.java @@ -35,6 +35,7 @@ import java.util.logging.Level; import org.openide.util.NbBundle; import org.sleuthkit.autopsy.casemodule.services.FileManager; import org.sleuthkit.autopsy.coreutils.Logger; +import org.sleuthkit.autopsy.coreutils.NetworkUtils; import org.sleuthkit.autopsy.datamodel.ContentUtils; import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.ingest.IngestServices; @@ -669,6 +670,6 @@ class Firefox extends Extract { return null; } - return Util.extractDomain(url); + return NetworkUtils.extractDomain(url); } } diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Util.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Util.java index 9bff067394..fd9630cebd 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Util.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/Util.java @@ -84,83 +84,6 @@ class Util { } } - /** - * - * @param url - * @return empty string if no domain could be found - */ - private static String getBaseDomain(String url) { - String host = null; - - //strip protocol - String cleanUrl = url.replaceFirst(".*:\\/\\/", ""); - - //strip after slashes - String dirToks[] = cleanUrl.split("\\/"); - if (dirToks.length > 0) { - host = dirToks[0]; - } else { - host = cleanUrl; - } - - //get the domain part from host (last 2) - StringTokenizer tok = new StringTokenizer(host, "."); - StringBuilder hostB = new StringBuilder(); - int toks = tok.countTokens(); - - for (int count = 0; count < toks; ++count) { - String part = tok.nextToken(); - int diff = toks - count; - if (diff < 3) { - hostB.append(part); - } - if (diff == 2) { - hostB.append("."); - } - } - - - String base = hostB.toString(); - // verify there are no special characters in there - if (base.matches(".*[~`!@#$%^&\\*\\(\\)\\+={}\\[\\];:\\?<>,/ ].*")) { - return ""; - } - return base; - } - - /** - * - * @param value - * @return empty string if no domain name was found - */ - public static String extractDomain(String value) { - if (value == null) { - return ""; - - } - String result = ""; - // String domainPattern = "(\\w+)\\.(AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|CO|COM|COOP|CR|CU|CV|CW|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|IN|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NE|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SX|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XXX|YE|YT|ZA|ZM|ZW(co\\.[a-z].))"; - // Pattern p = Pattern.compile(domainPattern,Pattern.CASE_INSENSITIVE); - // Matcher m = p.matcher(value); - // while (m.find()) { - // result = value.substring(m.start(0),m.end(0)); - // } - - try { - URL url = new URL(value); - result = url.getHost(); - } catch (MalformedURLException ex) { - //do not log if not a valid URL, and handle later - //Logger.getLogger(Util.class.getName()).log(Level.SEVERE, null, ex); - } - - //was not a valid URL, try a less picky method - if (result == null || result.trim().isEmpty()) { - return getBaseDomain(value); - } - return result; - } - public static String getFileName(String value) { String filename = ""; String filematch = "^([a-zA-Z]\\:)(\\\\[^\\\\/:*?<>\"|]*(?