Moved extractDomain from RecentActivity into NetworkUtils

This commit is contained in:
Ann Priestman 2018-11-05 10:21:03 -05:00
parent 1c925b65ec
commit e5313083d8
5 changed files with 92 additions and 84 deletions

View File

@ -1,7 +1,7 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2012-2015 Basis Technology Corp.
* Copyright 2012-2018 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
@ -18,14 +18,22 @@
*/
package org.sleuthkit.autopsy.coreutils;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.StringTokenizer;
public class NetworkUtils {
private NetworkUtils() {
}
/**
* Gets the local host name. Sometimes the network can be finicky, so
* getHostName() could throw an exception or return null. Falls back to
* reading the environment variable if getHostName() is unsuccessful.
*
* @return the local host name
*/
public static String getLocalHostName() {
String hostName = "";
@ -41,4 +49,78 @@ public class NetworkUtils {
}
return hostName;
}
/**
 * Attempt to manually extract the domain from a URL-like string. Used as a
 * fallback when the string cannot be parsed by java.net.URL.
 *
 * The scheme is stripped, the authority is cut at the first '/', '?' or
 * '#', and the last two dot-separated labels of what remains are returned
 * (for example "sub.example.com" yields "example.com").
 *
 * @param url The string to extract the domain from; must not be null
 *
 * @return The last two labels of the host, or an empty string if no domain
 *         could be found
 */
private static String getBaseDomain(String url) {
    // Strip the scheme. The match is anchored and may not cross a '/', '?'
    // or '#', so a URL embedded in the path or query (e.g. "?u=http://x")
    // is never mistaken for the scheme of the outer URL.
    String cleanUrl = url.replaceFirst("^[^/?#]*://", "");

    // The authority ends at the first path, query or fragment delimiter.
    // A positive limit guarantees at least one element, even for "".
    String host = cleanUrl.split("[/?#]", 2)[0];

    // Keep only the last two labels of the host. StringTokenizer skips
    // empty labels, so stray leading/trailing dots are ignored.
    StringTokenizer labels = new StringTokenizer(host, ".");
    StringBuilder hostB = new StringBuilder();
    int total = labels.countTokens();
    for (int count = 0; count < total; ++count) {
        String label = labels.nextToken();
        int remaining = total - count;
        if (remaining < 3) {
            hostB.append(label);
        }
        if (remaining == 2) {
            hostB.append(".");
        }
    }

    String base = hostB.toString();

    // Reject anything that still contains characters that cannot be part
    // of a plain domain name (this also rejects "host:port" and userinfo).
    if (base.matches(".*[~`!@#$%^&\\*\\(\\)\\+={}\\[\\];:\\?<>,/ ].*")) {
        return "";
    }
    return base;
}

/**
 * Attempt to extract the domain from a URL. Will start by using the
 * built-in URL class, and if that fails will try to extract it manually.
 *
 * Note that when the URL class succeeds the full host is returned (e.g.
 * "www.example.com"), whereas the manual fallback returns only the last
 * two labels of the host (e.g. "example.com").
 *
 * @param urlString The URL to extract the domain from; may be null
 *
 * @return The host or domain, or an empty string if no domain name was
 *         found
 */
public static String extractDomain(String urlString) {
    if (urlString == null) {
        return "";
    }

    String result = "";
    try {
        URL url = new URL(urlString);
        result = url.getHost();
    } catch (MalformedURLException ignored) {
        // Deliberately not logged: many inputs are not strict URLs, and
        // the manual extraction below handles them.
    }

    // Not a valid URL (or one without a host), so try a less picky method.
    if (result == null || result.trim().isEmpty()) {
        return getBaseDomain(urlString);
    }
    return result;
}
}

View File

@ -39,6 +39,7 @@ import java.io.FileReader;
import java.io.IOException;
import org.sleuthkit.autopsy.casemodule.services.FileManager;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.NetworkUtils;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
import org.sleuthkit.datamodel.AbstractFile;
@ -163,7 +164,7 @@ class Chrome extends Extract {
NbBundle.getMessage(this.getClass(), "Chrome.moduleName")));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN,
NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"),
(Util.extractDomain((result.get("url").toString() != null) ? result.get("url").toString() : "")))); //NON-NLS
(NetworkUtils.extractDomain((result.get("url").toString() != null) ? result.get("url").toString() : "")))); //NON-NLS
BlackboardArtifact bbart = this.addArtifact(ARTIFACT_TYPE.TSK_WEB_HISTORY, historyFile, bbattributes);
if (bbart != null) {
@ -286,7 +287,7 @@ class Chrome extends Extract {
} else {
date = Long.valueOf(0);
}
String domain = Util.extractDomain(url);
String domain = NetworkUtils.extractDomain(url);
try {
BlackboardArtifact bbart = bookmarkFile.newArtifact(ARTIFACT_TYPE.TSK_WEB_BOOKMARK);
Collection<BlackboardAttribute> bbattributes = new ArrayList<>();
@ -496,7 +497,7 @@ class Chrome extends Extract {
//bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_LAST_ACCESSED.getTypeID(), "Recent Activity", "Last Visited", time));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED,
NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"), time));
String domain = Util.extractDomain((result.get("url").toString() != null) ? result.get("url").toString() : ""); //NON-NLS
String domain = NetworkUtils.extractDomain((result.get("url").toString() != null) ? result.get("url").toString() : ""); //NON-NLS
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN,
NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"), domain));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME,
@ -590,7 +591,7 @@ class Chrome extends Extract {
NbBundle.getMessage(this.getClass(), "Chrome.moduleName")));
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_URL_DECODED,
NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"),
(Util.extractDomain((result.get("origin_url").toString() != null) ? result.get("url").toString() : "")))); //NON-NLS
(NetworkUtils.extractDomain((result.get("origin_url").toString() != null) ? result.get("url").toString() : "")))); //NON-NLS
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_USER_NAME,
NbBundle.getMessage(this.getClass(), "Chrome.parentModuleName"),
((result.get("username_value").toString() != null) ? result.get("username_value").toString().replaceAll("'", "''") : ""))); //NON-NLS

View File

@ -26,6 +26,7 @@ import java.io.BufferedReader;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.ExecUtil;
import org.sleuthkit.autopsy.coreutils.NetworkUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
@ -609,6 +610,6 @@ class ExtractIE extends Extract {
return null;
}
return Util.extractDomain(url);
return NetworkUtils.extractDomain(url);
}
}

View File

@ -35,6 +35,7 @@ import java.util.logging.Level;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.casemodule.services.FileManager;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.NetworkUtils;
import org.sleuthkit.autopsy.datamodel.ContentUtils;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.ingest.IngestServices;
@ -669,6 +670,6 @@ class Firefox extends Extract {
return null;
}
return Util.extractDomain(url);
return NetworkUtils.extractDomain(url);
}
}

View File

@ -84,83 +84,6 @@ class Util {
}
}
/**
 * Attempt to manually extract the domain from a URL-like string. Used as a
 * fallback when the string cannot be parsed by java.net.URL.
 *
 * The scheme is stripped, the authority is cut at the first '/', '?' or
 * '#', and the last two dot-separated labels of what remains are returned
 * (for example "sub.example.com" yields "example.com").
 *
 * @param url The string to extract the domain from; must not be null
 *
 * @return empty string if no domain could be found
 */
private static String getBaseDomain(String url) {
    // Strip the scheme. The match is anchored and may not cross a '/', '?'
    // or '#', so a URL embedded in the path or query (e.g. "?u=http://x")
    // is never mistaken for the scheme of the outer URL.
    String cleanUrl = url.replaceFirst("^[^/?#]*://", "");

    // The authority ends at the first path, query or fragment delimiter.
    // A positive limit guarantees at least one element, even for "".
    String host = cleanUrl.split("[/?#]", 2)[0];

    // Keep only the last two labels of the host. StringTokenizer skips
    // empty labels, so stray leading/trailing dots are ignored.
    StringTokenizer labels = new StringTokenizer(host, ".");
    StringBuilder hostB = new StringBuilder();
    int total = labels.countTokens();
    for (int count = 0; count < total; ++count) {
        String label = labels.nextToken();
        int remaining = total - count;
        if (remaining < 3) {
            hostB.append(label);
        }
        if (remaining == 2) {
            hostB.append(".");
        }
    }

    String base = hostB.toString();

    // Reject anything that still contains characters that cannot be part
    // of a plain domain name (this also rejects "host:port" and userinfo).
    if (base.matches(".*[~`!@#$%^&\\*\\(\\)\\+={}\\[\\];:\\?<>,/ ].*")) {
        return "";
    }
    return base;
}

/**
 * Attempt to extract the domain from a URL. The built-in URL class is
 * tried first; if it rejects the string or yields no host, the domain is
 * extracted manually.
 *
 * Note that when the URL class succeeds the full host is returned (e.g.
 * "www.example.com"), whereas the manual fallback returns only the last
 * two labels of the host (e.g. "example.com").
 *
 * @param value The URL to extract the domain from; may be null
 *
 * @return empty string if no domain name was found
 */
public static String extractDomain(String value) {
    if (value == null) {
        return "";
    }

    String result = "";
    try {
        URL url = new URL(value);
        result = url.getHost();
    } catch (MalformedURLException ignored) {
        // Deliberately not logged: many inputs are not strict URLs, and
        // the manual extraction below handles them.
    }

    // Not a valid URL (or one without a host), so try a less picky method.
    if (result == null || result.trim().isEmpty()) {
        return getBaseDomain(value);
    }
    return result;
}
public static String getFileName(String value) {
String filename = "";
String filematch = "^([a-zA-Z]\\:)(\\\\[^\\\\/:*?<>\"|]*(?<!\\[ \\]))*(\\.[a-zA-Z]{2,6})$"; //NON-NLS