Merge remote-tracking branch 'upstream/develop' into call_log_locations

This commit is contained in:
jmillman 2014-08-19 14:31:48 -04:00
commit 0d0a636f15
3 changed files with 134 additions and 105 deletions

View File

@ -24,9 +24,7 @@ import java.io.UnsupportedEncodingException;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level; import java.util.logging.Level;
import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilderFactory;
@ -45,7 +43,7 @@ import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE; import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.Content; import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.TskException; import org.sleuthkit.datamodel.TskCoreException;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap; import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
@ -65,13 +63,8 @@ class SearchEngineURLQueryAnalyzer extends Extract {
private static final Logger logger = Logger.getLogger(SearchEngineURLQueryAnalyzer.class.getName()); private static final Logger logger = Logger.getLogger(SearchEngineURLQueryAnalyzer.class.getName());
private static final String XMLFILE = "SEUQAMappings.xml"; //NON-NLS private static final String XMLFILE = "SEUQAMappings.xml"; //NON-NLS
private static final String XSDFILE = "SearchEngineSchema.xsd"; //NON-NLS private static final String XSDFILE = "SearchEngineSchema.xsd"; //NON-NLS
private static String[] searchEngineNames;
private static SearchEngineURLQueryAnalyzer.SearchEngine[] engines; private static SearchEngineURLQueryAnalyzer.SearchEngine[] engines;
private static Document xmlinput;
private static final SearchEngineURLQueryAnalyzer.SearchEngine NullEngine = new SearchEngineURLQueryAnalyzer.SearchEngine(
NbBundle.getMessage(SearchEngineURLQueryAnalyzer.class, "SearchEngineURLQueryAnalyzer.engineName.none"),
NbBundle.getMessage(SearchEngineURLQueryAnalyzer.class, "SearchEngineURLQueryAnalyzer.domainSubStr.none"),
new HashMap<String,String>());
private Content dataSource; private Content dataSource;
private IngestJobContext context; private IngestJobContext context;
@ -79,52 +72,100 @@ class SearchEngineURLQueryAnalyzer extends Extract {
moduleName = NbBundle.getMessage(ExtractIE.class, "SearchEngineURLQueryAnalyzer.moduleName.text"); moduleName = NbBundle.getMessage(ExtractIE.class, "SearchEngineURLQueryAnalyzer.moduleName.text");
} }
/**
* Stores the regular expression and non-reg exp pair of keys.
* Key in the case of "?q=foo" would be "?q=".
* Both values are fixed at construction time and never change afterwards.
*/
private static class KeyPair {
// Plain (non-regular-expression) form of the key, e.g. "?q=".
private final String key;
// Regular-expression form of the same key.
private final String keyRegExp;
/**
* Creates a pair from the two forms of one key.
*
* @param key plain (non-regular-expression) form of the key
* @param keyRegExp regular-expression form of the key
*/
KeyPair (String key, String keyRegExp) {
this.key = key;
this.keyRegExp = keyRegExp;
}
/**
* @return the plain (non-regular-expression) form of the key
*/
String getKey() {
return key;
}
/**
* @return the regular-expression form of the key
*/
String getKeyRegExp() {
return keyRegExp;
}
}
private static class SearchEngine { private static class SearchEngine {
private String _engineName; private final String engineName;
private String _domainSubstring; private final String domainSubstring;
private Map<String, String> _splits; private final List<KeyPair> keyPairs;
private int _count; private int count;
SearchEngine(String engineName, String domainSubstring, Map<String, String> splits) { SearchEngine(String engineName, String domainSubstring, List<KeyPair> keyPairs) {
_engineName = engineName; this.engineName = engineName;
_domainSubstring = domainSubstring; this.domainSubstring = domainSubstring;
_splits = splits; this.keyPairs = keyPairs;
_count = 0; count = 0;
} }
void increment() { void increment() {
++_count; ++count;
} }
String getEngineName() { String getEngineName() {
return _engineName; return engineName;
} }
String getDomainSubstring() { String getDomainSubstring() {
return _domainSubstring; return domainSubstring;
} }
int getTotal() { int getTotal() {
return _count; return count;
} }
Set<Map.Entry<String, String>> getSplits() { /**
return this._splits.entrySet(); * Get the key values used in the URL to denote the search term
* @return
*/
List<KeyPair> getKeys() {
return this.keyPairs;
} }
@Override @Override
public String toString() { public String toString() {
String split = " "; String split = " ";
for (Map.Entry<String, String> kvp : getSplits()) { for (KeyPair kp : keyPairs) {
split = split + "[ " + kvp.getKey() + " :: " + kvp.getValue() + " ]" + ", "; split = split + "[ " + kp.getKey() + " :: " + kp.getKeyRegExp() + " ]" + ", ";
} }
return NbBundle.getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.toString", return NbBundle.getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.toString",
_engineName, _domainSubstring, _count, split); engineName, domainSubstring, count, split);
} }
} }
private void createEngines() { private void loadConfigFile() throws IngestModuleException {
Document xmlinput;
try {
String path = PlatformUtil.getUserConfigDirectory() + File.separator + XMLFILE;
File f = new File(path);
logger.log(Level.INFO, "Load successful"); //NON-NLS
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
xmlinput = db.parse(f);
if (!XMLUtil.xmlIsValid(xmlinput, SearchEngineURLQueryAnalyzer.class, XSDFILE)) {
logger.log(Level.WARNING, "Error loading Search Engines: could not validate against [" + XSDFILE + "], results may not be accurate."); //NON-NLS
}
} catch (IOException e) {
throw new IngestModuleException("Was not able to load SEUQAMappings.xml: " + e.getLocalizedMessage()); //NON-NLS
} catch (ParserConfigurationException pce) {
throw new IngestModuleException("Unable to build XML parser: " + pce.getLocalizedMessage()); //NON-NLS
} catch (SAXException sxe) {
throw new IngestModuleException("Unable to parse XML file: " + sxe.getLocalizedMessage()); //NON-NLS
}
NodeList nlist = xmlinput.getElementsByTagName("SearchEngine"); //NON-NLS NodeList nlist = xmlinput.getElementsByTagName("SearchEngine"); //NON-NLS
SearchEngineURLQueryAnalyzer.SearchEngine[] listEngines = new SearchEngineURLQueryAnalyzer.SearchEngine[nlist.getLength()]; SearchEngineURLQueryAnalyzer.SearchEngine[] listEngines = new SearchEngineURLQueryAnalyzer.SearchEngine[nlist.getLength()];
for (int i = 0; i < nlist.getLength(); i++) { for (int i = 0; i < nlist.getLength(); i++) {
@ -132,16 +173,17 @@ class SearchEngineURLQueryAnalyzer extends Extract {
String EngineName = nnm.getNamedItem("engine").getNodeValue(); //NON-NLS String EngineName = nnm.getNamedItem("engine").getNodeValue(); //NON-NLS
String EnginedomainSubstring = nnm.getNamedItem("domainSubstring").getNodeValue(); //NON-NLS String EnginedomainSubstring = nnm.getNamedItem("domainSubstring").getNodeValue(); //NON-NLS
Map<String, String> splits = new HashMap<>(); List<KeyPair> keys = new ArrayList<>();
NodeList listSplits = xmlinput.getElementsByTagName("splitToken"); //NON-NLS NodeList listSplits = xmlinput.getElementsByTagName("splitToken"); //NON-NLS
for (int k = 0; k < listSplits.getLength(); k++) { for (int k = 0; k < listSplits.getLength(); k++) {
if (listSplits.item(k).getParentNode().getAttributes().getNamedItem("engine").getNodeValue().equals(EngineName)) { //NON-NLS if (listSplits.item(k).getParentNode().getAttributes().getNamedItem("engine").getNodeValue().equals(EngineName)) { //NON-NLS
splits.put(listSplits.item(k).getAttributes().getNamedItem("plainToken").getNodeValue(), listSplits.item(k).getAttributes().getNamedItem("regexToken").getNodeValue()); //NON-NLS keys.add( new KeyPair(listSplits.item(k).getAttributes().getNamedItem("plainToken").getNodeValue(), listSplits.item(k).getAttributes().getNamedItem("regexToken").getNodeValue())); //NON-NLS
} }
} }
SearchEngineURLQueryAnalyzer.SearchEngine Se = new SearchEngineURLQueryAnalyzer.SearchEngine(EngineName, EnginedomainSubstring, splits); SearchEngineURLQueryAnalyzer.SearchEngine Se = new SearchEngineURLQueryAnalyzer.SearchEngine(EngineName, EnginedomainSubstring, keys);
//System.out.println("Search Engine: " + Se.toString()); //System.out.println("Search Engine: " + Se.toString());
listEngines[i] = Se; listEngines[i] = Se;
} }
@ -153,28 +195,22 @@ class SearchEngineURLQueryAnalyzer extends Extract {
* belongs to. * belongs to.
* *
* @param domain domain as part of the URL * @param domain domain as part of the URL
* @return supported search engine the domain belongs to, if any * @return supported search engine the domain belongs to or null if no match is found
* *
*/ */
private static SearchEngineURLQueryAnalyzer.SearchEngine getSearchEngine(String domain) { private static SearchEngineURLQueryAnalyzer.SearchEngine getSearchEngineFromUrl(String domain) {
if (engines == null) { if (engines == null) {
return SearchEngineURLQueryAnalyzer.NullEngine; return null;
} }
for (int i = 0; i < engines.length; i++) { for (SearchEngine engine : engines) {
if (domain.contains(engines[i].getDomainSubstring())) { if (domain.contains(engine.getDomainSubstring())) {
return engines[i]; return engine;
} }
} }
return SearchEngineURLQueryAnalyzer.NullEngine; return null;
} }
private void getSearchEngineNames() {
String[] listNames = new String[engines.length];
for (int i = 0; i < listNames.length; i++) {
listNames[i] = engines[i]._engineName;
}
searchEngineNames = listNames;
}
/** /**
* Attempts to extract the query from a URL. * Attempts to extract the query from a URL.
@ -182,12 +218,12 @@ class SearchEngineURLQueryAnalyzer extends Extract {
* @param url The URL string to be dissected. * @param url The URL string to be dissected.
* @return The extracted search query. * @return The extracted search query.
*/ */
private String extractSearchEngineQuery(String url) { private String extractSearchEngineQuery(SearchEngineURLQueryAnalyzer.SearchEngine eng, String url) {
String x = "NoQuery"; //NON-NLS String x = ""; //NON-NLS
SearchEngineURLQueryAnalyzer.SearchEngine eng = getSearchEngine(url);
for (Map.Entry<String, String> kvp : eng.getSplits()) { for (KeyPair kp : eng.getKeys()) {
if (url.contains(kvp.getKey())) { if (url.contains(kp.getKey())) {
x = split2(url, kvp.getValue()); x = getValue(url, kp.getKeyRegExp());
break; break;
} }
} }
@ -204,38 +240,48 @@ class SearchEngineURLQueryAnalyzer extends Extract {
* Splits URLs based on a delimiter (key). .contains() and .split() * Splits URLs based on a delimiter (key). .contains() and .split()
* *
* @param url The URL to be split * @param url The URL to be split
* @param value the delimiter value used to split the URL into its search * @param regExpKey the delimiter value used to split the URL into its search
* token, extracted from the xml. * token, extracted from the xml.
* @return The extracted search query * @return The extracted search query
* *
*/ */
private String split2(String url, String value) { private String getValue(String url, String regExpKey) {
String basereturn = "NoQuery"; //NON-NLS /* NOTE: This doesn't seem like the most wonderful way to do this, but we have data
String v = value; * that has a bunch of bogus URLs. Such as:
* - Multiple google "q=" terms, including one after a "#" tag. Google used the last one
* - Search/query part of the URL starting with a '#'.
* Attempts at more formal approaches of splitting on the "?" and then on "&" resulted in missing things.
*/
String value = ""; //NON-NLS
String v = regExpKey;
//Want to determine if string contains a string based on splitkey, but we want to split the string on splitKeyConverted due to regex //Want to determine if string contains a string based on splitkey, but we want to split the string on splitKeyConverted due to regex
if (value.contains("\\?")) { if (regExpKey.contains("\\?")) {
v = value.replace("\\?", "?"); v = regExpKey.replace("\\?", "?");
} }
String[] sp = url.split(v); String[] sp = url.split(v);
if (sp.length >= 2) { if (sp.length >= 2) {
if (sp[sp.length - 1].contains("&")) { if (sp[sp.length - 1].contains("&")) {
basereturn = sp[sp.length - 1].split("&")[0]; value = sp[sp.length - 1].split("&")[0];
} else { } else {
basereturn = sp[sp.length - 1]; value = sp[sp.length - 1];
} }
} }
return basereturn; return value;
} }
private void getURLs() { private void findSearchQueries() {
int totalQueries = 0; int totalQueries = 0;
try { try {
//from blackboard_artifacts //from blackboard_artifacts
Collection<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts("WHERE (`artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID() //NON-NLS Collection<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts("WHERE (`artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID() //NON-NLS
+ "' OR `artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() + "') "); //List of every 'web_history' and 'bookmark' artifact NON-NLS + "' OR `artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() + "') "); //List of every 'web_history' and 'bookmark' artifact NON-NLS
logger.log(Level.INFO, "Processing {0} blackboard artifacts.", listArtifacts.size()); //NON-NLS logger.log(Level.INFO, "Processing {0} blackboard artifacts.", listArtifacts.size()); //NON-NLS
getAll:
for (BlackboardArtifact artifact : listArtifacts) { for (BlackboardArtifact artifact : listArtifacts) {
if (context.isJobCancelled()) {
break; //User cancled the process.
}
//initializing default attributes //initializing default attributes
String query = ""; String query = "";
String searchEngineDomain = ""; String searchEngineDomain = "";
@ -254,25 +300,21 @@ class SearchEngineURLQueryAnalyzer extends Extract {
continue; continue;
} }
SearchEngineURLQueryAnalyzer.SearchEngine se = NullEngine; SearchEngineURLQueryAnalyzer.SearchEngine se = null;
//from blackboard_attributes //from blackboard_attributes
Collection<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes("Where `artifact_id` = " + artifact.getArtifactID()); //NON-NLS Collection<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes("Where `artifact_id` = " + artifact.getArtifactID()); //NON-NLS
getAttributes:
for (BlackboardAttribute attribute : listAttributes) { for (BlackboardAttribute attribute : listAttributes) {
if (context.isJobCancelled()) {
break getAll; //User cancled the process.
}
if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL.getTypeID()) { if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL.getTypeID()) {
final String urlString = attribute.getValueString(); final String urlString = attribute.getValueString();
se = getSearchEngine(urlString); se = getSearchEngineFromUrl(urlString);
if (!se.equals(NullEngine)) { if (se == null)
query = extractSearchEngineQuery(attribute.getValueString()); break;
if (query.equals("NoQuery") || query.equals("")) { //False positive match, artifact was not a query. NON-NLS
break getAttributes; query = extractSearchEngineQuery(se, attribute.getValueString());
} if (query.equals("")) //False positive match, artifact was not a query. NON-NLS
} else if (se.equals(NullEngine)) { break;
break getAttributes; //could not determine type. Will move onto next artifact
}
} else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID()) { } else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID()) {
browser = attribute.getValueString(); browser = attribute.getValueString();
} else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID()) { } else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID()) {
@ -282,7 +324,7 @@ class SearchEngineURLQueryAnalyzer extends Extract {
} }
} }
if (!se.equals(NullEngine) && !query.equals("NoQuery") && !query.equals("")) { //NON-NLS if (se != null && !query.equals("")) { //NON-NLS
Collection<BlackboardAttribute> bbattributes = new ArrayList<>(); Collection<BlackboardAttribute> bbattributes = new ArrayList<>();
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID(), bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID(),
NbBundle.getMessage(this.getClass(), NbBundle.getMessage(this.getClass(),
@ -301,7 +343,7 @@ class SearchEngineURLQueryAnalyzer extends Extract {
++totalQueries; ++totalQueries;
} }
} }
} catch (TskException e) { } catch (TskCoreException e) {
logger.log(Level.SEVERE, "Encountered error retrieving artifacts for search engine queries", e); //NON-NLS logger.log(Level.SEVERE, "Encountered error retrieving artifacts for search engine queries", e); //NON-NLS
} finally { } finally {
if (context.isJobCancelled()) { if (context.isJobCancelled()) {
@ -329,46 +371,24 @@ class SearchEngineURLQueryAnalyzer extends Extract {
public void process(Content dataSource, IngestJobContext context) { public void process(Content dataSource, IngestJobContext context) {
this.dataSource = dataSource; this.dataSource = dataSource;
this.context = context; this.context = context;
this.getURLs(); this.findSearchQueries();
logger.log(Level.INFO, "Search Engine stats: \n{0}", getTotals()); //NON-NLS logger.log(Level.INFO, "Search Engine stats: \n{0}", getTotals()); //NON-NLS
} }
@Override @Override
void init() throws IngestModuleException { void init() throws IngestModuleException {
try { try {
PlatformUtil.extractResourceToUserConfigDir(SearchEngineURLQueryAnalyzer.class, XMLFILE, false); PlatformUtil.extractResourceToUserConfigDir(SearchEngineURLQueryAnalyzer.class, XMLFILE, true);
init2();
} catch (IOException e) { } catch (IOException e) {
String message = NbBundle String message = NbBundle
.getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.init.exception.msg", XMLFILE); .getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.init.exception.msg", XMLFILE);
logger.log(Level.SEVERE, message, e); logger.log(Level.SEVERE, message, e);
throw new IngestModuleException(message); throw new IngestModuleException(message);
} }
loadConfigFile();
} }
private void init2() {
try {
String path = PlatformUtil.getUserConfigDirectory() + File.separator + XMLFILE;
File f = new File(path);
logger.log(Level.INFO, "Load successful"); //NON-NLS
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
Document xml = db.parse(f);
xmlinput = xml;
if (!XMLUtil.xmlIsValid(xml, SearchEngineURLQueryAnalyzer.class, XSDFILE)) {
logger.log(Level.WARNING, "Error loading Search Engines: could not validate against [" + XSDFILE + "], results may not be accurate."); //NON-NLS
}
createEngines();
getSearchEngineNames();
} catch (IOException e) {
logger.log(Level.SEVERE, "Was not able to load SEUQAMappings.xml", e); //NON-NLS
} catch (ParserConfigurationException pce) {
logger.log(Level.SEVERE, "Unable to build XML parser", pce); //NON-NLS
} catch (SAXException sxe) {
logger.log(Level.SEVERE, "Unable to parse XML file", sxe); //NON-NLS
}
}
@Override @Override
public void complete() { public void complete() {

View File

@ -1931,6 +1931,11 @@ class OS:
LINUX, MAC, WIN, CYGWIN = range(4) LINUX, MAC, WIN, CYGWIN = range(4)
if __name__ == "__main__": if __name__ == "__main__":
if sys.hexversion < 0x03000000:
print("Python 3 required")
sys.exit(1)
global SYS global SYS
if _platform == "linux" or _platform == "linux2": if _platform == "linux" or _platform == "linux2":
SYS = OS.LINUX SYS = OS.LINUX

View File

@ -398,5 +398,9 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
if sys.hexversion < 0x03000000:
print("Python 3 required")
sys.exit(1)
sys.exit(main()) sys.exit(main())