/* * Autopsy Forensic Browser * * Copyright 2011 Basis Technology Corp. * Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.sleuthkit.autopsy.recentactivity; import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.logging.Level; import javax.swing.JPanel; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.sleuthkit.autopsy.coreutils.PlatformUtil; import org.sleuthkit.autopsy.ingest.IngestImageWorkerController; import org.sleuthkit.autopsy.ingest.IngestModuleImage; import org.sleuthkit.autopsy.ingest.IngestModuleInit; import org.sleuthkit.autopsy.ingest.IngestServices; import org.sleuthkit.autopsy.ingest.ModuleDataEvent; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE; import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE; import org.sleuthkit.datamodel.FsContent; import org.sleuthkit.datamodel.Image; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.NodeList; /** * This module attempts to extract web queries from major search engines by * querying the blackboard for web history and bookmark artifacts, and * extracting 
search text from them. * * * To add search engines, edit SEUQAMappings.xml (the file named by XMLFile), * which init2() parses from the user directory. * */ public class SearchEngineURLQueryAnalyzer extends Extract implements IngestModuleImage { private IngestServices services; public static final String MODULE_NAME = "Search Engine URL Query Analyzer"; public final static String MODULE_VERSION = "1.0"; private String args; /** Name of the XML mappings file that defines the search engines; parsed into xmlinput by init2(). */ public static final String XMLFile = "SEUQAMappings.xml"; private static String[] searchEngineNames; private static SearchEngine[] engines; private static Document xmlinput; /** Placeholder engine that getURLs() compares against to detect that no engine matched. */ private static final SearchEngine NullEngine = new SearchEngine("NONE", "NONE", new HashMap()); //hide public constructor to prevent from instantiation by ingest module loader SearchEngineURLQueryAnalyzer() { } /** Holds one engine's name, domain substring and split-token map, plus a counter advanced by increment(). NOTE(review): generic type arguments appear to have been stripped from this copy of the file (e.g. the malformed "Set>" return type of getSplits()). */ private static class SearchEngine { private String _engineName; private String _domainSubstring; private Map _splits; private int _count; SearchEngine(String engineName, String domainSubstring, Map splits){ _engineName = engineName; _domainSubstring = domainSubstring; _splits = splits; _count = 0; } public void increment(){ ++_count; } public String getEngineName(){ return _engineName; } public String getDomainSubstring(){ return _domainSubstring; } public int getTotal(){ return _count; } public Set> getSplits(){ return this._splits.entrySet(); } @Override public String toString(){ String split = " "; for(Map.Entry kvp : getSplits()){ split = split + "[ " + kvp.getKey() + " :: " + kvp.getValue() + " ]" + ", "; } return "Name: " + _engineName + "\n Domain Substring: " + _domainSubstring + "\n count: " + _count + "\n Split Tokens: \n " + split; } } /** Walks the "SearchEngine" elements of xmlinput, reading each element's "engine" and "domainSubstring" attributes into an array sized to the element count (presumably one SearchEngine per element; the rest of this method is not intact in this copy of the file). */ private void createEngines(){ NodeList nlist = xmlinput.getElementsByTagName("SearchEngine"); SearchEngine[] listEngines = new SearchEngine[nlist.getLength()]; for(int i = 0;i < nlist.getLength(); i++){ try{ NamedNodeMap nnm = nlist.item(i).getAttributes(); String EngineName = nnm.getNamedItem("engine").getNodeValue(); String EnginedomainSubstring = 
nnm.getNamedItem("domainSubstring").getNodeValue(); Map splits = new HashMap(); NodeList listSplits = xmlinput.getElementsByTagName("splitToken"); for(int k = 0; k /* NOTE(review): source text appears to be missing at this point in this copy of the file - the loop header is incomplete, and the declarations of eng, url and x used below (as well as getSearchEngine, getSearchEngineNames and the header of extractSearchEngineQuery, which are called elsewhere in this class) are not visible. Restore from version control before building. */ kvp : eng.getSplits()){ if(url.contains(kvp.getKey())){ x = split2(url, kvp.getValue()); break; } } try { //try to decode the url String decoded = URLDecoder.decode(x, "UTF-8"); return decoded; } catch (UnsupportedEncodingException uee) { //if it fails, return the encoded string logger.log(Level.FINE, "Error during URL decoding ", uee); return x; } } /** * Extracts the search text that follows a split token in a URL. * * @param url The URL to be split * @param value the split token; any "\?" in it is replaced with "?" and the result is handed to String.split(), which treats its argument as a regular expression * @return the last piece of the split, cut at its first "&"; "NoQuery" when the split yields fewer than two pieces * * NOTE(review): un-escaping "\?" before a regex split looks suspicious - confirm the intended token format. */ private String split2(String url, String value) { String basereturn = "NoQuery"; String v = value; //Want to determine if string contains a string based on splitkey, but we want to split the string on splitKeyConverted due to regex if(value.contains("\\?")){ v = value.replace("\\?", "?"); } String[] sp = url.split(v); if (sp.length >= 2) { if (sp[sp.length - 1].contains("&")) { basereturn = sp[sp.length - 1].split("&")[0]; } else { basereturn = sp[sp.length - 1]; } } return basereturn; } /** * Scans every TSK_WEB_BOOKMARK and TSK_WEB_HISTORY blackboard artifact and, for each one whose TSK_URL attribute matches an engine and yields a query other than "NoQuery" or "", adds a TSK_WEB_SEARCH_QUERY artifact carrying the domain, query text, program name and last-accessed time. Exceptions raised during the scan are logged, not rethrown. * * @param image passed to extractFiles() when looking up the file behind each artifact * @param controller polled for cancellation while attributes are read */ private void getURLs(Image image, IngestImageWorkerController controller) { int totalQueries = 0; try { //from blackboard_artifacts Collection listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts("WHERE (`artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID() + "' OR `artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() + "') "); //List of every 'web_history' and 'bookmark' artifact logger.info("Processing " + listArtifacts.size() + " blackboard artifacts."); getAll: for (BlackboardArtifact artifact : listArtifacts) { //initializing default attributes String query = ""; String searchEngineDomain = ""; String browser = ""; long last_accessed = -1; //from tsk_files 
/* NOTE(review): get(0) assumes the lookup returned at least one file; an exception here would be caught by the outer catch (Exception e), ending the scan - confirm. */ FsContent fs = this.extractFiles(image, "select * from tsk_files where `obj_id` = '" + artifact.getObjectID() + "'").get(0); //associated file SearchEngine se = NullEngine; //from blackboard_attributes Collection listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes("Where `artifact_id` = " + artifact.getArtifactID()); getAttributes: for (BlackboardAttribute attribute : listAttributes) { if (controller.isCancelled()) { break getAll; //User cancelled the process. } if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL.getTypeID()) { final String urlString = attribute.getValueString(); se = getSearchEngine(urlString); if (!se.equals(NullEngine)) { query = extractSearchEngineQuery(attribute.getValueString()); if (query.equals("NoQuery") || query.equals("")) { //False positive match, artifact was not a query. break getAttributes; } } else if (se.equals(NullEngine)) { break getAttributes; //could not determine type. Will move on to the next artifact } } else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID()) { browser = attribute.getValueString(); } else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID()) { searchEngineDomain = attribute.getValueString(); } else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID()) { last_accessed = attribute.getValueLong(); } } if (!se.equals(NullEngine) && !query.equals("NoQuery") && !query.equals("")) { try { Collection bbattributes = new ArrayList(); bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID(), MODULE_NAME, searchEngineDomain)); bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_TEXT.getTypeID(), MODULE_NAME, query)); bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID(), MODULE_NAME, browser)); bbattributes.add(new 
BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DATETIME_ACCESSED.getTypeID(), MODULE_NAME, last_accessed)); this.addArtifact(ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY, fs, bbattributes); se.increment(); ++totalQueries; } catch (Exception e) { logger.log(Level.WARNING, "Error during add artifact.", e); this.addErrorMessage(this.getName() + ": Error while adding artifact"); } /* Fired after the try/catch, so the event is sent even when adding the artifact failed. */ services.fireModuleDataEvent(new ModuleDataEvent("RecentActivity", BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY)); } } } catch (Exception e) { logger.log(Level.WARNING, "Encountered error retrieving artifacts: ", e); } finally { if (controller.isCancelled()) { logger.info("Operation terminated by user."); } logger.info("Extracted " + totalQueries + " queries from the blackboard"); } } /** Builds one "name : count" line per entry of the static engines array. NOTE(review): engines is iterated without a null check, while init2() only logs when loading fails - confirm engines is always populated before process() runs. */ private String getTotals() { String total = ""; for (SearchEngine se : engines) { total+= se.getEngineName() + " : "+ se.getTotal() + "\n"; } return total; } /** Runs the query extraction for the image, then logs the per-engine totals. */ @Override public void process(Image image, IngestImageWorkerController controller) { this.getURLs(image, controller); logger.info("Search Engine stats: \n" + getTotals()); } /** Obtains the ingest services and calls PlatformUtil.extractResourceToUserDir() for XMLFile; when that returns true the file is loaded through init2(), otherwise (or on IOException) a warning is logged. */ @Override public void init(IngestModuleInit initContext) { try{ services = IngestServices.getDefault(); if(PlatformUtil.extractResourceToUserDir(SearchEngineURLQueryAnalyzer.class, XMLFile)){ init2(); } else{ logger.warning("Unable to find " + XMLFile); } } catch(IOException e){ logger.log(Level.WARNING, "Unable to find " + XMLFile , e); } } /** Parses XMLFile from the user directory into xmlinput, then calls createEngines() and getSearchEngineNames(); failures are logged, not rethrown. */ private void init2(){ try{ String path = PlatformUtil.getUserDirectory().getAbsolutePath() + File.separator + XMLFile; File f = new File(path); /* NOTE: this message is printed before the file has been parsed. */ System.out.println("Load successful"); DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); Document xml = db.parse(f); xmlinput = xml; try{ createEngines(); getSearchEngineNames(); } catch(Exception e){ logger.log(Level.WARNING, "Unable to create Search Engines!", e); } } catch(Exception e){ logger.log(Level.WARNING, "Was not able to load 
SEUQAMappings.xml", e); } } @Override public void complete() { logger.info("running complete()"); } @Override public void stop() { logger.info("running stop()"); } @Override public String getName() { return MODULE_NAME; } /** Returns a fixed header followed by the entries of searchEngineNames, one per line. NOTE(review): searchEngineNames is iterated without a null check - confirm it is always set before this is called. */ @Override public String getDescription() { String total = ""; for(String name : searchEngineNames){ total += name + "\n"; } return "Extracts search queries on the following search engines: \n" + total; } @Override public String getVersion() { return MODULE_VERSION; } @Override public String getArguments() { return args; } @Override public void setArguments(String args) { this.args = args; } @Override public ModuleType getType() { return ModuleType.Image; } @Override public boolean hasBackgroundJobsRunning() { return false; } /* The module exposes no configuration: the two "has...Configuration" methods return false, the two "save...Configuration" methods do nothing, and the two panel getters return null. */ @Override public boolean hasSimpleConfiguration() { return false; } @Override public boolean hasAdvancedConfiguration() { return false; } @Override public void saveSimpleConfiguration() { } @Override public void saveAdvancedConfiguration() { } @Override public JPanel getSimpleConfiguration() { return null; } @Override public JPanel getAdvancedConfiguration() { return null; } }