mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-11 23:46:15 +00:00
444 lines
16 KiB
Java
444 lines
16 KiB
Java
/*
 * Autopsy Forensic Browser
 *
 * Copyright 2011 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
package org.sleuthkit.autopsy.recentactivity;
|
|
|
|
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.swing.JPanel;
import org.sleuthkit.autopsy.ingest.IngestImageWorkerController;
import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
import org.sleuthkit.autopsy.ingest.IngestServiceImage;
import org.sleuthkit.autopsy.ingest.ServiceDataEvent;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
import org.sleuthkit.datamodel.FsContent;
import org.sleuthkit.datamodel.Image;
|
|
|
|
/**
|
|
* This module attempts to extract web queries from major search engines by
|
|
* querying the blackboard for web history and bookmark artifacts, and
|
|
* extracting search text from them.
|
|
*
|
|
*
|
|
* Additions to the search engines require editing the following: SearchEngine
|
|
* enum, getSearchEngine(), extractSearchEngineQuery()
|
|
*
|
|
*/
|
|
public class SearchEngineURLQueryAnalyzer extends Extract implements IngestServiceImage {
|
|
|
|
static final String MODULE_NAME = "Search Engine Query Analyzer";
|
|
|
|
/**
|
|
* The record of supported engines*
|
|
*/
|
|
private static enum SearchEngine {
|
|
|
|
NONE("None"),
|
|
Google("Google"),
|
|
Bing("Bing"),
|
|
Yahoo("Yahoo"),
|
|
Baidu("Baidu"),
|
|
Sogou("Sogou"),
|
|
Soso("Soso"),
|
|
Yandex("Yandex"),
|
|
Youdao("Youdao"),
|
|
Biglobe("Biglobe"),
|
|
Linkestan("Linkestan"),
|
|
Parseek("Parseek"),
|
|
Parset("Parset");
|
|
private final String name;
|
|
private int total = 0;
|
|
|
|
SearchEngine(String name) {
|
|
this.name = name;
|
|
}
|
|
|
|
private int getTotal() {
|
|
return total;
|
|
}
|
|
|
|
private void increment() {
|
|
++total;
|
|
}
|
|
|
|
private String getName() {
|
|
return name;
|
|
}
|
|
};
|
|
|
|
SearchEngineURLQueryAnalyzer() {
|
|
}
|
|
|
|
/**
|
|
* Returns which of the supported SearchEngines, if any, the given string
|
|
* belongs to.
|
|
*
|
|
* @param domain domain as part of the URL
|
|
* @return supported search engine the domain belongs to, if any
|
|
*
|
|
*/
|
|
private static SearchEngine getSearchEngine(String domain) {
|
|
if (domain.contains(".com")) {
|
|
String[] d = domain.split(".com");
|
|
if (d.length != 0 && d[0].contains(".baidu")) {
|
|
return SearchEngine.Baidu;
|
|
} else if (d.length != 0 && d.length != 0 && d[0].contains(".bing")) {
|
|
return SearchEngine.Bing;
|
|
} else if (d.length != 0 && d[0].contains(".yahoo")) {
|
|
return SearchEngine.Yahoo;
|
|
} else if (d.length != 0 && d[0].contains(".google")) {
|
|
return SearchEngine.Google;
|
|
} else if (d.length != 0 && d[0].contains(".youdao")) {
|
|
return SearchEngine.Youdao;
|
|
} else if (d.length != 0 && d[0].contains(".soso.com")) {
|
|
return SearchEngine.Soso;
|
|
} else if (d.length != 0 && d[0].contains(".sogou.com")) {
|
|
return SearchEngine.Sogou;
|
|
} else if (d.length != 0 && d[0].contains(".linkestan.com")) {
|
|
return SearchEngine.Linkestan;
|
|
} else if (d.length != 0 && d[0].contains(".parseek.com")) {
|
|
return SearchEngine.Parseek;
|
|
} else if (d.length != 0 && d[0].contains(".parset.com")) {
|
|
return SearchEngine.Parset;
|
|
}
|
|
} else if (domain.contains(".ru")) {
|
|
String[] d = domain.split(".ru");
|
|
if (d[0].contains("yandex")) {
|
|
return SearchEngine.Yandex;
|
|
}
|
|
} else if (domain.contains(".ne.jp")) {
|
|
String[] d = domain.split(".ne.jp");
|
|
if (d[0].contains("biglobe")) {
|
|
return SearchEngine.Biglobe;
|
|
}
|
|
}
|
|
return SearchEngine.NONE;
|
|
}
|
|
|
|
/**
|
|
* Attempts to extract the query from a URL.
|
|
*
|
|
* @param se SearchEngine, used to determine format of search query.
|
|
* @param url The URL string to be dissected.
|
|
* @return The extracted search query.
|
|
*/
|
|
private String extractSearchEngineQuery(SearchEngine se, String url) {
|
|
String x = "";
|
|
|
|
//English Search Engines
|
|
|
|
//google.com
|
|
if (se.equals(SearchEngine.Google)) {
|
|
if (url.contains("?q=")) {
|
|
x = split2(url, "\\?q=");
|
|
} else {
|
|
x = split2(url, "&q=");
|
|
}
|
|
} //yahoo.com
|
|
else if (se.equals(SearchEngine.Yahoo)) {
|
|
x = split2(url, "\\?p=");
|
|
} //bing.com
|
|
else if (se.equals(SearchEngine.Bing)) {
|
|
x = split2(url, "\\?q=");
|
|
} //Chinese Search Engines
|
|
//baidu.com
|
|
else if (se.equals(SearchEngine.Baidu)) {
|
|
if (url.contains("?wd=")) {
|
|
x = split2(url, "\\?wd=");
|
|
} else if (url.contains("?kw=")) {
|
|
x = split2(url, "\\?kw=");
|
|
} else if (url.contains("baidu.com/q?") || url.contains("baidu.com/m?") || url.contains("baidu.com/i?")) {
|
|
x = split2(url, "word=");
|
|
} else if (url.contains("/qw=") || url.contains("?qw=")) {
|
|
x = split2(url, "\\qw=");
|
|
} else if (url.contains("bs=")) {
|
|
x = split2(url, "&bs=");
|
|
}
|
|
} //sogou.com
|
|
else if (se.equals(SearchEngine.Sogou)) {
|
|
x = split2(url, "query=");
|
|
} //Soso.com
|
|
else if (se.equals(SearchEngine.Soso)) {
|
|
if (url.contains("p=S")) {
|
|
x = split2(url, "p=S");
|
|
} else if (url.contains("?w=")) {
|
|
x = split2(url, "\\?w=");
|
|
} else {
|
|
x = split2(url, "&w=");
|
|
}
|
|
|
|
|
|
} //youdao.com
|
|
else if (se.equals(SearchEngine.Youdao)) {
|
|
if (url.contains("search?q=")) {
|
|
x = split2(url, "\\?q=");
|
|
} else if (url.contains("?i=")) {
|
|
x = split2(url, "\\?i=");
|
|
}
|
|
} //Russian Search Engines
|
|
//yandex.ru
|
|
else if (se.equals(SearchEngine.Yandex)) {
|
|
if (url.contains("?text=")) {
|
|
x = split2(url, "\\?text=");
|
|
} else {
|
|
x = split2(url, "&text=");
|
|
}
|
|
} //Japanese Search Engines
|
|
//biglobe.ne.jp
|
|
else if (se.equals(SearchEngine.Biglobe)) {
|
|
if (url.contains("?search=")) {
|
|
x = split2(url, "\\?search=");
|
|
} else if (url.contains("?q=")) {
|
|
x = split2(url, "\\?q=");
|
|
} else if (url.contains("/key/")) {
|
|
x = split2(url, "/key/");
|
|
} else if (url.contains("&q=")) {
|
|
x = split2(url, "&q=");
|
|
}
|
|
} //Persian & Arabic Search Engines
|
|
//Linkestan.com
|
|
else if (se.equals(SearchEngine.Linkestan)) {
|
|
x = split2(url, "\\?psearch=");
|
|
} //Parseek.com
|
|
else if (se.equals(SearchEngine.Parseek)) {
|
|
x = split2(url, "\\?q=");
|
|
} //Parset.com
|
|
else if (se.equals(SearchEngine.Parset)) {
|
|
x = split2(url, "\\?Keyword=");
|
|
}
|
|
|
|
try { //try to decode the url
|
|
String decoded = URLDecoder.decode(x, "UTF-8");
|
|
return decoded;
|
|
} catch (UnsupportedEncodingException uee) { //if it fails, return the encoded string
|
|
logger.info("Error during URL decoding: " + uee);
|
|
return x;
|
|
}
|
|
|
|
}
|
|
|
|
/**
|
|
* Splits URLs based on a delimeter (key). .contains() and .split()
|
|
*
|
|
* @param url The URL to be split
|
|
* @param splitkey the delimeter used to split the URL into its search
|
|
* query.
|
|
* @return The extracted search query
|
|
*
|
|
*/
|
|
private String split2(String url, String splitkey) {
|
|
String basereturn = "NULL";
|
|
String splitKeyConverted = splitkey;
|
|
//Want to determine if string contains a string based on splitkey, but we want to split the string on splitKeyConverted due to regex
|
|
if (splitkey.contains("\\?")) {
|
|
splitKeyConverted = splitkey.replace("\\?", "?"); //Handling java -> regex conversions and viceversa
|
|
}
|
|
if (url.contains(splitKeyConverted)) {
|
|
String[] sp = url.split(splitkey);
|
|
if (sp.length >= 2) {
|
|
if (sp[sp.length - 1].contains("&")) {
|
|
basereturn = sp[sp.length - 1].split("&")[0];
|
|
|
|
} else {
|
|
basereturn = sp[sp.length - 1];
|
|
}
|
|
}
|
|
}
|
|
return basereturn;
|
|
}
|
|
|
|
private void getURLs(Image image, IngestImageWorkerController controller) {
|
|
int totalQueries = 0;
|
|
try {
|
|
//from blackboard_artifacts
|
|
ArrayList<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts("WHERE (`artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID()
|
|
+ "' OR `artifact_type_id` = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() + "') "); //List of every 'web_history' and 'bookmark' artifact
|
|
logger.info("Processing " + listArtifacts.size() + " blackboard artifacts.");
|
|
getAll:
|
|
for (BlackboardArtifact artifact : listArtifacts) {
|
|
//initializing default attributes
|
|
String source = ""; //becomes "bookmark" if attribute type 2, remains blank otherwise
|
|
String query = "";
|
|
String searchEngineName = "";
|
|
String searchEngineDomain = "";
|
|
String browser = "";
|
|
long last_accessed = -1;
|
|
//from tsk_files
|
|
FsContent fs = this.extractFiles(image, "select * from tsk_files where `obj_id` = '" + artifact.getObjectID() + "'").get(0); //associated file
|
|
SearchEngine se = SearchEngine.NONE;
|
|
//from blackboard_attributes
|
|
ArrayList<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes("Where `artifact_id` = " + artifact.getArtifactID());
|
|
getAttributes:
|
|
for (BlackboardAttribute attribute : listAttributes) {
|
|
if (controller.isCancelled()) {
|
|
break getAll; //User cancled the process.
|
|
}
|
|
if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_URL.getTypeID()) {
|
|
final String urlString = attribute.getValueString();
|
|
se = getSearchEngine(urlString);
|
|
if (!se.equals(SearchEngine.NONE)) {
|
|
query = extractSearchEngineQuery(se, attribute.getValueString());
|
|
searchEngineName = se.toString();
|
|
if (query.equals("NULL")) { //False positive match, artifact was not a query.
|
|
break getAttributes;
|
|
}
|
|
} else if (se.equals(SearchEngine.NONE)) {
|
|
break getAttributes; //could not determine type. Will move onto next artifact
|
|
}
|
|
} else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID()) {
|
|
browser = attribute.getValueString();
|
|
}
|
|
else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID()) {
|
|
searchEngineDomain = attribute.getValueString();
|
|
}
|
|
else if (attribute.getArtifactID() == BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID()) {
|
|
source = "bookmark";
|
|
} else if (attribute.getAttributeTypeID() == BlackboardAttribute.ATTRIBUTE_TYPE.TSK_LAST_ACCESSED.getTypeID()) {
|
|
last_accessed = attribute.getValueLong();
|
|
}
|
|
}
|
|
|
|
if (!se.equals(SearchEngine.NONE) && !query.equals("NULL")) {
|
|
try {
|
|
|
|
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID(), MODULE_NAME, searchEngineDomain));
|
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_TEXT.getTypeID(), MODULE_NAME, query));
|
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID(), MODULE_NAME, source, browser));
|
|
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_LAST_ACCESSED.getTypeID(), MODULE_NAME, last_accessed));
|
|
this.addArtifact(ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY, fs, bbattributes);
|
|
se.increment();
|
|
++totalQueries;
|
|
} catch (Exception e) {
|
|
logger.log(Level.SEVERE, "Error while add artifact.", e + " from " + fs.toString());
|
|
this.addErrorMessage(this.getName() + ": Error while adding artifact");
|
|
}
|
|
IngestManagerProxy.fireServiceDataEvent(new ServiceDataEvent("RecentActivity", BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY));
|
|
}
|
|
|
|
|
|
}
|
|
} catch (Exception e) {
|
|
logger.info("Encountered error retrieving artifacts: " + e);
|
|
} finally {
|
|
if (controller.isCancelled()) {
|
|
logger.info("Operation terminated by user.");
|
|
}
|
|
logger.info("Extracted " + totalQueries + " queries from the blackboard");
|
|
}
|
|
}
|
|
|
|
private String getTotals() {
|
|
String total = "";
|
|
for (SearchEngine se : SearchEngine.values()) {
|
|
if (se.getTotal() != 0) {
|
|
total += se.getName() + ": " + se.getTotal() + "\n";
|
|
}
|
|
}
|
|
return total;
|
|
}
|
|
|
|
@Override
|
|
public void process(Image image, IngestImageWorkerController controller) {
|
|
this.getURLs(image, controller);
|
|
logger.info("Search Engine stats: \n" + getTotals());
|
|
}
|
|
|
|
@Override
|
|
public void init(IngestManagerProxy managerProxy) {
|
|
logger.info("running init()");
|
|
}
|
|
|
|
@Override
|
|
public void complete() {
|
|
logger.info("running complete()");
|
|
}
|
|
|
|
@Override
|
|
public void stop() {
|
|
logger.info("running stop()");
|
|
}
|
|
|
|
@Override
|
|
public String getName() {
|
|
return this.moduleName;
|
|
}
|
|
|
|
@Override
|
|
public String getDescription() {
|
|
SearchEngine[] values = SearchEngine.values();
|
|
String total = "";
|
|
int i = 0;
|
|
while (i < values.length) { //could alternatively just forbid values[0], but that's kind of volatile.
|
|
if (values[i] != SearchEngine.NONE) {
|
|
total += values[i].getName() + "\n";
|
|
}
|
|
i++;
|
|
}
|
|
|
|
return "Extracts search queries on the following search engines: " + total;
|
|
|
|
}
|
|
|
|
@Override
|
|
public ServiceType getType() {
|
|
return ServiceType.Image;
|
|
}
|
|
|
|
@Override
|
|
public boolean hasBackgroundJobsRunning() {
|
|
return false;
|
|
}
|
|
|
|
@Override
|
|
public boolean hasSimpleConfiguration() {
|
|
return false;
|
|
}
|
|
|
|
@Override
|
|
public boolean hasAdvancedConfiguration() {
|
|
return false;
|
|
}
|
|
|
|
@Override
|
|
public void saveSimpleConfiguration() {
|
|
}
|
|
|
|
@Override
|
|
public void saveAdvancedConfiguration() {
|
|
}
|
|
|
|
@Override
|
|
public JPanel getSimpleConfiguration() {
|
|
return null;
|
|
}
|
|
|
|
@Override
|
|
public JPanel getAdvancedConfiguration() {
|
|
return null;
|
|
}
|
|
}
|