mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-06 21:00:22 +00:00
Added SearchEngineURLQueryAnalyzer submodule in Recent Activity.
A new tab under Extracted Content, "Web Search Engine Query", holds extracted search query information. Currently supports the following engines: Google, Bing, Yahoo, Baidu, Soso, Sogou, Yandex, Parset, Parseek, Linkestan, Biglobe, Youdao.
This commit is contained in:
parent
6cff0f074d
commit
4c7645692b
Binary file not shown.
After Width: | Height: | Size: 783 B |
@ -97,6 +97,9 @@ public class ArtifactTypeNode extends AbstractNode implements DisplayableItemNod
|
||||
return "recent_docs.png";
|
||||
case TSK_DEVICE_ATTACHED:
|
||||
return "usb_devices.png";
|
||||
case TSK_WEB_SEARCH_QUERY:
|
||||
return "searchquery.png";
|
||||
|
||||
}
|
||||
return "artifact-icon.png";
|
||||
}
|
||||
|
@ -47,6 +47,7 @@ public final class RAImageIngestService implements IngestServiceImage {
|
||||
private Firefox ffre = null;
|
||||
private Chrome chre = null;
|
||||
private ExtractIE eere = null;
|
||||
private SearchEngineURLQueryAnalyzer usq = null;
|
||||
|
||||
//public constructor is required
|
||||
//as multiple instances are created for processing multiple images simultaneously
|
||||
@ -69,6 +70,7 @@ public final class RAImageIngestService implements IngestServiceImage {
|
||||
modules.add(ffre);
|
||||
modules.add(chre);
|
||||
modules.add(eere);
|
||||
modules.add(usq);
|
||||
managerProxy.postMessage(IngestMessage.createMessage(++messageId, MessageType.INFO, this, "Started " + image.getName()));
|
||||
controller.switchToDeterminate(modules.size());
|
||||
controller.progress(0);
|
||||
@ -131,6 +133,7 @@ public final class RAImageIngestService implements IngestServiceImage {
|
||||
this.chre = new Chrome();
|
||||
this.eree = new ExtractRegistry();
|
||||
this.ffre = new Firefox();
|
||||
this.usq = new SearchEngineURLQueryAnalyzer();
|
||||
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,391 @@
|
||||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.recentactivity;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.logging.Level;
|
||||
import javax.swing.JPanel;
|
||||
import org.sleuthkit.autopsy.ingest.IngestImageWorkerController;
|
||||
import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
|
||||
import org.sleuthkit.autopsy.ingest.IngestServiceImage;
|
||||
import org.sleuthkit.autopsy.ingest.ServiceDataEvent;
|
||||
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
|
||||
import org.sleuthkit.datamodel.BlackboardAttribute;
|
||||
import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
|
||||
import org.sleuthkit.datamodel.FsContent;
|
||||
import org.sleuthkit.datamodel.Image;
|
||||
|
||||
|
||||
|
||||
public class SearchEngineURLQueryAnalyzer extends Extract implements IngestServiceImage{
|
||||
protected String moduleName = "SEUQA";
|
||||
|
||||
|
||||
private static enum SearchEngine {NONE, Google, Bing, Yahoo, Baidu, Sogou, Soso, Yandex, Youdao, Biglobe, Linkestan, Parseek, Parset};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Package-private no-argument constructor; performs no initialization of its own.
 */
SearchEngineURLQueryAnalyzer() {
}
|
||||
|
||||
|
||||
private SearchEngine getSearchEngine(String domain){
|
||||
if(domain.contains(".com")){
|
||||
String[] d = domain.split(".com");
|
||||
if(d.length != 0 && d[0].contains(".baidu")){
|
||||
return SearchEngine.Baidu;
|
||||
}
|
||||
else if(d.length != 0 && d.length != 0 && d[0].contains(".bing")){
|
||||
return SearchEngine.Bing;
|
||||
}
|
||||
else if(d.length != 0 && d[0].contains(".yahoo")){
|
||||
return SearchEngine.Yahoo;
|
||||
}
|
||||
else if(d.length != 0 && d[0].contains(".google")){
|
||||
return SearchEngine.Google;
|
||||
}
|
||||
else if(d.length != 0 && d[0].contains(".youdao")){
|
||||
return SearchEngine.Youdao;
|
||||
}
|
||||
else if(d.length !=0 && d[0].contains(".soso.com")){
|
||||
return SearchEngine.Soso;
|
||||
}
|
||||
else if(d.length !=0 && d[0].contains(".sogou.com")){
|
||||
return SearchEngine.Sogou;
|
||||
}
|
||||
else if(d.length != 0 && d[0].contains(".linkestan.com")){
|
||||
return SearchEngine.Linkestan;
|
||||
}
|
||||
else if(d.length != 0 && d[0].contains(".parseek.com")){
|
||||
return SearchEngine.Parseek;
|
||||
}
|
||||
else if(d.length !=0 && d[0].contains(".parset.com")){
|
||||
return SearchEngine.Parset;
|
||||
}
|
||||
}
|
||||
else if (domain.contains(".ru")){
|
||||
String[] d = domain.split(".ru");
|
||||
if(d[0].contains("yandex")){
|
||||
return SearchEngine.Yandex;
|
||||
}
|
||||
}
|
||||
else if (domain.contains(".ne.jp")){
|
||||
String[] d = domain.split(".ne.jp");
|
||||
if(d[0].contains("biglobe")){
|
||||
return SearchEngine.Biglobe;
|
||||
}
|
||||
}
|
||||
return SearchEngine.NONE;
|
||||
}
|
||||
|
||||
private String extractSearchEngineQuery(SearchEngine se, String url){
|
||||
String x = "";
|
||||
|
||||
//English Search Engines
|
||||
|
||||
//google.com
|
||||
if(se.equals(SearchEngine.Google)){
|
||||
if(url.contains("?q=")){
|
||||
x = split2(url, "\\?q=");
|
||||
}
|
||||
else {
|
||||
x = split2(url, "&q=");
|
||||
}
|
||||
}
|
||||
|
||||
//yahoo.com
|
||||
else if(se.equals(SearchEngine.Yahoo)){
|
||||
x = split2(url, "\\?p=");
|
||||
}
|
||||
|
||||
//bing.com
|
||||
else if (se.equals(SearchEngine.Bing)){
|
||||
x = split2(url, "\\?q=");
|
||||
}
|
||||
|
||||
//Chinese Search Engines
|
||||
|
||||
//baidu.com
|
||||
else if (se.equals(SearchEngine.Baidu)){
|
||||
if(url.contains("?wd=")){
|
||||
x = split2(url, "\\?wd=");
|
||||
}
|
||||
else if(url.contains("?kw=")){
|
||||
x = split2(url, "\\?kw=");
|
||||
}
|
||||
else if(url.contains("baidu.com/q?") || url.contains("baidu.com/m?") || url.contains("baidu.com/i?")){
|
||||
x = split2(url, "word=");
|
||||
}
|
||||
else if (url.contains("/qw=") || url.contains("?qw=")){
|
||||
x = split2(url, "\\qw=");
|
||||
}
|
||||
else if (url.contains("bs=")){
|
||||
x = split2(url, "&bs=");
|
||||
}
|
||||
}
|
||||
|
||||
//sogou.com
|
||||
else if(se.equals(SearchEngine.Sogou)){
|
||||
x = split2(url, "query=");
|
||||
}
|
||||
|
||||
//Soso.com
|
||||
else if (se.equals(SearchEngine.Soso)){
|
||||
if(url.contains("p=S")){
|
||||
x = split2(url, "p=S");
|
||||
}
|
||||
else if (url.contains("?w=")){
|
||||
x = split2(url, "\\?w=");
|
||||
}
|
||||
else {
|
||||
x = split2(url, "&w=");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//youdao.com
|
||||
else if(se.equals(SearchEngine.Youdao)){
|
||||
if(url.contains("search?q=")){
|
||||
x = split2(url, "\\?q=");
|
||||
}
|
||||
else if (url.contains("?i=")){
|
||||
x = split2(url, "\\?i=");
|
||||
}
|
||||
}
|
||||
|
||||
//Russian Search Engines
|
||||
|
||||
//yandex.ru
|
||||
else if(se.equals(SearchEngine.Yandex)){
|
||||
if(url.contains("?text=")){
|
||||
x = split2(url, "\\?text=");
|
||||
}
|
||||
else{
|
||||
x = split2(url, "&text=");
|
||||
}
|
||||
}
|
||||
|
||||
//Japanese Search Engines
|
||||
|
||||
//biglobe.ne.jp
|
||||
else if(se.equals(SearchEngine.Biglobe)){
|
||||
if(url.contains("?search=")){
|
||||
x = split2(url, "\\?search=");
|
||||
}
|
||||
else if(url.contains("?q=")){
|
||||
x = split2(url, "\\?q=");
|
||||
}
|
||||
else if(url.contains("/key/")){
|
||||
x = split2(url, "/key/");
|
||||
}
|
||||
|
||||
else if (url.contains("&q=")){
|
||||
x = split2(url, "&q=");
|
||||
}
|
||||
}
|
||||
|
||||
//Persian & Arabic Search Engines
|
||||
|
||||
//Linkestan.com
|
||||
else if(se.equals(SearchEngine.Linkestan)){
|
||||
x = split2(url, "\\?psearch=");
|
||||
}
|
||||
|
||||
//Parseek.com
|
||||
else if(se.equals(SearchEngine.Parseek)){
|
||||
x = split2(url, "\\?q=");
|
||||
}
|
||||
|
||||
//Parset.com
|
||||
else if(se.equals(SearchEngine.Parset)){
|
||||
x = split2(url, "\\?Keyword=");
|
||||
}
|
||||
|
||||
try{ //try to decode the url
|
||||
String decoded = URLDecoder.decode(x, "UTF-8");
|
||||
return decoded;
|
||||
}
|
||||
catch(UnsupportedEncodingException uee){ //if it fails, return the encoded string
|
||||
logger.info("Error during URL decoding: " + uee);
|
||||
return x;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//for splitting urls based on a key. Abstracted out of extractSearchEngineQuery()
|
||||
private String split2(String url, String splitkey){
|
||||
String basereturn = "NULL";
|
||||
String splitKeyConverted = splitkey;
|
||||
//Want to determine if string contains a string based on splitkey, but we want to split the string on splitKeyConverted due to regex
|
||||
if(splitkey.contains("\\?")){
|
||||
splitKeyConverted = splitkey.replace("\\?", "?"); //Handling java -> regex conversions and viceversa
|
||||
}
|
||||
if (url.contains(splitKeyConverted)){
|
||||
String[] sp = url.split(splitkey);
|
||||
if(sp.length >= 2){
|
||||
if(sp[sp.length -1].contains("&")){
|
||||
basereturn = sp[sp.length -1].split("&")[0];
|
||||
|
||||
}
|
||||
else{
|
||||
basereturn = sp[sp.length -1];
|
||||
}
|
||||
}
|
||||
}
|
||||
return basereturn;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void getURLs(Image image){
|
||||
Collection<BlackboardAttribute> unknownAttr = new ArrayList<BlackboardAttribute>();
|
||||
try{
|
||||
//from blackboard_artifacts
|
||||
ArrayList<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts("WHERE (`artifact_type_id` = '2' OR `artifact_type_id` = '4') "); //List of every 'web_history' and 'bookmark' artifact
|
||||
int y = listArtifacts.size();
|
||||
int z = 1;
|
||||
getAll:
|
||||
for(BlackboardArtifact artifact : listArtifacts){
|
||||
String query = "";
|
||||
String domain = "";
|
||||
String browser = "";
|
||||
//from tsk_files
|
||||
FsContent fs = this.extractFiles(image, "select * from tsk_files where `obj_id` = '" + artifact.getObjectID() + "'").get(0);
|
||||
SearchEngine se = SearchEngine.NONE;
|
||||
long last_accessed = -1;
|
||||
//from blackboard_attributes
|
||||
ArrayList<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes("Where `artifact_id` = " + artifact.getArtifactID());
|
||||
getAttributes:
|
||||
for(BlackboardAttribute attribute : listAttributes){
|
||||
if(attribute.getAttributeTypeID() == 1){
|
||||
se = getSearchEngine(attribute.getValueString());
|
||||
if(! se.equals(SearchEngine.NONE)){
|
||||
query = extractSearchEngineQuery(se, attribute.getValueString());
|
||||
domain = se.toString();
|
||||
if(query.equals("NULL")){ //False positive match, artifact was not a query.
|
||||
break getAttributes;
|
||||
}
|
||||
}
|
||||
else if(se.equals(SearchEngine.NONE)){
|
||||
break getAttributes; //could not determine type. Will move onto next artifact
|
||||
}
|
||||
}
|
||||
else if(attribute.getAttributeTypeID() == 4){
|
||||
browser = attribute.getValueString();
|
||||
}
|
||||
else if(attribute.getAttributeTypeID() == 33){
|
||||
last_accessed = attribute.getValueLong();
|
||||
}
|
||||
}
|
||||
|
||||
if(!se.equals(SearchEngine.NONE) && !query.equals("NULL")){
|
||||
try{
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_DOMAIN.getTypeID(), "SEUQA", "Base URL", domain));
|
||||
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_TEXT.getTypeID(), "SEUQA", "Extracted search query", query));
|
||||
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID(), "SEUQA", "Browser Name", browser));
|
||||
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_LAST_ACCESSED.getTypeID(), "SEUQA", "Last Accessed", last_accessed));
|
||||
this.addArtifact(ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY, fs , bbattributes);
|
||||
}
|
||||
catch(Exception e){
|
||||
logger.log(Level.SEVERE, "Error while add artifact.", e + " at " + fs.toString());
|
||||
this.addErrorMessage(this.getName() + ": Error while adding artifact");
|
||||
}
|
||||
IngestManagerProxy.fireServiceDataEvent(new ServiceDataEvent("RecentActivity", BlackboardArtifact.ARTIFACT_TYPE.TSK_TRACKPOINT));
|
||||
}
|
||||
z++;
|
||||
|
||||
}
|
||||
}
|
||||
catch (Exception e){
|
||||
logger.info("Encountered error retrieving artifacts: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void process(Image image, IngestImageWorkerController controller) {
|
||||
this.getURLs(image);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void init(IngestManagerProxy managerProxy) {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void complete() {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void stop() {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return this.moduleName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "Extracts search queries on major search engines";
|
||||
}
|
||||
|
||||
@Override
|
||||
public ServiceType getType() {
|
||||
return ServiceType.Image;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasBackgroundJobsRunning() {
|
||||
throw new UnsupportedOperationException("Not supported yet.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasSimpleConfiguration() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasAdvancedConfiguration() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void saveSimpleConfiguration() {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void saveAdvancedConfiguration() {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public JPanel getSimpleConfiguration() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public JPanel getAdvancedConfiguration() {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
@ -74,9 +74,12 @@ Currently, the script supports four argument parameters:
|
||||
|
||||
-s {imgname} : runs the test with a single image as specified by {imgname}. Must include the path, it does not read from input.
|
||||
-r or --rebuild : runs in REBUILD mode, see 5 below
|
||||
-l {config} or --list {config} : runs with the specified {config} file. If you do not specify a path to the file, it searches for it under ./. It will attempt to search for the file by the specified path name if surrounded in quotes. By default will also search ./input for images in addition.
|
||||
-u : runs ignoring unallocated space. Useful for determining framework is operational. Appends "-u" to the output and gold folders for distinguishing them from others. Will automatically compare the right reports.
|
||||
-l {config} or --list {config} : runs with the specified {config} file. If you do not specify a path to the file, it searches for it under ./. It will attempt to search for the file by the specified path name if surrounded in quotes. By default will also search ./input for images in addition. Appends "-l" to the output and gold folders for distinguishing.
|
||||
-u or --unallocated : runs ignoring unallocated space. Useful for determining framework is operational. Appends "-u" to the output and gold folders for distinguishing them from others. Will automatically compare the right reports.
|
||||
-i or --ignore : Will ignore the ./input directory. Use only in combination with -l
|
||||
-d or --delete : Will not delete the keyword search Solr index upon ingest completion. Will consume more disk space.
|
||||
-v or --verbose : Prints logged warnings after each ingest
|
||||
-e or --exception : When followed by a string, will only print out the exceptions that occurred that contain the string. Case sensitive.
|
||||
|
||||
These can be run in any combination.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user