mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-19 19:14:55 +00:00
update SearchEngineURLQueryExtractor.java
This commit is contained in:
parent
47c298fa89
commit
79018b91ff
@ -1,7 +1,7 @@
|
|||||||
/*
|
/*
|
||||||
* Autopsy Forensic Browser
|
* Autopsy Forensic Browser
|
||||||
*
|
*
|
||||||
* Copyright 2012-2014 Basis Technology Corp.
|
* Copyright 2012-2018 Basis Technology Corp.
|
||||||
* Contact: carrier <at> sleuthkit <dot> org
|
* Contact: carrier <at> sleuthkit <dot> org
|
||||||
*
|
*
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
@ -27,7 +27,6 @@ import java.util.Arrays;
|
|||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import javax.xml.parsers.DocumentBuilder;
|
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
import javax.xml.parsers.DocumentBuilderFactory;
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
import org.openide.util.NbBundle;
|
import org.openide.util.NbBundle;
|
||||||
@ -36,9 +35,8 @@ import org.sleuthkit.autopsy.coreutils.PlatformUtil;
|
|||||||
import org.sleuthkit.autopsy.coreutils.XMLUtil;
|
import org.sleuthkit.autopsy.coreutils.XMLUtil;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestJobContext;
|
import org.sleuthkit.autopsy.ingest.IngestJobContext;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
|
import org.sleuthkit.autopsy.ingest.IngestModule.IngestModuleException;
|
||||||
import org.sleuthkit.autopsy.ingest.IngestServices;
|
|
||||||
import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
|
|
||||||
import org.sleuthkit.datamodel.AbstractFile;
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
|
import org.sleuthkit.datamodel.Blackboard;
|
||||||
import org.sleuthkit.datamodel.BlackboardArtifact;
|
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||||
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
|
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
|
||||||
import static org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY;
|
import static org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY;
|
||||||
@ -78,7 +76,7 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
|
|
||||||
private static final String XMLFILE = "SEUQAMappings.xml"; //NON-NLS
|
private static final String XMLFILE = "SEUQAMappings.xml"; //NON-NLS
|
||||||
private static final String XSDFILE = "SearchEngineSchema.xsd"; //NON-NLS
|
private static final String XSDFILE = "SearchEngineSchema.xsd"; //NON-NLS
|
||||||
private static SearchEngineURLQueryExtractor.SearchEngine[] engines;
|
private static SearchEngine[] engines;
|
||||||
|
|
||||||
private Content dataSource;
|
private Content dataSource;
|
||||||
private IngestJobContext context;
|
private IngestJobContext context;
|
||||||
@ -166,11 +164,9 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
Document xmlinput;
|
Document xmlinput;
|
||||||
try {
|
try {
|
||||||
String path = PlatformUtil.getUserConfigDirectory() + File.separator + XMLFILE;
|
String path = PlatformUtil.getUserConfigDirectory() + File.separator + XMLFILE;
|
||||||
File f = new File(path);
|
File configFile = new File(path);
|
||||||
logger.log(Level.INFO, "Load successful"); //NON-NLS
|
logger.log(Level.INFO, "Load successful"); //NON-NLS
|
||||||
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
|
xmlinput = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(configFile);
|
||||||
DocumentBuilder db = dbf.newDocumentBuilder();
|
|
||||||
xmlinput = db.parse(f);
|
|
||||||
|
|
||||||
if (!XMLUtil.xmlIsValid(xmlinput, SearchEngineURLQueryExtractor.class, XSDFILE)) {
|
if (!XMLUtil.xmlIsValid(xmlinput, SearchEngineURLQueryExtractor.class, XSDFILE)) {
|
||||||
logger.log(Level.WARNING, "Error loading Search Engines: could not validate against [" + XSDFILE + "], results may not be accurate."); //NON-NLS
|
logger.log(Level.WARNING, "Error loading Search Engines: could not validate against [" + XSDFILE + "], results may not be accurate."); //NON-NLS
|
||||||
@ -185,7 +181,7 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
}
|
}
|
||||||
|
|
||||||
NodeList nlist = xmlinput.getElementsByTagName("SearchEngine"); //NON-NLS
|
NodeList nlist = xmlinput.getElementsByTagName("SearchEngine"); //NON-NLS
|
||||||
SearchEngineURLQueryExtractor.SearchEngine[] listEngines = new SearchEngineURLQueryExtractor.SearchEngine[nlist.getLength()];
|
SearchEngine[] listEngines = new SearchEngine[nlist.getLength()];
|
||||||
for (int i = 0; i < nlist.getLength(); i++) {
|
for (int i = 0; i < nlist.getLength(); i++) {
|
||||||
NamedNodeMap nnm = nlist.item(i).getAttributes();
|
NamedNodeMap nnm = nlist.item(i).getAttributes();
|
||||||
|
|
||||||
@ -200,8 +196,8 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SearchEngineURLQueryExtractor.SearchEngine Se = new SearchEngineURLQueryExtractor.SearchEngine(EngineName, EnginedomainSubstring, keys);
|
SearchEngine searchEngine = new SearchEngine(EngineName, EnginedomainSubstring, keys);
|
||||||
listEngines[i] = Se;
|
listEngines[i] = searchEngine;
|
||||||
}
|
}
|
||||||
engines = listEngines;
|
engines = listEngines;
|
||||||
}
|
}
|
||||||
@ -216,7 +212,7 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
* is found
|
* is found
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private static SearchEngineURLQueryExtractor.SearchEngine getSearchEngineFromUrl(String domain) {
|
private static SearchEngine getSearchEngineFromUrl(String domain) {
|
||||||
if (engines == null) {
|
if (engines == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@ -235,32 +231,31 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
*
|
*
|
||||||
* @return The extracted search query.
|
* @return The extracted search query.
|
||||||
*/
|
*/
|
||||||
private String extractSearchEngineQuery(SearchEngineURLQueryExtractor.SearchEngine eng, String url) {
|
private String extractSearchEngineQuery(SearchEngine eng, String url) {
|
||||||
String x = ""; //NON-NLS
|
String value = ""; //NON-NLS
|
||||||
|
|
||||||
for (KeyPair kp : eng.getKeys()) {
|
for (KeyPair kp : eng.getKeys()) {
|
||||||
if (url.contains(kp.getKey())) {
|
if (url.contains(kp.getKey())) {
|
||||||
x = getValue(url, kp.getKeyRegExp());
|
value = getValue(url, kp.getKeyRegExp());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try { //try to decode the url
|
try { //try to decode the url
|
||||||
String decoded = URLDecoder.decode(x, "UTF-8"); //NON-NLS
|
return URLDecoder.decode(value, "UTF-8"); //NON-NLS
|
||||||
return decoded;
|
|
||||||
} catch (UnsupportedEncodingException exception) { //if it fails, return the encoded string
|
} catch (UnsupportedEncodingException exception) { //if it fails, return the encoded string
|
||||||
logger.log(Level.FINE, "Error during URL decoding, returning undecoded value:"
|
logger.log(Level.FINE, "Error during URL decoding, returning undecoded value:"
|
||||||
+ "\n\tURL: " + url
|
+ "\n\tURL: " + url
|
||||||
+ "\n\tUndecoded value: " + x
|
+ "\n\tUndecoded value: " + value
|
||||||
+ "\n\tEngine name: " + eng.getEngineName()
|
+ "\n\tEngine name: " + eng.getEngineName()
|
||||||
+ "\n\tEngine domain: " + eng.getDomainSubstring(), exception); //NON-NLS
|
+ "\n\tEngine domain: " + eng.getDomainSubstring(), exception); //NON-NLS
|
||||||
return x;
|
return value;
|
||||||
} catch (IllegalArgumentException exception) { //if it fails, return the encoded string
|
} catch (IllegalArgumentException exception) { //if it fails, return the encoded string
|
||||||
logger.log(Level.SEVERE, "Illegal argument passed to URL decoding, returning undecoded value:"
|
logger.log(Level.SEVERE, "Illegal argument passed to URL decoding, returning undecoded value:"
|
||||||
+ "\n\tURL: " + url
|
+ "\n\tURL: " + url
|
||||||
+ "\n\tUndecoded value: " + x
|
+ "\n\tUndecoded value: " + value
|
||||||
+ "\n\tEngine name: " + eng.getEngineName()
|
+ "\n\tEngine name: " + eng.getEngineName()
|
||||||
+ "\n\tEngine domain: " + eng.getDomainSubstring(), exception); //NON-NLS)
|
+ "\n\tEngine domain: " + eng.getDomainSubstring(), exception); //NON-NLS)
|
||||||
return x;
|
return value;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,18 +278,16 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
* at more formal approaches of splitting on the "?" and then on "&"
|
* at more formal approaches of splitting on the "?" and then on "&"
|
||||||
* resulting in missing things.
|
* resulting in missing things.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
//TODO: What does this old comment mean? : Want to determine if string contains a string based on splitkey, but we want to split the string on splitKeyConverted due to regex
|
||||||
String value = ""; //NON-NLS
|
String value = ""; //NON-NLS
|
||||||
String v = regExpKey;
|
|
||||||
//Want to determine if string contains a string based on splitkey, but we want to split the string on splitKeyConverted due to regex
|
String[] tokens = url.split(regExpKey.replace("\\?", "?"));
|
||||||
if (regExpKey.contains("\\?")) {
|
if (tokens.length >= 2) {
|
||||||
v = regExpKey.replace("\\?", "?");
|
if (tokens[tokens.length - 1].contains("&")) {
|
||||||
}
|
value = tokens[tokens.length - 1].split("&")[0];
|
||||||
String[] sp = url.split(v);
|
|
||||||
if (sp.length >= 2) {
|
|
||||||
if (sp[sp.length - 1].contains("&")) {
|
|
||||||
value = sp[sp.length - 1].split("&")[0];
|
|
||||||
} else {
|
} else {
|
||||||
value = sp[sp.length - 1];
|
value = tokens[tokens.length - 1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return value;
|
return value;
|
||||||
@ -302,68 +295,59 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
|
|
||||||
private void findSearchQueries() {
|
private void findSearchQueries() {
|
||||||
|
|
||||||
int totalQueries = 0;
|
Collection<BlackboardArtifact> sourceArtifacts = new ArrayList<>();
|
||||||
try {
|
try {
|
||||||
//from blackboard_artifacts
|
//List of every 'web_history' and 'bookmark'
|
||||||
Collection<BlackboardArtifact> listArtifacts = currentCase.getSleuthkitCase().getMatchingArtifacts("WHERE (blackboard_artifacts.artifact_type_id = '" + ARTIFACT_TYPE.TSK_WEB_BOOKMARK.getTypeID() //NON-NLS
|
sourceArtifacts.addAll(tskCase.getBlackboardArtifacts(ARTIFACT_TYPE.TSK_WEB_BOOKMARK));
|
||||||
+ "' OR blackboard_artifacts.artifact_type_id = '" + ARTIFACT_TYPE.TSK_WEB_HISTORY.getTypeID() + "') "); //List of every 'web_history' and 'bookmark' artifact NON-NLS
|
sourceArtifacts.addAll(tskCase.getBlackboardArtifacts(ARTIFACT_TYPE.TSK_WEB_HISTORY));
|
||||||
logger.log(Level.INFO, "Processing {0} blackboard artifacts.", listArtifacts.size()); //NON-NLS
|
} catch (TskCoreException tskCoreException) {
|
||||||
|
logger.log(Level.SEVERE, "Error getting TSK_WEB_BOOKMARK or TSK_WEB_HISTORY artifacts", tskCoreException); //NON-NLS
|
||||||
|
}
|
||||||
|
logger.log(Level.INFO, "Processing {0} blackboard artifacts.", sourceArtifacts.size()); //NON-NLS
|
||||||
|
|
||||||
for (BlackboardArtifact artifact : listArtifacts) {
|
Collection<BlackboardArtifact> queryArtifacts = new ArrayList<>();
|
||||||
|
for (BlackboardArtifact sourceArtifact : sourceArtifacts) {
|
||||||
if (context.dataSourceIngestIsCancelled()) {
|
if (context.dataSourceIngestIsCancelled()) {
|
||||||
break; //User cancelled the process.
|
break; //User cancelled the process.
|
||||||
}
|
}
|
||||||
|
long fileId = sourceArtifact.getObjectID();
|
||||||
//initializing default attributes
|
try {
|
||||||
String query = "";
|
if (false == tskCase.isFileFromSource(dataSource, fileId)) {
|
||||||
String searchEngineDomain = "";
|
continue; //File was from a different dataSource. Skipping.
|
||||||
String browser = "";
|
}
|
||||||
long last_accessed = -1;
|
} catch (TskCoreException ex) {
|
||||||
|
logger.log(Level.SEVERE, "Encountered error determining if file " + fileId + "is from datasource " + dataSource.getId(), ex); //NON-NLS
|
||||||
long fileId = artifact.getObjectID();
|
|
||||||
boolean isFromSource = tskCase.isFileFromSource(dataSource, fileId);
|
|
||||||
if (!isFromSource) {
|
|
||||||
//File was from a different dataSource. Skipping.
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
AbstractFile file = tskCase.getAbstractFileById(fileId);
|
AbstractFile file;
|
||||||
|
try {
|
||||||
|
file = tskCase.getAbstractFileById(fileId);
|
||||||
if (file == null) {
|
if (file == null) {
|
||||||
|
logger.log(Level.WARNING, "There was no file for id {0}", fileId); //NON-NLS
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} catch (TskCoreException ex) {
|
||||||
|
logger.log(Level.SEVERE, "Error getting file for id " + fileId, ex); //NON-NLS
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
SearchEngineURLQueryExtractor.SearchEngine se = null;
|
try {
|
||||||
//from blackboard_attributes
|
final String urlString = sourceArtifact.getAttribute(new BlackboardAttribute.Type(TSK_URL)).getValueString();
|
||||||
Collection<BlackboardAttribute> listAttributes = currentCase.getSleuthkitCase().getMatchingAttributes("WHERE artifact_id = " + artifact.getArtifactID()); //NON-NLS
|
SearchEngine searchEngine = getSearchEngineFromUrl(urlString);
|
||||||
|
if (searchEngine == null) { //TODO: should we log this?
|
||||||
for (BlackboardAttribute attribute : listAttributes) {
|
continue;
|
||||||
if (attribute.getAttributeType().getTypeID() == TSK_URL.getTypeID()) {
|
|
||||||
final String urlString = attribute.getValueString();
|
|
||||||
se = getSearchEngineFromUrl(urlString);
|
|
||||||
if (se == null) {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
query = extractSearchEngineQuery(se, attribute.getValueString());
|
String query = extractSearchEngineQuery(searchEngine, urlString);
|
||||||
if (query.isEmpty()) //False positive match, artifact was not a query. NON-NLS
|
if (query.isEmpty()) { //False positive match, artifact was not a query.
|
||||||
{
|
continue;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (attribute.getAttributeType().getTypeID() == TSK_PROG_NAME.getTypeID()) {
|
String browser = sourceArtifact.getAttribute(new BlackboardAttribute.Type(TSK_PROG_NAME)).getValueString();
|
||||||
browser = attribute.getValueString();
|
String searchEngineDomain = sourceArtifact.getAttribute(new BlackboardAttribute.Type(TSK_DOMAIN)).getValueString();
|
||||||
} else if (attribute.getAttributeType().getTypeID() == TSK_DOMAIN.getTypeID()) {
|
long last_accessed = sourceArtifact.getAttribute(new BlackboardAttribute.Type(TSK_DATETIME_ACCESSED)).getValueLong();
|
||||||
searchEngineDomain = attribute.getValueString();
|
|
||||||
} else if (attribute.getAttributeType().getTypeID() == TSK_DATETIME_ACCESSED.getTypeID()) {
|
|
||||||
last_accessed = attribute.getValueLong();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (se != null && !query.isEmpty()) { //NON-NLS
|
|
||||||
// If date doesn't exist, change to 0 (instead of 1969)
|
|
||||||
if (last_accessed == -1) {
|
|
||||||
last_accessed = 0;
|
|
||||||
}
|
|
||||||
Collection<BlackboardAttribute> bbattributes = Arrays.asList(
|
Collection<BlackboardAttribute> bbattributes = Arrays.asList(
|
||||||
new BlackboardAttribute(
|
new BlackboardAttribute(
|
||||||
TSK_DOMAIN, PARENT_MODULE_NAME,
|
TSK_DOMAIN, PARENT_MODULE_NAME,
|
||||||
@ -377,35 +361,23 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
new BlackboardAttribute(
|
new BlackboardAttribute(
|
||||||
TSK_DATETIME_ACCESSED, PARENT_MODULE_NAME,
|
TSK_DATETIME_ACCESSED, PARENT_MODULE_NAME,
|
||||||
last_accessed));
|
last_accessed));
|
||||||
|
|
||||||
BlackboardArtifact bbart = file.newArtifact(TSK_WEB_SEARCH_QUERY);
|
BlackboardArtifact bbart = file.newArtifact(TSK_WEB_SEARCH_QUERY);
|
||||||
bbart.addAttributes(bbattributes);
|
bbart.addAttributes(bbattributes);
|
||||||
se.increment();
|
queryArtifacts.add(bbart);
|
||||||
++totalQueries;
|
searchEngine.increment();
|
||||||
}
|
} catch (TskCoreException ex) {
|
||||||
}
|
logger.log(Level.SEVERE, "Encountered error creating search query artifacts.", ex); //NON-NLS
|
||||||
} catch (TskCoreException e) {
|
|
||||||
logger.log(Level.SEVERE, "Encountered error retrieving artifacts for search engine queries", e); //NON-NLS
|
|
||||||
} finally {
|
|
||||||
if (context.dataSourceIngestIsCancelled()) {
|
|
||||||
logger.info("Operation terminated by user."); //NON-NLS
|
|
||||||
}
|
|
||||||
//TODO: should this be batched? Should it include the actual artifact(s)?
|
|
||||||
IngestServices.getInstance().fireModuleDataEvent(new ModuleDataEvent(
|
|
||||||
NbBundle.getMessage(this.getClass(), "SearchEngineURLQueryAnalyzer.parentModuleName.noSpace"),
|
|
||||||
BlackboardArtifact.ARTIFACT_TYPE.TSK_WEB_SEARCH_QUERY));
|
|
||||||
logger.log(Level.INFO, "Extracted {0} queries from the blackboard", totalQueries); //NON-NLS
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getTotals() {
|
try {
|
||||||
String total = "";
|
blackboard.postArtifacts(queryArtifacts, PARENT_MODULE_NAME);
|
||||||
if (engines == null) {
|
} catch (Blackboard.BlackboardException ex) {
|
||||||
return total;
|
logger.log(Level.SEVERE, "Encountered error posting search query artifacts.", ex); //NON-NLS
|
||||||
}
|
}
|
||||||
for (SearchEngineURLQueryExtractor.SearchEngine se : engines) {
|
|
||||||
total += se.getEngineName() + " : " + se.getTotal() + "\n";
|
logger.log(Level.INFO, "Extracted {0} queries from the blackboard", queryArtifacts.size()); //NON-NLS
|
||||||
}
|
|
||||||
return total;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -413,13 +385,20 @@ final class SearchEngineURLQueryExtractor extends Extract {
|
|||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
this.context = context;
|
this.context = context;
|
||||||
this.findSearchQueries();
|
this.findSearchQueries();
|
||||||
logger.log(Level.INFO, "Search Engine stats: \n{0}", getTotals()); //NON-NLS
|
|
||||||
|
String totals = "";
|
||||||
|
for (SearchEngine se : engines) {
|
||||||
|
totals += se.getEngineName() + " : " + se.getTotal() + "\n";
|
||||||
|
}
|
||||||
|
logger.log(Level.INFO, "Search Engine stats: \n{0}", totals); //NON-NLS
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
||||||
void configExtractor() throws IngestModuleException {
|
void configExtractor() throws IngestModuleException {
|
||||||
try {
|
try {
|
||||||
PlatformUtil.extractResourceToUserConfigDir(SearchEngineURLQueryExtractor.class, XMLFILE, true);
|
PlatformUtil.extractResourceToUserConfigDir(SearchEngineURLQueryExtractor.class,
|
||||||
|
XMLFILE, true);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
String message = Bundle.SearchEngineURLQueryAnalyzer_init_exception_msg(XMLFILE);
|
String message = Bundle.SearchEngineURLQueryAnalyzer_init_exception_msg(XMLFILE);
|
||||||
logger.log(Level.SEVERE, message, e);
|
logger.log(Level.SEVERE, message, e);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user