mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-15 09:17:42 +00:00
Keyword search ingest improvements: alternative performQueryPerTerm() and equivalent bb write methods - better suited for grouping results per hit (in ingest) rather than per query.
Improves overall keyword search ingest performance. Improves regex result highlighting by using the content_ws field.
This commit is contained in:
parent
30a2e3f9f4
commit
69d55f7cc1
@ -534,7 +534,7 @@
|
|||||||
|
|
||||||
<!-- field with white-space tokenized words for TermsComponent regex search (useful for fast search of IP addresses, URLs, certain phone numbers)
|
<!-- field with white-space tokenized words for TermsComponent regex search (useful for fast search of IP addresses, URLs, certain phone numbers)
|
||||||
also be useful for Lucene based queries containing special characters-->
|
also be useful for Lucene based queries containing special characters-->
|
||||||
<field name="content_ws" type="text_ws" indexed="true" stored="false" multiValued="true"/>
|
<field name="content_ws" type="text_ws" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
|
||||||
<!-- Uncommenting the following will create a "timestamp" field using
|
<!-- Uncommenting the following will create a "timestamp" field using
|
||||||
a default value of "NOW" to indicate when each document was indexed.
|
a default value of "NOW" to indicate when each document was indexed.
|
||||||
|
@ -57,7 +57,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
public static final String MODULE_NAME = "Keyword Search";
|
public static final String MODULE_NAME = "Keyword Search";
|
||||||
private static KeywordSearchIngestService instance = null;
|
private static KeywordSearchIngestService instance = null;
|
||||||
private IngestManagerProxy managerProxy;
|
private IngestManagerProxy managerProxy;
|
||||||
private static final long MAX_STRING_EXTRACT_SIZE = 10 * (1 << 10); // * (1 << 10);
|
private static final long MAX_STRING_EXTRACT_SIZE = 1 * (1 << 10); // * (1 << 10); TODO increase
|
||||||
private static final long MAX_INDEX_SIZE = 100 * (1 << 10) * (1 << 10);
|
private static final long MAX_INDEX_SIZE = 100 * (1 << 10) * (1 << 10);
|
||||||
private Ingester ingester;
|
private Ingester ingester;
|
||||||
private volatile boolean commitIndex = false; //whether to commit index next time
|
private volatile boolean commitIndex = false; //whether to commit index next time
|
||||||
@ -78,7 +78,6 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
private volatile boolean finalRunComplete = false;
|
private volatile boolean finalRunComplete = false;
|
||||||
private final String hashDBServiceName = "Hash Lookup";
|
private final String hashDBServiceName = "Hash Lookup";
|
||||||
private SleuthkitCase caseHandle = null;
|
private SleuthkitCase caseHandle = null;
|
||||||
|
|
||||||
// TODO: use a more robust method than checking file extension to determine
|
// TODO: use a more robust method than checking file extension to determine
|
||||||
// whether to try a file
|
// whether to try a file
|
||||||
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
|
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
|
||||||
@ -89,8 +88,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
|
|
||||||
public enum IngestStatus {
|
public enum IngestStatus {
|
||||||
|
|
||||||
INGESTED, EXTRACTED_INGESTED, SKIPPED,
|
INGESTED, EXTRACTED_INGESTED, SKIPPED,};
|
||||||
};
|
|
||||||
private Map<Long, IngestStatus> ingestStatus;
|
private Map<Long, IngestStatus> ingestStatus;
|
||||||
private Map<String, List<FsContent>> reportedHits; //already reported hits
|
private Map<String, List<FsContent>> reportedHits; //already reported hits
|
||||||
|
|
||||||
@ -109,14 +107,14 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
//logger.log(Level.INFO, "hashdb result: " + hashDBResult + "file: " + fsContent.getName());
|
//logger.log(Level.INFO, "hashdb result: " + hashDBResult + "file: " + fsContent.getName());
|
||||||
if (hashDBResult == IngestServiceFsContent.ProcessResult.COND_STOP) {
|
if (hashDBResult == IngestServiceFsContent.ProcessResult.COND_STOP) {
|
||||||
return ProcessResult.OK;
|
return ProcessResult.OK;
|
||||||
}
|
} else if (hashDBResult == IngestServiceFsContent.ProcessResult.ERROR) {
|
||||||
else if (hashDBResult == IngestServiceFsContent.ProcessResult.ERROR) {
|
|
||||||
//notify depending service that keyword search (would) encountered error for this file
|
//notify depending service that keyword search (would) encountered error for this file
|
||||||
return ProcessResult.ERROR;
|
return ProcessResult.ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (processedFiles == false)
|
if (processedFiles == false) {
|
||||||
processedFiles = true;
|
processedFiles = true;
|
||||||
|
}
|
||||||
|
|
||||||
//check if time to commit and previous search is not running
|
//check if time to commit and previous search is not running
|
||||||
//commiting while searching causes performance issues
|
//commiting while searching causes performance issues
|
||||||
@ -201,7 +199,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
caseHandle = Case.getCurrentCase().getSleuthkitCase();
|
caseHandle = Case.getCurrentCase().getSleuthkitCase();
|
||||||
|
|
||||||
this.managerProxy = managerProxy;
|
this.managerProxy = managerProxy;
|
||||||
|
|
||||||
//this deregisters previously registered listeners at every init()
|
//this deregisters previously registered listeners at every init()
|
||||||
pcs = new PropertyChangeSupport(KeywordSearchIngestService.class);
|
pcs = new PropertyChangeSupport(KeywordSearchIngestService.class);
|
||||||
|
|
||||||
@ -286,16 +284,16 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
//no need to check timer thread
|
//no need to check timer thread
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized boolean backgroundJobsCompleteListener(PropertyChangeListener l) {
|
public synchronized boolean backgroundJobsCompleteListener(PropertyChangeListener l) {
|
||||||
if (finalRunComplete == true)
|
if (finalRunComplete == true) {
|
||||||
return false;
|
return false;
|
||||||
else {
|
} else {
|
||||||
pcs.addPropertyChangeListener(l);
|
pcs.addPropertyChangeListener(l);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void commit() {
|
private void commit() {
|
||||||
@ -504,7 +502,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class Searcher extends SwingWorker<Object,Void> {
|
private class Searcher extends SwingWorker<Object, Void> {
|
||||||
|
|
||||||
private List<Keyword> keywords;
|
private List<Keyword> keywords;
|
||||||
private ProgressHandle progress;
|
private ProgressHandle progress;
|
||||||
@ -547,52 +545,69 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
|
|
||||||
KeywordSearchQuery del = null;
|
KeywordSearchQuery del = null;
|
||||||
|
|
||||||
if (keywordQuery.isLiteral()) {
|
boolean isRegex = !keywordQuery.isLiteral();
|
||||||
|
if (!isRegex) {
|
||||||
del = new LuceneQuery(keywordQuery);
|
del = new LuceneQuery(keywordQuery);
|
||||||
del.escape();
|
del.escape();
|
||||||
} else {
|
} else {
|
||||||
del = new TermComponentQuery(keywordQuery);
|
del = new TermComponentQuery(keywordQuery);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<FsContent> queryResult = null;
|
Map<String, List<FsContent>> queryResult = null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
queryResult = del.performQuery();
|
queryResult = del.performQueryPerTerm();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.log(Level.INFO, "Error performing query: " + keywordQuery.getQuery(), e);
|
logger.log(Level.INFO, "Error performing query: " + keywordQuery.getQuery(), e);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
//calculate new results but substracting results already obtained in this run
|
//calculate new results but substracting results already obtained in this run
|
||||||
List<FsContent> newResults = new ArrayList<FsContent>();
|
Map<Keyword, List<FsContent>> newResults = new HashMap<Keyword, List<FsContent>>();
|
||||||
|
|
||||||
List<FsContent> curResults = currentResults.get(keywordQuery);
|
for (String termResult : queryResult.keySet()) {
|
||||||
if (curResults == null) {
|
List<FsContent> queryTermResults = queryResult.get(termResult);
|
||||||
currentResults.put(keywordQuery, queryResult);
|
Keyword termResultK = new Keyword(termResult, !isRegex);
|
||||||
newResults = queryResult;
|
List<FsContent> curTermResults = currentResults.get(termResultK);
|
||||||
} else {
|
if (curTermResults == null) {
|
||||||
for (FsContent res : queryResult) {
|
currentResults.put(termResultK, queryTermResults);
|
||||||
if (!curResults.contains(res)) {
|
newResults.put(termResultK, queryTermResults);
|
||||||
//add to new results
|
} else {
|
||||||
newResults.add(res);
|
//some fscontent hits already exist for this keyword
|
||||||
|
for (FsContent res : queryTermResults) {
|
||||||
|
if (!curTermResults.contains(res)) {
|
||||||
|
//add to new results
|
||||||
|
List<FsContent> newResultsFs = newResults.get(termResultK);
|
||||||
|
if (newResultsFs == null) {
|
||||||
|
newResultsFs = new ArrayList<FsContent>();
|
||||||
|
newResults.put(termResultK, newResultsFs);
|
||||||
|
}
|
||||||
|
newResultsFs.add(res);
|
||||||
|
curTermResults.add(res);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//update current result with new ones
|
|
||||||
curResults.addAll(newResults);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (!newResults.isEmpty()) {
|
if (!newResults.isEmpty()) {
|
||||||
|
|
||||||
//write results to BB
|
//write results to BB
|
||||||
Collection<BlackboardArtifact> newArtifacts = new ArrayList<BlackboardArtifact>(); //new artifacts to report
|
Collection<BlackboardArtifact> newArtifacts = new ArrayList<BlackboardArtifact>(); //new artifacts to report
|
||||||
for (FsContent hitFile : newResults) {
|
for (final Keyword hitTerm : newResults.keySet()) {
|
||||||
if (this.isCancelled()) {
|
List<FsContent> fsContentHits = newResults.get(hitTerm);
|
||||||
return null;
|
for (final FsContent hitFile : fsContentHits) {
|
||||||
}
|
if (this.isCancelled()) {
|
||||||
Collection<KeywordWriteResult> written = del.writeToBlackBoard(hitFile, listName);
|
return null;
|
||||||
for (KeywordWriteResult res : written) {
|
}
|
||||||
newArtifacts.add(res.getArtifact());
|
KeywordWriteResult written = del.writeToBlackBoard(hitTerm.getQuery(), hitFile, listName);
|
||||||
|
if (written == null) {
|
||||||
|
logger.log(Level.INFO, "BB artifact for keyword not written: " + hitTerm.toString());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
newArtifacts.add(written.getArtifact());
|
||||||
|
|
||||||
//generate a data message for each artifact
|
//generate a data message for each artifact
|
||||||
StringBuilder subjectSb = new StringBuilder();
|
StringBuilder subjectSb = new StringBuilder();
|
||||||
@ -606,7 +621,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
}
|
}
|
||||||
subjectSb.append("<");
|
subjectSb.append("<");
|
||||||
String uniqueKey = null;
|
String uniqueKey = null;
|
||||||
BlackboardAttribute attr = res.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID());
|
BlackboardAttribute attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID());
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
final String keyword = attr.getValueString();
|
final String keyword = attr.getValueString();
|
||||||
subjectSb.append(keyword);
|
subjectSb.append(keyword);
|
||||||
@ -625,7 +640,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
detailsSb.append("</tr>");
|
detailsSb.append("</tr>");
|
||||||
|
|
||||||
//preview
|
//preview
|
||||||
attr = res.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID());
|
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID());
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
detailsSb.append("<tr>");
|
detailsSb.append("<tr>");
|
||||||
detailsSb.append("<th>Preview</th>");
|
detailsSb.append("<th>Preview</th>");
|
||||||
@ -642,7 +657,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
|
|
||||||
|
|
||||||
//list
|
//list
|
||||||
attr = res.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SET.getTypeID());
|
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SET.getTypeID());
|
||||||
detailsSb.append("<tr>");
|
detailsSb.append("<tr>");
|
||||||
detailsSb.append("<th>List</th>");
|
detailsSb.append("<th>List</th>");
|
||||||
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
|
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
|
||||||
@ -650,7 +665,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
|
|
||||||
//regex
|
//regex
|
||||||
if (!keywordQuery.isLiteral()) {
|
if (!keywordQuery.isLiteral()) {
|
||||||
attr = res.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID());
|
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID());
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
detailsSb.append("<tr>");
|
detailsSb.append("<tr>");
|
||||||
detailsSb.append("<th>RegEx</th>");
|
detailsSb.append("<th>RegEx</th>");
|
||||||
@ -661,9 +676,10 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
|||||||
}
|
}
|
||||||
detailsSb.append("</table>");
|
detailsSb.append("</table>");
|
||||||
|
|
||||||
managerProxy.postMessage(IngestMessage.createDataMessage(++messageID, instance, subjectSb.toString(), detailsSb.toString(), uniqueKey, res.getArtifact()));
|
managerProxy.postMessage(IngestMessage.createDataMessage(++messageID, instance, subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact()));
|
||||||
}
|
|
||||||
} //for each file hit
|
} //for each term hit
|
||||||
|
}//for each file hit
|
||||||
|
|
||||||
//update artifact browser
|
//update artifact browser
|
||||||
IngestManager.fireServiceDataEvent(new ServiceDataEvent(MODULE_NAME, ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts));
|
IngestManager.fireServiceDataEvent(new ServiceDataEvent(MODULE_NAME, ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts));
|
||||||
|
@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch;
|
|||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import org.apache.solr.client.solrj.response.TermsResponse.Term;
|
import org.apache.solr.client.solrj.response.TermsResponse.Term;
|
||||||
import org.sleuthkit.datamodel.FsContent;
|
import org.sleuthkit.datamodel.FsContent;
|
||||||
|
|
||||||
@ -34,10 +35,20 @@ public interface KeywordSearchQuery {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* execute query and return results without publishing them
|
* execute query and return results without publishing them
|
||||||
|
* return results for all matching terms
|
||||||
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public List<FsContent> performQuery();
|
public List<FsContent> performQuery();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* execute query and return results without publishing them
|
||||||
|
* return results per term
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Map<String,List<FsContent>> performQueryPerTerm();
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* execute the query and publish results
|
* execute the query and publish results
|
||||||
@ -80,6 +91,16 @@ public interface KeywordSearchQuery {
|
|||||||
* @return collection of results (with cached bb artifacts/attributes) created and written
|
* @return collection of results (with cached bb artifacts/attributes) created and written
|
||||||
*/
|
*/
|
||||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName);
|
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* write results to blackboard per single term and file hit
|
||||||
|
* this method is useful if something else should keep track of partial results to write
|
||||||
|
* @param termHit the single term for which to write results
|
||||||
|
* @param newFsHit fscontent for which to write results for this hit
|
||||||
|
* @param listName listname
|
||||||
|
* @return the single result (with cached bb artifact/attributes) created and written, or null if nothing was written
|
||||||
|
*/
|
||||||
|
public KeywordWriteResult writeToBlackBoard(String termHit, FsContent newFsHit, String listName);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,6 +35,7 @@ import org.sleuthkit.autopsy.datamodel.KeyValue;
|
|||||||
import org.sleuthkit.autopsy.keywordsearch.KeywordSearch.QueryType;
|
import org.sleuthkit.autopsy.keywordsearch.KeywordSearch.QueryType;
|
||||||
import org.sleuthkit.datamodel.FsContent;
|
import org.sleuthkit.datamodel.FsContent;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Query manager responsible for running appropriate queries and displaying results
|
* Query manager responsible for running appropriate queries and displaying results
|
||||||
* for single, multi keyword queries, with detailed or collapsed results
|
* for single, multi keyword queries, with detailed or collapsed results
|
||||||
@ -143,10 +144,16 @@ public class KeywordSearchQueryManager implements KeywordSearchQuery {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<FsContent> performQuery() {
|
public List<FsContent> performQuery() {
|
||||||
//not done here
|
throw new UnsupportedOperationException("performQuery() unsupported");
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, List<FsContent>> performQueryPerTerm() {
|
||||||
|
throw new UnsupportedOperationException("performQueryPerTerm() unsupported");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean validate() {
|
public boolean validate() {
|
||||||
boolean allValid = true;
|
boolean allValid = true;
|
||||||
@ -192,14 +199,16 @@ public class KeywordSearchQueryManager implements KeywordSearchQuery {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
||||||
Collection<KeywordWriteResult> ret = new ArrayList<KeywordWriteResult>();
|
throw new UnsupportedOperationException("writeToBlackBoard() unsupported by manager");
|
||||||
for (KeywordSearchQuery q : queryDelegates) {
|
|
||||||
ret.addAll(q.writeToBlackBoard(newFsHit, listName));
|
|
||||||
}
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public KeywordWriteResult writeToBlackBoard(String termHit, FsContent newFsHit, String listName) {
|
||||||
|
throw new UnsupportedOperationException("writeToBlackBoard() unsupported by manager");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* custom KeyValue that also stores query object to execute
|
* custom KeyValue that also stores query object to execute
|
||||||
*/
|
*/
|
||||||
|
@ -23,6 +23,7 @@ import java.sql.SQLException;
|
|||||||
import java.sql.Statement;
|
import java.sql.Statement;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
@ -59,9 +60,8 @@ public class LuceneQuery implements KeywordSearchQuery {
|
|||||||
private Keyword keywordQuery = null;
|
private Keyword keywordQuery = null;
|
||||||
//use different highlight Solr fields for regex and literal search
|
//use different highlight Solr fields for regex and literal search
|
||||||
static final String HIGHLIGHT_FIELD_LITERAL = "content";
|
static final String HIGHLIGHT_FIELD_LITERAL = "content";
|
||||||
//TODO change to content_ws and in Solr schema to stored="true" to improve regex highlight matching
|
//static final String HIGHLIGHT_FIELD_REGEX = "content";
|
||||||
static final String HIGHLIGHT_FIELD_REGEX = "content";
|
static final String HIGHLIGHT_FIELD_REGEX = "content_ws";
|
||||||
//static final String HIGHLIGHT_FIELD_REGEX = "content_ws";
|
|
||||||
|
|
||||||
public LuceneQuery(Keyword keywordQuery) {
|
public LuceneQuery(Keyword keywordQuery) {
|
||||||
this(keywordQuery.getQuery());
|
this(keywordQuery.getQuery());
|
||||||
@ -160,6 +160,18 @@ public class LuceneQuery implements KeywordSearchQuery {
|
|||||||
return matches;
|
return matches;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, List<FsContent>> performQueryPerTerm() {
|
||||||
|
Map<String, List<FsContent>> results = new HashMap<String, List<FsContent>>();
|
||||||
|
//in case of single term literal query there is only 1 term, so delegate to performQuery()
|
||||||
|
results.put(query, performQuery());
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void execute() {
|
public void execute() {
|
||||||
escape();
|
escape();
|
||||||
@ -211,19 +223,28 @@ public class LuceneQuery implements KeywordSearchQuery {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
||||||
|
List<KeywordWriteResult> ret = new ArrayList<KeywordWriteResult>();
|
||||||
|
KeywordWriteResult written = writeToBlackBoard(query, newFsHit, listName);
|
||||||
|
if (written != null)
|
||||||
|
ret.add(written);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public KeywordWriteResult writeToBlackBoard(String termHit, FsContent newFsHit, String listName) {
|
||||||
final String MODULE_NAME = KeywordSearchIngestService.MODULE_NAME;
|
final String MODULE_NAME = KeywordSearchIngestService.MODULE_NAME;
|
||||||
|
|
||||||
Collection<KeywordWriteResult> writeResults = new ArrayList<KeywordWriteResult>();
|
|
||||||
KeywordWriteResult writeResult = null;
|
KeywordWriteResult writeResult = null;
|
||||||
Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
|
Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
|
||||||
BlackboardArtifact bba = null;
|
BlackboardArtifact bba = null;
|
||||||
try {
|
try {
|
||||||
bba = newFsHit.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
bba = newFsHit.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
||||||
writeResult = new KeywordWriteResult(bba);
|
writeResult = new KeywordWriteResult(bba);
|
||||||
writeResults.add(writeResult);
|
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
|
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
|
||||||
return writeResults;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
String snippet = null;
|
String snippet = null;
|
||||||
@ -236,7 +257,7 @@ public class LuceneQuery implements KeywordSearchQuery {
|
|||||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "", KeywordSearchUtil.escapeForBlackBoard(snippet)));
|
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "", KeywordSearchUtil.escapeForBlackBoard(snippet)));
|
||||||
}
|
}
|
||||||
//keyword
|
//keyword
|
||||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "", query));
|
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "", termHit));
|
||||||
//list
|
//list
|
||||||
if (listName == null) {
|
if (listName == null) {
|
||||||
listName = "";
|
listName = "";
|
||||||
@ -249,17 +270,18 @@ public class LuceneQuery implements KeywordSearchQuery {
|
|||||||
if (keywordQuery != null) {
|
if (keywordQuery != null) {
|
||||||
BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
|
BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
|
||||||
if (selType != null) {
|
if (selType != null) {
|
||||||
attributes.add(new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, "", query));
|
attributes.add(new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, "", termHit));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
bba.addAttributes(attributes); //write out to bb
|
bba.addAttributes(attributes); //write out to bb
|
||||||
writeResult.add(attributes);
|
writeResult.add(attributes);
|
||||||
|
return writeResult;
|
||||||
} catch (TskException e) {
|
} catch (TskException e) {
|
||||||
logger.log(Level.INFO, "Error adding bb attributes to artifact", e);
|
logger.log(Level.INFO, "Error adding bb attributes to artifact", e);
|
||||||
}
|
}
|
||||||
return writeResults;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -18,8 +18,6 @@
|
|||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.keywordsearch;
|
package org.sleuthkit.autopsy.keywordsearch;
|
||||||
|
|
||||||
import java.io.UnsupportedEncodingException;
|
|
||||||
import java.net.URLEncoder;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@ -30,7 +28,6 @@ import java.util.Map;
|
|||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import java.util.logging.Logger;
|
import java.util.logging.Logger;
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
import java.util.regex.PatternSyntaxException;
|
import java.util.regex.PatternSyntaxException;
|
||||||
import javax.swing.SwingWorker;
|
import javax.swing.SwingWorker;
|
||||||
@ -156,9 +153,73 @@ public class TermComponentQuery implements KeywordSearchQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
public KeywordWriteResult writeToBlackBoard(String termHit, FsContent newFsHit, String listName) {
|
||||||
final String MODULE_NAME = KeywordSearchIngestService.MODULE_NAME;
|
final String MODULE_NAME = KeywordSearchIngestService.MODULE_NAME;
|
||||||
|
|
||||||
|
//snippet
|
||||||
|
String snippet = null;
|
||||||
|
try {
|
||||||
|
snippet = LuceneQuery.querySnippet(KeywordSearchUtil.escapeLuceneQuery(termHit, true, false), newFsHit.getId(), true);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.log(Level.INFO, "Error querying snippet: " + termHit, e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (snippet == null || snippet.equals("")) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
//there is match actually in this file, create artifact only then
|
||||||
|
BlackboardArtifact bba = null;
|
||||||
|
KeywordWriteResult writeResult = null;
|
||||||
|
Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
|
||||||
|
try {
|
||||||
|
bba = newFsHit.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
||||||
|
writeResult = new KeywordWriteResult(bba);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//regex match
|
||||||
|
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "", termHit));
|
||||||
|
//list
|
||||||
|
if (listName == null) {
|
||||||
|
listName = "";
|
||||||
|
}
|
||||||
|
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SET.getTypeID(), MODULE_NAME, "", listName));
|
||||||
|
|
||||||
|
//preview
|
||||||
|
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "", snippet));
|
||||||
|
|
||||||
|
//regex keyword
|
||||||
|
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "", termsQuery));
|
||||||
|
|
||||||
|
//selector TODO move to general info artifact
|
||||||
|
/*
|
||||||
|
if (keywordQuery != null) {
|
||||||
|
BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
|
||||||
|
if (selType != null) {
|
||||||
|
BlackboardAttribute selAttr = new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, "", regexMatch);
|
||||||
|
attributes.add(selAttr);
|
||||||
|
}
|
||||||
|
} */
|
||||||
|
|
||||||
|
try {
|
||||||
|
bba.addAttributes(attributes);
|
||||||
|
writeResult.add(attributes);
|
||||||
|
return writeResult;
|
||||||
|
} catch (TskException e) {
|
||||||
|
logger.log(Level.INFO, "Error adding bb attributes for terms search artifact", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
||||||
Collection<KeywordWriteResult> writeResults = new ArrayList<KeywordWriteResult>();
|
Collection<KeywordWriteResult> writeResults = new ArrayList<KeywordWriteResult>();
|
||||||
|
|
||||||
//get unique term matches, all cases
|
//get unique term matches, all cases
|
||||||
@ -169,70 +230,46 @@ public class TermComponentQuery implements KeywordSearchQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (String regexMatch : matches.keySet()) {
|
for (String regexMatch : matches.keySet()) {
|
||||||
//snippet
|
KeywordWriteResult written = writeToBlackBoard(regexMatch, newFsHit, listName);
|
||||||
String snippet = null;
|
if (written != null)
|
||||||
try {
|
writeResults.add(written);
|
||||||
snippet = LuceneQuery.querySnippet(KeywordSearchUtil.escapeLuceneQuery(regexMatch, true, false), newFsHit.getId(), true);
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.log(Level.INFO, "Error querying snippet: " + regexMatch, e);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (snippet == null || snippet.equals("")) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
//there is match actually in this file, create artifact only then
|
|
||||||
BlackboardArtifact bba = null;
|
|
||||||
KeywordWriteResult writeResult = null;
|
|
||||||
Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
|
|
||||||
try {
|
|
||||||
bba = newFsHit.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
|
||||||
writeResult = new KeywordWriteResult(bba);
|
|
||||||
writeResults.add(writeResult);
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
//regex match
|
|
||||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "", regexMatch));
|
|
||||||
//list
|
|
||||||
if (listName == null) {
|
|
||||||
listName = "";
|
|
||||||
}
|
|
||||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SET.getTypeID(), MODULE_NAME, "", listName));
|
|
||||||
|
|
||||||
//preview
|
|
||||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "", snippet));
|
|
||||||
|
|
||||||
//regex keyword
|
|
||||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "", termsQuery));
|
|
||||||
|
|
||||||
//selector TODO move to general info artifact
|
|
||||||
/*
|
|
||||||
if (keywordQuery != null) {
|
|
||||||
BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
|
|
||||||
if (selType != null) {
|
|
||||||
BlackboardAttribute selAttr = new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, "", regexMatch);
|
|
||||||
attributes.add(selAttr);
|
|
||||||
}
|
|
||||||
} */
|
|
||||||
|
|
||||||
try {
|
|
||||||
bba.addAttributes(attributes);
|
|
||||||
writeResult.add(attributes);
|
|
||||||
} catch (TskException e) {
|
|
||||||
logger.log(Level.INFO, "Error adding bb attributes for terms search artifact", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
} //for each term
|
} //for each term
|
||||||
|
|
||||||
return writeResults;
|
return writeResults;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, List<FsContent>> performQueryPerTerm() {
|
||||||
|
Map<String, List<FsContent>> results = new HashMap<String, List<FsContent>>();
|
||||||
|
|
||||||
|
final SolrQuery q = createQuery();
|
||||||
|
terms = executeQuery(q);
|
||||||
|
|
||||||
|
|
||||||
|
for (Term term : terms) {
|
||||||
|
final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm(), true, false);
|
||||||
|
if (termS.contains("*")) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder filesQueryB = new StringBuilder();
|
||||||
|
filesQueryB.append(TERMS_SEARCH_FIELD).append(":").append(termS);
|
||||||
|
final String queryStr = filesQueryB.toString();
|
||||||
|
|
||||||
|
LuceneQuery filesQuery = new LuceneQuery(queryStr);
|
||||||
|
try {
|
||||||
|
List<FsContent> subResults = filesQuery.performQuery();
|
||||||
|
results.put(term.getTerm(), subResults);
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
logger.log(Level.SEVERE, "Error executing Solr query,", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* return collapsed matches with all files for the query
|
* return collapsed matches with all files for the query
|
||||||
* without per match breakdown
|
* without per match breakdown
|
||||||
|
Loading…
x
Reference in New Issue
Block a user