mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-15 09:17:42 +00:00
Keyword search ingest improvements: alternative performQueryPerTerm() and equivalent bb write methods - better suited to grouping results per hit (in ingest) rather than per query.
Improves overall keyword search ingest performance. Improves regex result highlighting by using the content_ws field.
This commit is contained in:
parent
30a2e3f9f4
commit
69d55f7cc1
@ -534,7 +534,7 @@
|
||||
|
||||
<!-- field with white-space tokenized words for TermsComponent regex search (useful for fast search of IP addresses, URLs, certain phone numbers)
|
||||
also useful for Lucene-based queries containing special characters-->
|
||||
<field name="content_ws" type="text_ws" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="content_ws" type="text_ws" indexed="true" stored="true" multiValued="true"/>
|
||||
|
||||
<!-- Uncommenting the following will create a "timestamp" field using
|
||||
a default value of "NOW" to indicate when each document was indexed.
|
||||
|
@ -57,7 +57,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
public static final String MODULE_NAME = "Keyword Search";
|
||||
private static KeywordSearchIngestService instance = null;
|
||||
private IngestManagerProxy managerProxy;
|
||||
private static final long MAX_STRING_EXTRACT_SIZE = 10 * (1 << 10); // * (1 << 10);
|
||||
private static final long MAX_STRING_EXTRACT_SIZE = 1 * (1 << 10); // * (1 << 10); TODO increase
|
||||
private static final long MAX_INDEX_SIZE = 100 * (1 << 10) * (1 << 10);
|
||||
private Ingester ingester;
|
||||
private volatile boolean commitIndex = false; //whether to commit index next time
|
||||
@ -78,7 +78,6 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
private volatile boolean finalRunComplete = false;
|
||||
private final String hashDBServiceName = "Hash Lookup";
|
||||
private SleuthkitCase caseHandle = null;
|
||||
|
||||
// TODO: use a more robust method than checking file extension to determine
|
||||
// whether to try a file
|
||||
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
|
||||
@ -89,8 +88,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
|
||||
public enum IngestStatus {
|
||||
|
||||
INGESTED, EXTRACTED_INGESTED, SKIPPED,
|
||||
};
|
||||
INGESTED, EXTRACTED_INGESTED, SKIPPED,};
|
||||
private Map<Long, IngestStatus> ingestStatus;
|
||||
private Map<String, List<FsContent>> reportedHits; //already reported hits
|
||||
|
||||
@ -109,14 +107,14 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
//logger.log(Level.INFO, "hashdb result: " + hashDBResult + "file: " + fsContent.getName());
|
||||
if (hashDBResult == IngestServiceFsContent.ProcessResult.COND_STOP) {
|
||||
return ProcessResult.OK;
|
||||
}
|
||||
else if (hashDBResult == IngestServiceFsContent.ProcessResult.ERROR) {
|
||||
} else if (hashDBResult == IngestServiceFsContent.ProcessResult.ERROR) {
|
||||
//notify the depending service that keyword search (would have) encountered an error for this file
|
||||
return ProcessResult.ERROR;
|
||||
}
|
||||
|
||||
if (processedFiles == false)
|
||||
|
||||
if (processedFiles == false) {
|
||||
processedFiles = true;
|
||||
}
|
||||
|
||||
//check if time to commit and previous search is not running
|
||||
//committing while searching causes performance issues
|
||||
@ -201,7 +199,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
caseHandle = Case.getCurrentCase().getSleuthkitCase();
|
||||
|
||||
this.managerProxy = managerProxy;
|
||||
|
||||
|
||||
//this deregisters previously registered listeners at every init()
|
||||
pcs = new PropertyChangeSupport(KeywordSearchIngestService.class);
|
||||
|
||||
@ -286,16 +284,16 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
//no need to check timer thread
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public synchronized boolean backgroundJobsCompleteListener(PropertyChangeListener l) {
|
||||
if (finalRunComplete == true)
|
||||
if (finalRunComplete == true) {
|
||||
return false;
|
||||
else {
|
||||
} else {
|
||||
pcs.addPropertyChangeListener(l);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private void commit() {
|
||||
@ -504,7 +502,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
}
|
||||
}
|
||||
|
||||
private class Searcher extends SwingWorker<Object,Void> {
|
||||
private class Searcher extends SwingWorker<Object, Void> {
|
||||
|
||||
private List<Keyword> keywords;
|
||||
private ProgressHandle progress;
|
||||
@ -547,52 +545,69 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
|
||||
KeywordSearchQuery del = null;
|
||||
|
||||
if (keywordQuery.isLiteral()) {
|
||||
boolean isRegex = !keywordQuery.isLiteral();
|
||||
if (!isRegex) {
|
||||
del = new LuceneQuery(keywordQuery);
|
||||
del.escape();
|
||||
} else {
|
||||
del = new TermComponentQuery(keywordQuery);
|
||||
}
|
||||
|
||||
List<FsContent> queryResult = null;
|
||||
Map<String, List<FsContent>> queryResult = null;
|
||||
|
||||
try {
|
||||
queryResult = del.performQuery();
|
||||
queryResult = del.performQueryPerTerm();
|
||||
} catch (Exception e) {
|
||||
logger.log(Level.INFO, "Error performing query: " + keywordQuery.getQuery(), e);
|
||||
continue;
|
||||
}
|
||||
|
||||
//calculate new results by subtracting results already obtained in this run
|
||||
List<FsContent> newResults = new ArrayList<FsContent>();
|
||||
Map<Keyword, List<FsContent>> newResults = new HashMap<Keyword, List<FsContent>>();
|
||||
|
||||
List<FsContent> curResults = currentResults.get(keywordQuery);
|
||||
if (curResults == null) {
|
||||
currentResults.put(keywordQuery, queryResult);
|
||||
newResults = queryResult;
|
||||
} else {
|
||||
for (FsContent res : queryResult) {
|
||||
if (!curResults.contains(res)) {
|
||||
//add to new results
|
||||
newResults.add(res);
|
||||
for (String termResult : queryResult.keySet()) {
|
||||
List<FsContent> queryTermResults = queryResult.get(termResult);
|
||||
Keyword termResultK = new Keyword(termResult, !isRegex);
|
||||
List<FsContent> curTermResults = currentResults.get(termResultK);
|
||||
if (curTermResults == null) {
|
||||
currentResults.put(termResultK, queryTermResults);
|
||||
newResults.put(termResultK, queryTermResults);
|
||||
} else {
|
||||
//some fscontent hits already exist for this keyword
|
||||
for (FsContent res : queryTermResults) {
|
||||
if (!curTermResults.contains(res)) {
|
||||
//add to new results
|
||||
List<FsContent> newResultsFs = newResults.get(termResultK);
|
||||
if (newResultsFs == null) {
|
||||
newResultsFs = new ArrayList<FsContent>();
|
||||
newResults.put(termResultK, newResultsFs);
|
||||
}
|
||||
newResultsFs.add(res);
|
||||
curTermResults.add(res);
|
||||
}
|
||||
}
|
||||
}
|
||||
//update current result with new ones
|
||||
curResults.addAll(newResults);
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (!newResults.isEmpty()) {
|
||||
|
||||
//write results to BB
|
||||
Collection<BlackboardArtifact> newArtifacts = new ArrayList<BlackboardArtifact>(); //new artifacts to report
|
||||
for (FsContent hitFile : newResults) {
|
||||
if (this.isCancelled()) {
|
||||
return null;
|
||||
}
|
||||
Collection<KeywordWriteResult> written = del.writeToBlackBoard(hitFile, listName);
|
||||
for (KeywordWriteResult res : written) {
|
||||
newArtifacts.add(res.getArtifact());
|
||||
for (final Keyword hitTerm : newResults.keySet()) {
|
||||
List<FsContent> fsContentHits = newResults.get(hitTerm);
|
||||
for (final FsContent hitFile : fsContentHits) {
|
||||
if (this.isCancelled()) {
|
||||
return null;
|
||||
}
|
||||
KeywordWriteResult written = del.writeToBlackBoard(hitTerm.getQuery(), hitFile, listName);
|
||||
if (written == null) {
|
||||
logger.log(Level.INFO, "BB artifact for keyword not written: " + hitTerm.toString());
|
||||
continue;
|
||||
}
|
||||
|
||||
newArtifacts.add(written.getArtifact());
|
||||
|
||||
//generate a data message for each artifact
|
||||
StringBuilder subjectSb = new StringBuilder();
|
||||
@ -606,7 +621,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
}
|
||||
subjectSb.append("<");
|
||||
String uniqueKey = null;
|
||||
BlackboardAttribute attr = res.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID());
|
||||
BlackboardAttribute attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID());
|
||||
if (attr != null) {
|
||||
final String keyword = attr.getValueString();
|
||||
subjectSb.append(keyword);
|
||||
@ -625,7 +640,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
detailsSb.append("</tr>");
|
||||
|
||||
//preview
|
||||
attr = res.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID());
|
||||
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID());
|
||||
if (attr != null) {
|
||||
detailsSb.append("<tr>");
|
||||
detailsSb.append("<th>Preview</th>");
|
||||
@ -642,7 +657,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
|
||||
|
||||
//list
|
||||
attr = res.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SET.getTypeID());
|
||||
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SET.getTypeID());
|
||||
detailsSb.append("<tr>");
|
||||
detailsSb.append("<th>List</th>");
|
||||
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
|
||||
@ -650,7 +665,7 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
|
||||
//regex
|
||||
if (!keywordQuery.isLiteral()) {
|
||||
attr = res.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID());
|
||||
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID());
|
||||
if (attr != null) {
|
||||
detailsSb.append("<tr>");
|
||||
detailsSb.append("<th>RegEx</th>");
|
||||
@ -661,9 +676,10 @@ public final class KeywordSearchIngestService implements IngestServiceFsContent
|
||||
}
|
||||
detailsSb.append("</table>");
|
||||
|
||||
managerProxy.postMessage(IngestMessage.createDataMessage(++messageID, instance, subjectSb.toString(), detailsSb.toString(), uniqueKey, res.getArtifact()));
|
||||
}
|
||||
} //for each file hit
|
||||
managerProxy.postMessage(IngestMessage.createDataMessage(++messageID, instance, subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact()));
|
||||
|
||||
} //for each term hit
|
||||
}//for each file hit
|
||||
|
||||
//update artifact browser
|
||||
IngestManager.fireServiceDataEvent(new ServiceDataEvent(MODULE_NAME, ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts));
|
||||
|
@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.apache.solr.client.solrj.response.TermsResponse.Term;
|
||||
import org.sleuthkit.datamodel.FsContent;
|
||||
|
||||
@ -34,10 +35,20 @@ public interface KeywordSearchQuery {
|
||||
|
||||
/**
|
||||
* execute query and return results without publishing them
|
||||
* return results for all matching terms
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public List<FsContent> performQuery();
|
||||
|
||||
/**
|
||||
* execute query and return results without publishing them
|
||||
* return results per term
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Map<String,List<FsContent>> performQueryPerTerm();
|
||||
|
||||
|
||||
/**
|
||||
* execute the query and publish results
|
||||
@ -80,6 +91,16 @@ public interface KeywordSearchQuery {
|
||||
* @return collection of results (with cached bb artifacts/attributes) created and written
|
||||
*/
|
||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName);
|
||||
|
||||
/**
|
||||
* write results to blackboard per single term and file hit
|
||||
* this method is useful if something else should keep track of partial results to write
|
||||
* @param termHit the only term for which to write results
|
||||
* @param newFsHit fscontent for which to write results for this hit
|
||||
* @param listName listname
|
||||
* @return the result (with cached bb artifact/attributes) created and written, or null if nothing was written
|
||||
*/
|
||||
public KeywordWriteResult writeToBlackBoard(String termHit, FsContent newFsHit, String listName);
|
||||
|
||||
}
|
||||
|
||||
|
@ -35,6 +35,7 @@ import org.sleuthkit.autopsy.datamodel.KeyValue;
|
||||
import org.sleuthkit.autopsy.keywordsearch.KeywordSearch.QueryType;
|
||||
import org.sleuthkit.datamodel.FsContent;
|
||||
|
||||
|
||||
/**
|
||||
* Query manager responsible for running appropriate queries and displaying results
|
||||
* for single, multi keyword queries, with detailed or collapsed results
|
||||
@ -143,10 +144,16 @@ public class KeywordSearchQueryManager implements KeywordSearchQuery {
|
||||
|
||||
@Override
|
||||
public List<FsContent> performQuery() {
|
||||
//not done here
|
||||
return null;
|
||||
throw new UnsupportedOperationException("performQuery() unsupported");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, List<FsContent>> performQueryPerTerm() {
|
||||
throw new UnsupportedOperationException("performQueryPerTerm() unsupported");
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public boolean validate() {
|
||||
boolean allValid = true;
|
||||
@ -192,14 +199,16 @@ public class KeywordSearchQueryManager implements KeywordSearchQuery {
|
||||
|
||||
@Override
|
||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
||||
Collection<KeywordWriteResult> ret = new ArrayList<KeywordWriteResult>();
|
||||
for (KeywordSearchQuery q : queryDelegates) {
|
||||
ret.addAll(q.writeToBlackBoard(newFsHit, listName));
|
||||
}
|
||||
return ret;
|
||||
throw new UnsupportedOperationException("writeToBlackBoard() unsupported by manager");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public KeywordWriteResult writeToBlackBoard(String termHit, FsContent newFsHit, String listName) {
|
||||
throw new UnsupportedOperationException("writeToBlackBoard() unsupported by manager");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
/**
|
||||
* custom KeyValue that also stores query object to execute
|
||||
*/
|
||||
|
@ -23,6 +23,7 @@ import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.logging.Level;
|
||||
@ -59,9 +60,8 @@ public class LuceneQuery implements KeywordSearchQuery {
|
||||
private Keyword keywordQuery = null;
|
||||
//use different highlight Solr fields for regex and literal search
|
||||
static final String HIGHLIGHT_FIELD_LITERAL = "content";
|
||||
//TODO change to content_ws and in Solr schema to stored="true" to improve regex highlight matching
|
||||
static final String HIGHLIGHT_FIELD_REGEX = "content";
|
||||
//static final String HIGHLIGHT_FIELD_REGEX = "content_ws";
|
||||
//static final String HIGHLIGHT_FIELD_REGEX = "content";
|
||||
static final String HIGHLIGHT_FIELD_REGEX = "content_ws";
|
||||
|
||||
public LuceneQuery(Keyword keywordQuery) {
|
||||
this(keywordQuery.getQuery());
|
||||
@ -160,6 +160,18 @@ public class LuceneQuery implements KeywordSearchQuery {
|
||||
return matches;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, List<FsContent>> performQueryPerTerm() {
|
||||
Map<String, List<FsContent>> results = new HashMap<String, List<FsContent>>();
|
||||
//in case of single term literal query there is only 1 term, so delegate to performQuery()
|
||||
results.put(query, performQuery());
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public void execute() {
|
||||
escape();
|
||||
@ -211,19 +223,28 @@ public class LuceneQuery implements KeywordSearchQuery {
|
||||
|
||||
@Override
|
||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
||||
List<KeywordWriteResult> ret = new ArrayList<KeywordWriteResult>();
|
||||
KeywordWriteResult written = writeToBlackBoard(query, newFsHit, listName);
|
||||
if (written != null)
|
||||
ret.add(written);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public KeywordWriteResult writeToBlackBoard(String termHit, FsContent newFsHit, String listName) {
|
||||
final String MODULE_NAME = KeywordSearchIngestService.MODULE_NAME;
|
||||
|
||||
Collection<KeywordWriteResult> writeResults = new ArrayList<KeywordWriteResult>();
|
||||
KeywordWriteResult writeResult = null;
|
||||
Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
|
||||
BlackboardArtifact bba = null;
|
||||
try {
|
||||
bba = newFsHit.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
||||
writeResult = new KeywordWriteResult(bba);
|
||||
writeResults.add(writeResult);
|
||||
} catch (Exception e) {
|
||||
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
|
||||
return writeResults;
|
||||
return null;
|
||||
}
|
||||
|
||||
String snippet = null;
|
||||
@ -236,7 +257,7 @@ public class LuceneQuery implements KeywordSearchQuery {
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "", KeywordSearchUtil.escapeForBlackBoard(snippet)));
|
||||
}
|
||||
//keyword
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "", query));
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "", termHit));
|
||||
//list
|
||||
if (listName == null) {
|
||||
listName = "";
|
||||
@ -249,17 +270,18 @@ public class LuceneQuery implements KeywordSearchQuery {
|
||||
if (keywordQuery != null) {
|
||||
BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
|
||||
if (selType != null) {
|
||||
attributes.add(new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, "", query));
|
||||
attributes.add(new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, "", termHit));
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
bba.addAttributes(attributes); //write out to bb
|
||||
writeResult.add(attributes);
|
||||
return writeResult;
|
||||
} catch (TskException e) {
|
||||
logger.log(Level.INFO, "Error adding bb attributes to artifact", e);
|
||||
}
|
||||
return writeResults;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -18,8 +18,6 @@
|
||||
*/
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@ -30,7 +28,6 @@ import java.util.Map;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.PatternSyntaxException;
|
||||
import javax.swing.SwingWorker;
|
||||
@ -156,9 +153,73 @@ public class TermComponentQuery implements KeywordSearchQuery {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
||||
public KeywordWriteResult writeToBlackBoard(String termHit, FsContent newFsHit, String listName) {
|
||||
final String MODULE_NAME = KeywordSearchIngestService.MODULE_NAME;
|
||||
|
||||
//snippet
|
||||
String snippet = null;
|
||||
try {
|
||||
snippet = LuceneQuery.querySnippet(KeywordSearchUtil.escapeLuceneQuery(termHit, true, false), newFsHit.getId(), true);
|
||||
} catch (Exception e) {
|
||||
logger.log(Level.INFO, "Error querying snippet: " + termHit, e);
|
||||
return null;
|
||||
}
|
||||
|
||||
if (snippet == null || snippet.equals("")) {
|
||||
return null;
|
||||
}
|
||||
|
||||
//there is match actually in this file, create artifact only then
|
||||
BlackboardArtifact bba = null;
|
||||
KeywordWriteResult writeResult = null;
|
||||
Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
|
||||
try {
|
||||
bba = newFsHit.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
||||
writeResult = new KeywordWriteResult(bba);
|
||||
} catch (Exception e) {
|
||||
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
//regex match
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "", termHit));
|
||||
//list
|
||||
if (listName == null) {
|
||||
listName = "";
|
||||
}
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SET.getTypeID(), MODULE_NAME, "", listName));
|
||||
|
||||
//preview
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "", snippet));
|
||||
|
||||
//regex keyword
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "", termsQuery));
|
||||
|
||||
//selector TODO move to general info artifact
|
||||
/*
|
||||
if (keywordQuery != null) {
|
||||
BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
|
||||
if (selType != null) {
|
||||
BlackboardAttribute selAttr = new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, "", regexMatch);
|
||||
attributes.add(selAttr);
|
||||
}
|
||||
} */
|
||||
|
||||
try {
|
||||
bba.addAttributes(attributes);
|
||||
writeResult.add(attributes);
|
||||
return writeResult;
|
||||
} catch (TskException e) {
|
||||
logger.log(Level.INFO, "Error adding bb attributes for terms search artifact", e);
|
||||
}
|
||||
|
||||
return null;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<KeywordWriteResult> writeToBlackBoard(FsContent newFsHit, String listName) {
|
||||
Collection<KeywordWriteResult> writeResults = new ArrayList<KeywordWriteResult>();
|
||||
|
||||
//get unique term matches, all cases
|
||||
@ -169,70 +230,46 @@ public class TermComponentQuery implements KeywordSearchQuery {
|
||||
}
|
||||
|
||||
for (String regexMatch : matches.keySet()) {
|
||||
//snippet
|
||||
String snippet = null;
|
||||
try {
|
||||
snippet = LuceneQuery.querySnippet(KeywordSearchUtil.escapeLuceneQuery(regexMatch, true, false), newFsHit.getId(), true);
|
||||
} catch (Exception e) {
|
||||
logger.log(Level.INFO, "Error querying snippet: " + regexMatch, e);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (snippet == null || snippet.equals("")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
//there is match actually in this file, create artifact only then
|
||||
BlackboardArtifact bba = null;
|
||||
KeywordWriteResult writeResult = null;
|
||||
Collection<BlackboardAttribute> attributes = new ArrayList<BlackboardAttribute>();
|
||||
try {
|
||||
bba = newFsHit.newArtifact(ARTIFACT_TYPE.TSK_KEYWORD_HIT);
|
||||
writeResult = new KeywordWriteResult(bba);
|
||||
writeResults.add(writeResult);
|
||||
} catch (Exception e) {
|
||||
logger.log(Level.INFO, "Error adding bb artifact for keyword hit", e);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
//regex match
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID(), MODULE_NAME, "", regexMatch));
|
||||
//list
|
||||
if (listName == null) {
|
||||
listName = "";
|
||||
}
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_SET.getTypeID(), MODULE_NAME, "", listName));
|
||||
|
||||
//preview
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID(), MODULE_NAME, "", snippet));
|
||||
|
||||
//regex keyword
|
||||
attributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID(), MODULE_NAME, "", termsQuery));
|
||||
|
||||
//selector TODO move to general info artifact
|
||||
/*
|
||||
if (keywordQuery != null) {
|
||||
BlackboardAttribute.ATTRIBUTE_TYPE selType = keywordQuery.getType();
|
||||
if (selType != null) {
|
||||
BlackboardAttribute selAttr = new BlackboardAttribute(selType.getTypeID(), MODULE_NAME, "", regexMatch);
|
||||
attributes.add(selAttr);
|
||||
}
|
||||
} */
|
||||
|
||||
try {
|
||||
bba.addAttributes(attributes);
|
||||
writeResult.add(attributes);
|
||||
} catch (TskException e) {
|
||||
logger.log(Level.INFO, "Error adding bb attributes for terms search artifact", e);
|
||||
}
|
||||
|
||||
|
||||
KeywordWriteResult written = writeToBlackBoard(regexMatch, newFsHit, listName);
|
||||
if (written != null)
|
||||
writeResults.add(written);
|
||||
} //for each term
|
||||
|
||||
return writeResults;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, List<FsContent>> performQueryPerTerm() {
|
||||
Map<String, List<FsContent>> results = new HashMap<String, List<FsContent>>();
|
||||
|
||||
final SolrQuery q = createQuery();
|
||||
terms = executeQuery(q);
|
||||
|
||||
|
||||
for (Term term : terms) {
|
||||
final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm(), true, false);
|
||||
if (termS.contains("*")) {
|
||||
continue;
|
||||
}
|
||||
|
||||
StringBuilder filesQueryB = new StringBuilder();
|
||||
filesQueryB.append(TERMS_SEARCH_FIELD).append(":").append(termS);
|
||||
final String queryStr = filesQueryB.toString();
|
||||
|
||||
LuceneQuery filesQuery = new LuceneQuery(queryStr);
|
||||
try {
|
||||
List<FsContent> subResults = filesQuery.performQuery();
|
||||
results.put(term.getTerm(), subResults);
|
||||
} catch (RuntimeException e) {
|
||||
logger.log(Level.SEVERE, "Error executing Solr query,", e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* return collapsed matches with all files for the query
|
||||
* without per match breakdown
|
||||
|
Loading…
x
Reference in New Issue
Block a user