Clean up and better document QueryResults class

This commit is contained in:
Richard Cordovano 2017-10-14 07:52:36 -04:00
parent 284e6f64fe
commit 8ac6e71afe
4 changed files with 170 additions and 94 deletions

View File

@ -104,13 +104,13 @@ class HighlightedText implements IndexedText {
/** /**
* This constructor is used when keyword hits are accessed from the ad-hoc * This constructor is used when keyword hits are accessed from the ad-hoc
* search results. In that case we have the entire QueryResults object and * search results. In that case we have the entire QueryResults object and
* need to arrange the paging. need to arrange the paging.
* *
* @param objectId The objectID of the content whose text will be * @param objectId The objectID of the content whose text will be
* highlighted. * highlighted.
* @param hits The QueryResults for the ad-hoc search from whose * @param hits The QueryResults for the ad-hoc search from whose
* results a selection was made leading to this results a selection was made leading to this
* HighlightedText. HighlightedText.
*/ */
HighlightedText(long objectId, QueryResults hits) { HighlightedText(long objectId, QueryResults hits) {
this.objectId = objectId; this.objectId = objectId;

View File

@ -331,7 +331,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
final String queryDisp = queryStr.length() > QUERY_DISPLAY_LEN ? queryStr.substring(0, QUERY_DISPLAY_LEN - 1) + " ..." : queryStr; final String queryDisp = queryStr.length() > QUERY_DISPLAY_LEN ? queryStr.substring(0, QUERY_DISPLAY_LEN - 1) + " ..." : queryStr;
try { try {
progress = ProgressHandle.createHandle(NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.progress.saving", queryDisp), () -> BlackboardResultWriter.this.cancel(true)); progress = ProgressHandle.createHandle(NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.progress.saving", queryDisp), () -> BlackboardResultWriter.this.cancel(true));
hits.writeAllHitsToBlackBoard(progress, null, this, false); hits.process(progress, null, this, false);
} finally { } finally {
finalizeWorker(); finalizeWorker();
} }

View File

@ -45,195 +45,272 @@ import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskCoreException; import org.sleuthkit.datamodel.TskCoreException;
/** /**
* Stores the results from running a Solr query (which could contain multiple * Stores and processes the results of a keyword search query. Processing
* keywords). * includes posting keyword hit artifacts to the blackboard, sending messages
* * about the search hits to the ingest inbox, and publishing an event to notify
* subscribers of the blackboard posts.
*/ */
class QueryResults { class QueryResults {
private static final Logger logger = Logger.getLogger(QueryResults.class.getName()); private static final Logger logger = Logger.getLogger(QueryResults.class.getName());
private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName(); private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName();
/** private final KeywordSearchQuery query;
* The query that generated the results.
*/
private final KeywordSearchQuery keywordSearchQuery;
/**
* A map of keywords to keyword hits.
*/
private final Map<Keyword, List<KeywordHit>> results = new HashMap<>(); private final Map<Keyword, List<KeywordHit>> results = new HashMap<>();
/**
* Constructs an object that stores and processes the results of a keyword
* search query. Processing includes posting keyword hit artifacts to the
* blackboard, sending messages about the search hits to the ingest inbox,
* and publishing an event to notify subscribers of the blackboard posts.
*
* @param query The query.
*/
QueryResults(KeywordSearchQuery query) { QueryResults(KeywordSearchQuery query) {
this.keywordSearchQuery = query; this.query = query;
} }
/**
* Gets the keyword search query that generated the results stored in this
* object.
*
* @return The query.
*/
KeywordSearchQuery getQuery() {
return query;
}
/**
* Adds the keyword hits for a keyword to the hits that are stored in this
* object. All calls to this method MUST be completed before calling the
* process method.
*
* @param keyword The keyword.
* @param hits The hits.
*/
void addResult(Keyword keyword, List<KeywordHit> hits) { void addResult(Keyword keyword, List<KeywordHit> hits) {
results.put(keyword, hits); results.put(keyword, hits);
} }
KeywordSearchQuery getQuery() { /**
return keywordSearchQuery; * Gets the keyword hits stored in this object for a given keyword.
} *
* @param keyword The keyword.
*
* @return The keyword hits.
*/
List<KeywordHit> getResults(Keyword keyword) { List<KeywordHit> getResults(Keyword keyword) {
return results.get(keyword); return results.get(keyword);
} }
/**
* Gets the set of unique keywords for which keyword hits have been stored
* in this object.
*
* @return The set of keywords.
*/
Set<Keyword> getKeywords() { Set<Keyword> getKeywords() {
return results.keySet(); return results.keySet();
} }
/** /**
* Writes the keyword hits encapsulated in this query result to the * Processes the keyword hits stored in this object by posting keyword hit
* blackboard. Makes one artifact per keyword per searched object (file or * artifacts to the blackboard, sending messages about the search hits to
* artifact), i.e., if a keyword is found several times in the object, only * the ingest inbox, and publishing an event to notify subscribers of the
* one artifact is created. * blackboard posts.
* *
* @param progress Can be null. * Makes one artifact per keyword per searched text source object (file or
* @param subProgress Can be null. * artifact), i.e., if a keyword is found several times in the text
* @param worker The Swing worker that is writing the hits, needed to * extracted from the source object, only one artifact is created.
* support cancellation. *
* @param notifyInbox Whether or not write a message to the ingest messages * This method ASSUMES that the processing is being done using a SwingWorker
* inbox. * that should be checked for task cancellation.
*
* All calls to the addResult method MUST be completed before calling this
* method.
*
* @param progress A progress indicator that reports the number of
* keywords processed. Can be null.
* @param subProgress A progress contributor that reports the keyword
* currently being processed. Can be null.
* @param worker The SwingWorker that is being used to do the
* processing, will be checked for task cancellation
* before processing each keyword.
* @param notifyInbox Whether or not to write a message to the ingest
* messages inbox if there is a keyword hit in the text
* extracted from the text source object.
* *
* @return The artifacts that were created.
*/ */
Collection<BlackboardArtifact> writeAllHitsToBlackBoard(ProgressHandle progress, ProgressContributor subProgress, SwingWorker<?, ?> worker, boolean notifyInbox) { void process(ProgressHandle progress, ProgressContributor subProgress, SwingWorker<?, ?> worker, boolean notifyInbox) {
final Collection<BlackboardArtifact> newArtifacts = new ArrayList<>(); /*
if (progress != null) { * Initialize the progress indicator to the number of keywords that will
* be processed.
*/
if (null != progress) {
progress.start(getKeywords().size()); progress.start(getKeywords().size());
} }
int unitProgress = 0;
/*
* Process the keyword hits for each keyword.
*/
int keywordsProcessed = 0;
final Collection<BlackboardArtifact> hitArtifacts = new ArrayList<>();
for (final Keyword keyword : getKeywords()) { for (final Keyword keyword : getKeywords()) {
/*
* Cancellation check.
*/
if (worker.isCancelled()) { if (worker.isCancelled()) {
logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keyword.getSearchTerm()); //NON-NLS logger.log(Level.INFO, "Processing cancelled, exiting before processing search term {0}", keyword.getSearchTerm()); //NON-NLS
break; break;
} }
// Update progress object(s), if any /*
* Update the progress indicator and then show the current keyword
* via the progress contributor.
*/
if (progress != null) { if (progress != null) {
progress.progress(keyword.toString(), unitProgress); progress.progress(keyword.toString(), keywordsProcessed);
} }
if (subProgress != null) { if (subProgress != null) {
String hitDisplayStr = keyword.getSearchTerm(); String hitDisplayStr = keyword.getSearchTerm();
if (hitDisplayStr.length() > 50) { if (hitDisplayStr.length() > 50) {
hitDisplayStr = hitDisplayStr.substring(0, 49) + "..."; hitDisplayStr = hitDisplayStr.substring(0, 49) + "...";
} }
subProgress.progress(keywordSearchQuery.getKeywordList().getName() + ": " + hitDisplayStr, unitProgress); subProgress.progress(query.getKeywordList().getName() + ": " + hitDisplayStr, keywordsProcessed);
} }
for (KeywordHit hit : getOneHitPerObject(keyword)) { /*
String termString = keyword.getSearchTerm(); * Reduce the hits for this keyword to one hit per text source
* object so that only one hit artifact is generated per text source
* object, no matter how many times the keyword was actually found.
*/
for (KeywordHit hit : getOneHitPerTextSourceObject(keyword)) {
/*
* Get a snippet (preview) for the hit. Regex queries always
* have snippets made from the content_str pulled back from Solr
* for executing the search. Other types of queries may or may
* not have snippets yet.
*/
String snippet = hit.getSnippet(); String snippet = hit.getSnippet();
if (StringUtils.isBlank(snippet)) { if (StringUtils.isBlank(snippet)) {
final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(termString); final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(keyword.getSearchTerm());
try { try {
/* snippet = LuceneQuery.querySnippet(snippetQuery, hit.getSolrObjectId(), hit.getChunkId(), !query.isLiteral(), true);
* this doesn't work for regex queries... But that is
* okay because regex queries always have snippets made
* from the content_str field we pull back from Solr
*/
snippet = LuceneQuery.querySnippet(snippetQuery, hit.getSolrObjectId(), hit.getChunkId(), !keywordSearchQuery.isLiteral(), true);
} catch (NoOpenCoreException e) { } catch (NoOpenCoreException e) {
logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); //NON-NLS logger.log(Level.SEVERE, "Solr core closed while executing snippet query " + snippetQuery, e); //NON-NLS
//no reason to continue break; // Stop processing.
break;
} catch (Exception e) { } catch (Exception e) {
logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); //NON-NLS logger.log(Level.SEVERE, "Error executing snippet query " + snippetQuery, e); //NON-NLS
continue; continue; // Try processing the next hit.
} }
} }
/*
* Get the content (file or artifact) that is the text source
* for the hit.
*/
Content content = null; Content content = null;
try { try {
SleuthkitCase tskCase = Case.getCurrentCase().getSleuthkitCase(); SleuthkitCase tskCase = Case.getCurrentCase().getSleuthkitCase();
content = tskCase.getContentById(hit.getContentID()); content = tskCase.getContentById(hit.getContentID());
} catch (TskCoreException | IllegalStateException tskCoreException) { } catch (TskCoreException | IllegalStateException tskCoreException) {
logger.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", tskCoreException); //NON-NLS logger.log(Level.SEVERE, "Failed to get text source object for ", tskCoreException); //NON-NLS
return null;
} }
BlackboardArtifact writeResult = keywordSearchQuery.writeSingleFileHitsToBlackBoard(content, keyword, hit, snippet, keywordSearchQuery.getKeywordList().getName());
if (writeResult != null) { /*
newArtifacts.add(writeResult); * Post an artifact for the hit to the blackboard.
*/
BlackboardArtifact artifact = query.writeSingleFileHitsToBlackBoard(content, keyword, hit, snippet, query.getKeywordList().getName());
if (null == artifact) {
logger.log(Level.SEVERE, "Error posting keyword hit artifact for keyword {0} in {1} to the blackboard", new Object[]{keyword.toString(), content}); //NON-NLS
}
/*
* Send an ingest inbox message for the hit.
*/
if (null != artifact) {
hitArtifacts.add(artifact);
if (notifyInbox) { if (notifyInbox) {
try { try {
writeSingleFileInboxMessage(writeResult, content); writeSingleFileInboxMessage(artifact, content);
} catch (TskCoreException ex) { } catch (TskCoreException ex) {
logger.log(Level.WARNING, "Error posting message to Ingest Inbox", ex); //NON-NLS logger.log(Level.SEVERE, "Error sending message to ingest messages inbox", ex); //NON-NLS
} }
} }
} else {
logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{content, keyword.toString()}); //NON-NLS
} }
} }
++unitProgress;
++keywordsProcessed;
} }
// Update artifact browser /*
if (!newArtifacts.isEmpty()) { * Publish an event to notify subscribers of the blackboard posts. The
newArtifacts.stream() * artifacts are grouped by type, since they may contain both
//group artifacts by type * TSK_KEYWORD_HIT artifacts and TSK_ACCOUNT artifacts (for credit card
* account number hits).
*/
if (!hitArtifacts.isEmpty()) {
hitArtifacts.stream()
// Group artifacts by type
.collect(Collectors.groupingBy(BlackboardArtifact::getArtifactTypeID)) .collect(Collectors.groupingBy(BlackboardArtifact::getArtifactTypeID))
//for each type send an event // For each type send an event
.forEach((typeID, artifacts) .forEach((typeID, artifacts)
-> IngestServices.getInstance().fireModuleDataEvent(new ModuleDataEvent(MODULE_NAME, BlackboardArtifact.ARTIFACT_TYPE.fromID(typeID), artifacts))); -> IngestServices.getInstance().fireModuleDataEvent(new ModuleDataEvent(MODULE_NAME, BlackboardArtifact.ARTIFACT_TYPE.fromID(typeID), artifacts)));
} }
return newArtifacts;
} }
/** /**
* Gets the first hit of the keyword. * Reduce the hits for a given keyword to one hit per text source object so
* that only one hit artifact is generated per text source object, no matter
* how many times the keyword was actually found.
* *
* @param keyword * @param keyword The keyword.
* *
* @return Collection<KeywordHit> containing KeywordHits with lowest * @return Collection<KeywordHit> The reduced set of keyword hits.
* SolrObjectID-ChunkID pairs.
*/ */
private Collection<KeywordHit> getOneHitPerObject(Keyword keyword) { private Collection<KeywordHit> getOneHitPerTextSourceObject(Keyword keyword) {
HashMap<Long, KeywordHit> hits = new HashMap<>(); /*
* For each Solr document (chunk) for a text source object, return only
// create a list of KeywordHits. The KeywordHit with the lowest chunkID is added to the list. * a single keyword hit from the first chunk of text (the one with the
for (KeywordHit hit : getResults(keyword)) { * lowest chunk id).
*/
HashMap< Long, KeywordHit> hits = new HashMap<>();
getResults(keyword).forEach((hit) -> {
if (!hits.containsKey(hit.getSolrObjectId())) { if (!hits.containsKey(hit.getSolrObjectId())) {
hits.put(hit.getSolrObjectId(), hit); hits.put(hit.getSolrObjectId(), hit);
} else if (hit.getChunkId() < hits.get(hit.getSolrObjectId()).getChunkId()) { } else if (hit.getChunkId() < hits.get(hit.getSolrObjectId()).getChunkId()) {
hits.put(hit.getSolrObjectId(), hit); hits.put(hit.getSolrObjectId(), hit);
} }
} });
return hits.values(); return hits.values();
} }
/** /**
* Generate and post an ingest inbox message for the given keyword in the * Send an ingest inbox message indicating that there was a keyword hit in
* given content. * the given text source object.
* *
* @param artifact The keyword hit artifact. * @param artifact The keyword hit artifact for the hit.
* @param hitContent The content that the hit is in. * @param hitContent The text source object.
* *
* @throws TskCoreException If there is a problem generating or posting the * @throws TskCoreException If there is a problem generating or sending the
* inbox message. * inbox message.
*/ */
private void writeSingleFileInboxMessage(BlackboardArtifact artifact, Content hitContent) throws TskCoreException { private void writeSingleFileInboxMessage(BlackboardArtifact artifact, Content hitContent) throws TskCoreException {
StringBuilder subjectSb = new StringBuilder(); StringBuilder subjectSb = new StringBuilder(1024);
StringBuilder detailsSb = new StringBuilder(); if (!query.isLiteral()) {
if (!keywordSearchQuery.isLiteral()) {
subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl")); subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl"));
} else { } else {
subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl")); subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl"));
} }
StringBuilder detailsSb = new StringBuilder(1024);
String uniqueKey = null; String uniqueKey = null;
BlackboardAttribute attr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD)); BlackboardAttribute attr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD));
if (attr != null) { if (attr != null) {
final String keyword = attr.getValueString(); final String keyword = attr.getValueString();
subjectSb.append(keyword); subjectSb.append(keyword);
uniqueKey = keyword.toLowerCase(); uniqueKey = keyword.toLowerCase();
//details
detailsSb.append("<table border='0' cellpadding='4' width='280'>"); //NON-NLS detailsSb.append("<table border='0' cellpadding='4' width='280'>"); //NON-NLS
//hit
detailsSb.append("<tr>"); //NON-NLS detailsSb.append("<tr>"); //NON-NLS
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitThLbl")); detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitThLbl"));
detailsSb.append("<td>").append(EscapeUtil.escapeHtml(keyword)).append("</td>"); //NON-NLS detailsSb.append("<td>").append(EscapeUtil.escapeHtml(keyword)).append("</td>"); //NON-NLS
@ -270,7 +347,7 @@ class QueryResults {
} }
//regex //regex
if (!keywordSearchQuery.isLiteral()) { if (!query.isLiteral()) {
attr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP)); attr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP));
if (attr != null) { if (attr != null) {
detailsSb.append("<tr>"); //NON-NLS detailsSb.append("<tr>"); //NON-NLS
@ -279,7 +356,6 @@ class QueryResults {
detailsSb.append("</tr>"); //NON-NLS detailsSb.append("</tr>"); //NON-NLS
} }
} }
detailsSb.append("</table>"); //NON-NLS detailsSb.append("</table>"); //NON-NLS
IngestServices.getInstance().postMessage(IngestMessage.createDataMessage(MODULE_NAME, subjectSb.toString(), detailsSb.toString(), uniqueKey, artifact)); IngestServices.getInstance().postMessage(IngestMessage.createDataMessage(MODULE_NAME, subjectSb.toString(), detailsSb.toString(), uniqueKey, artifact));

View File

@ -492,7 +492,7 @@ public final class SearchRunner {
subProgresses[keywordsSearched].progress(keywordList.getName() + ": " + queryDisplayStr, unitProgress); subProgresses[keywordsSearched].progress(keywordList.getName() + ": " + queryDisplayStr, unitProgress);
// Create blackboard artifacts // Create blackboard artifacts
newResults.writeAllHitsToBlackBoard(null, subProgresses[keywordsSearched], this, keywordList.getIngestMessages()); newResults.process(null, subProgresses[keywordsSearched], this, keywordList.getIngestMessages());
} //if has results } //if has results