diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java index b8bb93b240..6836483b30 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java @@ -104,13 +104,13 @@ class HighlightedText implements IndexedText { /** * This constructor is used when keyword hits are accessed from the ad-hoc * search results. In that case we have the entire QueryResults object and - * need to arrange the paging. + need to arrange the paging. * * @param objectId The objectID of the content whose text will be * highlighted. * @param QueryResults The QueryResults for the ad-hoc search from whose - * results a selection was made leading to this - * HighlightedText. + results a selection was made leading to this + HighlightedText. */ HighlightedText(long objectId, QueryResults hits) { this.objectId = objectId; diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java index ac7cfae557..a48b40e756 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java @@ -331,7 +331,7 @@ class KeywordSearchResultFactory extends ChildFactory { final String queryDisp = queryStr.length() > QUERY_DISPLAY_LEN ? queryStr.substring(0, QUERY_DISPLAY_LEN - 1) + " ..." 
: queryStr; try { progress = ProgressHandle.createHandle(NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.progress.saving", queryDisp), () -> BlackboardResultWriter.this.cancel(true)); - hits.writeAllHitsToBlackBoard(progress, null, this, false); + hits.process(progress, null, this, false); } finally { finalizeWorker(); } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java index 48fdb38f10..516b96eaab 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java @@ -45,195 +45,272 @@ import org.sleuthkit.datamodel.SleuthkitCase; import org.sleuthkit.datamodel.TskCoreException; /** - * Stores the results from running a Solr query (which could contain multiple - * keywords). - * + * Stores and processes the results of a keyword search query. Processing + * includes posting keyword hit artifacts to the blackboard, sending messages + * about the search hits to the ingest inbox, and publishing an event to notify + * subscribers of the blackboard posts. */ class QueryResults { private static final Logger logger = Logger.getLogger(QueryResults.class.getName()); private static final String MODULE_NAME = KeywordSearchModuleFactory.getModuleName(); - /** - * The query that generated the results. - */ - private final KeywordSearchQuery keywordSearchQuery; - - /** - * A map of keywords to keyword hits. - */ + private final KeywordSearchQuery query; private final Map> results = new HashMap<>(); + /** + * Constructs an object that stores and processes the results of a keyword + * search query. Processing includes posting keyword hit artifacts to the + * blackboard, sending messages about the search hits to the ingest inbox, + * and publishing an event to notify subscribers of the blackboard posts. + * + * @param query The query. 
+ */ QueryResults(KeywordSearchQuery query) { - this.keywordSearchQuery = query; + this.query = query; } + /** + * Gets the keyword search query that generated the results stored in this + * object. + * + * @return The query. + */ + KeywordSearchQuery getQuery() { + return query; + } + + /** + * Adds the keyword hits for a keyword to the hits that are stored in this + * object. All calls to this method MUST be completed before calling the + * process method. + * + * @param keyword The keyword. + * @param hits The hits. + */ void addResult(Keyword keyword, List hits) { results.put(keyword, hits); } - KeywordSearchQuery getQuery() { - return keywordSearchQuery; - } - + /** + * Gets the keyword hits stored in this object for a given keyword. + * + * @param keyword The keyword. + * + * @return The keyword hits. + */ List getResults(Keyword keyword) { return results.get(keyword); } + /** + * Gets the set of unique keywords for which keyword hits have been stored + * in this object. + * + * @return The keywords. + */ Set getKeywords() { return results.keySet(); } /** - * Writes the keyword hits encapsulated in this query result to the - * blackboard. Makes one artifact per keyword per searched object (file or - * artifact), i.e., if a keyword is found several times in the object, only - * one artifact is created. + * Processes the keyword hits stored in this object by posting keyword hit + * artifacts to the blackboard, sending messages about the search hits to + * the ingest inbox, and publishing an event to notify subscribers of the + * blackboard posts. * - * @param progress Can be null. - * @param subProgress Can be null. - * @param worker The Swing worker that is writing the hits, needed to - * support cancellation. - * @param notifyInbox Whether or not write a message to the ingest messages - * inbox. 
+ * Makes one artifact per keyword per searched text source object (file or + * artifact), i.e., if a keyword is found several times in the text + * extracted from the source object, only one artifact is created. + * + * This method ASSUMES that the processing is being done using a SwingWorker + * that should be checked for task cancellation. + * + * All calls to the addResult method MUST be completed before calling this + * method. + * + * @param progress A progress indicator that reports the number of + * keywords processed. Can be null. + * @param subProgress A progress contributor that reports the keyword + * currently being processed. Can be null. + * @param worker The SwingWorker that is being used to do the + * processing, will be checked for task cancellation + * before processing each keyword. + * @param notifyInbox Whether or not to write a message to the ingest + * messages inbox if there is a keyword hit in the text + * extracted from the text source object. * - * @return The artifacts that were created. */ - Collection writeAllHitsToBlackBoard(ProgressHandle progress, ProgressContributor subProgress, SwingWorker worker, boolean notifyInbox) { - final Collection newArtifacts = new ArrayList<>(); - if (progress != null) { + void process(ProgressHandle progress, ProgressContributor subProgress, SwingWorker worker, boolean notifyInbox) { + /* + * Initialize the progress indicator to the number of keywords that will + * be processed. + */ + if (null != progress) { progress.start(getKeywords().size()); } - int unitProgress = 0; + /* + * Process the keyword hits for each keyword. + */ + int keywordsProcessed = 0; + final Collection hitArtifacts = new ArrayList<>(); for (final Keyword keyword : getKeywords()) { + /* + * Cancellation check. 
+ */ if (worker.isCancelled()) { - logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keyword.getSearchTerm()); //NON-NLS + logger.log(Level.INFO, "Processing cancelled, exiting before processing search term {0}", keyword.getSearchTerm()); //NON-NLS break; } - // Update progress object(s), if any + /* + * Update the progress indicator and show the current keyword + * via the progress contributor. + */ if (progress != null) { - progress.progress(keyword.toString(), unitProgress); + progress.progress(keyword.toString(), keywordsProcessed); } if (subProgress != null) { String hitDisplayStr = keyword.getSearchTerm(); if (hitDisplayStr.length() > 50) { hitDisplayStr = hitDisplayStr.substring(0, 49) + "..."; } - subProgress.progress(keywordSearchQuery.getKeywordList().getName() + ": " + hitDisplayStr, unitProgress); + subProgress.progress(query.getKeywordList().getName() + ": " + hitDisplayStr, keywordsProcessed); } - for (KeywordHit hit : getOneHitPerObject(keyword)) { - String termString = keyword.getSearchTerm(); + /* + * Reduce the hits for this keyword to one hit per text source + * object so that only one hit artifact is generated per text source + * object, no matter how many times the keyword was actually found. + */ + for (KeywordHit hit : getOneHitPerTextSourceObject(keyword)) { + /* + * Get a snippet (preview) for the hit. Regex queries always + * have snippets made from the content_str pulled back from Solr + * for executing the search. Other types of queries may or may + * not have snippets yet. + */ String snippet = hit.getSnippet(); if (StringUtils.isBlank(snippet)) { - final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(termString); + final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(keyword.getSearchTerm()); try { - /* - * this doesn't work for regex queries... 
But that is - * okay because regex queries always have snippets made - * from the content_str field we pull back from Solr - */ - snippet = LuceneQuery.querySnippet(snippetQuery, hit.getSolrObjectId(), hit.getChunkId(), !keywordSearchQuery.isLiteral(), true); + snippet = LuceneQuery.querySnippet(snippetQuery, hit.getSolrObjectId(), hit.getChunkId(), !query.isLiteral(), true); } catch (NoOpenCoreException e) { - logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); //NON-NLS - //no reason to continue - break; + logger.log(Level.SEVERE, "Solr core closed while executing snippet query " + snippetQuery, e); //NON-NLS + break; // Stop processing. } catch (Exception e) { - logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); //NON-NLS - continue; + logger.log(Level.SEVERE, "Error executing snippet query " + snippetQuery, e); //NON-NLS + continue; // Try processing the next hit. } } + + /* + * Get the content (file or artifact) that is the text source + * for the hit. + */ Content content = null; try { SleuthkitCase tskCase = Case.getCurrentCase().getSleuthkitCase(); content = tskCase.getContentById(hit.getContentID()); } catch (TskCoreException | IllegalStateException tskCoreException) { - logger.log(Level.SEVERE, "Error adding artifact for keyword hit to blackboard", tskCoreException); //NON-NLS - return null; + logger.log(Level.SEVERE, "Failed to get text source object for ", tskCoreException); //NON-NLS } - BlackboardArtifact writeResult = keywordSearchQuery.writeSingleFileHitsToBlackBoard(content, keyword, hit, snippet, keywordSearchQuery.getKeywordList().getName()); - if (writeResult != null) { - newArtifacts.add(writeResult); + + /* + * Post an artifact for the hit to the blackboard. 
+ */ + BlackboardArtifact artifact = query.writeSingleFileHitsToBlackBoard(content, keyword, hit, snippet, query.getKeywordList().getName()); + if (null == artifact) { + logger.log(Level.SEVERE, "Error posting keyword hit artifact for keyword {0} in {1} to the blackboard", new Object[]{keyword.toString(), content}); //NON-NLS + } + + /* + * Send an ingest inbox message for the hit. + */ + if (null != artifact) { + hitArtifacts.add(artifact); if (notifyInbox) { try { - writeSingleFileInboxMessage(writeResult, content); + writeSingleFileInboxMessage(artifact, content); } catch (TskCoreException ex) { - logger.log(Level.WARNING, "Error posting message to Ingest Inbox", ex); //NON-NLS + logger.log(Level.SEVERE, "Error sending message to ingest messages inbox", ex); //NON-NLS } } - } else { - logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{content, keyword.toString()}); //NON-NLS } } - ++unitProgress; + + ++keywordsProcessed; } - // Update artifact browser - if (!newArtifacts.isEmpty()) { - newArtifacts.stream() - //group artifacts by type + /* + * Publish an event to notify subscribers of the blackboard posts. The + * artifacts are grouped by type, since they may contain both + * TSK_KEYWORD_HIT artifacts and TSK_ACCOUNT artifacts (for credit card + * account number hits). + */ + if (!hitArtifacts.isEmpty()) { + hitArtifacts.stream() + // Group artifacts by type .collect(Collectors.groupingBy(BlackboardArtifact::getArtifactTypeID)) - //for each type send an event + // For each type send an event .forEach((typeID, artifacts) -> IngestServices.getInstance().fireModuleDataEvent(new ModuleDataEvent(MODULE_NAME, BlackboardArtifact.ARTIFACT_TYPE.fromID(typeID), artifacts))); } - - return newArtifacts; } /** - * Gets the first hit of the keyword. 
+ * Reduce the hits for a given keyword to one hit per text source object so + * that only one hit artifact is generated per text source object, no matter + * how many times the keyword was actually found. * - * @param keyword + * @param keyword The keyword. * - * @return Collection containing KeywordHits with lowest - * SolrObjectID-ChunkID pairs. + * @return Collection The reduced set of keyword hits. */ - private Collection getOneHitPerObject(Keyword keyword) { - HashMap hits = new HashMap<>(); - - // create a list of KeywordHits. KeywordHits with lowest chunkID is added the the list. - for (KeywordHit hit : getResults(keyword)) { + private Collection getOneHitPerTextSourceObject(Keyword keyword) { + /* + * For each Solr document (chunk) for a text source object, return only + * a single keyword hit from the first chunk of text (the one with the + * lowest chunk id). + */ + HashMap< Long, KeywordHit> hits = new HashMap<>(); + getResults(keyword).forEach((hit) -> { if (!hits.containsKey(hit.getSolrObjectId())) { hits.put(hit.getSolrObjectId(), hit); } else if (hit.getChunkId() < hits.get(hit.getSolrObjectId()).getChunkId()) { hits.put(hit.getSolrObjectId(), hit); } - } + }); return hits.values(); } /** - * Generate and post an ingest inbox message for the given keyword in the - * given content. + * Send an ingest inbox message indicating that there was a keyword hit in + * the given text source object. * - * @param artifact The keyword hit artifact. - * @param hitContent The content that the hit is in. + * @param artifact The keyword hit artifact for the hit. + * @param hitContent The text source object. * - * @throws TskCoreException If there is a problem generating or posting the + * @throws TskCoreException If there is a problem generating or sending the * inbox message. 
*/ private void writeSingleFileInboxMessage(BlackboardArtifact artifact, Content hitContent) throws TskCoreException { - StringBuilder subjectSb = new StringBuilder(); - StringBuilder detailsSb = new StringBuilder(); - - if (!keywordSearchQuery.isLiteral()) { + StringBuilder subjectSb = new StringBuilder(1024); + if (!query.isLiteral()) { subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl")); } else { subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl")); } + StringBuilder detailsSb = new StringBuilder(1024); String uniqueKey = null; BlackboardAttribute attr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD)); if (attr != null) { final String keyword = attr.getValueString(); subjectSb.append(keyword); uniqueKey = keyword.toLowerCase(); - //details detailsSb.append(""); //NON-NLS - //hit detailsSb.append(""); //NON-NLS detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitThLbl")); detailsSb.append(""); //NON-NLS @@ -270,7 +347,7 @@ class QueryResults { } //regex - if (!keywordSearchQuery.isLiteral()) { + if (!query.isLiteral()) { attr = artifact.getAttribute(new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP)); if (attr != null) { detailsSb.append(""); //NON-NLS @@ -279,7 +356,6 @@ class QueryResults { detailsSb.append(""); //NON-NLS } } - detailsSb.append("
").append(EscapeUtil.escapeHtml(keyword)).append("
"); //NON-NLS IngestServices.getInstance().postMessage(IngestMessage.createDataMessage(MODULE_NAME, subjectSb.toString(), detailsSb.toString(), uniqueKey, artifact)); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java index c9b95a770b..b998294fe4 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java @@ -492,7 +492,7 @@ public final class SearchRunner { subProgresses[keywordsSearched].progress(keywordList.getName() + ": " + queryDisplayStr, unitProgress); // Create blackboard artifacts - newResults.writeAllHitsToBlackBoard(null, subProgresses[keywordsSearched], this, keywordList.getIngestMessages()); + newResults.process(null, subProgresses[keywordsSearched], this, keywordList.getIngestMessages()); } //if has results