From 2217eb0f98bf49b19a0afdb62d20af85fc5dfaec Mon Sep 17 00:00:00 2001 From: esaunders Date: Thu, 19 Jan 2017 17:20:02 -0500 Subject: [PATCH 1/3] Removed getOneHitPerObject() since Searcher.filterResults() guarantees that we will get a single hit per object (for the lowest numbered chunk). --- .../autopsy/keywordsearch/QueryResults.java | 25 +------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java index 316e4f3717..c5741753df 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java @@ -131,7 +131,7 @@ class QueryResults { subProgress.progress(keywordList.getName() + ": " + hitDisplayStr, unitProgress); } - for (KeywordHit hit : getOneHitPerObject(keyword)) { + for (KeywordHit hit : getResults(keyword)) { String termString = keyword.getSearchTerm(); final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(termString); String snippet; @@ -174,29 +174,6 @@ class QueryResults { return newArtifacts; } - /** - * Gets the first hit of the keyword. - * - * @param keyword - * - * @return Collection containing KeywordHits with lowest - * SolrObjectID-ChunkID pairs. - */ - private Collection getOneHitPerObject(Keyword keyword) { - - HashMap hits = new HashMap<>(); - - // create a list of KeywordHits. KeywordHits with lowest chunkID is added the the list. 
- for (KeywordHit hit : getResults(keyword)) { - if (!hits.containsKey(hit.getSolrObjectId())) { - hits.put(hit.getSolrObjectId(), hit); - } else if (hit.getChunkId() < hits.get(hit.getSolrObjectId()).getChunkId()) { - hits.put(hit.getSolrObjectId(), hit); - } - } - return hits.values(); - } - /** * Generate an ingest inbox message for given keyword in given file * From b1c0815c4f7516392d5f4510f05e2a207a9ff220 Mon Sep 17 00:00:00 2001 From: esaunders Date: Thu, 19 Jan 2017 17:20:43 -0500 Subject: [PATCH 2/3] Made KeywordHit implement Comparable so that lists of hits can be sorted. --- .../autopsy/keywordsearch/KeywordHit.java | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java index 5425048ee4..0f00da2409 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java @@ -30,7 +30,7 @@ import org.sleuthkit.datamodel.TskCoreException; * keyword was found and the file available to clients. Artifact keyword hits * also make the artifact available to clients. */ -class KeywordHit { +class KeywordHit implements Comparable<KeywordHit> { private final String solrDocumentId; private final long solrObjectId; @@ -140,4 +140,23 @@ class KeywordHit { return hash; } + @Override + public int compareTo(KeywordHit o) { + if (this.solrObjectId < o.solrObjectId) { + // Our object id is less than the other object id + return -1; + } else if (this.solrObjectId == o.solrObjectId) { + // Hits have same object id + if (this.chunkId < o.chunkId) { + // Our chunk id is lower than the other chunk id + return -1; + } else { + // Our chunk id is either greater than or equal to the other chunk id + return this.chunkId == o.chunkId ? 
0 : 1; + } + } else { + // Our object id is greater than the other object id + return 1; + } + } } From d4a4542d0429dfae77124ef225b18b2f19a7f526 Mon Sep 17 00:00:00 2001 From: esaunders Date: Thu, 19 Jan 2017 17:27:46 -0500 Subject: [PATCH 3/3] Modified filterResults() to return the hits for the lowest chunk associated with an object, assuming we haven't seen a hit for the keyword in the object before. --- .../sleuthkit/autopsy/keywordsearch/SearchRunner.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java index 989f1ac71b..8c3a450b27 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java @@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -580,8 +581,9 @@ public final class SearchRunner { * previously seen a hit for the keyword. * * @param queryResult The results returned by a keyword search. - * @return The set of hits found by the most recent search for objects - * that have not previously had a hit. + * @return A unique set of hits found by the most recent search for objects + * that have not previously had a hit. The hits will be for the lowest + * numbered chunk associated with the object. * */ private QueryResults filterResults(QueryResults queryResult) { @@ -596,6 +598,10 @@ public final class SearchRunner { // This may well include duplicates of hits we've seen in earlier periodic searches. List queryTermResults = queryResult.getResults(keyword); + // Sort the hits for this keyword so that we are always + // guaranteed to return the hit for the lowest chunk. 
+ Collections.sort(queryTermResults); + // This will be used to build up the hits we haven't seen before // for this keyword. List newUniqueHits = new ArrayList<>();