diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java index 5425048ee4..0f00da2409 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java @@ -30,7 +30,7 @@ import org.sleuthkit.datamodel.TskCoreException; * keyword was found and the file available to clients. Artifact keyword hits * also make the artifact available to clients. */ -class KeywordHit { +class KeywordHit implements Comparable<KeywordHit> { private final String solrDocumentId; private final long solrObjectId; @@ -140,4 +140,23 @@ class KeywordHit { return hash; } + @Override + public int compareTo(KeywordHit o) { + if (this.solrObjectId < o.solrObjectId) { + // Our object id is less than the other object id + return -1; + } else if (this.solrObjectId == o.solrObjectId) { + // Hits have same object id + if (this.chunkId < o.chunkId) { + // Our chunk id is lower than the other chunk id + return -1; + } else { + // Our chunk id is either greater than or equal to the other chunk id + return this.chunkId == o.chunkId ? 
0 : 1; + } + } else { + // Our object id is greater than the other object id + return 1; + } + } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java index 316e4f3717..c5741753df 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java @@ -131,7 +131,7 @@ class QueryResults { subProgress.progress(keywordList.getName() + ": " + hitDisplayStr, unitProgress); } - for (KeywordHit hit : getOneHitPerObject(keyword)) { + for (KeywordHit hit : getResults(keyword)) { String termString = keyword.getSearchTerm(); final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(termString); String snippet; @@ -174,29 +174,6 @@ class QueryResults { return newArtifacts; } - /** - * Gets the first hit of the keyword. - * - * @param keyword - * - * @return Collection containing KeywordHits with lowest - * SolrObjectID-ChunkID pairs. - */ - private Collection<KeywordHit> getOneHitPerObject(Keyword keyword) { - - HashMap<Long, KeywordHit> hits = new HashMap<>(); - - // create a list of KeywordHits. KeywordHits with lowest chunkID is added the the list. 
- for (KeywordHit hit : getResults(keyword)) { - if (!hits.containsKey(hit.getSolrObjectId())) { - hits.put(hit.getSolrObjectId(), hit); - } else if (hit.getChunkId() < hits.get(hit.getSolrObjectId()).getChunkId()) { - hits.put(hit.getSolrObjectId(), hit); - } - } - return hits.values(); - } - /** * Generate an ingest inbox message for given keyword in given file * diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java index 989f1ac71b..8c3a450b27 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java @@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -580,8 +581,9 @@ public final class SearchRunner { * previously seen a hit for the keyword. * * @param queryResult The results returned by a keyword search. - * @return The set of hits found by the most recent search for objects - * that have not previously had a hit. + * @return A unique set of hits found by the most recent search for objects + * that have not previously had a hit. The hits will be for the lowest + * numbered chunk associated with the object. * */ private QueryResults filterResults(QueryResults queryResult) { @@ -596,6 +598,10 @@ public final class SearchRunner { // This may well include duplicates of hits we've seen in earlier periodic searches. List<KeywordHit> queryTermResults = queryResult.getResults(keyword); + // Sort the hits for this keyword so that we are always + // guaranteed to return the hit for the lowest chunk. + Collections.sort(queryTermResults); + // This will be used to build up the hits we haven't seen before // for this keyword. List<KeywordHit> newUniqueHits = new ArrayList<>();