From e1546bd51043b4931e265efebadfcc5936e7839d Mon Sep 17 00:00:00 2001 From: esaunders Date: Tue, 7 Nov 2017 16:12:06 -0500 Subject: [PATCH] Only create one KeywordHit instance per document for a given hit. --- .../sleuthkit/autopsy/keywordsearch/RegexQuery.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index 9c92cdea5d..a0383ef03b 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -236,6 +236,8 @@ final class RegexQuery implements KeywordSearchQuery { private List createKeywordHits(SolrDocument solrDoc) throws TskCoreException { + final HashMap keywordsFoundInThisDocument = new HashMap<>(); + List hits = new ArrayList<>(); final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString(); final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString()); @@ -283,9 +285,14 @@ final class RegexQuery implements KeywordSearchQuery { hit = hit.replaceAll("[^0-9]$", ""); } - // Optimization to reduce the number of String objects created. + // We will only create one KeywordHit instance per document for + // a given hit. + if (keywordsFoundInThisDocument.containsKey(hit)) { + continue; + } + keywordsFoundInThisDocument.put(hit, hit); + if (keywordsFoundAcrossAllDocuments.containsKey(hit)) { - // Use an existing String reference if it exists. hit = keywordsFoundAcrossAllDocuments.get(hit); } else { keywordsFoundAcrossAllDocuments.put(hit, hit);