From 6abe26d6de23c51880eb2dfc94807048e973c07c Mon Sep 17 00:00:00 2001
From: millmanorama <millmanorama@gmail.com>
Date: Wed, 15 Feb 2017 13:48:43 +0100
Subject: [PATCH] finish implementing all the cases for highlighting

---
 .../keywordsearch/ExtractedContentViewer.java |  40 ++--
 .../keywordsearch/HighlightedText.java        | 187 ++++++++----------
 .../KeywordSearchFilterNode.java              |   2 +-
 .../KeywordSearchResultFactory.java           |  25 +--
 .../autopsy/keywordsearch/LuceneQuery.java    |   4 +-
 5 files changed, 102 insertions(+), 156 deletions(-)

diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentViewer.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentViewer.java
index 36f187d9b6..0a8c15a728 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentViewer.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentViewer.java
@@ -110,15 +110,13 @@ public class ExtractedContentViewer implements DataContentViewer {
             BlackboardArtifact artifact = nodeLookup.lookup(BlackboardArtifact.class);
             if (hits != null) {
                 highlightedHitText = new HighlightedText(content.getId(), hits);
-            } else {
-                if (artifact != null && artifact.getArtifactTypeID()
-                        == BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT.getTypeID()) {
-                    // if the artifact is an account artifact, get an account text .
-                    highlightedHitText = getAccountsText(content, nodeLookup);
-                } else if (artifact != null && artifact.getArtifactTypeID()
-                        == BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT.getTypeID()) {
-                    highlightedHitText = new HighlightedText(artifact);
-                }
+            } else if (artifact != null && artifact.getArtifactTypeID()
+                    == BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT.getTypeID()) {
+                // if the artifact is an account artifact, get the account text.
+                highlightedHitText = getAccountsText(content, nodeLookup);
+            } else if (artifact != null && artifact.getArtifactTypeID()
+                    == BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT.getTypeID()) {
+                highlightedHitText = new HighlightedText(artifact);
             }
             if (highlightedHitText != null) {
                 indexedTextSources.add(highlightedHitText);
@@ -298,16 +296,16 @@ public class ExtractedContentViewer implements DataContentViewer {
             return false;
         }
 
-        /**
-         * Is there any marked up indexed text in the look up of this node? This
-         * will be the case if the node is for a keyword hit artifact produced
-         * by either an ad hoc keyword search result (keyword search toolbar
-         * widgets) or a keyword search by the keyword search ingest module.
-         */
-        Collection<? extends IndexedText> sources = node.getLookup().lookupAll(IndexedText.class);
-        if (sources.isEmpty() == false) {
-            return true;
-        }
+//        /**
+//         * Is there any marked up indexed text in the look up of this node? This
+//         * will be the case if the node is for a keyword hit artifact produced
+//         * by either an ad hoc keyword search result (keyword search toolbar
+//         * widgets) or a keyword search by the keyword search ingest module.
+//         */
+//        Collection<? extends IndexedText> sources = node.getLookup().lookupAll(IndexedText.class);
+//        if (sources.isEmpty() == false) {
+//            return true;
+//        }
 
         /*
          * Is there a credit card artifact in the lookup
@@ -315,7 +313,9 @@ public class ExtractedContentViewer implements DataContentViewer {
         Collection<? extends BlackboardArtifact> artifacts = node.getLookup().lookupAll(BlackboardArtifact.class);
         if (artifacts != null) {
             for (BlackboardArtifact art : artifacts) {
-                if (art.getArtifactTypeID() == BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT.getTypeID()) {
+                final int artifactTypeID = art.getArtifactTypeID();
+                if (artifactTypeID == TSK_ACCOUNT.getTypeID()
+                        || artifactTypeID == TSK_KEYWORD_HIT.getTypeID()) {
                     return true;
                 }
             }
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java
index 157f303b64..cea6295f96 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java
@@ -18,7 +18,6 @@
  */
 package org.sleuthkit.autopsy.keywordsearch;
 
-import com.ibm.icu.text.UnicodeSet;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -28,7 +27,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
-import java.util.function.Function;
 import java.util.logging.Level;
 import java.util.stream.Collectors;
 import org.apache.commons.lang.StringEscapeUtils;
@@ -40,14 +38,13 @@ import org.apache.solr.common.SolrDocumentList;
 import org.openide.util.Exceptions;
 import org.openide.util.NbBundle;
 import org.openide.util.NbBundle.Messages;
-import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
 import org.sleuthkit.autopsy.coreutils.Version;
 import org.sleuthkit.autopsy.keywordsearch.KeywordQueryFilter.FilterType;
+import org.sleuthkit.autopsy.keywordsearch.KeywordSearch.QueryType;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardAttribute;
-import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.TskCoreException;
 
 /**
@@ -89,6 +86,7 @@ class HighlightedText implements IndexedText {
     private boolean isPageInfoLoaded = false;
     private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
     private BlackboardArtifact artifact;
+    private KeywordSearch.QueryType qt;
 
     /**
      * This constructor is used when keyword hits are accessed from the ad-hoc
@@ -123,24 +121,51 @@ class HighlightedText implements IndexedText {
     }
 
     private void loadPageInfoFromArtifact() throws TskCoreException, NumberFormatException {
+        final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString();
+        this.keywords.add(keyword);
 
-        KeywordSearch.QueryType qt = KeywordSearch.QueryType.values()[artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE).getValueInt()];
-        this.keywords.add(artifact.getAttribute(TSK_KEYWORD).getValueString());
-        String chunkIDsString = artifact.getAttribute(TSK_KEYWORD_HIT_DOCUMENT_IDS).getValueString();
-        Set<String> chunkIDs = Arrays.stream(chunkIDsString.split(",")).map(StringUtils::strip).collect(Collectors.toSet());
-        for (String solrDocumentId : chunkIDs) {
-            int chunkID;
-            final int separatorIndex = solrDocumentId.indexOf(Server.CHUNK_ID_SEPARATOR);
-            if (-1 != separatorIndex) {
+        final BlackboardAttribute qtAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE);
 
-                chunkID = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
-            } else {
+        qt = (qtAttribute != null)
+                ? KeywordSearch.QueryType.values()[qtAttribute.getValueInt()] : null;
 
-                chunkID = 0;
+        final BlackboardAttribute docIDsAttribute = artifact.getAttribute(TSK_KEYWORD_HIT_DOCUMENT_IDS);
+
+        if (qt == QueryType.REGEX && docIDsAttribute != null) {
+            //regex search records the chunks in the artifact
+            String chunkIDsString = docIDsAttribute.getValueString();
+            Set<String> chunkIDs = Arrays.stream(chunkIDsString.split(",")).map(StringUtils::strip).collect(Collectors.toSet());
+            for (String solrDocumentId : chunkIDs) {
+                int chunkID;
+                final int separatorIndex = solrDocumentId.indexOf(Server.CHUNK_ID_SEPARATOR);
+                if (-1 != separatorIndex) {
+                    chunkID = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
+                } else {
+                    // no chunk suffix in the document id: treat it as chunk 0
+                    chunkID = 0;
+                }
+                pages.add(chunkID);
+                numberOfHitsPerPage.put(chunkID, 0);
+                currentHitPerPage.put(chunkID, 0);
+            }
+            this.currentPage = pages.stream().sorted().findFirst().orElse(1);
+            isPageInfoLoaded = true;
+        } else {
+            /*
+             * non-regex searches don't record the chunks in the artifacts, so
+             * we need to look them up
+             */
+            Keyword keywordQuery = new Keyword(keyword, true);
+            KeywordSearchQuery chunksQuery
+                    = new LuceneQuery(new KeywordList(Arrays.asList(keywordQuery)), keywordQuery);
+            chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId));
+            try {
+                hits = chunksQuery.performQuery();
+                loadPageInfoFromHits();
+            } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
+                logger.log(Level.SEVERE, "Could not perform the query to get chunk info and get highlights:" + keywordQuery.getSearchTerm(), ex); //NON-NLS
+                MessageNotifyUtil.Notify.error(Bundle.HighlightedText_query_exception_msg() + keywordQuery.getSearchTerm(), (ex.getCause() != null ? ex.getCause() : ex).getMessage());
             }
-            pages.add(chunkID);
-            numberOfHitsPerPage.put(chunkID, 0);
-            currentHitPerPage.put(chunkID, 0);
         }
     }
 
@@ -154,45 +179,6 @@ class HighlightedText implements IndexedText {
      *
      * @return
      */
-    static private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, Content content) {
-        if (literal_query) {
-            //literal, treat as non-regex, non-term component query
-            return constructEscapedSolrQuery(query.getQueryString());
-        } else //construct a Solr query using aggregated terms to get highlighting
-        //the query is executed later on demand
-        {
-            if (queryResults.getKeywords().size() == 1) {
-                //simple case, no need to process subqueries and do special escaping
-                Keyword keyword = queryResults.getKeywords().iterator().next();
-                return constructEscapedSolrQuery(keyword.getSearchTerm());
-            } else {
-                //find terms for this content hit
-                List<Keyword> hitTerms = new ArrayList<>();
-                for (Keyword keyword : queryResults.getKeywords()) {
-                    for (KeywordHit hit : queryResults.getResults(keyword)) {
-                        if (hit.getContent().equals(content)) {
-                            hitTerms.add(keyword);
-                            break; //go to next term
-                        }
-                    }
-                }
-
-                StringBuilder highlightQuery = new StringBuilder();
-                final int lastTerm = hitTerms.size() - 1;
-                int curTerm = 0;
-                for (Keyword term : hitTerms) {
-                    //escape subqueries, MAKE SURE they are not escaped again later
-                    highlightQuery.append(constructEscapedSolrQuery(term.getSearchTerm()));
-                    if (lastTerm != curTerm) {
-                        highlightQuery.append(" "); //acts as OR ||
-                    }
-
-                    ++curTerm;
-                }
-                return highlightQuery.toString();
-            }
-        }
-    }
 
     /**
      * Constructs a complete, escaped Solr query that is ready to be used.
@@ -236,9 +222,7 @@ class HighlightedText implements IndexedText {
              */ loadPageInfoFromArtifact();
         } else if (hasChunks) {
             // if the file has chunks, get pages with hits, sorted
-            if (loadPageInfoFromHits()) {
-                //JMTOD: look at error handeling and return values...
-            }
+            loadPageInfoFromHits();
         } else {
             //non-regex, no chunks
             this.numberPages = 1;
@@ -246,29 +230,12 @@ class HighlightedText implements IndexedText {
             numberOfHitsPerPage.put(1, 0);
             pages.add(1);
             currentHitPerPage.put(1, 0);
+            isPageInfoLoaded = true;
         }
-        isPageInfoLoaded = true;
+
     }
 
-    private boolean loadPageInfoFromHits() {
-//        /*
-//         * If this is being called from the artifacts / dir tree, then we need
-//         * to perform the search to get the highlights.
-//         */
-//        if (hits == null) {
-//
-//            Keyword keywordQuery = new Keyword(keywordHitQuery, true);
-//            KeywordSearchQuery chunksQuery 
-//                    = new LuceneQuery(new KeywordList(Arrays.asList(keywordQuery)), keywordQuery);
-//            chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId));
-//            try {
-//                hits = chunksQuery.performQuery();
-//            } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
-//                logger.log(Level.SEVERE, "Could not perform the query to get chunk info and get highlights:" + keywordQuery.getSearchTerm(), ex); //NON-NLS
-//                MessageNotifyUtil.Notify.error(Bundle.HighlightedText_query_exception_msg() + keywordQuery.getSearchTerm(), ex.getCause().getMessage());
-//                return true;
-//            }
-////        }
+    private void loadPageInfoFromHits() {
         //organize the hits by page, filter as needed
         TreeSet<Integer> pagesSorted = new TreeSet<>();
 
@@ -277,8 +244,9 @@ class HighlightedText implements IndexedText {
                 int chunkID = hit.getChunkId();
                 if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
                     pagesSorted.add(chunkID);
-
-                    this.keywords.add(hit.getHit());
+                    if (StringUtils.isNotBlank(hit.getHit())) {
+                        this.keywords.add(hit.getHit());
+                    }
                 }
             }
         }
@@ -293,7 +261,7 @@ class HighlightedText implements IndexedText {
             pages.add(page);
             currentHitPerPage.put(page, 0); //set current hit to 0th
         }
-        return false;
+        isPageInfoLoaded = true;
     }
 
     @Override
@@ -410,26 +378,29 @@ class HighlightedText implements IndexedText {
         }
         final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
 
-//        if (isRegex) {
-        q.setQuery(filterQuery);
-        q.addField(Server.Schema.CONTENT_STR.toString());
-//        } else {
-//            // input query has already been properly constructed and escaped
-//            q.setQuery(keywordHitQuery);
-//            q.addField(Server.Schema.TEXT.toString());
-//            q.addFilterQuery(filterQuery);
-//            q.addHighlightField(LuceneQuery.HIGHLIGHT_FIELD);
-//            q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
-//
-//            //tune the highlighter
-//            q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
-//            q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
-//            q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
-//            q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
-//
-//            //docs says makes sense for the original Highlighter only, but not really
-//            q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
-//        }
+        if (artifact != null && qt == QueryType.REGEX) {
+            q.setQuery(filterQuery);
+            q.addField(Server.Schema.CONTENT_STR.toString());
+        } else {
+            final String highlightQuery = keywords.stream()
+                    .map(HighlightedText::constructEscapedSolrQuery)
+                    .collect(Collectors.joining(" "));
+
+            q.setQuery(highlightQuery);
+            q.addField(Server.Schema.TEXT.toString());
+            q.addFilterQuery(filterQuery);
+            q.addHighlightField(LuceneQuery.HIGHLIGHT_FIELD);
+            q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
+
+            //tune the highlighter
+            q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
+            q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
+            q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
+            q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
+
+            //docs says makes sense for the original Highlighter only, but not really
+            q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
+        }
         try {
             QueryResponse response = solrServer.query(q, METHOD.POST);
 
@@ -532,8 +503,7 @@ class HighlightedText implements IndexedText {
         for (String unquotedKeyword : keywords) {
             int textOffset = 0;
             int hitOffset;
-
-            while ((hitOffset = text.indexOf(unquotedKeyword, textOffset)) != -1) {
+            while ((hitOffset = StringUtils.indexOfIgnoreCase(text, unquotedKeyword, textOffset)) != -1) {
                 // Append the portion of text up to (but not including) the hit.
                 highlightedText.append(text.substring(textOffset, hitOffset));
                 // Add in the highlighting around the keyword.
@@ -542,12 +512,11 @@ class HighlightedText implements IndexedText {
                 highlightedText.append(HIGHLIGHT_POST);
 
                 // Advance the text offset past the keyword.
-                textOffset = hitOffset + unquotedKeyword.length() + 1;
+                textOffset = hitOffset + unquotedKeyword.length();
             }
-
+            // Append the remainder of text field
+            highlightedText.append(text.substring(textOffset, text.length()));
             if (highlightedText.length() > 0) {
-                // Append the remainder of text field and return.
-                highlightedText.append(text.substring(textOffset, text.length()));
 
             } else {
                 return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchFilterNode.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchFilterNode.java
index bd51e89c95..4da8ccb7fc 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchFilterNode.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchFilterNode.java
@@ -45,7 +45,7 @@ import org.sleuthkit.datamodel.File;
  */
 class KeywordSearchFilterNode extends FilterNode {
 
-    KeywordSearchFilterNode(HighlightedText highlights, Node original) {
+    KeywordSearchFilterNode(QueryResults highlights, Node original) {
         super(original, null, new ProxyLookup(Lookups.singleton(highlights), original.getLookup()));
     }
 
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
index 56dbd49fbf..4d7af13785 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
@@ -147,7 +147,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 
         int hitNumber = 0;
         List<KeyValueQueryContent> tempList = new ArrayList<>();
-//        final SetMultimap<Long, KeywordHit> orgnizeByDocID = orgnizeByDocID(queryResults);
         for (KeywordHit hit : getOneHitPerObject(queryResults)) {
 
             /**
@@ -169,12 +168,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
                 properties.put(TSK_KEYWORD_PREVIEW.getDisplayName(), hit.getSnippet());
             }
 
-            //JMTODO: I don't understand this comment or the below code... 
-            //@@@ USE ConentHit in UniqueFileMap instead of the below search
-            //get unique match result files
-            // BC: @@@ THis is really ineffecient.  We should keep track of this when
-            // we flattened the list of files to the unique files.            
-//            final String highlightQueryEscaped = getHighlightQuery(queryRequest, queryRequest.isLiteral(), queryResults, content);
             String hitName = hit.isArtifactHit()
                     ? hit.getArtifact().getDisplayName() + " Artifact" //NON-NLS
                     : contentName;
@@ -220,18 +213,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         return hits.values();
     }
 
-    SetMultimap<Long, KeywordHit> orgnizeByDocID(QueryResults queryResults) {
-        SetMultimap<Long, KeywordHit> hits = TreeMultimap.create(Long::compare, Comparator.comparing(KeywordHit::getChunkId));
-
-        for (Keyword keyWord : queryResults.getKeywords()) {
-            for (KeywordHit hit : queryResults.getResults(keyWord)) {
-
-                hits.put(hit.getSolrObjectId(), hit);
-            }
-        }
-        return hits;
-    }
-
     @Override
     protected Node createNodeForKey(KeyValueQueryContent key) {
         final Content content = key.getContent();
@@ -240,9 +221,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         Node kvNode = new KeyValueNode(key, Children.LEAF, Lookups.singleton(content));
 
         //wrap in KeywordSearchFilterNode for the markup content, might need to override FilterNode for more customization
-        // store the data in HighlightedMatchesSource so that it can be looked up (in content viewer)
-        HighlightedText highlights = new HighlightedText(key.getSolrObjectId(), hits);
-        return new KeywordSearchFilterNode(highlights, kvNode);
+        return new KeywordSearchFilterNode(hits, kvNode);
     }
 
     /**
@@ -277,8 +256,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
 
             this.hits = hits;
             this.query = query;
-//            boolean isRegex = hits.getQuery().isLiteral() == false;
-//            this.chunkIDs = chunkIDs;
         }
 
         Content getContent() {
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
index 4eb6773ad7..93a55e7a62 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
@@ -239,7 +239,7 @@ class LuceneQuery implements KeywordSearchQuery {
                         for (Object content_obj : content) {
                             String content_str = (String) content_obj;
                             //for new schemas, check that the hit is before the chunk/window boundary.
-                            int firstOccurence = StringUtils.indexOf(content_str.toLowerCase(), strippedQueryString.toLowerCase());
+                            int firstOccurence = StringUtils.indexOfIgnoreCase(content_str, strippedQueryString);
                             //there is no chunksize field for "parent" entries in the index
                             if (chunkSize == null || chunkSize == 0 || (firstOccurence > -1 && firstOccurence < chunkSize)) {
                                 matches.add(createKeywordtHit(highlightResponse, docId));
@@ -324,7 +324,7 @@ class LuceneQuery implements KeywordSearchQuery {
             }
         }
 
-        return new KeywordHit(docId, snippet);
+        return new KeywordHit(docId, snippet, keywordString);
     }
 
     /**