Merge pull request #2279 from eugene7646/kws

1878 Fixed a bug where indexed text content viewer erroneously reported "no keyword hits"
2025-07-17 02:07:42 +00:00 · 2016-07-15 17:15:16 -04:00 · 2016-07-15 17:15:16 -04:00 · ef7226e1bc
commit ef7226e1bc
parent 46aed12d9b 53c0b90580
3 changed files with 33 additions and 53 deletions
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties
@ -64,7 +64,7 @@ ExtractedContentViewer.getTitle=Indexed Text
 ExtractedContentViewer.getSolrContent.knownFileMsg=<p style\=''font-style\:italic''>{0} is a known file (based on MD5 hash) and does not have text in the index.</p>
 ExtractedContentViewer.getSolrContent.noTxtYetMsg=<p style\=''font-style\:italic''>{0} does not have text in the index.<br/>It may have no text, not been analyzed yet, or keyword search was not enabled during ingest.</p>
 ExtractedContentViewer.getSolrContent.txtBodyItal=<span style\=''font-style\:italic''>{0}</span>
-HighlightedMatchesSource.getMarkup.noMatchMsg=<html><pre><span style\\\\\='background\\\\\:yellow'>There were no keyword hits on this page. <br />Keyword could have been in file name. <br />Advance to another page for hits or choose Extracted Text to view original text..</span></pre></html>
+HighlightedMatchesSource.getMarkup.noMatchMsg=<html><pre><span style\\\\\='background\\\\\:yellow'>There were no keyword hits on this page. <br />Keyword could have been in file name. <br />Advance to another page for hits or to view original text choose File Text <br />in the drop down menu to the right.</span></pre></html>
 HighlightedMatchesSource.getMarkup.queryFailedMsg=<html><pre><span style\\\\\='background\\\\\:yellow'>Failed to retrieve keyword hit results. <br />Confirm that Autopsy can connect to the Solr server. <br /></span></pre></html>
 HighlightedMatchesSource.toString=Search Results
 Installer.reportPortError=Indexing server port {0} is not available.  Check if your security software does not block {1} and consider changing {2} in {3} property file in the application user folder. Then try rebooting your system if another process was causing the conflict.
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java
@ -298,45 +298,17 @@ class HighlightedText implements IndexedText, TextMarkupLookup {

        String highLightField = null;

-        String highlightQuery = keywordHitQuery;
-
        if (isRegex) {
            highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
-            //escape special lucene chars if not already escaped (if not a compound query)
-            //TODO a better way to mark it a compound highlight query
-            final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":";
-            if (!highlightQuery.contains(findSubstr)) {
-                highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
-            }
        } else {
            highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
-            //escape special lucene chars always for literal queries query
-            highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
        }

        SolrQuery q = new SolrQuery();
        q.setShowDebugInfo(DEBUG); //debug

-        String queryStr = null;
-
-        if (isRegex) {
-            StringBuilder sb = new StringBuilder();
-            sb.append(highLightField).append(":");
-            if (group) {
-                sb.append("\"");
-            }
-            sb.append(highlightQuery);
-            if (group) {
-                sb.append("\"");
-            }
-            queryStr = sb.toString();
-        } else {
-            //use default field, simplifies query
-            //always force grouping/quotes
-            queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
-        }
-
-        q.setQuery(queryStr);
+        // input query has already been properly constructed and escaped
+        q.setQuery(keywordHitQuery);

        String contentIdStr = Long.toString(this.objectId);
        if (hasChunks) {
@ -367,7 +339,6 @@ class HighlightedText implements IndexedText, TextMarkupLookup {
            Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
            if (responseHighlightID == null) {
                return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
-
            }
            List<String> contentHighlights = responseHighlightID.get(highLightField);
            if (contentHighlights == null) {
@ -379,7 +350,8 @@ class HighlightedText implements IndexedText, TextMarkupLookup {

                return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS
            }
-        } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
+        } catch (Exception ex) {
+            logger.log(Level.WARNING, "Error executing Solr highlighting query: " + keywordHitQuery, ex); //NON-NLS
            return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.queryFailedMsg");
        }
    }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
@ -247,54 +247,62 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
     * @return
     */
    private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, Content content) {
-        String highlightQueryEscaped;
        if (literal_query) {
            //literal, treat as non-regex, non-term component query
-            highlightQueryEscaped = query.getQueryString();
+            return constructEscapedSolrQuery(query.getQueryString(), literal_query);
        } else {
            //construct a Solr query using aggregated terms to get highlighting
            //the query is executed later on demand
-            StringBuilder highlightQuery = new StringBuilder();
-
            if (queryResults.getKeywords().size() == 1) {
                //simple case, no need to process subqueries and do special escaping
                Keyword term = queryResults.getKeywords().iterator().next();
-                highlightQuery.append(term.toString());
+                return constructEscapedSolrQuery(term.getQuery(), literal_query);
            } else {
                //find terms for this content hit
-                List<String> hitTerms = new ArrayList<>();
+                List<Keyword> hitTerms = new ArrayList<>();
                for (Keyword keyword : queryResults.getKeywords()) {
                    for (KeywordHit hit : queryResults.getResults(keyword)) {
                        if (hit.getContent().equals(content)) {
-                            hitTerms.add(keyword.toString());
+                            hitTerms.add(keyword);
                            break; //go to next term
                        }
                    }
                }

+                StringBuilder highlightQuery = new StringBuilder();
                final int lastTerm = hitTerms.size() - 1;
                int curTerm = 0;
-                for (String term : hitTerms) {
-                    //escape subqueries, they shouldn't be escaped again later
-                    final String termS = KeywordSearchUtil.escapeLuceneQuery(term);
-                    highlightQuery.append("\"");
-                    highlightQuery.append(termS);
-                    highlightQuery.append("\"");
+                for (Keyword term : hitTerms) {
+                    //escape subqueries, MAKE SURE they are not escaped again later
+                    highlightQuery.append(constructEscapedSolrQuery(term.getQuery(), literal_query));
                    if (lastTerm != curTerm) {
                        highlightQuery.append(" "); //acts as OR ||
-                        //force HIGHLIGHT_FIELD_REGEX index and stored content
-                        //in each term after first. First term taken care by HighlightedMatchesSource
-                        highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":");
                    }

                    ++curTerm;
                }
+                return highlightQuery.toString();
            }
-            //String highlightQueryEscaped = KeywordSearchUtil.escapeLuceneQuery(highlightQuery.toString());
-            highlightQueryEscaped = highlightQuery.toString();
        }
-
-        return highlightQueryEscaped;
+    }
+    
+    /**
+     * Constructs a complete, escaped Solr query that is ready to be used.
+     * 
+     * @param query keyword term to be searched for
+     * @param literal_query flag whether query is literal or regex
+     * @return Solr query string
+     */
+    private String constructEscapedSolrQuery(String query, boolean literal_query) {
+        StringBuilder highlightQuery = new StringBuilder();
+        String highLightField;
+        if (literal_query) {
+            highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
+        } else {
+            highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
+        }
+        highlightQuery.append(highLightField).append(":").append("\"").append(KeywordSearchUtil.escapeLuceneQuery(query)).append("\"");
+        return highlightQuery.toString();
    }

    @Override