Increase query accuracy and reduce the query postprocessing needed (at least for case-insensitive queries) by querying only the whitespace-delimited Solr field in the aggregate query to get file ids

2025-07-06 21:00:22 +00:00 · 2012-01-10 17:12:29 -05:00 · 2012-01-10 17:12:29 -05:00 · 52b29a2892
commit 52b29a2892
parent 6a6753868e
5 changed files with 113 additions and 86 deletions
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedMatchesSource.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedMatchesSource.java
@ -57,11 +57,11 @@ class HighlightedMatchesSource implements MarkupSource {
    public String getMarkup() {

        SolrQuery q = new SolrQuery();
-        final String queryEscaped = KeywordSearchUtil.escapeLuceneQuery(solrQuery, true);
+        final String queryEscaped = KeywordSearchUtil.escapeLuceneQuery(solrQuery, true, false);

        q.setQuery(queryEscaped);
        q.addFilterQuery("id:" + content.getId());
-        q.addHighlightField("content");
+        q.addHighlightField("content"); //for exact highlighting, try content_ws field (with stored="true" in Solr schema)
        q.setHighlightSimplePre(HIGHLIGHT_PRE);
        q.setHighlightSimplePost(HIGHLIGHT_POST);
        q.setHighlightFragsize(0); // don't fragment the highlight
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java
@ -70,8 +70,7 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
            public String toString() {
                return "Match";
            }
-        },
-    }
+        },}
    private Presentation presentation;
    private Collection<String> queries;
    private Collection<KeyValueThing> things;
@ -149,6 +148,7 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
            childFactory = new ResultCollapsedChildFactory(thing);
            final Node ret = new KeyValueNode(thing, Children.create(childFactory, true));
            SwingUtilities.invokeLater(new Runnable() {
+
                @Override
                public void run() {
                    //DataResultViewerTable view = Utilities.actionsGlobalContext().lookup(DataResultViewerTable.class);
@ -199,11 +199,11 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
            final int lastTerm = terms.size() - 1;
            int curTerm = 0;
            for (Term term : terms) {
-                final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm(), true);
+                final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm(), true, false);
                if (!termS.contains("*")) {
                    highlightQuery.append(termS);
                    if (lastTerm != curTerm) {
-                        highlightQuery.append(" ");
+                        highlightQuery.append(" "); //acts as OR ||
                    }
                }
            }
@ -304,25 +304,27 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {

                final String contentStr = KeywordSearch.getServer().getCore().getSolrContent(content);

-                //make sure the file contains a match (this gets rid of large number of false positives)
-                //TODO option in GUI to include approximate matches (faster)
-                boolean matchFound = false;
+                //postprocess
+                //make sure Solr result contains a match (this gets rid of large number of false positives)
+                boolean postprocess = true;
+                boolean matchFound = true;
+                if (postprocess) {
                    if (contentStr != null) {//if not null, some error getting from Solr, handle it by not filtering out
                        //perform java regex to validate match from Solr
                        String origQuery = thingContent.getQuery();
                        
-                    //escape the regex query because it may contain special characters from the previous match
-                    //since it's a match result, we can assume literal pattern
+                        //since query is a match result, we can assume literal pattern
                        origQuery = Pattern.quote(origQuery);
                        Pattern p = Pattern.compile(origQuery, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

                        Matcher m = p.matcher(contentStr);
                        matchFound = m.find();
                    }
+                }

                if (matchFound) {
                    Node kvNode = new KeyValueNode(thingContent, Children.LEAF);
-                    //wrap in KeywordSearchFilterNode for the markup content, might need to override FilterNode for more customization
+                    //wrap in KeywordSearchFilterNode for the markup content
                    HighlightedMatchesSource highlights = new HighlightedMatchesSource(content, query);
                    return new KeywordSearchFilterNode(highlights, kvNode, query);
                } else {
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchUtil.java
@ -16,7 +16,6 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-
 package org.sleuthkit.autopsy.keywordsearch;

 import java.awt.Component;
@ -31,8 +30,10 @@ import org.sleuthkit.datamodel.TskException;

 public class KeywordSearchUtil {

-    public enum DIALOG_MESSAGE_TYPE {ERROR, WARN, INFO};
+    public enum DIALOG_MESSAGE_TYPE {

+        ERROR, WARN, INFO
+    };
    private static final Logger logger = Logger.getLogger(KeywordSearchUtil.class.getName());

    public static String buildDirName(FsContent f) {
@ -65,7 +66,7 @@ public class KeywordSearchUtil {
     * such as /+-&|!(){}[]^"~*?:\ and treat the whole query as literal word
     * @return encoded query
     */
-    public static String escapeLuceneQuery(String query, boolean escapeLuceneChars) {
+    public static String escapeLuceneQuery(String query, boolean escapeLuceneChars, boolean encode) {
        String queryEscaped = null;
        String inputString = query;

@ -79,27 +80,29 @@ public class KeywordSearchUtil {
                }
                sb.append(c);
            }
-            inputString = sb.toString();
+            queryEscaped = inputString = sb.toString();
        }

+        if (encode) {
            try {
                queryEscaped = URLEncoder.encode(inputString, "UTF-8");
-        }
-        catch (UnsupportedEncodingException ex) {
+            } catch (UnsupportedEncodingException ex) {
                logger.log(Level.SEVERE, "Error escaping URL query, should not happen.", ex);
                queryEscaped = query;
            }
+        }
        return queryEscaped;
    }

-    
    public static void displayDialog(final String title, final String message, final DIALOG_MESSAGE_TYPE type) {
        int messageType;
-        if (type == DIALOG_MESSAGE_TYPE.ERROR)
+        if (type == DIALOG_MESSAGE_TYPE.ERROR) {
            messageType = JOptionPane.ERROR_MESSAGE;
-        else if (type == DIALOG_MESSAGE_TYPE.WARN)
+        } else if (type == DIALOG_MESSAGE_TYPE.WARN) {
            messageType = JOptionPane.WARNING_MESSAGE;
-        else messageType = JOptionPane.INFORMATION_MESSAGE;
+        } else {
+            messageType = JOptionPane.INFORMATION_MESSAGE;
+        }

        final Component parentComponent = null; // Use default window frame.
        JOptionPane.showMessageDialog(
@ -108,4 +111,20 @@ public class KeywordSearchUtil {
                title,
                messageType);
    }
+
+    public static boolean displayConfirmDialog(final String title, final String message, final DIALOG_MESSAGE_TYPE type) {
+        int messageType;
+        if (type == DIALOG_MESSAGE_TYPE.ERROR) {
+            messageType = JOptionPane.ERROR_MESSAGE;
+        } else if (type == DIALOG_MESSAGE_TYPE.WARN) {
+            messageType = JOptionPane.WARNING_MESSAGE;
+        } else {
+            messageType = JOptionPane.INFORMATION_MESSAGE;
+        }
+        if (JOptionPane.showConfirmDialog(null, message, title, JOptionPane.YES_NO_OPTION, messageType) == JOptionPane.YES_OPTION) {
+            return true;
+        } else {
+            return false;
+        }
+    }
 }
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
@ -56,7 +56,7 @@ public class LuceneQuery implements KeywordSearchQuery {

    @Override
    public void escape() {
-        queryEscaped = KeywordSearchUtil.escapeLuceneQuery(query, true);
+        queryEscaped = KeywordSearchUtil.escapeLuceneQuery(query, true, true);
        isEscaped = true;
    }

--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermComponentQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermComponentQuery.java
@ -168,11 +168,12 @@ public class TermComponentQuery implements KeywordSearchQuery {
        final int lastTerm = terms.size() - 1;
        int curTerm = 0;
        for (Term term : terms) {
-            final String termS = term.getTerm();
+            final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm(), true, false);
+            //final String termS = term.getTerm();
            if (!termS.contains("*")) {
-                filesQueryB.append(termS);
+                filesQueryB.append(TERMS_SEARCH_FIELD).append(":").append(termS);
                if (curTerm != lastTerm) {
-                    filesQueryB.append(" ");
+                    filesQueryB.append(" "); //acts as OR ||
                }
            }
            ++curTerm;
@ -181,7 +182,7 @@ public class TermComponentQuery implements KeywordSearchQuery {
        
        if (!terms.isEmpty()) {
            LuceneQuery filesQuery = new LuceneQuery(filesQueryB.toString());
-            filesQuery.escape();
+            //filesQuery.escape();
            try {
                uniqueMatches = filesQuery.performQuery();
            } catch (RuntimeException e) {
@ -190,8 +191,10 @@ public class TermComponentQuery implements KeywordSearchQuery {
        }


+        //result postprocessing
        //filter out non-matching files using the original query (whether literal or not)
-        //TODO this could be costly, for now just testing how it performs
+        boolean postprocess = false;
+        if (postprocess) {
            for (FsContent f : uniqueMatches) {
                Pattern p = Pattern.compile(queryEscaped, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
                final String contentStr = KeywordSearch.getServer().getCore().getSolrContent(f);
@ -200,6 +203,9 @@ public class TermComponentQuery implements KeywordSearchQuery {
                    results.add(f);
                }
            }
+        } else {
+            results.addAll(uniqueMatches);
+        }