Allow regex queries to match at start and end of content. Fix separation of file name and content.

2025-07-16 01:37:43 +00:00 · 2017-02-02 15:12:40 +01:00 · 2017-02-02 15:12:40 +01:00 · 7dea03bf59
commit 7dea03bf59
parent 3f9b161a71
1 changed files with 5 additions and 4 deletions
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java
@ -238,7 +238,7 @@ final class RegexQuery implements KeywordSearchQuery {
        final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
        final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());

-        ArrayList<String> content_str = (ArrayList<String>) solrDoc.get(Server.Schema.CONTENT_STR.toString());
+        final Collection<Object> content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());

        // By default, we create keyword hits on whitespace or punctuation character boundaries.
        // Having a set of well defined boundary characters produces hits that can
@ -250,13 +250,14 @@ final class RegexQuery implements KeywordSearchQuery {
        String keywordTokenRegex
                = // If the given search string starts with .*, we ignore our default
                // boundary prefix characters
-                (queryStringContainsWildcardPrefix ? "" : BOUNDARY_CHARS) //NON-NLS
+                (queryStringContainsWildcardPrefix ? "" : "(^|" + BOUNDARY_CHARS + ")") //NON-NLS
                + keywordString
                // If the given search string ends with .*, we ignore our default
                // boundary suffix characters
-                + (queryStringContainsWildcardSuffix ? "" : BOUNDARY_CHARS); //NON-NLS
+                + (queryStringContainsWildcardSuffix ? "" : "($|" + BOUNDARY_CHARS + ")"); //NON-NLS

-        for (String content : content_str) {
+        for (Object content_obj : content_str) {
+            String content = (String) content_obj;
            Matcher hitMatcher = Pattern.compile(keywordTokenRegex).matcher(content);
            int offset = 0;