From 7dea03bf599b18f2e45efe8adc77aa7f89e16a83 Mon Sep 17 00:00:00 2001 From: millmanorama Date: Thu, 2 Feb 2017 15:12:40 +0100 Subject: [PATCH] allow regex queries to match at start and end of content. fix separation of file name and content --- .../org/sleuthkit/autopsy/keywordsearch/RegexQuery.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index f188890591..aa56741ec5 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -238,7 +238,7 @@ final class RegexQuery implements KeywordSearchQuery { final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString(); final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString()); - ArrayList content_str = (ArrayList) solrDoc.get(Server.Schema.CONTENT_STR.toString()); + final Collection content_str = solrDoc.getFieldValues(Server.Schema.CONTENT_STR.toString()); // By default, we create keyword hits on whitespace or punctuation character boundaries. // Having a set of well defined boundary characters produces hits that can @@ -250,13 +250,14 @@ final class RegexQuery implements KeywordSearchQuery { String keywordTokenRegex = // If the given search string starts with .*, we ignore our default // boundary prefix characters - (queryStringContainsWildcardPrefix ? "" : BOUNDARY_CHARS) //NON-NLS + (queryStringContainsWildcardPrefix ? "" : "(^|" + BOUNDARY_CHARS + ")") //NON-NLS + keywordString // If the given search string ends with .*, we ignore our default // boundary suffix characters - + (queryStringContainsWildcardSuffix ? "" : BOUNDARY_CHARS); //NON-NLS + + (queryStringContainsWildcardSuffix ? 
"" : "($|" + BOUNDARY_CHARS + ")"); //NON-NLS - for (String content : content_str) { + for (Object content_obj : content_str) { + String content = (String) content_obj; Matcher hitMatcher = Pattern.compile(keywordTokenRegex).matcher(content); int offset = 0;