From d3b3efa657d257ee43a5c391a8c35dc3ed8efb05 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 15 Jul 2016 12:30:12 -0400 Subject: [PATCH] Fixed several bugs in Solr highlighting query construction and escaping --- .../keywordsearch/HighlightedText.java | 21 ++++++++------- .../KeywordSearchResultFactory.java | 26 +++++++++++++------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java index 4bc5a5897a..1ed1941a1b 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java @@ -298,26 +298,28 @@ class HighlightedText implements IndexedText, TextMarkupLookup { String highLightField = null; - String highlightQuery = keywordHitQuery; + //String highlightQuery = keywordHitQuery; if (isRegex) { highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX; //escape special lucene chars if not already escaped (if not a compound query) //TODO a better way to mark it a compound highlight query - final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":"; - if (!highlightQuery.contains(findSubstr)) { - highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery); - } + + // ELDEBUG: + //final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":"; + //if (!highlightQuery.contains(findSubstr)) { + // highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery); + //} } else { highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL; //escape special lucene chars always for literal queries query - highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery); + //highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery); } SolrQuery q = new SolrQuery(); q.setShowDebugInfo(DEBUG); //debug - String queryStr = null; + /*String queryStr = null; if (isRegex) { StringBuilder sb = new StringBuilder(); @@ -334,9 +336,10 @@ class HighlightedText implements IndexedText, TextMarkupLookup { //use default field, simplifies query //always force grouping/quotes queryStr = KeywordSearchUtil.quoteQuery(highlightQuery); - } + }*/ - q.setQuery(queryStr); + //q.setQuery(queryStr); + q.setQuery(keywordHitQuery); String contentIdStr = Long.toString(this.objectId); if (hasChunks) { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java index 48ccf10e9b..aabe37f9fc 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java @@ -247,19 +247,20 @@ class KeywordSearchResultFactory extends ChildFactory { * @return */ private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, Content content) { - String highlightQueryEscaped; + //String highlightQueryEscaped; + StringBuilder highlightQuery = new StringBuilder(); if (literal_query) { //literal, treat as non-regex, non-term component query - highlightQueryEscaped = query.getQueryString(); + highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_LITERAL).append(":").append(KeywordSearchUtil.escapeLuceneQuery(query.getQueryString())); } else { //construct a Solr query using aggregated terms to get highlighting //the query is executed later on demand - StringBuilder highlightQuery = new StringBuilder(); if (queryResults.getKeywords().size() == 1) { //simple case, no need to process subqueries and do special escaping Keyword term = queryResults.getKeywords().iterator().next(); - highlightQuery.append(term.toString()); + //highlightQuery.append(term.toString()); + highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":").append(KeywordSearchUtil.escapeLuceneQuery(term.toString())); } else { //find terms for this content hit List hitTerms = new ArrayList<>(); @@ -276,26 +277,35 @@ class KeywordSearchResultFactory extends ChildFactory { int curTerm = 0; for (String term : hitTerms) { //escape subqueries, they shouldn't be escaped again later + //StringBuilder currentKeywordQuery = new StringBuilder(); + //currentKeywordQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":").append(KeywordSearchUtil.escapeLuceneQuery(term)); + //highlightQuery.append(KeywordSearchUtil.quoteQuery(currentKeywordQuery.toString())); + + highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":").append(KeywordSearchUtil.escapeLuceneQuery(term)); + /*final String termS = KeywordSearchUtil.escapeLuceneQuery(term); highlightQuery.append("\""); highlightQuery.append(termS); highlightQuery.append("\"");*/ - highlightQuery.append(term); // ELDEBUG + + //highlightQuery.append(term); // ELDEBUG if (lastTerm != curTerm) { highlightQuery.append(" "); //acts as OR || //force HIGHLIGHT_FIELD_REGEX index and stored content //in each term after first. First term taken care by HighlightedMatchesSource - highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":"); + + //highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":"); } ++curTerm; } } //String highlightQueryEscaped = KeywordSearchUtil.escapeLuceneQuery(highlightQuery.toString()); - highlightQueryEscaped = highlightQuery.toString(); + + //highlightQueryEscaped = highlightQuery.toString(); } - return highlightQueryEscaped; + return highlightQuery.toString(); } @Override