Merge pull request #2279 from eugene7646/kws

1878 Fixed a bug where indexed text content viewer erroneously reported "no keyword hits"
This commit is contained in:
Richard Cordovano 2016-07-15 17:15:16 -04:00 committed by GitHub
commit ef7226e1bc
3 changed files with 33 additions and 53 deletions

View File

@ -64,7 +64,7 @@ ExtractedContentViewer.getTitle=Indexed Text
ExtractedContentViewer.getSolrContent.knownFileMsg=<p style\=''font-style\:italic''>{0} is a known file (based on MD5 hash) and does not have text in the index.</p>
ExtractedContentViewer.getSolrContent.noTxtYetMsg=<p style\=''font-style\:italic''>{0} does not have text in the index.<br/>It may have no text, not been analyzed yet, or keyword search was not enabled during ingest.</p>
ExtractedContentViewer.getSolrContent.txtBodyItal=<span style\=''font-style\:italic''>{0}</span>
HighlightedMatchesSource.getMarkup.noMatchMsg=<html><pre><span style\\\\\='background\\\\\:yellow'>There were no keyword hits on this page. <br />Keyword could have been in file name. <br />Advance to another page for hits or choose Extracted Text to view original text..</span></pre></html>
HighlightedMatchesSource.getMarkup.noMatchMsg=<html><pre><span style\\\\\='background\\\\\:yellow'>There were no keyword hits on this page. <br />Keyword could have been in file name. <br />Advance to another page for hits or to view original text choose File Text <br />in the drop down menu to the right.</span></pre></html>
HighlightedMatchesSource.getMarkup.queryFailedMsg=<html><pre><span style\\\\\='background\\\\\:yellow'>Failed to retrieve keyword hit results. <br />Confirm that Autopsy can connect to the Solr server. <br /></span></pre></html>
HighlightedMatchesSource.toString=Search Results
Installer.reportPortError=Indexing server port {0} is not available. Check if your security software does not block {1} and consider changing {2} in {3} property file in the application user folder. Then try rebooting your system if another process was causing the conflict.

View File

@ -298,45 +298,17 @@ class HighlightedText implements IndexedText, TextMarkupLookup {
String highLightField = null;
String highlightQuery = keywordHitQuery;
if (isRegex) {
highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
//escape special lucene chars if not already escaped (if not a compound query)
//TODO a better way to mark it a compound highlight query
final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":";
if (!highlightQuery.contains(findSubstr)) {
highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
}
} else {
highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
//escape special lucene chars always for literal queries query
highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
}
SolrQuery q = new SolrQuery();
q.setShowDebugInfo(DEBUG); //debug
String queryStr = null;
if (isRegex) {
StringBuilder sb = new StringBuilder();
sb.append(highLightField).append(":");
if (group) {
sb.append("\"");
}
sb.append(highlightQuery);
if (group) {
sb.append("\"");
}
queryStr = sb.toString();
} else {
//use default field, simplifies query
//always force grouping/quotes
queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
}
q.setQuery(queryStr);
// input query has already been properly constructed and escaped
q.setQuery(keywordHitQuery);
String contentIdStr = Long.toString(this.objectId);
if (hasChunks) {
@ -367,7 +339,6 @@ class HighlightedText implements IndexedText, TextMarkupLookup {
Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
if (responseHighlightID == null) {
return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
}
List<String> contentHighlights = responseHighlightID.get(highLightField);
if (contentHighlights == null) {
@ -379,7 +350,8 @@ class HighlightedText implements IndexedText, TextMarkupLookup {
return "<html><pre>" + highlightedContent + "</pre></html>"; //NON-NLS
}
} catch (NoOpenCoreException | KeywordSearchModuleException ex) {
} catch (Exception ex) {
logger.log(Level.WARNING, "Error executing Solr highlighting query: " + keywordHitQuery, ex); //NON-NLS
return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.queryFailedMsg");
}
}

View File

@ -247,54 +247,62 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
* @return
*/
private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, Content content) {
String highlightQueryEscaped;
if (literal_query) {
//literal, treat as non-regex, non-term component query
highlightQueryEscaped = query.getQueryString();
return constructEscapedSolrQuery(query.getQueryString(), literal_query);
} else {
//construct a Solr query using aggregated terms to get highlighting
//the query is executed later on demand
StringBuilder highlightQuery = new StringBuilder();
if (queryResults.getKeywords().size() == 1) {
//simple case, no need to process subqueries and do special escaping
Keyword term = queryResults.getKeywords().iterator().next();
highlightQuery.append(term.toString());
return constructEscapedSolrQuery(term.getQuery(), literal_query);
} else {
//find terms for this content hit
List<String> hitTerms = new ArrayList<>();
List<Keyword> hitTerms = new ArrayList<>();
for (Keyword keyword : queryResults.getKeywords()) {
for (KeywordHit hit : queryResults.getResults(keyword)) {
if (hit.getContent().equals(content)) {
hitTerms.add(keyword.toString());
hitTerms.add(keyword);
break; //go to next term
}
}
}
StringBuilder highlightQuery = new StringBuilder();
final int lastTerm = hitTerms.size() - 1;
int curTerm = 0;
for (String term : hitTerms) {
//escape subqueries, they shouldn't be escaped again later
final String termS = KeywordSearchUtil.escapeLuceneQuery(term);
highlightQuery.append("\"");
highlightQuery.append(termS);
highlightQuery.append("\"");
for (Keyword term : hitTerms) {
//escape subqueries, MAKE SURE they are not escaped again later
highlightQuery.append(constructEscapedSolrQuery(term.getQuery(), literal_query));
if (lastTerm != curTerm) {
highlightQuery.append(" "); //acts as OR ||
//force HIGHLIGHT_FIELD_REGEX index and stored content
//in each term after first. First term taken care by HighlightedMatchesSource
highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":");
}
++curTerm;
}
return highlightQuery.toString();
}
//String highlightQueryEscaped = KeywordSearchUtil.escapeLuceneQuery(highlightQuery.toString());
highlightQueryEscaped = highlightQuery.toString();
}
return highlightQueryEscaped;
}
/**
 * Builds a ready-to-use Solr query string of the form
 * {@code <highlight field>:"<escaped term>"}.
 *
 * The field name is LuceneQuery.HIGHLIGHT_FIELD_LITERAL for literal
 * queries and LuceneQuery.HIGHLIGHT_FIELD_REGEX otherwise. The term is
 * passed through KeywordSearchUtil.escapeLuceneQuery() and wrapped in
 * double quotes, so callers must not escape the result a second time.
 *
 * @param query         keyword term to be searched for
 * @param literal_query true if the query is literal, false if it is regex
 *
 * @return Solr query string
 */
private String constructEscapedSolrQuery(String query, boolean literal_query) {
    final String fieldName = literal_query
            ? LuceneQuery.HIGHLIGHT_FIELD_LITERAL
            : LuceneQuery.HIGHLIGHT_FIELD_REGEX;
    final String escapedTerm = KeywordSearchUtil.escapeLuceneQuery(query);
    return fieldName + ":" + "\"" + escapedTerm + "\"";
}
@Override