Fixed several bugs in Solr highlighting query construction and escaping

This commit is contained in:
Eugene Livis 2016-07-15 12:30:12 -04:00
parent b29681356d
commit d3b3efa657
2 changed files with 30 additions and 17 deletions

View File

@ -298,26 +298,28 @@ class HighlightedText implements IndexedText, TextMarkupLookup {
String highLightField = null;
String highlightQuery = keywordHitQuery;
//String highlightQuery = keywordHitQuery;
if (isRegex) {
highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
//escape special lucene chars if not already escaped (if not a compound query)
//TODO a better way to mark it a compound highlight query
final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":";
if (!highlightQuery.contains(findSubstr)) {
highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
}
// ELDEBUG:
//final String findSubstr = LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":";
//if (!highlightQuery.contains(findSubstr)) {
// highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
//}
} else {
highLightField = LuceneQuery.HIGHLIGHT_FIELD_LITERAL;
//always escape special lucene chars for literal queries
highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
//highlightQuery = KeywordSearchUtil.escapeLuceneQuery(highlightQuery);
}
SolrQuery q = new SolrQuery();
q.setShowDebugInfo(DEBUG); //debug
String queryStr = null;
/*String queryStr = null;
if (isRegex) {
StringBuilder sb = new StringBuilder();
@ -334,9 +336,10 @@ class HighlightedText implements IndexedText, TextMarkupLookup {
//use default field, simplifies query
//always force grouping/quotes
queryStr = KeywordSearchUtil.quoteQuery(highlightQuery);
}
}*/
q.setQuery(queryStr);
//q.setQuery(queryStr);
q.setQuery(keywordHitQuery);
String contentIdStr = Long.toString(this.objectId);
if (hasChunks) {

View File

@ -247,19 +247,20 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
* @return the highlight query string built for this content hit
*/
private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, Content content) {
String highlightQueryEscaped;
//String highlightQueryEscaped;
StringBuilder highlightQuery = new StringBuilder();
if (literal_query) {
//literal, treat as non-regex, non-term component query
highlightQueryEscaped = query.getQueryString();
highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_LITERAL).append(":").append(KeywordSearchUtil.escapeLuceneQuery(query.getQueryString()));
} else {
//construct a Solr query using aggregated terms to get highlighting
//the query is executed later on demand
StringBuilder highlightQuery = new StringBuilder();
if (queryResults.getKeywords().size() == 1) {
//simple case, no need to process subqueries and do special escaping
Keyword term = queryResults.getKeywords().iterator().next();
highlightQuery.append(term.toString());
//highlightQuery.append(term.toString());
highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":").append(KeywordSearchUtil.escapeLuceneQuery(term.toString()));
} else {
//find terms for this content hit
List<String> hitTerms = new ArrayList<>();
@ -276,26 +277,35 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
int curTerm = 0;
for (String term : hitTerms) {
//escape subqueries, they shouldn't be escaped again later
//StringBuilder currentKeywordQuery = new StringBuilder();
//currentKeywordQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":").append(KeywordSearchUtil.escapeLuceneQuery(term));
//highlightQuery.append(KeywordSearchUtil.quoteQuery(currentKeywordQuery.toString()));
highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":").append(KeywordSearchUtil.escapeLuceneQuery(term));
/*final String termS = KeywordSearchUtil.escapeLuceneQuery(term);
highlightQuery.append("\"");
highlightQuery.append(termS);
highlightQuery.append("\"");*/
highlightQuery.append(term); // ELDEBUG
//highlightQuery.append(term); // ELDEBUG
if (lastTerm != curTerm) {
highlightQuery.append(" "); //acts as OR ||
//force HIGHLIGHT_FIELD_REGEX index and stored content
//in each term after the first. The first term is taken care of by HighlightedMatchesSource
highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":");
//highlightQuery.append(LuceneQuery.HIGHLIGHT_FIELD_REGEX).append(":");
}
++curTerm;
}
}
//String highlightQueryEscaped = KeywordSearchUtil.escapeLuceneQuery(highlightQuery.toString());
highlightQueryEscaped = highlightQuery.toString();
//highlightQueryEscaped = highlightQuery.toString();
}
return highlightQueryEscaped;
return highlightQuery.toString();
}
@Override