In addition to the standard encoding of queries before they are sent over the network, escape Lucene special characters so that Solr queries are always treated as literal words. This dramatically improves file query accuracy and highlighting, and we assume the user has no Lucene knowledge anyway.

This commit is contained in:
adam-m 2012-01-06 15:06:19 -05:00
parent 39e1c15e09
commit 6afe67afa0
5 changed files with 35 additions and 7 deletions

View File

@ -57,7 +57,7 @@ class HighlightedMatchesSource implements MarkupSource {
public String getMarkup() {
SolrQuery q = new SolrQuery();
final String queryEscaped = KeywordSearchUtil.escapeLuceneQuery(solrQuery);
final String queryEscaped = KeywordSearchUtil.escapeLuceneQuery(solrQuery, true);
q.setQuery(queryEscaped);
q.addFilterQuery("id:" + content.getId());

View File

@ -185,7 +185,7 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
StringBuilder highlightQuery = new StringBuilder();
Collection<Term> terms = tcq.getTerms();
for (Term term : terms) {
final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm(), true);
highlightQuery.append(termS);
highlightQuery.append(" ");
}

View File

@ -58,10 +58,32 @@ public class KeywordSearchUtil {
return dirName;
}
public static String escapeLuceneQuery(String query) {
/**
* Perform standard escaping / encoding into UTF-8 before sending over net
* @param query to be encoded
* @param escapeLuceneChars if true perform first escaping of Lucene specific special chars
* such as /+-&|!(){}[]^"~*?:\ and treat the whole query as literal word
* @return encoded query
*/
public static String escapeLuceneQuery(String query, boolean escapeLuceneChars) {
String queryEscaped = null;
String inputString = query;
if (escapeLuceneChars == true) {
final String ESCAPE_CHARS = "/+-&|!(){}[]^\"~*?:\\";
StringBuilder sb = new StringBuilder();
for (int i = 0; i< inputString.length(); ++i) {
char c = inputString.charAt(i);
if (ESCAPE_CHARS.contains(Character.toString(c)) ) {
sb.append("\\");
}
sb.append(c);
}
inputString = sb.toString();
}
try {
queryEscaped = URLEncoder.encode(query, "UTF-8");
queryEscaped = URLEncoder.encode(inputString, "UTF-8");
}
catch (UnsupportedEncodingException ex) {
logger.log(Level.SEVERE, "Error escaping URL query, should not happen.", ex);
@ -70,6 +92,7 @@ public class KeywordSearchUtil {
return queryEscaped;
}
public static void displayDialog(final String title, final String message, final DIALOG_MESSAGE_TYPE type) {
int messageType;
if (type == DIALOG_MESSAGE_TYPE.ERROR)

View File

@ -56,7 +56,7 @@ public class LuceneQuery implements KeywordSearchQuery {
@Override
public void escape() {
queryEscaped = KeywordSearchUtil.escapeLuceneQuery(query);
queryEscaped = KeywordSearchUtil.escapeLuceneQuery(query, true);
isEscaped = true;
}

View File

@ -165,11 +165,15 @@ public class TermComponentQuery implements KeywordSearchQuery {
//it's much more efficient and should yield the same file IDs as per match queries
//requires http POST query method due to potentially large query size
StringBuilder filesQueryB = new StringBuilder();
final int lastTerm = terms.size() -1;
int curTerm = 0;
for (Term term : terms) {
//final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
//final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm(), true);
final String termS = term.getTerm();
filesQueryB.append(termS);
filesQueryB.append(" ");
if (curTerm != lastTerm)
filesQueryB.append(" ");
++curTerm;
}
List<FsContent> uniqueMatches = new ArrayList<FsContent>();
@ -194,6 +198,7 @@ public class TermComponentQuery implements KeywordSearchQuery {
results.add(f);
}
}
return results;