Merge pull request #2425 from esaunders/kws_highlighting_fix

KWS highlighting and multi-page hit fixes
Richard Cordovano 2016-12-07 16:44:08 -05:00 committed by GitHub
commit f11f46a48f
2 changed files with 2 additions and 36 deletions

HighlightedText.java

@@ -84,7 +84,7 @@ class HighlightedText implements IndexedText, TextMarkupLookup {
     //when the results are not known and need to requery to get hits
     HighlightedText(long objectId, String solrQuery, boolean isRegex, String originalQuery) {
-        this(objectId, solrQuery, isRegex);
+        this(objectId, KeywordSearchUtil.quoteQuery(solrQuery), isRegex);
         this.originalQuery = originalQuery;
     }
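This is the keyword-highlighting half of the fix: the literal (non-regex) query is now passed through KeywordSearchUtil.quoteQuery() before being stored for the highlighting re-query. Quoting matters because an unquoted multi-word term is tokenized by Solr and can highlight the wrong spans. As a rough illustration of what such a quoting helper does (a hedged sketch, not the actual KeywordSearchUtil code):

```java
// Hypothetical sketch of a query-quoting helper; the real
// KeywordSearchUtil.quoteQuery() in Autopsy may differ in detail.
class QuoteSketch {
    static String quoteQuery(String query) {
        // Wrap the term in double quotes so Solr matches it as a literal
        // phrase instead of tokenizing it, unless it is already quoted.
        if (query.length() > 1 && query.startsWith("\"") && query.endsWith("\"")) {
            return query;
        }
        return "\"" + query + "\"";
    }
}
```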

LuceneQuery.java

@@ -206,7 +206,6 @@ class LuceneQuery implements KeywordSearchQuery {
         QueryResponse response;
         SolrDocumentList resultList;
         Map<String, Map<String, List<String>>> highlightResponse;
-        Set<SolrDocument> uniqueSolrDocumentsWithHits;

         response = solrServer.query(q, METHOD.POST);
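For context, q here is a SolrJ query with highlighting enabled, and getHighlighting() on the response returns the per-document snippet map used below. A minimal self-contained sketch of that setup, assuming a local Solr URL and a "text" field (both assumptions, not taken from this diff):

```java
import java.util.List;
import java.util.Map;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocumentList;

public class HighlightQuerySketch {
    public static void main(String[] args) throws Exception {
        // Assumed local core; Autopsy actually manages its own Solr server.
        HttpSolrClient solr =
                new HttpSolrClient.Builder("http://localhost:8983/solr/text_index").build();

        SolrQuery q = new SolrQuery("\"keyword\"");
        q.setHighlight(true);        // ask Solr to return snippets
        q.addHighlightField("text"); // "text" is an assumed field name
        q.setHighlightSnippets(1);

        QueryResponse response = solr.query(q, METHOD.POST);
        SolrDocumentList resultList = response.getResults();
        // objectId_chunk -> field name -> snippet previews
        Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
        System.out.println(resultList.getNumFound() + " hits, "
                + highlightResponse.size() + " highlighted docs");
        solr.close();
    }
}
```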
@@ -215,9 +214,6 @@ class LuceneQuery implements KeywordSearchQuery {
         // objectId_chunk -> "text" -> List of previews
         highlightResponse = response.getHighlighting();
-        // get the unique set of files with hits
-        uniqueSolrDocumentsWithHits = filterOneHitPerDocument(resultList);
-
         // cycle through results in sets of MAX_RESULTS
         for (int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
             q.setStart(start);
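The loop that begins here pages through the full result set MAX_RESULTS rows at a time by advancing the query's start offset; this is the "multi-page hit" half of the fix, since every page of resultList is now processed rather than a pre-filtered one-per-file set. A stripped-down sketch of the paging pattern (the names and page size are placeholders):

```java
import java.io.IOException;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

class PagingSketch {
    static final int MAX_RESULTS = 1000; // placeholder page size

    static void fetchAllPages(SolrClient solr, SolrQuery q)
            throws SolrServerException, IOException {
        boolean allMatchesFetched = false;
        for (int start = 0; !allMatchesFetched; start += MAX_RESULTS) {
            q.setStart(start);      // offset into the full result set
            q.setRows(MAX_RESULTS); // page size
            SolrDocumentList page = solr.query(q, METHOD.POST).getResults();
            // done once we have walked past the total hit count
            allMatchesFetched = start + page.size() >= page.getNumFound();
            for (SolrDocument doc : page) {
                // ... create a keyword hit for each result document ...
            }
        }
    }
}
```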
@@ -232,7 +228,7 @@ class LuceneQuery implements KeywordSearchQuery {
                 return matches;
             }
-            for (SolrDocument resultDoc : uniqueSolrDocumentsWithHits) {
+            for (SolrDocument resultDoc : resultList) {
                 KeywordHit contentHit;
                 try {
                     contentHit = createKeywordtHit(resultDoc, highlightResponse, sleuthkitCase);
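createKeywordtHit() looks up each result document's preview in the highlighting map, which is keyed by the objectId_chunk ID noted above. A small sketch of that lookup (the "id" and "text" field names are assumptions standing in for the Server.Schema constants the real code uses):

```java
import java.util.List;
import java.util.Map;
import org.apache.solr.common.SolrDocument;

class SnippetLookupSketch {
    /**
     * Returns the first highlight snippet for a result document, or null if
     * Solr produced none. Field names "id" and "text" are assumptions here.
     */
    static String firstSnippet(SolrDocument doc,
            Map<String, Map<String, List<String>>> highlightResponse) {
        String docId = doc.getFieldValue("id").toString(); // e.g. "1234_5" = objectId_chunk
        Map<String, List<String>> fieldMap = highlightResponse.get(docId);
        if (fieldMap == null) {
            return null;
        }
        List<String> previews = fieldMap.get("text");
        return (previews == null || previews.isEmpty()) ? null : previews.get(0);
    }
}
```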
@@ -297,36 +293,6 @@ class LuceneQuery implements KeywordSearchQuery {
         return q;
     }

-    /**
-     * Create the minimum set of documents. Ignores chunk IDs. Only one hit per
-     * file in results.
-     *
-     * @param resultList
-     *
-     * @return
-     */
-    private Set<SolrDocument> filterOneHitPerDocument(SolrDocumentList resultList) {
-        // sort the list so that we consistently pick the same chunk each time.
-        // note this sort is doing a string comparison and not an integer comparison, so
-        // chunk 10 will be smaller than chunk 9.
-        Collections.sort(resultList, new Comparator<SolrDocument>() {
-            @Override
-            public int compare(SolrDocument left, SolrDocument right) {
-                // ID is in the form of ObjectId_Chunk
-                String leftID = left.getFieldValue(Server.Schema.ID.toString()).toString();
-                String rightID = right.getFieldValue(Server.Schema.ID.toString()).toString();
-                return leftID.compareTo(rightID);
-            }
-        });
-
-        // NOTE: We could probably just iterate through the list and compare each ID with the
-        // previous ID to get the unique documents faster than using this set now that the list
-        // is sorted.
-        Set<SolrDocument> solrDocumentsWithMatches = new TreeSet<>(new SolrDocumentComparatorIgnoresChunkId());
-        solrDocumentsWithMatches.addAll(resultList);
-        return solrDocumentsWithMatches;
-    }
-
     private KeywordHit createKeywordtHit(SolrDocument solrDoc, Map<String, Map<String, List<String>>> highlightResponse, SleuthkitCase caseDb) throws TskException {
         /**
          * Get the first snippet from the document if keyword search is
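The deleted filterOneHitPerDocument() collapsed chunk-level hits down to one per file, which appears to be why hits beyond a document's first chunk were being dropped. Its own comments also flag a subtlety: the sort compares IDs as strings, so "1234_10" orders before "1234_9" ('1' < '9'), and the chunk kept was not the numerically lowest. The NOTE's suggested single-pass alternative over the sorted list would look roughly like this (the ID parsing is an assumption based on the objectId_chunk format described above):

```java
import java.util.ArrayList;
import java.util.List;
import org.apache.solr.common.SolrDocument;

class OneHitPerFileSketch {
    /** Keeps the first document seen per object ID; assumes IDs are "objectId_chunk". */
    static List<SolrDocument> oneHitPerDocument(List<SolrDocument> sorted) {
        List<SolrDocument> unique = new ArrayList<>();
        String previousObjectId = null;
        for (SolrDocument doc : sorted) {
            // Strip the chunk suffix so "1234_9" and "1234_10" compare equal.
            String id = doc.getFieldValue("id").toString();
            int sep = id.indexOf('_');
            String objectId = (sep == -1) ? id : id.substring(0, sep);
            if (!objectId.equals(previousObjectId)) {
                unique.add(doc);
                previousObjectId = objectId;
            }
        }
        return unique;
    }
}
```

With the filter removed above, the result loop now visits every chunk-level hit, which is what multi-page (multi-chunk) highlighting needs.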