From 23ec64c45918b5b824951d55ffd97f945b88818f Mon Sep 17 00:00:00 2001 From: Jeff Wallace Date: Fri, 6 Dec 2013 15:19:22 -0500 Subject: [PATCH] Cleaned up performQuery --- .../autopsy/keywordsearch/LuceneQuery.java | 222 ++++++++++-------- 1 file changed, 121 insertions(+), 101 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java index 5a177b580b..4b4a1228be 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java @@ -34,6 +34,7 @@ import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.TermsResponse.Term; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; +import org.openide.util.Exceptions; import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.coreutils.EscapeUtil; import org.sleuthkit.autopsy.coreutils.Version; @@ -132,7 +133,7 @@ public class LuceneQuery implements KeywordSearchQuery { public Map> performQuery() throws NoOpenCoreException { Map> results = new HashMap>(); //in case of single term literal query there is only 1 term - results.put(keywordString, performLuceneQuery(false)); + results.put(keywordString, performLuceneQuery(true)); return results; } @@ -197,15 +198,55 @@ public class LuceneQuery implements KeywordSearchQuery { */ private List performLuceneQuery(boolean snippets) throws NoOpenCoreException { - List matches = new ArrayList(); - + List matches = new ArrayList<>(); boolean allMatchesFetched = false; - final Server solrServer = KeywordSearch.getServer(); + + SolrQuery q = createAndConfigureSolrQuery(snippets); + for (int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) { + q.setStart(start); + + try { + QueryResponse response = solrServer.query(q, METHOD.POST); + SolrDocumentList resultList = response.getResults(); + Map>> highlightResponse = response.getHighlighting(); + Set solrDocumentsWithMatches = filterDuplicateSolrDocuments(resultList); + + allMatchesFetched = start + MAX_RESULTS >= resultList.getNumFound(); + + SleuthkitCase sleuthkitCase; + try { + sleuthkitCase = Case.getCurrentCase().getSleuthkitCase(); + } catch (IllegalStateException ex) { + //no case open, must be just closed + return matches; + } + + for (SolrDocument resultDoc : solrDocumentsWithMatches) { + ContentHit contentHit; + try { + contentHit = createContentHitFromQueryResults(resultDoc, highlightResponse, snippets, sleuthkitCase); + } catch (TskException ex) { + return matches; + } + matches.add(contentHit); + } + + } catch (NoOpenCoreException ex) { + logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + keywordString, ex); + throw ex; + } catch (KeywordSearchModuleException ex) { + logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + keywordString, ex); + } + + } + return matches; + } + + private SolrQuery createAndConfigureSolrQuery(boolean snippets) { SolrQuery q = new SolrQuery(); q.setShowDebugInfo(DEBUG); //debug - //set query, force quotes/grouping around all literal queries final String groupedQuery = KeywordSearchUtil.quoteQuery(keywordStringEscaped); String theQueryStr = groupedQuery; @@ -215,14 +256,15 @@ public class LuceneQuery implements KeywordSearchQuery { sb.append(field).append(":").append(groupedQuery); theQueryStr = sb.toString(); } - q.setQuery(theQueryStr); q.setRows(MAX_RESULTS); + if (snippets) { q.setFields(Server.Schema.ID.toString(), Server.Schema.CONTENT.toString()); } else { q.setFields(Server.Schema.ID.toString()); } + for (KeywordQueryFilter filter : filters) { q.addFilterQuery(filter.toString()); } @@ -247,109 +289,63 @@ public class LuceneQuery implements KeywordSearchQuery { //analyze all content SLOW! consider lowering q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); } + + return q; + } - for (int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) { - q.setStart(start); + private Set filterDuplicateSolrDocuments(SolrDocumentList resultList) { + Set solrDocumentsWithMatches = new TreeSet<>(new SolrDocumentComparator()); + solrDocumentsWithMatches.addAll(resultList); + return solrDocumentsWithMatches; + } + + private ContentHit createContentHitFromQueryResults(SolrDocument resultDoc, Map>> highlightResponse, boolean snippets, SleuthkitCase sc) throws TskException { + ContentHit chit; + final String resultID = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString(); + final int sepIndex = resultID.indexOf(Server.ID_CHUNK_SEP); + String snippet = ""; + if (snippets) { + try { + snippet = highlightResponse.get(resultID).get(Server.Schema.CONTENT.toString()).get(0); + snippet = EscapeUtil.unEscapeHtml(snippet).trim(); + } catch (NullPointerException ex) { + snippet = ""; + } + } + if (sepIndex != -1) { + //file chunk result + final long fileID = Long.parseLong(resultID.substring(0, sepIndex)); + final int chunkId = Integer.parseInt(resultID.substring(sepIndex + 1)); + //logger.log(Level.INFO, "file id: " + fileID + ", chunkID: " + chunkId); try { - QueryResponse response = solrServer.query(q, METHOD.POST); - SolrDocumentList resultList = response.getResults(); - Map>> highlightResponse = response.getHighlighting(); - long results = resultList.getNumFound(); - Set solrDocumentsWithMatches = new TreeSet<>( - new Comparator() { - @Override - public int compare(SolrDocument left, SolrDocument right) { - String idName = Server.Schema.ID.toString(); - String leftID = left.getFieldValue(idName).toString(); - int index = leftID.indexOf(Server.ID_CHUNK_SEP); - if (index != -1) { - leftID = leftID.substring(0, index); - } - - String rightID = right.getFieldValue(idName).toString(); - index = rightID.indexOf(Server.ID_CHUNK_SEP); - if (index != -1) { - rightID = rightID.substring(0, index); - } - - return leftID.compareTo(rightID); - } - }); - solrDocumentsWithMatches.addAll(resultList); - allMatchesFetched = start + MAX_RESULTS >= results; - - SleuthkitCase sc = null; - try { - sc = Case.getCurrentCase().getSleuthkitCase(); - } catch (IllegalStateException ex) { - //no case open, must be just closed - return matches; + AbstractFile resultAbstractFile = sc.getAbstractFileById(fileID); + chit = new ContentHit(resultAbstractFile, chunkId); + if (snippet.isEmpty() == false) { + chit.setSnippet(snippet); } - - for (SolrDocument resultDoc : solrDocumentsWithMatches) { - final String resultID = (String) resultDoc.getFieldValue(Server.Schema.ID.toString()); - - final int sepIndex = resultID.indexOf(Server.ID_CHUNK_SEP); - - String snippet = ""; - if (snippets) { - try { - snippet = highlightResponse.get(resultID).get(Server.Schema.CONTENT.toString()).get(0); - snippet = EscapeUtil.unEscapeHtml(snippet).trim(); - } catch (NullPointerException ex) { - snippet = ""; - } - } - - if (sepIndex != -1) { - //file chunk result - final long fileID = Long.parseLong(resultID.substring(0, sepIndex)); - final int chunkId = Integer.parseInt(resultID.substring(sepIndex + 1)); - //logger.log(Level.INFO, "file id: " + fileID + ", chunkID: " + chunkId); - - try { - AbstractFile resultAbstractFile = sc.getAbstractFileById(fileID); - ContentHit chit = new ContentHit(resultAbstractFile, chunkId); - if (snippet.isEmpty() == false) { - chit.setSnippet(snippet); - } - matches.add(chit); - } catch (TskException ex) { - logger.log(Level.WARNING, "Could not get the AbstractFile for keyword hit, ", ex); - //something wrong with case/db - return matches; - } - - } else { - final long fileID = Long.parseLong(resultID); - - try { - AbstractFile resultAbstractFile = sc.getAbstractFileById(fileID); - ContentHit chit = new ContentHit(resultAbstractFile); - if (snippet.isEmpty() == false) { - chit.setSnippet(snippet); - } - matches.add(chit); - } catch (TskException ex) { - logger.log(Level.WARNING, "Could not get the AbstractFile for keyword hit, ", ex); - //something wrong with case/db - return matches; - } - } - - } - - - } catch (NoOpenCoreException ex) { - logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + keywordString, ex); + } catch (TskException ex) { + logger.log(Level.WARNING, "Could not get the AbstractFile for keyword hit, ", ex); + //something wrong with case/db throw ex; - } catch (KeywordSearchModuleException ex) { - logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + keywordString, ex); } + } else { + final long fileID = Long.parseLong(resultID); + + try { + AbstractFile resultAbstractFile = sc.getAbstractFileById(fileID); + chit = new ContentHit(resultAbstractFile); + if (snippet.isEmpty() == false) { + chit.setSnippet(snippet); + } + } catch (TskException ex) { + logger.log(Level.WARNING, "Could not get the AbstractFile for keyword hit, ", ex); + //something wrong with case/db + throw ex; + } } - return matches; + return chit; } /** @@ -461,4 +457,28 @@ public class LuceneQuery implements KeywordSearchQuery { return ""; } } + + /** + * Compares SolrDocuments based on their ID's. Two SolrDocuments with + * different chunk numbers are considered equal. + */ + private class SolrDocumentComparator implements Comparator { + @Override + public int compare(SolrDocument left, SolrDocument right) { + String idName = Server.Schema.ID.toString(); + String leftID = left.getFieldValue(idName).toString(); + int index = leftID.indexOf(Server.ID_CHUNK_SEP); + if (index != -1) { + leftID = leftID.substring(0, index); + } + + String rightID = right.getFieldValue(idName).toString(); + index = rightID.indexOf(Server.ID_CHUNK_SEP); + if (index != -1) { + rightID = rightID.substring(0, index); + } + + return leftID.compareTo(rightID); + } + } }