Mirror of https://github.com/overcuriousity/autopsy-flatpak.git, synced 2025-07-19 11:07:43 +00:00
Cleaned up performQuery
parent ec24c347bc
commit 23ec64c459
@@ -34,6 +34,7 @@ import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.client.solrj.response.TermsResponse.Term;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
+import org.openide.util.Exceptions;
 import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.EscapeUtil;
 import org.sleuthkit.autopsy.coreutils.Version;
@@ -132,7 +133,7 @@ public class LuceneQuery implements KeywordSearchQuery {
     public Map<String, List<ContentHit>> performQuery() throws NoOpenCoreException {
         Map<String, List<ContentHit>> results = new HashMap<String, List<ContentHit>>();
         //in case of single term literal query there is only 1 term
-        results.put(keywordString, performLuceneQuery(false));
+        results.put(keywordString, performLuceneQuery(true));
 
         return results;
     }
@@ -197,15 +198,55 @@ public class LuceneQuery implements KeywordSearchQuery {
      */
     private List<ContentHit> performLuceneQuery(boolean snippets) throws NoOpenCoreException {
 
-        List<ContentHit> matches = new ArrayList<ContentHit>();
+        List<ContentHit> matches = new ArrayList<>();
 
         boolean allMatchesFetched = false;
 
         final Server solrServer = KeywordSearch.getServer();
 
+        SolrQuery q = createAndConfigureSolrQuery(snippets);
+
+        for (int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
+            q.setStart(start);
+
+            try {
+                QueryResponse response = solrServer.query(q, METHOD.POST);
+                SolrDocumentList resultList = response.getResults();
+                Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
+                Set<SolrDocument> solrDocumentsWithMatches = filterDuplicateSolrDocuments(resultList);
+
+                allMatchesFetched = start + MAX_RESULTS >= resultList.getNumFound();
+
+                SleuthkitCase sleuthkitCase;
+                try {
+                    sleuthkitCase = Case.getCurrentCase().getSleuthkitCase();
+                } catch (IllegalStateException ex) {
+                    //no case open, must be just closed
+                    return matches;
+                }
+
+                for (SolrDocument resultDoc : solrDocumentsWithMatches) {
+                    ContentHit contentHit;
+                    try {
+                        contentHit = createContentHitFromQueryResults(resultDoc, highlightResponse, snippets, sleuthkitCase);
+                    } catch (TskException ex) {
+                        return matches;
+                    }
+                    matches.add(contentHit);
+                }
+
+            } catch (NoOpenCoreException ex) {
+                logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + keywordString, ex);
+                throw ex;
+            } catch (KeywordSearchModuleException ex) {
+                logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + keywordString, ex);
+            }
+
+        }
+        return matches;
+    }
+
+    private SolrQuery createAndConfigureSolrQuery(boolean snippets) {
         SolrQuery q = new SolrQuery();
         q.setShowDebugInfo(DEBUG); //debug
 
         //set query, force quotes/grouping around all literal queries
         final String groupedQuery = KeywordSearchUtil.quoteQuery(keywordStringEscaped);
         String theQueryStr = groupedQuery;
@@ -215,14 +256,15 @@ public class LuceneQuery implements KeywordSearchQuery {
             sb.append(field).append(":").append(groupedQuery);
             theQueryStr = sb.toString();
         }
 
         q.setQuery(theQueryStr);
         q.setRows(MAX_RESULTS);
 
         if (snippets) {
             q.setFields(Server.Schema.ID.toString(), Server.Schema.CONTENT.toString());
         } else {
             q.setFields(Server.Schema.ID.toString());
         }
 
         for (KeywordQueryFilter filter : filters) {
             q.addFilterQuery(filter.toString());
         }
@@ -247,109 +289,63 @@ public class LuceneQuery implements KeywordSearchQuery {
             //analyze all content SLOW! consider lowering
             q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED);
         }
 
-        for (int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
-            q.setStart(start);
-
-            try {
-                QueryResponse response = solrServer.query(q, METHOD.POST);
-                SolrDocumentList resultList = response.getResults();
-                Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
-                long results = resultList.getNumFound();
-                Set<SolrDocument> solrDocumentsWithMatches = new TreeSet<>(
-                        new Comparator<SolrDocument>() {
-                            @Override
-                            public int compare(SolrDocument left, SolrDocument right) {
-                                String idName = Server.Schema.ID.toString();
-                                String leftID = left.getFieldValue(idName).toString();
-                                int index = leftID.indexOf(Server.ID_CHUNK_SEP);
-                                if (index != -1) {
-                                    leftID = leftID.substring(0, index);
-                                }
-
-                                String rightID = right.getFieldValue(idName).toString();
-                                index = rightID.indexOf(Server.ID_CHUNK_SEP);
-                                if (index != -1) {
-                                    rightID = rightID.substring(0, index);
-                                }
-
-                                return leftID.compareTo(rightID);
-                            }
-                        });
-                solrDocumentsWithMatches.addAll(resultList);
-                allMatchesFetched = start + MAX_RESULTS >= results;
-
-                SleuthkitCase sc = null;
-                try {
-                    sc = Case.getCurrentCase().getSleuthkitCase();
-                } catch (IllegalStateException ex) {
-                    //no case open, must be just closed
-                    return matches;
-                }
-
-                for (SolrDocument resultDoc : solrDocumentsWithMatches) {
-                    final String resultID = (String) resultDoc.getFieldValue(Server.Schema.ID.toString());
-
-                    final int sepIndex = resultID.indexOf(Server.ID_CHUNK_SEP);
-
-                    String snippet = "";
-                    if (snippets) {
-                        try {
-                            snippet = highlightResponse.get(resultID).get(Server.Schema.CONTENT.toString()).get(0);
-                            snippet = EscapeUtil.unEscapeHtml(snippet).trim();
-                        } catch (NullPointerException ex) {
-                            snippet = "";
-                        }
-                    }
-
-                    if (sepIndex != -1) {
-                        //file chunk result
-                        final long fileID = Long.parseLong(resultID.substring(0, sepIndex));
-                        final int chunkId = Integer.parseInt(resultID.substring(sepIndex + 1));
-                        //logger.log(Level.INFO, "file id: " + fileID + ", chunkID: " + chunkId);
-
-                        try {
-                            AbstractFile resultAbstractFile = sc.getAbstractFileById(fileID);
-                            ContentHit chit = new ContentHit(resultAbstractFile, chunkId);
-                            if (snippet.isEmpty() == false) {
-                                chit.setSnippet(snippet);
-                            }
-                            matches.add(chit);
-                        } catch (TskException ex) {
-                            logger.log(Level.WARNING, "Could not get the AbstractFile for keyword hit, ", ex);
-                            //something wrong with case/db
-                            return matches;
-                        }
-
-                    } else {
-                        final long fileID = Long.parseLong(resultID);
-
-                        try {
-                            AbstractFile resultAbstractFile = sc.getAbstractFileById(fileID);
-                            ContentHit chit = new ContentHit(resultAbstractFile);
-                            if (snippet.isEmpty() == false) {
-                                chit.setSnippet(snippet);
-                            }
-                            matches.add(chit);
-                        } catch (TskException ex) {
-                            logger.log(Level.WARNING, "Could not get the AbstractFile for keyword hit, ", ex);
-                            //something wrong with case/db
-                            return matches;
-                        }
-                    }
-
-                }
-
-
-            } catch (NoOpenCoreException ex) {
-                logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + keywordString, ex);
-                throw ex;
-            } catch (KeywordSearchModuleException ex) {
-                logger.log(Level.WARNING, "Error executing Lucene Solr Query: " + keywordString, ex);
-            }
-
-        }
-        return matches;
+        return q;
+    }
+
+    private Set<SolrDocument> filterDuplicateSolrDocuments(SolrDocumentList resultList) {
+        Set<SolrDocument> solrDocumentsWithMatches = new TreeSet<>(new SolrDocumentComparator());
+        solrDocumentsWithMatches.addAll(resultList);
+        return solrDocumentsWithMatches;
+    }
+
+    private ContentHit createContentHitFromQueryResults(SolrDocument resultDoc, Map<String, Map<String, List<String>>> highlightResponse, boolean snippets, SleuthkitCase sc) throws TskException {
+        ContentHit chit;
+        final String resultID = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
+        final int sepIndex = resultID.indexOf(Server.ID_CHUNK_SEP);
+        String snippet = "";
+        if (snippets) {
+            try {
+                snippet = highlightResponse.get(resultID).get(Server.Schema.CONTENT.toString()).get(0);
+                snippet = EscapeUtil.unEscapeHtml(snippet).trim();
+            } catch (NullPointerException ex) {
+                snippet = "";
+            }
+        }
+        if (sepIndex != -1) {
+            //file chunk result
+            final long fileID = Long.parseLong(resultID.substring(0, sepIndex));
+            final int chunkId = Integer.parseInt(resultID.substring(sepIndex + 1));
+            //logger.log(Level.INFO, "file id: " + fileID + ", chunkID: " + chunkId);
+
+            try {
+                AbstractFile resultAbstractFile = sc.getAbstractFileById(fileID);
+                chit = new ContentHit(resultAbstractFile, chunkId);
+                if (snippet.isEmpty() == false) {
+                    chit.setSnippet(snippet);
+                }
+            } catch (TskException ex) {
+                logger.log(Level.WARNING, "Could not get the AbstractFile for keyword hit, ", ex);
+                //something wrong with case/db
+                throw ex;
+            }
+        } else {
+            final long fileID = Long.parseLong(resultID);
+
+            try {
+                AbstractFile resultAbstractFile = sc.getAbstractFileById(fileID);
+                chit = new ContentHit(resultAbstractFile);
+                if (snippet.isEmpty() == false) {
+                    chit.setSnippet(snippet);
+                }
+            } catch (TskException ex) {
+                logger.log(Level.WARNING, "Could not get the AbstractFile for keyword hit, ", ex);
+                //something wrong with case/db
+                throw ex;
+            }
+        }
+        return chit;
     }
 
     /**
@@ -461,4 +457,28 @@ public class LuceneQuery implements KeywordSearchQuery {
             return "";
         }
     }
+
+    /**
+     * Compares SolrDocuments based on their ID's. Two SolrDocuments with
+     * different chunk numbers are considered equal.
+     */
+    private class SolrDocumentComparator implements Comparator<SolrDocument> {
+        @Override
+        public int compare(SolrDocument left, SolrDocument right) {
+            String idName = Server.Schema.ID.toString();
+            String leftID = left.getFieldValue(idName).toString();
+            int index = leftID.indexOf(Server.ID_CHUNK_SEP);
+            if (index != -1) {
+                leftID = leftID.substring(0, index);
+            }
+
+            String rightID = right.getFieldValue(idName).toString();
+            index = rightID.indexOf(Server.ID_CHUNK_SEP);
+            if (index != -1) {
+                rightID = rightID.substring(0, index);
+            }
+
+            return leftID.compareTo(rightID);
+        }
+    }
 }
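The core of this cleanup is deduplicating chunk hits by base document ID: filterDuplicateSolrDocuments() collects results into a TreeSet ordered by SolrDocumentComparator, so documents that differ only in their chunk number collapse to a single entry per file. Below is a minimal, self-contained sketch of that idea using plain JDK types; the '_' separator, the sample IDs, and the class name ChunkDedupSketch are illustrative stand-ins (the real code compares SolrDocument field values and uses Server.ID_CHUNK_SEP), not code taken from this commit.

// Minimal sketch (assumed names, not Autopsy code): deduplicate chunk hits
// by comparing only the base document ID, mirroring SolrDocumentComparator.
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

public class ChunkDedupSketch {

    // Strip the chunk suffix ("42_3" -> "42"); '_' stands in for Server.ID_CHUNK_SEP.
    private static String baseId(String id) {
        int sep = id.indexOf('_');
        return (sep != -1) ? id.substring(0, sep) : id;
    }

    public static void main(String[] args) {
        // Hypothetical Solr document IDs: file 42 hit in three chunks, file 7 hit twice.
        List<String> hitIds = Arrays.asList("42_1", "42_3", "7", "42_2", "7_1");

        // A TreeSet ordered by base ID treats chunks of the same file as duplicates,
        // so only the first hit per file is kept -- the same trick as
        // filterDuplicateSolrDocuments() with SolrDocumentComparator.
        Set<String> deduped = new TreeSet<>(Comparator.comparing(ChunkDedupSketch::baseId));
        deduped.addAll(hitIds);

        System.out.println(deduped); // prints [42_1, 7] -- one entry per file
    }
}

As in the commit, the set keeps the first document it sees for each base ID and drops later chunks of the same file, which matches the "two SolrDocuments with different chunk numbers are considered equal" behavior documented on SolrDocumentComparator.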