From 42511628a12b5d524d21bc108609346a249b9f8a Mon Sep 17 00:00:00 2001
From: millmanorama
Date: Sat, 11 Mar 2017 15:38:28 +0100
Subject: [PATCH] replace broken paging with cursor based paging

---
 .../autopsy/keywordsearch/LuceneQuery.java | 73 ++++++++++---------
 .../autopsy/keywordsearch/RegexQuery.java  | 12 ++-
 2 files changed, 45 insertions(+), 40 deletions(-)

diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
index 7d98d3c3ff..dcd485582c 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/LuceneQuery.java
@@ -27,17 +27,23 @@ import java.util.logging.Level;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.commons.lang3.math.NumberUtils;
 import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrRequest.METHOD;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.params.CursorMarkParams;
+import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.coreutils.EscapeUtil;
 import org.sleuthkit.autopsy.coreutils.Logger;
+import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
 import org.sleuthkit.autopsy.coreutils.Version;
+import static org.sleuthkit.autopsy.keywordsearch.RegexQuery.LOGGER;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
 import org.sleuthkit.datamodel.BlackboardAttribute;
 import org.sleuthkit.datamodel.BlackboardAttribute.ATTRIBUTE_TYPE;
+import org.sleuthkit.datamodel.TskCoreException;
 import org.sleuthkit.datamodel.TskException;
 
 /**
@@ -54,7 +60,7 @@ class LuceneQuery implements KeywordSearchQuery {
     private KeywordList keywordList = null;
     private final List<KeywordQueryFilter> filters = new ArrayList<>();
     private String field = null;
-    private static final int MAX_RESULTS = 20000;
+    private static final int MAX_RESULTS_PER_CURSOR_MARK = 512;
     static final int SNIPPET_LENGTH = 50;
     static final String HIGHLIGHT_FIELD = Server.Schema.TEXT.toString();
 
@@ -65,7 +71,7 @@ class LuceneQuery implements KeywordSearchQuery {
      *
      * @param keyword
      */
-    public LuceneQuery(KeywordList keywordList, Keyword keyword) {
+    LuceneQuery(KeywordList keywordList, Keyword keyword) {
         this.keywordList = keywordList;
         this.originalKeyword = keyword;
 
@@ -89,7 +95,7 @@ class LuceneQuery implements KeywordSearchQuery {
     public void setSubstringQuery() {
         // Note that this is not a full substring search. Normally substring
         // searches will be done with TermComponentQuery objects instead.
-        keywordStringEscaped = keywordStringEscaped + "*";
+        keywordStringEscaped += "*";
     }
 
     @Override
@@ -122,15 +128,15 @@ class LuceneQuery implements KeywordSearchQuery {
     public QueryResults performQuery() throws KeywordSearchModuleException, NoOpenCoreException {
         QueryResults results = new QueryResults(this);
         //in case of single term literal query there is only 1 term
-        boolean showSnippets = KeywordSearchSettings.getShowSnippets();
-        results.addResult(new Keyword(keywordString, true), performLuceneQuery(showSnippets));
+        results.addResult(new Keyword(keywordString, true),
+                performLuceneQuery(KeywordSearchSettings.getShowSnippets()));
         return results;
     }
 
     @Override
     public boolean validate() {
-        return keywordString != null && !keywordString.equals("");
+        return StringUtils.isNotBlank(keywordString);
     }
 
     @Override
@@ -180,7 +186,7 @@ class LuceneQuery implements KeywordSearchQuery {
             bba.addAttributes(attributes); //write out to bb
             writeResult.add(attributes);
             return writeResult;
-        } catch (TskException e) {
+        } catch (TskCoreException e) {
             logger.log(Level.WARNING, "Error adding bb attributes to artifact", e); //NON-NLS
         }
         return null;
@@ -191,46 +197,41 @@ class LuceneQuery implements KeywordSearchQuery {
      *
      * @param snippets True if results should have a snippet
      *
-     * @return list of ContentHit objects. One per file with hit (ignores
+     * @return list of KeywordHit objects. One per file with hit (ignores
      *         multiple hits of the word in the same doc)
      *
-     * @throws NoOpenCoreException
      */
     private List<KeywordHit> performLuceneQuery(boolean snippets) throws KeywordSearchModuleException, NoOpenCoreException {
 
         List<KeywordHit> matches = new ArrayList<>();
-        boolean allMatchesFetched = false;
         final Server solrServer = KeywordSearch.getServer();
+        double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
 
-        SolrQuery q = createAndConfigureSolrQuery(snippets);
-        QueryResponse response;
-        SolrDocumentList resultList;
-        Map<String, Map<String, List<String>>> highlightResponse;
-
-        response = solrServer.query(q, METHOD.POST);
-
-        resultList = response.getResults();
-        // objectId_chunk -> "text" -> List of previews
-        highlightResponse = response.getHighlighting();
+        SolrQuery solrQuery = createAndConfigureSolrQuery(snippets);
 
         final String strippedQueryString = StringUtils.strip(getQueryString(), "\"");
 
-        // cycle through results in sets of MAX_RESULTS
-        for (int start = 0; !allMatchesFetched; start = start + MAX_RESULTS) {
-            q.setStart(start);
+        String cursorMark = CursorMarkParams.CURSOR_MARK_START;
+        boolean allResultsProcessed = false;
 
-            allMatchesFetched = start + MAX_RESULTS >= resultList.getNumFound();
+        while (!allResultsProcessed) {
+            solrQuery.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
+            QueryResponse response = solrServer.query(solrQuery, SolrRequest.METHOD.POST);
+            SolrDocumentList resultList = response.getResults();
+            // objectId_chunk -> "text" -> List of previews
+            Map<String, Map<String, List<String>>> highlightResponse = response.getHighlighting();
 
             for (SolrDocument resultDoc : resultList) {
                 try {
-                    /* for each result, check that the first occurence of that
+                    /*
+                     * for each result, check that the first occurence of that
                      * term is before the window. if all the ocurences start
                      * within the window, don't record them for this chunk, they
-                     * will get picked up in the next one. */
+                     * will get picked up in the next one.
+                     */
                     final String docId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
                     final Integer chunkSize = (Integer) resultDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
                     final Collection<Object> content = resultDoc.getFieldValues(Server.Schema.CONTENT_STR.toString());
-                    double indexSchemaVersion = NumberUtils.toDouble(KeywordSearch.getServer().getIndexInfo().getSchemaVersion());
                     if (indexSchemaVersion < 2.0) {
                         //old schema versions don't support chunk_size or the content_str fields, so just accept hits
                         matches.add(createKeywordtHit(highlightResponse, docId));
                     } else {
@@ -250,11 +251,17 @@ class LuceneQuery implements KeywordSearchQuery {
                         return matches;
                     }
                 }
+            String nextCursorMark = response.getNextCursorMark();
+            if (cursorMark.equals(nextCursorMark)) {
+                allResultsProcessed = true;
+            }
+            cursorMark = nextCursorMark;
+        }
         return matches;
     }
 
-    /**
+    /*
      * Create the query object for the stored keyword
      *
      * @param snippets True if query should request snippets
      *
@@ -271,17 +278,17 @@ class LuceneQuery implements KeywordSearchQuery {
 
         // Run the query against an optional alternative field.
         if (field != null) {
            //use the optional field
-            StringBuilder sb = new StringBuilder();
-            sb.append(field).append(":").append(theQueryStr);
-            theQueryStr = sb.toString();
+            theQueryStr = field + ":" + theQueryStr;
         }
         q.setQuery(theQueryStr);
-        q.setRows(MAX_RESULTS);
+        q.setRows(MAX_RESULTS_PER_CURSOR_MARK);
+        // Setting the sort order is necessary for cursor based paging to work.
+        q.setSort(SolrQuery.SortClause.asc(Server.Schema.ID.toString()));
 
         q.setFields(Server.Schema.ID.toString(), Server.Schema.CHUNK_SIZE.toString(), Server.Schema.CONTENT_STR.toString());
-        q.addSort(Server.Schema.ID.toString(), SolrQuery.ORDER.asc);
+
         for (KeywordQueryFilter filter : filters) {
             q.addFilterQuery(filter.toString());
         }
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java
index fcf0881272..c7004b1fa7 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java
@@ -81,7 +81,7 @@ final class RegexQuery implements KeywordSearchQuery {
     private final Keyword originalKeyword; // The regular expression originalKeyword used to perform the search.
     private String field = Server.Schema.CONTENT_STR.toString();
     private final String keywordString;
-    static final private int MAX_RESULTS = 512;
+    static final private int MAX_RESULTS_PER_CURSOR_MARK = 512;
     private boolean escaped;
     private String escapedQuery;
 
@@ -154,7 +154,6 @@ final class RegexQuery implements KeywordSearchQuery {
 
     @Override
     public QueryResults performQuery() throws NoOpenCoreException {
-        QueryResults results = new QueryResults(this);
         final Server solrServer = KeywordSearch.getServer();
         SolrQuery solrQuery = new SolrQuery();
 
@@ -187,12 +186,12 @@ final class RegexQuery implements KeywordSearchQuery {
                 .map(KeywordQueryFilter::toString)
                 .forEach(solrQuery::addFilterQuery);
 
-        solrQuery.setRows(MAX_RESULTS);
+        solrQuery.setRows(MAX_RESULTS_PER_CURSOR_MARK);
         // Setting the sort order is necessary for cursor based paging to work.
         solrQuery.setSort(SortClause.asc(Server.Schema.ID.toString()));
 
         String cursorMark = CursorMarkParams.CURSOR_MARK_START;
-        SolrDocumentList resultList = null;
+        SolrDocumentList resultList ;
         boolean allResultsProcessed = false;
 
         while (!allResultsProcessed) {
@@ -218,15 +217,14 @@
                 }
                 cursorMark = nextCursorMark;
             } catch (KeywordSearchModuleException ex) {
-                LOGGER.log(Level.SEVERE, "Error executing Lucene Solr Query: " + keywordString, ex); //NON-NLS
+                LOGGER.log(Level.SEVERE, "Error executing Regex Solr Query: " + keywordString, ex); //NON-NLS
                 MessageNotifyUtil.Notify.error(NbBundle.getMessage(Server.class, "Server.query.exception.msg", keywordString), ex.getCause().getMessage());
             }
         }
-
+        QueryResults results = new QueryResults(this);
         for (Keyword k : hitsMultiMap.keySet()) {
             results.addResult(k, hitsMultiMap.get(k));
         }
-
         return results;
     }
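
For reference, the cursor-mark paging idiom this patch adopts in both query classes can be exercised on its own with a short standalone SolrJ sketch like the one below (assuming a SolrJ 6.x-style client). The Solr URL, collection name, query string, page size, and sort field are illustrative assumptions, not values taken from the Autopsy code.

    import org.apache.solr.client.solrj.SolrQuery;
    import org.apache.solr.client.solrj.impl.HttpSolrClient;
    import org.apache.solr.client.solrj.response.QueryResponse;
    import org.apache.solr.common.SolrDocument;
    import org.apache.solr.common.params.CursorMarkParams;

    public class CursorPagingSketch {
        public static void main(String[] args) throws Exception {
            // Hypothetical Solr URL and collection; adjust for a real deployment.
            try (HttpSolrClient solr = new HttpSolrClient.Builder("http://localhost:8983/solr/demo").build()) {
                SolrQuery query = new SolrQuery("text:keyword");
                query.setRows(512); // page size per request, analogous to MAX_RESULTS_PER_CURSOR_MARK
                // Cursor paging requires a deterministic sort that includes the uniqueKey field.
                query.setSort(SolrQuery.SortClause.asc("id"));

                String cursorMark = CursorMarkParams.CURSOR_MARK_START; // "*"
                boolean allResultsProcessed = false;
                while (!allResultsProcessed) {
                    query.set(CursorMarkParams.CURSOR_MARK_PARAM, cursorMark);
                    QueryResponse response = solr.query(query);
                    for (SolrDocument doc : response.getResults()) {
                        System.out.println(doc.getFieldValue("id")); // process one hit
                    }
                    String nextCursorMark = response.getNextCursorMark();
                    // Solr echoes the same cursor mark back once the result set is exhausted.
                    allResultsProcessed = cursorMark.equals(nextCursorMark);
                    cursorMark = nextCursorMark;
                }
            }
        }
    }

As the removed LuceneQuery code shows, the old loop incremented the start offset but never re-issued the query, so pages beyond the first response were never fetched; the cursor loop re-queries on every iteration and stops when Solr returns the same cursor mark twice.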