From e25f1473bdcc8fbff3ccfa0770490e66a3d85b18 Mon Sep 17 00:00:00 2001 From: millmanorama Date: Mon, 20 Feb 2017 13:47:30 +0100 Subject: [PATCH] more cleanup --- .../keywordsearch/HighlightedText.java | 189 ++++++++---------- 1 file changed, 86 insertions(+), 103 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java index f727c8de31..ef4c596f2e 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java @@ -30,6 +30,7 @@ import java.util.Set; import java.util.TreeSet; import java.util.logging.Level; import java.util.stream.Collectors; +import javax.annotation.concurrent.GuardedBy; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.SolrQuery; @@ -56,6 +57,8 @@ class HighlightedText implements IndexedText { private static final Logger logger = Logger.getLogger(HighlightedText.class.getName()); + private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT); + private static final BlackboardAttribute.Type TSK_KEYWORD_HIT_DOCUMENT_IDS = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_HIT_DOCUMENT_IDS); private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_TYPE = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_TYPE); private static final BlackboardAttribute.Type TSK_KEYWORD = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD); @@ -67,25 +70,32 @@ class HighlightedText implements IndexedText { final private Server solrServer = KeywordSearch.getServer(); private final long objectId; + /* + * The keywords to highlight + */ private final Set keywords = new HashSet<>(); - private int numberPages = 0; - private int currentPage = 0; + private int numberPages; + private int currentPage; + + @GuardedBy("this") + private boolean isPageInfoLoaded = false; - private boolean hasChunks = false; /** - * stores all pages/chunks that have hits as key, and number of hits as a - * value, or 0 if yet unknown + * stores the chunk number all pages/chunks that have hits as key, and + * number of hits as a value, or 0 if not yet known */ private final LinkedHashMap numberOfHitsPerPage = new LinkedHashMap<>(); /* * stored page num -> current hit number mapping */ private final HashMap currentHitPerPage = new HashMap<>(); + /* + * List of unique page/chunk numbers with hits + */ private final List pages = new ArrayList<>(); + private QueryResults hits = null; //original hits that may get passed in - private boolean isPageInfoLoaded = false; - private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT); private BlackboardArtifact artifact; private KeywordSearch.QueryType qt; private boolean isLiteral; @@ -95,10 +105,11 @@ class HighlightedText implements IndexedText { * search results. In that case we have the entire QueryResults object and * need to arrange the paging. * - * @param objectId - * @param originalQuery The original query string that produced the hit. If - * isRegex is true, this will be the regular expression - * that produced the hit. + * @param objectId The objectID of the content whose text will be + * highlighted. + * @param QueryResults The QueryResults for the ad-hoc search from whose + * results a selection was made leading to this + * HighlightedText. */ HighlightedText(long objectId, QueryResults hits) { this.objectId = objectId; @@ -108,13 +119,10 @@ class HighlightedText implements IndexedText { /** * This constructor is used when keyword hits are accessed from the "Keyword * Hits" node in the directory tree in Autopsy. In that case we have the - * keyword hit artifact which has the chunks for which a hit had previously - * been found to work out the paging for. + * keyword hit artifact which has the chunks (as + * TSK_KEYWORD_HIT_DOCUMENT_IDS attribute) to use to work out the paging. * - * - * @param artifact - * - * @throws TskCoreException + * @param artifact The artifact that was selected. */ HighlightedText(BlackboardArtifact artifact) { this.artifact = artifact; @@ -122,14 +130,48 @@ class HighlightedText implements IndexedText { } - private void loadPageInfoFromArtifact() throws TskCoreException, NumberFormatException { + /** + * This method figures out which pages / chunks have hits. Invoking it a + * second time has no effect. + */ + @Messages({"HighlightedText.query.exception.msg=Could not perform the query to get chunk info and get highlights:"}) + synchronized private void loadPageInfo() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException { + if (isPageInfoLoaded) { + return; + } + + this.numberPages = solrServer.queryNumFileChunks(this.objectId); + + if (artifact != null) { + loadPageInfoFromArtifact(); + } else if (numberPages != 0) { + // if the file has chunks, get pages with hits, sorted + loadPageInfoFromHits(); + } else { + //non-artifact, no chunks, everything is easy. + this.numberPages = 1; + this.currentPage = 1; + numberOfHitsPerPage.put(1, 0); + pages.add(1); + currentHitPerPage.put(1, 0); + isPageInfoLoaded = true; + } + } + + /** + * Figure out the paging info from the artifact that was used to create this + * HighlightedText + * + * @throws TskCoreException + */ + synchronized private void loadPageInfoFromArtifact() throws TskCoreException, KeywordSearchModuleException, NoOpenCoreException { final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString(); this.keywords.add(keyword); - final BlackboardAttribute qtAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE); - - qt = (qtAttribute != null) - ? KeywordSearch.QueryType.values()[qtAttribute.getValueInt()] : null; + //get the QueryType (if available) + final BlackboardAttribute queryTypetAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE); + qt = (queryTypetAttribute != null) + ? KeywordSearch.QueryType.values()[queryTypetAttribute.getValueInt()] : null; final BlackboardAttribute docIDsArtifact = artifact.getAttribute(TSK_KEYWORD_HIT_DOCUMENT_IDS); @@ -139,14 +181,10 @@ class HighlightedText implements IndexedText { String chunkIDsString = docIDsArtifact.getValueString(); Set chunkIDs = Arrays.stream(chunkIDsString.split(",")).map(StringUtils::strip).collect(Collectors.toSet()); for (String solrDocumentId : chunkIDs) { - int chunkID; final int separatorIndex = solrDocumentId.indexOf(Server.CHUNK_ID_SEPARATOR); - if (-1 != separatorIndex) { - chunkID = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1)); - } else { + int chunkID = (-1 == separatorIndex) ? 0 + : Integer.parseInt(solrDocumentId.substring(separatorIndex + 1)); - chunkID = 0; - } pages.add(chunkID); numberOfHitsPerPage.put(chunkID, 0); currentHitPerPage.put(chunkID, 0); @@ -160,85 +198,18 @@ class HighlightedText implements IndexedText { * we need to look them up */ Keyword keywordQuery = new Keyword(keyword, true); - KeywordSearchQuery chunksQuery - = new LuceneQuery(new KeywordList(Arrays.asList(keywordQuery)), keywordQuery); + KeywordSearchQuery chunksQuery = new LuceneQuery(new KeywordList(Arrays.asList(keywordQuery)), keywordQuery); chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId)); - try { - hits = chunksQuery.performQuery(); - loadPageInfoFromHits(); - } catch (KeywordSearchModuleException | NoOpenCoreException ex) { - logger.log(Level.SEVERE, "Could not perform the query to get chunk info and get highlights:" + keywordQuery.getSearchTerm(), ex); //NON-NLS - MessageNotifyUtil.Notify.error(Bundle.HighlightedText_query_exception_msg() + keywordQuery.getSearchTerm(), ex.getCause().getMessage()); - } - } - } - /** - * Return the string used to later have SOLR highlight the document with. - * - * @param query - * @param literal_query - * @param queryResults - * @param file - * - * @return - */ - /** - * Constructs a complete, escaped Solr query that is ready to be used. - * - * @param query keyword term to be searched for - * @param literal_query flag whether query is literal or regex - * - * @return Solr query string - */ - static private String constructEscapedSolrQuery(String query) { - return LuceneQuery.HIGHLIGHT_FIELD + ":" + "\"" + KeywordSearchUtil.escapeLuceneQuery(query) + "\""; - } - - /** - * The main goal of this method is to figure out which pages / chunks have - * hits. - */ - @Messages({"HighlightedText.query.exception.msg=Could not perform the query to get chunk info and get highlights:"}) - private void loadPageInfo() throws TskCoreException { - if (isPageInfoLoaded) { - return; - } - - try { - this.numberPages = solrServer.queryNumFileChunks(this.objectId); - } catch (KeywordSearchModuleException | NoOpenCoreException ex) { - logger.log(Level.WARNING, "Could not get number pages for content: {0}", this.objectId); //NON-NLS - return; - } - - if (this.numberPages == 0) { - hasChunks = false; - } else { - hasChunks = true; - } - - if (artifact != null) { - /* - * this could go in the constructor but is here to keep it near the - * functionaly similar code for non regex searches - */ loadPageInfoFromArtifact(); - } else if (hasChunks) { - // if the file has chunks, get pages with hits, sorted + hits = chunksQuery.performQuery(); loadPageInfoFromHits(); - } else { - //non-regex, no chunks - this.numberPages = 1; - this.currentPage = 1; - numberOfHitsPerPage.put(1, 0); - pages.add(1); - currentHitPerPage.put(1, 0); - isPageInfoLoaded = true; } - } - private void loadPageInfoFromHits() { + /** + * Load the paging info from the QueryResults object. + */ + synchronized private void loadPageInfoFromHits() { isLiteral = hits.getQuery().isLiteral(); //organize the hits by page, filter as needed TreeSet pagesSorted = new TreeSet<>(); @@ -265,6 +236,18 @@ class HighlightedText implements IndexedText { isPageInfoLoaded = true; } + /** + * Constructs a complete, escaped Solr query that is ready to be used. + * + * @param query keyword term to be searched for + * @param literal_query flag whether query is literal or regex + * + * @return Solr query string + */ + static private String constructEscapedSolrQuery(String query) { + return LuceneQuery.HIGHLIGHT_FIELD + ":" + "\"" + KeywordSearchUtil.escapeLuceneQuery(query) + "\""; + } + @Override public int getNumberPages() { //return number of pages that have hits @@ -369,7 +352,7 @@ class HighlightedText implements IndexedText { q.setShowDebugInfo(DEBUG); //debug String contentIdStr = Long.toString(this.objectId); - if (hasChunks) { + if (numberPages != 0) { final String chunkID = Integer.toString(this.currentPage); contentIdStr += "0".equals(chunkID) ? "" : "_" + chunkID; } @@ -432,7 +415,7 @@ class HighlightedText implements IndexedText { return "
" + highlightedContent + "
"; //NON-NLS } catch (Exception ex) { - logger.log(Level.WARNING, "Error getting highlighted text for " + objectId, ex); //NON-NLS + logger.log(Level.SEVERE, "Error getting highlighted text for " + objectId, ex); //NON-NLS return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.queryFailedMsg"); } }