From bcc45285ee22a8f9a243b0651d5542b49266f3d8 Mon Sep 17 00:00:00 2001
From: jmillman
Date: Thu, 28 Jul 2016 17:07:09 -0400
Subject: [PATCH] cleanup AccountsText even more.

---
 .../autopsy/keywordsearch/AccountsText.java   | 224 +++++++++---------
 .../keywordsearch/ExtractedContentViewer.java |   2 +-
 .../autopsy/keywordsearch/Server.java         |  32 +--
 3 files changed, 127 insertions(+), 131 deletions(-)

diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AccountsText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AccountsText.java
index 40b5e1430e..173b3f8186 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AccountsText.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AccountsText.java
@@ -19,7 +19,6 @@
 package org.sleuthkit.autopsy.keywordsearch;
 
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -31,6 +30,7 @@ import java.util.logging.Level;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
+import org.apache.commons.lang.StringUtils;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrRequest.METHOD;
 import org.apache.solr.client.solrj.response.QueryResponse;
@@ -38,39 +38,48 @@ import org.apache.solr.common.SolrDocument;
 import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.Version;
-import org.sleuthkit.autopsy.datamodel.TextMarkupLookup;
 
 /**
  * Highlights account hits for a given document. Knows about pages and such for
  * the content viewer.
+ *
+ * Note: This class started as a copy-and-paste of HighlightedText, but it
+ * proved too messy to modify HighlightedText to work for accounts also. This
+ * and HighlightedText are very similar and could probably use some refactoring
+ * to reduce code duplication.
  */
-class AccountsText implements IndexedText, TextMarkupLookup {
+class AccountsText implements IndexedText {
 
     private static final Logger LOGGER = Logger.getLogger(AccountsText.class.getName());
+    private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
+
     private static final String HIGHLIGHT_PRE = "<span style='background:yellow'>"; //NON-NLS
     private static final String HIGHLIGHT_POST = "</span>"; //NON-NLS
-    private static final String ANCHOR_PREFIX = AccountsText.class.getName() + "_";
+    private static final String ANCHOR_NAME_PREFIX = AccountsText.class.getName() + "_";
+
+    private static final String INSERT_PREFIX = "<a name='" + ANCHOR_NAME_PREFIX; //NON-NLS
+    private static final String INSERT_POSTFIX = "'></a>$0"; //$0 will insert current regex match //NON-NLS
+    private static final Pattern ANCHOR_DETECTION_PATTERN = Pattern.compile(HIGHLIGHT_PRE);
+
+    private static final String HIGHLIGHT_FIELD = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
 
-    private final String solrDocumentId;
-    private final Set<String> keywords = new HashSet<>();
     private final Server solrServer;
-    private int numberPagesForFile = 0;
-    private int currentPage = 0;
-    private boolean hasChunks = false;
-    //stores all pages/chunks that have hits as key, and number of hits as a value, or 0 if yet unknown
-    private final LinkedHashMap<Integer, Integer> numberOfHitsPerPage = new LinkedHashMap<>();
-    //stored page num -> current hit number mapping
-    private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
-    private final List<Integer> pages = new ArrayList<>();
-    private boolean isPageInfoLoaded = false;
-    private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
-    private final String displayName;
+    private final String solrDocumentId;
     private final long solrObjectId;
     private final Integer chunkId;
+    private final Set<String> keywords = new HashSet<>();
+    private final String displayName;
+    private final String queryString;
 
-    String getDisplayName() {
-        return displayName;
-    }
+    private boolean isPageInfoLoaded = false;
+    private int numberPagesForFile = 0;
+    private int currentPage = 0;
+    //list of pages, used for iterating back and forth. Only stores pages with hits
+    private final List<Integer> pages = new ArrayList<>();
+    //map from page/chunk to number of hits. value is 0 if not yet known.
+    private final LinkedHashMap<Integer, Integer> numberOfHitsPerPage = new LinkedHashMap<>();
+    //map from page/chunk number to current hit on that page.
+    private final HashMap<Integer, Integer> currentHitPerPage = new HashMap<>();
 
     @NbBundle.Messages({
         "AccountsText.creditCardNumber=Credit Card Number",
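Reviewer note on the constants above: INSERT_PREFIX and INSERT_POSTFIX replace locals that the deleted insertAnchors() method used to build inline; together they wrap each highlight in a named anchor the viewer can scroll to. A rough standalone sketch of the substitution, using a shortened anchor prefix (hypothetical; the real prefix is the fully qualified class name):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

class AnchorSketch {
    public static void main(String[] args) {
        String highlightPre = "<span style='background:yellow'>";
        String insertPrefix = "<a name='AccountsText_"; // assumed short form of ANCHOR_NAME_PREFIX
        String insertPostfix = "'></a>$0"; // $0 re-inserts the matched HIGHLIGHT_PRE tag
        Matcher m = Pattern.compile(highlightPre, Pattern.LITERAL)
                .matcher(highlightPre + "4149 0111 2222 3333</span>");
        StringBuffer sb = new StringBuffer();
        int count = 0;
        while (m.find()) {
            count++;
            m.appendReplacement(sb, insertPrefix + count + insertPostfix);
        }
        m.appendTail(sb);
        // prints: <a name='AccountsText_1'></a><span style='background:yellow'>4149 0111 2222 3333</span>
        System.out.println(sb);
    }
}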
@@ -78,6 +87,13 @@
     AccountsText(String objectId, Set<String> keywords) {
         this.solrDocumentId = objectId;
         this.keywords.addAll(keywords);
+
+        //build the query string
+        this.queryString = HIGHLIGHT_FIELD + ":"
+                + keywords.stream()
+                .map(keyword -> "/.*?" + KeywordSearchUtil.escapeLuceneQuery(keyword) + ".*?/") //surround each "keyword" with match anything regex.
+                .collect(Collectors.joining(" ")); //collect as space separated string
+
         this.solrServer = KeywordSearch.getServer();
 
         final int separatorIndex = solrDocumentId.indexOf(Server.ID_CHUNK_SEP);
@@ -123,22 +139,24 @@
     }
 
     @Override
+    @NbBundle.Messages("AccountsText.nextPage.exception.msg=No next page.")
     public int nextPage() {
         if (hasNextPage()) {
             currentPage = pages.get(pages.indexOf(this.currentPage) + 1);
             return currentPage;
         } else {
-            throw new IllegalStateException(NbBundle.getMessage(AccountsText.class, "HighlightedMatchesSource.nextPage.exception.msg"));
+            throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
         }
     }
 
     @Override
+    @NbBundle.Messages("AccountsText.previousPage.exception.msg=No previous page.")
     public int previousPage() {
         if (hasPreviousPage()) {
             currentPage = pages.get(pages.indexOf(this.currentPage) - 1);
             return currentPage;
         } else {
-            throw new IllegalStateException(NbBundle.getMessage(AccountsText.class, "HighlightedMatchesSource.previousPage.exception.msg"));
+            throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
         }
     }
 
@@ -161,20 +179,22 @@
     }
 
     @Override
+    @NbBundle.Messages("AccountsText.nextItem.exception.msg=No next item.")
     public int nextItem() {
         if (hasNextItem()) {
             return currentHitPerPage.merge(currentPage, 1, Integer::sum);
         } else {
-            throw new IllegalStateException(NbBundle.getMessage(AccountsText.class, "HighlightedMatchesSource.nextItem.exception.msg"));
+            throw new IllegalStateException(Bundle.AccountsText_nextItem_exception_msg());
        }
    }
 
     @Override
+    @NbBundle.Messages("AccountsText.previousItem.exception.msg=No previous item.")
     public int previousItem() {
         if (hasPreviousItem()) {
             return currentHitPerPage.merge(currentPage, -1, Integer::sum);
         } else {
-            throw new IllegalStateException(NbBundle.getMessage(AccountsText.class, "HighlightedMatchesSource.previousItem.exception.msg"));
+            throw new IllegalStateException(Bundle.AccountsText_previousItem_exception_msg());
         }
     }
 
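For review context: the queryString built in the constructor above ORs the keywords together as Solr regex clauses against the highlight field. A minimal sketch of the resulting string for two hypothetical card numbers (field name assumed; escaping omitted for brevity):

import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

class QueryStringSketch {
    public static void main(String[] args) {
        List<String> keywords = Arrays.asList("4149011122223333", "371449635398431"); // hypothetical hits
        String queryString = "content_str:" // assumes LuceneQuery.HIGHLIGHT_FIELD_REGEX names this field
                + keywords.stream()
                        .map(keyword -> "/.*?" + keyword + ".*?/") // reluctant wildcards around each keyword
                        .collect(Collectors.joining(" "));
        // prints: content_str:/.*?4149011122223333.*?/ /.*?371449635398431.*?/
        System.out.println(queryString);
    }
}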
@@ -193,54 +213,43 @@
     }
 
     /**
-     * The main goal of this method is to figure out which pages / chunks have
-     * hits.
+     * Initialize this object with information about which pages/chunks have
+     * hits. Multiple calls will not change the initial results.
      */
     synchronized private void loadPageInfo() {
         if (isPageInfoLoaded) {
             return;
         }
-        if (chunkId != null) {
-            //if a chunk is specified, only show that chunk/page
-            this.numberPagesForFile = 1;
-            hasChunks = false;
-            //no chunks
+        if (chunkId != null) {//if a chunk is specified, only show that chunk/page
             this.numberPagesForFile = 1;
             this.currentPage = chunkId;
-            numberOfHitsPerPage.put(chunkId, 0);
-            pages.add(chunkId);
-            currentHitPerPage.put(chunkId, 0);
+            this.numberOfHitsPerPage.put(chunkId, 0);
+            this.pages.add(chunkId);
+            this.currentHitPerPage.put(chunkId, 0);
         } else {
-            hasChunks = true;
             try {
                 this.numberPagesForFile = solrServer.queryNumFileChunks(this.solrObjectId);
             } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
-                LOGGER.log(Level.WARNING, "Could not get number pages for content: {0}", this.solrDocumentId); //NON-NLS
+                LOGGER.log(Level.WARNING, "Could not get number pages for content " + this.solrDocumentId, ex); //NON-NLS
                 return;
             }
 
             //if has chunks, get pages with hits
             TreeSet<Integer> sortedPagesWithHits = new TreeSet<>();
-            //extract pages of interest, sorted
-
             SolrQuery q = new SolrQuery();
             q.setShowDebugInfo(DEBUG); //debug
-            String query = keywords.stream().map(keyword -> "/.*" + KeywordSearchUtil.escapeLuceneQuery(keyword) + ".*/").collect(Collectors.joining(" "));
-            q.setQuery(LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":" + query);
-            q.setFields("id");
-            if (chunkId == null) {
-                q.addFilterQuery(Server.Schema.ID.toString() + ":" + this.solrObjectId + "_*");
-            } else {
-                q.addFilterQuery(Server.Schema.ID.toString() + ":" + this.solrDocumentId);
-            }
+            q.setQuery(queryString);
+            q.setFields(Server.Schema.ID.toString()); //for this case we only need the document ids
+            q.addFilterQuery(Server.Schema.ID.toString() + ":" + this.solrObjectId + Server.ID_CHUNK_SEP + "*");
+
             try {
                 QueryResponse response = solrServer.query(q, METHOD.POST);
                 for (SolrDocument resultDoc : response.getResults()) {
                     final String resultDocumentId = resultDoc.getFieldValue(Server.Schema.ID.toString()).toString();
                     // Put the solr chunk id in the map
-                    final int separatorIndex = resultDocumentId.indexOf(Server.ID_CHUNK_SEP);
-                    if (-1 != separatorIndex) {
-                        sortedPagesWithHits.add(Integer.parseInt(resultDocumentId.substring(separatorIndex + 1)));
+                    String resultChunkID = StringUtils.substringAfter(resultDocumentId, Server.ID_CHUNK_SEP);
+                    if (StringUtils.isNotBlank(resultChunkID)) {
+                        sortedPagesWithHits.add(Integer.parseInt(resultChunkID));
                    } else {
                         sortedPagesWithHits.add(0);
                     }
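The hit-page loop above recovers chunk numbers from Solr document ids of the form objectId_chunkNumber. A small sketch of that parsing rule with hypothetical ids, mirroring the commons-lang calls the hunk introduces:

import org.apache.commons.lang.StringUtils;

class ChunkIdSketch {
    public static void main(String[] args) {
        for (String documentId : new String[]{"8235", "8235_1", "8235_17"}) { // hypothetical ids
            // substringAfter returns "" when there is no separator, so a bare
            // object id is treated as page 0, matching the else branch above
            String chunk = StringUtils.substringAfter(documentId, "_");
            int page = StringUtils.isNotBlank(chunk) ? Integer.parseInt(chunk) : 0;
            System.out.println(documentId + " -> page " + page);
        }
    }
}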
@@ -268,63 +277,79 @@
     }
 
     @Override
+    @NbBundle.Messages({"AccountsText.getMarkup.noMatchMsg=" + "<html><pre><span style='background:yellow'>There were no keyword hits on this page. <br />"
+        + "The keyword could have been in the file name." + "<br />Advance to another page if present, or to view the original text, choose File Text" + "<br />in the drop down menu to the right...</span></pre></html>", "AccountsText.getMarkup.queryFailedMsg=" + "<html><pre><span style='background:yellow'>Failed to retrieve keyword hit results."
+        + " <br />Confirm that Autopsy can connect to the Solr server. " + "<br /></span></pre></html>"})
     public String getText() {
         loadPageInfo(); //inits once
 
-        String highLightField = LuceneQuery.HIGHLIGHT_FIELD_REGEX;
-
         SolrQuery q = new SolrQuery();
         q.setShowDebugInfo(DEBUG); //debug
-        String query = keywords.stream().map(keyword -> "/.*" + KeywordSearchUtil.escapeLuceneQuery(keyword) + ".*/").collect(Collectors.joining(" "));
-        q.setQuery(LuceneQuery.HIGHLIGHT_FIELD_REGEX + ":" + query);
+        q.addHighlightField(HIGHLIGHT_FIELD);
+        q.setQuery(queryString);
 
-        String contentIdStr;
-        if (hasChunks) {
-            contentIdStr = solrObjectId + "_" + Integer.toString(this.currentPage);
-        } else {
-            contentIdStr = this.solrDocumentId;
-        }
+        //set the documentID filter
+        String queryDocumentID = this.solrObjectId + Server.ID_CHUNK_SEP + this.currentPage;
+        q.addFilterQuery(Server.Schema.ID.toString() + ":" + queryDocumentID);
 
-        final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
-        q.addFilterQuery(filterQuery);
-        q.addHighlightField(highLightField); //for exact highlighting, try content_ws field (with stored="true" in Solr schema)
-
-        //tune the highlighter
+        //configure the highlighter
         q.setParam("hl.useFastVectorHighlighter", "true"); //fast highlighter scales better than standard one NON-NLS
         q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
         q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
         q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
-
-        //docs says makes sense for the original Highlighter only, but not really
-        q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
+        q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //docs says makes sense for the original Highlighter only, but not really //NON-NLS
 
         try {
-            QueryResponse response = solrServer.query(q, METHOD.POST);
-            Map<String, Map<String, List<String>>> responseHighlight = response.getHighlighting();
-
-            Map<String, List<String>> responseHighlightID = responseHighlight.get(contentIdStr);
-            if (responseHighlightID == null) {
-                return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
+            //extract highlighting and bail early on null responses
+            Map<String, Map<String, List<String>>> highlightingPerDocument = solrServer.query(q, METHOD.POST).getHighlighting();
+            Map<String, List<String>> highlightingPerField = highlightingPerDocument.get(queryDocumentID);
+            if (highlightingPerField == null) {
+                return Bundle.AccountsText_getMarkup_noMatchMsg();
             }
-            List<String> contentHighlights = responseHighlightID.get(highLightField);
-            if (contentHighlights == null) {
-                return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
-            } else {
-                // extracted content (minus highlight tags) is HTML-escaped
-                String highlightedContent = contentHighlights.get(0).trim();
-                highlightedContent = insertAnchors(highlightedContent);
-
" + highlightedContent + "
"; //NON-NLS + List highlights = highlightingPerField.get(HIGHLIGHT_FIELD); + if (highlights == null) { + return Bundle.AccountsText_getMarkup_noMatchMsg(); } + + //There should only be one item + String highlighting = highlights.get(0).trim(); + + /* + * use regex matcher to iterate over occurences of HIGHLIGHT_PRE, + * and prepend them with an anchor tag. + */ + Matcher m = ANCHOR_DETECTION_PATTERN.matcher(highlighting); + StringBuffer sb = new StringBuffer(highlighting.length()); + int count = 0; + while (m.find()) { + count++; + m.appendReplacement(sb, INSERT_PREFIX + count + INSERT_POSTFIX); + } + m.appendTail(sb); + + //store total hits for this page, now that we know it + this.numberOfHitsPerPage.put(this.currentPage, count); + if (this.currentItem() == 0 && this.hasNextItem()) { + this.nextItem(); + } + + // extracted content (minus highlight tags) is HTML-escaped + return "
" + sb.toString() + "
"; //NON-NLS } catch (Exception ex) { LOGGER.log(Level.WARNING, "Error executing Solr highlighting query: " + keywords, ex); //NON-NLS - return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.queryFailedMsg"); + return Bundle.AccountsText_getMarkup_queryFailedMsg(); } } @Override public String toString() { - return getDisplayName(); + return displayName; } @Override @@ -334,7 +359,7 @@ class AccountsText implements IndexedText, TextMarkupLookup { @Override public String getAnchorPrefix() { - return ANCHOR_PREFIX; + return ANCHOR_NAME_PREFIX; } @Override @@ -344,33 +369,4 @@ class AccountsText implements IndexedText, TextMarkupLookup { } return this.numberOfHitsPerPage.get(this.currentPage); } - - private String insertAnchors(String searchableContent) { - - final String insertPre = "$0"; //$0 will insert current regex match //NON-NLS - - Matcher m = Pattern.compile(HIGHLIGHT_PRE).matcher(searchableContent); - StringBuffer sb = new StringBuffer(searchableContent.length()); - int count; - for (count = 0; m.find(); count++) { - m.appendReplacement(sb, insertPre + count + insertPost); - } - m.appendTail(sb); - - //store total hits for this page, now that we know it - this.numberOfHitsPerPage.put(this.currentPage, count); - if (this.currentItem() == 0 && this.hasNextItem()) { - this.nextItem(); - } - - return sb.toString(); - } - - @Override - @Deprecated - // factory method to create an instance of this object - public AccountsText createInstance(long objectId, String keywordHitQuery, boolean isRegex, String originalQuery) { - return new AccountsText(String.valueOf(objectId), Collections.emptySet()); - } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentViewer.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentViewer.java index 2ffccf5ce3..2dfaa0b4ec 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentViewer.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/ExtractedContentViewer.java @@ -184,7 +184,7 @@ public class ExtractedContentViewer implements DataContentViewer { */ BlackboardArtifact artifact = nodeLookup.lookup(BlackboardArtifact.class); if (null != artifact) { - /** + /* * For keyword hit artifacts, add the text of the artifact that hit, * not the hit artifact; otherwise add the text for the artifact. 
diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
index cd9d1572c9..33a5a1d443 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Server.java
@@ -40,32 +40,32 @@ import java.util.Collection;
 import java.util.List;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.logging.Level;
-import org.openide.util.NbBundle;
-import org.sleuthkit.autopsy.coreutils.Logger;
 import javax.swing.AbstractAction;
 import org.apache.solr.client.solrj.SolrQuery;
+import org.apache.solr.client.solrj.SolrRequest;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.client.solrj.impl.XMLResponseParser;
 import org.apache.solr.client.solrj.request.CoreAdminRequest;
+import org.apache.solr.client.solrj.response.CoreAdminResponse;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.client.solrj.response.TermsResponse;
-import org.apache.solr.client.solrj.SolrRequest;
-import org.apache.solr.client.solrj.impl.HttpSolrServer;
-import org.apache.solr.common.util.NamedList;
-import org.openide.modules.InstalledFileLocator;
-import org.openide.modules.Places;
-import org.sleuthkit.autopsy.casemodule.Case;
-import org.sleuthkit.autopsy.coreutils.ModuleSettings;
-import org.sleuthkit.autopsy.coreutils.PlatformUtil;
-import org.sleuthkit.datamodel.Content;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.client.solrj.impl.XMLResponseParser;
-import org.apache.solr.client.solrj.response.CoreAdminResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.NamedList;
+import org.openide.modules.InstalledFileLocator;
+import org.openide.modules.Places;
+import org.openide.util.NbBundle;
+import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.casemodule.Case.CaseType;
-import org.sleuthkit.autopsy.coreutils.UNCPathUtilities;
 import org.sleuthkit.autopsy.core.UserPreferences;
+import org.sleuthkit.autopsy.coreutils.Logger;
+import org.sleuthkit.autopsy.coreutils.ModuleSettings;
+import org.sleuthkit.autopsy.coreutils.PlatformUtil;
+import org.sleuthkit.autopsy.coreutils.UNCPathUtilities;
+import org.sleuthkit.datamodel.Content;
 
 /**
  * Handles management of a either a local or centralized Solr server and its
  * cores.
  */
 public class Server {
@@ -157,7 +157,7 @@ public class Server {
     private static final Logger logger = Logger.getLogger(Server.class.getName());
     private static final String DEFAULT_CORE_NAME = "coreCase"; //NON-NLS
     public static final String CORE_EVT = "CORE_EVT"; //NON-NLS
-    public static final char ID_CHUNK_SEP = '_';
+    public static final String ID_CHUNK_SEP = "_";
     private String javaPath = "java"; //NON-NLS
     public static final Charset DEFAULT_INDEXED_TEXT_CHARSET = Charset.forName("UTF-8"); ///< default Charset to index text as
     private static final int MAX_SOLR_MEM_MB = 512; //TODO set dynamically based on avail. system resources
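A final note on the ID_CHUNK_SEP change just above: it becomes a String rather than a char because commons-lang's StringUtils.substringAfter(String, String) takes a String separator, while string concatenation behaves the same either way. A tiny sketch with hypothetical values:

import org.apache.commons.lang.StringUtils;

class SeparatorSketch {
    public static void main(String[] args) {
        String idChunkSep = "_"; // previously char '_'
        long solrObjectId = 8235L;
        int currentPage = 3;
        String queryDocumentID = solrObjectId + idChunkSep + currentPage; // "8235_3"
        System.out.println(StringUtils.substringAfter(queryDocumentID, idChunkSep)); // prints 3
    }
}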