From 2b59bf85648baef4751afb5fd7083b781a0c13c5 Mon Sep 17 00:00:00 2001 From: esaunders Date: Mon, 30 Oct 2017 16:35:21 -0400 Subject: [PATCH 1/8] Update comment to hopefully clear up why all keyword hits are processed in loadPageInfoFromHits() --- .../sleuthkit/autopsy/keywordsearch/HighlightedText.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java index b8bb93b240..57e8aa91c4 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/HighlightedText.java @@ -205,7 +205,13 @@ class HighlightedText implements IndexedText { */ synchronized private void loadPageInfoFromHits() { isLiteral = hits.getQuery().isLiteral(); - //organize the hits by page, filter as needed + + /** + * Organize the hits by page, filter as needed. + * We process *every* keyword here because in the case of a regular + * expression search there may be multiple different keyword + * hits located in different chunks for the same file/artifact. + */ for (Keyword k : hits.getKeywords()) { for (KeywordHit hit : hits.getResults(k)) { int chunkID = hit.getChunkId(); From 4a17cf3f75f7aa0d25fc23599d5750d886c48791 Mon Sep 17 00:00:00 2001 From: esaunders Date: Mon, 30 Oct 2017 16:39:55 -0400 Subject: [PATCH 2/8] Don't store document id string in the hit. 
--- .../src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java index e51c1c2620..196e0e7c39 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java @@ -38,7 +38,6 @@ class KeywordHit implements Comparable { private static final String GET_CONTENT_ID_FROM_ARTIFACT_ID = "SELECT obj_id FROM blackboard_artifacts WHERE artifact_id = "; - private final String solrDocumentId; private final long solrObjectId; private final int chunkId; private final String snippet; @@ -63,7 +62,6 @@ class KeywordHit implements Comparable { KeywordHit(String solrDocumentId, String snippet, String hit) throws TskCoreException { this.snippet = StringUtils.stripToEmpty(snippet); this.hit = hit; - this.solrDocumentId = solrDocumentId; /* * Parse the Solr document id to get the Solr object id and chunk id. @@ -112,7 +110,7 @@ class KeywordHit implements Comparable { } String getSolrDocumentId() { - return this.solrDocumentId; + return Long.toString(solrObjectId) + Server.CHUNK_ID_SEPARATOR + Long.toString(chunkId); } long getSolrObjectId() { From 988fc1b40bcda1264f2a7b714b6db41e6ef26f57 Mon Sep 17 00:00:00 2001 From: esaunders Date: Mon, 30 Oct 2017 17:00:35 -0400 Subject: [PATCH 3/8] Removed contentID and isArtifactHit from KeywordHit. 
--- .../autopsy/keywordsearch/KeywordHit.java | 66 ++++++++++--------- .../autopsy/keywordsearch/RegexQuery.java | 4 +- .../keywordsearch/TermsComponentQuery.java | 2 +- 3 files changed, 36 insertions(+), 36 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java index 196e0e7c39..f9284e81d9 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java @@ -41,8 +41,6 @@ class KeywordHit implements Comparable { private final long solrObjectId; private final int chunkId; private final String snippet; - private final long contentID; - private final boolean hitOnArtifact; private final String hit; /** @@ -55,11 +53,8 @@ class KeywordHit implements Comparable { * For some searches (ie substring, regex) this will be * different than the search term. * - * @throws TskCoreException If there is a problem getting the underlying - * content associated with a hit on the text of an - * artifact. */ - KeywordHit(String solrDocumentId, String snippet, String hit) throws TskCoreException { + KeywordHit(String solrDocumentId, String snippet, String hit) { this.snippet = StringUtils.stripToEmpty(snippet); this.hit = hit; @@ -81,28 +76,6 @@ class KeywordHit implements Comparable { this.solrObjectId = Long.parseLong(split[0]); this.chunkId = Integer.parseInt(split[1]); } - - //artifacts have negative obj ids - hitOnArtifact = this.solrObjectId < 0; - - if (hitOnArtifact) { - // If the hit was in an artifact, look up the source content for the artifact. 
- SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase(); - try (SleuthkitCase.CaseDbQuery executeQuery = - caseDb.executeQuery(GET_CONTENT_ID_FROM_ARTIFACT_ID + this.solrObjectId); - ResultSet resultSet = executeQuery.getResultSet();) { - if (resultSet.next()) { - contentID = resultSet.getLong("obj_id"); - } else { - throw new TskCoreException("Failed to get obj_id for artifact with artifact_id =" + this.solrObjectId + ". No matching artifact was found."); - } - } catch (SQLException ex) { - throw new TskCoreException("Error getting obj_id for artifact with artifact_id =" + this.solrObjectId, ex); - } - } else { - //else the object id is for content. - contentID = this.solrObjectId; - } } String getHit() { @@ -129,8 +102,36 @@ class KeywordHit implements Comparable { return this.snippet; } - long getContentID() { - return this.contentID; + /** + * Get the content id associated with the content underlying hit. + * For hits on files this will be the same as the object id associated + * with the file. For hits on artifacts we look up the id of the object + * that produced the artifact. + * + * @return The id of the underlying content associated with the hit. + * @throws TskCoreException If there is a problem getting the underlying + * content associated with a hit on the text of an + * artifact. + */ + long getContentID() throws TskCoreException { + if (isArtifactHit()) { + // If the hit was in an artifact, look up the source content for the artifact. + SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase(); + try (SleuthkitCase.CaseDbQuery executeQuery = + caseDb.executeQuery(GET_CONTENT_ID_FROM_ARTIFACT_ID + this.solrObjectId); + ResultSet resultSet = executeQuery.getResultSet();) { + if (resultSet.next()) { + return resultSet.getLong("obj_id"); + } else { + throw new TskCoreException("Failed to get obj_id for artifact with artifact_id =" + this.solrObjectId + ". 
No matching artifact was found."); + } + } catch (SQLException ex) { + throw new TskCoreException("Error getting obj_id for artifact with artifact_id =" + this.solrObjectId, ex); + } + } else { + //else the object id is for content. + return this.solrObjectId; + } } /** @@ -139,7 +140,8 @@ class KeywordHit implements Comparable { * @return */ boolean isArtifactHit() { - return hitOnArtifact; + // artifacts have negative obj ids + return this.solrObjectId < 0; } /** @@ -148,7 +150,7 @@ class KeywordHit implements Comparable { * @return The artifact whose indexed text this hit is in. */ Optional getArtifactID() { - if (hitOnArtifact) { + if (isArtifactHit()) { return Optional.of(solrObjectId); } else { return Optional.empty(); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index 392a5e8bc8..6f1a574451 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -316,8 +316,6 @@ final class RegexQuery implements KeywordSearchQuery { } } - } catch (TskCoreException ex) { - throw ex; } catch (Throwable error) { /* * NOTE: Matcher.find() is known to throw StackOverflowError in rare @@ -447,7 +445,7 @@ final class RegexQuery implements KeywordSearchQuery { if (hit.isArtifactHit()) { LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS } else { - LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContentID())); //NON-NLS + LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: 
term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getSolrObjectId())); //NON-NLS } return null; } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermsComponentQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermsComponentQuery.java index 58087c4590..5953f6f35f 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermsComponentQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/TermsComponentQuery.java @@ -366,7 +366,7 @@ final class TermsComponentQuery implements KeywordSearchQuery { if (hit.isArtifactHit()) { LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS } else { - LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getContentID())); //NON-NLS + LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getSolrObjectId())); //NON-NLS } return null; } From 33a72835a4ae713e10864877db232e4316a9f52f Mon Sep 17 00:00:00 2001 From: esaunders Date: Mon, 30 Oct 2017 17:17:45 -0400 Subject: [PATCH 4/8] Reduce number of string instances created by re-using existing hit string for both the Keyword and KeywordHit instances. 
--- .../autopsy/keywordsearch/RegexQuery.java | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index 6f1a574451..acefc554a4 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -100,6 +100,16 @@ final class RegexQuery implements KeywordSearchQuery { private String escapedQuery; private String field = Server.Schema.CONTENT_STR.toString(); + /** + * The following map is an optimization to ensure that we are referencing + * the same keyword hit String object in both the KeywordHit instance and + * it's associated Keyword instance. Even though we benefit from G1GC + * String deduplication, the overhead associated with both Keyword and + * KeywordHit maintaining their own reference can be significant when the + * number of hits gets large. + */ + private final HashMap keywordsFoundAcrossAllDocuments; + /** * Constructor with query to process. * @@ -113,6 +123,7 @@ final class RegexQuery implements KeywordSearchQuery { this.queryStringContainsWildcardPrefix = this.keywordString.startsWith(".*"); this.queryStringContainsWildcardSuffix = this.keywordString.endsWith(".*"); + this.keywordsFoundAcrossAllDocuments = new HashMap<>(); } @Override @@ -273,6 +284,14 @@ final class RegexQuery implements KeywordSearchQuery { hit = hit.replaceAll("[^0-9]$", ""); } + // Optimization to reduce the number of String objects created. + if (keywordsFoundAcrossAllDocuments.containsKey(hit)) { + // Use an existing String reference if it exists. 
+ hit = keywordsFoundAcrossAllDocuments.get(hit); + } else { + keywordsFoundAcrossAllDocuments.put(hit, hit); + } + if (artifactAttributeType == null) { hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); } else { @@ -303,7 +322,7 @@ final class RegexQuery implements KeywordSearchQuery { final String group = ccnMatcher.group("ccn"); if (CreditCardValidator.isValidCCN(group)) { hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); - }; + } } } From b4456ff85355a3f08933c25afea40fbbeb0a4940 Mon Sep 17 00:00:00 2001 From: esaunders Date: Mon, 30 Oct 2017 17:21:58 -0400 Subject: [PATCH 5/8] Provided a more accurate comment for the keywordsFoundAcrossAllDocuments member. --- .../sleuthkit/autopsy/keywordsearch/RegexQuery.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index acefc554a4..9c92cdea5d 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -101,12 +101,11 @@ final class RegexQuery implements KeywordSearchQuery { private String field = Server.Schema.CONTENT_STR.toString(); /** - * The following map is an optimization to ensure that we are referencing - * the same keyword hit String object in both the KeywordHit instance and - * it's associated Keyword instance. Even though we benefit from G1GC - * String deduplication, the overhead associated with both Keyword and - * KeywordHit maintaining their own reference can be significant when the - * number of hits gets large. + * The following map is an optimization to ensure that we reuse + * the same keyword hit String object across all hits. 
Even though we + * benefit from G1GC String deduplication, the overhead associated with + * creating a new String object for every KeywordHit can be significant + * when the number of hits gets large. */ private final HashMap keywordsFoundAcrossAllDocuments; From 761884534a36da52060c164e084b25f20451ff71 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\zhaohui" Date: Thu, 28 Sep 2017 11:58:33 -0400 Subject: [PATCH 6/8] 3095: Expanding Data Sources Node in nightly test --- .../autopsy/testing/AutopsyTestCases.java | 24 +++++++++++++++++++ .../autopsy/testing/RegressionTest.java | 5 ++++ 2 files changed, 29 insertions(+) diff --git a/Testing/src/org/sleuthkit/autopsy/testing/AutopsyTestCases.java b/Testing/src/org/sleuthkit/autopsy/testing/AutopsyTestCases.java index 672ca5b22b..c716081672 100755 --- a/Testing/src/org/sleuthkit/autopsy/testing/AutopsyTestCases.java +++ b/Testing/src/org/sleuthkit/autopsy/testing/AutopsyTestCases.java @@ -36,6 +36,7 @@ import java.util.logging.Logger; import javax.imageio.ImageIO; import javax.swing.JDialog; import javax.swing.text.JTextComponent; +import javax.swing.tree.TreePath; import org.netbeans.jellytools.MainWindowOperator; import org.netbeans.jellytools.NbDialogOperator; import org.netbeans.jellytools.WizardOperator; @@ -53,6 +54,8 @@ import org.netbeans.jemmy.operators.JTabbedPaneOperator; import org.netbeans.jemmy.operators.JTableOperator; import org.netbeans.jemmy.operators.JTextFieldOperator; import org.netbeans.jemmy.operators.JToggleButtonOperator; +import org.netbeans.jemmy.operators.JTreeOperator; +import org.netbeans.jemmy.operators.JTreeOperator.NoSuchPathException; import org.sleuthkit.autopsy.core.UserPreferences; import org.sleuthkit.autopsy.core.UserPreferencesException; import org.sleuthkit.autopsy.events.MessageServiceConnectionInfo; @@ -290,6 +293,16 @@ public class AutopsyTestCases { } + public void testExpandDataSourcesTree() { + logger.info("Data Sources Node"); + MainWindowOperator mwo = 
MainWindowOperator.getDefault(); + JTreeOperator jto = new JTreeOperator(mwo, "Data Sources"); + String [] nodeNames = {"Data Sources"}; + TreePath tp = jto.findPath(nodeNames); + expandNodes(jto, tp); + screenshot("Data Sources Tree"); + } + public void testGenerateReportToolbar() { logger.info("Generate Report Toolbars"); MainWindowOperator mwo = MainWindowOperator.getDefault(); @@ -380,4 +393,15 @@ public class AutopsyTestCases { logger.log(Level.SEVERE, "Error saving messaging service connection info", ex); //NON-NLS } } + + private void expandNodes (JTreeOperator jto, TreePath tp) { + try { + jto.expandPath(tp); + for (TreePath t : jto.getChildPaths(tp)) { + expandNodes(jto, t); + } + } catch (NoSuchPathException ne) { + logger.log(Level.SEVERE, "Error expanding tree path", ne); + } + } } diff --git a/Testing/test/qa-functional/src/org/sleuthkit/autopsy/testing/RegressionTest.java b/Testing/test/qa-functional/src/org/sleuthkit/autopsy/testing/RegressionTest.java index 674395e0f1..0518da865b 100755 --- a/Testing/test/qa-functional/src/org/sleuthkit/autopsy/testing/RegressionTest.java +++ b/Testing/test/qa-functional/src/org/sleuthkit/autopsy/testing/RegressionTest.java @@ -69,6 +69,7 @@ public class RegressionTest extends TestCase { "testConfigureSearch", "testAddSourceWizard1", "testIngest", + "testExpandDataSourcesTree", //After ingest finishes and before the report is generated, expand the Data Sources node "testGenerateReportToolbar", "testGenerateReportButton"); } @@ -83,6 +84,7 @@ public class RegressionTest extends TestCase { "testConfigureSearch", "testAddSourceWizard1", "testIngest", + "testExpandDataSourcesTree", "testGenerateReportToolbar", "testGenerateReportButton"); } @@ -147,6 +149,9 @@ public class RegressionTest extends TestCase { autopsyTests.testIngest(); } + public void testExpandDataSourcesTree() { + autopsyTests.testExpandDataSourcesTree(); + } public void testGenerateReportToolbar() { autopsyTests.testGenerateReportToolbar(); } From 
e1546bd51043b4931e265efebadfcc5936e7839d Mon Sep 17 00:00:00 2001 From: esaunders Date: Tue, 7 Nov 2017 16:12:06 -0500 Subject: [PATCH 7/8] Only create one KeywordHit instance per document for a given hit. --- .../sleuthkit/autopsy/keywordsearch/RegexQuery.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index 9c92cdea5d..a0383ef03b 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -236,6 +236,8 @@ final class RegexQuery implements KeywordSearchQuery { private List createKeywordHits(SolrDocument solrDoc) throws TskCoreException { + final HashMap keywordsFoundInThisDocument = new HashMap<>(); + List hits = new ArrayList<>(); final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString(); final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString()); @@ -283,9 +285,14 @@ final class RegexQuery implements KeywordSearchQuery { hit = hit.replaceAll("[^0-9]$", ""); } - // Optimization to reduce the number of String objects created. + // We will only create one KeywordHit instance per document for + // a given hit. + if (keywordsFoundInThisDocument.containsKey(hit)) { + continue; + } + keywordsFoundInThisDocument.put(hit, hit); + if (keywordsFoundAcrossAllDocuments.containsKey(hit)) { - // Use an existing String reference if it exists. hit = keywordsFoundAcrossAllDocuments.get(hit); } else { keywordsFoundAcrossAllDocuments.put(hit, hit); From 0a6b3bc62b55c2a05533988b8b5c55015605e29a Mon Sep 17 00:00:00 2001 From: esaunders Date: Tue, 7 Nov 2017 16:48:11 -0500 Subject: [PATCH 8/8] Use String interning instead of our own hashmap to reuse hits. 
--- .../autopsy/keywordsearch/RegexQuery.java | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java index a0383ef03b..7702cfdc7e 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/RegexQuery.java @@ -100,15 +100,6 @@ final class RegexQuery implements KeywordSearchQuery { private String escapedQuery; private String field = Server.Schema.CONTENT_STR.toString(); - /** - * The following map is an optimization to ensure that we reuse - * the same keyword hit String object across all hits. Even though we - * benefit from G1GC String deduplication, the overhead associated with - * creating a new String object for every KeywordHit can be significant - * when the number of hits gets large. - */ - private final HashMap keywordsFoundAcrossAllDocuments; - /** * Constructor with query to process. * @@ -122,7 +113,6 @@ final class RegexQuery implements KeywordSearchQuery { this.queryStringContainsWildcardPrefix = this.keywordString.startsWith(".*"); this.queryStringContainsWildcardSuffix = this.keywordString.endsWith(".*"); - this.keywordsFoundAcrossAllDocuments = new HashMap<>(); } @Override @@ -285,6 +275,16 @@ final class RegexQuery implements KeywordSearchQuery { hit = hit.replaceAll("[^0-9]$", ""); } + /** + * The use of String interning is an optimization to ensure + * that we reuse the same keyword hit String object across + * all hits. Even though we benefit from G1GC String + * deduplication, the overhead associated with creating a + * new String object for every KeywordHit can be significant + * when the number of hits gets large. + */ + hit = hit.intern(); + // We will only create one KeywordHit instance per document for // a given hit. 
if (keywordsFoundInThisDocument.containsKey(hit)) { @@ -292,12 +292,6 @@ final class RegexQuery implements KeywordSearchQuery { } keywordsFoundInThisDocument.put(hit, hit); - if (keywordsFoundAcrossAllDocuments.containsKey(hit)) { - hit = keywordsFoundAcrossAllDocuments.get(hit); - } else { - keywordsFoundAcrossAllDocuments.put(hit, hit); - } - if (artifactAttributeType == null) { hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); } else {