Merge pull request #3187 from esaunders/release-4.5.1

Reduce memory consumed by keyword search hits
This commit is contained in:
Richard Cordovano 2017-11-13 13:26:28 -05:00 committed by GitHub
commit bb0f25ae02
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 93 additions and 41 deletions

View File

@ -205,7 +205,13 @@ class HighlightedText implements IndexedText {
*/ */
synchronized private void loadPageInfoFromHits() { synchronized private void loadPageInfoFromHits() {
isLiteral = hits.getQuery().isLiteral(); isLiteral = hits.getQuery().isLiteral();
//organize the hits by page, filter as needed
/**
* Organize the hits by page, filter as needed.
* We process *every* keyword here because in the case of a regular
* expression search there may be multiple different keyword
* hits located in different chunks for the same file/artifact.
*/
for (Keyword k : hits.getKeywords()) { for (Keyword k : hits.getKeywords()) {
for (KeywordHit hit : hits.getResults(k)) { for (KeywordHit hit : hits.getResults(k)) {
int chunkID = hit.getChunkId(); int chunkID = hit.getChunkId();

View File

@ -38,12 +38,9 @@ class KeywordHit implements Comparable<KeywordHit> {
private static final String GET_CONTENT_ID_FROM_ARTIFACT_ID = "SELECT obj_id FROM blackboard_artifacts WHERE artifact_id = "; private static final String GET_CONTENT_ID_FROM_ARTIFACT_ID = "SELECT obj_id FROM blackboard_artifacts WHERE artifact_id = ";
private final String solrDocumentId;
private final long solrObjectId; private final long solrObjectId;
private final int chunkId; private final int chunkId;
private final String snippet; private final String snippet;
private final long contentID;
private final boolean hitOnArtifact;
private final String hit; private final String hit;
/** /**
@ -56,14 +53,10 @@ class KeywordHit implements Comparable<KeywordHit> {
* For some searches (ie substring, regex) this will be * For some searches (ie substring, regex) this will be
* different than the search term. * different than the search term.
* *
* @throws TskCoreException If there is a problem getting the underlying
* content associated with a hit on the text of an
* artifact.
*/ */
KeywordHit(String solrDocumentId, String snippet, String hit) throws TskCoreException { KeywordHit(String solrDocumentId, String snippet, String hit) {
this.snippet = StringUtils.stripToEmpty(snippet); this.snippet = StringUtils.stripToEmpty(snippet);
this.hit = hit; this.hit = hit;
this.solrDocumentId = solrDocumentId;
/* /*
* Parse the Solr document id to get the Solr object id and chunk id. * Parse the Solr document id to get the Solr object id and chunk id.
@ -83,28 +76,6 @@ class KeywordHit implements Comparable<KeywordHit> {
this.solrObjectId = Long.parseLong(split[0]); this.solrObjectId = Long.parseLong(split[0]);
this.chunkId = Integer.parseInt(split[1]); this.chunkId = Integer.parseInt(split[1]);
} }
//artifacts have negative obj ids
hitOnArtifact = this.solrObjectId < 0;
if (hitOnArtifact) {
// If the hit was in an artifact, look up the source content for the artifact.
SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase();
try (SleuthkitCase.CaseDbQuery executeQuery =
caseDb.executeQuery(GET_CONTENT_ID_FROM_ARTIFACT_ID + this.solrObjectId);
ResultSet resultSet = executeQuery.getResultSet();) {
if (resultSet.next()) {
contentID = resultSet.getLong("obj_id");
} else {
throw new TskCoreException("Failed to get obj_id for artifact with artifact_id =" + this.solrObjectId + ". No matching artifact was found.");
}
} catch (SQLException ex) {
throw new TskCoreException("Error getting obj_id for artifact with artifact_id =" + this.solrObjectId, ex);
}
} else {
//else the object id is for content.
contentID = this.solrObjectId;
}
} }
String getHit() { String getHit() {
@ -112,7 +83,7 @@ class KeywordHit implements Comparable<KeywordHit> {
} }
String getSolrDocumentId() { String getSolrDocumentId() {
return this.solrDocumentId; return Long.toString(solrObjectId) + Server.CHUNK_ID_SEPARATOR + Long.toString(chunkId);
} }
long getSolrObjectId() { long getSolrObjectId() {
@ -131,8 +102,36 @@ class KeywordHit implements Comparable<KeywordHit> {
return this.snippet; return this.snippet;
} }
long getContentID() { /**
return this.contentID; * Get the content id associated with the content underlying hit.
* For hits on files this will be the same as the object id associated
* with the file. For hits on artifacts we look up the id of the object
* that produced the artifact.
*
* @return The id of the underlying content associated with the hit.
* @throws TskCoreException If there is a problem getting the underlying
* content associated with a hit on the text of an
* artifact.
*/
long getContentID() throws TskCoreException {
if (isArtifactHit()) {
// If the hit was in an artifact, look up the source content for the artifact.
SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase();
try (SleuthkitCase.CaseDbQuery executeQuery =
caseDb.executeQuery(GET_CONTENT_ID_FROM_ARTIFACT_ID + this.solrObjectId);
ResultSet resultSet = executeQuery.getResultSet();) {
if (resultSet.next()) {
return resultSet.getLong("obj_id");
} else {
throw new TskCoreException("Failed to get obj_id for artifact with artifact_id =" + this.solrObjectId + ". No matching artifact was found.");
}
} catch (SQLException ex) {
throw new TskCoreException("Error getting obj_id for artifact with artifact_id =" + this.solrObjectId, ex);
}
} else {
//else the object id is for content.
return this.solrObjectId;
}
} }
/** /**
@ -141,7 +140,8 @@ class KeywordHit implements Comparable<KeywordHit> {
* @return * @return
*/ */
boolean isArtifactHit() { boolean isArtifactHit() {
return hitOnArtifact; // artifacts have negative obj ids
return this.solrObjectId < 0;
} }
/** /**
@ -150,7 +150,7 @@ class KeywordHit implements Comparable<KeywordHit> {
* @return The artifact whose indexed text this hit is in. * @return The artifact whose indexed text this hit is in.
*/ */
Optional<Long> getArtifactID() { Optional<Long> getArtifactID() {
if (hitOnArtifact) { if (isArtifactHit()) {
return Optional.of(solrObjectId); return Optional.of(solrObjectId);
} else { } else {
return Optional.empty(); return Optional.empty();

View File

@ -226,6 +226,8 @@ final class RegexQuery implements KeywordSearchQuery {
private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreException { private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreException {
final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>();
List<KeywordHit> hits = new ArrayList<>(); List<KeywordHit> hits = new ArrayList<>();
final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString(); final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString()); final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
@ -273,6 +275,23 @@ final class RegexQuery implements KeywordSearchQuery {
hit = hit.replaceAll("[^0-9]$", ""); hit = hit.replaceAll("[^0-9]$", "");
} }
/**
* The use of String interning is an optimization to ensure
* that we reuse the same keyword hit String object across
* all hits. Even though we benefit from G1GC String
* deduplication, the overhead associated with creating a
* new String object for every KeywordHit can be significant
* when the number of hits gets large.
*/
hit = hit.intern();
// We will only create one KeywordHit instance per document for
// a given hit.
if (keywordsFoundInThisDocument.containsKey(hit)) {
continue;
}
keywordsFoundInThisDocument.put(hit, hit);
if (artifactAttributeType == null) { if (artifactAttributeType == null) {
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
} else { } else {
@ -303,7 +322,7 @@ final class RegexQuery implements KeywordSearchQuery {
final String group = ccnMatcher.group("ccn"); final String group = ccnMatcher.group("ccn");
if (CreditCardValidator.isValidCCN(group)) { if (CreditCardValidator.isValidCCN(group)) {
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit)); hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
}; }
} }
} }
@ -316,8 +335,6 @@ final class RegexQuery implements KeywordSearchQuery {
} }
} }
} catch (TskCoreException ex) {
throw ex;
} catch (Throwable error) { } catch (Throwable error) {
/* /*
* NOTE: Matcher.find() is known to throw StackOverflowError in rare * NOTE: Matcher.find() is known to throw StackOverflowError in rare
@ -447,7 +464,7 @@ final class RegexQuery implements KeywordSearchQuery {
if (hit.isArtifactHit()) { if (hit.isArtifactHit()) {
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
} else { } else {
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContentID())); //NON-NLS LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getSolrObjectId())); //NON-NLS
} }
return null; return null;
} }

View File

@ -366,7 +366,7 @@ final class TermsComponentQuery implements KeywordSearchQuery {
if (hit.isArtifactHit()) { if (hit.isArtifactHit()) {
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
} else { } else {
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getContentID())); //NON-NLS LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getSolrObjectId())); //NON-NLS
} }
return null; return null;
} }

View File

@ -36,6 +36,7 @@ import java.util.logging.Logger;
import javax.imageio.ImageIO; import javax.imageio.ImageIO;
import javax.swing.JDialog; import javax.swing.JDialog;
import javax.swing.text.JTextComponent; import javax.swing.text.JTextComponent;
import javax.swing.tree.TreePath;
import org.netbeans.jellytools.MainWindowOperator; import org.netbeans.jellytools.MainWindowOperator;
import org.netbeans.jellytools.NbDialogOperator; import org.netbeans.jellytools.NbDialogOperator;
import org.netbeans.jellytools.WizardOperator; import org.netbeans.jellytools.WizardOperator;
@ -53,6 +54,8 @@ import org.netbeans.jemmy.operators.JTabbedPaneOperator;
import org.netbeans.jemmy.operators.JTableOperator; import org.netbeans.jemmy.operators.JTableOperator;
import org.netbeans.jemmy.operators.JTextFieldOperator; import org.netbeans.jemmy.operators.JTextFieldOperator;
import org.netbeans.jemmy.operators.JToggleButtonOperator; import org.netbeans.jemmy.operators.JToggleButtonOperator;
import org.netbeans.jemmy.operators.JTreeOperator;
import org.netbeans.jemmy.operators.JTreeOperator.NoSuchPathException;
import org.sleuthkit.autopsy.core.UserPreferences; import org.sleuthkit.autopsy.core.UserPreferences;
import org.sleuthkit.autopsy.core.UserPreferencesException; import org.sleuthkit.autopsy.core.UserPreferencesException;
import org.sleuthkit.autopsy.events.MessageServiceConnectionInfo; import org.sleuthkit.autopsy.events.MessageServiceConnectionInfo;
@ -290,6 +293,16 @@ public class AutopsyTestCases {
} }
/**
 * Locates the tree identified by "Data Sources" inside the main window,
 * resolves the path to its "Data Sources" node, expands that node together
 * with everything beneath it via expandNodes, and then calls screenshot
 * with the label "Data Sources Tree".
 */
public void testExpandDataSourcesTree() {
    logger.info("Data Sources Node");
    // Look the tree up through the application's main window operator.
    final JTreeOperator treeOperator = new JTreeOperator(MainWindowOperator.getDefault(), "Data Sources");
    // Single-element path: just the "Data Sources" node itself.
    final TreePath dataSourcesPath = treeOperator.findPath(new String[]{"Data Sources"});
    expandNodes(treeOperator, dataSourcesPath);
    screenshot("Data Sources Tree");
}
public void testGenerateReportToolbar() { public void testGenerateReportToolbar() {
logger.info("Generate Report Toolbars"); logger.info("Generate Report Toolbars");
MainWindowOperator mwo = MainWindowOperator.getDefault(); MainWindowOperator mwo = MainWindowOperator.getDefault();
@ -380,4 +393,15 @@ public class AutopsyTestCases {
logger.log(Level.SEVERE, "Error saving messaging service connection info", ex); //NON-NLS logger.log(Level.SEVERE, "Error saving messaging service connection info", ex); //NON-NLS
} }
} }
/**
 * Expands the given tree path and then, depth first, each of its child
 * paths.
 *
 * A NoSuchPathException raised while handling a path is logged at SEVERE
 * level and ends the processing of that path (including any of its children
 * not yet visited); it is not propagated to the caller.
 *
 * @param jto The operator for the tree being expanded.
 * @param tp  The path to expand along with all of its descendants.
 */
private void expandNodes(JTreeOperator jto, TreePath tp) {
    try {
        jto.expandPath(tp);
        // Fetch the children only after the parent has been expanded.
        final TreePath[] childPaths = jto.getChildPaths(tp);
        for (int index = 0; index < childPaths.length; index++) {
            expandNodes(jto, childPaths[index]);
        }
    } catch (NoSuchPathException ne) {
        logger.log(Level.SEVERE, "Error expanding tree path", ne);
    }
}
} }

View File

@ -69,6 +69,7 @@ public class RegressionTest extends TestCase {
"testConfigureSearch", "testConfigureSearch",
"testAddSourceWizard1", "testAddSourceWizard1",
"testIngest", "testIngest",
"testExpandDataSourcesTree", //After do ingest, before generate report, we expand Data Sources node
"testGenerateReportToolbar", "testGenerateReportToolbar",
"testGenerateReportButton"); "testGenerateReportButton");
} }
@ -83,6 +84,7 @@ public class RegressionTest extends TestCase {
"testConfigureSearch", "testConfigureSearch",
"testAddSourceWizard1", "testAddSourceWizard1",
"testIngest", "testIngest",
"testExpandDataSourcesTree",
"testGenerateReportToolbar", "testGenerateReportToolbar",
"testGenerateReportButton"); "testGenerateReportButton");
} }
@ -147,6 +149,9 @@ public class RegressionTest extends TestCase {
autopsyTests.testIngest(); autopsyTests.testIngest();
} }
/**
 * Regression test step that delegates to
 * autopsyTests.testExpandDataSourcesTree().
 */
public void testExpandDataSourcesTree() {
autopsyTests.testExpandDataSourcesTree();
}
public void testGenerateReportToolbar() { public void testGenerateReportToolbar() {
autopsyTests.testGenerateReportToolbar(); autopsyTests.testGenerateReportToolbar();
} }