Merge pull request #3187 from esaunders/release-4.5.1

Reduce memory consumed by keyword search hits
commit bb0f25ae02 by Richard Cordovano, 2017-11-13 13:26:28 -05:00 (committed by GitHub)
6 changed files with 93 additions and 41 deletions
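
The memory reduction in the keyword-search diffs below comes from two changes: KeywordHit no longer stores the Solr document id string or eagerly resolves the underlying content id (the solrDocumentId, contentID, and hitOnArtifact fields are dropped), and RegexQuery interns hit strings and records at most one hit per term per document. The remaining diffs add a UI regression-test step that expands the Data Sources tree. A rough sketch of the slimmed-down hit object, using a made-up SlimKeywordHit name and only the fields this commit keeps:

// Illustrative only (SlimKeywordHit is not a real class): the shape of a hit after
// this commit, keeping just the parsed Solr ids, the snippet, and the interned term.
final class SlimKeywordHit {

    private final long solrObjectId; // negative for hits on artifact text
    private final int chunkId;
    private final String snippet;
    private final String hit;        // interned by the code that creates the hit

    SlimKeywordHit(long solrObjectId, int chunkId, String snippet, String hit) {
        this.solrObjectId = solrObjectId;
        this.chunkId = chunkId;
        this.snippet = snippet;
        this.hit = hit;
    }

    boolean isArtifactHit() {
        return solrObjectId < 0;
    }

    String getSolrDocumentId() {
        // Rebuilt on demand; "_" stands in for Server.CHUNK_ID_SEPARATOR.
        return solrObjectId + "_" + chunkId;
    }
}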


@@ -205,7 +205,13 @@ class HighlightedText implements IndexedText {
*/
synchronized private void loadPageInfoFromHits() {
isLiteral = hits.getQuery().isLiteral();
//organize the hits by page, filter as needed
/**
* Organize the hits by page, filter as needed.
* We process *every* keyword here because in the case of a regular
* expression search there may be multiple different keyword
* hits located in different chunks for the same file/artifact.
*/
for (Keyword k : hits.getKeywords()) {
for (KeywordHit hit : hits.getResults(k)) {
int chunkID = hit.getChunkId();
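
The new comment in loadPageInfoFromHits() is the substantive part of this hunk: for regular-expression searches a single file or artifact can produce hits on several different terms, each landing in a different Solr chunk, so every keyword's result list has to be walked to build the page map. A minimal sketch of that grouping (not the real method), assuming the QueryResults, Keyword, and KeywordHit types used in the loop above and java.util.HashMap/Map:

// Sketch only: tally hits per chunk so that each chunk can later be presented
// as one page of highlighted text.
static Map<Integer, Integer> countHitsPerChunk(QueryResults hits) {
    Map<Integer, Integer> hitsPerChunk = new HashMap<>();
    for (Keyword k : hits.getKeywords()) {
        for (KeywordHit hit : hits.getResults(k)) {
            hitsPerChunk.merge(hit.getChunkId(), 1, Integer::sum);
        }
    }
    return hitsPerChunk;
}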


@@ -38,12 +38,9 @@ class KeywordHit implements Comparable<KeywordHit> {
private static final String GET_CONTENT_ID_FROM_ARTIFACT_ID = "SELECT obj_id FROM blackboard_artifacts WHERE artifact_id = ";
private final String solrDocumentId;
private final long solrObjectId;
private final int chunkId;
private final String snippet;
private final long contentID;
private final boolean hitOnArtifact;
private final String hit;
/**
@@ -56,14 +53,10 @@ class KeywordHit implements Comparable<KeywordHit> {
* For some searches (i.e. substring, regex) this will be
* different than the search term.
*
* @throws TskCoreException If there is a problem getting the underlying
* content associated with a hit on the text of an
* artifact.
*/
KeywordHit(String solrDocumentId, String snippet, String hit) throws TskCoreException {
KeywordHit(String solrDocumentId, String snippet, String hit) {
this.snippet = StringUtils.stripToEmpty(snippet);
this.hit = hit;
this.solrDocumentId = solrDocumentId;
/*
* Parse the Solr document id to get the Solr object id and chunk id.
@@ -83,28 +76,6 @@ class KeywordHit implements Comparable<KeywordHit> {
this.solrObjectId = Long.parseLong(split[0]);
this.chunkId = Integer.parseInt(split[1]);
}
//artifacts have negative obj ids
hitOnArtifact = this.solrObjectId < 0;
if (hitOnArtifact) {
// If the hit was in an artifact, look up the source content for the artifact.
SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase();
try (SleuthkitCase.CaseDbQuery executeQuery =
caseDb.executeQuery(GET_CONTENT_ID_FROM_ARTIFACT_ID + this.solrObjectId);
ResultSet resultSet = executeQuery.getResultSet();) {
if (resultSet.next()) {
contentID = resultSet.getLong("obj_id");
} else {
throw new TskCoreException("Failed to get obj_id for artifact with artifact_id =" + this.solrObjectId + ". No matching artifact was found.");
}
} catch (SQLException ex) {
throw new TskCoreException("Error getting obj_id for artifact with artifact_id =" + this.solrObjectId, ex);
}
} else {
//else the object id is for content.
contentID = this.solrObjectId;
}
}
String getHit() {
@@ -112,7 +83,7 @@ class KeywordHit implements Comparable<KeywordHit> {
}
String getSolrDocumentId() {
return this.solrDocumentId;
return Long.toString(solrObjectId) + Server.CHUNK_ID_SEPARATOR + Long.toString(chunkId);
}
long getSolrObjectId() {
@@ -131,8 +102,36 @@ class KeywordHit implements Comparable<KeywordHit> {
return this.snippet;
}
long getContentID() {
return this.contentID;
/**
* Get the content id associated with the content underlying the hit.
* For hits on files this will be the same as the object id associated
* with the file. For hits on artifacts we look up the id of the object
* that produced the artifact.
*
* @return The id of the underlying content associated with the hit.
* @throws TskCoreException If there is a problem getting the underlying
* content associated with a hit on the text of an
* artifact.
*/
long getContentID() throws TskCoreException {
if (isArtifactHit()) {
// If the hit was in an artifact, look up the source content for the artifact.
SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase();
try (SleuthkitCase.CaseDbQuery executeQuery =
caseDb.executeQuery(GET_CONTENT_ID_FROM_ARTIFACT_ID + this.solrObjectId);
ResultSet resultSet = executeQuery.getResultSet();) {
if (resultSet.next()) {
return resultSet.getLong("obj_id");
} else {
throw new TskCoreException("Failed to get obj_id for artifact with artifact_id =" + this.solrObjectId + ". No matching artifact was found.");
}
} catch (SQLException ex) {
throw new TskCoreException("Error getting obj_id for artifact with artifact_id =" + this.solrObjectId, ex);
}
} else {
//else the object id is for content.
return this.solrObjectId;
}
}
/**
@@ -141,7 +140,8 @@ class KeywordHit implements Comparable<KeywordHit> {
* @return
*/
boolean isArtifactHit() {
return hitOnArtifact;
// artifacts have negative obj ids
return this.solrObjectId < 0;
}
/**
@@ -150,7 +150,7 @@ class KeywordHit implements Comparable<KeywordHit> {
* @return The artifact whose indexed text this hit is in.
*/
Optional<Long> getArtifactID() {
if (hitOnArtifact) {
if (isArtifactHit()) {
return Optional.of(solrObjectId);
} else {
return Optional.empty();


@@ -226,6 +226,8 @@ final class RegexQuery implements KeywordSearchQuery {
private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreException {
final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>();
List<KeywordHit> hits = new ArrayList<>();
final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
@@ -273,6 +275,23 @@ final class RegexQuery implements KeywordSearchQuery {
hit = hit.replaceAll("[^0-9]$", "");
}
/**
* The use of String interning is an optimization to ensure
* that we reuse the same keyword hit String object across
* all hits. Even though we benefit from G1GC String
* deduplication, the overhead associated with creating a
* new String object for every KeywordHit can be significant
* when the number of hits gets large.
*/
hit = hit.intern();
// We will only create one KeywordHit instance per document for
// a given hit.
if (keywordsFoundInThisDocument.containsKey(hit)) {
continue;
}
keywordsFoundInThisDocument.put(hit, hit);
if (artifactAttributeType == null) {
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
} else {
@@ -303,7 +322,7 @@ final class RegexQuery implements KeywordSearchQuery {
final String group = ccnMatcher.group("ccn");
if (CreditCardValidator.isValidCCN(group)) {
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
};
}
}
}
@@ -316,8 +335,6 @@ final class RegexQuery implements KeywordSearchQuery {
}
}
} catch (TskCoreException ex) {
throw ex;
} catch (Throwable error) {
/*
* NOTE: Matcher.find() is known to throw StackOverflowError in rare
@@ -447,7 +464,7 @@ final class RegexQuery implements KeywordSearchQuery {
if (hit.isArtifactHit()) {
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
} else {
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContentID())); //NON-NLS
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getSolrObjectId())); //NON-NLS
}
return null;
}
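
Two allocations are avoided in the RegexQuery hunk above: hit.intern() makes every occurrence of the same matched term share one canonical String across all documents, and the keywordsFoundInThisDocument map short-circuits the loop so that only the first occurrence of a term in a document produces a KeywordHit. A compact sketch of the same idea (a HashSet serves the same purpose as the HashMap used in the diff; requires java.util.HashSet, java.util.Set, java.util.regex.Matcher):

// Sketch only: one interned String per distinct term, at most one hit per term
// per document.
static Set<String> collectDistinctTerms(Matcher hitMatcher) {
    Set<String> termsSeen = new HashSet<>();
    while (hitMatcher.find()) {
        String term = hitMatcher.group().intern(); // canonical, shared instance
        if (termsSeen.add(term)) {
            // First time this term is seen in the document: create the hit here.
        }
    }
    return termsSeen;
}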


@@ -366,7 +366,7 @@ final class TermsComponentQuery implements KeywordSearchQuery {
if (hit.isArtifactHit()) {
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
} else {
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getContentID())); //NON-NLS
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getSolrObjectId())); //NON-NLS
}
return null;
}


@@ -36,6 +36,7 @@ import java.util.logging.Logger;
import javax.imageio.ImageIO;
import javax.swing.JDialog;
import javax.swing.text.JTextComponent;
import javax.swing.tree.TreePath;
import org.netbeans.jellytools.MainWindowOperator;
import org.netbeans.jellytools.NbDialogOperator;
import org.netbeans.jellytools.WizardOperator;
@@ -53,6 +54,8 @@ import org.netbeans.jemmy.operators.JTabbedPaneOperator;
import org.netbeans.jemmy.operators.JTableOperator;
import org.netbeans.jemmy.operators.JTextFieldOperator;
import org.netbeans.jemmy.operators.JToggleButtonOperator;
import org.netbeans.jemmy.operators.JTreeOperator;
import org.netbeans.jemmy.operators.JTreeOperator.NoSuchPathException;
import org.sleuthkit.autopsy.core.UserPreferences;
import org.sleuthkit.autopsy.core.UserPreferencesException;
import org.sleuthkit.autopsy.events.MessageServiceConnectionInfo;
@@ -290,6 +293,16 @@ public class AutopsyTestCases {
}
public void testExpandDataSourcesTree() {
logger.info("Data Sources Node");
MainWindowOperator mwo = MainWindowOperator.getDefault();
JTreeOperator jto = new JTreeOperator(mwo, "Data Sources");
String [] nodeNames = {"Data Sources"};
TreePath tp = jto.findPath(nodeNames);
expandNodes(jto, tp);
screenshot("Data Sources Tree");
}
public void testGenerateReportToolbar() {
logger.info("Generate Report Toolbars");
MainWindowOperator mwo = MainWindowOperator.getDefault();
@@ -380,4 +393,15 @@ public class AutopsyTestCases {
logger.log(Level.SEVERE, "Error saving messaging service connection info", ex); //NON-NLS
}
}
private void expandNodes (JTreeOperator jto, TreePath tp) {
try {
jto.expandPath(tp);
for (TreePath t : jto.getChildPaths(tp)) {
expandNodes(jto, t);
}
} catch (NoSuchPathException ne) {
logger.log(Level.SEVERE, "Error expanding tree path", ne);
}
}
}
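
The new expandNodes() helper above recurses over jto.getChildPaths(tp) and logs, rather than fails, when a path disappears mid-expansion (NoSuchPathException). An equivalent iterative form using the same Jemmy calls, sketched only to show that the recursion could be flattened if depth ever became a concern; it omits the exception handling and needs java.util.ArrayDeque, java.util.Deque, javax.swing.tree.TreePath, and org.netbeans.jemmy.operators.JTreeOperator:

// Sketch: the same expansion written with an explicit stack instead of recursion.
private void expandAll(JTreeOperator jto, TreePath root) {
    Deque<TreePath> toExpand = new ArrayDeque<>();
    toExpand.push(root);
    while (!toExpand.isEmpty()) {
        TreePath current = toExpand.pop();
        jto.expandPath(current);
        for (TreePath child : jto.getChildPaths(current)) {
            toExpand.push(child);
        }
    }
}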


@@ -69,6 +69,7 @@ public class RegressionTest extends TestCase {
"testConfigureSearch",
"testAddSourceWizard1",
"testIngest",
"testExpandDataSourcesTree", //After do ingest, before generate report, we expand Data Sources node
"testGenerateReportToolbar",
"testGenerateReportButton");
}
@@ -83,6 +84,7 @@ public class RegressionTest extends TestCase {
"testConfigureSearch",
"testAddSourceWizard1",
"testIngest",
"testExpandDataSourcesTree",
"testGenerateReportToolbar",
"testGenerateReportButton");
}
@@ -147,6 +149,9 @@ public class RegressionTest extends TestCase {
autopsyTests.testIngest();
}
public void testExpandDataSourcesTree() {
autopsyTests.testExpandDataSourcesTree();
}
public void testGenerateReportToolbar() {
autopsyTests.testGenerateReportToolbar();
}