mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-15 01:07:42 +00:00
Merge pull request #3187 from esaunders/release-4.5.1
Reduce memory consumed by keyword search hits
This commit is contained in:
commit
bb0f25ae02
@ -205,7 +205,13 @@ class HighlightedText implements IndexedText {
|
||||
*/
|
||||
synchronized private void loadPageInfoFromHits() {
|
||||
isLiteral = hits.getQuery().isLiteral();
|
||||
//organize the hits by page, filter as needed
|
||||
|
||||
/**
|
||||
* Organize the hits by page, filter as needed.
|
||||
* We process *every* keyword here because in the case of a regular
|
||||
* expression search there may be multiple different keyword
|
||||
* hits located in different chunks for the same file/artifact.
|
||||
*/
|
||||
for (Keyword k : hits.getKeywords()) {
|
||||
for (KeywordHit hit : hits.getResults(k)) {
|
||||
int chunkID = hit.getChunkId();
|
||||
|
@ -38,12 +38,9 @@ class KeywordHit implements Comparable<KeywordHit> {
|
||||
|
||||
private static final String GET_CONTENT_ID_FROM_ARTIFACT_ID = "SELECT obj_id FROM blackboard_artifacts WHERE artifact_id = ";
|
||||
|
||||
private final String solrDocumentId;
|
||||
private final long solrObjectId;
|
||||
private final int chunkId;
|
||||
private final String snippet;
|
||||
private final long contentID;
|
||||
private final boolean hitOnArtifact;
|
||||
private final String hit;
|
||||
|
||||
/**
|
||||
@ -56,14 +53,10 @@ class KeywordHit implements Comparable<KeywordHit> {
|
||||
* For some searches (i.e. substring, regex) this will be
|
||||
* different than the search term.
|
||||
*
|
||||
* @throws TskCoreException If there is a problem getting the underlying
|
||||
* content associated with a hit on the text of an
|
||||
* artifact.
|
||||
*/
|
||||
KeywordHit(String solrDocumentId, String snippet, String hit) throws TskCoreException {
|
||||
KeywordHit(String solrDocumentId, String snippet, String hit) {
|
||||
this.snippet = StringUtils.stripToEmpty(snippet);
|
||||
this.hit = hit;
|
||||
this.solrDocumentId = solrDocumentId;
|
||||
|
||||
/*
|
||||
* Parse the Solr document id to get the Solr object id and chunk id.
|
||||
@ -83,28 +76,6 @@ class KeywordHit implements Comparable<KeywordHit> {
|
||||
this.solrObjectId = Long.parseLong(split[0]);
|
||||
this.chunkId = Integer.parseInt(split[1]);
|
||||
}
|
||||
|
||||
//artifacts have negative obj ids
|
||||
hitOnArtifact = this.solrObjectId < 0;
|
||||
|
||||
if (hitOnArtifact) {
|
||||
// If the hit was in an artifact, look up the source content for the artifact.
|
||||
SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase();
|
||||
try (SleuthkitCase.CaseDbQuery executeQuery =
|
||||
caseDb.executeQuery(GET_CONTENT_ID_FROM_ARTIFACT_ID + this.solrObjectId);
|
||||
ResultSet resultSet = executeQuery.getResultSet();) {
|
||||
if (resultSet.next()) {
|
||||
contentID = resultSet.getLong("obj_id");
|
||||
} else {
|
||||
throw new TskCoreException("Failed to get obj_id for artifact with artifact_id =" + this.solrObjectId + ". No matching artifact was found.");
|
||||
}
|
||||
} catch (SQLException ex) {
|
||||
throw new TskCoreException("Error getting obj_id for artifact with artifact_id =" + this.solrObjectId, ex);
|
||||
}
|
||||
} else {
|
||||
//else the object id is for content.
|
||||
contentID = this.solrObjectId;
|
||||
}
|
||||
}
|
||||
|
||||
String getHit() {
|
||||
@ -112,7 +83,7 @@ class KeywordHit implements Comparable<KeywordHit> {
|
||||
}
|
||||
|
||||
String getSolrDocumentId() {
|
||||
return this.solrDocumentId;
|
||||
return Long.toString(solrObjectId) + Server.CHUNK_ID_SEPARATOR + Long.toString(chunkId);
|
||||
}
|
||||
|
||||
long getSolrObjectId() {
|
||||
@ -131,8 +102,36 @@ class KeywordHit implements Comparable<KeywordHit> {
|
||||
return this.snippet;
|
||||
}
|
||||
|
||||
long getContentID() {
|
||||
return this.contentID;
|
||||
/**
|
||||
* Get the id of the content underlying this hit.
|
||||
* For hits on files this will be the same as the object id associated
|
||||
* with the file. For hits on artifacts we look up the id of the object
|
||||
* that produced the artifact.
|
||||
*
* Note that for artifact hits this method does not keep the looked-up id:
* each call runs the obj_id query against the case database of the
* current case.
|
||||
*
|
||||
* @return The id of the underlying content associated with the hit.
|
||||
* @throws TskCoreException If there is a problem getting the underlying
|
||||
* content associated with a hit on the text of an
|
||||
* artifact.
|
||||
*/
|
||||
long getContentID() throws TskCoreException {
|
||||
if (isArtifactHit()) {
|
||||
// If the hit was in an artifact, look up the source content for the artifact.
|
||||
SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase();
|
||||
// The query and its result set are both declared as try-with-resources
// resources, so they are closed on every exit path from this block.
try (SleuthkitCase.CaseDbQuery executeQuery =
|
||||
caseDb.executeQuery(GET_CONTENT_ID_FROM_ARTIFACT_ID + this.solrObjectId);
|
||||
ResultSet resultSet = executeQuery.getResultSet();) {
|
||||
if (resultSet.next()) {
|
||||
return resultSet.getLong("obj_id");
|
||||
} else {
|
||||
throw new TskCoreException("Failed to get obj_id for artifact with artifact_id =" + this.solrObjectId + ". No matching artifact was found.");
|
||||
}
|
||||
} catch (SQLException ex) {
|
||||
// Wrap the SQL failure, keeping the original exception as the cause.
throw new TskCoreException("Error getting obj_id for artifact with artifact_id =" + this.solrObjectId, ex);
|
||||
}
|
||||
} else {
|
||||
//else the object id is for content.
|
||||
return this.solrObjectId;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -141,7 +140,8 @@ class KeywordHit implements Comparable<KeywordHit> {
|
||||
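* @return True if this hit is in the indexed text of an artifact (artifacts have negative object ids), false otherwise.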
|
||||
*/
|
||||
boolean isArtifactHit() {
|
||||
return hitOnArtifact;
|
||||
// artifacts have negative obj ids
|
||||
return this.solrObjectId < 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -150,7 +150,7 @@ class KeywordHit implements Comparable<KeywordHit> {
|
||||
* @return The artifact whose indexed text this hit is in.
|
||||
*/
|
||||
Optional<Long> getArtifactID() {
|
||||
if (hitOnArtifact) {
|
||||
if (isArtifactHit()) {
|
||||
return Optional.of(solrObjectId);
|
||||
} else {
|
||||
return Optional.empty();
|
||||
|
@ -226,6 +226,8 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
|
||||
private List<KeywordHit> createKeywordHits(SolrDocument solrDoc) throws TskCoreException {
|
||||
|
||||
final HashMap<String, String> keywordsFoundInThisDocument = new HashMap<>();
|
||||
|
||||
List<KeywordHit> hits = new ArrayList<>();
|
||||
final String docId = solrDoc.getFieldValue(Server.Schema.ID.toString()).toString();
|
||||
final Integer chunkSize = (Integer) solrDoc.getFieldValue(Server.Schema.CHUNK_SIZE.toString());
|
||||
@ -273,6 +275,23 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
hit = hit.replaceAll("[^0-9]$", "");
|
||||
}
|
||||
|
||||
/**
|
||||
* The use of String interning is an optimization to ensure
|
||||
* that we reuse the same keyword hit String object across
|
||||
* all hits. Even though we benefit from G1GC String
|
||||
* deduplication, the overhead associated with creating a
|
||||
* new String object for every KeywordHit can be significant
|
||||
* when the number of hits gets large.
|
||||
*/
|
||||
hit = hit.intern();
|
||||
|
||||
// We will only create one KeywordHit instance per document for
|
||||
// a given hit.
|
||||
if (keywordsFoundInThisDocument.containsKey(hit)) {
|
||||
continue;
|
||||
}
|
||||
keywordsFoundInThisDocument.put(hit, hit);
|
||||
|
||||
if (artifactAttributeType == null) {
|
||||
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
|
||||
} else {
|
||||
@ -303,7 +322,7 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
final String group = ccnMatcher.group("ccn");
|
||||
if (CreditCardValidator.isValidCCN(group)) {
|
||||
hits.add(new KeywordHit(docId, makeSnippet(content, hitMatcher, hit), hit));
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -316,8 +335,6 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
}
|
||||
|
||||
}
|
||||
} catch (TskCoreException ex) {
|
||||
throw ex;
|
||||
} catch (Throwable error) {
|
||||
/*
|
||||
* NOTE: Matcher.find() is known to throw StackOverflowError in rare
|
||||
@ -447,7 +464,7 @@ final class RegexQuery implements KeywordSearchQuery {
|
||||
if (hit.isArtifactHit()) {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
|
||||
} else {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getContentID())); //NON-NLS
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", foundKeyword.getSearchTerm(), hit.getSnippet(), hit.getSolrObjectId())); //NON-NLS
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -366,7 +366,7 @@ final class TermsComponentQuery implements KeywordSearchQuery {
|
||||
if (hit.isArtifactHit()) {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for artifact keyword hit: term = %s, snippet = '%s', artifact id = %d", searchTerm, hit.getSnippet(), hit.getArtifactID().get())); //NON-NLS
|
||||
} else {
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getContentID())); //NON-NLS
|
||||
LOGGER.log(Level.SEVERE, String.format("Failed to parse credit card account number for content keyword hit: term = %s, snippet = '%s', object id = %d", searchTerm, hit.getSnippet(), hit.getSolrObjectId())); //NON-NLS
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -36,6 +36,7 @@ import java.util.logging.Logger;
|
||||
import javax.imageio.ImageIO;
|
||||
import javax.swing.JDialog;
|
||||
import javax.swing.text.JTextComponent;
|
||||
import javax.swing.tree.TreePath;
|
||||
import org.netbeans.jellytools.MainWindowOperator;
|
||||
import org.netbeans.jellytools.NbDialogOperator;
|
||||
import org.netbeans.jellytools.WizardOperator;
|
||||
@ -53,6 +54,8 @@ import org.netbeans.jemmy.operators.JTabbedPaneOperator;
|
||||
import org.netbeans.jemmy.operators.JTableOperator;
|
||||
import org.netbeans.jemmy.operators.JTextFieldOperator;
|
||||
import org.netbeans.jemmy.operators.JToggleButtonOperator;
|
||||
import org.netbeans.jemmy.operators.JTreeOperator;
|
||||
import org.netbeans.jemmy.operators.JTreeOperator.NoSuchPathException;
|
||||
import org.sleuthkit.autopsy.core.UserPreferences;
|
||||
import org.sleuthkit.autopsy.core.UserPreferencesException;
|
||||
import org.sleuthkit.autopsy.events.MessageServiceConnectionInfo;
|
||||
@ -290,6 +293,16 @@ public class AutopsyTestCases {
|
||||
|
||||
}
|
||||
|
||||
/**
* Expands the "Data Sources" node of a tree in the default main window,
* together with every child path below it, and then calls
* screenshot("Data Sources Tree").
*/
public void testExpandDataSourcesTree() {
|
||||
logger.info("Data Sources Node");
|
||||
MainWindowOperator mwo = MainWindowOperator.getDefault();
|
||||
// NOTE(review): presumably this locates the tree that shows "Data Sources"
// inside the main window - confirm against the JTreeOperator constructor.
JTreeOperator jto = new JTreeOperator(mwo, "Data Sources");
|
||||
String [] nodeNames = {"Data Sources"};
|
||||
TreePath tp = jto.findPath(nodeNames);
|
||||
// Recursively expand the located path and all of its child paths.
expandNodes(jto, tp);
|
||||
screenshot("Data Sources Tree");
|
||||
}
|
||||
|
||||
public void testGenerateReportToolbar() {
|
||||
logger.info("Generate Report Toolbars");
|
||||
MainWindowOperator mwo = MainWindowOperator.getDefault();
|
||||
@ -380,4 +393,15 @@ public class AutopsyTestCases {
|
||||
logger.log(Level.SEVERE, "Error saving messaging service connection info", ex); //NON-NLS
|
||||
}
|
||||
}
|
||||
|
||||
/**
* Expands the given tree path and then recursively expands each child
* path that the tree operator reports for it.
*
* A NoSuchPathException thrown while handling a path is logged at SEVERE
* level and not rethrown. Because every recursive call has its own
* try/catch, such a failure only stops the processing of the path on
* which it occurred.
*
* @param jto The tree operator used to expand paths and to list child paths.
* @param tp  The tree path to expand.
*/
private void expandNodes (JTreeOperator jto, TreePath tp) {
|
||||
try {
|
||||
jto.expandPath(tp);
|
||||
for (TreePath t : jto.getChildPaths(tp)) {
|
||||
expandNodes(jto, t);
|
||||
}
|
||||
} catch (NoSuchPathException ne) {
|
||||
logger.log(Level.SEVERE, "Error expanding tree path", ne);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -69,6 +69,7 @@ public class RegressionTest extends TestCase {
|
||||
"testConfigureSearch",
|
||||
"testAddSourceWizard1",
|
||||
"testIngest",
|
||||
"testExpandDataSourcesTree", //After do ingest, before generate report, we expand Data Sources node
|
||||
"testGenerateReportToolbar",
|
||||
"testGenerateReportButton");
|
||||
}
|
||||
@ -83,6 +84,7 @@ public class RegressionTest extends TestCase {
|
||||
"testConfigureSearch",
|
||||
"testAddSourceWizard1",
|
||||
"testIngest",
|
||||
"testExpandDataSourcesTree",
|
||||
"testGenerateReportToolbar",
|
||||
"testGenerateReportButton");
|
||||
}
|
||||
@ -147,6 +149,9 @@ public class RegressionTest extends TestCase {
|
||||
autopsyTests.testIngest();
|
||||
}
|
||||
|
||||
/**
* Test step that delegates to autopsyTests.testExpandDataSourcesTree().
*/
public void testExpandDataSourcesTree() {
|
||||
autopsyTests.testExpandDataSourcesTree();
|
||||
}
|
||||
/**
* Test step that delegates to autopsyTests.testGenerateReportToolbar().
*/
public void testGenerateReportToolbar() {
|
||||
autopsyTests.testGenerateReportToolbar();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user