Merge branch 'search_improvements' of https://github.com/sleuthkit/autopsy into index_upgrade_3

This commit is contained in:
Eugene Livis 2017-01-19 09:18:52 -05:00
commit 308d6945f0
2 changed files with 82 additions and 56 deletions

View File

@ -21,9 +21,11 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Set;
import java.util.Timer; import java.util.Timer;
import java.util.TimerTask; import java.util.TimerTask;
import java.util.concurrent.CancellationException; import java.util.concurrent.CancellationException;
@ -262,12 +264,14 @@ public final class SearchRunner {
// mutable state: // mutable state:
private volatile boolean workerRunning; private volatile boolean workerRunning;
private List<String> keywordListNames; //guarded by SearchJobInfo.this private List<String> keywordListNames; //guarded by SearchJobInfo.this
private Map<Keyword, List<Long>> currentResults; //guarded by SearchJobInfo.this
// Map of keyword to the object ids that contain a hit
private Map<Keyword, Set<Long>> currentResults; //guarded by SearchJobInfo.this
private SearchRunner.Searcher currentSearcher; private SearchRunner.Searcher currentSearcher;
private AtomicLong moduleReferenceCount = new AtomicLong(0); private AtomicLong moduleReferenceCount = new AtomicLong(0);
private final Object finalSearchLock = new Object(); //used for a condition wait private final Object finalSearchLock = new Object(); //used for a condition wait
public SearchJobInfo(long jobId, long dataSourceId, List<String> keywordListNames) { private SearchJobInfo(long jobId, long dataSourceId, List<String> keywordListNames) {
this.jobId = jobId; this.jobId = jobId;
this.dataSourceId = dataSourceId; this.dataSourceId = dataSourceId;
this.keywordListNames = new ArrayList<>(keywordListNames); this.keywordListNames = new ArrayList<>(keywordListNames);
@ -276,53 +280,53 @@ public final class SearchRunner {
currentSearcher = null; currentSearcher = null;
} }
public long getJobId() { private long getJobId() {
return jobId; return jobId;
} }
public long getDataSourceId() { private long getDataSourceId() {
return dataSourceId; return dataSourceId;
} }
public synchronized List<String> getKeywordListNames() { private synchronized List<String> getKeywordListNames() {
return new ArrayList<>(keywordListNames); return new ArrayList<>(keywordListNames);
} }
public synchronized void addKeywordListName(String keywordListName) { private synchronized void addKeywordListName(String keywordListName) {
if (!keywordListNames.contains(keywordListName)) { if (!keywordListNames.contains(keywordListName)) {
keywordListNames.add(keywordListName); keywordListNames.add(keywordListName);
} }
} }
public synchronized List<Long> currentKeywordResults(Keyword k) { private synchronized Set<Long> currentKeywordResults(Keyword k) {
return currentResults.get(k); return currentResults.get(k);
} }
public synchronized void addKeywordResults(Keyword k, List<Long> resultsIDs) { private synchronized void addKeywordResults(Keyword k, Set<Long> resultsIDs) {
currentResults.put(k, resultsIDs); currentResults.put(k, resultsIDs);
} }
public boolean isWorkerRunning() { private boolean isWorkerRunning() {
return workerRunning; return workerRunning;
} }
public void setWorkerRunning(boolean flag) { private void setWorkerRunning(boolean flag) {
workerRunning = flag; workerRunning = flag;
} }
public synchronized SearchRunner.Searcher getCurrentSearcher() { private synchronized SearchRunner.Searcher getCurrentSearcher() {
return currentSearcher; return currentSearcher;
} }
public synchronized void setCurrentSearcher(SearchRunner.Searcher searchRunner) { private synchronized void setCurrentSearcher(SearchRunner.Searcher searchRunner) {
currentSearcher = searchRunner; currentSearcher = searchRunner;
} }
public void incrementModuleReferenceCount() { private void incrementModuleReferenceCount() {
moduleReferenceCount.incrementAndGet(); moduleReferenceCount.incrementAndGet();
} }
public long decrementModuleReferenceCount() { private long decrementModuleReferenceCount() {
return moduleReferenceCount.decrementAndGet(); return moduleReferenceCount.decrementAndGet();
} }
@ -331,7 +335,7 @@ public final class SearchRunner {
* *
* @throws InterruptedException * @throws InterruptedException
*/ */
public void waitForCurrentWorker() throws InterruptedException { private void waitForCurrentWorker() throws InterruptedException {
synchronized (finalSearchLock) { synchronized (finalSearchLock) {
while (workerRunning) { while (workerRunning) {
finalSearchLock.wait(); //wait() releases the lock finalSearchLock.wait(); //wait() releases the lock
@ -342,7 +346,7 @@ public final class SearchRunner {
/** /**
* Unset workerRunning and wake up thread(s) waiting on finalSearchLock * Unset workerRunning and wake up thread(s) waiting on finalSearchLock
*/ */
public void searchNotify() { private void searchNotify() {
synchronized (finalSearchLock) { synchronized (finalSearchLock) {
workerRunning = false; workerRunning = false;
finalSearchLock.notify(); finalSearchLock.notify();
@ -468,8 +472,8 @@ public final class SearchRunner {
return null; return null;
} }
// calculate new results by subtracting results already obtained in this ingest // Reduce the results of the query to only those hits we
// this creates a map of each keyword to the list of unique files that have that hit. // have not already seen.
QueryResults newResults = filterResults(queryResults); QueryResults newResults = filterResults(queryResults);
if (!newResults.getKeywords().isEmpty()) { if (!newResults.getKeywords().isEmpty()) {
@ -567,40 +571,68 @@ public final class SearchRunner {
}); });
} }
//calculate new results by subtracting results already obtained in this ingest /**
//update currentResults map with the new results * This method filters out all of the hits found in earlier
* periodic searches and returns only the results found by the most
* recent search.
*
* This method will only return hits for objects for which we haven't
* previously seen a hit for the keyword.
*
* @param queryResult The results returned by a keyword search.
* @return The set of hits found by the most recent search for objects
* that have not previously had a hit.
*
*/
private QueryResults filterResults(QueryResults queryResult) { private QueryResults filterResults(QueryResults queryResult) {
// Create a new (empty) QueryResults object to hold the most recently
// found hits.
QueryResults newResults = new QueryResults(queryResult.getQuery(), queryResult.getKeywordList()); QueryResults newResults = new QueryResults(queryResult.getQuery(), queryResult.getKeywordList());
// For each keyword represented in the results.
for (Keyword keyword : queryResult.getKeywords()) { for (Keyword keyword : queryResult.getKeywords()) {
// These are all of the hits across all objects for the most recent search.
// This may well include duplicates of hits we've seen in earlier periodic searches.
List<KeywordHit> queryTermResults = queryResult.getResults(keyword); List<KeywordHit> queryTermResults = queryResult.getResults(keyword);
//translate to list of IDs that we keep track of // This will be used to build up the hits we haven't seen before
List<Long> queryTermResultsIDs = new ArrayList<>(); // for this keyword.
for (KeywordHit ch : queryTermResults) { List<KeywordHit> newUniqueHits = new ArrayList<>();
queryTermResultsIDs.add(ch.getSolrObjectId());
// Get the set of object ids seen in the past by this searcher
// for the given keyword.
Set<Long> curTermResults = job.currentKeywordResults(keyword);
if (curTermResults == null) {
// We create a new empty set if we haven't seen results for
// this keyword before.
curTermResults = new HashSet<>();
} }
List<Long> curTermResults = job.currentKeywordResults(keyword); // For each hit for this keyword.
if (curTermResults == null) { for (KeywordHit hit : queryTermResults) {
job.addKeywordResults(keyword, queryTermResultsIDs); if (curTermResults.contains(hit.getSolrObjectId())) {
newResults.addResult(keyword, queryTermResults); // Skip the hit if we've already seen a hit for
} else { // this keyword in the object.
//some AbstractFile hits already exist for this keyword continue;
for (KeywordHit res : queryTermResults) {
if (!curTermResults.contains(res.getSolrObjectId())) {
//add to new results
List<KeywordHit> newResultsFs = newResults.getResults(keyword);
if (newResultsFs == null) {
newResultsFs = new ArrayList<>();
newResults.addResult(keyword, newResultsFs);
}
newResultsFs.add(res);
curTermResults.add(res.getSolrObjectId());
}
} }
// We haven't seen the hit before so add it to list of new
// unique hits.
newUniqueHits.add(hit);
// Add the object id to the results we've seen for this
// keyword.
curTermResults.add(hit.getSolrObjectId());
} }
// Update the job with the list of objects for which we have
// seen hits for the current keyword.
job.addKeywordResults(keyword, curTermResults);
// Add the new hits for the current keyword into the results
// to be returned.
newResults.addResult(keyword, newUniqueHits);
} }
return newResults; return newResults;

View File

@ -1,23 +1,17 @@
---------------- VERSION 4.3.0 -------------- ---------------- VERSION 4.3.0 --------------
Improvements: Improvements:
- Creation and analysis (e.g., keyword search) of virtual files for slack - Support for slack space on files (as separate virtual files) to enable keyword searching and other analysis.
space. - Simple mode for the file extension mismatch module that focuses on only multimedia and executable files to reduce false positives.
- A preloader in an Android device image does not prevent adding the image as - New view in tree that shows the MIME types.
a data source (reading of secondary GPT tables supported).
- User can add data sources with no file systems or unsupported file systems
as "unallocated space image files" for carving, keyword search, etc.
- File extension mismatch analysis can be configured to check all file types,
all file types except text files, or only multimedia and executable files.
- Column order changes in table views are "sticky" for each type of tree view
item.
- Tree view has new file types by MIME type sub tree.
- User can bulk add list of keywords to a keyword list.
- Tagged items are highlighted in table views. - Tagged items are highlighted in table views.
- Toolbar button for Image/Video Gallery - Ordering of columns is saved when user changes them.
- New "Experimental" module (activate via Tools, Plugins) with auto ingest - Support for Android devices with preloaders (uses backup GPT)
feature. - Support for images with no file systems (all data is added as unallocated space)
- User can bulk add list of keywords to a keyword list.
- New "Experimental" module (activate via Tools, Plugins) with auto ingest feature.
- Assorted bug fixes and minor enhancements. - Assorted bug fixes and minor enhancements.
---------------- VERSION 4.2.0 -------------- ---------------- VERSION 4.2.0 --------------
Improvements: Improvements:
- Credit card account search. - Credit card account search.