Merge branch 'develop' of https://www.github.com/sleuthkit/autopsy into develop

alexjacks92 2014-04-09 16:36:33 -04:00
commit 9d75f560e2
9 changed files with 759 additions and 633 deletions

View File

@ -177,10 +177,6 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
+ sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);
throw ingEx; //need to rethrow/return to signal error and move on
}
//check if need invoke commit/search between chunks
//not to delay commit if timer has gone off
module.checkRunCommitSearch();
}
} catch (IOException ex) {
logger.log(Level.WARNING, "Unable to read content stream from " + sourceFile.getId() + ": " + sourceFile.getName(), ex);

View File: AbstractFileStringExtract.java

@ -147,10 +147,6 @@ class AbstractFileStringExtract implements AbstractFileExtract {
throw ingEx; //need to rethrow/return to signal error and move on
}
//check if need invoke commit/search between chunks
//not to delay commit if timer has gone off
module.checkRunCommitSearch();
//debug.close();
}

View File: AbstractFileTikaTextExtract.java

@ -223,10 +223,6 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
+ sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);
throw ingEx; //need to rethrow/return to signal error and move on
}
//check if need invoke commit/search between chunks
//not to delay commit if timer has gone off
module.checkRunCommitSearch();
}
} catch (IOException ex) {
final String msg = "Exception: Unable to read Tika content stream from " + sourceFile.getId() + ": " + sourceFile.getName();

View File: Bundle.properties

@ -263,3 +263,10 @@ KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster
KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index:
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer)
KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default)
KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit:
KeywordSearchIngestModule.kwHitLbl=Keyword hit:
KeywordSearchIngestModule.kwHitThLbl=Keyword
KeywordSearchIngestModule.previewThLbl=Preview
KeywordSearchIngestModule.fileThLbl=File
KeywordSearchIngestModule.listThLbl=List
KeywordSearchIngestModule.regExThLbl=Reg Ex
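
The keys added above are resolved at runtime to build the inbox message subject and the HTML details table assembled by the searcher later in this commit. A minimal standalone sketch of that kind of lookup (Autopsy itself resolves these through org.openide.util.NbBundle.getMessage(); the Properties-based loading here is only to keep the example self-contained):

import java.io.StringReader;
import java.util.Properties;

public class BundleLookupSketch {
    public static void main(String[] args) throws Exception {
        // Two of the keys added above, inlined so the sketch runs standalone.
        Properties bundle = new Properties();
        bundle.load(new StringReader(
                "KeywordSearchIngestModule.kwHitLbl=Keyword hit:\n"
                + "KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit:\n"));
        // The searcher picks the subject prefix based on whether the keyword
        // was a literal term or a regular expression.
        boolean isLiteral = true;
        String key = isLiteral ? "KeywordSearchIngestModule.kwHitLbl"
                : "KeywordSearchIngestModule.regExpHitLbl";
        System.out.println(bundle.getProperty(key)); // prints: Keyword hit:
    }
}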

View File: Ingester.java

@ -53,7 +53,6 @@ import org.sleuthkit.datamodel.ContentVisitor;
import org.sleuthkit.datamodel.DerivedFile;
import org.sleuthkit.datamodel.Directory;
import org.sleuthkit.datamodel.File;
import org.sleuthkit.datamodel.FsContent;
import org.sleuthkit.datamodel.LayoutFile;
import org.sleuthkit.datamodel.LocalFile;
import org.sleuthkit.datamodel.ReadContentInputStream;
@ -66,7 +65,7 @@ import org.sleuthkit.datamodel.TskCoreException;
class Ingester {
private static final Logger logger = Logger.getLogger(Ingester.class.getName());
private boolean uncommitedIngests = false;
private volatile boolean uncommitedIngests = false;
private final ExecutorService upRequestExecutor = Executors.newSingleThreadExecutor();
private final Server solrServer = KeywordSearch.getServer();
private final GetContentFieldsV getContentFieldsV = new GetContentFieldsV();
@ -74,8 +73,7 @@ class Ingester {
//for ingesting chunk as SolrInputDocument (non-content-streaming, by-pass tika)
//TODO use a streaming way to add content to /update handler
private final static int MAX_DOC_CHUNK_SIZE = 1024*1024;
private final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
private static final int MAX_DOC_CHUNK_SIZE = 1024*1024;
private static final String docContentEncoding = "UTF-8";
@ -286,6 +284,7 @@ class Ingester {
throw new IngesterException(msg);
}
final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
SolrInputDocument updateDoc = new SolrInputDocument();
for (String key : fields.keySet()) {
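
The Ingester change above makes MAX_DOC_CHUNK_SIZE a static constant and replaces the shared docChunkContentBuf field with a buffer allocated inside the method, which reads like a thread-confinement fix. An illustrative sketch of the before/after shape, not the actual Ingester:

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

public class ChunkBufferSketch {
    private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;

    // Before: one buffer shared by every caller -- a data race if two ingest
    // threads ever index chunks at the same time.
    // private final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];

    // After: allocate per call, so the bytes stay confined to the calling thread.
    public int readChunk(InputStream chunkStream) throws IOException {
        final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
        int read = chunkStream.read(docChunkContentBuf);
        // ... hand docChunkContentBuf[0..read) to the indexer here ...
        return read;
    }

    public static void main(String[] args) throws IOException {
        InputStream chunk = new ByteArrayInputStream("hello".getBytes());
        System.out.println(new ChunkBufferSketch().readChunk(chunk)); // prints: 5
    }
}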

View File: KeywordSearchIngestModule.java

@ -18,36 +18,19 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.Logger;
import javax.swing.SwingUtilities;
import javax.swing.SwingWorker;
import javax.swing.Timer;
import org.apache.tika.Tika;
import org.netbeans.api.progress.aggregate.AggregateProgressFactory;
import org.netbeans.api.progress.aggregate.AggregateProgressHandle;
import org.netbeans.api.progress.aggregate.ProgressContributor;
import org.openide.util.Cancellable;
import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.coreutils.EscapeUtil;
import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
import org.sleuthkit.autopsy.coreutils.StopWatch;
import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
import org.sleuthkit.autopsy.ingest.FileIngestModule;
import org.sleuthkit.autopsy.ingest.IngestServices;
@ -55,11 +38,7 @@ import org.sleuthkit.autopsy.ingest.IngestMessage;
import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
import org.sleuthkit.autopsy.ingest.IngestModuleAdapter;
import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
import org.sleuthkit.datamodel.BlackboardAttribute;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.ReadContentInputStream;
import org.sleuthkit.datamodel.SleuthkitCase;
@ -97,27 +76,21 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
private static final Logger logger = Logger.getLogger(KeywordSearchIngestModule.class.getName());
private IngestServices services = IngestServices.getInstance();
private Ingester ingester = null;
private volatile boolean commitIndex = false; //whether to commit index next time
private volatile boolean runSearcher = false; //whether to run searcher next time
private Timer commitTimer;
private Timer searchTimer;
private Indexer indexer;
private Searcher currentSearcher;
private Searcher finalSearcher;
private volatile boolean searcherDone = true; //mark as done, until it's inited
private Map<Keyword, List<Long>> currentResults;
//only search images from current ingest, not images previously ingested/indexed
//accessed read-only by searcher thread
private Set<Long> curDataSourceIds;
private static final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true); //use fairness policy
private static final Lock searcherLock = rwLock.writeLock();
private boolean processedFiles;
private boolean startedSearching = false;
private SleuthkitCase caseHandle = null;
private static List<AbstractFileExtract> textExtractors;
private static AbstractFileStringExtract stringExtractor;
private List<AbstractFileExtract> textExtractors;
private AbstractFileStringExtract stringExtractor;
private final KeywordSearchJobSettings settings;
private boolean initialized = false;
private Tika tikaFormatDetector;
private long jobId;
private long dataSourceId;
private static AtomicInteger instanceCount = new AtomicInteger(0); //just used for logging
private int instanceNum = 0;
private enum IngestStatus {
@ -132,6 +105,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
KeywordSearchIngestModule(KeywordSearchJobSettings settings) {
this.settings = settings;
instanceNum = instanceCount.getAndIncrement();
}
/**
@ -141,13 +115,12 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
*/
@Override
public void startUp(IngestJobContext context) throws IngestModuleException {
logger.log(Level.INFO, "init()");
logger.log(Level.INFO, "Initializing instance {0}", instanceNum);
initialized = false;
jobId = context.getJobId();
caseHandle = Case.getCurrentCase().getSleuthkitCase();
tikaFormatDetector = new Tika();
ingester = Server.getIngester();
final Server server = KeywordSearch.getServer();
@ -209,31 +182,12 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.onlyIdxKwSkipMsg")));
}
processedFiles = false;
searcherDone = true; //make sure to start the initial currentSearcher
//keeps track of all results per run not to repeat reporting the same hits
currentResults = new HashMap<>();
curDataSourceIds = new HashSet<>();
indexer = new Indexer();
final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000;
logger.log(Level.INFO, "Using commit interval (ms): {0}", updateIntervalMs);
logger.log(Level.INFO, "Using searcher interval (ms): {0}", updateIntervalMs);
commitTimer = new Timer(updateIntervalMs, new CommitTimerAction());
searchTimer = new Timer(updateIntervalMs, new SearchTimerAction());
initialized = true;
commitTimer.start();
searchTimer.start();
}
@Override
public ProcessResult process(AbstractFile abstractFile) {
if (initialized == false) //error initializing indexing/Solr
{
logger.log(Level.WARNING, "Skipping processing, module not initialized, file: {0}", abstractFile.getName());
@ -242,8 +196,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
}
try {
//add data source id of the file to the set, keeping track of images being ingested
final long fileSourceId = caseHandle.getFileDataSource(abstractFile);
curDataSourceIds.add(fileSourceId);
dataSourceId = caseHandle.getFileDataSource(abstractFile);
} catch (TskCoreException ex) {
logger.log(Level.SEVERE, "Error getting image id of file processed by keyword search: " + abstractFile.getName(), ex);
@ -260,14 +213,16 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
return ProcessResult.OK;
}
processedFiles = true;
//check if it's time to commit after previous processing
checkRunCommitSearch();
//index the file and content (if the content is supported)
indexer.indexFile(abstractFile, true);
// Start searching if it hasn't started already
if (!startedSearching) {
List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists();
SearchRunner.getInstance().startJob(jobId, dataSourceId, keywordListNames);
startedSearching = true;
}
return ProcessResult.OK;
}
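
With the timers gone, the first file that reaches this point registers the job with SearchRunner exactly once per module instance via the startedSearching flag. A stripped-down sketch of that one-shot pattern; RunnerStub and the list name are placeholders, not Autopsy API:

import java.util.Arrays;
import java.util.List;

public class StartOnFirstFileSketch {
    interface RunnerStub { // stands in for SearchRunner in this sketch
        void startJob(long jobId, long dataSourceId, List<String> keywordListNames);
    }

    private final RunnerStub runner;
    private final long jobId;
    private long dataSourceId;
    private boolean startedSearching = false; // one-shot guard, per module instance

    StartOnFirstFileSketch(RunnerStub runner, long jobId) {
        this.runner = runner;
        this.jobId = jobId;
    }

    void process(long fileDataSourceId) {
        dataSourceId = fileDataSourceId; // data source of the file being indexed
        // ... index the file here ...
        if (!startedSearching) {         // only the first file kicks off the search job
            runner.startJob(jobId, dataSourceId, Arrays.asList("Example List"));
            startedSearching = true;
        }
    }
}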
@ -277,39 +232,23 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
*/
@Override
public void shutDown(boolean ingestJobCancelled) {
logger.log(Level.INFO, "Instance {0}", instanceNum);
if (initialized == false) {
return;
}
if (ingestJobCancelled) {
logger.log(Level.INFO, "Ingest job cancelled");
stop();
return;
}
commitTimer.stop();
//NOTE, we let the 1 before last searcher complete fully, and enqueue the last one
//cancel searcher timer, ensure unwanted searcher does not start
//before we start the final one
if (searchTimer.isRunning()) {
searchTimer.stop();
}
runSearcher = false;
logger.log(Level.INFO, "Running final index commit and search");
//final commit
commit();
// Remove from the search list and trigger final commit and final search
SearchRunner.getInstance().endJob(jobId);
postIndexSummary();
//run one last search as there are probably some new files committed
List<String> keywordLists = settings.getNamesOfEnabledKeyWordLists();
if (!keywordLists.isEmpty() && processedFiles == true) {
finalSearcher = new Searcher(keywordLists, true); //final searcher run
finalSearcher.execute();
}
//log number of files / chunks in index
//signal a potential change in number of text_ingested files
try {
@ -320,8 +259,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
} catch (NoOpenCoreException | KeywordSearchModuleException ex) {
logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files/chunks: ", ex);
}
//cleanup done in final searcher
}
/**
@ -330,21 +267,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
private void stop() {
logger.log(Level.INFO, "stop()");
//stop timer
commitTimer.stop();
//stop currentSearcher
if (currentSearcher != null) {
currentSearcher.cancel(true);
}
//cancel searcher timer, ensure unwanted searcher does not start
if (searchTimer.isRunning()) {
searchTimer.stop();
}
runSearcher = false;
//commit uncommited files, don't search again
commit();
SearchRunner.getInstance().stopJob(jobId);
cleanup();
}
@ -354,15 +277,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
*/
private void cleanup() {
ingestStatus.clear();
currentResults.clear();
curDataSourceIds.clear();
currentSearcher = null;
//finalSearcher = null; //do not collect, might be finalizing
commitTimer.stop();
searchTimer.stop();
commitTimer = null;
//searchTimer = null; // do not collect, final searcher might still be running, in which case it throws an exception
textExtractors.clear();
textExtractors = null;
@ -373,19 +287,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
initialized = false;
}
/**
* Commits index and notifies listeners of index update
*/
private void commit() {
if (initialized) {
logger.log(Level.INFO, "Commiting index");
ingester.commit();
logger.log(Level.INFO, "Index comitted");
//signal a potential change in number of text_ingested files
indexChangeNotify();
}
}
/**
* Posts inbox message with summary of text_ingested files
*/
@ -417,7 +318,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
error_io++;
break;
default:
;
;
}
}
@ -441,73 +342,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
}
}
/**
* Helper method to notify listeners on index update
*/
private void indexChangeNotify() {
//signal a potential change in number of text_ingested files
try {
final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
KeywordSearch.fireNumIndexedFilesChange(null, new Integer(numIndexedFiles));
} catch (NoOpenCoreException | KeywordSearchModuleException ex) {
logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex);
}
}
/**
* Check if time to commit, if so, run commit. Then run search if search
* timer is also set.
*/
void checkRunCommitSearch() {
if (commitIndex) {
logger.log(Level.INFO, "Commiting index");
commit();
commitIndex = false;
//after commit, check if time to run searcher
//NOTE commit/searcher timings don't need to align
//in worst case, we will run search next time after commit timer goes off, or at the end of ingest
if (searcherDone && runSearcher) {
//start search if previous not running
List<String> keywordLists = settings.getNamesOfEnabledKeyWordLists();
if (!keywordLists.isEmpty()) {
currentSearcher = new Searcher(keywordLists);
currentSearcher.execute();//searcher will stop timer and restart timer when done
}
}
}
}
/**
* CommitTimerAction to run by commitTimer Sets a flag to indicate we are
* ready for commit
*/
private class CommitTimerAction implements ActionListener {
private final Logger logger = Logger.getLogger(CommitTimerAction.class.getName());
@Override
public void actionPerformed(ActionEvent e) {
commitIndex = true;
logger.log(Level.INFO, "CommitTimer awake");
}
}
/**
* SearchTimerAction to run by searchTimer Sets a flag to indicate we are
* ready to search
*/
private class SearchTimerAction implements ActionListener {
private final Logger logger = Logger.getLogger(SearchTimerAction.class.getName());
@Override
public void actionPerformed(ActionEvent e) {
runSearcher = true;
logger.log(Level.INFO, "SearchTimer awake");
}
}
/**
* File indexer, processes and indexes known/allocated files,
* unknown/unallocated files and directories accordingly
@ -689,420 +523,4 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
}
}
/**
* Searcher responsible for searching the current index and writing results
* to blackboard and the inbox. Also, posts results to listeners as Ingest
* data events. Searches entire index, and keeps track of only new results
* to report and save. Runs as a background thread.
*/
private final class Searcher extends SwingWorker<Object, Void> {
/**
* Searcher has private copies/snapshots of the lists and keywords
*/
private List<Keyword> keywords; //keywords to search
private List<String> keywordLists; // lists currently being searched
private Map<String, KeywordList> keywordToList; //keyword to list name mapping
private AggregateProgressHandle progressGroup;
private final Logger logger = Logger.getLogger(Searcher.class.getName());
private boolean finalRun = false;
Searcher(List<String> keywordLists) {
this.keywordLists = new ArrayList<>(keywordLists);
this.keywords = new ArrayList<>();
this.keywordToList = new HashMap<>();
//keywords are populated as searcher runs
}
Searcher(List<String> keywordLists, boolean finalRun) {
this(keywordLists);
this.finalRun = finalRun;
}
@Override
protected Object doInBackground() throws Exception {
if (finalRun) {
logger.log(Level.INFO, "Pending start of new (final) searcher");
} else {
logger.log(Level.INFO, "Pending start of new searcher");
}
final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName")
+ (finalRun ? (" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : "");
progressGroup = AggregateProgressFactory.createSystemHandle(displayName + (" ("
+ NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.pendingMsg") + ")"), null, new Cancellable() {
@Override
public boolean cancel() {
logger.log(Level.INFO, "Cancelling the searcher by user.");
if (progressGroup != null) {
progressGroup.setDisplayName(displayName + " (" + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.cancelMsg") + "...)");
}
return Searcher.this.cancel(true);
}
}, null);
updateKeywords();
ProgressContributor[] subProgresses = new ProgressContributor[keywords.size()];
int i = 0;
for (Keyword keywordQuery : keywords) {
subProgresses[i] =
AggregateProgressFactory.createProgressContributor(keywordQuery.getQuery());
progressGroup.addContributor(subProgresses[i]);
i++;
}
progressGroup.start();
//block to ensure previous searcher is completely done with doInBackground()
//even after previous searcher cancellation, we need to check this
searcherLock.lock();
final StopWatch stopWatch = new StopWatch();
stopWatch.start();
try {
logger.log(Level.INFO, "Started a new searcher");
progressGroup.setDisplayName(displayName);
//make sure other searchers are not spawned
searcherDone = false;
runSearcher = false;
if (searchTimer.isRunning()) {
searchTimer.stop();
}
int keywordsSearched = 0;
//updateKeywords();
for (Keyword keywordQuery : keywords) {
if (this.isCancelled()) {
logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keywordQuery.getQuery());
return null;
}
final String queryStr = keywordQuery.getQuery();
final KeywordList list = keywordToList.get(queryStr);
final String listName = list.getName();
//new subProgress will be active after the initial query
//when we know number of hits to start() with
if (keywordsSearched > 0) {
subProgresses[keywordsSearched - 1].finish();
}
KeywordSearchQuery del = null;
boolean isRegex = !keywordQuery.isLiteral();
if (isRegex) {
del = new TermComponentQuery(keywordQuery);
} else {
del = new LuceneQuery(keywordQuery);
del.escape();
}
//limit search to currently ingested data sources
//set up a filter with 1 or more image ids OR'ed
final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, curDataSourceIds);
del.addFilter(dataSourceFilter);
Map<String, List<ContentHit>> queryResult;
try {
queryResult = del.performQuery();
} catch (NoOpenCoreException ex) {
logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), ex);
//no reason to continue with next query if recovery failed
//or wait for recovery to kick in and run again later
//likely case has closed and threads are being interrupted
return null;
} catch (CancellationException e) {
logger.log(Level.INFO, "Cancel detected, bailing during keyword query: {0}", keywordQuery.getQuery());
return null;
} catch (Exception e) {
logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), e);
continue;
}
// calculate new results but substracting results already obtained in this ingest
// this creates a map of each keyword to the list of unique files that have that hit.
Map<Keyword, List<ContentHit>> newResults = filterResults(queryResult, isRegex);
if (!newResults.isEmpty()) {
//write results to BB
//new artifacts created, to report to listeners
Collection<BlackboardArtifact> newArtifacts = new ArrayList<>();
//scale progress bar more more granular, per result sub-progress, within per keyword
int totalUnits = newResults.size();
subProgresses[keywordsSearched].start(totalUnits);
int unitProgress = 0;
String queryDisplayStr = keywordQuery.getQuery();
if (queryDisplayStr.length() > 50) {
queryDisplayStr = queryDisplayStr.substring(0, 49) + "...";
}
subProgresses[keywordsSearched].progress(listName + ": " + queryDisplayStr, unitProgress);
/* cycle through the keywords returned -- only one unless it was a regexp */
for (final Keyword hitTerm : newResults.keySet()) {
//checking for cancellation between results
if (this.isCancelled()) {
logger.log(Level.INFO, "Cancel detected, bailing before new hit processed for query: {0}", keywordQuery.getQuery());
return null;
}
// update progress display
String hitDisplayStr = hitTerm.getQuery();
if (hitDisplayStr.length() > 50) {
hitDisplayStr = hitDisplayStr.substring(0, 49) + "...";
}
subProgresses[keywordsSearched].progress(listName + ": " + hitDisplayStr, unitProgress);
//subProgresses[keywordsSearched].progress(unitProgress);
// this returns the unique files in the set with the first chunk that has a hit
Map<AbstractFile, Integer> contentHitsFlattened = ContentHit.flattenResults(newResults.get(hitTerm));
for (final AbstractFile hitFile : contentHitsFlattened.keySet()) {
// get the snippet for the first hit in the file
String snippet;
final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(hitTerm.getQuery());
int chunkId = contentHitsFlattened.get(hitFile);
try {
snippet = LuceneQuery.querySnippet(snippetQuery, hitFile.getId(), chunkId, isRegex, true);
} catch (NoOpenCoreException e) {
logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e);
//no reason to continue
return null;
} catch (Exception e) {
logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e);
continue;
}
// write the blackboard artifact for this keyword in this file
KeywordWriteResult written = del.writeToBlackBoard(hitTerm.getQuery(), hitFile, snippet, listName);
if (written == null) {
logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hitFile, hitTerm.toString()});
continue;
}
newArtifacts.add(written.getArtifact());
//generate an ingest inbox message for this keyword in this file
if (list.getIngestMessages()) {
StringBuilder subjectSb = new StringBuilder();
StringBuilder detailsSb = new StringBuilder();
//final int hitFiles = newResults.size();
if (!keywordQuery.isLiteral()) {
subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl"));
} else {
subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl"));
}
//subjectSb.append("<");
String uniqueKey = null;
BlackboardAttribute attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID());
if (attr != null) {
final String keyword = attr.getValueString();
subjectSb.append(keyword);
uniqueKey = keyword.toLowerCase();
}
//subjectSb.append(">");
//String uniqueKey = queryStr;
//details
detailsSb.append("<table border='0' cellpadding='4' width='280'>");
//hit
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLThLbl"));
detailsSb.append("<td>").append(EscapeUtil.escapeHtml(attr.getValueString())).append("</td>");
detailsSb.append("</tr>");
//preview
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID());
if (attr != null) {
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.previewThLbl"));
detailsSb.append("<td>").append(EscapeUtil.escapeHtml(attr.getValueString())).append("</td>");
detailsSb.append("</tr>");
}
//file
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.fileThLbl"));
detailsSb.append("<td>").append(hitFile.getParentPath()).append(hitFile.getName()).append("</td>");
detailsSb.append("</tr>");
//list
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID());
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.listThLbl"));
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
detailsSb.append("</tr>");
//regex
if (!keywordQuery.isLiteral()) {
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID());
if (attr != null) {
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExThLbl"));
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
detailsSb.append("</tr>");
}
}
detailsSb.append("</table>");
services.postMessage(IngestMessage.createDataMessage(KeywordSearchModuleFactory.getModuleName(), subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact()));
}
} //for each file hit
++unitProgress;
}//for each hit term
//update artifact browser
if (!newArtifacts.isEmpty()) {
services.fireModuleDataEvent(new ModuleDataEvent(KeywordSearchModuleFactory.getModuleName(), ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts));
}
} //if has results
//reset the status text before it goes away
subProgresses[keywordsSearched].progress("");
++keywordsSearched;
} //for each keyword
} //end try block
catch (Exception ex) {
logger.log(Level.WARNING, "searcher exception occurred", ex);
} finally {
try {
finalizeSearcher();
stopWatch.stop();
logger.log(Level.INFO, "Searcher took to run: {0} secs.", stopWatch.getElapsedTimeSecs());
} finally {
searcherLock.unlock();
}
}
return null;
}
@Override
protected void done() {
// call get to see if there were any errors
try {
get();
} catch (InterruptedException | ExecutionException e) {
logger.log(Level.SEVERE, "Error performing keyword search: " + e.getMessage());
services.postMessage(IngestMessage.createErrorMessage(KeywordSearchModuleFactory.getModuleName(), "Error performing keyword search", e.getMessage()));
} // catch and ignore if we were cancelled
catch (java.util.concurrent.CancellationException ex) {
}
}
/**
* Sync-up the updated keywords from the currently used lists in the XML
*/
private void updateKeywords() {
KeywordSearchListsXML loader = KeywordSearchListsXML.getCurrent();
this.keywords.clear();
this.keywordToList.clear();
for (String name : this.keywordLists) {
KeywordList list = loader.getList(name);
for (Keyword k : list.getKeywords()) {
this.keywords.add(k);
this.keywordToList.put(k.getQuery(), list);
}
}
}
//perform all essential cleanup that needs to be done right AFTER doInBackground() returns
//without relying on done() method that is not guaranteed to run after background thread completes
//NEED to call this method always right before doInBackground() returns
/**
* Performs the cleanup that needs to be done right AFTER
* doInBackground() returns without relying on done() method that is not
* guaranteed to run after background thread completes REQUIRED to call
* this method always right before doInBackground() returns
*/
private void finalizeSearcher() {
logger.log(Level.INFO, "Searcher finalizing");
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
progressGroup.finish();
}
});
searcherDone = true; //next currentSearcher can start
if (finalRun) {
//this is the final searcher
logger.log(Level.INFO, "The final searcher in this ingest done.");
//run module cleanup
cleanup();
} else {
//start counting time for a new searcher to start
//unless final searcher is pending
if (finalSearcher == null) {
//we need a new Timer object, because restarting previus will not cause firing of the action
final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000;
searchTimer = new Timer(updateIntervalMs, new SearchTimerAction());
searchTimer.start();
}
}
}
//calculate new results but substracting results already obtained in this ingest
//update currentResults map with the new results
private Map<Keyword, List<ContentHit>> filterResults(Map<String, List<ContentHit>> queryResult, boolean isRegex) {
Map<Keyword, List<ContentHit>> newResults = new HashMap<>();
for (String termResult : queryResult.keySet()) {
List<ContentHit> queryTermResults = queryResult.get(termResult);
//translate to list of IDs that we keep track of
List<Long> queryTermResultsIDs = new ArrayList<>();
for (ContentHit ch : queryTermResults) {
queryTermResultsIDs.add(ch.getId());
}
Keyword termResultK = new Keyword(termResult, !isRegex);
List<Long> curTermResults = currentResults.get(termResultK);
if (curTermResults == null) {
currentResults.put(termResultK, queryTermResultsIDs);
newResults.put(termResultK, queryTermResults);
} else {
//some AbstractFile hits already exist for this keyword
for (ContentHit res : queryTermResults) {
if (!curTermResults.contains(res.getId())) {
//add to new results
List<ContentHit> newResultsFs = newResults.get(termResultK);
if (newResultsFs == null) {
newResultsFs = new ArrayList<>();
newResults.put(termResultK, newResultsFs);
}
newResultsFs.add(res);
curTermResults.add(res.getId());
}
}
}
}
return newResults;
}
}
}

View File: KeywordSearchListsViewerPanel.java

@ -140,6 +140,7 @@ class KeywordSearchListsViewerPanel extends AbstractKeywordSearchPerformer {
@Override
public void actionPerformed(ActionEvent e) {
if (ingestRunning) {
SearchRunner.getInstance().addKeywordListsToAllJobs(listsTableModel.getSelectedLists());
logger.log(Level.INFO, "Submitted enqueued lists to ingest");
} else {
searchAction(e);

View File: KeywordSearchListsXML.java

@ -66,7 +66,7 @@ final class KeywordSearchListsXML extends KeywordSearchListsAbstract {
* RJCTODO: Move this one to the manager
* @return
*/
static KeywordSearchListsXML getCurrent() {
static synchronized KeywordSearchListsXML getCurrent() {
if (currentInstance == null) {
currentInstance = new KeywordSearchListsXML(CUR_LISTS_FILE);
currentInstance.reload();
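
Making getCurrent() synchronized closes the race in which two threads could both see a null currentInstance and construct (and reload) two separate list managers. A generic sketch of the lazily initialized, synchronized accessor this now follows (not the Autopsy class itself):

public class LazySingletonSketch {
    private static LazySingletonSketch currentInstance = null;

    private LazySingletonSketch() {
        // expensive one-time setup, e.g. loading and parsing the lists XML
    }

    // Without synchronized, two threads can interleave the null check and the
    // construction and end up holding two different instances.
    static synchronized LazySingletonSketch getCurrent() {
        if (currentInstance == null) {
            currentInstance = new LazySingletonSketch();
        }
        return currentInstance;
    }
}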

View File: SearchRunner.java (new file)

@ -0,0 +1,713 @@
/*
* Autopsy Forensic Browser
*
* Copyright 2011 - 2014 Basis Technology Corp.
* Contact: carrier <at> sleuthkit <dot> org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import javax.swing.SwingUtilities;
import javax.swing.SwingWorker;
import java.util.Timer;
import java.util.TimerTask;
import org.netbeans.api.progress.aggregate.AggregateProgressFactory;
import org.netbeans.api.progress.aggregate.AggregateProgressHandle;
import org.netbeans.api.progress.aggregate.ProgressContributor;
import org.openide.util.Cancellable;
import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.coreutils.EscapeUtil;
import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.coreutils.StopWatch;
import org.sleuthkit.autopsy.ingest.IngestMessage;
import org.sleuthkit.autopsy.ingest.IngestServices;
import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
import org.sleuthkit.datamodel.AbstractFile;
import org.sleuthkit.datamodel.BlackboardArtifact;
import org.sleuthkit.datamodel.BlackboardAttribute;
/**
* Singleton keyword search manager:
* Launches search threads for each job and performs commits, both on timed
* intervals.
*/
public final class SearchRunner {
private static final Logger logger = Logger.getLogger(SearchRunner.class.getName());
private static SearchRunner instance = null;
private IngestServices services = IngestServices.getInstance();
private Ingester ingester = null; //guarded by "ingester"
private volatile boolean updateTimerRunning = false;
private Timer updateTimer;
private Map<Long, SearchJobInfo> jobs = new HashMap<>(); //guarded by "this"
SearchRunner() {
ingester = Server.getIngester();
updateTimer = new Timer("SearchRunner update timer", true); // run as a daemon
}
/**
*
* @return the singleton object
*/
public static synchronized SearchRunner getInstance() {
if (instance == null) {
instance = new SearchRunner();
}
return instance;
}
/**
*
* @param jobId
* @param dataSourceId
* @param keywordListNames
*/
public synchronized void startJob(long jobId, long dataSourceId, List<String> keywordListNames) {
if (!jobs.containsKey(jobId)) {
logger.log(Level.INFO, "Adding job {0}", jobId);
SearchJobInfo jobData = new SearchJobInfo(jobId, dataSourceId, keywordListNames);
jobs.put(jobId, jobData);
}
jobs.get(jobId).incrementModuleReferenceCount();
if (jobs.size() > 0) {
final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000;
if (!updateTimerRunning) {
updateTimer.scheduleAtFixedRate(new UpdateTimerTask(), updateIntervalMs, updateIntervalMs);
updateTimerRunning = true;
}
}
}
/**
* Perform normal finishing of searching for this job, including one last
* commit and search. Blocks until the final search is complete.
* @param jobId
*/
public void endJob(long jobId) {
SearchJobInfo job;
boolean readyForFinalSearch = false;
synchronized(this) {
job = jobs.get(jobId);
if (job == null) {
return;
}
// Only do final search if this is the last module in this job to call endJob()
if(job.decrementModuleReferenceCount() == 0) {
jobs.remove(jobId);
readyForFinalSearch = true;
}
}
if (readyForFinalSearch) {
commit();
doFinalSearch(job); //this will block until it's done
}
}
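
endJob() runs the final commit and search only when the last module instance on the job checks out, tracked by the per-job moduleReferenceCount defined further down in SearchJobInfo. A self-contained sketch of that reference-counting hand-off, with illustrative names:

import java.util.concurrent.atomic.AtomicLong;

public class RefCountedJobSketch {
    private final AtomicLong moduleReferenceCount = new AtomicLong(0);

    public void startJob() {                       // each module instance registers itself
        moduleReferenceCount.incrementAndGet();
    }

    public void endJob() {                         // each module instance checks out
        if (moduleReferenceCount.decrementAndGet() == 0) {
            // only the last caller reaches this point
            System.out.println("final commit + final search");
        }
    }

    public static void main(String[] args) {
        RefCountedJobSketch job = new RefCountedJobSketch();
        job.startJob();  // e.g. two file-ingest threads share one job
        job.startJob();
        job.endJob();    // nothing happens yet
        job.endJob();    // prints: final commit + final search
    }
}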
/**
* Immediate stop and removal of job from SearchRunner. Cancels the
* associated search worker if it's still running.
* @param jobId
*/
public void stopJob(long jobId) {
logger.log(Level.INFO, "Stopping job {0}", jobId);
commit();
SearchJobInfo job;
synchronized(this) {
job = jobs.get(jobId);
if (job == null) {
return;
}
//stop currentSearcher
SearchRunner.Searcher currentSearcher = job.getCurrentSearcher();
if ((currentSearcher != null) && (!currentSearcher.isDone())) {
currentSearcher.cancel(true);
}
jobs.remove(jobId);
}
}
/**
* Add these lists to all of the jobs
* @param keywordListNames
*/
public synchronized void addKeywordListsToAllJobs(List<String> keywordListNames) {
for(String listName : keywordListNames) {
logger.log(Level.INFO, "Adding keyword list {0} to all jobs", listName);
for(SearchJobInfo j : jobs.values()) {
j.addKeywordListName(listName);
}
}
}
/**
* Commits index and notifies listeners of index update
*/
private void commit() {
synchronized(ingester) {
ingester.commit();
}
// Signal a potential change in number of text_ingested files
try {
final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
KeywordSearch.fireNumIndexedFilesChange(null, new Integer(numIndexedFiles));
} catch (NoOpenCoreException | KeywordSearchModuleException ex) {
logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex);
}
}
/**
* A final search waits for any still-running workers, and then executes a
* new one and waits until that is done.
* @param job
*/
private void doFinalSearch(SearchJobInfo job) {
// Run one last search as there are probably some new files committed
logger.log(Level.INFO, "Running final search for jobid {0}", job.getJobId());
if (!job.getKeywordListNames().isEmpty()) {
try {
// In case this job still has a worker running, wait for it to finish
job.waitForCurrentWorker();
SearchRunner.Searcher finalSearcher = new SearchRunner.Searcher(job, true);
job.setCurrentSearcher(finalSearcher); //save the ref
finalSearcher.execute(); //start thread
// block until the search is complete
finalSearcher.get();
} catch (InterruptedException | ExecutionException ex) {
logger.log(Level.WARNING, "Job {1} final search thread failed: {2}", new Object[]{job.getJobId(), ex});
}
}
}
/**
* Timer triggered re-search for each job (does a single index commit first)
*/
private class UpdateTimerTask extends TimerTask {
private final Logger logger = Logger.getLogger(SearchRunner.UpdateTimerTask.class.getName());
@Override
public void run() {
// If no jobs then cancel the task. If more job(s) come along, a new task will start up.
if (jobs.isEmpty()) {
this.cancel(); //terminate this timer task
updateTimerRunning = false;
return;
}
commit();
synchronized(SearchRunner.this) {
// Spawn a search thread for each job
for(Entry<Long, SearchJobInfo> j : jobs.entrySet()) {
SearchJobInfo job = j.getValue();
// If no lists or the worker is already running then skip it
if (!job.getKeywordListNames().isEmpty() && !job.isWorkerRunning()) {
Searcher searcher = new Searcher(job);
job.setCurrentSearcher(searcher); //save the ref
searcher.execute(); //start thread
job.setWorkerRunning(true);
}
}
}
}
}
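
This daemon java.util.Timer replaces the per-chunk checkRunCommitSearch() calls and Swing timers removed earlier in the commit: one shared task commits the index, spawns a searcher per job, and cancels itself when no jobs remain. A runnable sketch of the same scheduling idiom (interval shortened so the output can be observed; Autopsy derives its interval from the configured update frequency in minutes):

import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicInteger;

public class PeriodicCommitSketch {
    public static void main(String[] args) throws InterruptedException {
        final AtomicInteger remainingJobs = new AtomicInteger(2);
        final long intervalMs = 200;
        Timer updateTimer = new Timer("SearchRunner update timer", true); // daemon thread
        updateTimer.scheduleAtFixedRate(new TimerTask() {
            @Override
            public void run() {
                if (remainingJobs.get() == 0) {
                    cancel(); // no jobs left: stop this task; a new one starts with the next job
                    return;
                }
                System.out.println("commit index, then spawn a searcher per job");
                remainingJobs.decrementAndGet(); // pretend a job finished
            }
        }, intervalMs, intervalMs);
        Thread.sleep(1000); // let the daemon timer fire a few times before the JVM exits
    }
}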
/**
* Data structure to keep track of keyword lists, current results, and search
* running status for each jobid
*/
private class SearchJobInfo {
private final long jobId;
private final long dataSourceId;
// mutable state:
private volatile boolean workerRunning;
private List<String> keywordListNames; //guarded by SearchJobInfo.this
private Map<Keyword, List<Long>> currentResults; //guarded by SearchJobInfo.this
private SearchRunner.Searcher currentSearcher;
private AtomicLong moduleReferenceCount = new AtomicLong(0);
private final Object finalSearchLock = new Object(); //used for a condition wait
public SearchJobInfo(long jobId, long dataSourceId, List<String> keywordListNames) {
this.jobId = jobId;
this.dataSourceId = dataSourceId;
this.keywordListNames = new ArrayList<>(keywordListNames);
currentResults = new HashMap<>();
workerRunning = false;
currentSearcher = null;
}
public long getJobId() {
return jobId;
}
public long getDataSourceId() {
return dataSourceId;
}
public synchronized List<String> getKeywordListNames() {
return new ArrayList<>(keywordListNames);
}
public synchronized void addKeywordListName(String keywordListName) {
if (!keywordListNames.contains(keywordListName)) {
keywordListNames.add(keywordListName);
}
}
public synchronized List<Long> currentKeywordResults(Keyword k) {
return currentResults.get(k);
}
public synchronized void addKeywordResults(Keyword k, List<Long> resultsIDs) {
currentResults.put(k, resultsIDs);
}
public boolean isWorkerRunning() {
return workerRunning;
}
public void setWorkerRunning(boolean flag) {
workerRunning = flag;
}
public synchronized SearchRunner.Searcher getCurrentSearcher() {
return currentSearcher;
}
public synchronized void setCurrentSearcher(SearchRunner.Searcher searchRunner) {
currentSearcher = searchRunner;
}
public void incrementModuleReferenceCount() {
moduleReferenceCount.incrementAndGet();
}
public long decrementModuleReferenceCount() {
return moduleReferenceCount.decrementAndGet();
}
/** In case this job still has a worker running, wait for it to finish
*
* @throws InterruptedException
*/
public void waitForCurrentWorker() throws InterruptedException {
synchronized(finalSearchLock) {
while(workerRunning) {
finalSearchLock.wait(); //wait() releases the lock
}
}
}
/**
* Unset workerRunning and wake up thread(s) waiting on finalSearchLock
*/
public void searchNotify() {
synchronized(finalSearchLock) {
workerRunning = false;
finalSearchLock.notify();
}
}
}
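
waitForCurrentWorker() and searchNotify() are a classic guarded wait: the final search blocks on finalSearchLock until the running worker clears workerRunning and wakes it. The same idiom in a standalone, runnable form with illustrative names:

public class GuardedWaitSketch {
    private volatile boolean workerRunning = true;
    private final Object finalSearchLock = new Object();

    // Blocks until the worker reports it is done; the while loop guards
    // against spurious wakeups.
    void waitForCurrentWorker() throws InterruptedException {
        synchronized (finalSearchLock) {
            while (workerRunning) {
                finalSearchLock.wait();   // releases the lock while waiting
            }
        }
    }

    // Called by the worker when it finishes.
    void searchNotify() {
        synchronized (finalSearchLock) {
            workerRunning = false;
            finalSearchLock.notify();
        }
    }

    public static void main(String[] args) throws InterruptedException {
        final GuardedWaitSketch job = new GuardedWaitSketch();
        Thread worker = new Thread(new Runnable() {
            @Override
            public void run() {
                try { Thread.sleep(300); } catch (InterruptedException ignored) { }
                job.searchNotify();       // worker done: wake up the waiter
            }
        });
        worker.start();
        job.waitForCurrentWorker();       // blocks until the worker signals
        System.out.println("final search can start now");
    }
}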
/**
* Searcher responsible for searching the current index and writing results
* to blackboard and the inbox. Also, posts results to listeners as Ingest
* data events. Searches entire index, and keeps track of only new results
* to report and save. Runs as a background thread.
*/
private final class Searcher extends SwingWorker<Object, Void> {
/**
* Searcher has private copies/snapshots of the lists and keywords
*/
private SearchJobInfo job;
private List<Keyword> keywords; //keywords to search
private List<String> keywordListNames; // lists currently being searched
private Map<String, KeywordList> keywordToList; //keyword to list name mapping
private AggregateProgressHandle progressGroup;
private final Logger logger = Logger.getLogger(SearchRunner.Searcher.class.getName());
private boolean finalRun = false;
Searcher(SearchJobInfo job) {
this.job = job;
keywordListNames = job.getKeywordListNames();
keywords = new ArrayList<>();
keywordToList = new HashMap<>();
//keywords are populated as searcher runs
}
Searcher(SearchJobInfo job, boolean finalRun) {
this(job);
this.finalRun = finalRun;
}
@Override
protected Object doInBackground() throws Exception {
final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName")
+ (finalRun ? (" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : "");
final String pgDisplayName = displayName + (" (" + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.pendingMsg") + ")");
progressGroup = AggregateProgressFactory.createSystemHandle(pgDisplayName, null, new Cancellable() {
@Override
public boolean cancel() {
logger.log(Level.INFO, "Cancelling the searcher by user.");
if (progressGroup != null) {
progressGroup.setDisplayName(displayName + " (" + NbBundle.getMessage(this.getClass(), "SearchRunner.doInBackGround.cancelMsg") + "...)");
}
return SearchRunner.Searcher.this.cancel(true);
}
}, null);
updateKeywords();
ProgressContributor[] subProgresses = new ProgressContributor[keywords.size()];
int i = 0;
for (Keyword keywordQuery : keywords) {
subProgresses[i] = AggregateProgressFactory.createProgressContributor(keywordQuery.getQuery());
progressGroup.addContributor(subProgresses[i]);
i++;
}
progressGroup.start();
final StopWatch stopWatch = new StopWatch();
stopWatch.start();
try {
progressGroup.setDisplayName(displayName);
int keywordsSearched = 0;
for (Keyword keywordQuery : keywords) {
if (this.isCancelled()) {
logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keywordQuery.getQuery());
return null;
}
final String queryStr = keywordQuery.getQuery();
final KeywordList list = keywordToList.get(queryStr);
final String listName = list.getName();
//new subProgress will be active after the initial query
//when we know number of hits to start() with
if (keywordsSearched > 0) {
subProgresses[keywordsSearched - 1].finish();
}
KeywordSearchQuery del = null;
boolean isRegex = !keywordQuery.isLiteral();
if (isRegex) {
del = new TermComponentQuery(keywordQuery);
} else {
del = new LuceneQuery(keywordQuery);
del.escape();
}
//limit search to currently ingested data sources
//set up a filter with 1 or more image ids OR'ed
final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, job.getDataSourceId());
del.addFilter(dataSourceFilter);
Map<String, List<ContentHit>> queryResult;
try {
queryResult = del.performQuery();
} catch (NoOpenCoreException ex) {
logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), ex);
//no reason to continue with next query if recovery failed
//or wait for recovery to kick in and run again later
//likely case has closed and threads are being interrupted
return null;
} catch (CancellationException e) {
logger.log(Level.INFO, "Cancel detected, bailing during keyword query: {0}", keywordQuery.getQuery());
return null;
} catch (Exception e) {
logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), e);
continue;
}
// calculate new results by subtracting results already obtained in this ingest
// this creates a map of each keyword to the list of unique files that have that hit.
Map<Keyword, List<ContentHit>> newResults = filterResults(queryResult, isRegex);
if (!newResults.isEmpty()) {
//write results to BB
//new artifacts created, to report to listeners
Collection<BlackboardArtifact> newArtifacts = new ArrayList<>();
//scale progress bar more granularly: per-result sub-progress within each keyword
int totalUnits = newResults.size();
subProgresses[keywordsSearched].start(totalUnits);
int unitProgress = 0;
String queryDisplayStr = keywordQuery.getQuery();
if (queryDisplayStr.length() > 50) {
queryDisplayStr = queryDisplayStr.substring(0, 49) + "...";
}
subProgresses[keywordsSearched].progress(listName + ": " + queryDisplayStr, unitProgress);
// cycle through the keywords returned -- only one unless it was a regexp
for (final Keyword hitTerm : newResults.keySet()) {
//checking for cancellation between results
if (this.isCancelled()) {
logger.log(Level.INFO, "Cancel detected, bailing before new hit processed for query: {0}", keywordQuery.getQuery());
return null;
}
// update progress display
String hitDisplayStr = hitTerm.getQuery();
if (hitDisplayStr.length() > 50) {
hitDisplayStr = hitDisplayStr.substring(0, 49) + "...";
}
subProgresses[keywordsSearched].progress(listName + ": " + hitDisplayStr, unitProgress);
// this returns the unique files in the set with the first chunk that has a hit
Map<AbstractFile, Integer> contentHitsFlattened = ContentHit.flattenResults(newResults.get(hitTerm));
for (final AbstractFile hitFile : contentHitsFlattened.keySet()) {
// get the snippet for the first hit in the file
String snippet;
final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(hitTerm.getQuery());
int chunkId = contentHitsFlattened.get(hitFile);
try {
snippet = LuceneQuery.querySnippet(snippetQuery, hitFile.getId(), chunkId, isRegex, true);
} catch (NoOpenCoreException e) {
logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e);
//no reason to continue
return null;
} catch (Exception e) {
logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e);
continue;
}
// write the blackboard artifact for this keyword in this file
KeywordWriteResult written = del.writeToBlackBoard(hitTerm.getQuery(), hitFile, snippet, listName);
if (written == null) {
logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hitFile, hitTerm.toString()});
continue;
}
newArtifacts.add(written.getArtifact());
//generate an ingest inbox message for this keyword in this file
if (list.getIngestMessages()) {
StringBuilder subjectSb = new StringBuilder();
StringBuilder detailsSb = new StringBuilder();
if (!keywordQuery.isLiteral()) {
subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl"));
} else {
subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl"));
}
String uniqueKey = null;
BlackboardAttribute attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID());
if (attr != null) {
final String keyword = attr.getValueString();
subjectSb.append(keyword);
uniqueKey = keyword.toLowerCase();
}
//details
detailsSb.append("<table border='0' cellpadding='4' width='280'>");
//hit
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitThLbl"));
detailsSb.append("<td>").append(EscapeUtil.escapeHtml(attr.getValueString())).append("</td>");
detailsSb.append("</tr>");
//preview
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID());
if (attr != null) {
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.previewThLbl"));
detailsSb.append("<td>").append(EscapeUtil.escapeHtml(attr.getValueString())).append("</td>");
detailsSb.append("</tr>");
}
//file
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.fileThLbl"));
detailsSb.append("<td>").append(hitFile.getParentPath()).append(hitFile.getName()).append("</td>");
detailsSb.append("</tr>");
//list
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID());
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.listThLbl"));
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
detailsSb.append("</tr>");
//regex
if (!keywordQuery.isLiteral()) {
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID());
if (attr != null) {
detailsSb.append("<tr>");
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExThLbl"));
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
detailsSb.append("</tr>");
}
}
detailsSb.append("</table>");
services.postMessage(IngestMessage.createDataMessage(KeywordSearchModuleFactory.getModuleName(), subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact()));
}
} //for each file hit
++unitProgress;
}//for each hit term
//update artifact browser
if (!newArtifacts.isEmpty()) {
services.fireModuleDataEvent(new ModuleDataEvent(KeywordSearchModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts));
}
} //if has results
//reset the status text before it goes away
subProgresses[keywordsSearched].progress("");
++keywordsSearched;
} //for each keyword
} //end try block
catch (Exception ex) {
logger.log(Level.WARNING, "searcher exception occurred", ex);
} finally {
try {
finalizeSearcher();
stopWatch.stop();
logger.log(Level.INFO, "Searcher took to run: {0} secs.", stopWatch.getElapsedTimeSecs());
} finally {
// In case a thread is waiting on this worker to be done
job.searchNotify();
}
}
return null;
}
@Override
protected void done() {
// call get to see if there were any errors
try {
get();
} catch (InterruptedException | ExecutionException e) {
logger.log(Level.SEVERE, "Error performing keyword search: " + e.getMessage());
services.postMessage(IngestMessage.createErrorMessage(KeywordSearchModuleFactory.getModuleName(), "Error performing keyword search", e.getMessage()));
} // catch and ignore if we were cancelled
catch (java.util.concurrent.CancellationException ex) {
}
}
/**
* Sync-up the updated keywords from the currently used lists in the XML
*/
private void updateKeywords() {
KeywordSearchListsXML loader = KeywordSearchListsXML.getCurrent();
keywords.clear();
keywordToList.clear();
for (String name : keywordListNames) {
KeywordList list = loader.getList(name);
for (Keyword k : list.getKeywords()) {
this.keywords.add(k);
this.keywordToList.put(k.getQuery(), list);
}
}
}
/**
* Performs the cleanup that needs to be done right AFTER
* doInBackground() returns without relying on done() method that is not
* guaranteed to run.
*/
private void finalizeSearcher() {
SwingUtilities.invokeLater(new Runnable() {
@Override
public void run() {
progressGroup.finish();
}
});
}
//calculate new results by subtracting results already obtained in this ingest
//update currentResults map with the new results
private Map<Keyword, List<ContentHit>> filterResults(Map<String, List<ContentHit>> queryResult, boolean isRegex) {
Map<Keyword, List<ContentHit>> newResults = new HashMap<>();
for (String termResult : queryResult.keySet()) {
List<ContentHit> queryTermResults = queryResult.get(termResult);
//translate to list of IDs that we keep track of
List<Long> queryTermResultsIDs = new ArrayList<>();
for (ContentHit ch : queryTermResults) {
queryTermResultsIDs.add(ch.getId());
}
Keyword termResultK = new Keyword(termResult, !isRegex);
List<Long> curTermResults = job.currentKeywordResults(termResultK);
if (curTermResults == null) {
job.addKeywordResults(termResultK, queryTermResultsIDs);
newResults.put(termResultK, queryTermResults);
} else {
//some AbstractFile hits already exist for this keyword
for (ContentHit res : queryTermResults) {
if (!curTermResults.contains(res.getId())) {
//add to new results
List<ContentHit> newResultsFs = newResults.get(termResultK);
if (newResultsFs == null) {
newResultsFs = new ArrayList<>();
newResults.put(termResultK, newResultsFs);
}
newResultsFs.add(res);
curTermResults.add(res.getId());
}
}
}
}
return newResults;
}
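
filterResults() reports a hit only for object IDs not already recorded for that keyword during this job, then remembers the new IDs via SearchJobInfo. A stripped-down, runnable version of that bookkeeping, with ContentHit and Keyword replaced by Long and String for the sake of the sketch:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class FilterResultsSketch {
    // keyword -> object IDs already reported earlier in this ingest job
    private final Map<String, List<Long>> currentResults = new HashMap<>();

    // Returns, per keyword, only the IDs not seen before, and remembers them.
    public Map<String, List<Long>> filterResults(Map<String, List<Long>> queryResult) {
        Map<String, List<Long>> newResults = new HashMap<>();
        for (Map.Entry<String, List<Long>> entry : queryResult.entrySet()) {
            List<Long> known = currentResults.get(entry.getKey());
            if (known == null) {
                known = new ArrayList<>();
                currentResults.put(entry.getKey(), known);
            }
            for (Long id : entry.getValue()) {
                if (!known.contains(id)) {
                    known.add(id);
                    List<Long> fresh = newResults.get(entry.getKey());
                    if (fresh == null) {
                        fresh = new ArrayList<>();
                        newResults.put(entry.getKey(), fresh);
                    }
                    fresh.add(id);
                }
            }
        }
        return newResults;
    }

    public static void main(String[] args) {
        FilterResultsSketch filter = new FilterResultsSketch();
        Map<String, List<Long>> pass1 = new HashMap<>();
        pass1.put("invoice", Arrays.asList(1L, 2L));
        System.out.println(filter.filterResults(pass1)); // {invoice=[1, 2]}
        Map<String, List<Long>> pass2 = new HashMap<>();
        pass2.put("invoice", Arrays.asList(2L, 3L));
        System.out.println(filter.filterResults(pass2)); // {invoice=[3]} -- 2 was already reported
    }
}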
}
}