diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileHtmlExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileHtmlExtract.java index e013ec2fac..7af85f7e3f 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileHtmlExtract.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileHtmlExtract.java @@ -177,10 +177,6 @@ import org.sleuthkit.datamodel.ReadContentInputStream; + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx); throw ingEx; //need to rethrow/return to signal error and move on } - - //check if need invoke commit/search between chunks - //not to delay commit if timer has gone off - module.checkRunCommitSearch(); } } catch (IOException ex) { logger.log(Level.WARNING, "Unable to read content stream from " + sourceFile.getId() + ": " + sourceFile.getName(), ex); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java index 4d82827c6a..6f96ff2495 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileStringExtract.java @@ -147,10 +147,6 @@ class AbstractFileStringExtract implements AbstractFileExtract { throw ingEx; //need to rethrow/return to signal error and move on } - //check if need invoke commit/search between chunks - //not to delay commit if timer has gone off - module.checkRunCommitSearch(); - //debug.close(); } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java index 4f52805753..e19be18a5c 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileTikaTextExtract.java @@ -223,10 +223,6 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract { + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx); throw ingEx; //need to rethrow/return to signal error and move on } - - //check if need invoke commit/search between chunks - //not to delay commit if timer has gone off - module.checkRunCommitSearch(); } } catch (IOException ex) { final String msg = "Exception: Unable to read Tika content stream from " + sourceFile.getId() + ": " + sourceFile.getName(); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties index 040e9e0c03..a608ce590f 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Bundle.properties @@ -263,3 +263,10 @@ KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index: KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer) KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default) +KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit: +KeywordSearchIngestModule.kwHitLbl=Keyword hit: +KeywordSearchIngestModule.kwHitThLbl=Keyword +KeywordSearchIngestModule.previewThLbl=Preview +KeywordSearchIngestModule.fileThLbl=File +KeywordSearchIngestModule.listThLbl=List 
+KeywordSearchIngestModule.regExThLbl=Reg Ex diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java index 12d9999b01..bdb304dabf 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/Ingester.java @@ -53,7 +53,6 @@ import org.sleuthkit.datamodel.ContentVisitor; import org.sleuthkit.datamodel.DerivedFile; import org.sleuthkit.datamodel.Directory; import org.sleuthkit.datamodel.File; -import org.sleuthkit.datamodel.FsContent; import org.sleuthkit.datamodel.LayoutFile; import org.sleuthkit.datamodel.LocalFile; import org.sleuthkit.datamodel.ReadContentInputStream; @@ -66,7 +65,7 @@ import org.sleuthkit.datamodel.TskCoreException; class Ingester { private static final Logger logger = Logger.getLogger(Ingester.class.getName()); - private boolean uncommitedIngests = false; + private volatile boolean uncommitedIngests = false; private final ExecutorService upRequestExecutor = Executors.newSingleThreadExecutor(); private final Server solrServer = KeywordSearch.getServer(); private final GetContentFieldsV getContentFieldsV = new GetContentFieldsV(); @@ -74,8 +73,7 @@ class Ingester { //for ingesting chunk as SolrInputDocument (non-content-streaming, by-pass tika) //TODO use a streaming way to add content to /update handler - private final static int MAX_DOC_CHUNK_SIZE = 1024*1024; - private final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE]; + private static final int MAX_DOC_CHUNK_SIZE = 1024*1024; private static final String docContentEncoding = "UTF-8"; @@ -286,6 +284,7 @@ class Ingester { throw new IngesterException(msg); } + final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE]; SolrInputDocument updateDoc = new SolrInputDocument(); for (String key : fields.keySet()) { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java old mode 100755 new mode 100644 index a8d124934c..205fd36035 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java @@ -18,36 +18,19 @@ */ package org.sleuthkit.autopsy.keywordsearch; -import java.awt.event.ActionEvent; -import java.awt.event.ActionListener; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; -import javax.swing.SwingUtilities; -import javax.swing.SwingWorker; -import javax.swing.Timer; import org.apache.tika.Tika; -import org.netbeans.api.progress.aggregate.AggregateProgressFactory; -import org.netbeans.api.progress.aggregate.AggregateProgressHandle; -import org.netbeans.api.progress.aggregate.ProgressContributor; -import org.openide.util.Cancellable; import org.sleuthkit.autopsy.casemodule.Case; -import org.sleuthkit.autopsy.coreutils.EscapeUtil; import 
org.sleuthkit.autopsy.coreutils.MessageNotifyUtil; -import org.sleuthkit.autopsy.coreutils.StopWatch; import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT; import org.sleuthkit.autopsy.ingest.FileIngestModule; import org.sleuthkit.autopsy.ingest.IngestServices; @@ -55,11 +38,7 @@ import org.sleuthkit.autopsy.ingest.IngestMessage; import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType; import org.sleuthkit.autopsy.ingest.IngestModuleAdapter; import org.sleuthkit.autopsy.ingest.IngestJobContext; -import org.sleuthkit.autopsy.ingest.ModuleDataEvent; import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException; -import org.sleuthkit.datamodel.BlackboardArtifact; -import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE; -import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.ReadContentInputStream; import org.sleuthkit.datamodel.SleuthkitCase; @@ -97,28 +76,22 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme private static final Logger logger = Logger.getLogger(KeywordSearchIngestModule.class.getName()); private IngestServices services = IngestServices.getInstance(); private Ingester ingester = null; - private volatile boolean commitIndex = false; //whether to commit index next time - private volatile boolean runSearcher = false; //whether to run searcher next time - private Timer commitTimer; - private Timer searchTimer; private Indexer indexer; - private Searcher currentSearcher; - private Searcher finalSearcher; - private volatile boolean searcherDone = true; //mark as done, until it's inited - private Map> currentResults; //only search images from current ingest, not images previously ingested/indexed //accessed read-only by searcher thread - private Set curDataSourceIds; - private static final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true); //use fairness policy - private static final Lock searcherLock = rwLock.writeLock(); - private boolean processedFiles; + + private boolean startedSearching = false; private SleuthkitCase caseHandle = null; - private static List textExtractors; - private static AbstractFileStringExtract stringExtractor; + private List textExtractors; + private AbstractFileStringExtract stringExtractor; private final KeywordSearchJobSettings settings; private boolean initialized = false; private Tika tikaFormatDetector; - + private long jobId; + private long dataSourceId; + private static AtomicInteger instanceCount = new AtomicInteger(0); //just used for logging + private int instanceNum = 0; + private enum IngestStatus { TEXT_INGESTED, /// Text was extracted by knowing file type and text_ingested @@ -132,6 +105,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme KeywordSearchIngestModule(KeywordSearchJobSettings settings) { this.settings = settings; + instanceNum = instanceCount.getAndIncrement(); } /** @@ -141,13 +115,12 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme */ @Override public void startUp(IngestJobContext context) throws IngestModuleException { - logger.log(Level.INFO, "init()"); + logger.log(Level.INFO, "Initializing instance {0}", instanceNum); initialized = false; - + + jobId = context.getJobId(); caseHandle = Case.getCurrentCase().getSleuthkitCase(); - tikaFormatDetector = new Tika(); - ingester = Server.getIngester(); final Server server = KeywordSearch.getServer(); @@ -209,31 +182,12 @@ public final 
class KeywordSearchIngestModule extends IngestModuleAdapter impleme NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.onlyIdxKwSkipMsg"))); } - processedFiles = false; - searcherDone = true; //make sure to start the initial currentSearcher - //keeps track of all results per run not to repeat reporting the same hits - currentResults = new HashMap<>(); - - curDataSourceIds = new HashSet<>(); - indexer = new Indexer(); - - final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000; - logger.log(Level.INFO, "Using commit interval (ms): {0}", updateIntervalMs); - logger.log(Level.INFO, "Using searcher interval (ms): {0}", updateIntervalMs); - - commitTimer = new Timer(updateIntervalMs, new CommitTimerAction()); - searchTimer = new Timer(updateIntervalMs, new SearchTimerAction()); - initialized = true; - - commitTimer.start(); - searchTimer.start(); } @Override public ProcessResult process(AbstractFile abstractFile) { - if (initialized == false) //error initializing indexing/Solr { logger.log(Level.WARNING, "Skipping processing, module not initialized, file: {0}", abstractFile.getName()); @@ -242,8 +196,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme } try { //add data source id of the file to the set, keeping track of images being ingested - final long fileSourceId = caseHandle.getFileDataSource(abstractFile); - curDataSourceIds.add(fileSourceId); + dataSourceId = caseHandle.getFileDataSource(abstractFile); } catch (TskCoreException ex) { logger.log(Level.SEVERE, "Error getting image id of file processed by keyword search: " + abstractFile.getName(), ex); @@ -260,14 +213,16 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme return ProcessResult.OK; } - processedFiles = true; - - //check if it's time to commit after previous processing - checkRunCommitSearch(); - //index the file and content (if the content is supported) indexer.indexFile(abstractFile, true); + // Start searching if it hasn't started already + if (!startedSearching) { + List keywordListNames = settings.getNamesOfEnabledKeyWordLists(); + SearchRunner.getInstance().startJob(jobId, dataSourceId, keywordListNames); + startedSearching = true; + } + return ProcessResult.OK; } @@ -277,39 +232,23 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme */ @Override public void shutDown(boolean ingestJobCancelled) { + logger.log(Level.INFO, "Instance {0}", instanceNum); + if (initialized == false) { return; } if (ingestJobCancelled) { + logger.log(Level.INFO, "Ingest job cancelled"); stop(); return; } - commitTimer.stop(); - - //NOTE, we let the 1 before last searcher complete fully, and enqueue the last one - - //cancel searcher timer, ensure unwanted searcher does not start - //before we start the final one - if (searchTimer.isRunning()) { - searchTimer.stop(); - } - runSearcher = false; - - logger.log(Level.INFO, "Running final index commit and search"); - //final commit - commit(); - - postIndexSummary(); - - //run one last search as there are probably some new files committed - List keywordLists = settings.getNamesOfEnabledKeyWordLists(); - if (!keywordLists.isEmpty() && processedFiles == true) { - finalSearcher = new Searcher(keywordLists, true); //final searcher run - finalSearcher.execute(); - } - + // Remove from the search list and trigger final commit and final search + SearchRunner.getInstance().endJob(jobId); + + postIndexSummary(); + //log number of files / chunks in index 
//signal a potential change in number of text_ingested files try { @@ -320,8 +259,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme } catch (NoOpenCoreException | KeywordSearchModuleException ex) { logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files/chunks: ", ex); } - - //cleanup done in final searcher } /** @@ -330,22 +267,8 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme private void stop() { logger.log(Level.INFO, "stop()"); - //stop timer - commitTimer.stop(); - //stop currentSearcher - if (currentSearcher != null) { - currentSearcher.cancel(true); - } - - //cancel searcher timer, ensure unwanted searcher does not start - if (searchTimer.isRunning()) { - searchTimer.stop(); - } - runSearcher = false; - - //commit uncommited files, don't search again - commit(); - + SearchRunner.getInstance().stopJob(jobId); + cleanup(); } @@ -354,15 +277,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme */ private void cleanup() { ingestStatus.clear(); - currentResults.clear(); - curDataSourceIds.clear(); - currentSearcher = null; - //finalSearcher = null; //do not collect, might be finalizing - - commitTimer.stop(); - searchTimer.stop(); - commitTimer = null; - //searchTimer = null; // do not collect, final searcher might still be running, in which case it throws an exception textExtractors.clear(); textExtractors = null; @@ -373,19 +287,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme initialized = false; } - /** - * Commits index and notifies listeners of index update - */ - private void commit() { - if (initialized) { - logger.log(Level.INFO, "Commiting index"); - ingester.commit(); - logger.log(Level.INFO, "Index comitted"); - //signal a potential change in number of text_ingested files - indexChangeNotify(); - } - } - /** * Posts inbox message with summary of text_ingested files */ @@ -417,7 +318,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme error_io++; break; default: - ; + ; } } @@ -441,73 +342,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme } } - /** - * Helper method to notify listeners on index update - */ - private void indexChangeNotify() { - //signal a potential change in number of text_ingested files - try { - final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles(); - KeywordSearch.fireNumIndexedFilesChange(null, new Integer(numIndexedFiles)); - } catch (NoOpenCoreException | KeywordSearchModuleException ex) { - logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex); - } - } - - /** - * Check if time to commit, if so, run commit. Then run search if search - * timer is also set. 
- */ - void checkRunCommitSearch() { - if (commitIndex) { - logger.log(Level.INFO, "Commiting index"); - commit(); - commitIndex = false; - - //after commit, check if time to run searcher - //NOTE commit/searcher timings don't need to align - //in worst case, we will run search next time after commit timer goes off, or at the end of ingest - if (searcherDone && runSearcher) { - //start search if previous not running - List keywordLists = settings.getNamesOfEnabledKeyWordLists(); - if (!keywordLists.isEmpty()) { - currentSearcher = new Searcher(keywordLists); - currentSearcher.execute();//searcher will stop timer and restart timer when done - } - } - } - } - - /** - * CommitTimerAction to run by commitTimer Sets a flag to indicate we are - * ready for commit - */ - private class CommitTimerAction implements ActionListener { - - private final Logger logger = Logger.getLogger(CommitTimerAction.class.getName()); - - @Override - public void actionPerformed(ActionEvent e) { - commitIndex = true; - logger.log(Level.INFO, "CommitTimer awake"); - } - } - - /** - * SearchTimerAction to run by searchTimer Sets a flag to indicate we are - * ready to search - */ - private class SearchTimerAction implements ActionListener { - - private final Logger logger = Logger.getLogger(SearchTimerAction.class.getName()); - - @Override - public void actionPerformed(ActionEvent e) { - runSearcher = true; - logger.log(Level.INFO, "SearchTimer awake"); - } - } - /** * File indexer, processes and indexes known/allocated files, * unknown/unallocated files and directories accordingly @@ -689,420 +523,4 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme } } - /** - * Searcher responsible for searching the current index and writing results - * to blackboard and the inbox. Also, posts results to listeners as Ingest - * data events. Searches entire index, and keeps track of only new results - * to report and save. Runs as a background thread. - */ - private final class Searcher extends SwingWorker { - - /** - * Searcher has private copies/snapshots of the lists and keywords - */ - private List keywords; //keywords to search - private List keywordLists; // lists currently being searched - private Map keywordToList; //keyword to list name mapping - private AggregateProgressHandle progressGroup; - private final Logger logger = Logger.getLogger(Searcher.class.getName()); - private boolean finalRun = false; - - Searcher(List keywordLists) { - this.keywordLists = new ArrayList<>(keywordLists); - this.keywords = new ArrayList<>(); - this.keywordToList = new HashMap<>(); - //keywords are populated as searcher runs - } - - Searcher(List keywordLists, boolean finalRun) { - this(keywordLists); - this.finalRun = finalRun; - } - - @Override - protected Object doInBackground() throws Exception { - if (finalRun) { - logger.log(Level.INFO, "Pending start of new (final) searcher"); - } else { - logger.log(Level.INFO, "Pending start of new searcher"); - } - - final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName") - + (finalRun ? 
(" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : ""); - progressGroup = AggregateProgressFactory.createSystemHandle(displayName + (" (" - + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.pendingMsg") + ")"), null, new Cancellable() { - @Override - public boolean cancel() { - logger.log(Level.INFO, "Cancelling the searcher by user."); - if (progressGroup != null) { - progressGroup.setDisplayName(displayName + " (" + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.cancelMsg") + "...)"); - } - return Searcher.this.cancel(true); - } - }, null); - - updateKeywords(); - - ProgressContributor[] subProgresses = new ProgressContributor[keywords.size()]; - int i = 0; - for (Keyword keywordQuery : keywords) { - subProgresses[i] = - AggregateProgressFactory.createProgressContributor(keywordQuery.getQuery()); - progressGroup.addContributor(subProgresses[i]); - i++; - } - - progressGroup.start(); - - //block to ensure previous searcher is completely done with doInBackground() - //even after previous searcher cancellation, we need to check this - searcherLock.lock(); - final StopWatch stopWatch = new StopWatch(); - stopWatch.start(); - try { - logger.log(Level.INFO, "Started a new searcher"); - progressGroup.setDisplayName(displayName); - //make sure other searchers are not spawned - searcherDone = false; - runSearcher = false; - if (searchTimer.isRunning()) { - searchTimer.stop(); - } - - int keywordsSearched = 0; - - //updateKeywords(); - - for (Keyword keywordQuery : keywords) { - if (this.isCancelled()) { - logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keywordQuery.getQuery()); - return null; - } - - final String queryStr = keywordQuery.getQuery(); - final KeywordList list = keywordToList.get(queryStr); - final String listName = list.getName(); - - //new subProgress will be active after the initial query - //when we know number of hits to start() with - if (keywordsSearched > 0) { - subProgresses[keywordsSearched - 1].finish(); - } - - - KeywordSearchQuery del = null; - - boolean isRegex = !keywordQuery.isLiteral(); - if (isRegex) { - del = new TermComponentQuery(keywordQuery); - } else { - del = new LuceneQuery(keywordQuery); - del.escape(); - } - - //limit search to currently ingested data sources - //set up a filter with 1 or more image ids OR'ed - final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, curDataSourceIds); - del.addFilter(dataSourceFilter); - - Map> queryResult; - - try { - queryResult = del.performQuery(); - } catch (NoOpenCoreException ex) { - logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), ex); - //no reason to continue with next query if recovery failed - //or wait for recovery to kick in and run again later - //likely case has closed and threads are being interrupted - return null; - } catch (CancellationException e) { - logger.log(Level.INFO, "Cancel detected, bailing during keyword query: {0}", keywordQuery.getQuery()); - return null; - } catch (Exception e) { - logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), e); - continue; - } - - // calculate new results but substracting results already obtained in this ingest - // this creates a map of each keyword to the list of unique files that have that hit. 
- Map> newResults = filterResults(queryResult, isRegex); - - if (!newResults.isEmpty()) { - - //write results to BB - - //new artifacts created, to report to listeners - Collection newArtifacts = new ArrayList<>(); - - //scale progress bar more more granular, per result sub-progress, within per keyword - int totalUnits = newResults.size(); - subProgresses[keywordsSearched].start(totalUnits); - int unitProgress = 0; - String queryDisplayStr = keywordQuery.getQuery(); - if (queryDisplayStr.length() > 50) { - queryDisplayStr = queryDisplayStr.substring(0, 49) + "..."; - } - subProgresses[keywordsSearched].progress(listName + ": " + queryDisplayStr, unitProgress); - - - /* cycle through the keywords returned -- only one unless it was a regexp */ - for (final Keyword hitTerm : newResults.keySet()) { - //checking for cancellation between results - if (this.isCancelled()) { - logger.log(Level.INFO, "Cancel detected, bailing before new hit processed for query: {0}", keywordQuery.getQuery()); - return null; - } - - // update progress display - String hitDisplayStr = hitTerm.getQuery(); - if (hitDisplayStr.length() > 50) { - hitDisplayStr = hitDisplayStr.substring(0, 49) + "..."; - } - subProgresses[keywordsSearched].progress(listName + ": " + hitDisplayStr, unitProgress); - //subProgresses[keywordsSearched].progress(unitProgress); - - // this returns the unique files in the set with the first chunk that has a hit - Map contentHitsFlattened = ContentHit.flattenResults(newResults.get(hitTerm)); - for (final AbstractFile hitFile : contentHitsFlattened.keySet()) { - - // get the snippet for the first hit in the file - String snippet; - final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(hitTerm.getQuery()); - int chunkId = contentHitsFlattened.get(hitFile); - try { - snippet = LuceneQuery.querySnippet(snippetQuery, hitFile.getId(), chunkId, isRegex, true); - } catch (NoOpenCoreException e) { - logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); - //no reason to continue - return null; - } catch (Exception e) { - logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); - continue; - } - - // write the blackboard artifact for this keyword in this file - KeywordWriteResult written = del.writeToBlackBoard(hitTerm.getQuery(), hitFile, snippet, listName); - if (written == null) { - logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hitFile, hitTerm.toString()}); - continue; - } - - newArtifacts.add(written.getArtifact()); - - //generate an ingest inbox message for this keyword in this file - if (list.getIngestMessages()) { - StringBuilder subjectSb = new StringBuilder(); - StringBuilder detailsSb = new StringBuilder(); - //final int hitFiles = newResults.size(); - - if (!keywordQuery.isLiteral()) { - subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl")); - } else { - subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl")); - } - //subjectSb.append("<"); - String uniqueKey = null; - BlackboardAttribute attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID()); - if (attr != null) { - final String keyword = attr.getValueString(); - subjectSb.append(keyword); - uniqueKey = keyword.toLowerCase(); - } - - //subjectSb.append(">"); - //String uniqueKey = queryStr; - - //details - detailsSb.append(""); - //hit - detailsSb.append(""); - detailsSb.append(NbBundle.getMessage(this.getClass(), 
"KeywordSearchIngestModule.kwHitLThLbl")); - detailsSb.append(""); - detailsSb.append(""); - - //preview - attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID()); - if (attr != null) { - detailsSb.append(""); - detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.previewThLbl")); - detailsSb.append(""); - detailsSb.append(""); - - } - - //file - detailsSb.append(""); - detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.fileThLbl")); - detailsSb.append(""); - - detailsSb.append(""); - - - //list - attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID()); - detailsSb.append(""); - detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.listThLbl")); - detailsSb.append(""); - detailsSb.append(""); - - //regex - if (!keywordQuery.isLiteral()) { - attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID()); - if (attr != null) { - detailsSb.append(""); - detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExThLbl")); - detailsSb.append(""); - detailsSb.append(""); - - } - } - detailsSb.append("
").append(EscapeUtil.escapeHtml(attr.getValueString())).append("
").append(EscapeUtil.escapeHtml(attr.getValueString())).append("
").append(hitFile.getParentPath()).append(hitFile.getName()).append("
").append(attr.getValueString()).append("
").append(attr.getValueString()).append("
"); - - services.postMessage(IngestMessage.createDataMessage(KeywordSearchModuleFactory.getModuleName(), subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact())); - } - } //for each file hit - - ++unitProgress; - - }//for each hit term - - //update artifact browser - if (!newArtifacts.isEmpty()) { - services.fireModuleDataEvent(new ModuleDataEvent(KeywordSearchModuleFactory.getModuleName(), ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts)); - } - } //if has results - - //reset the status text before it goes away - subProgresses[keywordsSearched].progress(""); - - ++keywordsSearched; - - } //for each keyword - - } //end try block - catch (Exception ex) { - logger.log(Level.WARNING, "searcher exception occurred", ex); - } finally { - try { - finalizeSearcher(); - stopWatch.stop(); - logger.log(Level.INFO, "Searcher took to run: {0} secs.", stopWatch.getElapsedTimeSecs()); - } finally { - searcherLock.unlock(); - } - } - - return null; - } - - @Override - protected void done() { - // call get to see if there were any errors - try { - get(); - } catch (InterruptedException | ExecutionException e) { - logger.log(Level.SEVERE, "Error performing keyword search: " + e.getMessage()); - services.postMessage(IngestMessage.createErrorMessage(KeywordSearchModuleFactory.getModuleName(), "Error performing keyword search", e.getMessage())); - } // catch and ignore if we were cancelled - catch (java.util.concurrent.CancellationException ex) { - } - } - - /** - * Sync-up the updated keywords from the currently used lists in the XML - */ - private void updateKeywords() { - KeywordSearchListsXML loader = KeywordSearchListsXML.getCurrent(); - - this.keywords.clear(); - this.keywordToList.clear(); - - for (String name : this.keywordLists) { - KeywordList list = loader.getList(name); - for (Keyword k : list.getKeywords()) { - this.keywords.add(k); - this.keywordToList.put(k.getQuery(), list); - } - } - - - } - - //perform all essential cleanup that needs to be done right AFTER doInBackground() returns - //without relying on done() method that is not guaranteed to run after background thread completes - //NEED to call this method always right before doInBackground() returns - /** - * Performs the cleanup that needs to be done right AFTER - * doInBackground() returns without relying on done() method that is not - * guaranteed to run after background thread completes REQUIRED to call - * this method always right before doInBackground() returns - */ - private void finalizeSearcher() { - logger.log(Level.INFO, "Searcher finalizing"); - SwingUtilities.invokeLater(new Runnable() { - @Override - public void run() { - progressGroup.finish(); - } - }); - searcherDone = true; //next currentSearcher can start - - if (finalRun) { - //this is the final searcher - logger.log(Level.INFO, "The final searcher in this ingest done."); - - //run module cleanup - cleanup(); - } else { - //start counting time for a new searcher to start - //unless final searcher is pending - if (finalSearcher == null) { - //we need a new Timer object, because restarting previus will not cause firing of the action - final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000; - searchTimer = new Timer(updateIntervalMs, new SearchTimerAction()); - searchTimer.start(); - } - } - } - - //calculate new results but substracting results already obtained in this ingest - //update currentResults map with the new results - private Map> filterResults(Map> queryResult, boolean isRegex) { - Map> newResults = new 
HashMap<>(); - - for (String termResult : queryResult.keySet()) { - List queryTermResults = queryResult.get(termResult); - - //translate to list of IDs that we keep track of - List queryTermResultsIDs = new ArrayList<>(); - for (ContentHit ch : queryTermResults) { - queryTermResultsIDs.add(ch.getId()); - } - - Keyword termResultK = new Keyword(termResult, !isRegex); - List curTermResults = currentResults.get(termResultK); - if (curTermResults == null) { - currentResults.put(termResultK, queryTermResultsIDs); - newResults.put(termResultK, queryTermResults); - } else { - //some AbstractFile hits already exist for this keyword - for (ContentHit res : queryTermResults) { - if (!curTermResults.contains(res.getId())) { - //add to new results - List newResultsFs = newResults.get(termResultK); - if (newResultsFs == null) { - newResultsFs = new ArrayList<>(); - newResults.put(termResultK, newResultsFs); - } - newResultsFs.add(res); - curTermResults.add(res.getId()); - } - } - } - } - - return newResults; - - } - } } diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchListsViewerPanel.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchListsViewerPanel.java index 6b0ca4ad8a..8464a1ecc5 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchListsViewerPanel.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchListsViewerPanel.java @@ -139,7 +139,8 @@ class KeywordSearchListsViewerPanel extends AbstractKeywordSearchPerformer { searchAddListener = new ActionListener() { @Override public void actionPerformed(ActionEvent e) { - if (ingestRunning) { + if (ingestRunning) { + SearchRunner.getInstance().addKeywordListsToAllJobs(listsTableModel.getSelectedLists()); logger.log(Level.INFO, "Submitted enqueued lists to ingest"); } else { searchAction(e); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchListsXML.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchListsXML.java index 6a6bd9fc87..929ec49ac3 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchListsXML.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchListsXML.java @@ -66,7 +66,7 @@ final class KeywordSearchListsXML extends KeywordSearchListsAbstract { * RJCTODO: Move this one to the manager * @return */ - static KeywordSearchListsXML getCurrent() { + static synchronized KeywordSearchListsXML getCurrent() { if (currentInstance == null) { currentInstance = new KeywordSearchListsXML(CUR_LISTS_FILE); currentInstance.reload(); diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java new file mode 100644 index 0000000000..813c0fd8dd --- /dev/null +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/SearchRunner.java @@ -0,0 +1,713 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2011 - 2014 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.sleuthkit.autopsy.keywordsearch;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.concurrent.CancellationException;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.logging.Level;
+import javax.swing.SwingUtilities;
+import javax.swing.SwingWorker;
+import java.util.Timer;
+import java.util.TimerTask;
+import org.netbeans.api.progress.aggregate.AggregateProgressFactory;
+import org.netbeans.api.progress.aggregate.AggregateProgressHandle;
+import org.netbeans.api.progress.aggregate.ProgressContributor;
+import org.openide.util.Cancellable;
+import org.openide.util.NbBundle;
+import org.sleuthkit.autopsy.coreutils.EscapeUtil;
+import org.sleuthkit.autopsy.coreutils.Logger;
+import org.sleuthkit.autopsy.coreutils.StopWatch;
+import org.sleuthkit.autopsy.ingest.IngestMessage;
+import org.sleuthkit.autopsy.ingest.IngestServices;
+import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
+import org.sleuthkit.datamodel.AbstractFile;
+import org.sleuthkit.datamodel.BlackboardArtifact;
+import org.sleuthkit.datamodel.BlackboardAttribute;
+
+/**
+ * Singleton keyword search manager:
+ * Launches search threads for each job and performs commits, both on timed
+ * intervals.
+ */
+public final class SearchRunner {
+    private static final Logger logger = Logger.getLogger(SearchRunner.class.getName());
+    private static SearchRunner instance = null;
+    private IngestServices services = IngestServices.getInstance();
+    private Ingester ingester = null; //guarded by "ingester"
+    private volatile boolean updateTimerRunning = false;
+    private Timer updateTimer;
+    private Map<Long, SearchJobInfo> jobs = new HashMap<>(); //guarded by "this"
+
+    SearchRunner() {
+        ingester = Server.getIngester();
+        updateTimer = new Timer("SearchRunner update timer", true); // run as a daemon
+    }
+
+    /**
+     *
+     * @return the singleton object
+     */
+    public static synchronized SearchRunner getInstance() {
+        if (instance == null) {
+            instance = new SearchRunner();
+        }
+        return instance;
+    }
+
+    /**
+     *
+     * @param jobId
+     * @param dataSourceId
+     * @param keywordListNames
+     */
+    public synchronized void startJob(long jobId, long dataSourceId, List<String> keywordListNames) {
+        if (!jobs.containsKey(jobId)) {
+            logger.log(Level.INFO, "Adding job {0}", jobId);
+            SearchJobInfo jobData = new SearchJobInfo(jobId, dataSourceId, keywordListNames);
+            jobs.put(jobId, jobData);
+        }
+
+        jobs.get(jobId).incrementModuleReferenceCount();
+
+        if (jobs.size() > 0) {
+            final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000;
+            if (!updateTimerRunning) {
+                updateTimer.scheduleAtFixedRate(new UpdateTimerTask(), updateIntervalMs, updateIntervalMs);
+                updateTimerRunning = true;
+            }
+        }
+    }
+
+    /**
+     * Perform normal finishing of searching for this job, including one last
+     * commit and search. Blocks until the final search is complete.
+     * @param jobId
+     */
+    public void endJob(long jobId) {
+        SearchJobInfo job;
+        boolean readyForFinalSearch = false;
+        synchronized(this) {
+            job = jobs.get(jobId);
+            if (job == null) {
+                return;
+            }
+
+            // Only do final search if this is the last module in this job to call endJob()
+            if (job.decrementModuleReferenceCount() == 0) {
+                jobs.remove(jobId);
+                readyForFinalSearch = true;
+            }
+        }
+
+        if (readyForFinalSearch) {
+            commit();
+            doFinalSearch(job); //this will block until it's done
+        }
+    }
+
+    /**
+     * Immediate stop and removal of job from SearchRunner. Cancels the
+     * associated search worker if it's still running.
+     *
+     * @param jobId
+     */
+    public void stopJob(long jobId) {
+        logger.log(Level.INFO, "Stopping job {0}", jobId);
+        commit();
+
+        SearchJobInfo job;
+        synchronized(this) {
+            job = jobs.get(jobId);
+            if (job == null) {
+                return;
+            }
+
+            //stop currentSearcher
+            SearchRunner.Searcher currentSearcher = job.getCurrentSearcher();
+            if ((currentSearcher != null) && (!currentSearcher.isDone())) {
+                currentSearcher.cancel(true);
+            }
+
+            jobs.remove(jobId);
+        }
+    }
+
+    /**
+     * Add these lists to all of the jobs.
+     *
+     * @param keywordListNames
+     */
+    public synchronized void addKeywordListsToAllJobs(List<String> keywordListNames) {
+        for (String listName : keywordListNames) {
+            logger.log(Level.INFO, "Adding keyword list {0} to all jobs", listName);
+            for (SearchJobInfo j : jobs.values()) {
+                j.addKeywordListName(listName);
+            }
+        }
+    }
+
+    /**
+     * Commits index and notifies listeners of index update
+     */
+    private void commit() {
+        synchronized(ingester) {
+            ingester.commit();
+        }
+
+        // Signal a potential change in number of text_ingested files
+        try {
+            final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
+            KeywordSearch.fireNumIndexedFilesChange(null, new Integer(numIndexedFiles));
+        } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
+            logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex);
+        }
+    }
+
+    /**
+     * A final search waits for any still-running workers, and then executes a
+     * new one and waits until that is done.
+     *
+     * @param job
+     */
+    private void doFinalSearch(SearchJobInfo job) {
+        // Run one last search as there are probably some new files committed
+        logger.log(Level.INFO, "Running final search for jobid {0}", job.getJobId());
+        if (!job.getKeywordListNames().isEmpty()) {
+            try {
+                // In case this job still has a worker running, wait for it to finish
+                job.waitForCurrentWorker();
+
+                SearchRunner.Searcher finalSearcher = new SearchRunner.Searcher(job, true);
+                job.setCurrentSearcher(finalSearcher); //save the ref
+                finalSearcher.execute(); //start thread
+
+                // block until the search is complete
+                finalSearcher.get();
+
+            } catch (InterruptedException | ExecutionException ex) {
+                logger.log(Level.WARNING, "Job {0} final search thread failed: {1}", new Object[]{job.getJobId(), ex});
+            }
+        }
+    }
+
+    /**
+     * Timer triggered re-search for each job (does a single index commit first)
+     */
+    private class UpdateTimerTask extends TimerTask {
+        private final Logger logger = Logger.getLogger(SearchRunner.UpdateTimerTask.class.getName());
+
+        @Override
+        public void run() {
+            // If no jobs then cancel the task. If more job(s) come along, a new task will start up.
+            if (jobs.isEmpty()) {
+                this.cancel(); //terminate this timer task
+                updateTimerRunning = false;
+                return;
+            }
+
+            commit();
+
+            synchronized(SearchRunner.this) {
+                // Spawn a search thread for each job
+                for (Entry<Long, SearchJobInfo> j : jobs.entrySet()) {
+                    SearchJobInfo job = j.getValue();
+                    // If no lists or the worker is already running then skip it
+                    if (!job.getKeywordListNames().isEmpty() && !job.isWorkerRunning()) {
+                        Searcher searcher = new Searcher(job);
+                        job.setCurrentSearcher(searcher); //save the ref
+                        searcher.execute(); //start thread
+                        job.setWorkerRunning(true);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Data structure to keep track of keyword lists, current results, and search
+     * running status for each jobid
+     */
+    private class SearchJobInfo {
+        private final long jobId;
+        private final long dataSourceId;
+        // mutable state:
+        private volatile boolean workerRunning;
+        private List<String> keywordListNames; //guarded by SearchJobInfo.this
+        private Map<Keyword, List<Long>> currentResults; //guarded by SearchJobInfo.this
+        private SearchRunner.Searcher currentSearcher;
+        private AtomicLong moduleReferenceCount = new AtomicLong(0);
+        private final Object finalSearchLock = new Object(); //used for a condition wait
+
+        public SearchJobInfo(long jobId, long dataSourceId, List<String> keywordListNames) {
+            this.jobId = jobId;
+            this.dataSourceId = dataSourceId;
+            this.keywordListNames = new ArrayList<>(keywordListNames);
+            currentResults = new HashMap<>();
+            workerRunning = false;
+            currentSearcher = null;
+        }
+
+        public long getJobId() {
+            return jobId;
+        }
+
+        public long getDataSourceId() {
+            return dataSourceId;
+        }
+
+        public synchronized List<String> getKeywordListNames() {
+            return new ArrayList<>(keywordListNames);
+        }
+
+        public synchronized void addKeywordListName(String keywordListName) {
+            if (!keywordListNames.contains(keywordListName)) {
+                keywordListNames.add(keywordListName);
+            }
+        }
+
+        public synchronized List<Long> currentKeywordResults(Keyword k) {
+            return currentResults.get(k);
+        }
+
+        public synchronized void addKeywordResults(Keyword k, List<Long> resultsIDs) {
+            currentResults.put(k, resultsIDs);
+        }
+
+        public boolean isWorkerRunning() {
+            return workerRunning;
+        }
+
+        public void setWorkerRunning(boolean flag) {
+            workerRunning = flag;
+        }
+
+        public synchronized SearchRunner.Searcher getCurrentSearcher() {
+            return currentSearcher;
+        }
+
+        public synchronized void setCurrentSearcher(SearchRunner.Searcher searchRunner) {
+            currentSearcher = searchRunner;
+        }
+
+        public void incrementModuleReferenceCount() {
+            moduleReferenceCount.incrementAndGet();
+        }
+
+        public long decrementModuleReferenceCount() {
+            return moduleReferenceCount.decrementAndGet();
+        }
+
+        /**
+         * In case this job still has a worker running, wait for it to finish.
+         *
+         * @throws InterruptedException
+         */
+        public void waitForCurrentWorker() throws InterruptedException {
+            synchronized(finalSearchLock) {
+                while (workerRunning) {
+                    finalSearchLock.wait(); //wait() releases the lock
+                }
+            }
+        }
+
+        /**
+         * Unset workerRunning and wake up thread(s) waiting on finalSearchLock
+         */
+        public void searchNotify() {
+            synchronized(finalSearchLock) {
+                workerRunning = false;
+                finalSearchLock.notify();
+            }
+        }
+    }
+
+    /**
+     * Searcher responsible for searching the current index and writing results
+     * to blackboard and the inbox. Also, posts results to listeners as Ingest
+     * data events. Searches entire index, and keeps track of only new results
+     * to report and save. Runs as a background thread.
+ */ + private final class Searcher extends SwingWorker { + + /** + * Searcher has private copies/snapshots of the lists and keywords + */ + private SearchJobInfo job; + private List keywords; //keywords to search + private List keywordListNames; // lists currently being searched + private Map keywordToList; //keyword to list name mapping + private AggregateProgressHandle progressGroup; + private final Logger logger = Logger.getLogger(SearchRunner.Searcher.class.getName()); + private boolean finalRun = false; + + Searcher(SearchJobInfo job) { + this.job = job; + keywordListNames = job.getKeywordListNames(); + keywords = new ArrayList<>(); + keywordToList = new HashMap<>(); + //keywords are populated as searcher runs + } + + Searcher(SearchJobInfo job, boolean finalRun) { + this(job); + this.finalRun = finalRun; + } + + @Override + protected Object doInBackground() throws Exception { + final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName") + + (finalRun ? (" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : ""); + final String pgDisplayName = displayName + (" (" + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.pendingMsg") + ")"); + progressGroup = AggregateProgressFactory.createSystemHandle(pgDisplayName, null, new Cancellable() { + @Override + public boolean cancel() { + logger.log(Level.INFO, "Cancelling the searcher by user."); + if (progressGroup != null) { + progressGroup.setDisplayName(displayName + " (" + NbBundle.getMessage(this.getClass(), "SearchRunner.doInBackGround.cancelMsg") + "...)"); + } + return SearchRunner.Searcher.this.cancel(true); + } + }, null); + + updateKeywords(); + + ProgressContributor[] subProgresses = new ProgressContributor[keywords.size()]; + int i = 0; + for (Keyword keywordQuery : keywords) { + subProgresses[i] = AggregateProgressFactory.createProgressContributor(keywordQuery.getQuery()); + progressGroup.addContributor(subProgresses[i]); + i++; + } + + progressGroup.start(); + + final StopWatch stopWatch = new StopWatch(); + stopWatch.start(); + try { + progressGroup.setDisplayName(displayName); + + int keywordsSearched = 0; + + for (Keyword keywordQuery : keywords) { + if (this.isCancelled()) { + logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keywordQuery.getQuery()); + return null; + } + + final String queryStr = keywordQuery.getQuery(); + final KeywordList list = keywordToList.get(queryStr); + final String listName = list.getName(); + + //new subProgress will be active after the initial query + //when we know number of hits to start() with + if (keywordsSearched > 0) { + subProgresses[keywordsSearched - 1].finish(); + } + + KeywordSearchQuery del = null; + + boolean isRegex = !keywordQuery.isLiteral(); + if (isRegex) { + del = new TermComponentQuery(keywordQuery); + } else { + del = new LuceneQuery(keywordQuery); + del.escape(); + } + + //limit search to currently ingested data sources + //set up a filter with 1 or more image ids OR'ed + final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, job.getDataSourceId()); + del.addFilter(dataSourceFilter); + + Map> queryResult; + + try { + queryResult = del.performQuery(); + } catch (NoOpenCoreException ex) { + logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), ex); + //no reason to continue with next query if recovery failed + //or wait for 
recovery to kick in and run again later + //likely case has closed and threads are being interrupted + return null; + } catch (CancellationException e) { + logger.log(Level.INFO, "Cancel detected, bailing during keyword query: {0}", keywordQuery.getQuery()); + return null; + } catch (Exception e) { + logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), e); + continue; + } + + // calculate new results by substracting results already obtained in this ingest + // this creates a map of each keyword to the list of unique files that have that hit. + Map> newResults = filterResults(queryResult, isRegex); + + if (!newResults.isEmpty()) { + + //write results to BB + + //new artifacts created, to report to listeners + Collection newArtifacts = new ArrayList<>(); + + //scale progress bar more more granular, per result sub-progress, within per keyword + int totalUnits = newResults.size(); + subProgresses[keywordsSearched].start(totalUnits); + int unitProgress = 0; + String queryDisplayStr = keywordQuery.getQuery(); + if (queryDisplayStr.length() > 50) { + queryDisplayStr = queryDisplayStr.substring(0, 49) + "..."; + } + subProgresses[keywordsSearched].progress(listName + ": " + queryDisplayStr, unitProgress); + + // cycle through the keywords returned -- only one unless it was a regexp + for (final Keyword hitTerm : newResults.keySet()) { + //checking for cancellation between results + if (this.isCancelled()) { + logger.log(Level.INFO, "Cancel detected, bailing before new hit processed for query: {0}", keywordQuery.getQuery()); + return null; + } + + // update progress display + String hitDisplayStr = hitTerm.getQuery(); + if (hitDisplayStr.length() > 50) { + hitDisplayStr = hitDisplayStr.substring(0, 49) + "..."; + } + subProgresses[keywordsSearched].progress(listName + ": " + hitDisplayStr, unitProgress); + + // this returns the unique files in the set with the first chunk that has a hit + Map contentHitsFlattened = ContentHit.flattenResults(newResults.get(hitTerm)); + for (final AbstractFile hitFile : contentHitsFlattened.keySet()) { + + // get the snippet for the first hit in the file + String snippet; + final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(hitTerm.getQuery()); + int chunkId = contentHitsFlattened.get(hitFile); + try { + snippet = LuceneQuery.querySnippet(snippetQuery, hitFile.getId(), chunkId, isRegex, true); + } catch (NoOpenCoreException e) { + logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); + //no reason to continue + return null; + } catch (Exception e) { + logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); + continue; + } + + // write the blackboard artifact for this keyword in this file + KeywordWriteResult written = del.writeToBlackBoard(hitTerm.getQuery(), hitFile, snippet, listName); + if (written == null) { + logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hitFile, hitTerm.toString()}); + continue; + } + + newArtifacts.add(written.getArtifact()); + + //generate an ingest inbox message for this keyword in this file + if (list.getIngestMessages()) { + StringBuilder subjectSb = new StringBuilder(); + StringBuilder detailsSb = new StringBuilder(); + + if (!keywordQuery.isLiteral()) { + subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl")); + } else { + subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl")); + } + String uniqueKey = null; + 
BlackboardAttribute attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID()); + if (attr != null) { + final String keyword = attr.getValueString(); + subjectSb.append(keyword); + uniqueKey = keyword.toLowerCase(); + } + + //details + detailsSb.append(""); + //hit + detailsSb.append(""); + detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitThLbl")); + detailsSb.append(""); + detailsSb.append(""); + + //preview + attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID()); + if (attr != null) { + detailsSb.append(""); + detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.previewThLbl")); + detailsSb.append(""); + detailsSb.append(""); + } + + //file + detailsSb.append(""); + detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.fileThLbl")); + detailsSb.append(""); + detailsSb.append(""); + + //list + attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID()); + detailsSb.append(""); + detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.listThLbl")); + detailsSb.append(""); + detailsSb.append(""); + + //regex + if (!keywordQuery.isLiteral()) { + attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID()); + if (attr != null) { + detailsSb.append(""); + detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExThLbl")); + detailsSb.append(""); + detailsSb.append(""); + } + } + detailsSb.append("
").append(EscapeUtil.escapeHtml(attr.getValueString())).append("
").append(EscapeUtil.escapeHtml(attr.getValueString())).append("
").append(hitFile.getParentPath()).append(hitFile.getName()).append("
").append(attr.getValueString()).append("
").append(attr.getValueString()).append("
"); + + services.postMessage(IngestMessage.createDataMessage(KeywordSearchModuleFactory.getModuleName(), subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact())); + } + } //for each file hit + + ++unitProgress; + + }//for each hit term + + //update artifact browser + if (!newArtifacts.isEmpty()) { + services.fireModuleDataEvent(new ModuleDataEvent(KeywordSearchModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts)); + } + } //if has results + + //reset the status text before it goes away + subProgresses[keywordsSearched].progress(""); + + ++keywordsSearched; + + } //for each keyword + + } //end try block + catch (Exception ex) { + logger.log(Level.WARNING, "searcher exception occurred", ex); + } finally { + try { + finalizeSearcher(); + stopWatch.stop(); + + logger.log(Level.INFO, "Searcher took to run: {0} secs.", stopWatch.getElapsedTimeSecs()); + } finally { + // In case a thread is waiting on this worker to be done + job.searchNotify(); + } + } + + return null; + } + + @Override + protected void done() { + // call get to see if there were any errors + try { + get(); + } catch (InterruptedException | ExecutionException e) { + logger.log(Level.SEVERE, "Error performing keyword search: " + e.getMessage()); + services.postMessage(IngestMessage.createErrorMessage(KeywordSearchModuleFactory.getModuleName(), "Error performing keyword search", e.getMessage())); + } // catch and ignore if we were cancelled + catch (java.util.concurrent.CancellationException ex) { + } + } + + /** + * Sync-up the updated keywords from the currently used lists in the XML + */ + private void updateKeywords() { + KeywordSearchListsXML loader = KeywordSearchListsXML.getCurrent(); + + keywords.clear(); + keywordToList.clear(); + + + for (String name : keywordListNames) { + KeywordList list = loader.getList(name); + for (Keyword k : list.getKeywords()) { + this.keywords.add(k); + this.keywordToList.put(k.getQuery(), list); + } + } + } + + /** + * Performs the cleanup that needs to be done right AFTER + * doInBackground() returns without relying on done() method that is not + * guaranteed to run. 
+         */
+        private void finalizeSearcher() {
+            SwingUtilities.invokeLater(new Runnable() {
+                @Override
+                public void run() {
+                    progressGroup.finish();
+                }
+            });
+        }
+
+        //calculate new results by subtracting results already obtained in this ingest
+        //update currentResults map with the new results
+        private Map<Keyword, List<ContentHit>> filterResults(Map<String, List<ContentHit>> queryResult, boolean isRegex) {
+            Map<Keyword, List<ContentHit>> newResults = new HashMap<>();
+
+            for (String termResult : queryResult.keySet()) {
+                List<ContentHit> queryTermResults = queryResult.get(termResult);
+
+                //translate to list of IDs that we keep track of
+                List<Long> queryTermResultsIDs = new ArrayList<>();
+                for (ContentHit ch : queryTermResults) {
+                    queryTermResultsIDs.add(ch.getId());
+                }
+
+                Keyword termResultK = new Keyword(termResult, !isRegex);
+                List<Long> curTermResults = job.currentKeywordResults(termResultK);
+                if (curTermResults == null) {
+                    job.addKeywordResults(termResultK, queryTermResultsIDs);
+                    newResults.put(termResultK, queryTermResults);
+                } else {
+                    //some AbstractFile hits already exist for this keyword
+                    for (ContentHit res : queryTermResults) {
+                        if (!curTermResults.contains(res.getId())) {
+                            //add to new results
+                            List<ContentHit> newResultsFs = newResults.get(termResultK);
+                            if (newResultsFs == null) {
+                                newResultsFs = new ArrayList<>();
+                                newResults.put(termResultK, newResultsFs);
+                            }
+                            newResultsFs.add(res);
+                            curTermResults.add(res.getId());
+                        }
+                    }
+                }
+            }
+
+            return newResults;
+
+        }
+
+    }
+
+}
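Reviewer note: the patch above replaces the per-module Swing timers and module-owned Searcher with the shared SearchRunner singleton. The coordination pattern is: each ingest module instance registers its job (reference-counted), a daemon java.util.Timer periodically commits the index and spawns one background searcher per job, and endJob() lets only the last module instance of a job trigger a final, blocking search, waiting on a condition lock until any in-flight periodic searcher has signalled completion. The standalone sketch below illustrates that pattern only; the class and member names (SearchCoordinator, JobInfo, commitIndex, launchSearchers) and the 60-second period are illustrative stand-ins, not the module's actual API.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicLong;

/** Minimal stand-in for the per-job bookkeeping SearchRunner keeps in SearchJobInfo. */
class JobInfo {
    final long jobId;
    final List<String> keywordLists;
    final AtomicLong moduleRefCount = new AtomicLong(0);
    volatile boolean workerRunning = false;
    private final Object finalSearchLock = new Object();

    JobInfo(long jobId, List<String> keywordLists) {
        this.jobId = jobId;
        this.keywordLists = new ArrayList<>(keywordLists);
    }

    /** Block until any in-flight search worker for this job signals completion. */
    void waitForCurrentWorker() throws InterruptedException {
        synchronized (finalSearchLock) {
            while (workerRunning) {
                finalSearchLock.wait(); // releases the lock while waiting
            }
        }
    }

    /** Called by a worker when it finishes; wakes up waitForCurrentWorker(). */
    void searchNotify() {
        synchronized (finalSearchLock) {
            workerRunning = false;
            finalSearchLock.notifyAll();
        }
    }
}

/** Skeleton of the timer-driven commit/search coordinator. */
class SearchCoordinator {
    private final Map<Long, JobInfo> jobs = new HashMap<>();
    private final Timer timer = new Timer("search-coordinator", true); // daemon thread
    private boolean timerScheduled = false;

    /** Each module instance of an ingest job calls this; the job is created once and ref-counted. */
    synchronized void startJob(long jobId, List<String> keywordLists) {
        if (!jobs.containsKey(jobId)) {
            jobs.put(jobId, new JobInfo(jobId, keywordLists));
        }
        jobs.get(jobId).moduleRefCount.incrementAndGet();

        if (!timerScheduled) {
            long periodMs = 60000; // stand-in for the user-configured update frequency
            timer.scheduleAtFixedRate(new TimerTask() {
                @Override
                public void run() {
                    commitIndex();     // one commit per pass
                    launchSearchers(); // then one searcher per job that is idle
                }
            }, periodMs, periodMs);
            timerScheduled = true;
        }
    }

    /** Only the last module instance to end the job runs the final, blocking search. */
    void endJob(long jobId) {
        JobInfo job;
        synchronized (this) {
            job = jobs.get(jobId);
            if (job == null || job.moduleRefCount.decrementAndGet() != 0) {
                return;
            }
            jobs.remove(jobId);
        }
        commitIndex();
        try {
            job.waitForCurrentWorker(); // let any periodic searcher finish first
        } catch (InterruptedException ex) {
            Thread.currentThread().interrupt();
        }
        // ...run one final search over job.keywordLists here and block on its result...
    }

    private void commitIndex() { /* commit the text index (Solr in the real module) */ }

    private synchronized void launchSearchers() {
        for (JobInfo job : jobs.values()) {
            if (!job.keywordLists.isEmpty() && !job.workerRunning) {
                job.workerRunning = true;
                // ...spawn a background worker that searches and calls job.searchNotify() when done...
            }
        }
    }
}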