Mirror of https://github.com/overcuriousity/autopsy-flatpak.git (synced 2025-07-18 18:47:43 +00:00)

Commit 9d75f560e2: Merge branch 'develop' of https://www.github.com/sleuthkit/autopsy into develop
@@ -177,10 +177,6 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
                         + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);
                 throw ingEx; //need to rethrow/return to signal error and move on
             }
-
-            //check if need invoke commit/search between chunks
-            //not to delay commit if timer has gone off
-            module.checkRunCommitSearch();
         }
     } catch (IOException ex) {
         logger.log(Level.WARNING, "Unable to read content stream from " + sourceFile.getId() + ": " + sourceFile.getName(), ex);
@@ -147,10 +147,6 @@ class AbstractFileStringExtract implements AbstractFileExtract {
                 throw ingEx; //need to rethrow/return to signal error and move on
             }
-
-            //check if need invoke commit/search between chunks
-            //not to delay commit if timer has gone off
-            module.checkRunCommitSearch();
 
             //debug.close();
         }
 
@@ -223,10 +223,6 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
                         + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);
                 throw ingEx; //need to rethrow/return to signal error and move on
             }
-
-            //check if need invoke commit/search between chunks
-            //not to delay commit if timer has gone off
-            module.checkRunCommitSearch();
         }
     } catch (IOException ex) {
         final String msg = "Exception: Unable to read Tika content stream from " + sourceFile.getId() + ": " + sourceFile.getName();
@@ -263,3 +263,10 @@ KeywordSearchGlobalSearchSettingsPanel.timeRadioButton4.text_1=1 minute (faster
 KeywordSearchGlobalSearchSettingsPanel.chunksLabel.text=Chunks in keyword index:
 KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.toolTipText=5 minutes (overall ingest time will be longer)
 KeywordSearchGlobalSearchSettingsPanel.timeRadioButton3.text=5 minutes (default)
+KeywordSearchIngestModule.regExpHitLbl=Reg Ex hit:
+KeywordSearchIngestModule.kwHitLbl=Keyword hit:
+KeywordSearchIngestModule.kwHitThLbl=Keyword
+KeywordSearchIngestModule.previewThLbl=Preview
+KeywordSearchIngestModule.fileThLbl=File
+KeywordSearchIngestModule.listThLbl=List
+KeywordSearchIngestModule.regExThLbl=Reg Ex
@@ -53,7 +53,6 @@ import org.sleuthkit.datamodel.ContentVisitor;
 import org.sleuthkit.datamodel.DerivedFile;
 import org.sleuthkit.datamodel.Directory;
 import org.sleuthkit.datamodel.File;
-import org.sleuthkit.datamodel.FsContent;
 import org.sleuthkit.datamodel.LayoutFile;
 import org.sleuthkit.datamodel.LocalFile;
 import org.sleuthkit.datamodel.ReadContentInputStream;
@@ -66,7 +65,7 @@ import org.sleuthkit.datamodel.TskCoreException;
 class Ingester {
 
     private static final Logger logger = Logger.getLogger(Ingester.class.getName());
-    private boolean uncommitedIngests = false;
+    private volatile boolean uncommitedIngests = false;
     private final ExecutorService upRequestExecutor = Executors.newSingleThreadExecutor();
     private final Server solrServer = KeywordSearch.getServer();
     private final GetContentFieldsV getContentFieldsV = new GetContentFieldsV();
@@ -74,8 +73,7 @@ class Ingester {
 
     //for ingesting chunk as SolrInputDocument (non-content-streaming, by-pass tika)
     //TODO use a streaming way to add content to /update handler
-    private final static int MAX_DOC_CHUNK_SIZE = 1024*1024;
-    private final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
+    private static final int MAX_DOC_CHUNK_SIZE = 1024*1024;
     private static final String docContentEncoding = "UTF-8";
 
 
@@ -286,6 +284,7 @@ class Ingester {
             throw new IngesterException(msg);
         }
 
+        final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
         SolrInputDocument updateDoc = new SolrInputDocument();
 
         for (String key : fields.keySet()) {
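The Ingester hunks above drop the shared docChunkContentBuf field in favor of a buffer allocated inside the chunk-upload method, and mark uncommitedIngests volatile. A minimal sketch of that thread-safety pattern, using hypothetical names rather than the real Ingester API:

    // Illustrative only: per-call buffer plus volatile flag, mirroring the change above.
    class ChunkWriter {
        private static final int MAX_DOC_CHUNK_SIZE = 1024 * 1024;
        private volatile boolean uncommitted = false; // visible across ingest threads without locking

        void ingestChunk(java.io.InputStream content) throws java.io.IOException {
            // A local buffer cannot be clobbered by a concurrent caller, unlike a shared field.
            final byte[] docChunkContentBuf = new byte[MAX_DOC_CHUNK_SIZE];
            int read = content.read(docChunkContentBuf);
            if (read > 0) {
                // ... build the Solr document from docChunkContentBuf[0..read) and send it ...
                uncommitted = true;
            }
        }
    }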
KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchIngestModule.java (634 changed lines, Executable file → Normal file)
@@ -18,36 +18,19 @@
  */
 package org.sleuthkit.autopsy.keywordsearch;
 
-import java.awt.event.ActionEvent;
-import java.awt.event.ActionListener;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.CancellationException;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.locks.Lock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.logging.Level;
 import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.coreutils.Logger;
-import javax.swing.SwingUtilities;
-import javax.swing.SwingWorker;
-import javax.swing.Timer;
 import org.apache.tika.Tika;
-import org.netbeans.api.progress.aggregate.AggregateProgressFactory;
-import org.netbeans.api.progress.aggregate.AggregateProgressHandle;
-import org.netbeans.api.progress.aggregate.ProgressContributor;
-import org.openide.util.Cancellable;
 import org.sleuthkit.autopsy.casemodule.Case;
-import org.sleuthkit.autopsy.coreutils.EscapeUtil;
 import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
-import org.sleuthkit.autopsy.coreutils.StopWatch;
 import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT;
 import org.sleuthkit.autopsy.ingest.FileIngestModule;
 import org.sleuthkit.autopsy.ingest.IngestServices;
@@ -55,11 +38,7 @@ import org.sleuthkit.autopsy.ingest.IngestMessage;
 import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
 import org.sleuthkit.autopsy.ingest.IngestModuleAdapter;
 import org.sleuthkit.autopsy.ingest.IngestJobContext;
-import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
 import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
-import org.sleuthkit.datamodel.BlackboardArtifact;
-import org.sleuthkit.datamodel.BlackboardArtifact.ARTIFACT_TYPE;
-import org.sleuthkit.datamodel.BlackboardAttribute;
 import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.ReadContentInputStream;
 import org.sleuthkit.datamodel.SleuthkitCase;
@@ -97,27 +76,21 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
     private static final Logger logger = Logger.getLogger(KeywordSearchIngestModule.class.getName());
     private IngestServices services = IngestServices.getInstance();
     private Ingester ingester = null;
-    private volatile boolean commitIndex = false; //whether to commit index next time
-    private volatile boolean runSearcher = false; //whether to run searcher next time
-    private Timer commitTimer;
-    private Timer searchTimer;
     private Indexer indexer;
-    private Searcher currentSearcher;
-    private Searcher finalSearcher;
-    private volatile boolean searcherDone = true; //mark as done, until it's inited
-    private Map<Keyword, List<Long>> currentResults;
     //only search images from current ingest, not images previously ingested/indexed
     //accessed read-only by searcher thread
-    private Set<Long> curDataSourceIds;
-    private static final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock(true); //use fairness policy
-    private static final Lock searcherLock = rwLock.writeLock();
-    private boolean processedFiles;
+    private boolean startedSearching = false;
     private SleuthkitCase caseHandle = null;
-    private static List<AbstractFileExtract> textExtractors;
-    private static AbstractFileStringExtract stringExtractor;
+    private List<AbstractFileExtract> textExtractors;
+    private AbstractFileStringExtract stringExtractor;
     private final KeywordSearchJobSettings settings;
     private boolean initialized = false;
     private Tika tikaFormatDetector;
+    private long jobId;
+    private long dataSourceId;
+    private static AtomicInteger instanceCount = new AtomicInteger(0); //just used for logging
+    private int instanceNum = 0;
 
     private enum IngestStatus {
 
@@ -132,6 +105,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
 
     KeywordSearchIngestModule(KeywordSearchJobSettings settings) {
         this.settings = settings;
+        instanceNum = instanceCount.getAndIncrement();
     }
 
     /**
@@ -141,13 +115,12 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
      */
     @Override
     public void startUp(IngestJobContext context) throws IngestModuleException {
-        logger.log(Level.INFO, "init()");
+        logger.log(Level.INFO, "Initializing instance {0}", instanceNum);
         initialized = false;
 
+        jobId = context.getJobId();
         caseHandle = Case.getCurrentCase().getSleuthkitCase();
 
         tikaFormatDetector = new Tika();
 
         ingester = Server.getIngester();
 
         final Server server = KeywordSearch.getServer();
@@ -209,31 +182,12 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
                     NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.init.onlyIdxKwSkipMsg")));
         }
 
-        processedFiles = false;
-        searcherDone = true; //make sure to start the initial currentSearcher
-        //keeps track of all results per run not to repeat reporting the same hits
-        currentResults = new HashMap<>();
-
-        curDataSourceIds = new HashSet<>();
-
         indexer = new Indexer();
 
-        final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000;
-        logger.log(Level.INFO, "Using commit interval (ms): {0}", updateIntervalMs);
-        logger.log(Level.INFO, "Using searcher interval (ms): {0}", updateIntervalMs);
-
-        commitTimer = new Timer(updateIntervalMs, new CommitTimerAction());
-        searchTimer = new Timer(updateIntervalMs, new SearchTimerAction());
-
         initialized = true;
-
-        commitTimer.start();
-        searchTimer.start();
     }
 
     @Override
     public ProcessResult process(AbstractFile abstractFile) {
 
         if (initialized == false) //error initializing indexing/Solr
         {
             logger.log(Level.WARNING, "Skipping processing, module not initialized, file: {0}", abstractFile.getName());
@@ -242,8 +196,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
         }
         try {
             //add data source id of the file to the set, keeping track of images being ingested
-            final long fileSourceId = caseHandle.getFileDataSource(abstractFile);
-            curDataSourceIds.add(fileSourceId);
+            dataSourceId = caseHandle.getFileDataSource(abstractFile);
 
         } catch (TskCoreException ex) {
             logger.log(Level.SEVERE, "Error getting image id of file processed by keyword search: " + abstractFile.getName(), ex);
@@ -260,14 +213,16 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
             return ProcessResult.OK;
         }
 
-        processedFiles = true;
-
-        //check if it's time to commit after previous processing
-        checkRunCommitSearch();
-
         //index the file and content (if the content is supported)
         indexer.indexFile(abstractFile, true);
 
+        // Start searching if it hasn't started already
+        if (!startedSearching) {
+            List<String> keywordListNames = settings.getNamesOfEnabledKeyWordLists();
+            SearchRunner.getInstance().startJob(jobId, dataSourceId, keywordListNames);
+            startedSearching = true;
+        }
+
         return ProcessResult.OK;
     }
 
@@ -277,39 +232,23 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
      */
     @Override
     public void shutDown(boolean ingestJobCancelled) {
+        logger.log(Level.INFO, "Instance {0}", instanceNum);
+
         if (initialized == false) {
             return;
         }
 
         if (ingestJobCancelled) {
+            logger.log(Level.INFO, "Ingest job cancelled");
             stop();
             return;
         }
 
-        commitTimer.stop();
-        //NOTE, we let the 1 before last searcher complete fully, and enqueue the last one
-
-        //cancel searcher timer, ensure unwanted searcher does not start
-        //before we start the final one
-        if (searchTimer.isRunning()) {
-            searchTimer.stop();
-        }
-        runSearcher = false;
-
-        logger.log(Level.INFO, "Running final index commit and search");
-        //final commit
-        commit();
-
+        // Remove from the search list and trigger final commit and final search
+        SearchRunner.getInstance().endJob(jobId);
         postIndexSummary();
 
-        //run one last search as there are probably some new files committed
-        List<String> keywordLists = settings.getNamesOfEnabledKeyWordLists();
-        if (!keywordLists.isEmpty() && processedFiles == true) {
-            finalSearcher = new Searcher(keywordLists, true); //final searcher run
-            finalSearcher.execute();
-        }
-
         //log number of files / chunks in index
         //signal a potential change in number of text_ingested files
         try {
@@ -320,8 +259,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
         } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
             logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files/chunks: ", ex);
         }
-
-        //cleanup done in final searcher
     }
 
     /**
@@ -330,21 +267,7 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
     private void stop() {
         logger.log(Level.INFO, "stop()");
 
-        //stop timer
-        commitTimer.stop();
-        //stop currentSearcher
-        if (currentSearcher != null) {
-            currentSearcher.cancel(true);
-        }
-
-        //cancel searcher timer, ensure unwanted searcher does not start
-        if (searchTimer.isRunning()) {
-            searchTimer.stop();
-        }
-        runSearcher = false;
-
-        //commit uncommited files, don't search again
-        commit();
+        SearchRunner.getInstance().stopJob(jobId);
 
         cleanup();
     }
@@ -354,15 +277,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
      */
     private void cleanup() {
         ingestStatus.clear();
-        currentResults.clear();
-        curDataSourceIds.clear();
-        currentSearcher = null;
-        //finalSearcher = null; //do not collect, might be finalizing
-
-        commitTimer.stop();
-        searchTimer.stop();
-        commitTimer = null;
-        //searchTimer = null; // do not collect, final searcher might still be running, in which case it throws an exception
 
         textExtractors.clear();
         textExtractors = null;
@@ -373,19 +287,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
         initialized = false;
     }
 
-    /**
-     * Commits index and notifies listeners of index update
-     */
-    private void commit() {
-        if (initialized) {
-            logger.log(Level.INFO, "Commiting index");
-            ingester.commit();
-            logger.log(Level.INFO, "Index comitted");
-            //signal a potential change in number of text_ingested files
-            indexChangeNotify();
-        }
-    }
-
     /**
      * Posts inbox message with summary of text_ingested files
      */
@@ -441,73 +342,6 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
         }
     }
 
-    /**
-     * Helper method to notify listeners on index update
-     */
-    private void indexChangeNotify() {
-        //signal a potential change in number of text_ingested files
-        try {
-            final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
-            KeywordSearch.fireNumIndexedFilesChange(null, new Integer(numIndexedFiles));
-        } catch (NoOpenCoreException | KeywordSearchModuleException ex) {
-            logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex);
-        }
-    }
-
-    /**
-     * Check if time to commit, if so, run commit. Then run search if search
-     * timer is also set.
-     */
-    void checkRunCommitSearch() {
-        if (commitIndex) {
-            logger.log(Level.INFO, "Commiting index");
-            commit();
-            commitIndex = false;
-
-            //after commit, check if time to run searcher
-            //NOTE commit/searcher timings don't need to align
-            //in worst case, we will run search next time after commit timer goes off, or at the end of ingest
-            if (searcherDone && runSearcher) {
-                //start search if previous not running
-                List<String> keywordLists = settings.getNamesOfEnabledKeyWordLists();
-                if (!keywordLists.isEmpty()) {
-                    currentSearcher = new Searcher(keywordLists);
-                    currentSearcher.execute();//searcher will stop timer and restart timer when done
-                }
-            }
-        }
-    }
-
-    /**
-     * CommitTimerAction to run by commitTimer Sets a flag to indicate we are
-     * ready for commit
-     */
-    private class CommitTimerAction implements ActionListener {
-
-        private final Logger logger = Logger.getLogger(CommitTimerAction.class.getName());
-
-        @Override
-        public void actionPerformed(ActionEvent e) {
-            commitIndex = true;
-            logger.log(Level.INFO, "CommitTimer awake");
-        }
-    }
-
-    /**
-     * SearchTimerAction to run by searchTimer Sets a flag to indicate we are
-     * ready to search
-     */
-    private class SearchTimerAction implements ActionListener {
-
-        private final Logger logger = Logger.getLogger(SearchTimerAction.class.getName());
-
-        @Override
-        public void actionPerformed(ActionEvent e) {
-            runSearcher = true;
-            logger.log(Level.INFO, "SearchTimer awake");
-        }
-    }
-
     /**
      * File indexer, processes and indexes known/allocated files,
     * unknown/unallocated files and directories accordingly
@ -689,420 +523,4 @@ public final class KeywordSearchIngestModule extends IngestModuleAdapter impleme
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Searcher responsible for searching the current index and writing results
|
|
||||||
* to blackboard and the inbox. Also, posts results to listeners as Ingest
|
|
||||||
* data events. Searches entire index, and keeps track of only new results
|
|
||||||
* to report and save. Runs as a background thread.
|
|
||||||
*/
|
|
||||||
private final class Searcher extends SwingWorker<Object, Void> {
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Searcher has private copies/snapshots of the lists and keywords
|
|
||||||
*/
|
|
||||||
private List<Keyword> keywords; //keywords to search
|
|
||||||
private List<String> keywordLists; // lists currently being searched
|
|
||||||
private Map<String, KeywordList> keywordToList; //keyword to list name mapping
|
|
||||||
private AggregateProgressHandle progressGroup;
|
|
||||||
private final Logger logger = Logger.getLogger(Searcher.class.getName());
|
|
||||||
private boolean finalRun = false;
|
|
||||||
|
|
||||||
Searcher(List<String> keywordLists) {
|
|
||||||
this.keywordLists = new ArrayList<>(keywordLists);
|
|
||||||
this.keywords = new ArrayList<>();
|
|
||||||
this.keywordToList = new HashMap<>();
|
|
||||||
//keywords are populated as searcher runs
|
|
||||||
}
|
|
||||||
|
|
||||||
Searcher(List<String> keywordLists, boolean finalRun) {
|
|
||||||
this(keywordLists);
|
|
||||||
this.finalRun = finalRun;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected Object doInBackground() throws Exception {
|
|
||||||
if (finalRun) {
|
|
||||||
logger.log(Level.INFO, "Pending start of new (final) searcher");
|
|
||||||
} else {
|
|
||||||
logger.log(Level.INFO, "Pending start of new searcher");
|
|
||||||
}
|
|
||||||
|
|
||||||
final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName")
|
|
||||||
+ (finalRun ? (" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : "");
|
|
||||||
progressGroup = AggregateProgressFactory.createSystemHandle(displayName + (" ("
|
|
||||||
+ NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.pendingMsg") + ")"), null, new Cancellable() {
|
|
||||||
@Override
|
|
||||||
public boolean cancel() {
|
|
||||||
logger.log(Level.INFO, "Cancelling the searcher by user.");
|
|
||||||
if (progressGroup != null) {
|
|
||||||
progressGroup.setDisplayName(displayName + " (" + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.cancelMsg") + "...)");
|
|
||||||
}
|
|
||||||
return Searcher.this.cancel(true);
|
|
||||||
}
|
|
||||||
}, null);
|
|
||||||
|
|
||||||
updateKeywords();
|
|
||||||
|
|
||||||
ProgressContributor[] subProgresses = new ProgressContributor[keywords.size()];
|
|
||||||
int i = 0;
|
|
||||||
for (Keyword keywordQuery : keywords) {
|
|
||||||
subProgresses[i] =
|
|
||||||
AggregateProgressFactory.createProgressContributor(keywordQuery.getQuery());
|
|
||||||
progressGroup.addContributor(subProgresses[i]);
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
|
|
||||||
progressGroup.start();
|
|
||||||
|
|
||||||
//block to ensure previous searcher is completely done with doInBackground()
|
|
||||||
//even after previous searcher cancellation, we need to check this
|
|
||||||
searcherLock.lock();
|
|
||||||
final StopWatch stopWatch = new StopWatch();
|
|
||||||
stopWatch.start();
|
|
||||||
try {
|
|
||||||
logger.log(Level.INFO, "Started a new searcher");
|
|
||||||
progressGroup.setDisplayName(displayName);
|
|
||||||
//make sure other searchers are not spawned
|
|
||||||
searcherDone = false;
|
|
||||||
runSearcher = false;
|
|
||||||
if (searchTimer.isRunning()) {
|
|
||||||
searchTimer.stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
int keywordsSearched = 0;
|
|
||||||
|
|
||||||
//updateKeywords();
|
|
||||||
|
|
||||||
for (Keyword keywordQuery : keywords) {
|
|
||||||
if (this.isCancelled()) {
|
|
||||||
logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keywordQuery.getQuery());
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
final String queryStr = keywordQuery.getQuery();
|
|
||||||
final KeywordList list = keywordToList.get(queryStr);
|
|
||||||
final String listName = list.getName();
|
|
||||||
|
|
||||||
//new subProgress will be active after the initial query
|
|
||||||
//when we know number of hits to start() with
|
|
||||||
if (keywordsSearched > 0) {
|
|
||||||
subProgresses[keywordsSearched - 1].finish();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
KeywordSearchQuery del = null;
|
|
||||||
|
|
||||||
boolean isRegex = !keywordQuery.isLiteral();
|
|
||||||
if (isRegex) {
|
|
||||||
del = new TermComponentQuery(keywordQuery);
|
|
||||||
} else {
|
|
||||||
del = new LuceneQuery(keywordQuery);
|
|
||||||
del.escape();
|
|
||||||
}
|
|
||||||
|
|
||||||
//limit search to currently ingested data sources
|
|
||||||
//set up a filter with 1 or more image ids OR'ed
|
|
||||||
final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, curDataSourceIds);
|
|
||||||
del.addFilter(dataSourceFilter);
|
|
||||||
|
|
||||||
Map<String, List<ContentHit>> queryResult;
|
|
||||||
|
|
||||||
try {
|
|
||||||
queryResult = del.performQuery();
|
|
||||||
} catch (NoOpenCoreException ex) {
|
|
||||||
logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), ex);
|
|
||||||
//no reason to continue with next query if recovery failed
|
|
||||||
//or wait for recovery to kick in and run again later
|
|
||||||
//likely case has closed and threads are being interrupted
|
|
||||||
return null;
|
|
||||||
} catch (CancellationException e) {
|
|
||||||
logger.log(Level.INFO, "Cancel detected, bailing during keyword query: {0}", keywordQuery.getQuery());
|
|
||||||
return null;
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), e);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// calculate new results but substracting results already obtained in this ingest
|
|
||||||
// this creates a map of each keyword to the list of unique files that have that hit.
|
|
||||||
Map<Keyword, List<ContentHit>> newResults = filterResults(queryResult, isRegex);
|
|
||||||
|
|
||||||
if (!newResults.isEmpty()) {
|
|
||||||
|
|
||||||
//write results to BB
|
|
||||||
|
|
||||||
//new artifacts created, to report to listeners
|
|
||||||
Collection<BlackboardArtifact> newArtifacts = new ArrayList<>();
|
|
||||||
|
|
||||||
//scale progress bar more more granular, per result sub-progress, within per keyword
|
|
||||||
int totalUnits = newResults.size();
|
|
||||||
subProgresses[keywordsSearched].start(totalUnits);
|
|
||||||
int unitProgress = 0;
|
|
||||||
String queryDisplayStr = keywordQuery.getQuery();
|
|
||||||
if (queryDisplayStr.length() > 50) {
|
|
||||||
queryDisplayStr = queryDisplayStr.substring(0, 49) + "...";
|
|
||||||
}
|
|
||||||
subProgresses[keywordsSearched].progress(listName + ": " + queryDisplayStr, unitProgress);
|
|
||||||
|
|
||||||
|
|
||||||
/* cycle through the keywords returned -- only one unless it was a regexp */
|
|
||||||
for (final Keyword hitTerm : newResults.keySet()) {
|
|
||||||
//checking for cancellation between results
|
|
||||||
if (this.isCancelled()) {
|
|
||||||
logger.log(Level.INFO, "Cancel detected, bailing before new hit processed for query: {0}", keywordQuery.getQuery());
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update progress display
|
|
||||||
String hitDisplayStr = hitTerm.getQuery();
|
|
||||||
if (hitDisplayStr.length() > 50) {
|
|
||||||
hitDisplayStr = hitDisplayStr.substring(0, 49) + "...";
|
|
||||||
}
|
|
||||||
subProgresses[keywordsSearched].progress(listName + ": " + hitDisplayStr, unitProgress);
|
|
||||||
//subProgresses[keywordsSearched].progress(unitProgress);
|
|
||||||
|
|
||||||
// this returns the unique files in the set with the first chunk that has a hit
|
|
||||||
Map<AbstractFile, Integer> contentHitsFlattened = ContentHit.flattenResults(newResults.get(hitTerm));
|
|
||||||
for (final AbstractFile hitFile : contentHitsFlattened.keySet()) {
|
|
||||||
|
|
||||||
// get the snippet for the first hit in the file
|
|
||||||
String snippet;
|
|
||||||
final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(hitTerm.getQuery());
|
|
||||||
int chunkId = contentHitsFlattened.get(hitFile);
|
|
||||||
try {
|
|
||||||
snippet = LuceneQuery.querySnippet(snippetQuery, hitFile.getId(), chunkId, isRegex, true);
|
|
||||||
} catch (NoOpenCoreException e) {
|
|
||||||
logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e);
|
|
||||||
//no reason to continue
|
|
||||||
return null;
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// write the blackboard artifact for this keyword in this file
|
|
||||||
KeywordWriteResult written = del.writeToBlackBoard(hitTerm.getQuery(), hitFile, snippet, listName);
|
|
||||||
if (written == null) {
|
|
||||||
logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hitFile, hitTerm.toString()});
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
newArtifacts.add(written.getArtifact());
|
|
||||||
|
|
||||||
//generate an ingest inbox message for this keyword in this file
|
|
||||||
if (list.getIngestMessages()) {
|
|
||||||
StringBuilder subjectSb = new StringBuilder();
|
|
||||||
StringBuilder detailsSb = new StringBuilder();
|
|
||||||
//final int hitFiles = newResults.size();
|
|
||||||
|
|
||||||
if (!keywordQuery.isLiteral()) {
|
|
||||||
subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl"));
|
|
||||||
} else {
|
|
||||||
subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl"));
|
|
||||||
}
|
|
||||||
//subjectSb.append("<");
|
|
||||||
String uniqueKey = null;
|
|
||||||
BlackboardAttribute attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID());
|
|
||||||
if (attr != null) {
|
|
||||||
final String keyword = attr.getValueString();
|
|
||||||
subjectSb.append(keyword);
|
|
||||||
uniqueKey = keyword.toLowerCase();
|
|
||||||
}
|
|
||||||
|
|
||||||
//subjectSb.append(">");
|
|
||||||
//String uniqueKey = queryStr;
|
|
||||||
|
|
||||||
//details
|
|
||||||
detailsSb.append("<table border='0' cellpadding='4' width='280'>");
|
|
||||||
//hit
|
|
||||||
detailsSb.append("<tr>");
|
|
||||||
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLThLbl"));
|
|
||||||
detailsSb.append("<td>").append(EscapeUtil.escapeHtml(attr.getValueString())).append("</td>");
|
|
||||||
detailsSb.append("</tr>");
|
|
||||||
|
|
||||||
//preview
|
|
||||||
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID());
|
|
||||||
if (attr != null) {
|
|
||||||
detailsSb.append("<tr>");
|
|
||||||
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.previewThLbl"));
|
|
||||||
detailsSb.append("<td>").append(EscapeUtil.escapeHtml(attr.getValueString())).append("</td>");
|
|
||||||
detailsSb.append("</tr>");
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
//file
|
|
||||||
detailsSb.append("<tr>");
|
|
||||||
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.fileThLbl"));
|
|
||||||
detailsSb.append("<td>").append(hitFile.getParentPath()).append(hitFile.getName()).append("</td>");
|
|
||||||
|
|
||||||
detailsSb.append("</tr>");
|
|
||||||
|
|
||||||
|
|
||||||
//list
|
|
||||||
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID());
|
|
||||||
detailsSb.append("<tr>");
|
|
||||||
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.listThLbl"));
|
|
||||||
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
|
|
||||||
detailsSb.append("</tr>");
|
|
||||||
|
|
||||||
//regex
|
|
||||||
if (!keywordQuery.isLiteral()) {
|
|
||||||
attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID());
|
|
||||||
if (attr != null) {
|
|
||||||
detailsSb.append("<tr>");
|
|
||||||
detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExThLbl"));
|
|
||||||
detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
|
|
||||||
detailsSb.append("</tr>");
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
detailsSb.append("</table>");
|
|
||||||
|
|
||||||
services.postMessage(IngestMessage.createDataMessage(KeywordSearchModuleFactory.getModuleName(), subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact()));
|
|
||||||
}
|
|
||||||
} //for each file hit
|
|
||||||
|
|
||||||
++unitProgress;
|
|
||||||
|
|
||||||
}//for each hit term
|
|
||||||
|
|
||||||
//update artifact browser
|
|
||||||
if (!newArtifacts.isEmpty()) {
|
|
||||||
services.fireModuleDataEvent(new ModuleDataEvent(KeywordSearchModuleFactory.getModuleName(), ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts));
|
|
||||||
}
|
|
||||||
} //if has results
|
|
||||||
|
|
||||||
//reset the status text before it goes away
|
|
||||||
subProgresses[keywordsSearched].progress("");
|
|
||||||
|
|
||||||
++keywordsSearched;
|
|
||||||
|
|
||||||
} //for each keyword
|
|
||||||
|
|
||||||
} //end try block
|
|
||||||
catch (Exception ex) {
|
|
||||||
logger.log(Level.WARNING, "searcher exception occurred", ex);
|
|
||||||
} finally {
|
|
||||||
try {
|
|
||||||
finalizeSearcher();
|
|
||||||
stopWatch.stop();
|
|
||||||
logger.log(Level.INFO, "Searcher took to run: {0} secs.", stopWatch.getElapsedTimeSecs());
|
|
||||||
} finally {
|
|
||||||
searcherLock.unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void done() {
|
|
||||||
// call get to see if there were any errors
|
|
||||||
try {
|
|
||||||
get();
|
|
||||||
} catch (InterruptedException | ExecutionException e) {
|
|
||||||
logger.log(Level.SEVERE, "Error performing keyword search: " + e.getMessage());
|
|
||||||
services.postMessage(IngestMessage.createErrorMessage(KeywordSearchModuleFactory.getModuleName(), "Error performing keyword search", e.getMessage()));
|
|
||||||
} // catch and ignore if we were cancelled
|
|
||||||
catch (java.util.concurrent.CancellationException ex) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sync-up the updated keywords from the currently used lists in the XML
|
|
||||||
*/
|
|
||||||
private void updateKeywords() {
|
|
||||||
KeywordSearchListsXML loader = KeywordSearchListsXML.getCurrent();
|
|
||||||
|
|
||||||
this.keywords.clear();
|
|
||||||
this.keywordToList.clear();
|
|
||||||
|
|
||||||
for (String name : this.keywordLists) {
|
|
||||||
KeywordList list = loader.getList(name);
|
|
||||||
for (Keyword k : list.getKeywords()) {
|
|
||||||
this.keywords.add(k);
|
|
||||||
this.keywordToList.put(k.getQuery(), list);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
//perform all essential cleanup that needs to be done right AFTER doInBackground() returns
|
|
||||||
//without relying on done() method that is not guaranteed to run after background thread completes
|
|
||||||
//NEED to call this method always right before doInBackground() returns
|
|
||||||
/**
|
|
||||||
* Performs the cleanup that needs to be done right AFTER
|
|
||||||
* doInBackground() returns without relying on done() method that is not
|
|
||||||
* guaranteed to run after background thread completes REQUIRED to call
|
|
||||||
* this method always right before doInBackground() returns
|
|
||||||
*/
|
|
||||||
private void finalizeSearcher() {
|
|
||||||
logger.log(Level.INFO, "Searcher finalizing");
|
|
||||||
SwingUtilities.invokeLater(new Runnable() {
|
|
||||||
@Override
|
|
||||||
public void run() {
|
|
||||||
progressGroup.finish();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
searcherDone = true; //next currentSearcher can start
|
|
||||||
|
|
||||||
if (finalRun) {
|
|
||||||
//this is the final searcher
|
|
||||||
logger.log(Level.INFO, "The final searcher in this ingest done.");
|
|
||||||
|
|
||||||
//run module cleanup
|
|
||||||
cleanup();
|
|
||||||
} else {
|
|
||||||
//start counting time for a new searcher to start
|
|
||||||
//unless final searcher is pending
|
|
||||||
if (finalSearcher == null) {
|
|
||||||
//we need a new Timer object, because restarting previus will not cause firing of the action
|
|
||||||
final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000;
|
|
||||||
searchTimer = new Timer(updateIntervalMs, new SearchTimerAction());
|
|
||||||
searchTimer.start();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//calculate new results but substracting results already obtained in this ingest
|
|
||||||
//update currentResults map with the new results
|
|
||||||
private Map<Keyword, List<ContentHit>> filterResults(Map<String, List<ContentHit>> queryResult, boolean isRegex) {
|
|
||||||
Map<Keyword, List<ContentHit>> newResults = new HashMap<>();
|
|
||||||
|
|
||||||
for (String termResult : queryResult.keySet()) {
|
|
||||||
List<ContentHit> queryTermResults = queryResult.get(termResult);
|
|
||||||
|
|
||||||
//translate to list of IDs that we keep track of
|
|
||||||
List<Long> queryTermResultsIDs = new ArrayList<>();
|
|
||||||
for (ContentHit ch : queryTermResults) {
|
|
||||||
queryTermResultsIDs.add(ch.getId());
|
|
||||||
}
|
|
||||||
|
|
||||||
Keyword termResultK = new Keyword(termResult, !isRegex);
|
|
||||||
List<Long> curTermResults = currentResults.get(termResultK);
|
|
||||||
if (curTermResults == null) {
|
|
||||||
currentResults.put(termResultK, queryTermResultsIDs);
|
|
||||||
newResults.put(termResultK, queryTermResults);
|
|
||||||
} else {
|
|
||||||
//some AbstractFile hits already exist for this keyword
|
|
||||||
for (ContentHit res : queryTermResults) {
|
|
||||||
if (!curTermResults.contains(res.getId())) {
|
|
||||||
//add to new results
|
|
||||||
List<ContentHit> newResultsFs = newResults.get(termResultK);
|
|
||||||
if (newResultsFs == null) {
|
|
||||||
newResultsFs = new ArrayList<>();
|
|
||||||
newResults.put(termResultK, newResultsFs);
|
|
||||||
}
|
|
||||||
newResultsFs.add(res);
|
|
||||||
curTermResults.add(res.getId());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return newResults;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
@@ -140,6 +140,7 @@ class KeywordSearchListsViewerPanel extends AbstractKeywordSearchPerformer {
         @Override
         public void actionPerformed(ActionEvent e) {
             if (ingestRunning) {
+                SearchRunner.getInstance().addKeywordListsToAllJobs(listsTableModel.getSelectedLists());
                 logger.log(Level.INFO, "Submitted enqueued lists to ingest");
             } else {
                 searchAction(e);
@@ -66,7 +66,7 @@ final class KeywordSearchListsXML extends KeywordSearchListsAbstract {
      * RJCTODO: Move this one to the manager
      * @return
      */
-    static KeywordSearchListsXML getCurrent() {
+    static synchronized KeywordSearchListsXML getCurrent() {
         if (currentInstance == null) {
             currentInstance = new KeywordSearchListsXML(CUR_LISTS_FILE);
             currentInstance.reload();
@ -0,0 +1,713 @@
|
|||||||
|
/*
|
||||||
|
* Autopsy Forensic Browser
|
||||||
|
*
|
||||||
|
* Copyright 2011 - 2014 Basis Technology Corp.
|
||||||
|
* Contact: carrier <at> sleuthkit <dot> org
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.sleuthkit.autopsy.keywordsearch;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map.Entry;
|
||||||
|
import java.util.concurrent.CancellationException;
|
||||||
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
import java.util.logging.Level;
|
||||||
|
import javax.swing.SwingUtilities;
|
||||||
|
import javax.swing.SwingWorker;
|
||||||
|
import java.util.Timer;
|
||||||
|
import java.util.TimerTask;
|
||||||
|
import org.netbeans.api.progress.aggregate.AggregateProgressFactory;
|
||||||
|
import org.netbeans.api.progress.aggregate.AggregateProgressHandle;
|
||||||
|
import org.netbeans.api.progress.aggregate.ProgressContributor;
|
||||||
|
import org.openide.util.Cancellable;
|
||||||
|
import org.openide.util.NbBundle;
|
||||||
|
import org.sleuthkit.autopsy.coreutils.EscapeUtil;
|
||||||
|
import org.sleuthkit.autopsy.coreutils.Logger;
|
||||||
|
import org.sleuthkit.autopsy.coreutils.StopWatch;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestMessage;
|
||||||
|
import org.sleuthkit.autopsy.ingest.IngestServices;
|
||||||
|
import org.sleuthkit.autopsy.ingest.ModuleDataEvent;
|
||||||
|
import org.sleuthkit.datamodel.AbstractFile;
|
||||||
|
import org.sleuthkit.datamodel.BlackboardArtifact;
|
||||||
|
import org.sleuthkit.datamodel.BlackboardAttribute;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Singleton keyword search manager:
|
||||||
|
* Launches search threads for each job and performs commits, both on timed
|
||||||
|
* intervals.
|
||||||
|
*/
|
||||||
|
public final class SearchRunner {
|
||||||
|
private static final Logger logger = Logger.getLogger(SearchRunner.class.getName());
|
||||||
|
private static SearchRunner instance = null;
|
||||||
|
private IngestServices services = IngestServices.getInstance();
|
||||||
|
private Ingester ingester = null; //guarded by "ingester"
|
||||||
|
private volatile boolean updateTimerRunning = false;
|
||||||
|
private Timer updateTimer;
|
||||||
|
private Map<Long, SearchJobInfo> jobs = new HashMap<>(); //guarded by "this"
|
||||||
|
|
||||||
|
SearchRunner() {
|
||||||
|
ingester = Server.getIngester();
|
||||||
|
updateTimer = new Timer("SearchRunner update timer", true); // run as a daemon
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return the singleton object
|
||||||
|
*/
|
||||||
|
public static synchronized SearchRunner getInstance() {
|
||||||
|
if (instance == null) {
|
||||||
|
instance = new SearchRunner();
|
||||||
|
}
|
||||||
|
return instance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param jobId
|
||||||
|
* @param dataSourceId
|
||||||
|
* @param keywordListNames
|
||||||
|
*/
|
||||||
|
public synchronized void startJob(long jobId, long dataSourceId, List<String> keywordListNames) {
|
||||||
|
if (!jobs.containsKey(jobId)) {
|
||||||
|
logger.log(Level.INFO, "Adding job {0}", jobId);
|
||||||
|
SearchJobInfo jobData = new SearchJobInfo(jobId, dataSourceId, keywordListNames);
|
||||||
|
jobs.put(jobId, jobData);
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs.get(jobId).incrementModuleReferenceCount();
|
||||||
|
|
||||||
|
if (jobs.size() > 0) {
|
||||||
|
final int updateIntervalMs = KeywordSearchSettings.getUpdateFrequency().getTime() * 60 * 1000;
|
||||||
|
if (!updateTimerRunning) {
|
||||||
|
updateTimer.scheduleAtFixedRate(new UpdateTimerTask(), updateIntervalMs, updateIntervalMs);
|
||||||
|
updateTimerRunning = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform normal finishing of searching for this job, including one last
|
||||||
|
* commit and search. Blocks until the final search is complete.
|
||||||
|
* @param jobId
|
||||||
|
*/
|
||||||
|
public void endJob(long jobId) {
|
||||||
|
SearchJobInfo job;
|
||||||
|
boolean readyForFinalSearch = false;
|
||||||
|
synchronized(this) {
|
||||||
|
job = jobs.get(jobId);
|
||||||
|
if (job == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only do final search if this is the last module in this job to call endJob()
|
||||||
|
if(job.decrementModuleReferenceCount() == 0) {
|
||||||
|
jobs.remove(jobId);
|
||||||
|
readyForFinalSearch = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (readyForFinalSearch) {
|
||||||
|
commit();
|
||||||
|
doFinalSearch(job); //this will block until it's done
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Immediate stop and removal of job from SearchRunner. Cancels the
|
||||||
|
* associated search worker if it's still running.
|
||||||
|
* @param jobId
|
||||||
|
*/
|
||||||
|
public void stopJob(long jobId) {
|
||||||
|
logger.log(Level.INFO, "Stopping job {0}", jobId);
|
||||||
|
commit();
|
||||||
|
|
||||||
|
SearchJobInfo job;
|
||||||
|
synchronized(this) {
|
||||||
|
job = jobs.get(jobId);
|
||||||
|
if (job == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
//stop currentSearcher
|
||||||
|
SearchRunner.Searcher currentSearcher = job.getCurrentSearcher();
|
||||||
|
if ((currentSearcher != null) && (!currentSearcher.isDone())) {
|
||||||
|
currentSearcher.cancel(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
jobs.remove(jobId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add these lists to all of the jobs
|
||||||
|
* @param keywordListName
|
||||||
|
*/
|
||||||
|
public synchronized void addKeywordListsToAllJobs(List<String> keywordListNames) {
|
||||||
|
for(String listName : keywordListNames) {
|
||||||
|
logger.log(Level.INFO, "Adding keyword list {0} to all jobs", listName);
|
||||||
|
for(SearchJobInfo j : jobs.values()) {
|
||||||
|
j.addKeywordListName(listName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Commits index and notifies listeners of index update
|
||||||
|
*/
|
||||||
|
private void commit() {
|
||||||
|
synchronized(ingester) {
|
||||||
|
ingester.commit();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Signal a potential change in number of text_ingested files
|
||||||
|
try {
|
||||||
|
final int numIndexedFiles = KeywordSearch.getServer().queryNumIndexedFiles();
|
||||||
|
KeywordSearch.fireNumIndexedFilesChange(null, new Integer(numIndexedFiles));
|
||||||
|
} catch (NoOpenCoreException | KeywordSearchModuleException ex) {
|
||||||
|
logger.log(Level.WARNING, "Error executing Solr query to check number of indexed files: ", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A final search waits for any still-running workers, and then executes a
|
||||||
|
* new one and waits until that is done.
|
||||||
|
* @param job
|
||||||
|
*/
|
||||||
|
private void doFinalSearch(SearchJobInfo job) {
|
||||||
|
// Run one last search as there are probably some new files committed
|
||||||
|
logger.log(Level.INFO, "Running final search for jobid {0}", job.getJobId());
|
||||||
|
if (!job.getKeywordListNames().isEmpty()) {
|
||||||
|
try {
|
||||||
|
// In case this job still has a worker running, wait for it to finish
|
||||||
|
job.waitForCurrentWorker();
|
||||||
|
|
||||||
|
SearchRunner.Searcher finalSearcher = new SearchRunner.Searcher(job, true);
|
||||||
|
job.setCurrentSearcher(finalSearcher); //save the ref
|
||||||
|
finalSearcher.execute(); //start thread
|
||||||
|
|
||||||
|
// block until the search is complete
|
||||||
|
finalSearcher.get();
|
||||||
|
|
||||||
|
} catch (InterruptedException | ExecutionException ex) {
|
||||||
|
logger.log(Level.WARNING, "Job {1} final search thread failed: {2}", new Object[]{job.getJobId(), ex});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Timer triggered re-search for each job (does a single index commit first)
|
||||||
|
*/
|
||||||
|
private class UpdateTimerTask extends TimerTask {
|
||||||
|
private final Logger logger = Logger.getLogger(SearchRunner.UpdateTimerTask.class.getName());
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
// If no jobs then cancel the task. If more job(s) come along, a new task will start up.
|
||||||
|
if (jobs.isEmpty()) {
|
||||||
|
this.cancel(); //terminate this timer task
|
||||||
|
updateTimerRunning = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
commit();
|
||||||
|
|
||||||
|
synchronized(SearchRunner.this) {
|
||||||
|
// Spawn a search thread for each job
|
||||||
|
for(Entry<Long, SearchJobInfo> j : jobs.entrySet()) {
|
||||||
|
SearchJobInfo job = j.getValue();
|
||||||
|
// If no lists or the worker is already running then skip it
|
||||||
|
if (!job.getKeywordListNames().isEmpty() && !job.isWorkerRunning()) {
|
||||||
|
Searcher searcher = new Searcher(job);
|
||||||
|
job.setCurrentSearcher(searcher); //save the ref
|
||||||
|
searcher.execute(); //start thread
|
||||||
|
job.setWorkerRunning(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}

    /**
     * Data structure to keep track of keyword lists, current results, and search
     * running status for each jobid
     */
    private class SearchJobInfo {
        private final long jobId;
        private final long dataSourceId;
        // mutable state:
        private volatile boolean workerRunning;
        private List<String> keywordListNames; //guarded by SearchJobInfo.this
        private Map<Keyword, List<Long>> currentResults; //guarded by SearchJobInfo.this
        private SearchRunner.Searcher currentSearcher;
        private AtomicLong moduleReferenceCount = new AtomicLong(0);
        private final Object finalSearchLock = new Object(); //used for a condition wait

        public SearchJobInfo(long jobId, long dataSourceId, List<String> keywordListNames) {
            this.jobId = jobId;
            this.dataSourceId = dataSourceId;
            this.keywordListNames = new ArrayList<>(keywordListNames);
            currentResults = new HashMap<>();
            workerRunning = false;
            currentSearcher = null;
        }

        public long getJobId() {
            return jobId;
        }

        public long getDataSourceId() {
            return dataSourceId;
        }

        public synchronized List<String> getKeywordListNames() {
            return new ArrayList<>(keywordListNames);
        }

        public synchronized void addKeywordListName(String keywordListName) {
            if (!keywordListNames.contains(keywordListName)) {
                keywordListNames.add(keywordListName);
            }
        }

        public synchronized List<Long> currentKeywordResults(Keyword k) {
            return currentResults.get(k);
        }

        public synchronized void addKeywordResults(Keyword k, List<Long> resultsIDs) {
            currentResults.put(k, resultsIDs);
        }

        public boolean isWorkerRunning() {
            return workerRunning;
        }

        public void setWorkerRunning(boolean flag) {
            workerRunning = flag;
        }

        public synchronized SearchRunner.Searcher getCurrentSearcher() {
            return currentSearcher;
        }

        public synchronized void setCurrentSearcher(SearchRunner.Searcher searchRunner) {
            currentSearcher = searchRunner;
        }

        public void incrementModuleReferenceCount() {
            moduleReferenceCount.incrementAndGet();
        }

        public long decrementModuleReferenceCount() {
            return moduleReferenceCount.decrementAndGet();
        }

        /**
         * In case this job still has a worker running, wait for it to finish
         *
         * @throws InterruptedException
         */
        public void waitForCurrentWorker() throws InterruptedException {
            synchronized(finalSearchLock) {
                while(workerRunning) {
                    finalSearchLock.wait(); //wait() releases the lock
                }
            }
        }

        /**
         * Unset workerRunning and wake up thread(s) waiting on finalSearchLock
         */
        public void searchNotify() {
            synchronized(finalSearchLock) {
                workerRunning = false;
                finalSearchLock.notify();
            }
        }
    }
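
    // Note on the hand-off above: waitForCurrentWorker() re-checks the volatile
    // workerRunning flag in a while loop inside synchronized(finalSearchLock), so a
    // spurious wakeup simply loops again, and searchNotify() clears the flag and calls
    // notify() under the same lock. Searcher.doInBackground() below invokes
    // searchNotify() from its finally block, which is what lets doFinalSearch()
    // proceed even when a search pass fails or is cancelled.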

    /**
     * Searcher responsible for searching the current index and writing results
     * to blackboard and the inbox. Also, posts results to listeners as Ingest
     * data events. Searches entire index, and keeps track of only new results
     * to report and save. Runs as a background thread.
     */
    private final class Searcher extends SwingWorker<Object, Void> {

        /**
         * Searcher has private copies/snapshots of the lists and keywords
         */
        private SearchJobInfo job;
        private List<Keyword> keywords; //keywords to search
        private List<String> keywordListNames; // lists currently being searched
        private Map<String, KeywordList> keywordToList; //keyword to list name mapping
        private AggregateProgressHandle progressGroup;
        private final Logger logger = Logger.getLogger(SearchRunner.Searcher.class.getName());
        private boolean finalRun = false;

        Searcher(SearchJobInfo job) {
            this.job = job;
            keywordListNames = job.getKeywordListNames();
            keywords = new ArrayList<>();
            keywordToList = new HashMap<>();
            //keywords are populated as searcher runs
        }

        Searcher(SearchJobInfo job, boolean finalRun) {
            this(job);
            this.finalRun = finalRun;
        }

        @Override
        protected Object doInBackground() throws Exception {
            final String displayName = NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.displayName")
                    + (finalRun ? (" - " + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.finalizeMsg")) : "");
            final String pgDisplayName = displayName + (" (" + NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.doInBackGround.pendingMsg") + ")");
            progressGroup = AggregateProgressFactory.createSystemHandle(pgDisplayName, null, new Cancellable() {
                @Override
                public boolean cancel() {
                    logger.log(Level.INFO, "Cancelling the searcher by user.");
                    if (progressGroup != null) {
                        progressGroup.setDisplayName(displayName + " (" + NbBundle.getMessage(this.getClass(), "SearchRunner.doInBackGround.cancelMsg") + "...)");
                    }
                    return SearchRunner.Searcher.this.cancel(true);
                }
            }, null);

            updateKeywords();

            ProgressContributor[] subProgresses = new ProgressContributor[keywords.size()];
            int i = 0;
            for (Keyword keywordQuery : keywords) {
                subProgresses[i] = AggregateProgressFactory.createProgressContributor(keywordQuery.getQuery());
                progressGroup.addContributor(subProgresses[i]);
                i++;
            }

            progressGroup.start();

            final StopWatch stopWatch = new StopWatch();
            stopWatch.start();
            try {
                progressGroup.setDisplayName(displayName);

                int keywordsSearched = 0;

                for (Keyword keywordQuery : keywords) {
                    if (this.isCancelled()) {
                        logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keywordQuery.getQuery());
                        return null;
                    }

                    final String queryStr = keywordQuery.getQuery();
                    final KeywordList list = keywordToList.get(queryStr);
                    final String listName = list.getName();

                    //new subProgress will be active after the initial query
                    //when we know number of hits to start() with
                    if (keywordsSearched > 0) {
                        subProgresses[keywordsSearched - 1].finish();
                    }

                    KeywordSearchQuery del = null;

                    boolean isRegex = !keywordQuery.isLiteral();
                    if (isRegex) {
                        del = new TermComponentQuery(keywordQuery);
                    } else {
                        del = new LuceneQuery(keywordQuery);
                        del.escape();
                    }

                    //limit search to currently ingested data sources
                    //set up a filter with 1 or more image ids OR'ed
                    final KeywordQueryFilter dataSourceFilter = new KeywordQueryFilter(KeywordQueryFilter.FilterType.DATA_SOURCE, job.getDataSourceId());
                    del.addFilter(dataSourceFilter);

                    Map<String, List<ContentHit>> queryResult;

                    try {
                        queryResult = del.performQuery();
                    } catch (NoOpenCoreException ex) {
                        logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), ex);
                        //no reason to continue with next query if recovery failed
                        //or wait for recovery to kick in and run again later
                        //likely case has closed and threads are being interrupted
                        return null;
                    } catch (CancellationException e) {
                        logger.log(Level.INFO, "Cancel detected, bailing during keyword query: {0}", keywordQuery.getQuery());
                        return null;
                    } catch (Exception e) {
                        logger.log(Level.WARNING, "Error performing query: " + keywordQuery.getQuery(), e);
                        continue;
                    }

                    // calculate new results by subtracting results already obtained in this ingest
                    // this creates a map of each keyword to the list of unique files that have that hit.
                    Map<Keyword, List<ContentHit>> newResults = filterResults(queryResult, isRegex);

                    if (!newResults.isEmpty()) {

                        //write results to BB

                        //new artifacts created, to report to listeners
                        Collection<BlackboardArtifact> newArtifacts = new ArrayList<>();

                        //make the progress bar more granular: per-result sub-progress within each keyword
                        int totalUnits = newResults.size();
                        subProgresses[keywordsSearched].start(totalUnits);
                        int unitProgress = 0;
                        String queryDisplayStr = keywordQuery.getQuery();
                        if (queryDisplayStr.length() > 50) {
                            queryDisplayStr = queryDisplayStr.substring(0, 49) + "...";
                        }
                        subProgresses[keywordsSearched].progress(listName + ": " + queryDisplayStr, unitProgress);

                        // cycle through the keywords returned -- only one unless it was a regexp
                        for (final Keyword hitTerm : newResults.keySet()) {
                            //checking for cancellation between results
                            if (this.isCancelled()) {
                                logger.log(Level.INFO, "Cancel detected, bailing before new hit processed for query: {0}", keywordQuery.getQuery());
                                return null;
                            }

                            // update progress display
                            String hitDisplayStr = hitTerm.getQuery();
                            if (hitDisplayStr.length() > 50) {
                                hitDisplayStr = hitDisplayStr.substring(0, 49) + "...";
                            }
                            subProgresses[keywordsSearched].progress(listName + ": " + hitDisplayStr, unitProgress);

                            // this returns the unique files in the set with the first chunk that has a hit
                            Map<AbstractFile, Integer> contentHitsFlattened = ContentHit.flattenResults(newResults.get(hitTerm));
                            for (final AbstractFile hitFile : contentHitsFlattened.keySet()) {

                                // get the snippet for the first hit in the file
                                String snippet;
                                final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(hitTerm.getQuery());
                                int chunkId = contentHitsFlattened.get(hitFile);
                                try {
                                    snippet = LuceneQuery.querySnippet(snippetQuery, hitFile.getId(), chunkId, isRegex, true);
                                } catch (NoOpenCoreException e) {
                                    logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e);
                                    //no reason to continue
                                    return null;
                                } catch (Exception e) {
                                    logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e);
                                    continue;
                                }

                                // write the blackboard artifact for this keyword in this file
                                KeywordWriteResult written = del.writeToBlackBoard(hitTerm.getQuery(), hitFile, snippet, listName);
                                if (written == null) {
                                    logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hitFile, hitTerm.toString()});
                                    continue;
                                }

                                newArtifacts.add(written.getArtifact());

                                //generate an ingest inbox message for this keyword in this file
                                if (list.getIngestMessages()) {
                                    StringBuilder subjectSb = new StringBuilder();
                                    StringBuilder detailsSb = new StringBuilder();

                                    if (!keywordQuery.isLiteral()) {
                                        subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExpHitLbl"));
                                    } else {
                                        subjectSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitLbl"));
                                    }
                                    String uniqueKey = null;
                                    BlackboardAttribute attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getTypeID());
                                    if (attr != null) {
                                        final String keyword = attr.getValueString();
                                        subjectSb.append(keyword);
                                        uniqueKey = keyword.toLowerCase();
                                    }

                                    //details
                                    detailsSb.append("<table border='0' cellpadding='4' width='280'>");
                                    //hit
                                    detailsSb.append("<tr>");
                                    detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.kwHitThLbl"));
                                    detailsSb.append("<td>").append(EscapeUtil.escapeHtml(attr.getValueString())).append("</td>");
                                    detailsSb.append("</tr>");

                                    //preview
                                    attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getTypeID());
                                    if (attr != null) {
                                        detailsSb.append("<tr>");
                                        detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.previewThLbl"));
                                        detailsSb.append("<td>").append(EscapeUtil.escapeHtml(attr.getValueString())).append("</td>");
                                        detailsSb.append("</tr>");
                                    }

                                    //file
                                    detailsSb.append("<tr>");
                                    detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.fileThLbl"));
                                    detailsSb.append("<td>").append(hitFile.getParentPath()).append(hitFile.getName()).append("</td>");
                                    detailsSb.append("</tr>");

                                    //list
                                    attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_SET_NAME.getTypeID());
                                    detailsSb.append("<tr>");
                                    detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.listThLbl"));
                                    detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
                                    detailsSb.append("</tr>");

                                    //regex
                                    if (!keywordQuery.isLiteral()) {
                                        attr = written.getAttribute(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getTypeID());
                                        if (attr != null) {
                                            detailsSb.append("<tr>");
                                            detailsSb.append(NbBundle.getMessage(this.getClass(), "KeywordSearchIngestModule.regExThLbl"));
                                            detailsSb.append("<td>").append(attr.getValueString()).append("</td>");
                                            detailsSb.append("</tr>");
                                        }
                                    }
                                    detailsSb.append("</table>");

                                    services.postMessage(IngestMessage.createDataMessage(KeywordSearchModuleFactory.getModuleName(), subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact()));
                                }
                            } //for each file hit

                            ++unitProgress;

                        }//for each hit term

                        //update artifact browser
                        if (!newArtifacts.isEmpty()) {
                            services.fireModuleDataEvent(new ModuleDataEvent(KeywordSearchModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts));
                        }
                    } //if has results

                    //reset the status text before it goes away
                    subProgresses[keywordsSearched].progress("");

                    ++keywordsSearched;

                } //for each keyword

            } //end try block
            catch (Exception ex) {
                logger.log(Level.WARNING, "searcher exception occurred", ex);
            } finally {
                try {
                    finalizeSearcher();
                    stopWatch.stop();

                    logger.log(Level.INFO, "Searcher took to run: {0} secs.", stopWatch.getElapsedTimeSecs());
                } finally {
                    // In case a thread is waiting on this worker to be done
                    job.searchNotify();
                }
            }

            return null;
        }

        @Override
        protected void done() {
            // call get to see if there were any errors
            try {
                get();
            } catch (InterruptedException | ExecutionException e) {
                logger.log(Level.SEVERE, "Error performing keyword search: " + e.getMessage());
                services.postMessage(IngestMessage.createErrorMessage(KeywordSearchModuleFactory.getModuleName(), "Error performing keyword search", e.getMessage()));
            } // catch and ignore if we were cancelled
            catch (java.util.concurrent.CancellationException ex) {
            }
        }
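
        // Note: done() runs on the EDT after doInBackground() has returned; get() is
        // called only to surface any exception thrown on the background thread, and
        // CancellationException is deliberately swallowed because a user cancel is
        // not an error condition here.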

        /**
         * Sync-up the updated keywords from the currently used lists in the XML
         */
        private void updateKeywords() {
            KeywordSearchListsXML loader = KeywordSearchListsXML.getCurrent();

            keywords.clear();
            keywordToList.clear();

            for (String name : keywordListNames) {
                KeywordList list = loader.getList(name);
                for (Keyword k : list.getKeywords()) {
                    this.keywords.add(k);
                    this.keywordToList.put(k.getQuery(), list);
                }
            }
        }
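
        // Because updateKeywords() runs at the top of every doInBackground() pass and
        // re-reads the lists from KeywordSearchListsXML, keyword edits made to the
        // selected lists while ingest is running are presumably picked up by the next
        // timer-triggered search without restarting the module.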

        /**
         * Performs the cleanup that needs to be done right AFTER
         * doInBackground() returns, without relying on the done() method, which
         * is not guaranteed to run.
         */
        private void finalizeSearcher() {
            SwingUtilities.invokeLater(new Runnable() {
                @Override
                public void run() {
                    progressGroup.finish();
                }
            });
        }

        //calculate new results by subtracting results already obtained in this ingest
        //update currentResults map with the new results
        private Map<Keyword, List<ContentHit>> filterResults(Map<String, List<ContentHit>> queryResult, boolean isRegex) {
            Map<Keyword, List<ContentHit>> newResults = new HashMap<>();

            for (String termResult : queryResult.keySet()) {
                List<ContentHit> queryTermResults = queryResult.get(termResult);

                //translate to list of IDs that we keep track of
                List<Long> queryTermResultsIDs = new ArrayList<>();
                for (ContentHit ch : queryTermResults) {
                    queryTermResultsIDs.add(ch.getId());
                }

                Keyword termResultK = new Keyword(termResult, !isRegex);
                List<Long> curTermResults = job.currentKeywordResults(termResultK);
                if (curTermResults == null) {
                    job.addKeywordResults(termResultK, queryTermResultsIDs);
                    newResults.put(termResultK, queryTermResults);
                } else {
                    //some AbstractFile hits already exist for this keyword
                    for (ContentHit res : queryTermResults) {
                        if (!curTermResults.contains(res.getId())) {
                            //add to new results
                            List<ContentHit> newResultsFs = newResults.get(termResultK);
                            if (newResultsFs == null) {
                                newResultsFs = new ArrayList<>();
                                newResults.put(termResultK, newResultsFs);
                            }
                            newResultsFs.add(res);
                            curTermResults.add(res.getId());
                        }
                    }
                }
            }

            return newResults;
        }
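
        // Note: filterResults() deduplicates against the whole ingest job, not just
        // this pass -- the object IDs recorded in SearchJobInfo.currentResults persist
        // across timer-triggered searches, so each file is reported at most once per
        // keyword for the lifetime of the job.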

    }
}