TSK-277 Add more advanced regular expression searching

- initial TermsComponent regex support without full results navigation/preview
This commit is contained in:
adam-m 2011-12-22 14:41:34 -05:00
parent 0a413a98b6
commit c33608d61f
3 changed files with 178 additions and 0 deletions

View File

@ -6,6 +6,15 @@
<code-name-base>org.sleuthkit.autopsy.keywordsearch</code-name-base>
<suite-component/>
<module-dependencies>
<dependency>
<code-name-base>org.netbeans.api.progress</code-name-base>
<build-prerequisite/>
<compile-dependency/>
<run-dependency>
<release-version>1</release-version>
<specification-version>1.24.1</specification-version>
</run-dependency>
</dependency>
<dependency>
<code-name-base>org.openide.awt</code-name-base>
<build-prerequisite/>

View File

@ -18,12 +18,42 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import javax.swing.SwingWorker;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.client.solrj.response.TermsResponse.Term;
import org.netbeans.api.progress.ProgressHandle;
import org.netbeans.api.progress.ProgressHandleFactory;
import org.openide.nodes.AbstractNode;
import org.openide.nodes.ChildFactory;
import org.openide.nodes.Children;
import org.openide.nodes.Node;
import org.openide.windows.TopComponent;
import org.sleuthkit.autopsy.corecomponents.DataResultTopComponent;
import org.sleuthkit.autopsy.datamodel.KeyValueNode;
import org.sleuthkit.autopsy.datamodel.KeyValueThing;
public class RegexQuery implements KeywordSearchQuery {
// value passed to SolrQuery.setTermsLimit() in execute()
private static final int TERMS_UNLIMITED = -1;
//corresponds to field in Solr schema, analyzed with white-space tokenizer only
private static final String TERMS_SEARCH_FIELD = "text_ws";
// request handler path set as the query type of the terms request
private static final String TERMS_HANDLER = "/terms";
// value passed to SolrQuery.setTimeAllowed() in execute()
private static final int TERMS_TIMEOUT = 90 * 1000; //in ms
// regular expression supplied to the constructor; used as the terms regex
private String query;
private static Logger logger = Logger.getLogger(RegexQuery.class.getName());
public RegexQuery(String query) {
this.query = query;
@ -44,5 +74,138 @@ public class RegexQuery implements KeywordSearchQuery {
/**
 * Builds a TermsComponent request for the regular expression held by this
 * query, logs it, and starts a RegexQueryWorker to run it.
 */
@Override
public void execute() {
    final SolrQuery termsQuery = new SolrQuery();

    // address the terms handler and enable the terms component
    termsQuery.setQueryType(TERMS_HANDLER);
    termsQuery.setTerms(true);
    termsQuery.setTermsLimit(TERMS_UNLIMITED);

    // restrict the terms to those matching the regex in the search field
    termsQuery.setTermsRegex(query);
    termsQuery.addTermsField(TERMS_SEARCH_FIELD);
    termsQuery.setTimeAllowed(TERMS_TIMEOUT);

    logger.log(Level.INFO, "Executing TermsComponent query: " + termsQuery);

    new RegexQueryWorker(termsQuery).execute();
}
/**
 * Maps terms to generic nodes with key/value properties and opens a
 * keyword search result window that lists them.
 *
 * Each term becomes one KeyValueThing named after the term, carrying the
 * term frequency under the "#files" key and a sequential id starting at 1.
 *
 * @param terms terms with their frequencies; when null nothing is
 *              published and no result window is created
 */
private void publishNodes(List<Term> terms) {
    if (terms == null) {
        // no term list available (e.g. the query failed): nothing to show
        return;
    }

    Collection<KeyValueThing> things = new ArrayList<KeyValueThing>();
    int termID = 1;
    long totalMatches = 0;
    for (Term term : terms) {
        Map<String, Object> kvs = new LinkedHashMap<String, Object>();
        long matches = term.getFrequency();
        kvs.put("#files", matches);
        // advance the id per term; it used to stay at 1 for every entry
        things.add(new KeyValueThing(term.getTerm(), kvs, termID++));
        totalMatches += matches;
    }

    Node rootNode;
    if (things.isEmpty()) {
        rootNode = Node.EMPTY;
    } else {
        Children childThingNodes =
                Children.create(new RegexResultChildFactory(things), true);
        rootNode = new AbstractNode(childThingNodes);
    }

    String pathText = "RegEx query: " + query + " Total file matches: " + Long.toString(totalMatches);
    TopComponent searchResultWin = DataResultTopComponent.createInstance("Keyword search", pathText, rootNode, things.size());
    searchResultWin.requestActive(); // make it the active top component
}
/**
 * Child factory that turns a fixed collection of KeyValueThing objects
 * into leaf KeyValueNode children.
 */
class RegexResultChildFactory extends ChildFactory<KeyValueThing> {

    private final Collection<KeyValueThing> things;

    /**
     * @param things the keys to create child nodes for
     */
    RegexResultChildFactory(Collection<KeyValueThing> things) {
        this.things = things;
    }

    /**
     * Adds all keys in one pass.
     *
     * @param toPopulate list to receive the keys
     * @return always true: the key list is complete after a single call.
     *         Returning the result of addAll() would yield false for an
     *         empty collection, which ChildFactory treats as a request to
     *         call this method again.
     */
    @Override
    protected boolean createKeys(List<KeyValueThing> toPopulate) {
        toPopulate.addAll(things);
        return true;
    }

    /**
     * @param thing key to wrap
     * @return a leaf KeyValueNode for the key
     */
    @Override
    protected Node createNodeForKey(KeyValueThing thing) {
        return new KeyValueNode(thing, Children.LEAF);
    }
}
/**
 * SwingWorker that runs the terms query in doInBackground() and publishes
 * the matching terms from done(), reporting activity through a
 * ProgressHandle.
 */
class RegexQueryWorker extends SwingWorker<List<Term>, Void> {

    private final SolrQuery q;
    // created in doInBackground(); stays null if that method never ran
    private ProgressHandle progress;

    /**
     * @param q fully configured terms query to run
     */
    RegexQueryWorker(SolrQuery q) {
        this.q = q;
    }

    /**
     * Runs the terms query.
     *
     * @return the terms found for the search field (possibly empty), or
     *         null if the query failed with a SolrServerException
     */
    @Override
    protected List<Term> doInBackground() throws Exception {
        progress = ProgressHandleFactory.createHandle("RegEx query task");
        progress.start();
        progress.progress("Running RegEx query.");

        Server.Core solrCore = KeywordSearch.getServer().getCore();

        List<Term> terms = null;
        try {
            TermsResponse tr = solrCore.queryTerms(q);
            // the response may carry no terms section at all
            if (tr != null) {
                terms = tr.getTerms(TERMS_SEARCH_FIELD);
            }
        } catch (SolrServerException ex) {
            logger.log(Level.SEVERE, "Error executing the regex terms query: " + query, ex);
            return null; // failure: done() skips creating the result view
        }

        if (terms == null) {
            // successful query without an entry for the field: no matches
            terms = new ArrayList<Term>();
        }

        progress.progress("RegEx query completed.");

        //debug query
        if (logger.isLoggable(Level.INFO)) {
            // only build the potentially large dump when it will be logged
            StringBuilder sb = new StringBuilder();
            for (Term t : terms) {
                sb.append(t.getTerm()).append(" : ").append(t.getFrequency()).append("\n");
            }
            logger.log(Level.INFO, "TermsComponent query result: " + sb.toString());
        }
        //end debug query

        return terms;
    }

    /**
     * Publishes the result unless the worker was cancelled or the query
     * failed, and always finishes the progress handle if one was created.
     */
    @Override
    protected void done() {
        try {
            if (!this.isCancelled()) {
                List<Term> terms = get();
                // null marks a failed query; there is nothing to display
                if (terms != null) {
                    publishNodes(terms);
                }
            }
        } catch (InterruptedException e) {
            logger.log(Level.INFO, "Exception while executing regex query,", e);
        } catch (ExecutionException e) {
            logger.log(Level.INFO, "Exception while executing regex query,", e);
        } finally {
            // finish on the cancelled path too, so the handle is not left running
            if (progress != null) {
                progress.finish();
            }
        }
    }
}
}

View File

@ -37,6 +37,7 @@ import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
import org.apache.solr.client.solrj.request.CoreAdminRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.openide.modules.InstalledFileLocator;
import org.openide.util.Exceptions;
import org.sleuthkit.autopsy.casemodule.Case;
@ -273,6 +274,11 @@ class Server {
return solrCore.query(sq);
}
/**
 * Executes the given query through solrCore and returns the terms part
 * of the response.
 *
 * @param sq query to execute
 * @return the value of QueryResponse.getTermsResponse() for the query
 * @throws SolrServerException if the underlying query call fails
 */
TermsResponse queryTerms(SolrQuery sq) throws SolrServerException {
    return solrCore.query(sq).getTermsResponse();
}
void close() {
try {
CoreAdminRequest.unloadCore(this.name, solr);