added query manager and child factories supporting single/multiple word and regex queries, and two ways of presenting result nodes (a detailed view with all hits, and a collapsed view)

This commit is contained in:
adam-m 2012-01-04 15:08:12 -05:00
parent c890d69ac5
commit b8139e8c05
6 changed files with 233 additions and 113 deletions
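For orientation, a minimal sketch of how a caller can drive the new manager in either presentation mode. It relies only on the constructor, validate(), and execute() calls visible in the diffs below; the wrapping method name is illustrative.

// Sketch (not part of this commit): run the same query in both presentation modes.
// QueryType.WORD, Presentation.COLLAPSE/DETAIL and displayErrorDialog(...) all appear
// in the diffs below; runKeywordSearch itself is an assumed helper.
private void runKeywordSearch(String query) {
    // collapsed view: one node per query, with matching files as children
    KeywordSearchQueryManager collapsed =
            new KeywordSearchQueryManager(query, QueryType.WORD, Presentation.COLLAPSE);
    if (collapsed.validate()) {
        collapsed.execute();
    } else {
        displayErrorDialog("Invalid query syntax.");
    }

    // detailed view: per-hit breakdown for every query
    KeywordSearchQueryManager detailed =
            new KeywordSearchQueryManager(query, QueryType.WORD, Presentation.DETAIL);
    if (detailed.validate()) {
        detailed.execute();
    }
}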

View File: KeywordSearchDataExplorer.java

@ -79,12 +79,18 @@ public class KeywordSearchDataExplorer implements DataExplorer {
* @param query
* @param queryType
*/
private void search(String query, QueryType queryType) {
KeywordSearchQueryManager man = new KeywordSearchQueryManager(query, queryType, Presentation.DETAIL);
//TODO populate map with queries for keyword list search here
//Map<String, Boolean>qmap = new LinkedHashMap<String, Boolean>();
//qmap.put(query, Boolean.FALSE);
//KeywordSearchQueryManager man = new KeywordSearchQueryManager(query, queryType, Presentation.COLLAPSE);
KeywordSearchQueryManager man = new KeywordSearchQueryManager(query, queryType, Presentation.COLLAPSE);
if (man.validate()) {
man.execute();
} else {
displayErrorDialog("Invalid query syntax: " + query);
displayErrorDialog("Invalid query syntax." );
}
}
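The commented-out TODO above hints at a keyword-list search where several queries are handed to the manager at once. Below is a purely hypothetical sketch of how that map could be populated; the map-taking constructor does not exist in this commit, and the meaning of the Boolean flag (literal vs. regex) is an assumption.

// Hypothetical keyword-list path (not in this commit). The Boolean flag semantics
// (FALSE = literal keyword, TRUE = regex) are assumed, mirroring the TODO above.
Map<String, Boolean> qmap = new LinkedHashMap<String, Boolean>();
qmap.put("confidential", Boolean.FALSE);          // literal keyword
qmap.put("\\d{3}-\\d{2}-\\d{4}", Boolean.TRUE);   // regex keyword (SSN-like pattern)
// A future constructor such as KeywordSearchQueryManager(qmap, Presentation.COLLAPSE)
// could then fan the map out to per-query delegates, much as execute() already
// iterates over queryDelegates for a single query.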

View File: KeywordSearchQuery.java

@ -18,7 +18,9 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.util.Collection;
import java.util.List;
import org.apache.solr.client.solrj.response.TermsResponse.Term;
import org.sleuthkit.datamodel.FsContent;
public interface KeywordSearchQuery {
@ -59,5 +61,11 @@ public interface KeywordSearchQuery {
*/
public String getEscapedQueryString();
/**
* Get the terms associated with the query, if any.
* @return collection of terms associated with the query
*/
public Collection<Term> getTerms();
}

View File: KeywordSearchQueryManager.java

@ -19,11 +19,19 @@
package org.sleuthkit.autopsy.keywordsearch;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.solr.client.solrj.response.TermsResponse.Term;
import org.openide.nodes.AbstractNode;
import org.openide.nodes.Children;
import org.openide.nodes.Node;
import org.openide.windows.TopComponent;
import org.sleuthkit.autopsy.corecomponents.DataResultTopComponent;
import org.sleuthkit.autopsy.datamodel.KeyValueThing;
import org.sleuthkit.autopsy.keywordsearch.KeywordSearch.QueryType;
import org.sleuthkit.datamodel.FsContent;
@ -81,29 +89,45 @@ public class KeywordSearchQueryManager implements KeywordSearchQuery {
default:
;
}
escape();
queryDelegates.add(del);
}
escape();
}
@Override
public void execute() {
//execute and present the query
//delegate query to query objects and presentation child factories
if (queryType == QueryType.WORD || presentation == Presentation.DETAIL) {
for (KeywordSearchQuery q : queryDelegates) {
q.execute();
}
} else {
//Collapsed view
Collection<KeyValueThing> things = new ArrayList<KeyValueThing>();
int queryID = 0;
for (KeywordSearchQuery q : queryDelegates) {
List<FsContent> fsContents = q.performQuery();
//TODO create view, send to proper factory
Map<String, Object> kvs = new LinkedHashMap<String, Object>();
final String queryStr = q.getQueryString();
things.add(new KeyValueThingQuery(queryStr, kvs, ++queryID, q));
}
Node rootNode = null;
if (things.size() > 0) {
Children childThingNodes =
Children.create(new KeywordSearchResultFactory(queries.keySet(), things, Presentation.COLLAPSE), true);
rootNode = new AbstractNode(childThingNodes);
} else {
rootNode = Node.EMPTY;
}
final String pathText = "Keyword query";
TopComponent searchResultWin = DataResultTopComponent.createInstance("Keyword search", pathText, rootNode, things.size());
searchResultWin.requestActive();
}
}
@Override
@ -123,6 +147,19 @@ public class KeywordSearchQueryManager implements KeywordSearchQuery {
return null;
}
@Override
public boolean validate() {
boolean allValid = true;
for (KeywordSearchQuery tcq : queryDelegates) {
if (!tcq.validate()) {
logger.log(Level.WARNING, "Query has invalid syntax: " + tcq.getQueryString());
allValid = false;
break;
}
}
return allValid;
}
@Override
public String getEscapedQueryString() {
StringBuilder sb = new StringBuilder();
@ -144,15 +181,24 @@ public class KeywordSearchQueryManager implements KeywordSearchQuery {
}
@Override
public boolean validate() {
boolean allValid = true;
for (KeywordSearchQuery tcq : queryDelegates) {
if (!tcq.validate()) {
logger.log(Level.WARNING, "Query has invalid syntax: " + tcq.getQueryString());
allValid = false;
break;
}
}
return allValid;
public Collection<Term> getTerms() {
return null;
}
}
/*
* custom KeyValueThing that also stores query object to execute
*/
class KeyValueThingQuery extends KeyValueThing {
private KeywordSearchQuery query;
KeywordSearchQuery getQuery() {
return query;
}
public KeyValueThingQuery(String name, Map<String, Object> map, int id, KeywordSearchQuery query) {
super(name, map, id);
this.query = query;
}
}
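A brief usage note on KeyValueThingQuery: the collapsed-view child factory later recovers the executable query by downcasting the KeyValueThing it receives, roughly as sketched here (this mirrors ResultCollapsedChildFactory.createKeys in the next file; the local variable names are illustrative).

// Sketch: recover the query object that execute() stored in the KeyValueThingQuery.
KeyValueThingQuery queryThing = (KeyValueThingQuery) thing;  // 'thing' is the factory key
KeywordSearchQuery delegate = queryThing.getQuery();
List<FsContent> hits = delegate.performQuery();              // files matching this query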

View File: KeywordSearchResultFactory.java

@ -21,13 +21,14 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.client.solrj.response.TermsResponse.Term;
import org.openide.nodes.ChildFactory;
import org.openide.nodes.Children;
import org.openide.nodes.Node;
@ -114,11 +115,23 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
@Override
protected boolean createKeys(List<KeyValueThing> toPopulate) {
int id = 0;
for (String query : queries) {
Map<String, Object> map = new LinkedHashMap<String, Object>();
initCommonProperties(map);
setCommonProperty(map, CommonPropertyTypes.QUERY, query);
toPopulate.add(new KeyValueThing(query, map, ++id));
if (presentation == Presentation.DETAIL) {
for (String query : queries) {
Map<String, Object> map = new LinkedHashMap<String, Object>();
initCommonProperties(map);
setCommonProperty(map, CommonPropertyTypes.QUERY, query);
toPopulate.add(new KeyValueThing(query, map, ++id));
}
} else {
for (KeyValueThing thing : things) {
//Map<String, Object> map = new LinkedHashMap<String, Object>();
Map<String, Object> map = thing.getMap();
initCommonProperties(map);
final String query = thing.getName();
setCommonProperty(map, CommonPropertyTypes.QUERY, query);
//toPopulate.add(new KeyValueThing(query, map, ++id));
toPopulate.add(thing);
}
}
return true;
@ -129,52 +142,79 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
ChildFactory<KeyValueThing> childFactory = null;
switch (presentation) {
case COLLAPSE:
childFactory = null;
childFactory = new ResultCollapsedChildFactory(thing);
break;
case DETAIL:
childFactory = new ResulTermsMatchesChildFactory(things);
break;
default:
default:
}
return new KeyValueNode(thing, Children.create(childFactory, true));
return new KeyValueNode(thing, Children.create(childFactory, true));
}
/**
* Factory that produces the collapsed view of all FsContent matches per query.
* The node it produces is a child node, and the factory actually executes the query.
*/
class ResulCollapsedChildFactory extends ChildFactory<KeyValueThing> {
class ResultCollapsedChildFactory extends ChildFactory<KeyValueThing> {
KeyValueThing queryThing;
ResulCollapsedChildFactory(KeyValueThing queryThing) {
ResultCollapsedChildFactory(KeyValueThing queryThing) {
this.queryThing = queryThing;
}
@Override
protected boolean createKeys(List<KeyValueThing> toPopulate) {
String origQuery = queryThing.getName();
TermComponentQuery tcq = new TermComponentQuery(origQuery);
Map<String,Object> map = new LinkedHashMap<String,Object>();
if (tcq.validate()) {
map.put("query_valid", true);
return true;
}
else {
map.put("query_valid", false);
final String origQuery = queryThing.getName();
final KeyValueThingQuery queryThingQuery = (KeyValueThingQuery) queryThing;
final KeywordSearchQuery tcq = queryThingQuery.getQuery();
if (!tcq.validate()) {
//TODO mark the particular query node RED
return false;
}
//return toPopulate.addAll(things);
//execute the query and get fscontents matching
List<FsContent> fsContents = tcq.performQuery();
//construct a Solr query using aggregated terms to get highlighting
//the query is executed later on demand
StringBuilder highlightQuery = new StringBuilder();
Collection<Term> terms = tcq.getTerms();
for (Term term : terms) {
final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
highlightQuery.append(termS);
highlightQuery.append(" ");
}
int resID = 0;
for (FsContent f : fsContents) {
//get unique match result files
Map<String, Object> resMap = new LinkedHashMap<String, Object>();
AbstractFsContentNode.fillPropertyMap(resMap, f);
setCommonProperty(resMap, CommonPropertyTypes.MATCH, f.getName());
toPopulate.add(new KeyValueThingContent(f.getName(), resMap, ++resID, f, highlightQuery.toString()));
}
return true;
}
@Override
protected Node createNodeForKey(KeyValueThing thing) {
return new KeyValueNode(thing, Children.LEAF);
//return new KeyValueNode(thing, Children.LEAF);
//return new KeyValueNode(thing, Children.create(new ResultFilesChildFactory(thing), true));
final KeyValueThingContent thingContent = (KeyValueThingContent) thing;
final Content content = thingContent.getContent();
final String query = thingContent.getQuery();
Node kvNode = new KeyValueNode(thingContent, Children.LEAF);
//wrap in KeywordSearchFilterNode for the markup content, might need to override FilterNode for more customization
HighlightedMatchesSource highlights = new HighlightedMatchesSource(content, query);
return new KeywordSearchFilterNode(highlights, kvNode, query);
}
}
@ -223,7 +263,7 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
List<FsContent> matches = filesQuery.performQuery();
//get unique match result files
Set<FsContent> uniqueMatches = new TreeSet<FsContent>();
Set<FsContent> uniqueMatches = new LinkedHashSet<FsContent>();
uniqueMatches.addAll(matches);
int resID = 0;
@ -250,12 +290,12 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
if (contentStr != null) { //if contentStr is null, there was an error getting it from Solr; handle that by not filtering the file out
//perform java regex to validate match from Solr
String origQuery = thingContent.getQuery();
//escape the regex query because it may contain special characters from the previous match
//since it's a match result, we can assume literal pattern
origQuery = Pattern.quote(origQuery);
Pattern p = Pattern.compile(origQuery, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
Matcher m = p.matcher(contentStr);
matchFound = m.find();
}
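Since the block above relies on Pattern.quote to treat the previous match as a literal, a quick standalone illustration of that behavior (standard java.util.regex, not code from this commit):

// Pattern.quote wraps its argument in \Q...\E, so regex metacharacters match literally.
Pattern p = Pattern.compile(Pattern.quote("1.2.3"), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
System.out.println(p.matcher("Version 1.2.3 shipped").find()); // true
System.out.println(p.matcher("Version 1a2b3 shipped").find()); // false: the dots are literal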
@ -269,31 +309,29 @@ public class KeywordSearchResultFactory extends ChildFactory<KeyValueThing> {
return null;
}
}
}
}
/*
* custom KeyValueThing that also stores retrieved Content and query string used
*/
class KeyValueThingContent extends KeyValueThing {
private Content content;
private String query;
Content getContent() {
return content;
}
/*
* custom KeyValueThing that also stores retrieved Content and query string used
*/
class KeyValueThingContent extends KeyValueThing {
String getQuery() {
return query;
}
private Content content;
private String query;
Content getContent() {
return content;
}
String getQuery() {
return query;
}
public KeyValueThingContent(String name, Map<String, Object> map, int id, Content content, String query) {
super(name, map, id);
this.content = content;
this.query = query;
}
public KeyValueThingContent(String name, Map<String, Object> map, int id, Content content, String query) {
super(name, map, id);
this.content = content;
this.query = query;
}
}
}

View File: LuceneQuery.java

@ -18,17 +18,17 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.TermsResponse.Term;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.openide.nodes.Node;
@ -67,6 +67,11 @@ public class LuceneQuery implements KeywordSearchQuery {
public String getQueryString() {
return this.query;
}
@Override
public Collection<Term> getTerms() {
return null;
}
/**
* Just perform the query and return result without updating the GUI
@ -129,10 +134,11 @@ public class LuceneQuery implements KeywordSearchQuery {
escape();
List<FsContent> matches = performQuery();
String pathText = "Lucene query: " + query;
String pathText = "Keyword query: " + query;
Node rootNode = new KeywordSearchNode(matches, query);
Node filteredRootNode = new TableFilterNode(rootNode, true);
TopComponent searchResultWin = DataResultTopComponent.createInstance("Keyword search", pathText, filteredRootNode, matches.size());
searchResultWin.requestActive(); // make it the active top component
}

View File: TermComponentQuery.java

@ -20,12 +20,12 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ExecutionException;
import java.util.logging.Level;
import java.util.logging.Logger;
@ -59,25 +59,34 @@ public class TermComponentQuery implements KeywordSearchQuery {
private String termsQuery;
private String queryEscaped;
private boolean isEscaped;
private List<Term> terms;
public TermComponentQuery(String query) {
this.termsQuery = query;
this.queryEscaped = query;
isEscaped = false;
terms = null;
}
@Override
public void escape() {
//treat as literal
//TODO for actual literal query to work in Java/Solr
//might need to either: use terms prefix (not regex) query with the literal
//or append .* to the literal regex
queryEscaped = Pattern.quote(termsQuery);
isEscaped = true;
}
@Override
public boolean validate() {
if (queryEscaped.equals("")) {
return false;
}
boolean valid = true;
try {
Pattern.compile(termsQuery);
Pattern.compile(queryEscaped);
} catch (PatternSyntaxException ex1) {
valid = false;
} catch (IllegalArgumentException ex2) {
@ -98,7 +107,7 @@ public class TermComponentQuery implements KeywordSearchQuery {
//q.setTermsLimit(200);
//q.setTermsRegexFlag(regexFlag);
//q.setTermsRaw(true);
q.setTermsRegex(termsQuery);
q.setTermsRegex(queryEscaped);
q.addTermsField(TERMS_SEARCH_FIELD);
q.setTimeAllowed(TERMS_TIMEOUT);
@ -107,25 +116,23 @@ public class TermComponentQuery implements KeywordSearchQuery {
}
/*
* execute query and return terms
* helper method, can be called from the same or threaded query context
* execute query and return terms, helper method
*/
protected List<Term> executeQuery(SolrQuery q) {
Server.Core solrCore = KeywordSearch.getServer().getCore();
List<Term> terms = null;
List<Term> termsCol = null;
try {
TermsResponse tr = solrCore.queryTerms(q);
terms = tr.getTerms(TERMS_SEARCH_FIELD);
return terms;
termsCol = tr.getTerms(TERMS_SEARCH_FIELD);
return termsCol;
} catch (SolrServerException ex) {
logger.log(Level.SEVERE, "Error executing the regex terms query: " + termsQuery, ex);
return null; //no need to create result view, just display error dialog
}
}
@Override
@Override
public String getEscapedQueryString() {
return this.queryEscaped;
}
@ -134,7 +141,12 @@ public class TermComponentQuery implements KeywordSearchQuery {
public String getQueryString() {
return this.termsQuery;
}
@Override
public Collection<Term> getTerms() {
return terms;
}
/**
* return collapsed matches with all files for the query
* without per match breakdown
@ -144,29 +156,40 @@ public class TermComponentQuery implements KeywordSearchQuery {
List<FsContent> results = new ArrayList();
final SolrQuery q = createQuery();
List<Term> terms = executeQuery(q);
terms = executeQuery(q);
//get unique match result files
Set<FsContent> uniqueMatches = new TreeSet<FsContent>();
//execute per term Solr query to get files
//more costly
/*
Set<FsContent> uniqueMatches = new HashSet<FsContent>();
for (Term term : terms) {
String word = term.getTerm();
LuceneQuery filesQuery = new LuceneQuery(word);
filesQuery.escape();
List<FsContent> matches = filesQuery.performQuery();
uniqueMatches.addAll(matches);
}
String word = term.getTerm();
LuceneQuery filesQuery = new LuceneQuery(word);
filesQuery.escape();
List<FsContent> matches = filesQuery.performQuery();
uniqueMatches.addAll(matches);
}*/
//filter out non-matching files
//escape regex if needed
//(if the original query was not literal, this must be)
String literalQuery = null;
if (isEscaped) {
literalQuery = this.queryEscaped;
} else {
literalQuery = Pattern.quote(this.termsQuery);
//combine the terms into single Solr query to get files
//TODO limited by GET length limit, try POST ?
StringBuilder filesQueryB = new StringBuilder();
for (Term term : terms) {
final String termS = KeywordSearchUtil.escapeLuceneQuery(term.getTerm());
filesQueryB.append(termS);
filesQueryB.append(" ");
}
LuceneQuery filesQuery = new LuceneQuery(filesQueryB.toString());
filesQuery.escape();
List<FsContent> uniqueMatches = filesQuery.performQuery();
//filter out non-matching files using the original query (whether literal or not)
//TODO this could be costly, for now just testing how it performs
for (FsContent f : uniqueMatches) {
Pattern p = Pattern.compile(literalQuery, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
Pattern p = Pattern.compile(queryEscaped, Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
final String contentStr = KeywordSearch.getServer().getCore().getSolrContent(f);
Matcher m = p.matcher(contentStr);
if (m.find()) {
@ -174,6 +197,7 @@ public class TermComponentQuery implements KeywordSearchQuery {
}
}
return results;
}
@ -219,7 +243,7 @@ public class TermComponentQuery implements KeywordSearchQuery {
rootNode = Node.EMPTY;
}
final String pathText = "RegEx query";
final String pathText = "Term query";
// String pathText = "RegEx query: " + termsQuery
//+ " Files with exact matches: " + Long.toString(totalMatches) + " (also listing approximate matches)";
@ -243,18 +267,10 @@ public class TermComponentQuery implements KeywordSearchQuery {
progress.start();
progress.progress("Running Terms query.");
List<Term> terms = executeQuery(q);
terms = executeQuery(q);
progress.progress("Terms query completed.");
//debug query
//StringBuilder sb = new StringBuilder();
//for (Term t : terms) {
// sb.append(t.getTerm() + " : " + t.getFrequency() + "\n");
//}
//logger.log(Level.INFO, "TermsComponent query result: " + sb.toString());
//end debug query
return terms;
}