More-complete javadoc & comments

This commit is contained in:
Peter J. Martel 2011-12-07 15:38:21 -05:00
parent 37849c2c0a
commit a895df2859
17 changed files with 214 additions and 81 deletions

View File

@ -39,8 +39,12 @@ public interface DataContentViewer {
public String getTitle();
/**
* Get new DataContentViewer instance.
* Get new DataContentViewer instance. (This method is weird. We use the
* instance returned by the Lookup as a factory for the instances that
* are actually used.)
*/
// TODO: extract the factory method out into a separate interface that
// is used for the Lookup.
public DataContentViewer getInstance();
/**

View File

@ -32,6 +32,8 @@
<target name="init" depends="basic-init,files-init,build-init,-javac-init,init-ivy">
<!-- fetch all the dependencies from Ivy and stick them in the right places -->
<ivy:resolve/>
<ivy:retrieve conf="autopsy" sync="true" pattern="/release/modules/ext/[artifact]-[revision](-[classifier]).[ext]" />
<ivy:retrieve conf="solr-war" sync="true" pattern="/release/solr/webapps/solr.war" />

View File

@ -2,7 +2,10 @@
<ivy-module version="2.0">
<info organisation="org.sleuthkit.autopsy" module="keywordsearch"/>
<configurations >
<!-- module dependencies -->
<conf name="autopsy"/>
<!-- Solr server dependencies -->
<conf name="solr-libs"/>
<conf name="solr-war"/>
<conf name="start-solr"/>

View File

@ -23,15 +23,15 @@ import java.awt.event.ItemListener;
import java.util.Collections;
import java.util.List;
/**
* Panel displays HTML content sent to ExtractedContentViewer, and provides
* a combo-box to select between multiple sources.
*/
class ExtractedContentPanel extends javax.swing.JPanel {
/** Creates new form ExtractedContentPanel */
ExtractedContentPanel() {
initComponents();
// DefaultCaret caret = (DefaultCaret)extractedTextPane.getCaret();
// caret.setUpdatePolicy(DefaultCaret.NEVER_UPDATE);
extractedTextPane.setContentType("text/html");
sourceComboBox.addItemListener(new ItemListener() {
@ -90,6 +90,12 @@ class ExtractedContentPanel extends javax.swing.JPanel {
private javax.swing.JComboBox sourceComboBox;
// End of variables declaration//GEN-END:variables
/**
* Set the available sources (selects the first source in the list by
* default)
* @param sources
*/
void setSources(List<MarkupSource> sources) {
sourceComboBox.removeAllItems();
setPanelText(null);

View File

@ -33,7 +33,11 @@ import org.sleuthkit.autopsy.corecomponentinterfaces.DataContentViewer;
import org.sleuthkit.autopsy.datamodel.ContentNode;
import org.apache.commons.lang.StringEscapeUtils;
/**
* Displays marked-up (HTML) content for a Node. The sources are all the
* MarkupSource items in the selected Node's lookup, plus the content that
* Solr extracted (if there is any).
*/
@ServiceProvider(service = DataContentViewer.class)
public class ExtractedContentViewer implements DataContentViewer {
@ -46,18 +50,20 @@ public class ExtractedContentViewer implements DataContentViewer {
@Override
public void setNode(final ContentNode selectedNode) {
// to clear it
// to clear the viewer
if (selectedNode == null) {
resetComponent();
return;
}
// custom markup from the node (if available) and default markup
// fetched from solr
// sources are custom markup from the node (if available) and default
// markup is fetched from solr
List<MarkupSource> sources = new ArrayList<MarkupSource>();
sources.addAll(((Node) selectedNode).getLookup().lookupAll(MarkupSource.class));
if (solrHasContent(selectedNode)) {
sources.add(new MarkupSource() {
@Override
@ -76,6 +82,7 @@ public class ExtractedContentViewer implements DataContentViewer {
return "Extracted Content";
}
});
}
// first source will be the default displayed
setPanel(sources);
@ -112,10 +119,26 @@ public class ExtractedContentViewer implements DataContentViewer {
Collection<? extends MarkupSource> sources = ((Node) node).getLookup().lookupAll(MarkupSource.class);
if (!sources.isEmpty()) {
return true;
return !sources.isEmpty() || solrHasContent(node);
}
/**
* Set the MarkupSources for the panel to display (safe to call even if the
* panel hasn't been created yet)
* @param sources
*/
private void setPanel(List<MarkupSource> sources) {
if (panel != null) {
panel.setSources(sources);
}
}
/**
* Check if Solr has extracted content for a given node
* @param node
* @return true if Solr has content, else false
*/
private boolean solrHasContent(ContentNode node) {
Server.Core solrCore = KeywordSearch.getServer().getCore();
SolrQuery q = new SolrQuery();
q.setQuery("*:*");
@ -130,12 +153,13 @@ public class ExtractedContentViewer implements DataContentViewer {
}
}
private void setPanel(List<MarkupSource> sources) {
if (panel != null) {
panel.setSources(sources);
}
}
/**
* Get extracted content for a node from Solr
* @param cNode a node that has extracted content in Solr (check with
* solrHasContent(ContentNode))
* @return the extracted content
* @throws SolrServerException if something goes wrong
*/
private String getSolrContent(ContentNode cNode) throws SolrServerException {
Server.Core solrCore = KeywordSearch.getServer().getCore();
SolrQuery q = new SolrQuery();
@ -143,8 +167,6 @@ public class ExtractedContentViewer implements DataContentViewer {
q.addFilterQuery("id:" + cNode.getContent().getId());
q.setFields("content");
//TODO: for debugging, remove
String queryURL = q.toString();
String content = (String) solrCore.query(q).getResults().get(0).getFieldValue("content");
return content;
}

View File

@ -40,9 +40,19 @@ import org.sleuthkit.datamodel.TskException;
import org.sleuthkit.datamodel.Volume;
import org.sleuthkit.datamodel.VolumeSystem;
/**
 * Visitor for getting all the files to try to index from any Content object.
 * Currently gets all the files with file extensions that match a list of
 * document types that Tika/Solr-Cell supports.
 */
class GetIngestableFilesContentVisitor implements ContentVisitor<Collection<FsContent>> {
private static Logger logger = Logger.getLogger(GetIngestableFilesContentVisitor.class.getName());
private static final Logger logger = Logger.getLogger(GetIngestableFilesContentVisitor.class.getName());
// TODO: use a more robust method than checking file extension to determine
// whether to try a file
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
private static final String[] supportedExtensions = {"tar", "jar", "zip", "bzip2",
"gz", "tgz", "doc", "xls", "ppt", "rtf", "pdf", "html", "xhtml", "txt",
@ -52,6 +62,8 @@ class GetIngestableFilesContentVisitor implements ContentVisitor<Collection<FsCo
private static final String extensionsLikePredicate;
static {
// build the query fragment for matching file extensions
StringBuilder likes = new StringBuilder("0");
for (String ext : supportedExtensions) {
@ -68,6 +80,12 @@ class GetIngestableFilesContentVisitor implements ContentVisitor<Collection<FsCo
return getAllFromChildren(drctr);
}
/**
* Get the part of a file name after (not including) the last '.' and
* coerced to lowercase.
* @param fileName
* @return the file extension, or an empty string if there is none
*/
private String getExtension(String fileName) {
int lastDot = fileName.lastIndexOf(".");
@ -90,6 +108,9 @@ class GetIngestableFilesContentVisitor implements ContentVisitor<Collection<FsCo
@Override
public Collection<FsContent> visit(FileSystem fs) {
// Files in the database have a filesystem field, so it's quick to
// get all the matching files for an entire filesystem with a query
SleuthkitCase sc = Case.getCurrentCase().getSleuthkitCase();
String query = "SELECT * FROM tsk_files WHERE fs_obj_id = " + fs.getId()
@ -119,11 +140,17 @@ class GetIngestableFilesContentVisitor implements ContentVisitor<Collection<FsCo
return getAllFromChildren(vs);
}
private Collection<FsContent> getAllFromChildren(Content c) {
/**
* Aggregate all the matches from visiting the children Content objects of the
* one passed
* @param parent
* @return
*/
private Collection<FsContent> getAllFromChildren(Content parent) {
Collection<FsContent> all = new ArrayList<FsContent>();
try {
for (Content child : c.getChildren()) {
for (Content child : parent.getChildren()) {
all.addAll(child.accept(this));
}
} catch (TskException ex) {

View File

@ -26,6 +26,10 @@ import org.apache.solr.client.solrj.response.QueryResponse;
import org.sleuthkit.autopsy.keywordsearch.Server.Core;
import org.sleuthkit.datamodel.Content;
/**
* Gets extracted content from Solr with the parts that match the query
* highlighted
*/
class HighlightedMatchesSource implements MarkupSource {
private static final Logger logger = Logger.getLogger(HighlightedMatchesSource.class.getName());
@ -43,8 +47,6 @@ class HighlightedMatchesSource implements MarkupSource {
this.solrCore = solrCore;
}
@Override
public String getMarkup() {
@ -56,17 +58,13 @@ class HighlightedMatchesSource implements MarkupSource {
q.setHighlightSimplePost("</span>");
q.setHighlightFragsize(0); // don't fragment the highlight
//TODO: remove (only for debugging)
String queryString = q.toString();
try {
QueryResponse response = solrCore.query(q);
List<String> contentHighlights = response.getHighlighting().get(Long.toString(content.getId())).get("content");
if (contentHighlights == null) {
return "<span style=\"background:red\">No matches in content.</span>";
} else {
// extracted content (minus highlight tags) is HTML-escaped
return "<pre>" + contentHighlights.get(0).trim() + "</pre>";
}
} catch (SolrServerException ex) {

View File

@ -39,6 +39,10 @@ import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.FsContent;
/**
* Action adds all supported files from the given Content object and its
* children to the Solr index.
*/
public class IndexContentFilesAction extends AbstractAction {
private static final Logger logger = Logger.getLogger(IndexContentFilesAction.class.getName());
@ -47,6 +51,11 @@ public class IndexContentFilesAction extends AbstractAction {
private String name;
private Server.Core solrCore;
/**
* New action
* @param c source Content object to get files from
* @param name name to refer to the source by when displaying progress
*/
public IndexContentFilesAction(Content c, String name) {
this(c, name, KeywordSearch.getServer().getCore());
}
@ -61,8 +70,7 @@ public class IndexContentFilesAction extends AbstractAction {
@Override
public void actionPerformed(ActionEvent e) {
// create the popUp window for it
// create the popUp window to display progress
String title = "Indexing files in " + name;
final JFrame frame = new JFrame(title);
@ -81,6 +89,7 @@ public class IndexContentFilesAction extends AbstractAction {
setProgress(0);
// track number complete or with errors
int fileCount = files.size();
int finishedFiles = 0;
int problemFiles = 0;
@ -124,6 +133,8 @@ public class IndexContentFilesAction extends AbstractAction {
} finally {
popUpWindow.setVisible(false);
popUpWindow.dispose();
// notify user if there were problem files
if (problemFiles > 0) {
displayProblemFilesDialog(problemFiles);
}
@ -133,6 +144,7 @@ public class IndexContentFilesAction extends AbstractAction {
@Override
protected void process(List<String> messages) {
// display the latest message
if (!messages.isEmpty()) {
panel.setStatusText(messages.get(messages.size() - 1));
}

View File

@ -20,6 +20,9 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.awt.event.ActionListener;
/**
* Displays progress as files are indexed
*/
class IndexProgressPanel extends javax.swing.JPanel {
/** Creates new form IndexProgressPanel */

View File

@ -35,7 +35,7 @@ import org.apache.solr.common.util.ContentStream;
import org.sleuthkit.datamodel.FsContent;
/**
* Handles ingesting files to a Solr server, given the url string for it
* Handles indexing files on a Solr core.
*/
class Ingester {
@ -62,9 +62,8 @@ class Ingester {
* index. commit() should be called once you're done ingesting files.
*
* @param f File to ingest
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException if
* there was an error processing the given file, but the Solr server is
* probably fine.
* @throws IngesterException if there was an error processing a specific
* file, but the Solr server is probably fine.
*/
void ingest(FsContent f) throws IngesterException {
Map<String, String> fields = new HashMap<String, String>();
@ -109,6 +108,10 @@ class Ingester {
uncommitedIngests = true;
}
/**
* Tells Solr to commit (necessary before ingested files will appear in
* searches)
*/
void commit() {
uncommitedIngests = false;
try {
@ -121,12 +124,20 @@ class Ingester {
}
}
/**
* Helper to set document fields
* @param up request with document
* @param fields map of field-names->values
*/
private static void setFields(ContentStreamUpdateRequest up, Map<String, String> fields) {
for (Entry<String, String> field : fields.entrySet()) {
up.setParam("literal." + field.getKey(), field.getValue());
}
}
/**
* ContentStream to read() the data from a FsContent object
*/
private static class FscContentStream implements ContentStream {
FsContent f;
@ -166,6 +177,10 @@ class Ingester {
}
}
/**
* Indicates that there was an error with the specific ingest operation,
* but it's still okay to continue ingesting files.
*/
static class IngesterException extends Exception {
IngesterException(String message, Throwable ex) {

View File

@ -23,6 +23,10 @@ import java.util.logging.Logger;
import org.openide.modules.ModuleInstall;
import org.sleuthkit.autopsy.casemodule.Case;
/**
* Starts up the Solr server when the module is loaded, and stops it when the
* application is closed.
*/
public class Installer extends ModuleInstall {
@Override
@ -36,17 +40,12 @@ public class Installer extends ModuleInstall {
if (server.isRunning()) {
logger.log(Level.WARNING, "Already a Solr server on out port, maybe leftoveer from a previous run. Trying to shut it down...");
logger.log(Level.WARNING, "Already a Solr server on out port, maybe leftover from a previous run. Trying to shut it down...");
// Send the stop message in case there's a solr server lingering from
// a previous run of Autopsy that didn't exit cleanly
server.stop();
try {
Thread.sleep(10000); // let it die
} catch (InterruptedException ex) {
throw new RuntimeException(ex);
}
if (server.isRunning()) {
throw new IllegalStateException("There's already a server running on our port that can't be shutdown.");
} else {

View File

@ -22,6 +22,9 @@ import java.beans.PropertyChangeEvent;
import java.beans.PropertyChangeListener;
import org.sleuthkit.autopsy.casemodule.Case;
/**
* Static class to track singletons for KeywordSearch module
*/
class KeywordSearch {
private static final String BASE_URL = "http://localhost:8983/solr/";
@ -36,6 +39,9 @@ class KeywordSearch {
throw new AssertionError();
}
/**
* Listener to swap cores when the case changes
*/
static class CaseChangeListener implements PropertyChangeListener {
CaseChangeListener() {

View File

@ -39,6 +39,9 @@ import org.sleuthkit.autopsy.corecomponents.DataResultTopComponent;
import org.sleuthkit.datamodel.FsContent;
import org.sleuthkit.datamodel.SleuthkitCase;
/**
 * Provides a data explorer for performing Solr searches
 */
@ServiceProvider(service = DataExplorer.class, position = 300)
public class KeywordSearchDataExplorer implements DataExplorer {
@ -61,10 +64,14 @@ public class KeywordSearchDataExplorer implements DataExplorer {
if (theInstance == null) {
theInstance = this;
} else {
throw new RuntimeException("NOOO!!! Mulitple instances of KeywordSearchTopComponent! BAD!");
throw new RuntimeException("Tried to instantiate mulitple instances of KeywordSearchTopComponent.");
}
}
/**
* Executes a query and populates a DataResult tab with the results
* @param solrQuery
*/
private void search(String solrQuery) {
List<FsContent> matches = new ArrayList<FsContent>();
@ -72,8 +79,6 @@ public class KeywordSearchDataExplorer implements DataExplorer {
boolean allMatchesFetched = false;
final int ROWS_PER_FETCH = 10000;
Server.Core solrCore = KeywordSearch.getServer().getCore();
SolrQuery q = new SolrQuery();

View File

@ -33,6 +33,9 @@ import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.FsContent;
import org.sleuthkit.datamodel.TskException;
/**
* Root Node for keyword search results
*/
class KeywordSearchNode extends AbstractNode implements ContentNode {
private String solrQuery;
@ -40,13 +43,14 @@ class KeywordSearchNode extends AbstractNode implements ContentNode {
KeywordSearchNode(List<FsContent> keys, final String solrQuery) {
super(new RootContentChildren(keys) {
// Use filter node to add a MarkupSource for the search results
// to the lookup
@Override
protected Node[] createNodes(Content key) {
Node[] originalNodes = super.createNodes(key);
Node[] filterNodes = new Node[originalNodes.length];
// Use filter node to add a MarkupSource for the search results
// to the lookup
int i = 0;
for (Node original : originalNodes) {
MarkupSource markup = new HighlightedMatchesSource(key, solrQuery);

View File

@ -18,6 +18,10 @@
*/
package org.sleuthkit.autopsy.keywordsearch;
/**
* Interface to provide HTML markup (to be displayed in ExtractedContentViewer)
* in a Node's lookup
*/
public interface MarkupSource {
/**

View File

@ -23,6 +23,9 @@ import java.io.InputStream;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.TskException;
/**
* InputStream to read bytes from a Content object's data
*/
class ReadContentInputStream extends InputStream {
private long position;

View File

@ -37,6 +37,9 @@ import org.openide.modules.InstalledFileLocator;
import org.openide.util.Exceptions;
import org.sleuthkit.autopsy.casemodule.Case;
/**
 * Handle for keeping track of a Solr server and its cores
 */
class Server {
private static final Logger logger = Logger.getLogger(Server.class.getName());
@ -48,6 +51,10 @@ class Server {
private String instanceDir;
private File solrFolder;
/**
* New instance for the server at the given URL
* @param url should be something like "http://localhost:8983/solr/"
*/
Server(String url) {
try {
this.solr = new CommonsHttpSolrServer(url);
@ -61,7 +68,7 @@ class Server {
/**
* Helper class to handle output from Solr
* Helper threads to handle stderr/stdout from Solr process
*/
private static class InputStreamPrinter extends Thread {
@ -88,8 +95,9 @@ class Server {
/**
* Tries to start a Solr instance. Returns immediately (probably before
* the server is ready) and doesn't check whether it was successful.
* Tries to start a Solr instance in a separate process. Returns immediately
* (probably before the server is ready) and doesn't check whether it was
* successful.
*/
void start() {
logger.log(Level.INFO, "Starting Solr server from: " + solrFolder.getAbsolutePath());
@ -180,13 +188,23 @@ class Server {
/**** end single-case specific methods ****/
/**
* Open a core for the given case
* @param c
* @return
*/
Core openCore(Case c) {
String sep = File.separator;
String dataDir = c.getCaseDirectory() + sep + "keywordsearch" + sep + "data";
return this.openCore(DEFAULT_CORE_NAME, new File(dataDir));
}
/**
* Open a new core
* @param coreName name to refer to the core by in Solr
* @param dataDir directory to load/store the core data from/to
* @return new core
*/
Core openCore(String coreName, File dataDir) {
try {
if (!dataDir.exists()) {
@ -211,8 +229,10 @@ class Server {
class Core {
// handle to the core in Solr
private String name;
// server to access a core needs to be built from a URL with the
// the server to access a core needs to be built from a URL with the
// core in it, and is only good for core-specific operations
private SolrServer solrCore;
@ -233,7 +253,7 @@ class Server {
return solrCore.query(sq);
}
void close () {
void close() {
try {
CoreAdminRequest.unloadCore(this.name, solr);
} catch (SolrServerException ex) {