TSK-270 Provide Hit Navigation in keyword search results

- Now uses HTML anchors: a much simpler and more accurate method, which should also be easier to extend to other file types.
This commit is contained in:
adam-m 2011-12-13 15:36:29 -05:00
parent 54a5abff8e
commit c484666b7d
5 changed files with 127 additions and 149 deletions

View File

@ -16,152 +16,116 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import org.apache.commons.lang.StringEscapeUtils;
public class ExtractedContentFind { public class ExtractedContentFind {
private static final Logger logger = Logger.getLogger(ExtractedContentFind.class.getName()); private static final Logger logger = Logger.getLogger(ExtractedContentFind.class.getName());
ExtractedContentFind(ExtractedContentViewer viewer) { public ExtractedContentFind() {
this.viewer = viewer;
findIndex = new HashMap<MarkupSource, ArrayList<Long>>();
curIndex = new HashMap<MarkupSource, Integer>(); curIndex = new HashMap<MarkupSource, Integer>();
} }
private HashMap<MarkupSource, Integer> curIndex;
private HashMap<MarkupSource, ArrayList<Long>>findIndex; public static final int INDEX_NOT_FOUND = -2;
private HashMap<MarkupSource, Integer>curIndex; public static final int INDEX_INITIALIZED = -1;
private ExtractedContentViewer viewer;
public static int INDEX_INITIALIZED = -1;
public static int INDEX_NOT_FOUND = -2;
public int getCurrentIndexTotal(MarkupSource source) { public int getCurrentIndexTotal(MarkupSource source) {
ArrayList<Long> index = indexSource(source); return source.getNumberHits();
return index.size();
} }
public int getCurrentIndexI(MarkupSource source) { public int getCurrentIndexI(MarkupSource source) {
indexSource(source);
Integer curI = curIndex.get(source); Integer curI = curIndex.get(source);
if (curI != null) if (curI != null) {
return curI; return curI;
else return -1; } else {
return -1;
}
} }
/**
* get next line number corresponding to indexed match, no wrapping
* requires call to hasNext() first
* or INDEX_NOT_FOUND if no next hit
* @param source
* @return line number where match occurs
*/
public long getNext(MarkupSource source) {
ArrayList<Long> index = indexSource(source);
int total = index.size();
int cur = curIndex.get(source);
if (total == 0 || cur == total -1) return INDEX_NOT_FOUND;
++cur;
//update curIndex location
curIndex.put(source, cur);
return index.get(cur);
}
/** /**
* *
* @param source * @param source
* @return true if the source has next hit * @return true if the source has next hit
*/ */
public boolean hasNext(MarkupSource source) { public boolean hasNext(MarkupSource source) {
ArrayList<Long> index = indexSource(source); int total = source.getNumberHits();
int total = index.size();
int cur = curIndex.get(source); int cur = curIndex.get(source);
if (total == 0) return false; if (total == 0) {
else if (cur == INDEX_INITIALIZED) return true;
else if (cur == total - 1)
return false; return false;
} else if (cur == INDEX_INITIALIZED) {
return true;
} else if (cur == total - 1) {
return false;
}
return true; return true;
} }
/** /**
* *
* @param source * @param source
* @return true if the source has previous hit * @return true if the source has previous hit
*/ */
public boolean hasPrevious(MarkupSource source) { public boolean hasPrevious(MarkupSource source) {
ArrayList<Long> index = indexSource(source); int total = source.getNumberHits();
int total = index.size();
int cur = curIndex.get(source); int cur = curIndex.get(source);
if (total == 0) return false; if (total == 0) {
else if (cur == INDEX_INITIALIZED) return false; return false;
else if (cur == 0) return false; } else if (cur == INDEX_INITIALIZED) {
return false;
} else if (cur == 0) {
return false;
}
return true; return true;
} }
/** /**
* get previous line number corresponding to indexed match, no wrapping * get next index
* requires call to hasNext() first
* or INDEX_NOT_FOUND if no next hit
* @param source
* @return line number where match occurs
*/
public long getNext(MarkupSource source) {
int total = source.getNumberHits();
int cur = curIndex.get(source);
if (total == 0 || cur == total - 1) {
return INDEX_NOT_FOUND;
}
++cur;
//update curIndex location
curIndex.put(source, cur);
return cur;
}
/**
* get previous index
* requires call to hasPrevious() first * requires call to hasPrevious() first
* or INDEX_NOT_FOUND if no previous hit * or INDEX_NOT_FOUND if no previous hit
* @param source * @param source
* @return line number where match occurs * @return line number where match occurs
*/ */
public long getPrevious(MarkupSource source) { public long getPrevious(MarkupSource source) {
ArrayList<Long> index = indexSource(source); int total = source.getNumberHits();
int total = index.size();
int cur = curIndex.get(source); int cur = curIndex.get(source);
if (total == 0 || cur == 0) return INDEX_NOT_FOUND; if (total == 0 || cur == 0) {
return INDEX_NOT_FOUND;
}
--cur; --cur;
//update curIndex location //update curIndex location
curIndex.put(source, cur); curIndex.put(source, cur);
return index.get(cur); return cur;
} }
/** /**
* Add MarkupSource to find functionality, or return if already exists for that source. * initialize find functionality with the source
* @param source MarkupSource to add to find * @param source MarkupSource to initialize find with
*/ */
private ArrayList<Long> indexSource(MarkupSource source) { public void init(MarkupSource source) {
//return if already indexed if (curIndex.get(source) == null)
ArrayList<Long> indexed = findIndex.get(source); curIndex.put(source, INDEX_INITIALIZED);
if (indexed != null || source.isSearchable() == false)
return indexed;
indexed = new ArrayList<Long>();
String markup = source.getMarkup();
//logger.log(Level.INFO,markup);
final String indexSearchTok = source.getSearchToken();
if (indexSearchTok == null || indexSearchTok.equals("")) {
return indexed;
}
final int indexSearchTokLen = indexSearchTok.length();
long docOffset = 0;
long index = -1;
while ((index = markup.indexOf(indexSearchTok, (int)docOffset)) >= 0) {
//TODO check if (int) cast above presents limitation for large files
//calculate and store index stripping all markup for scrolling to work properly
//need to map index to content with no html
//try cheat: compensata fot highlight tags (might be other things, such as escape chars)
//perfectly we'd scan both documents at same time and map index from one to another
indexed.add(index);
docOffset = index + indexSearchTokLen; //next offset past the keyword
}
//add indices to index collection
findIndex.put(source, indexed);
//add current for tracking
curIndex.put(source, INDEX_INITIALIZED);
return indexed;
} }
} }

View File

@ -18,19 +18,13 @@
*/ */
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import java.awt.Point;
import java.awt.Rectangle;
import java.awt.event.ActionListener; import java.awt.event.ActionListener;
import java.awt.event.ItemEvent; import java.awt.event.ItemEvent;
import java.awt.event.ItemListener; import java.awt.event.ItemListener;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import javax.swing.JViewport;
import javax.swing.SwingUtilities;
import org.apache.commons.logging.Log;
/** /**
* Panel displays HTML content sent to ExtractedContentViewer, and provides * Panel displays HTML content sent to ExtractedContentViewer, and provides
@ -55,6 +49,7 @@ class ExtractedContentPanel extends javax.swing.JPanel {
} }
} }
}); });
setSources(Collections.EMPTY_LIST); setSources(Collections.EMPTY_LIST);
} }
@ -196,7 +191,6 @@ class ExtractedContentPanel extends javax.swing.JPanel {
private void setPanelText(String text) { private void setPanelText(String text) {
extractedTextPane.setText(text); extractedTextPane.setText(text);
extractedTextPane.setCaretPosition(0); extractedTextPane.setCaretPosition(0);
logger.log(Level.INFO, extractedTextPane.getText());
} }
private void initControls() { private void initControls() {
@ -204,31 +198,9 @@ class ExtractedContentPanel extends javax.swing.JPanel {
hitNextButton.setEnabled(false); hitNextButton.setEnabled(false);
} }
/**
* public void scrollToAnchor(String anchor) {
* @param offset to scroll to extractedTextPane.scrollToReference(anchor);
*/
public void scrollTo(int offset) {
//extractedTextPane.setCaretPosition(offset);
//
JViewport viewport = (JViewport) SwingUtilities.getAncestorOfClass(JViewport.class, extractedTextPane);
if (viewport == null) {
return;
}
int height = viewport.getExtentSize().height;
try {
Rectangle viewRectangle = extractedTextPane.modelToView(offset);
if (viewRectangle == null) {
return;
}
int y = viewRectangle.y - height / 2;
y = Math.max(0, y);
y = Math.min(y, extractedTextPane.getHeight() - height);
viewport.setViewPosition(new Point(0, y));
} catch (javax.swing.text.BadLocationException ex) {
logger.log(Level.WARNING, "Failed scrolling to index " + offset);
}
} }
/** /**

View File

@ -48,7 +48,7 @@ public class ExtractedContentViewer implements DataContentViewer {
private ExtractedContentFind find; private ExtractedContentFind find;
public ExtractedContentViewer() { public ExtractedContentViewer() {
find = new ExtractedContentFind(this); find = new ExtractedContentFind();
} }
@Override @Override
@ -93,9 +93,14 @@ public class ExtractedContentViewer implements DataContentViewer {
} }
@Override @Override
public String getSearchToken() { public String getAnchorPrefix() {
return ""; return "";
} }
@Override
public int getNumberHits() {
return 0;
}
}); });
} }
@ -202,10 +207,9 @@ public class ExtractedContentViewer implements DataContentViewer {
MarkupSource source = panel.getSelectedSource(); MarkupSource source = panel.getSelectedSource();
if (find.hasNext(source)) { if (find.hasNext(source)) {
long indexVal = find.getNext(source); long indexVal = find.getNext(source);
logger.log(Level.INFO, "INDEX NEXT: " + indexVal);
//scroll //scroll
panel.scrollTo((int)indexVal); panel.scrollToAnchor(source.getAnchorPrefix() + Long.toString(indexVal));
//update display //update display
panel.updateCurrentDisplay(find.getCurrentIndexI(source) + 1); panel.updateCurrentDisplay(find.getCurrentIndexI(source) + 1);
@ -229,10 +233,10 @@ public class ExtractedContentViewer implements DataContentViewer {
MarkupSource source = panel.getSelectedSource(); MarkupSource source = panel.getSelectedSource();
if (find.hasPrevious(source)) { if (find.hasPrevious(source)) {
long indexVal = find.getPrevious(source); long indexVal = find.getPrevious(source);
logger.log(Level.INFO, "INDEX PREVIOUS: " + indexVal);
//scroll
panel.scrollTo((int)indexVal);
//scroll
panel.scrollToAnchor(source.getAnchorPrefix() + Long.toString(indexVal));
//update display //update display
panel.updateCurrentDisplay(find.getCurrentIndexI(source) + 1); panel.updateCurrentDisplay(find.getCurrentIndexI(source) + 1);
panel.updateTotalDisplay(find.getCurrentIndexTotal(source)); panel.updateTotalDisplay(find.getCurrentIndexTotal(source));
@ -253,10 +257,10 @@ public class ExtractedContentViewer implements DataContentViewer {
@Override @Override
public void actionPerformed(ActionEvent e) { public void actionPerformed(ActionEvent e) {
MarkupSource source = panel.getSelectedSource(); MarkupSource source = panel.getSelectedSource();
//setup find buttons //setup find controls
if (source != null && source.isSearchable()) { if (source != null && source.isSearchable()) {
find.init(source);
panel.updateCurrentDisplay(find.getCurrentIndexI(source) + 1); panel.updateCurrentDisplay(find.getCurrentIndexI(source) + 1);
panel.updateTotalDisplay(find.getCurrentIndexTotal(source)); panel.updateTotalDisplay(find.getCurrentIndexTotal(source));

View File

@ -19,6 +19,7 @@
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import java.util.List; import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
@ -35,10 +36,12 @@ class HighlightedMatchesSource implements MarkupSource {
private static final Logger logger = Logger.getLogger(HighlightedMatchesSource.class.getName()); private static final Logger logger = Logger.getLogger(HighlightedMatchesSource.class.getName());
private static final String HIGHLIGHT_PRE = "<span style=\"background:yellow\">"; private static final String HIGHLIGHT_PRE = "<span style=\"background:yellow\">";
private static final String HIGHLIGHT_POST = "</span>"; private static final String HIGHLIGHT_POST = "</span>";
private static final String ANCHOR_PREFIX = HighlightedMatchesSource.class.getName() + "_";
Content content; private Content content;
String solrQuery; private String solrQuery;
Core solrCore; private Core solrCore;
private int numberHits;
HighlightedMatchesSource(Content content, String solrQuery) { HighlightedMatchesSource(Content content, String solrQuery) {
this(content, solrQuery, KeywordSearch.getServer().getCore()); this(content, solrQuery, KeywordSearch.getServer().getCore());
@ -68,7 +71,9 @@ class HighlightedMatchesSource implements MarkupSource {
return "<span style=\"background:red\">No matches in content.</span>"; return "<span style=\"background:red\">No matches in content.</span>";
} else { } else {
// extracted content (minus highlight tags) is HTML-escaped // extracted content (minus highlight tags) is HTML-escaped
return "<pre>" + contentHighlights.get(0).trim() + "</pre>"; String highlightedContent = contentHighlights.get(0).trim();
highlightedContent = insertAnchors(highlightedContent);
return "<pre>" + highlightedContent + "</pre>";
} }
} catch (SolrServerException ex) { } catch (SolrServerException ex) {
throw new RuntimeException(ex); throw new RuntimeException(ex);
@ -86,7 +91,35 @@ class HighlightedMatchesSource implements MarkupSource {
} }
@Override @Override
public String getSearchToken() { public String getAnchorPrefix() {
return HIGHLIGHT_PRE; return ANCHOR_PREFIX;
}
@Override
public int getNumberHits() {
return numberHits;
}
private String insertAnchors(String searchableContent) {
int searchOffset = 0;
int index = -1;
StringBuilder buf = new StringBuilder(searchableContent);
final String searchToken = HIGHLIGHT_PRE;
final int indexSearchTokLen = searchToken.length();
final String insertPre = "<a name=\"" + ANCHOR_PREFIX;
final String insertPost = "\"></a>";
int count = 0;
while ((index = buf.indexOf(searchToken, searchOffset)) >= 0) {
String insertString = insertPre + Integer.toString(count) + insertPost;
int insertStringLen = insertString.length();
buf.insert(index, insertString);
searchOffset = index + indexSearchTokLen + insertStringLen ; //next offset past this anchor
++count;
}
this.numberHits = count;
return buf.toString();
} }
} }

View File

@ -37,11 +37,16 @@ public interface MarkupSource {
boolean isSearchable(); boolean isSearchable();
/** /**
* If searchable markup, returns search token, otherwise return empty string * If searchable markup, returns prefix of anchor, otherwise return empty string
* TODO pull up into SearchableMarkupSource abstract class
* @return * @return
*/ */
String getSearchToken(); String getAnchorPrefix();
/**
* if searchable markup, returns number of hits found and encoded in the markup
* @return
*/
int getNumberHits();
/** /**
* @return title of markup source * @return title of markup source