Finish implementing all the cases for highlighting

commit 6abe26d6de
Author: millmanorama
Date:   2017-02-15 13:48:43 +01:00
Parent: edd03a66c1

5 changed files with 102 additions and 156 deletions

ExtractedContentViewer.java

@@ -110,15 +110,13 @@ public class ExtractedContentViewer implements DataContentViewer {
         BlackboardArtifact artifact = nodeLookup.lookup(BlackboardArtifact.class);
         if (hits != null) {
             highlightedHitText = new HighlightedText(content.getId(), hits);
-        } else {
-            if (artifact != null && artifact.getArtifactTypeID()
-                    == BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT.getTypeID()) {
-                // if the artifact is an account artifact, get an account text .
-                highlightedHitText = getAccountsText(content, nodeLookup);
-            } else if (artifact != null && artifact.getArtifactTypeID()
-                    == BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT.getTypeID()) {
-                highlightedHitText = new HighlightedText(artifact);
-            }
+        } else if (artifact != null && artifact.getArtifactTypeID()
+                == BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT.getTypeID()) {
+            // if the artifact is an account artifact, get an account text .
+            highlightedHitText = getAccountsText(content, nodeLookup);
+        } else if (artifact != null && artifact.getArtifactTypeID()
+                == BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT.getTypeID()) {
+            highlightedHitText = new HighlightedText(artifact);
         }
         if (highlightedHitText != null) {
             indexedTextSources.add(highlightedHitText);
@@ -298,16 +296,16 @@ public class ExtractedContentViewer implements DataContentViewer {
             return false;
         }

-        /**
-         * Is there any marked up indexed text in the look up of this node? This
-         * will be the case if the node is for a keyword hit artifact produced
-         * by either an ad hoc keyword search result (keyword search toolbar
-         * widgets) or a keyword search by the keyword search ingest module.
-         */
-        Collection<? extends IndexedText> sources = node.getLookup().lookupAll(IndexedText.class);
-        if (sources.isEmpty() == false) {
-            return true;
-        }
+//        /**
+//         * Is there any marked up indexed text in the look up of this node? This
+//         * will be the case if the node is for a keyword hit artifact produced
+//         * by either an ad hoc keyword search result (keyword search toolbar
+//         * widgets) or a keyword search by the keyword search ingest module.
+//         */
+//        Collection<? extends IndexedText> sources = node.getLookup().lookupAll(IndexedText.class);
+//        if (sources.isEmpty() == false) {
+//            return true;
+//        }

         /*
          * Is there a credit card artifact in the lookup
@@ -315,7 +313,9 @@ public class ExtractedContentViewer implements DataContentViewer {
         Collection<? extends BlackboardArtifact> artifacts = node.getLookup().lookupAll(BlackboardArtifact.class);
         if (artifacts != null) {
             for (BlackboardArtifact art : artifacts) {
-                if (art.getArtifactTypeID() == BlackboardArtifact.ARTIFACT_TYPE.TSK_ACCOUNT.getTypeID()) {
+                final int artifactTypeID = art.getArtifactTypeID();
+                if (artifactTypeID == TSK_ACCOUNT.getTypeID()
+                        || artifactTypeID == TSK_KEYWORD_HIT.getTypeID()) {
                     return true;
                 }
             }
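The rewritten check reads the artifact type ID once and then accepts either of the two supported types; the unqualified TSK_ACCOUNT / TSK_KEYWORD_HIT references imply static imports of those ARTIFACT_TYPE constants added elsewhere in the file, outside this hunk. A minimal standalone sketch of the same membership test, with hypothetical type IDs rather than the real TSK values, shows how a third supported type becomes a one-line change:

    // SupportedTypesSketch.java -- standalone illustration, not Autopsy code.
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class SupportedTypesSketch {

        // Hypothetical type IDs standing in for TSK_ACCOUNT / TSK_KEYWORD_HIT.
        private static final int ACCOUNT_TYPE_ID = 1;
        private static final int KEYWORD_HIT_TYPE_ID = 2;

        private static final Set<Integer> SUPPORTED_TYPE_IDS = new HashSet<>(
                Arrays.asList(ACCOUNT_TYPE_ID, KEYWORD_HIT_TYPE_ID));

        // Same shape as the loop in the hunk: fetch the type ID once, then
        // test membership, so supporting another type means adding one ID.
        static boolean anySupported(Iterable<Integer> artifactTypeIDs) {
            for (int artifactTypeID : artifactTypeIDs) {
                if (SUPPORTED_TYPE_IDS.contains(artifactTypeID)) {
                    return true;
                }
            }
            return false;
        }
    }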

HighlightedText.java

@@ -18,7 +18,6 @@
  */
 package org.sleuthkit.autopsy.keywordsearch;

-import com.ibm.icu.text.UnicodeSet;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
@@ -28,7 +27,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
-import java.util.function.Function;
 import java.util.logging.Level;
 import java.util.stream.Collectors;
 import org.apache.commons.lang.StringEscapeUtils;
@@ -40,14 +38,13 @@ import org.apache.solr.common.SolrDocumentList;
 import org.openide.util.Exceptions;
 import org.openide.util.NbBundle;
 import org.openide.util.NbBundle.Messages;
-import org.sleuthkit.autopsy.casemodule.Case;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil;
 import org.sleuthkit.autopsy.coreutils.Version;
 import org.sleuthkit.autopsy.keywordsearch.KeywordQueryFilter.FilterType;
+import org.sleuthkit.autopsy.keywordsearch.KeywordSearch.QueryType;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardAttribute;
-import org.sleuthkit.datamodel.Content;
 import org.sleuthkit.datamodel.TskCoreException;

 /**
@@ -89,6 +86,7 @@ class HighlightedText implements IndexedText {
     private boolean isPageInfoLoaded = false;
     private static final boolean DEBUG = (Version.getBuildType() == Version.Type.DEVELOPMENT);
     private BlackboardArtifact artifact;
+    private KeywordSearch.QueryType qt;

     /**
      * This constructor is used when keyword hits are accessed from the ad-hoc
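The new qt field caches the query type recorded on the artifact. It is decoded in loadPageInfoFromArtifact() below by indexing KeywordSearch.QueryType.values() with the attribute's integer value, guarded against a missing attribute. A standalone sketch of that ordinal-decoding pattern, using a hypothetical enum (the real constants may differ) and adding a bounds check the commit itself does not include:

    // OrdinalDecodeSketch.java -- standalone illustration, not Autopsy code.
    public class OrdinalDecodeSketch {

        // Hypothetical stand-in for KeywordSearch.QueryType.
        enum QueryType { LITERAL, REGEX }

        // Decode an enum persisted by ordinal, tolerating a missing attribute
        // (null) and an ordinal written by an unknown schema version.
        static QueryType decode(Integer storedOrdinal) {
            QueryType[] values = QueryType.values();
            if (storedOrdinal == null || storedOrdinal < 0 || storedOrdinal >= values.length) {
                return null;
            }
            return values[storedOrdinal];
        }

        public static void main(String[] args) {
            System.out.println(decode(1));    // REGEX
            System.out.println(decode(null)); // null: attribute absent
            System.out.println(decode(7));    // null: out of range
        }
    }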
@@ -123,24 +121,51 @@ class HighlightedText implements IndexedText {
     }

     private void loadPageInfoFromArtifact() throws TskCoreException, NumberFormatException {
-        KeywordSearch.QueryType qt = KeywordSearch.QueryType.values()[artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE).getValueInt()];
-        this.keywords.add(artifact.getAttribute(TSK_KEYWORD).getValueString());
-        String chunkIDsString = artifact.getAttribute(TSK_KEYWORD_HIT_DOCUMENT_IDS).getValueString();
-        Set<String> chunkIDs = Arrays.stream(chunkIDsString.split(",")).map(StringUtils::strip).collect(Collectors.toSet());
-        for (String solrDocumentId : chunkIDs) {
-            int chunkID;
-            final int separatorIndex = solrDocumentId.indexOf(Server.CHUNK_ID_SEPARATOR);
-            if (-1 != separatorIndex) {
-                chunkID = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
-            } else {
-                chunkID = 0;
-            }
-            pages.add(chunkID);
-            numberOfHitsPerPage.put(chunkID, 0);
-            currentHitPerPage.put(chunkID, 0);
-        }
+        final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString();
+        this.keywords.add(keyword);
+        final BlackboardAttribute qtAttribute = artifact.getAttribute(TSK_KEYWORD_SEARCH_TYPE);
+        qt = (qtAttribute != null)
+                ? KeywordSearch.QueryType.values()[qtAttribute.getValueInt()] : null;
+        final BlackboardAttribute docIDsArtifact = artifact.getAttribute(TSK_KEYWORD_HIT_DOCUMENT_IDS);
+
+        if (qt == QueryType.REGEX && docIDsArtifact != null) {
+            //regex search records the chunks in the artifact
+            String chunkIDsString = docIDsArtifact.getValueString();
+            Set<String> chunkIDs = Arrays.stream(chunkIDsString.split(",")).map(StringUtils::strip).collect(Collectors.toSet());
+            for (String solrDocumentId : chunkIDs) {
+                int chunkID;
+                final int separatorIndex = solrDocumentId.indexOf(Server.CHUNK_ID_SEPARATOR);
+                if (-1 != separatorIndex) {
+                    chunkID = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
+                } else {
+                    chunkID = 0;
+                }
+                pages.add(chunkID);
+                numberOfHitsPerPage.put(chunkID, 0);
+                currentHitPerPage.put(chunkID, 0);
+            }
+            this.currentPage = pages.stream().sorted().findFirst().orElse(1);
+            isPageInfoLoaded = true;
+        } else {
+            /*
+             * non-regex searches don't record the chunks in the artifacts, so
+             * we need to look them up
+             */
+            Keyword keywordQuery = new Keyword(keyword, true);
+            KeywordSearchQuery chunksQuery
+                    = new LuceneQuery(new KeywordList(Arrays.asList(keywordQuery)), keywordQuery);
+            chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId));
+            try {
+                hits = chunksQuery.performQuery();
+                loadPageInfoFromHits();
+            } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
+                logger.log(Level.SEVERE, "Could not perform the query to get chunk info and get highlights:" + keywordQuery.getSearchTerm(), ex); //NON-NLS
+                MessageNotifyUtil.Notify.error(Bundle.HighlightedText_query_exception_msg() + keywordQuery.getSearchTerm(), ex.getCause().getMessage());
+            }
+        }
     }
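The regex branch parses the chunk list stored on the artifact: a comma-separated set of Solr document IDs, where the chunk number follows Server.CHUNK_ID_SEPARATOR and a bare object ID means the unchunked parent document (chunk 0). The non-regex branch instead re-runs a LuceneQuery with a CHUNK filter and defers to loadPageInfoFromHits(). A standalone sketch of the parsing step, assuming an underscore separator and an "<objectId><sep><chunkId>" layout:

    // ChunkIdSketch.java -- standalone illustration, not Autopsy code.
    import java.util.Arrays;
    import java.util.Set;
    import java.util.TreeSet;
    import java.util.stream.Collectors;

    public class ChunkIdSketch {

        // Assumption: stand-in for Server.CHUNK_ID_SEPARATOR.
        static final String CHUNK_ID_SEPARATOR = "_";

        // Mirror of the loop above: strip each ID, take the suffix after the
        // separator as the chunk number, and treat an unchunked ID as chunk 0.
        static Set<Integer> parseChunkIds(String chunkIDsString) {
            return Arrays.stream(chunkIDsString.split(","))
                    .map(String::trim)
                    .map(id -> {
                        int sep = id.indexOf(CHUNK_ID_SEPARATOR);
                        return (sep == -1) ? 0 : Integer.parseInt(id.substring(sep + 1));
                    })
                    .collect(Collectors.toCollection(TreeSet::new));
        }

        public static void main(String[] args) {
            // e.g. hits in chunks 2 and 5 of object 8103, plus the parent doc
            System.out.println(parseChunkIds("8103_2, 8103_5, 8103")); // [0, 2, 5]
        }
    }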
@@ -154,45 +179,6 @@ class HighlightedText implements IndexedText {
      *
      * @return
      */
-    static private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, Content content) {
-        if (literal_query) {
-            //literal, treat as non-regex, non-term component query
-            return constructEscapedSolrQuery(query.getQueryString());
-        } else //construct a Solr query using aggregated terms to get highlighting
-        //the query is executed later on demand
-        {
-            if (queryResults.getKeywords().size() == 1) {
-                //simple case, no need to process subqueries and do special escaping
-                Keyword keyword = queryResults.getKeywords().iterator().next();
-                return constructEscapedSolrQuery(keyword.getSearchTerm());
-            } else {
-                //find terms for this content hit
-                List<Keyword> hitTerms = new ArrayList<>();
-                for (Keyword keyword : queryResults.getKeywords()) {
-                    for (KeywordHit hit : queryResults.getResults(keyword)) {
-                        if (hit.getContent().equals(content)) {
-                            hitTerms.add(keyword);
-                            break; //go to next term
-                        }
-                    }
-                }
-                StringBuilder highlightQuery = new StringBuilder();
-                final int lastTerm = hitTerms.size() - 1;
-                int curTerm = 0;
-                for (Keyword term : hitTerms) {
-                    //escape subqueries, MAKE SURE they are not escaped again later
-                    highlightQuery.append(constructEscapedSolrQuery(term.getSearchTerm()));
-                    if (lastTerm != curTerm) {
-                        highlightQuery.append(" "); //acts as OR ||
-                    }
-                    ++curTerm;
-                }
-                return highlightQuery.toString();
-            }
-        }
-    }

     /**
      * Constructs a complete, escaped Solr query that is ready to be used.
@@ -236,9 +222,7 @@ class HighlightedText implements IndexedText {
              */ loadPageInfoFromArtifact();
         } else if (hasChunks) {
             // if the file has chunks, get pages with hits, sorted
-            if (loadPageInfoFromHits()) {
-                //JMTOD: look at error handeling and return values...
-            }
+            loadPageInfoFromHits();
         } else {
             //non-regex, no chunks
             this.numberPages = 1;
@@ -246,29 +230,12 @@ class HighlightedText implements IndexedText {
             numberOfHitsPerPage.put(1, 0);
             pages.add(1);
             currentHitPerPage.put(1, 0);
-            isPageInfoLoaded = true;
         }
+        isPageInfoLoaded = true;
     }

-    private boolean loadPageInfoFromHits() {
-//        /*
-//         * If this is being called from the artifacts / dir tree, then we need
-//         * to perform the search to get the highlights.
-//         */
-//        if (hits == null) {
-//
-//            Keyword keywordQuery = new Keyword(keywordHitQuery, true);
-//            KeywordSearchQuery chunksQuery
-//                    = new LuceneQuery(new KeywordList(Arrays.asList(keywordQuery)), keywordQuery);
-//            chunksQuery.addFilter(new KeywordQueryFilter(FilterType.CHUNK, this.objectId));
-//            try {
-//                hits = chunksQuery.performQuery();
-//            } catch (KeywordSearchModuleException | NoOpenCoreException ex) {
-//                logger.log(Level.SEVERE, "Could not perform the query to get chunk info and get highlights:" + keywordQuery.getSearchTerm(), ex); //NON-NLS
-//                MessageNotifyUtil.Notify.error(Bundle.HighlightedText_query_exception_msg() + keywordQuery.getSearchTerm(), ex.getCause().getMessage());
-//                return true;
-//            }
-////        }
+    private void loadPageInfoFromHits() {
         //organize the hits by page, filter as needed
         TreeSet<Integer> pagesSorted = new TreeSet<>();
@@ -277,8 +244,9 @@ class HighlightedText implements IndexedText {
                 int chunkID = hit.getChunkId();
                 if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
                     pagesSorted.add(chunkID);
-                    this.keywords.add(hit.getHit());
+                    if (StringUtils.isNotBlank(hit.getHit())) {
+                        this.keywords.add(hit.getHit());
+                    }
                 }
             }
         }
@@ -293,7 +261,7 @@ class HighlightedText implements IndexedText {
             pages.add(page);
             currentHitPerPage.put(page, 0); //set current hit to 0th
         }
-        return false;
+        isPageInfoLoaded = true;
     }

     @Override
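loadPageInfoFromHits() now returns void and records completion in the isPageInfoLoaded flag; its old boolean return carried no information, and one call site even wrapped it in an empty if with a "JMTOD" note. A minimal sketch of one way the resulting flag-guarded pattern can be used, under the assumption that callers may invoke the loader more than once:

    // LazyPageInfoSketch.java -- standalone illustration, not Autopsy code.
    public class LazyPageInfoSketch {

        private boolean isPageInfoLoaded = false;

        // Completion is recorded in the flag rather than a return value, so
        // callers can invoke this unconditionally and repeat calls are cheap.
        void loadPageInfo() {
            if (isPageInfoLoaded) {
                return; // already populated
            }
            // ... populate pages, per-page hit counts, current page ...
            isPageInfoLoaded = true;
        }
    }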
@@ -410,26 +378,29 @@ class HighlightedText implements IndexedText {
         }

         final String filterQuery = Server.Schema.ID.toString() + ":" + KeywordSearchUtil.escapeLuceneQuery(contentIdStr);
-//        if (isRegex) {
+        if (artifact != null && qt == QueryType.REGEX) {
             q.setQuery(filterQuery);
             q.addField(Server.Schema.CONTENT_STR.toString());
-//        } else {
-//            // input query has already been properly constructed and escaped
-//            q.setQuery(keywordHitQuery);
-//            q.addField(Server.Schema.TEXT.toString());
-//            q.addFilterQuery(filterQuery);
-//            q.addHighlightField(LuceneQuery.HIGHLIGHT_FIELD);
-//            q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
-//
-//            //tune the highlighter
-//            q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
-//            q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
-//            q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
-//            q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
-//
-//            //docs says makes sense for the original Highlighter only, but not really
-//            q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
-//        }
+        } else {
+            final String highlightQuery = keywords.stream()
+                    .map(HighlightedText::constructEscapedSolrQuery)
+                    .collect(Collectors.joining(" "));
+
+            q.setQuery(highlightQuery);
+            q.addField(Server.Schema.TEXT.toString());
+            q.addFilterQuery(filterQuery);
+            q.addHighlightField(LuceneQuery.HIGHLIGHT_FIELD);
+            q.setHighlightFragsize(0); // don't fragment the highlight, works with original highlighter, or needs "single" list builder with FVH
+
+            //tune the highlighter
+            q.setParam("hl.useFastVectorHighlighter", "on"); //fast highlighter scales better than standard one NON-NLS
+            q.setParam("hl.tag.pre", HIGHLIGHT_PRE); //makes sense for FastVectorHighlighter only NON-NLS
+            q.setParam("hl.tag.post", HIGHLIGHT_POST); //makes sense for FastVectorHighlighter only NON-NLS
+            q.setParam("hl.fragListBuilder", "single"); //makes sense for FastVectorHighlighter only NON-NLS
+
+            //docs says makes sense for the original Highlighter only, but not really
+            q.setParam("hl.maxAnalyzedChars", Server.HL_ANALYZE_CHARS_UNLIMITED); //NON-NLS
+        }

         try {
             QueryResponse response = solrServer.query(q, METHOD.POST);
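In the else branch the highlight query is rebuilt from this.keywords: each term goes through constructEscapedSolrQuery() and the results are joined with spaces, which Solr's default OR operator treats as "any of these terms". A standalone sketch of that aggregation, with a simplified quoting helper standing in for the real escaping routine:

    // HighlightQuerySketch.java -- standalone illustration, not Autopsy code.
    import java.util.Arrays;
    import java.util.List;
    import java.util.stream.Collectors;

    public class HighlightQuerySketch {

        // Stand-in for constructEscapedSolrQuery(): quote the term so Lucene
        // operators inside it are treated literally. The real helper may
        // escape differently and target a specific field.
        static String escapeTerm(String term) {
            return "\"" + term.replace("\"", "\\\"") + "\"";
        }

        // Join the per-hit terms with spaces; with Solr's default OR
        // operator, matching any one term produces highlighting.
        static String buildHighlightQuery(List<String> hitTerms) {
            return hitTerms.stream()
                    .map(HighlightQuerySketch::escapeTerm)
                    .collect(Collectors.joining(" "));
        }

        public static void main(String[] args) {
            System.out.println(buildHighlightQuery(Arrays.asList("jazz", "rock and roll")));
            // "jazz" "rock and roll"
        }
    }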
@@ -532,8 +503,7 @@ class HighlightedText implements IndexedText {
         for (String unquotedKeyword : keywords) {
             int textOffset = 0;
             int hitOffset;
-            while ((hitOffset = text.indexOf(unquotedKeyword, textOffset)) != -1) {
+            while ((hitOffset = StringUtils.indexOfIgnoreCase(text, unquotedKeyword, textOffset)) != -1) {
                 // Append the portion of text up to (but not including) the hit.
                 highlightedText.append(text.substring(textOffset, hitOffset));
                 // Add in the highlighting around the keyword.
@@ -542,12 +512,11 @@ class HighlightedText implements IndexedText {
                 highlightedText.append(HIGHLIGHT_POST);
                 // Advance the text offset past the keyword.
-                textOffset = hitOffset + unquotedKeyword.length() + 1;
+                textOffset = hitOffset + unquotedKeyword.length();
             }
+            // Append the remainder of text field
+            highlightedText.append(text.substring(textOffset, text.length()));

             if (highlightedText.length() > 0) {
-                // Append the remainder of text field and return.
-                highlightedText.append(text.substring(textOffset, text.length()));
             } else {
                 return NbBundle.getMessage(this.getClass(), "HighlightedMatchesSource.getMarkup.noMatchMsg");
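These two hunks fix three defects in the manual (non-Solr) highlighter: matching is now case-insensitive via StringUtils.indexOfIgnoreCase, the offset advance drops a stray "+ 1" that skipped (and omitted from the output) the character right after every hit, and the tail of the text after the last hit is appended unconditionally rather than only inside the success branch. A standalone sketch combining the three fixes, with hypothetical markup tags:

    // ManualHighlightSketch.java -- standalone illustration, not Autopsy code.
    import org.apache.commons.lang.StringUtils;

    public class ManualHighlightSketch {

        static final String PRE = "<span class=\"highlight\">"; // hypothetical markup
        static final String POST = "</span>";

        // Case-insensitive in-text highlighting: advance by exactly the
        // keyword length (the old "+ 1" dropped the next character), and
        // always append the tail of the text after the last hit.
        static String highlight(String text, String keyword) {
            StringBuilder out = new StringBuilder();
            int textOffset = 0;
            int hitOffset;
            while ((hitOffset = StringUtils.indexOfIgnoreCase(text, keyword, textOffset)) != -1) {
                out.append(text, textOffset, hitOffset);
                out.append(PRE);
                out.append(text, hitOffset, hitOffset + keyword.length()); // keep original casing
                out.append(POST);
                textOffset = hitOffset + keyword.length();
            }
            out.append(text.substring(textOffset));
            return out.toString();
        }

        public static void main(String[] args) {
            System.out.println(highlight("Visa and VISA and visas", "visa"));
        }
    }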

KeywordSearchFilterNode.java

@@ -45,7 +45,7 @@ import org.sleuthkit.datamodel.File;
  */
 class KeywordSearchFilterNode extends FilterNode {

-    KeywordSearchFilterNode(HighlightedText highlights, Node original) {
+    KeywordSearchFilterNode(QueryResults highlights, Node original) {
         super(original, null, new ProxyLookup(Lookups.singleton(highlights), original.getLookup()));
     }

KeywordSearchResultFactory.java

@@ -147,7 +147,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         int hitNumber = 0;
         List<KeyValueQueryContent> tempList = new ArrayList<>();

-//        final SetMultimap<Long, KeywordHit> orgnizeByDocID = orgnizeByDocID(queryResults);
         for (KeywordHit hit : getOneHitPerObject(queryResults)) {

             /**
@@ -169,12 +168,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
                 properties.put(TSK_KEYWORD_PREVIEW.getDisplayName(), hit.getSnippet());
             }

-            //JMTODO: I don't understand this comment or the below code...
-            //@@@ USE ConentHit in UniqueFileMap instead of the below search
-            //get unique match result files
-            // BC: @@@ THis is really ineffecient. We should keep track of this when
-            // we flattened the list of files to the unique files.
-            // final String highlightQueryEscaped = getHighlightQuery(queryRequest, queryRequest.isLiteral(), queryResults, content);
             String hitName = hit.isArtifactHit()
                     ? hit.getArtifact().getDisplayName() + " Artifact" //NON-NLS
                     : contentName;
@@ -220,18 +213,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         return hits.values();
     }

-    SetMultimap<Long, KeywordHit> orgnizeByDocID(QueryResults queryResults) {
-        SetMultimap<Long, KeywordHit> hits = TreeMultimap.create(Long::compare, Comparator.comparing(KeywordHit::getChunkId));
-        for (Keyword keyWord : queryResults.getKeywords()) {
-            for (KeywordHit hit : queryResults.getResults(keyWord)) {
-                hits.put(hit.getSolrObjectId(), hit);
-            }
-        }
-        return hits;
-    }
-
     @Override
     protected Node createNodeForKey(KeyValueQueryContent key) {
         final Content content = key.getContent();
@@ -240,9 +221,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         Node kvNode = new KeyValueNode(key, Children.LEAF, Lookups.singleton(content));
         //wrap in KeywordSearchFilterNode for the markup content, might need to override FilterNode for more customization
-        // store the data in HighlightedMatchesSource so that it can be looked up (in content viewer)
-        HighlightedText highlights = new HighlightedText(key.getSolrObjectId(), hits);
-        return new KeywordSearchFilterNode(highlights, kvNode);
+        return new KeywordSearchFilterNode(hits, kvNode);
     }

     /**
@@ -277,8 +256,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
             this.hits = hits;
             this.query = query;
-//            boolean isRegex = hits.getQuery().isLiteral() == false;
-//            this.chunkIDs = chunkIDs;
         }

         Content getContent() {

LuceneQuery.java

@@ -239,7 +239,7 @@ class LuceneQuery implements KeywordSearchQuery {
         for (Object content_obj : content) {
             String content_str = (String) content_obj;
             //for new schemas, check that the hit is before the chunk/window boundary.
-            int firstOccurence = StringUtils.indexOf(content_str.toLowerCase(), strippedQueryString.toLowerCase());
+            int firstOccurence = StringUtils.indexOfIgnoreCase(content_str, strippedQueryString);
             //there is no chunksize field for "parent" entries in the index
             if (chunkSize == null || chunkSize == 0 || (firstOccurence > -1 && firstOccurence < chunkSize)) {
                 matches.add(createKeywordtHit(highlightResponse, docId));
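Replacing toLowerCase()+indexOf with StringUtils.indexOfIgnoreCase avoids two throwaway lowercased copies and, more subtly, guarantees the returned index is an offset into the original string: case mapping can change a string's length, and firstOccurence is compared against chunkSize, so an index into a lowercased copy can drift. A small demonstration of the length change:

    // OffsetDriftSketch.java -- why lowercasing before indexOf can corrupt offsets.
    public class OffsetDriftSketch {
        public static void main(String[] args) {
            String s = "İstanbul chunk boundary test"; // starts with U+0130, dotted capital I
            // In a non-Turkish default locale, lowercasing U+0130 yields TWO
            // chars ("i" plus a combining dot), shifting every later index by one:
            System.out.println(s.length());               // 28
            System.out.println(s.toLowerCase().length()); // 29
            // An index found in s.toLowerCase() is therefore not a valid index
            // into s -- bad news when comparing firstOccurence against chunkSize.
        }
    }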
@@ -324,7 +324,7 @@ class LuceneQuery implements KeywordSearchQuery {
             }
         }

-        return new KeywordHit(docId, snippet);
+        return new KeywordHit(docId, snippet, keywordString);
     }

     /**