use regex query to set up paging of accounts if the document id isn't in the artifact

This commit is contained in:
millmanorama 2017-04-20 17:31:11 +02:00
parent b4c3a4922c
commit 12d883a74b
2 changed files with 52 additions and 34 deletions

View File

@ -18,23 +18,19 @@
*/ */
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterators; import com.google.common.collect.Iterators;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedHashMap; import java.util.Optional;
import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.TreeSet;
import java.util.logging.Level; import java.util.logging.Level;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.GuardedBy;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrRequest.METHOD; import org.apache.solr.client.solrj.SolrRequest.METHOD;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
@ -69,6 +65,7 @@ class AccountsText implements IndexedText {
private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID); private static final BlackboardAttribute.Type TSK_KEYWORD_SEARCH_DOCUMENT_ID = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_SEARCH_DOCUMENT_ID);
private static final BlackboardAttribute.Type TSK_CARD_NUMBER = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER); private static final BlackboardAttribute.Type TSK_CARD_NUMBER = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_CARD_NUMBER);
private static final BlackboardAttribute.Type TSK_KEYWORD = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD); private static final BlackboardAttribute.Type TSK_KEYWORD = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD);
private static final BlackboardAttribute.Type TSK_KEYWORD_REGEXP = new BlackboardAttribute.Type(BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP);
private static final String FIELD = Server.Schema.CONTENT_STR.toString(); private static final String FIELD = Server.Schema.CONTENT_STR.toString();
@ -89,7 +86,8 @@ class AccountsText implements IndexedText {
*/ */
private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>(); private final TreeMap<Integer, Integer> numberOfHitsPerPage = new TreeMap<>();
/* /*
* set of pages, used for iterating back and forth. Only stores pages with hits * set of pages, used for iterating back and forth. Only stores pages with
* hits
*/ */
private final Set<Integer> pages = numberOfHitsPerPage.keySet(); private final Set<Integer> pages = numberOfHitsPerPage.keySet();
/* /*
@ -142,7 +140,7 @@ class AccountsText implements IndexedText {
@NbBundle.Messages("AccountsText.nextPage.exception.msg=No next page.") @NbBundle.Messages("AccountsText.nextPage.exception.msg=No next page.")
public int nextPage() { public int nextPage() {
if (hasNextPage()) { if (hasNextPage()) {
currentPage =Iterators.get(pages.iterator(),getIndexOfCurrentPage() + 1); currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() + 1);
return currentPage; return currentPage;
} else { } else {
throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg()); throw new IllegalStateException(Bundle.AccountsText_nextPage_exception_msg());
@ -153,7 +151,7 @@ class AccountsText implements IndexedText {
@NbBundle.Messages("AccountsText.previousPage.exception.msg=No previous page.") @NbBundle.Messages("AccountsText.previousPage.exception.msg=No previous page.")
public int previousPage() { public int previousPage() {
if (hasPreviousPage()) { if (hasPreviousPage()) {
currentPage = Iterators.get(pages.iterator(),getIndexOfCurrentPage() - 1); currentPage = Iterators.get(pages.iterator(), getIndexOfCurrentPage() - 1);
return currentPage; return currentPage;
} else { } else {
throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg()); throw new IllegalStateException(Bundle.AccountsText_previousPage_exception_msg());
@ -215,7 +213,7 @@ class AccountsText implements IndexedText {
* Initialize this object with information about which pages/chunks have * Initialize this object with information about which pages/chunks have
* hits. Multiple calls will not change the initial results. * hits. Multiple calls will not change the initial results.
*/ */
synchronized private void loadPageInfo() throws IllegalStateException, TskCoreException { synchronized private void loadPageInfo() throws IllegalStateException, TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
if (isPageInfoLoaded) { if (isPageInfoLoaded) {
return; return;
} }
@ -236,31 +234,55 @@ class AccountsText implements IndexedText {
isPageInfoLoaded = true; isPageInfoLoaded = true;
} }
private void addToPagingInfo(BlackboardArtifact artifact) throws IllegalStateException, TskCoreException { /**
* Load the paging info from the QueryResults object.
*/
synchronized private void loadPageInfoFromHits(QueryResults hits) {
//organize the hits by page, filter as needed
for (Keyword k : hits.getKeywords()) {
for (KeywordHit hit : hits.getResults(k)) {
int chunkID = hit.getChunkId();
if (chunkID != 0 && this.solrObjectId == hit.getSolrObjectId()) {
String hit1 = hit.getHit();
if (accountNumbers.stream().anyMatch(hit1::contains)) {
numberOfHitsPerPage.put(chunkID, 0); //unknown number of matches in the page
currentHitPerPage.put(chunkID, 0); //set current hit to 0th
}
}
}
}
}
private void addToPagingInfo(BlackboardArtifact artifact) throws IllegalStateException, TskCoreException, KeywordSearchModuleException, NoOpenCoreException {
if (solrObjectId != artifact.getObjectID()) { if (solrObjectId != artifact.getObjectID()) {
throw new IllegalStateException("not all artifacts are from the same object!"); throw new IllegalStateException("not all artifacts are from the same object!");
} }
final String keyword = artifact.getAttribute(TSK_KEYWORD).getValueString();
this.accountNumbers.add(keyword);
accountNumbers.add(artifact.getAttribute(TSK_CARD_NUMBER).getValueString()); accountNumbers.add(artifact.getAttribute(TSK_CARD_NUMBER).getValueString());
final BlackboardAttribute keywordAttribute = artifact.getAttribute(TSK_KEYWORD);
if (keywordAttribute != null) {
accountNumbers.add(keywordAttribute.getValueString());
}
List<String> rawDocIDs = new ArrayList<>();
final BlackboardAttribute docID = artifact.getAttribute(TSK_KEYWORD_SEARCH_DOCUMENT_ID); Optional<Integer> chunkID =
if (docID != null) { Optional.ofNullable(artifact.getAttribute(TSK_KEYWORD_SEARCH_DOCUMENT_ID))
rawDocIDs.add(docID.getValueString()); .map(BlackboardAttribute::getValueString)
} .map(String::trim)
.map(kwsdocID -> StringUtils.substringAfterLast(kwsdocID, Server.CHUNK_ID_SEPARATOR))
.map(Integer::valueOf);
rawDocIDs.stream() if (chunkID.isPresent()) {
.map(String::trim) numberOfHitsPerPage.put(chunkID.get(), 0);
.map(t -> StringUtils.substringAfterLast(t, Server.CHUNK_ID_SEPARATOR)) currentHitPerPage.put(chunkID.get(), 0);
.map(Integer::valueOf) } else {
.forEach(chunkID -> { // Run a query to figure out which chunks for the current object have
numberOfHitsPerPage.put(chunkID, 0); // hits for this keyword.
currentHitPerPage.put(chunkID, 0); Keyword keywordQuery = new Keyword("(%?)(B?)([0-9][ \\-]*?){12,19}(\\^?)", false, false, "", keyword);
}); KeywordSearchQuery chunksQuery = KeywordSearchUtil.getQueryForKeyword(keywordQuery, new KeywordList(Arrays.asList(keywordQuery)));
chunksQuery.addFilter(new KeywordQueryFilter(KeywordQueryFilter.FilterType.CHUNK, this.solrObjectId));
QueryResults hits = chunksQuery.performQuery();
loadPageInfoFromHits(hits);
}
} }
@Override @Override
@ -288,8 +310,8 @@ class AccountsText implements IndexedText {
QueryResponse queryResponse = solrServer.query(q, METHOD.POST); QueryResponse queryResponse = solrServer.query(q, METHOD.POST);
String highlightedText String highlightedText =
= HighlightedText.attemptManualHighlighting( HighlightedText.attemptManualHighlighting(
queryResponse.getResults(), queryResponse.getResults(),
Server.Schema.CONTENT_STR.toString(), Server.Schema.CONTENT_STR.toString(),
accountNumbers accountNumbers

View File

@ -206,16 +206,12 @@ class HighlightedText implements IndexedText {
for (KeywordHit hit : hits.getResults(k)) { for (KeywordHit hit : hits.getResults(k)) {
int chunkID = hit.getChunkId(); int chunkID = hit.getChunkId();
if (artifact != null) { if (artifact != null) {
if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) { if (chunkID != 0 && this.objectId == hit.getSolrObjectId()) {
String hit1 = hit.getHit(); String hit1 = hit.getHit();
if (keywords.stream().anyMatch(hit1::contains)) { if (keywords.stream().anyMatch(hit1::contains)) {
numberOfHitsPerPage.put(chunkID, 0); //unknown number of matches in the page numberOfHitsPerPage.put(chunkID, 0); //unknown number of matches in the page
currentHitPerPage.put(chunkID, 0); //set current hit to 0th currentHitPerPage.put(chunkID, 0); //set current hit to 0th
// if (StringUtils.isNotBlank(hit.getHit())) {
// this.keywords.add(hit.getHit());
// }
} }
} }
} else { } else {