Interim check-in: first draft of artifact generation for keyword search hits on artifacts

Richard Cordovano 2015-01-07 17:04:41 -05:00
parent dfe4c27d30
commit b992035188
3 changed files with 206 additions and 189 deletions

View File: org/sleuthkit/autopsy/keywordsearch/KeywordHit.java

@@ -18,45 +18,80 @@
  */
 package org.sleuthkit.autopsy.keywordsearch;
 
+import org.sleuthkit.autopsy.casemodule.Case;
+import org.sleuthkit.datamodel.AbstractFile;
+import org.sleuthkit.datamodel.BlackboardArtifact;
+import org.sleuthkit.datamodel.SleuthkitCase;
+import org.sleuthkit.datamodel.TskCoreException;
+
 /**
- * Stores the fact that file or an artifact associated had a keyword hit.
+ * Stores the fact that file or an artifact had a keyword hit.
+ * <p>
+ * Instances of this class are immutable, so they are thread-safe.
  */
 class KeywordHit {
 
-    private final String documentId;
+    private final String solrDocumentId;
     private final long objectId;
     private final int chunkId;
     private final String snippet;
+    private final AbstractFile file;
+    BlackboardArtifact artifact;
 
-    KeywordHit(String documentId, String snippet) {
-        this.documentId = documentId;
-        final int separatorIndex = documentId.indexOf(Server.ID_CHUNK_SEP);
+    KeywordHit(String solrDocumentId, String snippet) throws TskCoreException {
+        /**
+         * Store the Solr document id.
+         */
+        this.solrDocumentId = solrDocumentId;
+
+        /**
+         * Parse the Solr document id to get the object id and chunk id. There
+         * will only be a chunk if the text in the object was divided into
+         * chunks.
+         */
+        final int separatorIndex = solrDocumentId.indexOf(Server.ID_CHUNK_SEP);
         if (separatorIndex != -1) {
-            this.objectId = Long.parseLong(documentId.substring(0, separatorIndex));
-            this.chunkId = Integer.parseInt(documentId.substring(separatorIndex + 1));
+            this.objectId = Long.parseLong(solrDocumentId.substring(0, separatorIndex));
+            this.chunkId = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
         } else {
-            this.objectId = Long.parseLong(documentId);
+            this.objectId = Long.parseLong(solrDocumentId);
             this.chunkId = 0;
         }
-        this.snippet = snippet;
+
+        /**
+         * Look up the file associated with the keyword hit. If the high order
+         * bit of the object id is set, the hit was for an artifact. In this
+         * case, look up the artifact as well.
+         */
+        SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase();
+        long fileId;
+        if (this.objectId < 0) {
+            long artifactId = this.objectId - 0x8000000000000000L;
+            this.artifact = caseDb.getBlackboardArtifact(artifactId);
+            fileId = artifact.getObjectID();
+        } else {
+            fileId = this.objectId;
+        }
+        this.file = caseDb.getAbstractFileById(fileId);
+
+        /**
+         * Store the text snippet.
+         */
+        this.snippet = snippet;
     }
 
-    String getDocumentId() {
-        return this.documentId;
+    String getSolrDocumentId() {
+        return this.solrDocumentId;
     }
 
     long getObjectId() {
         return this.objectId;
     }
 
-    int getChunkId() {
-        return this.chunkId;
+    boolean hasChunkId() {
+        return this.chunkId != 0;
     }
 
-    boolean isChunk() {
-        return this.chunkId != 0;
+    int getChunkId() {
+        return this.chunkId;
     }
 
     boolean hasSnippet() {
@@ -67,8 +102,16 @@ class KeywordHit {
         return this.snippet;
     }
 
+    AbstractFile getFile() {
+        return this.file;
+    }
+
+    BlackboardArtifact getArtifact() {
+        return this.artifact;
+    }
+
     @Override
-    public boolean equals(Object obj) {
+    public boolean equals(Object obj) { // RJCTODO: Fix
         if (null == obj) {
             return false;
         }
@@ -80,7 +123,7 @@ class KeywordHit {
     }
 
     @Override
-    public int hashCode() {
+    public int hashCode() { // RJCTODO: Fix
         int hash = 3;
        hash = 41 * hash + (int) this.objectId + this.chunkId;
        return hash;
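
For context, the new KeywordHit constructor assumes Solr document ids of the form <objectId> or <objectId><separator><chunkId>, and that object ids belonging to artifacts have their high-order bit set. Below is a rough standalone sketch of those conventions; the class name, the separator value standing in for Server.ID_CHUNK_SEP, and the sample ids are illustrative assumptions, not taken from the commit.

// Illustration only; the real parsing lives in the KeywordHit constructor shown above.
public class SolrDocumentIdExample {

    // Assumed value; the actual separator is Server.ID_CHUNK_SEP in the keywordsearch package.
    private static final String ID_CHUNK_SEP = "_";

    public static void main(String[] args) {
        // A Solr document id is either "<objectId>" or "<objectId>" + separator + "<chunkId>".
        String solrDocumentId = "12345" + ID_CHUNK_SEP + "2";
        int separatorIndex = solrDocumentId.indexOf(ID_CHUNK_SEP);
        long objectId;
        int chunkId;
        if (separatorIndex != -1) {
            objectId = Long.parseLong(solrDocumentId.substring(0, separatorIndex));
            chunkId = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1));
        } else {
            objectId = Long.parseLong(solrDocumentId);
            chunkId = 0;
        }
        System.out.println("objectId=" + objectId + ", chunkId=" + chunkId);

        // Artifact hits: the high-order bit of the object id is set, so the id is
        // negative as a signed long; subtracting 0x8000000000000000L clears that
        // bit and recovers the artifact id, as the constructor above does.
        long artifactObjectId = 0x8000000000000000L | 42L;
        if (artifactObjectId < 0) {
            long artifactId = artifactObjectId - 0x8000000000000000L;
            System.out.println("artifactId=" + artifactId); // prints 42
        }
    }
}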

View File: org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java

@@ -1,7 +1,7 @@
 /*
  * Autopsy Forensic Browser
  *
- * Copyright 2011-2014 Basis Technology Corp.
+ * Copyright 2011-2015 Basis Technology Corp.
  * Contact: carrier <at> sleuthkit <dot> org
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -21,13 +21,14 @@ package org.sleuthkit.autopsy.keywordsearch;
 import java.awt.EventQueue;
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.logging.Level;
 import org.openide.util.NbBundle;
 import org.sleuthkit.autopsy.coreutils.Logger;
 import javax.swing.SwingWorker;
@@ -48,8 +49,6 @@ import org.sleuthkit.datamodel.AbstractFile;
 import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardAttribute;
 import org.sleuthkit.datamodel.Content;
-import org.sleuthkit.datamodel.FsContent;
-import org.sleuthkit.datamodel.TskData.TSK_DB_FILES_TYPE_ENUM;
 
 /**
  * Node factory that performs the keyword search and creates children nodes for
@@ -65,23 +64,23 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
     public static enum CommonPropertyTypes {
         KEYWORD {
             @Override
             public String toString() {
                 return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getDisplayName();
             }
         },
         REGEX {
             @Override
             public String toString() {
                 return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getDisplayName();
             }
         },
         CONTEXT {
             @Override
             public String toString() {
                 return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getDisplayName();
             }
         },
     }
 
     private Collection<QueryRequest> queryRequests;
     private final DataResultTopComponent viewer; //viewer driving this child node factory
@@ -92,7 +91,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         this.viewer = viewer;
     }
 
     /**
      * call this at least for the parent Node, to make sure all common
      * properties are displayed as columns (since we are doing lazy child Node
@@ -132,86 +130,64 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
             initCommonProperties(map);
             final String query = queryRequest.getQueryString();
             setCommonProperty(map, CommonPropertyTypes.KEYWORD, query);
-            setCommonProperty(map, CommonPropertyTypes.REGEX, Boolean.valueOf(!queryRequest.getQuery().isLiteral()));
+            setCommonProperty(map, CommonPropertyTypes.REGEX, !queryRequest.getQuery().isLiteral());
             createFlatKeys(queryRequest, toPopulate);
         }
 
         return true;
     }
 
     /**
      *
      * @param queryRequest
      * @param toPopulate
      * @return
      */
-    protected boolean createFlatKeys(QueryRequest queryRequest, List<KeyValueQueryContent> toPopulate) {
+    private boolean createFlatKeys(QueryRequest queryRequest, List<KeyValueQueryContent> toPopulate) {
+        /**
+         * Check the validity of the requested query.
+         */
         final KeywordSearchQuery keywordSearchQuery = queryRequest.getQuery();
         if (!keywordSearchQuery.validate()) {
             //TODO mark the particular query node RED
             return false;
         }
 
-        //execute the query and get fscontents matching
+        /**
+         * Execute the requested query.
+         */
         QueryResults queryResults;
         try {
             queryResults = keywordSearchQuery.performQuery();
         } catch (NoOpenCoreException ex) {
-            logger.log(Level.WARNING, "Could not perform the query. ", ex); //NON-NLS
+            logger.log(Level.SEVERE, "Could not perform the query " + keywordSearchQuery.getQueryString(), ex); //NON-NLS
            return false;
        }
 
-        String listName = queryRequest.getQuery().getKeywordList().getName();
-        final boolean literal_query = keywordSearchQuery.isLiteral();
-
-        int resID = 0;
+        int id = 0;
         List<KeyValueQueryContent> tempList = new ArrayList<>();
-        final Map<AbstractFile, KeywordHit> uniqueFileMap = queryResults.getUniqueFiles();
-        for (final AbstractFile f : uniqueFileMap.keySet()) {
+        for (KeywordHit hit : getOneHitPerObject(queryResults)) {
+            /**
+             * Get file properties.
+             */
+            Map<String, Object> properties = new LinkedHashMap<>();
+            AbstractFile file = hit.getFile();
+            AbstractFsContentNode.fillPropertyMap(properties, file);
+
+            /**
+             * Add a snippet property, if available.
+             */
+            if (hit.hasSnippet()) {
+                setCommonProperty(properties, CommonPropertyTypes.CONTEXT, hit.getSnippet());
+            }
 
             //@@@ USE ConentHit in UniqueFileMap instead of the below search
             //get unique match result files
-            Map<String, Object> resMap = new LinkedHashMap<>();
-
             // BC: @@@ THis is really ineffecient. We should keep track of this when
             // we flattened the list of files to the unique files.
-            /* Find a keyword in that file so that we can generate a
-             * single snippet for it.
-             */
-            KeywordHit chit = uniqueFileMap.get(f);
-            if (chit.hasSnippet()) {
-                setCommonProperty(resMap, CommonPropertyTypes.CONTEXT, chit.getSnippet());
-            }
-
-//            boolean hitFound = false;
-//            for (String hitKey : queryResults.getKeywords()) {
-//                for (ContentHit contentHit : queryResults.getResults(hitKey)) {
-//                    if (contentHit.getContent().equals(f)) {
-//                        hitFound = true;
-//                        if (contentHit.hasSnippet() && (KeywordSearchUtil.escapeLuceneQuery(hitKey) != null)) {
-//                            setCommonProperty(resMap, CommonPropertyTypes.CONTEXT, contentHit.getSnippet());
-//                        }
-//                        break;
-//                    }
-//                }
-//                if (hitFound) {
-//                    break;
-//                }
-//            }
-
-            if (f.getType() == TSK_DB_FILES_TYPE_ENUM.FS) {
-                AbstractFsContentNode.fillPropertyMap(resMap, (FsContent) f);
-            }
-            final String highlightQueryEscaped = getHighlightQuery(keywordSearchQuery, literal_query, queryResults, f);
-            tempList.add(new KeyValueQueryContent(f.getName(), resMap, ++resID, f, highlightQueryEscaped, keywordSearchQuery, queryResults));
+            final String highlightQueryEscaped = getHighlightQuery(keywordSearchQuery, keywordSearchQuery.isLiteral(), queryResults, file);
+            tempList.add(new KeyValueQueryContent(file.getName(), properties, ++id, file, highlightQueryEscaped, keywordSearchQuery, queryResults));
         }
 
         // Add all the nodes to toPopulate at once. Minimizes node creation
@@ -222,21 +198,36 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         //cannot reuse snippet in BlackboardResultWriter
         //because for regex searches in UI we compress results by showing a file per regex once (even if multiple term hits)
         //whereas in bb we write every hit per file separately
-        new BlackboardResultWriter(queryResults, listName).execute();
+        new BlackboardResultWriter(queryResults, queryRequest.getQuery().getKeywordList().getName()).execute();
 
         return true;
     }
 
+    List<KeywordHit> getOneHitPerObject(QueryResults queryResults) {
+        List<KeywordHit> hits = new ArrayList<>();
+        Set<Long> uniqueObjectIds = new HashSet<>();
+        for (Keyword keyWord : queryResults.getKeywords()) {
+            for (KeywordHit hit : queryResults.getResults(keyWord)) {
+                long objectId = hit.getObjectId();
+                if (!uniqueObjectIds.contains(objectId)) {
+                    uniqueObjectIds.add(objectId);
+                    hits.add(hit);
+                }
+            }
+        }
+        return hits;
+    }
+
     /**
      * Return the string used to later have SOLR highlight the document with.
      *
      * @param query
      * @param literal_query
      * @param queryResults
-     * @param f
+     * @param file
      * @return
      */
-    private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, AbstractFile f) {
+    private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, AbstractFile file) {
         String highlightQueryEscaped;
         if (literal_query) {
             //literal, treat as non-regex, non-term component query
@@ -253,12 +244,10 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         } else {
             //find terms for this file hit
             List<String> hitTerms = new ArrayList<>();
-            for (Keyword term : queryResults.getKeywords()) {
-                List<KeywordHit> hitList = queryResults.getResults(term);
-
-                for (KeywordHit h : hitList) {
-                    if (h.getContent().equals(f)) {
-                        hitTerms.add(term.toString());
+            for (Keyword keyword : queryResults.getKeywords()) {
+                for (KeywordHit hit : queryResults.getResults(keyword)) {
+                    if (hit.getFile().equals(file)) {
+                        hitTerms.add(keyword.toString());
                         break; //go to next term
                     }
                 }
@@ -288,7 +277,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         return highlightQueryEscaped;
     }
 
     @Override
     protected Node createNodeForKey(KeyValueQueryContent key) {
         final Content content = key.getContent();
@@ -314,7 +303,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         private QueryResults hits;
         private KeywordSearchQuery query;
 
         /**
          * NOTE Parameters are defined based on how they are currently used in
          * practice
@@ -335,7 +323,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
             this.hits = hits;
             this.query = query;
         }
 
         Content getContent() {
             return content;
         }
@@ -353,7 +341,6 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         }
     }
 
     /**
      * worker for writing results to bb, with progress bar, cancellation, and
      * central registry of workers to be stopped when case is closed
@@ -369,7 +356,7 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
         private String listName;
         private QueryResults hits;
         private Collection<BlackboardArtifact> newArtifacts = new ArrayList<>();
         private static final int QUERY_DISPLAY_LEN = 40;
 
         BlackboardResultWriter(QueryResults hits, String listName) {
             this.hits = hits;
@@ -398,13 +385,13 @@ class KeywordSearchResultFactory extends ChildFactory<KeyValueQueryContent> {
             try {
                 progress = ProgressHandleFactory.createHandle(
                         NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.progress.saving", queryDisp), new Cancellable() {
                             @Override
                             public boolean cancel() {
                                 return BlackboardResultWriter.this.cancel(true);
                             }
                         });
 
                 // Create blackboard artifacts
                 newArtifacts = hits.writeAllHitsToBlackBoard(progress, null, this, false);
             } finally {
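
The getOneHitPerObject() helper added to this file (and the per-keyword variant added to QueryResults below) flattens the query results so that only the first hit seen for each object id is kept. A rough standalone sketch of that flattening follows; the Hit class, method name, and sample data are illustrative assumptions, not Autopsy types.

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class OneHitPerObjectExample {

    // Hypothetical stand-in for KeywordHit; only the object id matters for deduplication.
    static class Hit {
        final long objectId;
        final String snippet;

        Hit(long objectId, String snippet) {
            this.objectId = objectId;
            this.snippet = snippet;
        }
    }

    // Keep only the first hit seen for each object id, preserving encounter order.
    static List<Hit> oneHitPerObject(List<Hit> allHits) {
        List<Hit> unique = new ArrayList<>();
        Set<Long> seenObjectIds = new HashSet<>();
        for (Hit hit : allHits) {
            if (seenObjectIds.add(hit.objectId)) { // add() returns false if the id was already seen
                unique.add(hit);
            }
        }
        return unique;
    }

    public static void main(String[] args) {
        List<Hit> hits = new ArrayList<>();
        hits.add(new Hit(1, "first hit in object 1"));
        hits.add(new Hit(1, "second hit in object 1"));
        hits.add(new Hit(2, "first hit in object 2"));
        System.out.println(oneHitPerObject(hits).size()); // prints 2
    }
}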

View File: org/sleuthkit/autopsy/keywordsearch/QueryResults.java

@@ -21,6 +21,7 @@ package org.sleuthkit.autopsy.keywordsearch;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -40,123 +41,89 @@ import org.sleuthkit.datamodel.BlackboardArtifact;
 import org.sleuthkit.datamodel.BlackboardAttribute;
 
 /**
- * Stores the results from running a SOLR query (which could contain multiple keywords).
+ * Stores the results from running a SOLR query (which could contain multiple
+ * keywords).
  *
  */
 class QueryResults {
 
     private static final Logger logger = Logger.getLogger(QueryResults.class.getName());
     private KeywordSearchQuery keywordSearchQuery;
     // maps Keyword object to its hits
     private Map<Keyword, List<KeywordHit>> results = new HashMap<>();
     private KeywordList keywordList;
 
-    QueryResults (KeywordSearchQuery query, KeywordList keywordList) {
+    QueryResults(KeywordSearchQuery query, KeywordList keywordList) {
         this.keywordSearchQuery = query;
         this.keywordList = keywordList;
     }
 
     void addResult(Keyword keyword, List<KeywordHit> hits) {
         results.put(keyword, hits);
     }
 
     KeywordList getKeywordList() {
         return keywordList;
     }
 
     KeywordSearchQuery getQuery() {
         return keywordSearchQuery;
     }
 
     List<KeywordHit> getResults(Keyword keyword) {
         return results.get(keyword);
     }
 
     Set<Keyword> getKeywords() {
         return results.keySet();
     }
 
-    /**
-     * Get the unique set of files across all keywords in the results
-     * @param results
-     * @return
-     */
-    List<KeywordHit> getUniqueFiles() {
-        List<KeywordHit> uniqueHits = new ArrayList<>();
-        for (Keyword keyWord : getKeywords()) {
-            for (KeywordHit hit : getResults(keyWord)) {
-                AbstractFile abstractFile = hit.getContent();
-                //flatten, record first chunk encountered
-                if (!uniqueHits.containsKey(abstractFile)) {
-                    uniqueHits.put(abstractFile, hit);
-                }
-            }
-        }
-        return uniqueHits;
-    }
-
-    /**
-     * Get the unique set of files for a specific keyword
-     * @param keyword
-     * @return Map of Abstract files and the chunk with the first hit
-     */
-    Map<AbstractFile, Integer> getUniqueFiles(Keyword keyword) {
-        Map<AbstractFile, Integer> ret = new LinkedHashMap<>();
-        for (KeywordHit h : getResults(keyword)) {
-            AbstractFile f = h.getContent();
-            if (!ret.containsKey(f)) {
-                ret.put(f, h.getChunkId());
-            }
-        }
-        return ret;
-    }
-
     /**
-     * Creates a blackboard artifacts for the hits. makes one artifact per keyword per file (i.e. if a keyword hits several times in teh file, only one artifact is created)
+     * Creates a blackboard artifacts for the hits. makes one artifact per
+     * keyword per file (i.e. if a keyword hits several times in the file, only
+     * one artifact is created)
+     *
      * @param listName
      * @param progress can be null
     * @param subProgress can be null
-     * @param notifyInbox flag indicating whether or not to call writeSingleFileInboxMessage() for each hit
+     * @param notifyInbox flag indicating whether or not to call
+     * writeSingleFileInboxMessage() for each hit
     * @return list of new artifactsPerFile
     */
-    public Collection<BlackboardArtifact> writeAllHitsToBlackBoard(ProgressHandle progress, ProgressContributor subProgress, SwingWorker<Object, Void> worker, boolean notifyInbox) {
+    Collection<BlackboardArtifact> writeAllHitsToBlackBoard(ProgressHandle progress, ProgressContributor subProgress, SwingWorker<Object, Void> worker, boolean notifyInbox) {
         final Collection<BlackboardArtifact> newArtifacts = new ArrayList<>();
         if (progress != null) {
             progress.start(getKeywords().size());
         }
         int unitProgress = 0;
 
-        for (final Keyword hitTerm : getKeywords()) {
+        for (final Keyword keyword : getKeywords()) {
             if (worker.isCancelled()) {
-                logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", hitTerm.getQuery()); //NON-NLS
+                logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keyword.getQuery()); //NON-NLS
                 break;
             }
 
             // Update progress object(s), if any
             if (progress != null) {
-                progress.progress(hitTerm.toString(), unitProgress);
+                progress.progress(keyword.toString(), unitProgress);
             }
             if (subProgress != null) {
-                String hitDisplayStr = hitTerm.getQuery();
+                String hitDisplayStr = keyword.getQuery();
                 if (hitDisplayStr.length() > 50) {
                     hitDisplayStr = hitDisplayStr.substring(0, 49) + "...";
                 }
                 subProgress.progress(keywordList.getName() + ": " + hitDisplayStr, unitProgress);
             }
 
-            // this returns the unique files in the set with the first chunk that has a hit
-            Map<AbstractFile, Integer> flattened = getUniqueFiles(hitTerm);
-
-            for (AbstractFile hitFile : flattened.keySet()) {
-                String termString = hitTerm.getQuery();
-                int chunkId = flattened.get(hitFile);
+            for (KeywordHit hit : getOneHitPerObject(keyword)) {
+                String termString = keyword.getQuery();
+                int chunkId = hit.getChunkId();
                 final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(termString);
                 String snippet;
                 try {
-                    snippet = LuceneQuery.querySnippet(snippetQuery, hitFile.getId(), chunkId, !keywordSearchQuery.isLiteral(), true);
+                    snippet = LuceneQuery.querySnippet(snippetQuery, hit.getObjectId(), chunkId, !keywordSearchQuery.isLiteral(), true); // RJCTODO: IS this right?
                 } catch (NoOpenCoreException e) {
                     logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); //NON-NLS
                     //no reason to continue
@@ -166,35 +133,55 @@ class QueryResults {
                     continue;
                 }
 
                 if (snippet != null) {
-                    KeywordCachedArtifact writeResult = keywordSearchQuery.writeSingleFileHitsToBlackBoard(termString, hitFile, snippet, keywordList.getName());
+                    KeywordCachedArtifact writeResult = keywordSearchQuery.writeSingleFileHitsToBlackBoard(termString, hit.getFile(), snippet, keywordList.getName()); // RJCTODO: Probably not right
                     if (writeResult != null) {
                         newArtifacts.add(writeResult.getArtifact());
                         if (notifyInbox) {
-                            writeSingleFileInboxMessage(writeResult, hitFile);
+                            writeSingleFileInboxMessage(writeResult, hit.getFile()); // RJCTODO: Probably not right
                         }
                     } else {
-                        logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hitFile, hitTerm.toString()}); //NON-NLS
+                        logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hit.getFile(), keyword.toString()}); //NON-NLS
                     }
                 }
             }
             ++unitProgress;
         }
 
         // Update artifact browser
         if (!newArtifacts.isEmpty()) {
             IngestServices.getInstance().fireModuleDataEvent(new ModuleDataEvent(KeywordSearchModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts));
         }
 
         return newArtifacts;
     }
 
+    /**
+     * RJCTODO: Update Get the unique set of files for a specific keyword
+     *
+     * @param keyword
+     * @return Map of Abstract files and the chunk with the first hit
+     */
+    private List<KeywordHit> getOneHitPerObject(Keyword keyword) {
+        List<KeywordHit> hits = new ArrayList<>();
+        Set<Long> uniqueObjectIds = new HashSet<>();
+        for (KeywordHit hit : getResults(keyword)) {
+            long objectId = hit.getObjectId();
+            if (!uniqueObjectIds.contains(objectId)) {
+                uniqueObjectIds.add(objectId);
+                hits.add(hit);
+            }
+        }
+        return hits;
+    }
+
     /**
      * Generate an ingest inbox message for given keyword in given file
+     *
      * @param written
      * @param hitFile
      */
-    public void writeSingleFileInboxMessage(KeywordCachedArtifact written, AbstractFile hitFile) {
+    private void writeSingleFileInboxMessage(KeywordCachedArtifact written, AbstractFile hitFile) {
         StringBuilder subjectSb = new StringBuilder();
         StringBuilder detailsSb = new StringBuilder();
@@ -255,5 +242,5 @@ class QueryResults {
         IngestServices.getInstance().postMessage(IngestMessage.createDataMessage(KeywordSearchModuleFactory.getModuleName(), subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact()));
     }
 }