From b992035188049e51f81f1185fe9160060a8aab82 Mon Sep 17 00:00:00 2001
From: Richard Cordovano
Date: Wed, 7 Jan 2015 17:04:41 -0500
Subject: [PATCH] Interim checkin: first draft of artifact keyword search hit
 artifact generation

---
 .../autopsy/keywordsearch/KeywordHit.java   |  81 +++++++--
 .../KeywordSearchResultFactory.java         | 167 ++++++++----------
 .../autopsy/keywordsearch/QueryResults.java | 147 +++++++--------
 3 files changed, 206 insertions(+), 189 deletions(-)

diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java
index c8bd0c8aaa..87b8a4545c 100644
--- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java
+++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordHit.java
@@ -18,45 +18,80 @@
  */
 package org.sleuthkit.autopsy.keywordsearch;
 
+import org.sleuthkit.autopsy.casemodule.Case;
+import org.sleuthkit.datamodel.AbstractFile;
+import org.sleuthkit.datamodel.BlackboardArtifact;
+import org.sleuthkit.datamodel.SleuthkitCase;
+import org.sleuthkit.datamodel.TskCoreException;
+
 /**
- * Stores the fact that file or an artifact associated had a keyword hit.
- *
- * Instances of this class are immutable, so they are thread-safe. + * Stores the fact that file or an artifact had a keyword hit. */ class KeywordHit { - private final String documentId; + private final String solrDocumentId; private final long objectId; private final int chunkId; private final String snippet; + private final AbstractFile file; + BlackboardArtifact artifact; - KeywordHit(String documentId, String snippet) { - this.documentId = documentId; - final int separatorIndex = documentId.indexOf(Server.ID_CHUNK_SEP); + KeywordHit(String solrDocumentId, String snippet) throws TskCoreException { + /** + * Store the Solr document id. + */ + this.solrDocumentId = solrDocumentId; + + /** + * Parse the Solr document id to get the object id and chunk id. There + * will only be a chunk if the text in the object was divided into + * chunks. + */ + final int separatorIndex = solrDocumentId.indexOf(Server.ID_CHUNK_SEP); if (separatorIndex != -1) { - this.objectId = Long.parseLong(documentId.substring(0, separatorIndex)); - this.chunkId = Integer.parseInt(documentId.substring(separatorIndex + 1)); + this.objectId = Long.parseLong(solrDocumentId.substring(0, separatorIndex)); + this.chunkId = Integer.parseInt(solrDocumentId.substring(separatorIndex + 1)); } else { - this.objectId = Long.parseLong(documentId); + this.objectId = Long.parseLong(solrDocumentId); this.chunkId = 0; } - this.snippet = snippet; + + /** + * Look up the file associated with the keyword hit. If the high order + * bit of the object id is set, the hit was for an artifact. In this + * case, look up the artifact as well. + */ + SleuthkitCase caseDb = Case.getCurrentCase().getSleuthkitCase(); + long fileId; + if (this.objectId < 0) { + long artifactId = this.objectId - 0x8000000000000000L; + this.artifact = caseDb.getBlackboardArtifact(artifactId); + fileId = artifact.getObjectID(); + } else { + fileId = this.objectId; + } + this.file = caseDb.getAbstractFileById(fileId); + + /** + * Store the text snippet. + */ + this.snippet = snippet; } - String getDocumentId() { - return this.documentId; + String getSolrDocumentId() { + return this.solrDocumentId; } long getObjectId() { return this.objectId; } - int getChunkId() { - return this.chunkId; + boolean hasChunkId() { + return this.chunkId != 0; } - boolean isChunk() { - return this.chunkId != 0; + int getChunkId() { + return this.chunkId; } boolean hasSnippet() { @@ -67,8 +102,16 @@ class KeywordHit { return this.snippet; } + AbstractFile getFile() { + return this.file; + } + + BlackboardArtifact getArtifact() { + return this.artifact; + } + @Override - public boolean equals(Object obj) { + public boolean equals(Object obj) { // RJCTODO: Fix if (null == obj) { return false; } @@ -80,7 +123,7 @@ class KeywordHit { } @Override - public int hashCode() { + public int hashCode() { // RJCTODO: Fix int hash = 3; hash = 41 * hash + (int) this.objectId + this.chunkId; return hash; diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java index 9850d83354..f8e73db55c 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/KeywordSearchResultFactory.java @@ -1,7 +1,7 @@ /* * Autopsy Forensic Browser * - * Copyright 2011-2014 Basis Technology Corp. + * Copyright 2011-2015 Basis Technology Corp. 
* Contact: carrier sleuthkit org * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,13 +21,14 @@ package org.sleuthkit.autopsy.keywordsearch; import java.awt.EventQueue; import java.util.ArrayList; import java.util.Collection; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.logging.Level; - import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; import javax.swing.SwingWorker; @@ -48,8 +49,6 @@ import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardAttribute; import org.sleuthkit.datamodel.Content; -import org.sleuthkit.datamodel.FsContent; -import org.sleuthkit.datamodel.TskData.TSK_DB_FILES_TYPE_ENUM; /** * Node factory that performs the keyword search and creates children nodes for @@ -65,23 +64,23 @@ class KeywordSearchResultFactory extends ChildFactory { public static enum CommonPropertyTypes { KEYWORD { - @Override - public String toString() { - return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getDisplayName(); - } - }, + @Override + public String toString() { + return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD.getDisplayName(); + } + }, REGEX { - @Override - public String toString() { - return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getDisplayName(); - } - }, + @Override + public String toString() { + return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_REGEXP.getDisplayName(); + } + }, CONTEXT { - @Override - public String toString() { - return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getDisplayName(); - } - }, + @Override + public String toString() { + return BlackboardAttribute.ATTRIBUTE_TYPE.TSK_KEYWORD_PREVIEW.getDisplayName(); + } + }, } private Collection queryRequests; private final DataResultTopComponent viewer; //viewer driving this child node factory @@ -92,7 +91,6 @@ class KeywordSearchResultFactory extends ChildFactory { this.viewer = viewer; } - /** * call this at least for the parent Node, to make sure all common * properties are displayed as columns (since we are doing lazy child Node @@ -132,86 +130,64 @@ class KeywordSearchResultFactory extends ChildFactory { initCommonProperties(map); final String query = queryRequest.getQueryString(); setCommonProperty(map, CommonPropertyTypes.KEYWORD, query); - setCommonProperty(map, CommonPropertyTypes.REGEX, Boolean.valueOf(!queryRequest.getQuery().isLiteral())); + setCommonProperty(map, CommonPropertyTypes.REGEX, !queryRequest.getQuery().isLiteral()); createFlatKeys(queryRequest, toPopulate); } return true; } - - /** * * @param queryRequest * @param toPopulate * @return */ - protected boolean createFlatKeys(QueryRequest queryRequest, List toPopulate) { + private boolean createFlatKeys(QueryRequest queryRequest, List toPopulate) { + /** + * Check the validity of the requested query. + */ final KeywordSearchQuery keywordSearchQuery = queryRequest.getQuery(); - if (!keywordSearchQuery.validate()) { //TODO mark the particular query node RED return false; } - //execute the query and get fscontents matching + /** + * Execute the requested query. + */ QueryResults queryResults; try { queryResults = keywordSearchQuery.performQuery(); } catch (NoOpenCoreException ex) { - logger.log(Level.WARNING, "Could not perform the query. 
", ex); //NON-NLS + logger.log(Level.SEVERE, "Could not perform the query " + keywordSearchQuery.getQueryString(), ex); //NON-NLS return false; } - - String listName = queryRequest.getQuery().getKeywordList().getName(); - - final boolean literal_query = keywordSearchQuery.isLiteral(); - - int resID = 0; - + int id = 0; List tempList = new ArrayList<>(); - final Map uniqueFileMap = queryResults.getUniqueFiles(); - for (final AbstractFile f : uniqueFileMap.keySet()) { + for (KeywordHit hit : getOneHitPerObject(queryResults)) { + /** + * Get file properties. + */ + Map properties = new LinkedHashMap<>(); + AbstractFile file = hit.getFile(); + AbstractFsContentNode.fillPropertyMap(properties, file); + + /** + * Add a snippet property, if available. + */ + if (hit.hasSnippet()) { + setCommonProperty(properties, CommonPropertyTypes.CONTEXT, hit.getSnippet()); + } //@@@ USE ConentHit in UniqueFileMap instead of the below search //get unique match result files - Map resMap = new LinkedHashMap<>(); - - // BC: @@@ THis is really ineffecient. We should keep track of this when - // we flattened the list of files to the unique files. + // we flattened the list of files to the unique files. + final String highlightQueryEscaped = getHighlightQuery(keywordSearchQuery, keywordSearchQuery.isLiteral(), queryResults, file); - /* Find a keyword in that file so that we can generate a - * single snippet for it. - */ - - KeywordHit chit = uniqueFileMap.get(f); - if (chit.hasSnippet()) { - setCommonProperty(resMap, CommonPropertyTypes.CONTEXT, chit.getSnippet()); - } - -// boolean hitFound = false; -// for (String hitKey : queryResults.getKeywords()) { -// for (ContentHit contentHit : queryResults.getResults(hitKey)) { -// if (contentHit.getContent().equals(f)) { -// hitFound = true; -// if (contentHit.hasSnippet() && (KeywordSearchUtil.escapeLuceneQuery(hitKey) != null)) { -// setCommonProperty(resMap, CommonPropertyTypes.CONTEXT, contentHit.getSnippet()); -// } -// break; -// } -// } -// if (hitFound) { -// break; -// } -// } - if (f.getType() == TSK_DB_FILES_TYPE_ENUM.FS) { - AbstractFsContentNode.fillPropertyMap(resMap, (FsContent) f); - } - final String highlightQueryEscaped = getHighlightQuery(keywordSearchQuery, literal_query, queryResults, f); - tempList.add(new KeyValueQueryContent(f.getName(), resMap, ++resID, f, highlightQueryEscaped, keywordSearchQuery, queryResults)); + tempList.add(new KeyValueQueryContent(file.getName(), properties, ++id, file, highlightQueryEscaped, keywordSearchQuery, queryResults)); } // Add all the nodes to toPopulate at once. 
Minimizes node creation @@ -222,21 +198,36 @@ class KeywordSearchResultFactory extends ChildFactory { //cannot reuse snippet in BlackboardResultWriter //because for regex searches in UI we compress results by showing a file per regex once (even if multiple term hits) //whereas in bb we write every hit per file separately - new BlackboardResultWriter(queryResults, listName).execute(); + new BlackboardResultWriter(queryResults, queryRequest.getQuery().getKeywordList().getName()).execute(); return true; } + List getOneHitPerObject(QueryResults queryResults) { + List hits = new ArrayList<>(); + Set uniqueObjectIds = new HashSet<>(); + for (Keyword keyWord : queryResults.getKeywords()) { + for (KeywordHit hit : queryResults.getResults(keyWord)) { + long objectId = hit.getObjectId(); + if (!uniqueObjectIds.contains(objectId)) { + uniqueObjectIds.add(objectId); + hits.add(hit); + } + } + } + return hits; + } + /** * Return the string used to later have SOLR highlight the document with. * * @param query * @param literal_query * @param queryResults - * @param f + * @param file * @return */ - private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, AbstractFile f) { + private String getHighlightQuery(KeywordSearchQuery query, boolean literal_query, QueryResults queryResults, AbstractFile file) { String highlightQueryEscaped; if (literal_query) { //literal, treat as non-regex, non-term component query @@ -253,12 +244,10 @@ class KeywordSearchResultFactory extends ChildFactory { } else { //find terms for this file hit List hitTerms = new ArrayList<>(); - for (Keyword term : queryResults.getKeywords()) { - List hitList = queryResults.getResults(term); - - for (KeywordHit h : hitList) { - if (h.getContent().equals(f)) { - hitTerms.add(term.toString()); + for (Keyword keyword : queryResults.getKeywords()) { + for (KeywordHit hit : queryResults.getResults(keyword)) { + if (hit.getFile().equals(file)) { + hitTerms.add(keyword.toString()); break; //go to next term } } @@ -288,7 +277,7 @@ class KeywordSearchResultFactory extends ChildFactory { return highlightQueryEscaped; } - + @Override protected Node createNodeForKey(KeyValueQueryContent key) { final Content content = key.getContent(); @@ -314,7 +303,6 @@ class KeywordSearchResultFactory extends ChildFactory { private QueryResults hits; private KeywordSearchQuery query; - /** * NOTE Parameters are defined based on how they are currently used in * practice @@ -335,7 +323,7 @@ class KeywordSearchResultFactory extends ChildFactory { this.hits = hits; this.query = query; } - + Content getContent() { return content; } @@ -353,7 +341,6 @@ class KeywordSearchResultFactory extends ChildFactory { } } - /** * worker for writing results to bb, with progress bar, cancellation, and * central registry of workers to be stopped when case is closed @@ -369,7 +356,7 @@ class KeywordSearchResultFactory extends ChildFactory { private String listName; private QueryResults hits; private Collection newArtifacts = new ArrayList<>(); - private static final int QUERY_DISPLAY_LEN = 40; + private static final int QUERY_DISPLAY_LEN = 40; BlackboardResultWriter(QueryResults hits, String listName) { this.hits = hits; @@ -398,13 +385,13 @@ class KeywordSearchResultFactory extends ChildFactory { try { progress = ProgressHandleFactory.createHandle( - NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.progress.saving", queryDisp), new Cancellable() { - @Override - public boolean cancel() { - return 
BlackboardResultWriter.this.cancel(true); - } - }); - + NbBundle.getMessage(this.getClass(), "KeywordSearchResultFactory.progress.saving", queryDisp), new Cancellable() { + @Override + public boolean cancel() { + return BlackboardResultWriter.this.cancel(true); + } + }); + // Create blackboard artifacts newArtifacts = hits.writeAllHitsToBlackBoard(progress, null, this, false); } finally { diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java index ad0ca1c6fb..66e75759aa 100755 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/QueryResults.java @@ -21,6 +21,7 @@ package org.sleuthkit.autopsy.keywordsearch; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -40,123 +41,89 @@ import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardAttribute; /** - * Stores the results from running a SOLR query (which could contain multiple keywords). - * + * Stores the results from running a SOLR query (which could contain multiple + * keywords). + * */ class QueryResults { + private static final Logger logger = Logger.getLogger(QueryResults.class.getName()); - + private KeywordSearchQuery keywordSearchQuery; - + // maps Keyword object to its hits private Map> results = new HashMap<>(); private KeywordList keywordList; - - QueryResults (KeywordSearchQuery query, KeywordList keywordList) { + + QueryResults(KeywordSearchQuery query, KeywordList keywordList) { this.keywordSearchQuery = query; this.keywordList = keywordList; } - + void addResult(Keyword keyword, List hits) { results.put(keyword, hits); } - + KeywordList getKeywordList() { return keywordList; } - + KeywordSearchQuery getQuery() { return keywordSearchQuery; } - + List getResults(Keyword keyword) { return results.get(keyword); } - + Set getKeywords() { - return results.keySet(); + return results.keySet(); } - - /** - * Get the unique set of files across all keywords in the results - * @param results - * @return - */ - List getUniqueFiles() { - List uniqueHits = new ArrayList<>(); - for (Keyword keyWord : getKeywords()) { - for (KeywordHit hit : getResults(keyWord)) { - AbstractFile abstractFile = hit.getContent(); - //flatten, record first chunk encountered - if (!uniqueHits.containsKey(abstractFile)) { - uniqueHits.put(abstractFile, hit); - } - } - } - return uniqueHits; - } - /** - * Get the unique set of files for a specific keyword - * @param keyword - * @return Map of Abstract files and the chunk with the first hit - */ - Map getUniqueFiles(Keyword keyword) { - Map ret = new LinkedHashMap<>(); - for (KeywordHit h : getResults(keyword)) { - AbstractFile f = h.getContent(); - if (!ret.containsKey(f)) { - ret.put(f, h.getChunkId()); - } - } - - return ret; - } - - /** - * Creates a blackboard artifacts for the hits. makes one artifact per keyword per file (i.e. if a keyword hits several times in teh file, only one artifact is created) + * Creates a blackboard artifacts for the hits. makes one artifact per + * keyword per file (i.e. 
if a keyword hits several times in the file, only + * one artifact is created) + * * @param listName - * @param progress can be null + * @param progress can be null * @param subProgress can be null - * @param notifyInbox flag indicating whether or not to call writeSingleFileInboxMessage() for each hit + * @param notifyInbox flag indicating whether or not to call + * writeSingleFileInboxMessage() for each hit * @return list of new artifactsPerFile */ - public Collection writeAllHitsToBlackBoard(ProgressHandle progress, ProgressContributor subProgress, SwingWorker worker, boolean notifyInbox) { + Collection writeAllHitsToBlackBoard(ProgressHandle progress, ProgressContributor subProgress, SwingWorker worker, boolean notifyInbox) { final Collection newArtifacts = new ArrayList<>(); if (progress != null) { progress.start(getKeywords().size()); } int unitProgress = 0; - - for (final Keyword hitTerm : getKeywords()) { + + for (final Keyword keyword : getKeywords()) { if (worker.isCancelled()) { - logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", hitTerm.getQuery()); //NON-NLS + logger.log(Level.INFO, "Cancel detected, bailing before new keyword processed: {0}", keyword.getQuery()); //NON-NLS break; } - + // Update progress object(s), if any if (progress != null) { - progress.progress(hitTerm.toString(), unitProgress); - } + progress.progress(keyword.toString(), unitProgress); + } if (subProgress != null) { - String hitDisplayStr = hitTerm.getQuery(); + String hitDisplayStr = keyword.getQuery(); if (hitDisplayStr.length() > 50) { hitDisplayStr = hitDisplayStr.substring(0, 49) + "..."; } subProgress.progress(keywordList.getName() + ": " + hitDisplayStr, unitProgress); } - - // this returns the unique files in the set with the first chunk that has a hit - Map flattened = getUniqueFiles(hitTerm); - - for (AbstractFile hitFile : flattened.keySet()) { - String termString = hitTerm.getQuery(); - int chunkId = flattened.get(hitFile); + + for (KeywordHit hit : getOneHitPerObject(keyword)) { + String termString = keyword.getQuery(); + int chunkId = hit.getChunkId(); final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(termString); String snippet; try { - snippet = LuceneQuery.querySnippet(snippetQuery, hitFile.getId(), chunkId, !keywordSearchQuery.isLiteral(), true); + snippet = LuceneQuery.querySnippet(snippetQuery, hit.getObjectId(), chunkId, !keywordSearchQuery.isLiteral(), true); // RJCTODO: IS this right? 
} catch (NoOpenCoreException e) { logger.log(Level.WARNING, "Error querying snippet: " + snippetQuery, e); //NON-NLS //no reason to continue @@ -166,35 +133,55 @@ class QueryResults { continue; } if (snippet != null) { - KeywordCachedArtifact writeResult = keywordSearchQuery.writeSingleFileHitsToBlackBoard(termString, hitFile, snippet, keywordList.getName()); - + KeywordCachedArtifact writeResult = keywordSearchQuery.writeSingleFileHitsToBlackBoard(termString, hit.getFile(), snippet, keywordList.getName()); // RJCTODO: Probably not right + if (writeResult != null) { newArtifacts.add(writeResult.getArtifact()); if (notifyInbox) { - writeSingleFileInboxMessage(writeResult, hitFile); + writeSingleFileInboxMessage(writeResult, hit.getFile()); // RJCTODO: Probably not right } } else { - logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hitFile, hitTerm.toString()}); //NON-NLS + logger.log(Level.WARNING, "BB artifact for keyword hit not written, file: {0}, hit: {1}", new Object[]{hit.getFile(), keyword.toString()}); //NON-NLS } } } ++unitProgress; } - + // Update artifact browser if (!newArtifacts.isEmpty()) { IngestServices.getInstance().fireModuleDataEvent(new ModuleDataEvent(KeywordSearchModuleFactory.getModuleName(), BlackboardArtifact.ARTIFACT_TYPE.TSK_KEYWORD_HIT, newArtifacts)); - } - + } + return newArtifacts; - } - + } + + /** + * RJCTODO: Update Get the unique set of files for a specific keyword + * + * @param keyword + * @return Map of Abstract files and the chunk with the first hit + */ + private List getOneHitPerObject(Keyword keyword) { + List hits = new ArrayList<>(); + Set uniqueObjectIds = new HashSet<>(); + for (KeywordHit hit : getResults(keyword)) { + long objectId = hit.getObjectId(); + if (!uniqueObjectIds.contains(objectId)) { + uniqueObjectIds.add(objectId); + hits.add(hit); + } + } + return hits; + } + /** * Generate an ingest inbox message for given keyword in given file + * * @param written - * @param hitFile + * @param hitFile */ - public void writeSingleFileInboxMessage(KeywordCachedArtifact written, AbstractFile hitFile) { + private void writeSingleFileInboxMessage(KeywordCachedArtifact written, AbstractFile hitFile) { StringBuilder subjectSb = new StringBuilder(); StringBuilder detailsSb = new StringBuilder(); @@ -255,5 +242,5 @@ class QueryResults { IngestServices.getInstance().postMessage(IngestMessage.createDataMessage(KeywordSearchModuleFactory.getModuleName(), subjectSb.toString(), detailsSb.toString(), uniqueKey, written.getArtifact())); } - + }
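
The new KeywordHit constructor packs two facts into each Solr document id: the object id, plus an optional chunk id after a separator. Below is a standalone sketch of that parsing, with "_" assumed as a stand-in for the actual Server.ID_CHUNK_SEP value:

import java.util.Arrays;

// Standalone sketch of the document-id parsing in the new KeywordHit
// constructor. The "_" separator is assumed for illustration; the real
// code reads it from Server.ID_CHUNK_SEP.
public class SolrDocumentIdSketch {

    static final String ID_CHUNK_SEP = "_"; // assumption, see note above

    // Returns {objectId, chunkId}; chunkId is 0 when the text was not chunked.
    static long[] parse(String solrDocumentId) {
        final int separatorIndex = solrDocumentId.indexOf(ID_CHUNK_SEP);
        if (separatorIndex != -1) {
            return new long[]{
                Long.parseLong(solrDocumentId.substring(0, separatorIndex)),
                Integer.parseInt(solrDocumentId.substring(separatorIndex + 1))
            };
        }
        return new long[]{Long.parseLong(solrDocumentId), 0};
    }

    public static void main(String[] args) {
        System.out.println(Arrays.toString(parse("8675309_4"))); // [8675309, 4]
        System.out.println(Arrays.toString(parse("8675309")));   // [8675309, 0]
    }
}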
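
The constructor tells files and artifacts apart by sign: an object id with its high-order bit set is negative and marks an artifact hit, and subtracting 0x8000000000000000L recovers the artifact id. A round-trip sketch of that two's-complement arithmetic (the encoding direction is inferred from the decode in the patch):

// 0x8000000000000000L == Long.MIN_VALUE; adding or subtracting it just
// flips the sign bit of a 64-bit two's-complement long.
public class ArtifactIdEncodingSketch {

    static final long HIGH_ORDER_BIT = 0x8000000000000000L;

    static long toSolrObjectId(long artifactId) {
        // Assumed encoding: setting the high-order bit makes artifact ids
        // negative, so they cannot collide with positive file object ids.
        return artifactId + HIGH_ORDER_BIT;
    }

    static long toArtifactId(long solrObjectId) {
        // Matches the decode in the new KeywordHit constructor.
        return solrObjectId - HIGH_ORDER_BIT;
    }

    public static void main(String[] args) {
        long artifactId = 1234L;
        long solrObjectId = toSolrObjectId(artifactId);
        System.out.println(solrObjectId < 0);                         // true
        System.out.println(toArtifactId(solrObjectId) == artifactId); // true
    }
}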
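
Both equals() and hashCode() carry RJCTODO: Fix markers. One conventional pairing over the (objectId, chunkId) identity the interim code already compares, sketched as it might appear in KeywordHit; this is a possible resolution, not the author's final design:

    // Sketch only: a conventional equals()/hashCode() pair over the
    // (objectId, chunkId) identity that the interim code already uses.
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof KeywordHit)) {
            return false;
        }
        KeywordHit other = (KeywordHit) obj;
        return this.objectId == other.objectId && this.chunkId == other.chunkId;
    }

    @Override
    public int hashCode() {
        int hash = 3;
        // Fold both halves of the long instead of truncating with a cast.
        hash = 41 * hash + (int) (this.objectId ^ (this.objectId >>> 32));
        hash = 41 * hash + this.chunkId;
        return hash;
    }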
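
The patch adds two near-identical getOneHitPerObject() methods, one flattening per keyword in QueryResults and one flattening across all keywords in KeywordSearchResultFactory. Their shared idiom, sketched against a stand-in Hit type:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Standalone sketch of the first-hit-per-object flattening used by both
// new getOneHitPerObject() methods; Hit stands in for KeywordHit.
public class OneHitPerObjectSketch {

    static class Hit {
        final long objectId;

        Hit(long objectId) {
            this.objectId = objectId;
        }
    }

    static List<Hit> oneHitPerObject(List<Hit> hits) {
        List<Hit> flattened = new ArrayList<>();
        Set<Long> uniqueObjectIds = new HashSet<>();
        for (Hit hit : hits) {
            // Set.add() returns false for duplicates, so each object id
            // contributes only the first hit encountered.
            if (uniqueObjectIds.add(hit.objectId)) {
                flattened.add(hit);
            }
        }
        return flattened;
    }

    public static void main(String[] args) {
        List<Hit> hits = Arrays.asList(new Hit(1), new Hit(1), new Hit(2));
        System.out.println(oneHitPerObject(hits).size()); // prints 2
    }
}

The patch checks contains() before calling add(); relying on the boolean returned by add(), as above, is equivalent and avoids a second hash lookup.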
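
BlackboardResultWriter couples a SwingWorker to a NetBeans ProgressHandle whose Cancellable callback delegates to cancel(true), so the progress bar's cancel button interrupts the worker. The same pattern in isolation, with a placeholder work loop standing in for artifact writing:

import javax.swing.SwingWorker;
import org.netbeans.api.progress.ProgressHandle;
import org.netbeans.api.progress.ProgressHandleFactory;
import org.openide.util.Cancellable;

// Isolated sketch of the cancellable-progress pattern used by
// BlackboardResultWriter: the progress UI's cancel button interrupts the
// SwingWorker, and the worker polls isCancelled() inside its loop.
public class CancellableWorkerSketch extends SwingWorker<Object, Void> {

    private ProgressHandle progress;

    @Override
    protected Object doInBackground() throws Exception {
        progress = ProgressHandleFactory.createHandle("Saving results...", new Cancellable() {
            @Override
            public boolean cancel() {
                // Runs when the user clicks cancel in the progress UI.
                return CancellableWorkerSketch.this.cancel(true);
            }
        });
        progress.start();
        try {
            for (int i = 0; i < 100 && !isCancelled(); i++) {
                // ... write one unit of work, e.g. one blackboard artifact ...
            }
        } finally {
            progress.finish();
        }
        return null;
    }
}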
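
writeAllHitsToBlackBoard() runs each term through KeywordSearchUtil.escapeLuceneQuery() before requesting a snippet. Lucene's query parser ships a comparable escape, shown for reference; whether the Autopsy utility matches it exactly is an assumption, and the import below uses Lucene 4.x package coordinates:

import org.apache.lucene.queryparser.classic.QueryParser;

public class LuceneEscapeSketch {
    public static void main(String[] args) {
        // Backslash-escapes Lucene operators such as + - && || ! ( ) { } [ ] ^ " ~ * ? : \
        System.out.println(QueryParser.escape("foo(bar)*")); // foo\(bar\)\*
    }
}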