mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-18 02:27:42 +00:00
Modified filterResults() to only return a a single hit per keyword per object where a hit for that keyword on that object has not been seen before.
This commit is contained in:
parent
401ac636b9
commit
4bf5988c40
@ -264,7 +264,9 @@ public final class SearchRunner {
|
|||||||
// mutable state:
|
// mutable state:
|
||||||
private volatile boolean workerRunning;
|
private volatile boolean workerRunning;
|
||||||
private List<String> keywordListNames; //guarded by SearchJobInfo.this
|
private List<String> keywordListNames; //guarded by SearchJobInfo.this
|
||||||
private Map<Keyword, Set<String>> currentResults; //guarded by SearchJobInfo.this
|
|
||||||
|
// Map of keyword to the object ids that contain a hit
|
||||||
|
private Map<Keyword, Set<Long>> currentResults; //guarded by SearchJobInfo.this
|
||||||
private SearchRunner.Searcher currentSearcher;
|
private SearchRunner.Searcher currentSearcher;
|
||||||
private AtomicLong moduleReferenceCount = new AtomicLong(0);
|
private AtomicLong moduleReferenceCount = new AtomicLong(0);
|
||||||
private final Object finalSearchLock = new Object(); //used for a condition wait
|
private final Object finalSearchLock = new Object(); //used for a condition wait
|
||||||
@ -296,11 +298,11 @@ public final class SearchRunner {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized Set<String> currentKeywordResults(Keyword k) {
|
private synchronized Set<Long> currentKeywordResults(Keyword k) {
|
||||||
return currentResults.get(k);
|
return currentResults.get(k);
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized void addKeywordResults(Keyword k, Set<String> resultsIDs) {
|
private synchronized void addKeywordResults(Keyword k, Set<Long> resultsIDs) {
|
||||||
currentResults.put(k, resultsIDs);
|
currentResults.put(k, resultsIDs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -470,8 +472,8 @@ public final class SearchRunner {
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
// calculate new results by substracting results already obtained in this ingest
|
// Reduce the results of the query to only those hits we
|
||||||
// this creates a map of each keyword to the list of unique files that have that hit.
|
// have not already seen.
|
||||||
QueryResults newResults = filterResults(queryResults);
|
QueryResults newResults = filterResults(queryResults);
|
||||||
|
|
||||||
if (!newResults.getKeywords().isEmpty()) {
|
if (!newResults.getKeywords().isEmpty()) {
|
||||||
@ -570,61 +572,67 @@ public final class SearchRunner {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Over time, periodic searches can produce keyword hits that were seen
|
|
||||||
* in earlier searches (in addition to new hits).
|
|
||||||
* This method filters out all of the hits found in earlier
|
* This method filters out all of the hits found in earlier
|
||||||
* periodic searches and returns only the results found by the most
|
* periodic searches and returns only the results found by the most
|
||||||
* recent search.
|
* recent search.
|
||||||
*
|
*
|
||||||
|
* This method will only return hits for objects for which we haven't
|
||||||
|
* previously seen a hit for the keyword.
|
||||||
|
*
|
||||||
* @param queryResult The results returned by a keyword search.
|
* @param queryResult The results returned by a keyword search.
|
||||||
* @return The set of hits found by the most recent search.
|
* @return The set of hits found by the most recent search for objects
|
||||||
|
* that have not previously had a hit.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private QueryResults filterResults(QueryResults queryResult) {
|
private QueryResults filterResults(QueryResults queryResult) {
|
||||||
|
|
||||||
// Create a new (empty) QueryResults object to hold the most recently found hits.
|
// Create a new (empty) QueryResults object to hold the most recently
|
||||||
|
// found hits.
|
||||||
QueryResults newResults = new QueryResults(queryResult.getQuery(), queryResult.getKeywordList());
|
QueryResults newResults = new QueryResults(queryResult.getQuery(), queryResult.getKeywordList());
|
||||||
|
|
||||||
|
// For each keyword represented in the results.
|
||||||
for (Keyword keyword : queryResult.getKeywords()) {
|
for (Keyword keyword : queryResult.getKeywords()) {
|
||||||
|
// These are all of the hits across all objects for the most recent search.
|
||||||
|
// This may well include duplicates of hits we've seen in earlier periodic searches.
|
||||||
List<KeywordHit> queryTermResults = queryResult.getResults(keyword);
|
List<KeywordHit> queryTermResults = queryResult.getResults(keyword);
|
||||||
|
|
||||||
// Grab the set of solr document ids that have a hit for this
|
// This will be used to build up the hits we haven't seen before
|
||||||
// keyword.
|
// for this keyword.
|
||||||
Set<String> queryTermResultsIDs = new HashSet<>();
|
List<KeywordHit> newUniqueHits = new ArrayList<>();
|
||||||
for (KeywordHit hit : queryTermResults) {
|
|
||||||
queryTermResultsIDs.add(hit.getSolrDocumentId());
|
// Get the set of object ids seen in the past by this searcher
|
||||||
|
// for the given keyword.
|
||||||
|
Set<Long> curTermResults = job.currentKeywordResults(keyword);
|
||||||
|
if (curTermResults == null) {
|
||||||
|
// We create a new empty set if we haven't seen results for
|
||||||
|
// this keyword before.
|
||||||
|
curTermResults = new HashSet<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the set of document ids seen in the past by this searcher
|
// For each hit for this keyword.
|
||||||
// for the given keyword.
|
for (KeywordHit hit : queryTermResults) {
|
||||||
Set<String> curTermResults = job.currentKeywordResults(keyword);
|
if (curTermResults.contains(hit.getSolrObjectId())) {
|
||||||
if (curTermResults == null) {
|
// Skip the hit if we've already seen a hit for
|
||||||
// If we haven't seen any results in the past, we simply
|
// this keyword in the object.
|
||||||
// add all of the incoming results to the outgoing collection.
|
continue;
|
||||||
job.addKeywordResults(keyword, queryTermResultsIDs);
|
|
||||||
newResults.addResult(keyword, queryTermResults);
|
|
||||||
} else {
|
|
||||||
// Otherwise, we have seen results for this keyword in the past.
|
|
||||||
// For each of the keyword hits in the incoming results we check to
|
|
||||||
// see if we have seen the document id previously.
|
|
||||||
for (KeywordHit res : queryTermResults) {
|
|
||||||
if (!curTermResults.contains(res.getSolrDocumentId())) {
|
|
||||||
// We have not seen the document id before so we add
|
|
||||||
// the keyword hit to the results
|
|
||||||
List<KeywordHit> newKeywordHits = newResults.getResults(keyword);
|
|
||||||
if (newKeywordHits == null) {
|
|
||||||
// Create an empty list to hold the new hits.
|
|
||||||
newKeywordHits = new ArrayList<>();
|
|
||||||
newResults.addResult(keyword, newKeywordHits);
|
|
||||||
}
|
|
||||||
// Add the new hit to the list.
|
|
||||||
newKeywordHits.add(res);
|
|
||||||
// Add the document id to the set of document ids
|
|
||||||
// that have been seen for this keyword.
|
|
||||||
curTermResults.add(res.getSolrDocumentId());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We haven't seen the hit before so add it to list of new
|
||||||
|
// unique hits.
|
||||||
|
newUniqueHits.add(hit);
|
||||||
|
|
||||||
|
// Add the object id to the results we've seen for this
|
||||||
|
// keyword.
|
||||||
|
curTermResults.add(hit.getSolrObjectId());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the job with the list of objects for which we have
|
||||||
|
// seen hits for the current keyword.
|
||||||
|
job.addKeywordResults(keyword, curTermResults);
|
||||||
|
|
||||||
|
// Add the new hits for the current keyword into the results
|
||||||
|
// to be returned.
|
||||||
|
newResults.addResult(keyword, newUniqueHits);
|
||||||
}
|
}
|
||||||
|
|
||||||
return newResults;
|
return newResults;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user