Merge pull request #2471 from esaunders/snippet_fix

Snippet fix
This commit is contained in:
Richard Cordovano 2017-01-19 17:36:37 -05:00 committed by GitHub
commit 836413f781
3 changed files with 29 additions and 27 deletions

View File

@ -30,7 +30,7 @@ import org.sleuthkit.datamodel.TskCoreException;
* keyword was found and the file available to clients. Artifact keyword hits * keyword was found and the file available to clients. Artifact keyword hits
* also make the artifact available to clients. * also make the artifact available to clients.
*/ */
class KeywordHit { class KeywordHit implements Comparable<KeywordHit> {
private final String solrDocumentId; private final String solrDocumentId;
private final long solrObjectId; private final long solrObjectId;
@ -140,4 +140,23 @@ class KeywordHit {
return hash; return hash;
} }
/**
 * Orders keyword hits by Solr object id and then, for hits with the same
 * object id, by chunk id. Both keys are compared in ascending order.
 *
 * @param o The keyword hit to compare this hit against.
 *
 * @return -1 if this hit sorts before the other hit, 0 if both hits have
 *         the same object id and chunk id, 1 if this hit sorts after the
 *         other hit.
 */
@Override
public int compareTo(KeywordHit o) {
    // Long.compare returns exactly -1, 0 or 1, so the result values are
    // the same as those of a hand-written three-way comparison.
    int byObjectId = Long.compare(this.solrObjectId, o.solrObjectId);
    if (byObjectId != 0) {
        // Our object id is either less than or greater than the other object id
        return byObjectId;
    }
    // Hits have the same object id, so the chunk id decides the order.
    return Long.compare(this.chunkId, o.chunkId);
}
} }

View File

@ -131,7 +131,7 @@ class QueryResults {
subProgress.progress(keywordList.getName() + ": " + hitDisplayStr, unitProgress); subProgress.progress(keywordList.getName() + ": " + hitDisplayStr, unitProgress);
} }
for (KeywordHit hit : getOneHitPerObject(keyword)) { for (KeywordHit hit : getResults(keyword)) {
String termString = keyword.getSearchTerm(); String termString = keyword.getSearchTerm();
final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(termString); final String snippetQuery = KeywordSearchUtil.escapeLuceneQuery(termString);
String snippet; String snippet;
@ -174,29 +174,6 @@ class QueryResults {
return newArtifacts; return newArtifacts;
} }
/**
 * Reduces the hits for a keyword to a single hit per Solr object id,
 * keeping for each object the hit with the lowest chunk id.
 *
 * @param keyword The keyword whose hits are to be reduced.
 *
 * @return A {@code Collection<KeywordHit>} holding, for each Solr object
 *         id, the hit with the lowest chunk id.
 */
private Collection<KeywordHit> getOneHitPerObject(Keyword keyword) {
    HashMap<Long, KeywordHit> lowestChunkHits = new HashMap<>();
    for (KeywordHit candidate : getResults(keyword)) {
        long objectId = candidate.getSolrObjectId();
        KeywordHit current = lowestChunkHits.get(objectId);
        // Store the candidate when the object has no hit yet or when the
        // candidate's chunk id is strictly lower than the stored hit's.
        if (current == null || candidate.getChunkId() < current.getChunkId()) {
            lowestChunkHits.put(objectId, candidate);
        }
    }
    return lowestChunkHits.values();
}
/** /**
* Generate an ingest inbox message for given keyword in given file * Generate an ingest inbox message for given keyword in given file
* *

View File

@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.List; import java.util.List;
@ -580,8 +581,9 @@ public final class SearchRunner {
* previously seen a hit for the keyword. * previously seen a hit for the keyword.
* *
* @param queryResult The results returned by a keyword search. * @param queryResult The results returned by a keyword search.
* @return The set of hits found by the most recent search for objects * @return A unique set of hits found by the most recent search for objects
* that have not previously had a hit. * that have not previously had a hit. The hits will be for the lowest
* numbered chunk associated with the object.
* *
*/ */
private QueryResults filterResults(QueryResults queryResult) { private QueryResults filterResults(QueryResults queryResult) {
@ -596,6 +598,10 @@ public final class SearchRunner {
// This may well include duplicates of hits we've seen in earlier periodic searches. // This may well include duplicates of hits we've seen in earlier periodic searches.
List<KeywordHit> queryTermResults = queryResult.getResults(keyword); List<KeywordHit> queryTermResults = queryResult.getResults(keyword);
// Sort the hits for this keyword so that we are always
// guaranteed to return the hit for the lowest chunk.
Collections.sort(queryTermResults);
// This will be used to build up the hits we haven't seen before // This will be used to build up the hits we haven't seen before
// for this keyword. // for this keyword.
List<KeywordHit> newUniqueHits = new ArrayList<>(); List<KeywordHit> newUniqueHits = new ArrayList<>();