Merge pull request #6523 from dannysmyda/6997-Improve-Domain-Discovery-Thumbnail-Algorithm

6997 - Improve Domain Thumbnail Algorithm
This commit is contained in:
Richard Cordovano 2020-12-04 14:52:33 -05:00 committed by GitHub
commit 28c10838b8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -22,6 +22,7 @@ import com.google.common.cache.CacheLoader;
import java.awt.Image; import java.awt.Image;
import java.util.List; import java.util.List;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import org.sleuthkit.autopsy.coreutils.ImageUtils; import org.sleuthkit.autopsy.coreutils.ImageUtils;
import org.sleuthkit.datamodel.BlackboardArtifact; import org.sleuthkit.datamodel.BlackboardArtifact;
@ -38,17 +39,22 @@ import org.sleuthkit.datamodel.TskCoreException;
import org.openide.util.ImageUtilities; import org.openide.util.ImageUtilities;
/** /**
* Loads a thumbnail for the given request. Thumbnail candidates are JPEG files * Loads a thumbnail for the given request. Thumbnail candidates types are defined below.
* that have either TSK_WEB_DOWNLOAD or TSK_WEB_CACHE artifacts that match the * These candidates types must be the source of either TSK_WEB_DOWNLOAD or
* domain name (see the DomainSearch getArtifacts() API). JPEG files are sorted * TSK_WEB_CACHE artifacts that match the
* domain name (see the DomainSearch getArtifacts() API). Candidate files are sorted
* by most recent if sourced from TSK_WEB_DOWNLOADs and by size if sourced from * by most recent if sourced from TSK_WEB_DOWNLOADs and by size if sourced from
* TSK_WEB_CACHE artifacts. The first suitable thumbnail is selected. * TSK_WEB_CACHE artifacts. The first suitable thumbnail is selected.
*/ */
public class DomainSearchThumbnailLoader extends CacheLoader<DomainSearchThumbnailRequest, Image> { public class DomainSearchThumbnailLoader extends CacheLoader<DomainSearchThumbnailRequest, Image> {
private static final String UNSUPPORTED_IMAGE = "org/sleuthkit/autopsy/images/image-extraction-not-supported.png"; private static final String UNSUPPORTED_IMAGE = "org/sleuthkit/autopsy/images/image-extraction-not-supported.png";
private static final String JPG_EXTENSION = "jpg"; private static final List<String> SUPPORTED_EXTENSIONS = Arrays.asList("jpg", "svg", "png", "webp", "ico", "gif");
private static final String JPG_MIME_TYPE = "image/jpeg"; private static final List<String> SUPPORTED_MIMETYPES = Arrays.asList(
"image/gif", "image/jpeg", "image/png", "image/webp",
"image/svg+xml", "image/vnd.microsoft.icon", "image/x-icon");
private static final String ICO_EXTENSION = "ico";
private static final List<String> ICO_MIMETYPES = Arrays.asList("image/vnd.microsoft.icon", "image/x-icon");
private final DomainSearchArtifactsCache artifactsCache; private final DomainSearchArtifactsCache artifactsCache;
/** /**
@ -75,8 +81,21 @@ public class DomainSearchThumbnailLoader extends CacheLoader<DomainSearchThumbna
final DomainSearchArtifactsRequest webDownloadsRequest = new DomainSearchArtifactsRequest( final DomainSearchArtifactsRequest webDownloadsRequest = new DomainSearchArtifactsRequest(
caseDb, thumbnailRequest.getDomain(), TSK_WEB_DOWNLOAD); caseDb, thumbnailRequest.getDomain(), TSK_WEB_DOWNLOAD);
final List<BlackboardArtifact> webDownloads = artifactsCache.get(webDownloadsRequest); final List<BlackboardArtifact> webDownloads = artifactsCache.get(webDownloadsRequest);
final List<AbstractFile> webDownloadPictures = getJpegsFromWebDownload(caseDb, webDownloads); final List<AbstractFile> webDownloadPictures = getCandidatesFromWebDownloads(caseDb, webDownloads);
Collections.sort(webDownloadPictures, (file1, file2) -> Long.compare(file1.getCrtime(), file2.getCrtime())); Collections.sort(webDownloadPictures, (file1, file2) -> {
// Push ICO to the back of the sorted collection, so that ICO
// is a last resort matching type.
if (isIco(file1) && isIco(file2)) {
return Long.compare(file1.getCrtime(), file2.getCrtime());
} else if (isIco(file1)) {
return 1;
} else if (isIco(file2)) {
return -1;
} else {
return Long.compare(file1.getCrtime(), file2.getCrtime());
}
});
for (int i = webDownloadPictures.size() - 1; i >= 0; i--) { for (int i = webDownloadPictures.size() - 1; i >= 0; i--) {
if(Thread.currentThread().isInterrupted()) { if(Thread.currentThread().isInterrupted()) {
throw new InterruptedException(); throw new InterruptedException();
@ -92,8 +111,20 @@ public class DomainSearchThumbnailLoader extends CacheLoader<DomainSearchThumbna
final DomainSearchArtifactsRequest webCacheRequest = new DomainSearchArtifactsRequest( final DomainSearchArtifactsRequest webCacheRequest = new DomainSearchArtifactsRequest(
caseDb, thumbnailRequest.getDomain(), TSK_WEB_CACHE); caseDb, thumbnailRequest.getDomain(), TSK_WEB_CACHE);
final List<BlackboardArtifact> webCacheArtifacts = artifactsCache.get(webCacheRequest); final List<BlackboardArtifact> webCacheArtifacts = artifactsCache.get(webCacheRequest);
final List<AbstractFile> webCachePictures = getJpegsFromWebCache(caseDb, webCacheArtifacts); final List<AbstractFile> webCachePictures = getCandidatesFromWebCache(caseDb, webCacheArtifacts);
Collections.sort(webCachePictures, (file1, file2) -> Long.compare(file1.getSize(), file2.getSize())); Collections.sort(webCachePictures, (file1, file2) -> {
// Push ICO to the back of the sorted collection, so that ICO
// is a last resort matching type.
if (isIco(file1) && isIco(file2)) {
return Long.compare(file1.getSize(), file2.getSize());
} else if (isIco(file1)) {
return 1;
} else if (isIco(file2)) {
return -1;
} else {
return Long.compare(file1.getSize(), file2.getSize());
}
});
for (int i = webCachePictures.size() - 1; i >= 0; i--) { for (int i = webCachePictures.size() - 1; i >= 0; i--) {
if(Thread.currentThread().isInterrupted()) { if(Thread.currentThread().isInterrupted()) {
throw new InterruptedException(); throw new InterruptedException();
@ -109,40 +140,45 @@ public class DomainSearchThumbnailLoader extends CacheLoader<DomainSearchThumbna
} }
/** /**
* Finds all JPEG source files from TSK_WEB_DOWNLOAD instances. * Finds all supported image files from TSK_WEB_DOWNLOAD instances.
* *
* @param caseDb The case database being searched. * @param caseDb The case database being searched.
* @param artifacts The list of artifacts to get jpegs from. * @param artifacts The list of artifacts to search.
* *
* @return The list of AbstractFiles representing jpegs which were * @return The list of AbstractFiles representing supported images which were
* associated with the artifacts. * associated with the artifacts.
* *
* @throws TskCoreException * @throws TskCoreException
*/ */
private List<AbstractFile> getJpegsFromWebDownload(SleuthkitCase caseDb, List<BlackboardArtifact> artifacts) throws TskCoreException, InterruptedException { private List<AbstractFile> getCandidatesFromWebDownloads(SleuthkitCase caseDb, List<BlackboardArtifact> artifacts) throws TskCoreException, InterruptedException {
final List<AbstractFile> jpegs = new ArrayList<>(); final List<AbstractFile> candidates = new ArrayList<>();
for (BlackboardArtifact artifact : artifacts) { for (BlackboardArtifact artifact : artifacts) {
if(Thread.currentThread().isInterrupted()) { if(Thread.currentThread().isInterrupted()) {
throw new InterruptedException(); throw new InterruptedException();
} }
final Content sourceContent = caseDb.getContentById(artifact.getObjectID()); final Content sourceContent = caseDb.getContentById(artifact.getObjectID());
addIfJpeg(jpegs, sourceContent); addIfSupported(candidates, sourceContent);
} }
return jpegs; return candidates;
}
private boolean isIco(AbstractFile file) {
return ICO_EXTENSION.equals(file.getNameExtension())
|| ICO_MIMETYPES.contains(file.getMIMEType());
} }
/** /**
* Finds all JPEG source files from TSK_WEB_CACHE instances. * Finds all supported image files from TSK_WEB_CACHE instances.
* *
* @param caseDb The case database being searched. * @param caseDb The case database being searched.
* @param artifacts The list of artifacts to get jpegs from. * @param artifacts The list of artifacts to get images from.
* *
* @return The list of AbstractFiles representing jpegs which were * @return The list of AbstractFiles representing supported images which were
* associated with the artifacts. * associated with the artifacts.
*/ */
private List<AbstractFile> getJpegsFromWebCache(SleuthkitCase caseDb, List<BlackboardArtifact> artifacts) throws TskCoreException, InterruptedException { private List<AbstractFile> getCandidatesFromWebCache(SleuthkitCase caseDb, List<BlackboardArtifact> artifacts) throws TskCoreException, InterruptedException {
final BlackboardAttribute.Type TSK_PATH_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_PATH_ID); final BlackboardAttribute.Type TSK_PATH_ID = new BlackboardAttribute.Type(ATTRIBUTE_TYPE.TSK_PATH_ID);
final List<AbstractFile> jpegs = new ArrayList<>(); final List<AbstractFile> candidates = new ArrayList<>();
for (BlackboardArtifact artifact : artifacts) { for (BlackboardArtifact artifact : artifacts) {
if(Thread.currentThread().isInterrupted()) { if(Thread.currentThread().isInterrupted()) {
throw new InterruptedException(); throw new InterruptedException();
@ -150,24 +186,23 @@ public class DomainSearchThumbnailLoader extends CacheLoader<DomainSearchThumbna
final BlackboardAttribute tskPathId = artifact.getAttribute(TSK_PATH_ID); final BlackboardAttribute tskPathId = artifact.getAttribute(TSK_PATH_ID);
if (tskPathId != null) { if (tskPathId != null) {
final Content sourceContent = caseDb.getContentById(tskPathId.getValueLong()); final Content sourceContent = caseDb.getContentById(tskPathId.getValueLong());
addIfJpeg(jpegs, sourceContent); addIfSupported(candidates, sourceContent);
} }
} }
return jpegs; return candidates;
} }
/** /**
* Checks if the candidate source content is indeed a JPEG file. * Checks if the candidate source content is indeed a supported type.
* *
* @param files The list of source content files which are jpegs to * @param files The list of source content files which are supported.
* add to.
* @param sourceContent The source content to check and possibly add. * @param sourceContent The source content to check and possibly add.
*/ */
private void addIfJpeg(List<AbstractFile> files, Content sourceContent) { private void addIfSupported(List<AbstractFile> files, Content sourceContent) {
if ((sourceContent instanceof AbstractFile) && !(sourceContent instanceof DataSource)) { if ((sourceContent instanceof AbstractFile) && !(sourceContent instanceof DataSource)) {
final AbstractFile file = (AbstractFile) sourceContent; final AbstractFile file = (AbstractFile) sourceContent;
if (JPG_EXTENSION.equals(file.getNameExtension()) if (SUPPORTED_EXTENSIONS.contains(file.getNameExtension())
|| JPG_MIME_TYPE.equals(file.getMIMEType())) { || SUPPORTED_MIMETYPES.contains(file.getMIMEType())) {
files.add(file); files.add(file);
} }
} }