From f35bb56969fc89310102b2ff7d699f353423e66b Mon Sep 17 00:00:00 2001 From: adam-m Date: Mon, 18 Mar 2013 23:31:50 -0400 Subject: [PATCH 01/10] improve loading of png, bmp and large images (scale them to save memory in rendered canvas). Use javafx provided extensions instead of harcoding them. --- .../DataContentViewerMedia.java | 61 ++++++++++++++++--- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerMedia.java b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerMedia.java index 4ae4f122bf..0f3e782bad 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerMedia.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerMedia.java @@ -23,6 +23,7 @@ import java.awt.Component; import java.awt.Dimension; import java.awt.EventQueue; import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.IntBuffer; @@ -38,6 +39,7 @@ import javafx.scene.Group; import javafx.scene.Scene; import javafx.scene.image.Image; import javafx.scene.image.ImageView; +import javax.imageio.ImageIO; import org.sleuthkit.autopsy.coreutils.Logger; import javax.swing.BoxLayout; import javax.swing.SwingUtilities; @@ -71,7 +73,7 @@ import org.sleuthkit.datamodel.TskData.TSK_FS_NAME_FLAG_ENUM; }) public class DataContentViewerMedia extends javax.swing.JPanel implements DataContentViewer, FrameCapture { - private static final String[] IMAGES = new String[]{".jpg", ".jpeg", ".png", ".gif", ".jpe", ".bmp"}; + private String[] IMAGES; // use javafx supported private static final String[] VIDEOS = new String[]{".mov", ".m4v", ".flv", ".mp4", ".3gp", ".avi", ".mpg", ".mpeg"}; private static final String[] AUDIOS = new String[]{".mp3", ".wav", ".wma"}; private static final int NUM_FRAMES = 12; @@ -106,7 +108,18 @@ public class DataContentViewerMedia extends javax.swing.JPanel implements DataCo logger.log(Level.INFO, "Initializing JavaFX for image viewing"); } }); - + logger.log(Level.INFO, "Supported image formats by javafx image viewer: "); + + //initialize supported image types + //TODO use mime-types instead once we have support + String[] fxSupportedImagesSuffixes = ImageIO.getReaderFileSuffixes(); + IMAGES = new String[fxSupportedImagesSuffixes.length]; + for (int i=0; i Date: Tue, 19 Mar 2013 11:12:04 -0400 Subject: [PATCH 02/10] event better image scaling (using scalr) and better handling of huge hi-res images --- .../DataContentViewerMedia.java | 37 ++++++++------- CoreLibs/ivy.xml | 3 ++ CoreLibs/nbproject/project.properties | 3 ++ CoreLibs/nbproject/project.xml | 21 +++++++++ .../autopsy/corelibs/ScalrWrapper.java | 47 +++++++++++++++++++ 5 files changed, 93 insertions(+), 18 deletions(-) create mode 100644 CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerMedia.java b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerMedia.java index 0f3e782bad..d44324d048 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerMedia.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerMedia.java @@ -35,6 +35,7 @@ import java.util.concurrent.TimeUnit; import java.util.logging.Level; import javafx.application.Platform; import javafx.embed.swing.JFXPanel; +import javafx.embed.swing.SwingFXUtils; import javafx.scene.Group; import javafx.scene.Scene; import javafx.scene.image.Image; @@ -54,10 +55,12 @@ import org.netbeans.api.progress.ProgressHandle; import org.netbeans.api.progress.ProgressHandleFactory; import org.openide.nodes.Node; import org.openide.util.Cancellable; +import org.openide.util.Exceptions; import org.openide.util.lookup.ServiceProvider; import org.openide.util.lookup.ServiceProviders; import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.corecomponentinterfaces.DataContentViewer; +import org.sleuthkit.autopsy.corelibs.ScalrWrapper; import org.sleuthkit.autopsy.coreutils.MessageNotifyUtil; import org.sleuthkit.autopsy.datamodel.ContentUtils; import org.sleuthkit.datamodel.AbstractFile; @@ -303,31 +306,29 @@ public class DataContentViewerMedia extends javax.swing.JPanel implements DataCo public void run() { Dimension dims = DataContentViewerMedia.this.getSize(); - //reading all bytes first, then passing to byte array input stream - //becase otherwise does not work for png for some reason - //and we need to load all bytes anyways before scaling - long fileSize = file.getSize(); - byte[] imageBytes = new byte[(int)fileSize]; - final InputStream inputStream = new ReadContentInputStream(file); + + final Image fxImage; try { - inputStream.read(imageBytes, 0, imageBytes.length); + //original input stream + BufferedImage bi = ImageIO.read(inputStream); + //scale image using Scalr + BufferedImage biScaled = ScalrWrapper.resizeHighQuality(bi, (int)dims.getWidth(), (int)dims.getHeight()); + //convert from awt imageto fx image + fxImage = SwingFXUtils.toFXImage(biScaled, null); } catch (IOException ex) { logger.log(Level.WARNING, "Could not load image file into media view: " + fileName, ex); - //MessageNotifyUtil.Message.warn("Could not load image file: " + file.getName() + ", " + ex.getMessage()); return; } - - final InputStream byteInputStream = new ByteArrayInputStream(imageBytes); - - //scaled down the image while loading to save memory - final Image fxImage; - if (fileName.toLowerCase().endsWith(".bmp")) { - //bmp does not work with scaling - fxImage = new Image(byteInputStream); + catch (OutOfMemoryError ex) { + logger.log(Level.WARNING, "Could not load image file into media view (too large): " + fileName, ex); + MessageNotifyUtil.Notify.warn("Could not load image file (too large): " + file.getName(), ex.getMessage()); + return; } - else { - fxImage = new Image(byteInputStream, dims.getWidth(), dims.getHeight(), true, true); + + if (fxImage == null) { + logger.log(Level.WARNING, "Could not load image file into media view: " + fileName); + return; } // simple displays ImageView the image as is diff --git a/CoreLibs/ivy.xml b/CoreLibs/ivy.xml index 4477f81966..7a07b8f459 100644 --- a/CoreLibs/ivy.xml +++ b/CoreLibs/ivy.xml @@ -28,5 +28,8 @@ + + + diff --git a/CoreLibs/nbproject/project.properties b/CoreLibs/nbproject/project.properties index e800cc1427..53770338f0 100644 --- a/CoreLibs/nbproject/project.properties +++ b/CoreLibs/nbproject/project.properties @@ -12,6 +12,7 @@ file.reference.geronimo-jms_1.1_spec-1.0.jar=release/modules/ext/geronimo-jms_1. file.reference.gson-1.4.jar=release/modules/ext/gson-1.4.jar file.reference.gstreamer-java-1.5.jar=release/modules/ext/gstreamer-java-1.5.jar file.reference.guava-11.0.2.jar=release/modules/ext/guava-11.0.2.jar +file.reference.imgscalr-lib-4.2.jar=release/modules/ext/imgscalr-lib-4.2.jar file.reference.javaee-api-5.0-2.jar=release/modules/ext/javaee-api-5.0-2.jar file.reference.javassist-3.12.1.GA.jar=release/modules/ext/javassist-3.12.1.GA.jar file.reference.jcalendarbutton-1.4.6.jar=release/modules/ext/jcalendarbutton-1.4.6.jar @@ -29,6 +30,8 @@ file.reference.poi-ooxml-schemas-3.8.jar=release/modules/ext/poi-ooxml-schemas-3 file.reference.poi-scratchpad-3.8.jar=release/modules/ext/poi-scratchpad-3.8.jar file.reference.reflections-0.9.8.jar=release/modules/ext/reflections-0.9.8.jar file.reference.servlet-api-2.5.jar=release/modules/ext/servlet-api-2.5.jar +file.reference.sigar-1.6.4-sources.jar=release/modules/ext/sigar-1.6.4-sources.jar +file.reference.sigar-1.6.4.jar=release/modules/ext/sigar-1.6.4.jar file.reference.slf4j-api-1.6.1.jar=release/modules/ext/slf4j-api-1.6.1.jar file.reference.slf4j-simple-1.6.1.jar=release/modules/ext/slf4j-simple-1.6.1.jar file.reference.stax-api-1.0.1.jar=release/modules/ext/stax-api-1.0.1.jar diff --git a/CoreLibs/nbproject/project.xml b/CoreLibs/nbproject/project.xml index 8d4f053403..20093c86e4 100644 --- a/CoreLibs/nbproject/project.xml +++ b/CoreLibs/nbproject/project.xml @@ -661,6 +661,7 @@ org.hyperic.sigar.util org.hyperic.sigar.vmware org.hyperic.sigar.win32 + org.imgscalr org.jbundle.thin.base.screen.jcalendarbutton org.openxmlformats.schemas.drawingml.x2006.chart org.openxmlformats.schemas.drawingml.x2006.chart.impl @@ -774,6 +775,10 @@ ext/ant-1.8.2.jar release/modules/ext/ant-1.8.2.jar + + ext/commons-lang-2.4-javadoc.jar + release/modules/ext/commons-lang-2.4-javadoc.jar + ext/stax-api-1.0.1.jar release/modules/ext/stax-api-1.0.1.jar @@ -786,6 +791,14 @@ ext/reflections-0.9.8.jar release/modules/ext/reflections-0.9.8.jar + + ext/imgscalr-lib-4.2-sources.jar + release/modules/ext/imgscalr-lib-4.2-sources.jar + + + ext/imgscalr-lib-4.2-javadoc.jar + release/modules/ext/imgscalr-lib-4.2-javadoc.jar + ext/jna-3.4.0.jar release/modules/ext/jna-3.4.0.jar @@ -858,6 +871,10 @@ ext/commons-codec-1.5.jar release/modules/ext/commons-codec-1.5.jar + + ext/imgscalr-lib-4.2.jar + release/modules/ext/imgscalr-lib-4.2.jar + ext/poi-ooxml-schemas-3.8.jar release/modules/ext/poi-ooxml-schemas-3.8.jar @@ -870,6 +887,10 @@ ext/guava-11.0.2.jar release/modules/ext/guava-11.0.2.jar + + ext/commons-lang-2.4-sources.jar + release/modules/ext/commons-lang-2.4-sources.jar + ext/poi-excelant-3.8.jar release/modules/ext/poi-excelant-3.8.jar diff --git a/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java b/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java new file mode 100644 index 0000000000..7b74ee1f10 --- /dev/null +++ b/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java @@ -0,0 +1,47 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2013 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.sleuthkit.autopsy.corelibs; + +import java.awt.image.BufferedImage; +import org.imgscalr.Scalr; +import org.imgscalr.Scalr.Method; + +/** + * Scalr wrapper to deal with exports and provide thread-safety + * + */ +public class ScalrWrapper { + + public static synchronized BufferedImage resize(BufferedImage input, int width, int height) { + return Scalr.resize(input, width, height); + } + + public static synchronized BufferedImage resize(BufferedImage input, int size) { + return Scalr.resize(input, size); + } + + public static synchronized BufferedImage resizeHighQuality(BufferedImage input, int width, int height) { + return Scalr.resize(input, Method.QUALITY, width, height, Scalr.OP_ANTIALIAS); + } + + public static synchronized BufferedImage resizeFast(BufferedImage input, int size) { + return Scalr.resize(input, Method.SPEED, size, Scalr.OP_ANTIALIAS); + } +} From 5d13bdbb31a67ae579bc132ce8e7812075bef500 Mon Sep 17 00:00:00 2001 From: adam-m Date: Tue, 19 Mar 2013 12:10:12 -0400 Subject: [PATCH 03/10] much better thumbnail viewer: add support for bmp, faster thumnail generation and better quality looking. Simplified code, by using scalr. --- .../corecomponents/ThumbnailViewChildren.java | 14 ++- .../corecomponents/ThumbnailViewNode.java | 112 ++++++------------ .../autopsy/corelibs/ScalrWrapper.java | 10 +- 3 files changed, 59 insertions(+), 77 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/ThumbnailViewChildren.java b/Core/src/org/sleuthkit/autopsy/corecomponents/ThumbnailViewChildren.java index ed3d29553d..0f4508ac0b 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/ThumbnailViewChildren.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/ThumbnailViewChildren.java @@ -22,6 +22,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.logging.Level; +import javax.imageio.ImageIO; import org.openide.nodes.AbstractNode; import org.openide.nodes.Children; import org.openide.nodes.Node; @@ -154,7 +156,17 @@ class ThumbnailViewChildren extends Children.Keys { private static class IsSupportedContentVisitor extends ContentVisitor.Default { - private static final List SUPP_EXTENSIONS = Arrays.asList(".jpeg", ".jpg", ".gif", ".png"); + private final List SUPP_EXTENSIONS; + + IsSupportedContentVisitor() { + String[] supportedImagesSuffixes = ImageIO.getReaderFileSuffixes(); + + SUPP_EXTENSIONS = new ArrayList(supportedImagesSuffixes.length); + for (int i = 0; i < supportedImagesSuffixes.length; ++i) { + String suffix = supportedImagesSuffixes[i]; + SUPP_EXTENSIONS.add("." + suffix); + } + } @Override public Boolean visit(DerivedFile f) { diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/ThumbnailViewNode.java b/Core/src/org/sleuthkit/autopsy/corecomponents/ThumbnailViewNode.java index 97d23f267f..0acf0f9334 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/ThumbnailViewNode.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/ThumbnailViewNode.java @@ -26,6 +26,7 @@ import java.awt.Toolkit; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.lang.ref.SoftReference; import java.util.logging.Level; import javax.imageio.ImageIO; @@ -35,32 +36,39 @@ import org.openide.nodes.Children; import org.openide.nodes.FilterNode; import org.openide.nodes.Node; import org.sleuthkit.autopsy.casemodule.Case; +import org.sleuthkit.autopsy.corelibs.ScalrWrapper; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.datamodel.Content; +import org.sleuthkit.datamodel.ReadContentInputStream; import org.sleuthkit.datamodel.TskException; /** - * Node that wraps around original node and adds the bitmap icon representing the picture + * Node that wraps around original node and adds the bitmap icon representing + * the picture */ class ThumbnailViewNode extends FilterNode { private SoftReference iconCache; - private static final Image defaultIcon = new ImageIcon("/org/sleuthkit/autopsy/images/file-icon.png").getImage(); + private static final Logger logger = Logger.getLogger(ThumbnailViewNode.class.getName()); + //private final BufferedImage defaultIconBI; - /** the constructor */ + /** + * the constructor + */ ThumbnailViewNode(Node arg) { super(arg, Children.LEAF); } @Override - public String getDisplayName(){ - if(super.getDisplayName().length() > 15) + public String getDisplayName() { + if (super.getDisplayName().length() > 15) { return super.getDisplayName().substring(0, 15).concat("..."); - else + } else { return super.getDisplayName(); + } } - + @Override public Image getIcon(int type) { Image icon = null; @@ -68,12 +76,11 @@ class ThumbnailViewNode extends FilterNode { if (iconCache != null) { icon = iconCache.get(); } - - - + + if (icon == null) { Content content = this.getLookup().lookup(Content.class); - + if (content != null) { if (getFile(content.getId()).exists()) { try { @@ -84,85 +91,44 @@ class ThumbnailViewNode extends FilterNode { } else { try { icon = generateIcon(content); - ImageIO.write(toBufferedImage(icon), "jpg", getFile(content.getId())); - } catch (TskException ex) { - icon = ThumbnailViewNode.defaultIcon; + if (icon == null) { + icon = ThumbnailViewNode.defaultIcon; + } + else { + ImageIO.write((BufferedImage) icon, "jpg", getFile(content.getId())); + } } catch (IOException ex) { + logger.log(Level.WARNING, "Could not write cache thumbnail: " + content, ex); } } } else { icon = ThumbnailViewNode.defaultIcon; } - + iconCache = new SoftReference(icon); } return icon; } - static private Image generateIcon(Content content) throws TskException { - byte[] data = new byte[(int)content.getSize()]; - int bytesRead = content.read(data, 0, content.getSize()); - - if (bytesRead < 1) + /* + * Generate a scaled image + */ + static private BufferedImage generateIcon(Content content) { + + try { + final InputStream inputStream = new ReadContentInputStream(content); + BufferedImage bi = ImageIO.read(inputStream); + + BufferedImage biScaled = ScalrWrapper.resizeFast(bi, 100, 100); + return biScaled; + } catch (Exception e) { + logger.log(Level.WARNING, "Could not scale image: " + content.getName(), e); return null; - - Image result = Toolkit.getDefaultToolkit().createImage(data); - - // scale the image - MediaTracker mTracker = new MediaTracker(new JFrame()); - mTracker.addImage(result, 1); - try { - mTracker.waitForID(1); - } catch (InterruptedException ex) { - // TODO: maybe make bubble instead - Logger.getLogger(ThumbnailViewNode.class.getName()).log(Level.WARNING, "Error while trying to scale the icon.", ex); } - int width = result.getWidth(null); - int height = result.getHeight(null); - - int max = Math.max(width, height); - double scale = (75 * 100) / max; - - // getScaledInstance can't take have width or height be 0, so round - // up by adding 1 after truncating to int. - width = (int) ((width * scale) / 100) + 1; - height = (int) ((height * scale) / 100) + 1; - - result = result.getScaledInstance(width, height, Image.SCALE_SMOOTH); - - // load the image completely - mTracker.addImage(result, 1); - try { - mTracker.waitForID(1); - } catch (InterruptedException ex) { - // TODO: maybe make bubble instead - Logger.getLogger(ThumbnailViewNode.class.getName()).log(Level.WARNING, "Error while trying to load the icon.", ex); - } - - // create 75x75 image for the icon with the icon on the center - BufferedImage combined = new BufferedImage(75, 75, BufferedImage.TYPE_INT_ARGB); - Graphics2D g = (Graphics2D) combined.getGraphics(); - g.setColor(Color.WHITE); - g.setBackground(Color.WHITE); - g.drawImage(result, (75 - width) / 2, (75 - height) / 2, null); - - return Toolkit.getDefaultToolkit().createImage(combined.getSource()); } - private static BufferedImage toBufferedImage(Image src) { - int w = src.getWidth(null); - int h = src.getHeight(null); - int type = BufferedImage.TYPE_INT_RGB; // other options - BufferedImage dest = new BufferedImage(w, h, type); - Graphics2D g2 = dest.createGraphics(); - g2.drawImage(src, 0, 0, null); - g2.dispose(); - return dest; - } - private static File getFile(long id) { return new File(Case.getCurrentCase().getCacheDirectory() + File.separator + id + ".jpg"); } - } diff --git a/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java b/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java index 7b74ee1f10..3884133984 100644 --- a/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java +++ b/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java @@ -30,11 +30,11 @@ import org.imgscalr.Scalr.Method; public class ScalrWrapper { public static synchronized BufferedImage resize(BufferedImage input, int width, int height) { - return Scalr.resize(input, width, height); + return Scalr.resize(input, width, height, Scalr.OP_ANTIALIAS); } public static synchronized BufferedImage resize(BufferedImage input, int size) { - return Scalr.resize(input, size); + return Scalr.resize(input, size, Scalr.OP_ANTIALIAS); } public static synchronized BufferedImage resizeHighQuality(BufferedImage input, int width, int height) { @@ -42,6 +42,10 @@ public class ScalrWrapper { } public static synchronized BufferedImage resizeFast(BufferedImage input, int size) { - return Scalr.resize(input, Method.SPEED, size, Scalr.OP_ANTIALIAS); + return Scalr.resize(input, Method.SPEED, Scalr.Mode.FIT_TO_WIDTH, size, Scalr.OP_ANTIALIAS); + } + + public static synchronized BufferedImage resizeFast(BufferedImage input, int width, int height) { + return Scalr.resize(input, Method.SPEED, Scalr.Mode.AUTOMATIC, width, height, Scalr.OP_ANTIALIAS); } } From fa52bcd708b1fb3741edd2216a0312180f8411a2 Mon Sep 17 00:00:00 2001 From: adam-m Date: Tue, 19 Mar 2013 12:18:32 -0400 Subject: [PATCH 04/10] updated news --- NEWS.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.txt b/NEWS.txt index bc41caccee..8faa11f448 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -15,6 +15,7 @@ Bugfixes: - fixed directory tree history being reset when tree is refreshed. - exif module better jpeg detection using signature and not only file extension. - The "media view" tab is inactive for deleted files (#165) +- improved image loading in Media View and Thumbnail View (faster loading, handles large files better) ---------------- VERSION 3.0.4 -------------- From 555a15016fda06d2cc307a664a13a98130389c4c Mon Sep 17 00:00:00 2001 From: adam-m Date: Tue, 19 Mar 2013 12:18:42 -0400 Subject: [PATCH 05/10] updated scalr wrapper --- CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java b/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java index 3884133984..fce800e30b 100644 --- a/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java +++ b/CoreLibs/src/org/sleuthkit/autopsy/corelibs/ScalrWrapper.java @@ -42,7 +42,7 @@ public class ScalrWrapper { } public static synchronized BufferedImage resizeFast(BufferedImage input, int size) { - return Scalr.resize(input, Method.SPEED, Scalr.Mode.FIT_TO_WIDTH, size, Scalr.OP_ANTIALIAS); + return Scalr.resize(input, Method.SPEED, Scalr.Mode.AUTOMATIC, size, Scalr.OP_ANTIALIAS); } public static synchronized BufferedImage resizeFast(BufferedImage input, int width, int height) { From 33a308a75524a8f475427bc3a062d13de7ac4dcb Mon Sep 17 00:00:00 2001 From: adam-m Date: Tue, 19 Mar 2013 15:01:12 -0400 Subject: [PATCH 06/10] Keyword search changes to use tika mime type detection rather than file extensions. TODO fine-tune mime types. --- .../keywordsearch/AbstractFileExtract.java | 90 +++++++++++++++---- .../AbstractFileHtmlExtract.java | 33 ++++--- .../AbstractFileStringExtract.java | 38 ++++---- .../AbstractFileTikaTextExtract.java | 56 +++++------- .../KeywordSearchIngestModule.java | 79 +++++++++++----- NEWS.txt | 4 +- 6 files changed, 194 insertions(+), 106 deletions(-) diff --git a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileExtract.java b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileExtract.java index c196533875..dcb8b25728 100644 --- a/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileExtract.java +++ b/KeywordSearch/src/org/sleuthkit/autopsy/keywordsearch/AbstractFileExtract.java @@ -16,9 +16,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.sleuthkit.autopsy.keywordsearch; +import java.util.Arrays; import java.util.List; import java.util.Map; import org.sleuthkit.autopsy.coreutils.StringExtract.StringExtractUnicodeTable.SCRIPT; @@ -29,72 +29,124 @@ import org.sleuthkit.datamodel.AbstractFile; * chunks */ interface AbstractFileExtract { - + /** * Common options that can be used by some extractors */ enum ExtractOptions { + EXTRACT_UTF16, ///< extract UTF16 text, possible values Boolean.TRUE.toString(), Boolean.FALSE.toString() EXTRACT_UTF8, ///< extract UTF8 text, possible values Boolean.TRUE.toString(), Boolean.FALSE.toString() }; + + //generally text extractors should ignore archives + //and let unpacking modules take case of them + static final List ARCHIVE_MIME_TYPES = + Arrays.asList( + //ignore unstructured binary and compressed data, for which string extraction or unzipper works better + "application/x-7z-compressed", + "application/x-ace-compressed", + "application/x-alz-compressed", + "application/x-arj", + "application/vnd.ms-cab-compressed", + "application/x-cfs-compressed", + "application/x-dgc-compressed", + "application/x-apple-diskimage", + "application/x-gca-compressed", + "application/x-dar", + "application/x-lzx", + "application/x-lzh", + "application/x-rar-compressed", + "application/x-stuffit", + "application/x-stuffitx", + "application/x-gtar", + "application/x-archive", + "application/x-executable", + "application/x-gzip", + "application/zip", + "application/x-zoo", + "application/x-cpio", + "application/x-shar", + "application/x-tar", + "application/x-bzip", + "application/x-bzip2", + "application/x-lzip", + "application/x-lzma", + "application/x-lzop", + "application/x-z", + "application/x-compress"); /** * Get number of chunks resulted from extracting this AbstractFile + * * @return the number of chunks produced */ int getNumChunks(); /** * Get the source file associated with this extraction + * * @return the source AbstractFile */ AbstractFile getSourceFile(); /** * Index the Abstract File + * * @param sourceFile file to index * @return true if indexed successfully, false otherwise - * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException + * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException */ boolean index(AbstractFile sourceFile) throws Ingester.IngesterException; - + /** * Sets the scripts to use for the extraction + * * @param extractScripts scripts to use - * @return true if extractor supports script - specific extraction, false otherwise + * @return true if extractor supports script - specific extraction, false + * otherwise */ boolean setScripts(List