Merge changes
@@ -44,6 +44,7 @@ import org.openide.util.actions.CallableSystemAction;
import org.openide.util.actions.SystemAction;
import org.openide.windows.WindowManager;
import org.sleuthkit.autopsy.corecomponentinterfaces.CoreComponentControl;
import org.sleuthkit.autopsy.coreutils.FileUtil;
import org.sleuthkit.autopsy.coreutils.Log;
import org.sleuthkit.autopsy.coreutils.Version;
import org.sleuthkit.datamodel.*;
@@ -681,17 +682,7 @@ public class Case {
     * @return boolean whether the case directory is successfully deleted or not
     */
    static boolean deleteCaseDirectory(File casePath) {
        if (casePath.exists()) {
            File[] files = casePath.listFiles();
            for (int i = 0; i < files.length; i++) {
                if (files[i].isDirectory()) {
                    deleteCaseDirectory(files[i]);
                } else {
                    files[i].delete();
                }
            }
        }
        return (casePath.delete());
        return FileUtil.deleteDir(casePath);
    }

    /**
@@ -2,9 +2,9 @@
 * \package org.sleuthkit.autopsy.corecomponentinterfaces
 * This package contains the interface classes that define the core components in Autopsy. These components are used in the different zones of the GUI.
 * <h2>Autopsy Zones</h2>
 * There are three major zones in the Autopsy UI. The left hand side has the {@link org.sleuthkit.autopsy.corecomponentinterfaces.DataExplorer DataExplorer} zone. This area is where you can search for and explore data. It has all of the analysis smarts. An example of a DataExplorer is the directory tree that shows the hierarchy of directories (and hides the files from view).
 * The DataExplorer area identifies a subset of the data to show the user and passes the data to the {@link org.sleuthkit.autopsy.corecomponentinterfaces.DataResult DataResult}s area in the upper right. In the previous example, the contents of a specific folder would be passed to this area and displayed in a table or thumbnail form.</p>
 * When a file or object is selected in the DataResult, it is passed to the {@link org.sleuthkit.autopsy.corecomponentinterfaces.DataContent DataContent} zone in the lower right. This is where file content can be viewed in hex form, strings, etc.
 * There are three major zones in the Autopsy UI. The left hand side has the org.sleuthkit.autopsy.corecomponentinterfaces.DataExplorer zone. This area is where you can search for and explore data. It has all of the analysis smarts. An example of a DataExplorer is the directory tree that shows the hierarchy of directories (and hides the files from view).
 * The DataExplorer area identifies a subset of the data to show the user and passes the data to the org.sleuthkit.autopsy.corecomponentinterfaces.DataResult area in the upper right. In the previous example, the contents of a specific folder would be passed to this area and displayed in a table or thumbnail form.</p>
 * When a file or object is selected in the DataResult, it is passed to the org.sleuthkit.autopsy.corecomponentinterfaces.DataContent zone in the lower right. This is where file content can be viewed in hex form, strings, etc.
 * <h2>Data Flow</h2>
 * <h3>Creating Nodes in DataExplorer</h3>
 * Data flows between the areas inside of a NetBeans node. The DataExplorer modules create the NetBeans nodes. They query the SQLite database or do whatever they want to identify the set of files that are of interest. They create the NetBeans nodes based on Sleuthkit data model objects. See the org.sleuthkit.autopsy.datamodel package for more details on this.
@@ -13,7 +13,7 @@
 * The DataExplorer is responsible for setting the double-click and right-click actions associated with the node. The default single-click action is to pass data to DataContent. To override this, you must create a new DataResultViewer instance that overrides the propertyChange() method. The DataExplorer adds actions by wrapping the node in a FilterNode variant. The FilterNode then defines the actions for the node by overriding the getPreferredAction() and getActions() methods. As an example, org.sleuthkit.autopsy.directorytree.DataResultFilterNode and org.sleuthkit.autopsy.directorytree.DataResultFilterChildren wrap the nodes that are passed over by the DirectoryTree DataExplorer.
 * DataResult can send data back to its DataExplorer by making a custom action that looks up its instance (DataExplorer.getInstance()).
 * <h3>Getting Nodes to DataContent</h3>
 * A default DataContent viewer is created when a case is opened. To display the contents of a node, it must be passed to a DataContent instance. The default single-click behavior of the DataResultViewers is to look up the default DataContent TopComponent and pass the selected node to it. See {@link org.sleuthkit.autopsy.corecomponents.AbstractDataResultViewer#propertyChange(PropertyChangeEvent) AbstractDataResultViewer.propertyChange()} for details.
 * A default DataContent viewer is created when a case is opened. To display the contents of a node, it must be passed to a DataContent instance. The default single-click behavior of the DataResultViewers is to look up the default DataContent TopComponent and pass the selected node to it. See org.sleuthkit.autopsy.corecomponents.AbstractDataResultViewer.propertyChange(PropertyChangeEvent) for details.
 * <h2>Creating new Functionality</h2>
 * <h3>Creating a DataExplorer</h3>
 * <ol>
@@ -32,7 +32,7 @@
 * <li>Utilities API
 * <li>Window System API
 * </ul>
 * <li> Create a class that implements {@link org.sleuthkit.autopsy.corecomponentinterfaces.DataExplorer DataExplorer}. We have been making the TopComponent class be the one that implements DataExplorer. Register this class as a DataExplorer service provider by specifying "@ServiceProvider(service=DataExplorer.class)" in the class or using layer.xml.
 * <li> Create a class that implements org.sleuthkit.autopsy.corecomponentinterfaces.DataExplorer. We have been making the TopComponent class be the one that implements DataExplorer. Register this class as a DataExplorer service provider by specifying "@ServiceProvider(service=DataExplorer.class)" in the class or using layer.xml.
 * <li>Implement the methods required by the DataExplorer interface.
 * <li>Register the class to receive property change events from the org.sleuthkit.autopsy.Case module by using its addPropertyChangeListener() method.
 * <li>Access case data using the org.sleuthkit.autopsy.Case module.
@@ -58,7 +58,7 @@
 * <li>Utilities API
 * <li>Window System API
 * </ul>
 * <li>Make a class that extends {@link org.sleuthkit.autopsy.corecomponents.AbstractDataResultViewer#AbstractDataResultViewer() AbstractDataResultViewer} and is registered as a service provider for the {@link org.sleuthkit.autopsy.corecomponentinterfaces.DataResultViewer DataResultViewer} class by specifying "@ServiceProvider(service = DataResultViewer.class)" or by using layer.xml. This class will extend JPanel. </li>
 * <li>Make a class that extends org.sleuthkit.autopsy.corecomponents.AbstractDataResultViewer and is registered as a service provider for the org.sleuthkit.autopsy.corecomponentinterfaces.DataResultViewer class by specifying "@ServiceProvider(service = DataResultViewer.class)" or by using layer.xml. This class will extend JPanel. </li>
 * <li>See the previous sections on default actions.</li>
 * </ol>
 * <h3>Creating a DataContentViewer</h3>
@@ -79,6 +79,6 @@
 * <li>Utilities API
 * <li>Window System API
 * </ul>
 * <li>Make a class that implements {@link org.sleuthkit.autopsy.corecomponentinterfaces.DataContentViewer DataContentViewer} and is registered as a service provider for DataContentViewer.class by specifying "@ServiceProvider(service = DataContentViewer.class)" or by using layer.xml. This class must extend JPanel. </li>
 * <li>Make a class that implements org.sleuthkit.autopsy.corecomponentinterfaces.DataContentViewer and is registered as a service provider for DataContentViewer.class by specifying "@ServiceProvider(service = DataContentViewer.class)" or by using layer.xml. This class must extend JPanel. </li>
 * </ol>
 */
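As a rough illustration of the "@ServiceProvider" registration pattern that the steps above rely on, here is a minimal sketch. The ExampleViewer interface and the class names below are made up for illustration only (they are not the actual Autopsy DataContentViewer/DataResultViewer interfaces); only the NetBeans ServiceProvider/Lookup usage is meant to be representative.

import javax.swing.JPanel;
import org.openide.util.Lookup;
import org.openide.util.lookup.ServiceProvider;

// Hypothetical stand-in for an Autopsy viewer interface (e.g., DataContentViewer).
interface ExampleViewer {
    String getTitle();
}

// Registration: @ServiceProvider publishes the class in the global Lookup,
// which is how the framework discovers viewers declared in other modules.
@ServiceProvider(service = ExampleViewer.class)
public class HexExampleViewer extends JPanel implements ExampleViewer {
    @Override
    public String getTitle() {
        return "Hex View";
    }
}

// Discovery: framework-side code can enumerate every registered provider.
class ExampleViewerDiscovery {
    static void listViewers() {
        for (ExampleViewer v : Lookup.getDefault().lookupAll(ExampleViewer.class)) {
            System.out.println(v.getTitle());
        }
    }
}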
(Eight new binary icon images added.)

CoreUtils/src/org/sleuthkit/autopsy/coreutils/FileUtil.java (new file, 91 lines)
@@ -0,0 +1,91 @@
/*
 * Autopsy Forensic Browser
 *
 * Copyright 2012 Basis Technology Corp.
 * Contact: carrier <at> sleuthkit <dot> org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.sleuthkit.autopsy.coreutils;

import java.io.File;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * File and directory utilities.
 */
public class FileUtil {
    private static final Logger logger = Logger.getLogger(FileUtil.class.getName());

    /**
     * Recursively delete a directory.
     *
     * @param dirPath path of the directory to delete
     * @return true if the directory was deleted, false otherwise
     */
    public static boolean deleteDir(File dirPath) {
        if (dirPath.exists()) {
            File[] files = dirPath.listFiles();
            for (int i = 0; i < files.length; i++) {
                if (files[i].isDirectory()) {
                    deleteDir(files[i]);
                } else {
                    files[i].delete();
                }
            }
        }
        return (dirPath.delete());
    }

    /**
     * Check if the given path is a file or directory, then delete it through
     * recursion, with a path leading to a file as the base case.
     *
     * @param path the path to the file or directory to delete
     * @return true if the File at path is deleted, false otherwise
     */
    public static boolean deleteFileDir(File path) {
        boolean success = true;
        if (path.isFile()) { // If it's a file
            if (!path.delete()) {
                success = false;
                logger.log(Level.WARNING, "Failed to delete file {0}", path.getPath());
            }
        } else { // If it's a directory
            if (path.list().length == 0) { // If the dir is empty
                if (!path.delete()) {
                    success = false;
                    logger.log(Level.WARNING, "Failed to delete the empty directory at {0}", path.getPath());
                }
            } else {
                String files[] = path.list();
                for (String s : files) {
                    File sub = new File(path, s);
                    success = deleteFileDir(sub);
                }
                if (path.list().length == 0) { // Delete the newly-empty dir
                    if (!path.delete()) {
                        success = false;
                        logger.log(Level.WARNING, "Failed to delete the empty directory at {0}", path.getPath());
                    }
                } else {
                    success = false;
                    logger.log(Level.WARNING, "Directory {0} did not recursively delete successfully.", path.getPath());
                }
            }
        }
        return success;
    }
}
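For context, a minimal usage sketch of the new helper follows; the scratch-directory path below is made up purely for illustration.

import java.io.File;
import org.sleuthkit.autopsy.coreutils.FileUtil;

class FileUtilUsageSketch {
    public static void main(String[] args) {
        // Hypothetical directory used only for this example.
        File scratch = new File(System.getProperty("java.io.tmpdir"), "autopsy-scratch");
        if (!FileUtil.deleteDir(scratch)) {
            System.err.println("Could not fully delete " + scratch.getPath());
        }
    }
}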
@@ -6,6 +6,15 @@
    <code-name-base>org.sleuthkit.autopsy.datamodel</code-name-base>
    <suite-component/>
    <module-dependencies>
        <dependency>
            <code-name-base>org.netbeans.api.progress</code-name-base>
            <build-prerequisite/>
            <compile-dependency/>
            <run-dependency>
                <release-version>1</release-version>
                <specification-version>1.28.1</specification-version>
            </run-dependency>
        </dependency>
        <dependency>
            <code-name-base>org.openide.awt</code-name-base>
            <build-prerequisite/>
@@ -28,6 +28,8 @@ import java.util.List;
import java.util.TimeZone;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.SwingWorker;
import org.netbeans.api.progress.ProgressHandle;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.ContentVisitor;
import org.sleuthkit.datamodel.Directory;
@@ -214,25 +216,47 @@ public final class ContentUtils {
     * it does
     * @throws IOException
     */
    public static void writeToFile(Content content, java.io.File outputFile) throws IOException {
    public static void writeToFile(Content content, java.io.File outputFile, ProgressHandle progress, SwingWorker worker, boolean source) throws IOException {

        InputStream in = new ReadContentInputStream(content);

        boolean append = false;
        FileOutputStream out = new FileOutputStream(outputFile, append);

        // Get the unit size for a progress bar
        int unit = (int) (content.getSize() / 100);
        long totalRead = 0;

        try {
            byte[] buffer = new byte[TO_FILE_BUFFER_SIZE];
            int len = in.read(buffer);
            while (len != -1) {
                // If there is a worker, check for a cancellation
                if (worker != null && worker.isCancelled()) {
                    break;
                }
                out.write(buffer, 0, len);
                len = in.read(buffer);
                totalRead += len;
                // If there is a progress bar and this is the source file,
                // report any progress
                if (progress != null && source) {
                    int totalProgress = (int) (totalRead / unit);
                    progress.progress(content.getName(), totalProgress);
                // If it's not the source, just update the file being processed
                } else if (progress != null && !source) {
                    progress.progress(content.getName());
                }
            }
        } finally {
            out.close();
        }
    }

    public static void writeToFile(Content content, java.io.File outputFile) throws IOException {
        writeToFile(content, outputFile, null, null, false);
    }

    /**
     * Helper to ignore the '.' and '..' directories
     */
@@ -250,11 +274,21 @@
    public static class ExtractFscContentVisitor extends ContentVisitor.Default<Void> {

        java.io.File dest;
        ProgressHandle progress;
        SwingWorker worker;
        boolean source = false;

        /**
         * Make new extractor for a specific destination
         * @param dest The file/folder visited will be extracted as this file
         */
        public ExtractFscContentVisitor(java.io.File dest, ProgressHandle progress, SwingWorker worker, boolean source) {
            this.dest = dest;
            this.progress = progress;
            this.worker = worker;
            this.source = source;
        }

        public ExtractFscContentVisitor(java.io.File dest) {
            this.dest = dest;
        }
@@ -263,13 +297,13 @@
         * Convenience method to make a new instance for given destination
         * and extract given content
         */
        public static void extract(Content cntnt, java.io.File dest) {
            cntnt.accept(new ExtractFscContentVisitor(dest));
        public static void extract(Content cntnt, java.io.File dest, ProgressHandle progress, SwingWorker worker) {
            cntnt.accept(new ExtractFscContentVisitor(dest, progress, worker, true));
        }

        public Void visit(File f) {
            try {
                ContentUtils.writeToFile(f, dest);
                ContentUtils.writeToFile(f, dest, progress, worker, source);
            } catch (IOException ex) {
                logger.log(Level.SEVERE,
                        "Trouble extracting file to " + dest.getAbsolutePath(),
@@ -292,12 +326,23 @@
        DestFileContentVisitor destFileCV = new DestFileContentVisitor();

        try {
            int numProcessed = 0;
            // recurse on children
            for (Content child : dir.getChildren()) {
                java.io.File childFile = child.accept(destFileCV);
                ExtractFscContentVisitor childVisitor =
                        new ExtractFscContentVisitor(childFile);
                        new ExtractFscContentVisitor(childFile, progress, worker, false);
                // If this is the source directory of an extract it
                // will have a progress and worker, and will keep track
                // of the progress bar's progress
                if (worker != null && worker.isCancelled()) {
                    break;
                }
                if (progress != null && source) {
                    progress.progress(child.getName(), numProcessed);
                }
                child.accept(childVisitor);
                numProcessed++;
            }
        } catch (TskException ex) {
            logger.log(Level.SEVERE,
@@ -19,7 +19,6 @@
package org.sleuthkit.autopsy.datamodel;

import javax.swing.Action;
import org.openide.nodes.Sheet;
import org.sleuthkit.datamodel.File;
import org.sleuthkit.datamodel.TskData;

@@ -31,7 +30,6 @@ import org.sleuthkit.datamodel.TskData;
public class FileNode extends AbstractFsContentNode<File> {

    /**
     *
     * @param file underlying Content
     */
    public FileNode(File file) {
@@ -45,7 +43,7 @@ public class FileNode extends AbstractFsContentNode<File> {
        if (File.dirFlagToValue(file.getDir_flags()).equals(TskData.TSK_FS_NAME_FLAG_ENUM.TSK_FS_NAME_FLAG_UNALLOC.toString())) {
            this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/file-icon-deleted.png");
        } else {
            this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/file-icon.png");
            this.setIconBaseWithExtension(getIconForFileType(file));
        }
    }

@@ -69,4 +67,52 @@ public class FileNode extends AbstractFsContentNode<File> {
    public <T> T accept(DisplayableItemNodeVisitor<T> v) {
        return v.visit(this);
    }

    // Given a file, returns the correct icon for that file based on its extension
    static String getIconForFileType(File file) {
        // Get the name and extension
        String name = file.getName();
        int dotIndex = name.lastIndexOf(".");
        if (dotIndex == -1) {
            return "org/sleuthkit/autopsy/images/file-icon.png";
        }
        String ext = name.substring(dotIndex).toLowerCase();

        // Images
        for (String s : FileTypeExtensions.getImageExtensions()) {
            if (ext.equals(s)) { return "org/sleuthkit/autopsy/images/image-file.png"; }
        }
        // Videos
        for (String s : FileTypeExtensions.getVideoExtensions()) {
            if (ext.equals(s)) { return "org/sleuthkit/autopsy/images/video-file.png"; }
        }
        // Audio files
        for (String s : FileTypeExtensions.getAudioExtensions()) {
            if (ext.equals(s)) { return "org/sleuthkit/autopsy/images/audio-file.png"; }
        }
        // Documents
        for (String s : FileTypeExtensions.getDocumentExtensions()) {
            if (ext.equals(s)) { return "org/sleuthkit/autopsy/images/doc-file.png"; }
        }
        // Executables / system files
        for (String s : FileTypeExtensions.getExecutableExtensions()) {
            if (ext.equals(s)) { return "org/sleuthkit/autopsy/images/exe-file.png"; }
        }
        // Text files
        for (String s : FileTypeExtensions.getTextExtensions()) {
            if (ext.equals(s)) { return "org/sleuthkit/autopsy/images/text-file.png"; }
        }
        // Web files
        for (String s : FileTypeExtensions.getWebExtensions()) {
            if (ext.equals(s)) { return "org/sleuthkit/autopsy/images/web-file.png"; }
        }
        // PDFs
        for (String s : FileTypeExtensions.getPDFExtensions()) {
            if (ext.equals(s)) { return "org/sleuthkit/autopsy/images/pdf-file.png"; }
        }
        // Else return the default
        return "org/sleuthkit/autopsy/images/file-icon.png";
    }
}
@@ -0,0 +1,46 @@
package org.sleuthkit.autopsy.datamodel;

import java.util.Arrays;
import java.util.List;

/**
 * Contains lists of commonly known and used file type extensions
 * and getters to obtain them.
 */
class FileTypeExtensions {
    private final static List<String> IMAGE_EXTENSIONS = Arrays.asList(".jpg", ".jpeg", ".png", ".psd", ".nef", ".tiff");
    private final static List<String> VIDEO_EXTENSIONS = Arrays.asList(".aaf", ".3gp", ".asf", ".avi", ".m1v", ".m2v",
            ".m4v", ".mp4", ".mov", ".mpeg", ".mpg", ".mpe", ".mp4", ".rm", ".wmv", ".mpv", ".flv", ".swf");
    private final static List<String> AUDIO_EXTENSIONS = Arrays.asList(".aiff", ".aif", ".flac", ".wav", ".m4a", ".ape",
            ".wma", ".mp2", ".mp1", ".mp3", ".aac", ".mp4", ".m4p", ".m1a", ".m2a", ".m4r", ".mpa", ".m3u", ".mid", ".midi", ".ogg");
    private final static List<String> DOCUMENT_EXTENSIONS = Arrays.asList(".doc", ".docx", ".odt", ".xls", ".xlsx", ".ppt", ".pptx");
    private final static List<String> EXECUTABLE_EXTENSIONS = Arrays.asList(".exe", ".msi", ".cmd", ".com", ".bat", ".reg", ".scr", ".dll", ".ini");
    private final static List<String> TEXT_EXTENSIONS = Arrays.asList(".txt", ".rtf", ".log", ".text", ".xml");
    private final static List<String> WEB_EXTENSIONS = Arrays.asList(".html", ".htm", ".css", ".js", ".php", ".aspx");
    private final static List<String> PDF_EXTENSIONS = Arrays.asList(".pdf");

    static List<String> getImageExtensions() {
        return IMAGE_EXTENSIONS;
    }
    static List<String> getVideoExtensions() {
        return VIDEO_EXTENSIONS;
    }
    static List<String> getAudioExtensions() {
        return AUDIO_EXTENSIONS;
    }
    static List<String> getDocumentExtensions() {
        return DOCUMENT_EXTENSIONS;
    }
    static List<String> getExecutableExtensions() {
        return EXECUTABLE_EXTENSIONS;
    }
    static List<String> getTextExtensions() {
        return TEXT_EXTENSIONS;
    }
    static List<String> getWebExtensions() {
        return WEB_EXTENSIONS;
    }
    static List<String> getPDFExtensions() {
        return PDF_EXTENSIONS;
    }
}
@@ -30,14 +30,9 @@ public class SearchFilters implements AutopsyVisitableItem {
    SleuthkitCase skCase;

    public enum FileSearchFilter implements AutopsyVisitableItem, SearchFilterInterface {
        TSK_IMAGE_FILTER(0, "TSK_IMAGE_FILTER", "Images", Arrays.asList(".jpg", ".jpeg", ".png", ".psd", ".nef", ".tiff")),
        TSK_VIDEO_FILTER(1, "TSK_VIDEO_FILTER", "Videos",
                Arrays.asList(".aaf", ".3gp", ".asf", ".avi", ".m1v", ".m2v", ".m4v", ".mp4",
                ".mov", ".mpeg", ".mpg", ".mpe", ".mp4", ".rm", ".wmv", ".mpv", ".flv", ".swf")),
        TSK_AUDIO_FILTER(2, "TSK_AUDIO_FILTER", "Audio",
                Arrays.asList(".aiff", ".aif", ".flac", ".wav", ".m4a", ".ape", ".wma", ".mp2",
                ".mp1", ".mp3", ".aac", ".mp4", ".m4p", ".m1a", ".m2a", ".m4r", ".mpa",
                ".m3u", ".mid", ".midi", ".ogg")),
        TSK_IMAGE_FILTER(0, "TSK_IMAGE_FILTER", "Images", FileTypeExtensions.getImageExtensions()),
        TSK_VIDEO_FILTER(1, "TSK_VIDEO_FILTER", "Videos", FileTypeExtensions.getVideoExtensions()),
        TSK_AUDIO_FILTER(2, "TSK_AUDIO_FILTER", "Audio", FileTypeExtensions.getAudioExtensions()),
        TSK_DOCUMENT_FILTER(3, "TSK_DOCUMENT_FILTER", "Documents", Arrays.asList(".doc", ".docx", ".pdf", ".xls", ".rtf", ".txt"));

        int id;
@@ -6,6 +6,15 @@
    <code-name-base>org.sleuthkit.autopsy.directorytree</code-name-base>
    <suite-component/>
    <module-dependencies>
        <dependency>
            <code-name-base>org.netbeans.api.progress</code-name-base>
            <build-prerequisite/>
            <compile-dependency/>
            <run-dependency>
                <release-version>1</release-version>
                <specification-version>1.28.1</specification-version>
            </run-dependency>
        </dependency>
        <dependency>
            <code-name-base>org.netbeans.modules.settings</code-name-base>
            <build-prerequisite/>
@ -18,17 +18,24 @@
|
||||
*/
|
||||
package org.sleuthkit.autopsy.directorytree;
|
||||
|
||||
import java.awt.event.ActionEvent;
|
||||
import javax.swing.JFileChooser;
|
||||
import java.io.File;
|
||||
import java.awt.Component;
|
||||
import java.awt.event.ActionEvent;
|
||||
import java.io.File;
|
||||
import java.util.concurrent.CancellationException;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import javax.swing.AbstractAction;
|
||||
import javax.swing.JFileChooser;
|
||||
import javax.swing.JOptionPane;
|
||||
import javax.swing.SwingWorker;
|
||||
import org.netbeans.api.progress.ProgressHandle;
|
||||
import org.netbeans.api.progress.ProgressHandleFactory;
|
||||
import org.openide.nodes.Node;
|
||||
import org.openide.util.Cancellable;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.coreutils.FileUtil;
|
||||
import org.sleuthkit.autopsy.datamodel.ContentUtils;
|
||||
import org.sleuthkit.autopsy.datamodel.ContentUtils.ExtractFscContentVisitor;
|
||||
import org.sleuthkit.autopsy.coreutils.Log;
|
||||
import org.sleuthkit.datamodel.Content;
|
||||
import org.sleuthkit.datamodel.ContentVisitor;
|
||||
import org.sleuthkit.datamodel.Directory;
|
||||
@ -41,6 +48,7 @@ public final class ExtractAction extends AbstractAction {
|
||||
|
||||
private static final InitializeContentVisitor initializeCV = new InitializeContentVisitor();
|
||||
private FsContent fsContent;
|
||||
private Logger logger = Logger.getLogger(ExtractAction.class.getName());
|
||||
|
||||
public ExtractAction(String title, Node contentNode) {
|
||||
super(title);
|
||||
@ -78,8 +86,7 @@ public final class ExtractAction extends AbstractAction {
|
||||
*/
|
||||
@Override
|
||||
public void actionPerformed(ActionEvent e) {
|
||||
Log.noteAction(this.getClass());
|
||||
|
||||
// Get file and check that it's okay to overwrite existing file
|
||||
JFileChooser fc = new JFileChooser();
|
||||
fc.setCurrentDirectory(new File(Case.getCurrentCase().getCaseDirectory()));
|
||||
fc.setSelectedFile(new File(this.fsContent.getName()));
|
||||
@ -88,7 +95,7 @@ public final class ExtractAction extends AbstractAction {
|
||||
if (returnValue == JFileChooser.APPROVE_OPTION) {
|
||||
File destination = fc.getSelectedFile();
|
||||
|
||||
// check that it's okay to overwrite existing file
|
||||
// do the check
|
||||
if (destination.exists()) {
|
||||
int choice = JOptionPane.showConfirmDialog(
|
||||
(Component) e.getSource(),
|
||||
@ -97,7 +104,7 @@ public final class ExtractAction extends AbstractAction {
|
||||
JOptionPane.OK_CANCEL_OPTION);
|
||||
|
||||
if (choice != JOptionPane.OK_OPTION) {
|
||||
return;
|
||||
return; // Just exit the function
|
||||
}
|
||||
|
||||
if (!destination.delete()) {
|
||||
@ -107,12 +114,94 @@ public final class ExtractAction extends AbstractAction {
|
||||
}
|
||||
}
|
||||
|
||||
ExtractFscContentVisitor.extract(fsContent, destination);
|
||||
if(fsContent.isDir())
|
||||
try {
|
||||
ExtractFileThread extract = new ExtractFileThread();
|
||||
extract.init(this.fsContent, e, destination);
|
||||
extract.execute();
|
||||
} catch (Exception ex) {
|
||||
logger.log(Level.WARNING, "Unable to start background thread.", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class ExtractFileThread extends SwingWorker<Object,Void> {
|
||||
private Logger logger = Logger.getLogger(ExtractFileThread.class.getName());
|
||||
private ProgressHandle progress;
|
||||
private FsContent fsContent;
|
||||
ActionEvent e;
|
||||
File destination;
|
||||
|
||||
private void init(FsContent fsContent, ActionEvent e, File destination) {
|
||||
this.fsContent = fsContent;
|
||||
this.e = e;
|
||||
this.destination = destination;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Object doInBackground() throws Exception {
|
||||
logger.log(Level.INFO, "Starting background processing for file extraction.");
|
||||
|
||||
// Setup progress bar
|
||||
final String displayName = "Extracting";
|
||||
progress = ProgressHandleFactory.createHandle(displayName, new Cancellable() {
|
||||
@Override
|
||||
public boolean cancel() {
|
||||
if (progress != null)
|
||||
progress.setDisplayName(displayName + " (Cancelling...)");
|
||||
return ExtractAction.ExtractFileThread.this.cancel(true);
|
||||
}
|
||||
});
|
||||
|
||||
// Start the progress bar as indeterminate
|
||||
progress.start();
|
||||
progress.switchToIndeterminate();
|
||||
if(fsContent.isFile()) {
|
||||
// Max file size of 200GB
|
||||
long filesize = fsContent.getSize();
|
||||
int unit = (int) (filesize / 100);
|
||||
progress.switchToDeterminate(100);
|
||||
} else if(fsContent.isDir()) {
|
||||
// If dir base progress off number of children
|
||||
int toProcess = fsContent.getChildren().size();
|
||||
progress.switchToDeterminate(toProcess);
|
||||
}
|
||||
|
||||
// Start extracting the file/directory
|
||||
ExtractFscContentVisitor.extract(fsContent, destination, progress, this);
|
||||
logger.log(Level.INFO, "Done background processing");
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void done() {
|
||||
try {
|
||||
super.get(); //block and get all exceptions thrown while doInBackground()
|
||||
} catch (CancellationException ex) {
|
||||
logger.log(Level.INFO, "Extraction was canceled.");
|
||||
} catch (InterruptedException ex) {
|
||||
logger.log(Level.INFO, "Extraction was interrupted.");
|
||||
} catch (Exception ex) {
|
||||
logger.log(Level.SEVERE, "Fatal error during file extraction.", ex);
|
||||
} finally {
|
||||
progress.finish();
|
||||
if (!this.isCancelled()) {
|
||||
logger.log(Level.INFO, "Extracting completed without cancellation.");
|
||||
// Alert the user extraction is over
|
||||
if(fsContent.isDir()) {
|
||||
JOptionPane.showMessageDialog((Component) e.getSource(), "Directory extracted.");
|
||||
else if(fsContent.isFile()){
|
||||
} else if(fsContent.isFile()){
|
||||
JOptionPane.showMessageDialog((Component) e.getSource(), "File extracted.");
|
||||
}
|
||||
} else {
|
||||
logger.log(Level.INFO, "Attempting to delete file(s).");
|
||||
if(FileUtil.deleteFileDir(destination)) {
|
||||
logger.log(Level.INFO, "Finished deletion sucessfully.");
|
||||
} else {
|
||||
logger.log(Level.WARNING, "Deletion attempt complete; not all files were sucessfully deleted.");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
<dependency conf="solr-war->default" org="org.apache.solr" name="solr" rev="3.5.0" transitive="false" /> <!-- the war file -->
|
||||
<dependency conf="autopsy->*" org="org.apache.solr" name="solr-solrj" rev="3.5.0"/>
|
||||
<dependency conf="autopsy->*" org="commons-lang" name="commons-lang" rev="2.4"/>
|
||||
<dependency conf="autopsy->*" org="org.apache.tika" name="tika-parsers" rev="0.10"/>
|
||||
<dependency conf="start-solr->default" org="org.mortbay.jetty" name="start" rev="6.1.26"/>
|
||||
<dependency conf="jetty-libs->default" org="org.mortbay.jetty" name="jetty" rev="6.1.26"/>
|
||||
<dependency conf="jetty-libs->default" org="org.mortbay.jetty" name="jsp-2.1" rev="6.1.14"/>
|
||||
|
@ -1,12 +1,3 @@
|
||||
file.reference.commons-codec-1.5.jar=release/modules/ext/commons-codec-1.5.jar
|
||||
file.reference.commons-httpclient-3.1.jar=release/modules/ext/commons-httpclient-3.1.jar
|
||||
file.reference.commons-io-1.4.jar=release/modules/ext/commons-io-1.4.jar
|
||||
file.reference.commons-lang-2.4.jar=release/modules/ext/commons-lang-2.4.jar
|
||||
file.reference.jcl-over-slf4j-1.6.1.jar=release/modules/ext/jcl-over-slf4j-1.6.1.jar
|
||||
file.reference.slf4j-api-1.6.1.jar=release/modules/ext/slf4j-api-1.6.1.jar
|
||||
file.reference.solr-solrj-3.5.0.jar=release/modules/ext/solr-solrj-3.5.0.jar
|
||||
javac.source=1.6
|
||||
javac.compilerargs=-Xlint -Xlint:-serial
|
||||
javadoc.reference.solr-solrj-3.5.0.jar=release/modules/ext/solr-solrj-3.5.0-javadoc.jar
|
||||
source.reference.solr-solrj-3.5.0.jar=release/modules/ext/solr-solrj-3.5.0-sources.jar
|
||||
spec.version.base=0.0
|
||||
|
@ -148,34 +148,138 @@
|
||||
<public-packages>
|
||||
<package>org.sleuthkit.autopsy.keywordsearch</package>
|
||||
</public-packages>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/slf4j-api-1.6.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/slf4j-api-1.6.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-io-1.4.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-io-1.4.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-httpclient-3.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-httpclient-3.1.jar</binary-origin>
|
||||
<runtime-relative-path>ext/metadata-extractor-2.4.0-beta-1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/metadata-extractor-2.4.0-beta-1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-codec-1.5.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-codec-1.5.jar</binary-origin>
|
||||
<runtime-relative-path>ext/jdom-1.0.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/jdom-1.0.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/poi-3.8-beta4.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/poi-3.8-beta4.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-logging-1.1.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-logging-1.1.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/tagsoup-1.2.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/tagsoup-1.2.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-lang-2.4.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-lang-2.4.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/poi-ooxml-3.8-beta4.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/poi-ooxml-3.8-beta4.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/tika-parsers-0.10.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/tika-parsers-0.10.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/jcl-over-slf4j-1.6.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/jcl-over-slf4j-1.6.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/boilerpipe-1.1.0.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/boilerpipe-1.1.0.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/solr-solrj-3.5.0.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/solr-solrj-3.5.0.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/fontbox-1.6.0.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/fontbox-1.6.0.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/slf4j-api-1.6.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/slf4j-api-1.6.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/bcmail-jdk15-1.45.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/bcmail-jdk15-1.45.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/wstx-asl-3.2.7.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/wstx-asl-3.2.7.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/netcdf-4.2-min.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/netcdf-4.2-min.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/xmlbeans-2.3.0.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/xmlbeans-2.3.0.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/apache-mime4j-core-0.7.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/apache-mime4j-core-0.7.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-httpclient-3.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-httpclient-3.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/apache-mime4j-dom-0.7.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/apache-mime4j-dom-0.7.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/poi-scratchpad-3.8-beta4.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/poi-scratchpad-3.8-beta4.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/poi-ooxml-schemas-3.8-beta4.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/poi-ooxml-schemas-3.8-beta4.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/tika-core-0.10.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/tika-core-0.10.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/dom4j-1.6.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/dom4j-1.6.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-codec-1.5.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-codec-1.5.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/rome-0.9.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/rome-0.9.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/bcprov-jdk15-1.45.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/bcprov-jdk15-1.45.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/jempbox-1.6.0.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/jempbox-1.6.0.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/pdfbox-1.6.0.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/pdfbox-1.6.0.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-compress-1.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-compress-1.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/asm-3.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/asm-3.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/geronimo-stax-api_1.0_spec-1.0.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/geronimo-stax-api_1.0_spec-1.0.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
</data>
|
||||
</configuration>
|
||||
</project>
|
||||
|
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2012 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
|
||||
|
||||
/**
|
||||
* Represents a single string chunk to be indexed, derived from an AbstractFileExtract source file
|
||||
*/
|
||||
class AbstractFileChunk {
|
||||
private int chunkID;
|
||||
private AbstractFileExtract parent;
|
||||
|
||||
AbstractFileChunk(AbstractFileExtract parent, int chunkID) {
|
||||
this.parent = parent;
|
||||
this.chunkID = chunkID;
|
||||
}
|
||||
|
||||
public AbstractFileExtract getParent() {
|
||||
return parent;
|
||||
}
|
||||
|
||||
public int getChunkId() {
|
||||
return chunkID;
|
||||
}
|
||||
|
||||
/**
|
||||
* return String representation of the absolute id (parent and child)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getIdString() {
|
||||
return Server.getChunkIdString(this.parent.getSourceFile().getId(), this.chunkID);
|
||||
}
|
||||
|
||||
public boolean index(Ingester ingester, byte[] content, long contentSize, ByteContentStream.Encoding encoding) throws IngesterException {
|
||||
boolean success = true;
|
||||
ByteContentStream bcs = new ByteContentStream(content, contentSize, parent.getSourceFile(), encoding);
|
||||
try {
|
||||
ingester.ingest(this, bcs, content.length);
|
||||
//logger.log(Level.INFO, "Ingesting string chunk: " + this.getName() + ": " + chunkID);
|
||||
} catch (Exception ingEx) {
|
||||
success = false;
|
||||
throw new IngesterException("Problem ingesting file string chunk: " + parent.getSourceFile().getId() + ", chunk: " + chunkID, ingEx);
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2012 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
|
||||
/**
|
||||
* Common methods for utilities that extract text and content and divide into
|
||||
* chunks
|
||||
*/
|
||||
interface AbstractFileExtract {
|
||||
|
||||
/**
|
||||
* Get number of chunks resulted from extracting this AbstractFile
|
||||
* @return the number of chunks produced
|
||||
*/
|
||||
int getNumChunks();
|
||||
|
||||
/**
|
||||
* Get the source file associated with this extraction
|
||||
* @return the source AbstractFile
|
||||
*/
|
||||
AbstractFile getSourceFile();
|
||||
|
||||
/**
|
||||
* Index the Abstract File
|
||||
* @return true if indexed successfully, false otherwise
|
||||
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
|
||||
*/
|
||||
boolean index() throws Ingester.IngesterException;
|
||||
}
|
@ -24,8 +24,7 @@ import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.util.logging.Logger;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream;
|
||||
import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream.Encoding;
|
||||
import org.sleuthkit.autopsy.keywordsearch.ByteContentStream.Encoding;
|
||||
import org.sleuthkit.datamodel.AbstractContent;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
|
||||
@ -43,7 +42,7 @@ public class AbstractFileStringContentStream implements ContentStream {
|
||||
private AbstractFileStringStream stream;
|
||||
private static Logger logger = Logger.getLogger(AbstractFileStringContentStream.class.getName());
|
||||
|
||||
public AbstractFileStringContentStream(AbstractFile content, Encoding encoding) {
|
||||
public AbstractFileStringContentStream(AbstractFile content, ByteContentStream.Encoding encoding) {
|
||||
this.content = content;
|
||||
this.encoding = encoding;
|
||||
this.stream = new AbstractFileStringStream(content, encoding);
|
||||
|
@ -0,0 +1,128 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Takes an AbstractFile, extracts strings, converts them into chunks (associated with the original
* source file) of up to 1MB, and then indexes the chunks as text with Solr
|
||||
*/
|
||||
class AbstractFileStringExtract implements AbstractFileExtract {
|
||||
|
||||
private KeywordSearchIngestService service;
|
||||
private Ingester ingester;
|
||||
private int numChunks;
|
||||
private static final Logger logger = Logger.getLogger(AbstractFileStringExtract.class.getName());
|
||||
static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
|
||||
private AbstractFile aFile;
|
||||
//single static buffer for all extractions. Safe, indexing can only happen in one thread
|
||||
private static final byte[] STRING_CHUNK_BUF = new byte[(int) MAX_STRING_CHUNK_SIZE];
|
||||
private static final int BOM_LEN = 3;
|
||||
|
||||
static {
|
||||
//prepend UTF-8 BOM to start of the buffer
|
||||
STRING_CHUNK_BUF[0] = (byte) 0xEF;
|
||||
STRING_CHUNK_BUF[1] = (byte) 0xBB;
|
||||
STRING_CHUNK_BUF[2] = (byte) 0xBF;
|
||||
}
|
||||
|
||||
public AbstractFileStringExtract(AbstractFile aFile) {
|
||||
this.aFile = aFile;
|
||||
numChunks = 0; //unknown until indexing is done
|
||||
this.service = KeywordSearchIngestService.getDefault();
|
||||
Server solrServer = KeywordSearch.getServer();
|
||||
ingester = solrServer.getIngester();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumChunks() {
|
||||
return this.numChunks;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractFile getSourceFile() {
|
||||
return aFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean index() throws IngesterException {
|
||||
boolean success = false;
|
||||
|
||||
//construct stream that extracts text as we read it
|
||||
final InputStream stringStream = new AbstractFileStringStream(aFile, ByteContentStream.Encoding.UTF8);
|
||||
|
||||
try {
|
||||
success = true;
|
||||
//break input stream into chunks
|
||||
|
||||
long readSize = 0;
|
||||
while ((readSize = stringStream.read(STRING_CHUNK_BUF, BOM_LEN, (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) {
|
||||
//FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks+1));
|
||||
//debug.write(STRING_CHUNK_BUF, 0, (int)readSize);
|
||||
|
||||
AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1);
|
||||
|
||||
try {
|
||||
chunk.index(ingester, STRING_CHUNK_BUF, readSize + BOM_LEN, ByteContentStream.Encoding.UTF8);
|
||||
++this.numChunks;
|
||||
} catch (IngesterException ingEx) {
|
||||
success = false;
|
||||
logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ingEx);
|
||||
throw ingEx; //need to rethrow/return to signal error and move on
|
||||
}
|
||||
|
||||
//check if need invoke commit/search between chunks
|
||||
//not to delay commit if timer has gone off
|
||||
service.checkRunCommitSearch();
|
||||
|
||||
//debug.close();
|
||||
}
|
||||
|
||||
|
||||
//after all chunks, ingest the parent file without content itself, and store numChunks
|
||||
ingester.ingest(this);
|
||||
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Unable to read input stream to divide and send to Solr, file: " + aFile.getName(), ex);
|
||||
success = false;
|
||||
} finally {
|
||||
try {
|
||||
stringStream.close();
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Error closing input stream stream, file: " + aFile.getName(), ex);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -16,13 +16,14 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.datamodel;
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import org.sleuthkit.autopsy.datamodel.DataConversion;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
import org.sleuthkit.datamodel.TskException;
|
||||
|
||||
@ -33,16 +34,6 @@ import org.sleuthkit.datamodel.TskException;
|
||||
*/
|
||||
public class AbstractFileStringStream extends InputStream {
|
||||
|
||||
public static enum Encoding {
|
||||
|
||||
UTF8 {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "UTF-8";
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
//args
|
||||
private AbstractFile content;
|
||||
@ -73,7 +64,7 @@ public class AbstractFileStringStream extends InputStream {
|
||||
* @param encoding target encoding, currently UTF-8
|
||||
* @param preserveOnBuffBoundary whether to preserve or split string on a buffer boundary. If false, will pack into read buffer up to max. possible, potentially splitting a string. If false, the string will be preserved for next read.
|
||||
*/
|
||||
public AbstractFileStringStream(AbstractFile content, Encoding encoding, boolean preserveOnBuffBoundary) {
|
||||
public AbstractFileStringStream(AbstractFile content, ByteContentStream.Encoding encoding, boolean preserveOnBuffBoundary) {
|
||||
this.content = content;
|
||||
this.encoding = encoding.toString();
|
||||
//this.preserveOnBuffBoundary = preserveOnBuffBoundary;
|
||||
@ -87,7 +78,7 @@ public class AbstractFileStringStream extends InputStream {
|
||||
* @param content to extract strings from
|
||||
* @param encoding target encoding, currently UTF-8
|
||||
*/
|
||||
public AbstractFileStringStream(AbstractFile content, Encoding encoding) {
|
||||
public AbstractFileStringStream(AbstractFile content, ByteContentStream.Encoding encoding) {
|
||||
this(content, encoding, false);
|
||||
}
|
||||
|
@ -0,0 +1,253 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2012 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.TimeoutException;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import org.sleuthkit.autopsy.ingest.IngestServiceAbstractFile;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||
import org.apache.tika.Tika;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.sleuthkit.autopsy.keywordsearch.ByteContentStream.Encoding;
|
||||
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
|
||||
|
||||
/**
|
||||
* Extractor of text from TIKA supported AbstractFile content. Extracted text is
|
||||
* divided into chunks and indexed with Solr.
|
||||
* Protects against Tika parser hangs (for unexpected/corrupt content) using a timeout mechanism.
|
||||
* If Tika extraction succeeds, chunks are indexed with Solr.
|
||||
*
|
||||
* This Tika extraction/chunking utility is useful for large files with content types supported by the Tika parsers.
|
||||
*
|
||||
*/
|
||||
public class AbstractFileTikaTextExtract implements AbstractFileExtract {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(IngestServiceAbstractFile.class.getName());
|
||||
private static final Encoding ENCODING = Encoding.UTF8;
|
||||
static final Charset charset = Charset.forName(ENCODING.toString());
|
||||
static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;
|
||||
private static final int SINGLE_READ_CHARS = 1024;
|
||||
private static final int EXTRA_CHARS = 128; //for whitespace
|
||||
private static final char[] TEXT_CHUNK_BUF = new char[MAX_EXTR_TEXT_CHARS];
|
||||
private static final Tika tika = new Tika();
|
||||
private KeywordSearchIngestService service;
|
||||
private Ingester ingester;
|
||||
private AbstractFile sourceFile;
|
||||
private int numChunks = 0;
|
||||
private static final String UTF16BOM = "\uFEFF";
|
||||
private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
|
||||
|
||||
AbstractFileTikaTextExtract(AbstractFile sourceFile) {
|
||||
this.sourceFile = sourceFile;
|
||||
this.service = KeywordSearchIngestService.getDefault();
|
||||
Server solrServer = KeywordSearch.getServer();
|
||||
ingester = solrServer.getIngester();
|
||||
//tika.setMaxStringLength(MAX_EXTR_TEXT_CHARS); //for getting back string only
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumChunks() {
|
||||
return numChunks;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AbstractFile getSourceFile() {
|
||||
return sourceFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean index() throws Ingester.IngesterException {
|
||||
boolean success = false;
|
||||
Reader reader = null;
|
||||
|
||||
|
||||
final InputStream stream = new ReadContentInputStream(sourceFile);
|
||||
try {
|
||||
Metadata meta = new Metadata();
|
||||
ParseRequestTask parseTask = new ParseRequestTask(tika, stream, meta, sourceFile);
|
||||
final Future<?> future = tikaParseExecutor.submit(parseTask);
|
||||
try {
|
||||
future.get(Ingester.getTimeout(sourceFile.getSize()), TimeUnit.SECONDS);
|
||||
} catch (TimeoutException te) {
|
||||
final String msg = "Tika parse timeout for content: " + sourceFile.getId() + ", " + sourceFile.getName();
|
||||
logger.log(Level.WARNING, msg);
|
||||
throw new IngesterException(msg);
|
||||
}
|
||||
|
||||
reader = parseTask.getReader();
|
||||
if (reader == null) {
|
||||
//likely due to exception in parse()
|
||||
logger.log(Level.WARNING, "No reader available from Tika parse");
|
||||
return false;
|
||||
}
|
||||
|
||||
success = true;
|
||||
long readSize;
|
||||
long totalRead = 0;
|
||||
boolean eof = false;
|
||||
//we read max 1024 chars at a time; this seems to be the most this Reader returns in one call
|
||||
while (!eof && (readSize = reader.read(TEXT_CHUNK_BUF, 0, SINGLE_READ_CHARS)) != -1) {
|
||||
totalRead += readSize;
|
||||
|
||||
//consume more bytes to fill entire chunk (leave EXTRA_CHARS to end the word)
|
||||
while ((totalRead < MAX_EXTR_TEXT_CHARS - SINGLE_READ_CHARS - EXTRA_CHARS)
|
||||
&& (readSize = reader.read(TEXT_CHUNK_BUF, (int) totalRead, SINGLE_READ_CHARS)) != -1) {
|
||||
totalRead += readSize;
|
||||
}
|
||||
if (readSize == -1) {
|
||||
//this is the last chunk
|
||||
eof = true;
|
||||
} else {
|
||||
//try to read until whitespace to not break words
|
||||
while ((totalRead < MAX_EXTR_TEXT_CHARS - 1)
|
||||
&& !Character.isWhitespace(TEXT_CHUNK_BUF[(int) totalRead - 1])
|
||||
&& (readSize = reader.read(TEXT_CHUNK_BUF, (int) totalRead, 1)) != -1) {
|
||||
totalRead += readSize;
|
||||
}
|
||||
if (readSize == -1) {
|
||||
//this is the last chunk
|
||||
eof = true;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
//logger.log(Level.INFO, "TOTAL READ SIZE: " + totalRead + " file: " + sourceFile.getName());
|
||||
//encode to bytes to index as byte stream
|
||||
String extracted;
|
||||
//add BOM and trim the 0 bytes
|
||||
//set initial size to chars read + bom + metadata (roughly) - try to prevent from resizing
|
||||
StringBuilder sb = new StringBuilder((int) totalRead + 1000);
|
||||
//inject BOM here (saves byte buffer realloc later), will be converted to specific encoding BOM
|
||||
sb.append(UTF16BOM);
|
||||
if (totalRead < MAX_EXTR_TEXT_CHARS) {
|
||||
sb.append(TEXT_CHUNK_BUF, 0, (int) totalRead);
|
||||
} else {
|
||||
sb.append(TEXT_CHUNK_BUF);
|
||||
}
|
||||
|
||||
//reset for next chunk
|
||||
totalRead = 0;
|
||||
|
||||
//append meta data if last chunk
|
||||
if (eof) {
|
||||
//sort meta data keys
|
||||
List<String> sortedKeyList = Arrays.asList(meta.names());
|
||||
Collections.sort(sortedKeyList);
|
||||
sb.append("\n\n------------------------------METADATA------------------------------\n\n");
|
||||
for (String key : sortedKeyList) {
|
||||
String value = meta.get(key);
|
||||
sb.append(key).append(": ").append(value).append("\n");
|
||||
}
|
||||
}
|
||||
|
||||
extracted = sb.toString();
|
||||
|
||||
//converts BOM automatically to charSet encoding
|
||||
byte[] encodedBytes = extracted.getBytes(charset);
|
||||
AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1);
|
||||
try {
|
||||
chunk.index(ingester, encodedBytes, encodedBytes.length, ENCODING);
|
||||
++this.numChunks;
|
||||
} catch (Ingester.IngesterException ingEx) {
|
||||
success = false;
|
||||
logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '"
|
||||
+ sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);
|
||||
throw ingEx; //need to rethrow/return to signal error and move on
|
||||
}
|
||||
|
||||
//check if need invoke commit/search between chunks
|
||||
//not to delay commit if timer has gone off
|
||||
service.checkRunCommitSearch();
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Unable to read content stream from " + sourceFile.getId() + ": " + sourceFile.getName(), ex);
|
||||
success = false;
|
||||
} catch (Exception ex) {
|
||||
logger.log(Level.WARNING, "Unexpected error, can't read content stream from " + sourceFile.getId() + ": " + sourceFile.getName(), ex);
|
||||
success = false;
|
||||
} finally {
|
||||
try {
|
||||
stream.close();
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Unable to close content stream from " + sourceFile.getId(), ex);
|
||||
}
|
||||
try {
|
||||
if (reader != null) {
|
||||
reader.close();
|
||||
}
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Unable to close content reader from " + sourceFile.getId(), ex);
|
||||
}
|
||||
}
|
||||
|
||||
//after all chunks, ingest the parent file without content itself, and store numChunks
|
||||
ingester.ingest(this);
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
/**
|
||||
* Runnable and timeable task that calls Tika to parse the content using streaming.
|
||||
*/
|
||||
private static class ParseRequestTask implements Runnable {
|
||||
|
||||
//in
|
||||
private Tika tika;
|
||||
private InputStream stream;
|
||||
private Metadata meta;
|
||||
private AbstractFile sourceFile;
|
||||
//out
|
||||
private Reader reader;
|
||||
|
||||
ParseRequestTask(Tika tika, InputStream stream, Metadata meta, AbstractFile sourceFile) {
|
||||
this.tika = tika;
|
||||
this.stream = stream;
|
||||
this.meta = meta;
|
||||
this.sourceFile = sourceFile;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
try {
|
||||
reader = tika.parse(stream, meta);
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Unable to Tika parse the content " + sourceFile.getId() + ": " + sourceFile.getName(), ex);
|
||||
reader = null;
|
||||
}
|
||||
}
|
||||
|
||||
public Reader getReader() {
|
||||
return reader;
|
||||
}
|
||||
}
|
||||
}
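The hang-protection pattern used above (submit the parse to a single-thread executor, bound the wait with Future.get) can be shown in isolation. This is a minimal sketch with invented names and a fixed 60-second timeout, not the class itself; the real extractor derives the timeout from file size via Ingester.getTimeout().

import java.io.Reader;
import java.io.StringReader;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

public class TimedParseSketch {

    public static void main(String[] args) throws Exception {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        //submit the potentially hanging parse as a separate task
        Future<Reader> parse = executor.submit(new Callable<Reader>() {
            @Override
            public Reader call() throws Exception {
                //stand-in for tika.parse(stream, metadata)
                return new StringReader("extracted text");
            }
        });
        try {
            //bound the wait so a corrupt file cannot hang the indexing thread
            Reader reader = parse.get(60, TimeUnit.SECONDS);
            System.out.println("parse finished, first char: " + (char) reader.read());
        } catch (TimeoutException te) {
            parse.cancel(true); //give up on the hung parse and report the failure
            System.out.println("parse timed out");
        } finally {
            executor.shutdownNow();
        }
    }
}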
|
@ -25,15 +25,32 @@ import java.io.InputStreamReader;
|
||||
import java.io.Reader;
|
||||
import java.util.logging.Logger;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream.Encoding;
|
||||
import org.sleuthkit.datamodel.AbstractContent;
|
||||
import org.sleuthkit.datamodel.FsContent;
|
||||
|
||||
/**
|
||||
* Stream of bytes representing a string with a specified encoding,
* to feed into Solr as a ContentStream
|
||||
*/
|
||||
public class ByteContentStream implements ContentStream {
|
||||
|
||||
public static enum Encoding {
|
||||
|
||||
UTF8 {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "UTF-8";
|
||||
}
|
||||
},
|
||||
UTF16 {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "UTF-16";
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
//input
|
||||
private byte[] content; //extracted subcontent
|
||||
private long contentSize;
|
||||
|
@ -158,9 +158,9 @@ public class ExtractedContentViewer implements DataContentViewer {
|
||||
@Override
|
||||
public String getMarkup() {
|
||||
try {
|
||||
curContent = StringEscapeUtils.escapeHtml(getSolrContent(selectedNode, currentPage, hasChunks));
|
||||
curContent = "<pre>" + curContent.trim() + "</pre>";
|
||||
return curContent;
|
||||
curContent = getSolrContent(selectedNode, currentPage, hasChunks);
|
||||
String curContentTrimmed = "<pre>" + curContent.trim() + "</pre>";
|
||||
return curContentTrimmed;
|
||||
} catch (SolrServerException ex) {
|
||||
logger.log(Level.WARNING, "Couldn't get extracted content.", ex);
|
||||
return "";
|
||||
@ -418,7 +418,10 @@ public class ExtractedContentViewer implements DataContentViewer {
|
||||
|
||||
//not cached
|
||||
try {
|
||||
curContent = solrServer.getSolrContent(contentObj, chunkId);
|
||||
curContent = StringEscapeUtils.escapeHtml(solrServer.getSolrContent(contentObj, chunkId)).trim();
|
||||
StringBuilder sb = new StringBuilder(curContent.length() + 20);
|
||||
sb.append("<pre>").append(curContent).append("</pre>");
|
||||
curContent = sb.toString();
|
||||
curContentId = contentId;
|
||||
curContentChunk = chunkId;
|
||||
} catch (NoOpenCoreException ex) {
|
||||
|
@ -1,170 +0,0 @@
|
||||
/*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2011 Basis Technology Corp.
|
||||
* Contact: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import org.sleuthkit.autopsy.datamodel.AbstractFileStringStream;
|
||||
import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
|
||||
import org.sleuthkit.datamodel.AbstractFile;
|
||||
|
||||
|
||||
/**
|
||||
* Utility to extract strings and index a file with string content as chunks
|
||||
* associated with the original parent file
|
||||
*/
|
||||
class FileExtract {
|
||||
|
||||
KeywordSearchIngestService service;
|
||||
private int numChunks;
|
||||
private static final Logger logger = Logger.getLogger(FileExtract.class.getName());
|
||||
static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
|
||||
private AbstractFile sourceFile;
|
||||
|
||||
//single static buffer for all extractions. Safe, indexing can only happen in one thread
|
||||
private static final byte[] STRING_CHUNK_BUF = new byte[(int) MAX_STRING_CHUNK_SIZE];
|
||||
private static final int BOM_LEN = 3;
|
||||
static {
|
||||
//prepend UTF-8 BOM to start of the buffer
|
||||
STRING_CHUNK_BUF[0] = (byte)0xEF;
|
||||
STRING_CHUNK_BUF[1] = (byte)0xBB;
|
||||
STRING_CHUNK_BUF[2] = (byte)0xBF;
|
||||
}
|
||||
|
||||
public FileExtract(KeywordSearchIngestService service, AbstractFile sourceFile) {
|
||||
this.service = service;
|
||||
this.sourceFile = sourceFile;
|
||||
numChunks = 0; //unknown until indexing is done
|
||||
}
|
||||
|
||||
public int getNumChunks() {
|
||||
return this.numChunks;
|
||||
}
|
||||
|
||||
public AbstractFile getSourceFile() {
|
||||
return sourceFile;
|
||||
}
|
||||
|
||||
|
||||
public boolean index(Ingester ingester) throws IngesterException {
|
||||
boolean success = false;
|
||||
|
||||
AbstractFileStringStream stringStream = null;
|
||||
try {
|
||||
success = true;
|
||||
//break string into chunks
|
||||
//Note: could use DataConversion.toString() since we are operating on fixed chunks
|
||||
//but FsContentStringStream handles string boundary case better
|
||||
stringStream = new AbstractFileStringStream(sourceFile, AbstractFileStringStream.Encoding.UTF8);
|
||||
long readSize = 0;
|
||||
|
||||
while ((readSize = stringStream.read(STRING_CHUNK_BUF, BOM_LEN, (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) {
|
||||
//FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks+1));
|
||||
//debug.write(STRING_CHUNK_BUF, 0, (int)readSize);
|
||||
|
||||
FileExtractedChild chunk = new FileExtractedChild(this, this.numChunks + 1);
|
||||
|
||||
try {
|
||||
chunk.index(ingester, STRING_CHUNK_BUF, readSize + BOM_LEN);
|
||||
++this.numChunks;
|
||||
} catch (IngesterException ingEx) {
|
||||
success = false;
|
||||
logger.log(Level.WARNING, "Ingester had a problem with extracted strings from file '" + sourceFile.getName() + "' (id: " + sourceFile.getId() + ").", ingEx);
|
||||
throw ingEx; //need to rethrow/return to signal error and move on
|
||||
}
|
||||
|
||||
//check if need invoke commit/search between chunks
|
||||
//not to delay commit if timer has gone off
|
||||
service.checkRunCommitSearch();
|
||||
|
||||
//debug.close();
|
||||
}
|
||||
|
||||
|
||||
//after all chunks, ingest the parent file without content itself, and store numChunks
|
||||
ingester.ingest(this);
|
||||
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Unable to read string stream and send to Solr, file: " + sourceFile.getName(), ex);
|
||||
success = false;
|
||||
} finally {
|
||||
if (stringStream != null) {
|
||||
try {
|
||||
stringStream.close();
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Error closing string stream, file: " + sourceFile.getName(), ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return success;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Represents each string chunk to be indexed, a child of FileExtracted file
|
||||
*/
|
||||
class FileExtractedChild {
|
||||
|
||||
private int chunkID;
|
||||
private FileExtract parent;
|
||||
|
||||
FileExtractedChild(FileExtract parent, int chunkID) {
|
||||
this.parent = parent;
|
||||
this.chunkID = chunkID;
|
||||
}
|
||||
|
||||
public FileExtract getParentFile() {
|
||||
return parent;
|
||||
}
|
||||
|
||||
public int getChunkId() {
|
||||
return chunkID;
|
||||
}
|
||||
|
||||
/**
|
||||
* return String representation of the absolute id (parent and child)
|
||||
* @return
|
||||
*/
|
||||
public String getIdString() {
|
||||
return getFileExtractChildId(this.parent.getSourceFile().getId(), this.chunkID);
|
||||
}
|
||||
|
||||
|
||||
public boolean index(Ingester ingester, byte[] content, long contentSize) throws IngesterException {
|
||||
boolean success = true;
|
||||
ByteContentStream bcs = new ByteContentStream(content, contentSize, parent.getSourceFile(), AbstractFileStringStream.Encoding.UTF8);
|
||||
try {
|
||||
ingester.ingest(this, bcs);
|
||||
//logger.log(Level.INFO, "Ingesting string chunk: " + this.getName() + ": " + chunkID);
|
||||
|
||||
} catch (Exception ingEx) {
|
||||
success = false;
|
||||
throw new IngesterException("Problem ingesting file string chunk: " + parent.getSourceFile().getId() + ", chunk: " + chunkID, ingEx);
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
public static String getFileExtractChildId(long parentID, int childID) {
|
||||
return Long.toString(parentID) + Server.ID_CHUNK_SEP + Integer.toString(childID);
|
||||
}
|
||||
}
|
@ -69,10 +69,22 @@ public class Ingester {
|
||||
// supported extensions list from http://www.lucidimagination.com/devzone/technical-articles/content-extraction-tika
|
||||
static final String[] ingestibleExtensions = {"tar", "jar", "zip", "gzip", "bzip2",
|
||||
"gz", "tgz", "odf", "doc", "xls", "ppt", "rtf", "pdf", "html", "htm", "xhtml", "txt", "log", "manifest",
|
||||
"bmp", "gif", "png", "jpeg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
||||
"bmp", "gif", "png", "jpeg", "jpg", "tiff", "mp3", "aiff", "au", "midi", "wav",
|
||||
"pst", "xml", "class", "dwg", "eml", "emlx", "mbox", "mht"};
|
||||
|
||||
|
||||
private static Ingester instance;
|
||||
|
||||
private Ingester() {
|
||||
|
||||
}
|
||||
|
||||
public static synchronized Ingester getDefault() {
|
||||
if (instance == null) {
|
||||
instance = new Ingester();
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("FinalizeDeclaration")
|
||||
@ -99,16 +111,16 @@ public class Ingester {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends a FileExtract to Solr to have its content extracted and added to the
|
||||
* Sends an AbstractFileExtract to Solr to have its content extracted and added to the
|
||||
* index. commit() should be called once you're done ingesting files.
|
||||
* FileExtract represents a parent of extracted file with actual content.
|
||||
* The parent itself has no content, only meta data and is used to associate the extracted FileExtractedChild
|
||||
* The parent itself has no content, only meta data and is used to associate the extracted AbstractFileChunk
|
||||
*
|
||||
* @param fe FileExtract to ingest
|
||||
* @param fe AbstractFileExtract to ingest
|
||||
* @throws IngesterException if there was an error processing a specific
|
||||
* file, but the Solr server is probably fine.
|
||||
*/
|
||||
void ingest(FileExtract fe) throws IngesterException {
|
||||
void ingest(AbstractFileExtract fe) throws IngesterException {
|
||||
Map<String, String> params = getContentFields(fe.getSourceFile());
|
||||
|
||||
params.put(Server.Schema.NUM_CHUNKS.toString(), Integer.toString(fe.getNumChunks()));
|
||||
@ -117,23 +129,24 @@ public class Ingester {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sends a FileExtractedChild to Solr and its extracted content stream to be added to the
|
||||
* Sends an AbstractFileChunk to Solr and its extracted content stream to be added to the
|
||||
* index. commit() should be called once you're done ingesting files.
|
||||
* FileExtractedChild represents a file chunk and its chunk content.
|
||||
* AbstractFileChunk represents a file chunk and its chunk content.
|
||||
*
|
||||
* @param fec FileExtractedChild to ingest
|
||||
* @param fec AbstractFileChunk to ingest
|
||||
* @param size approx. size of the stream in bytes, used for timeout estimation
|
||||
* @throws IngesterException if there was an error processing a specific
|
||||
* file, but the Solr server is probably fine.
|
||||
*/
|
||||
void ingest(FileExtractedChild fec, ByteContentStream bcs) throws IngesterException {
|
||||
void ingest(AbstractFileChunk fec, ByteContentStream bcs, int size) throws IngesterException {
|
||||
AbstractContent sourceContent = bcs.getSourceContent();
|
||||
Map<String, String> params = getContentFields(sourceContent);
|
||||
|
||||
//overwrite id with the chunk id
|
||||
params.put(Server.Schema.ID.toString(),
|
||||
FileExtractedChild.getFileExtractChildId(sourceContent.getId(), fec.getChunkId()));
|
||||
Server.getChunkIdString(sourceContent.getId(), fec.getChunkId()));
|
||||
|
||||
ingest(bcs, params, FileExtract.MAX_STRING_CHUNK_SIZE);
|
||||
ingest(bcs, params, size);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -259,7 +272,7 @@ public class Ingester {
|
||||
* @param size size of the content
|
||||
* @return time in seconds to use a timeout
|
||||
*/
|
||||
private static int getTimeout(long size) {
|
||||
static int getTimeout(long size) {
|
||||
if (size < 1024 * 1024L) //1MB
|
||||
{
|
||||
return 60;
|
||||
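Only the sub-1MB branch of the size-based timeout is visible in this hunk (60 seconds). As a hedged sketch of what such a policy generally looks like, the larger tiers below are illustrative assumptions, not the project's actual values:

//illustrative only: parse timeout in seconds that grows with content size;
//the 1MB / 60s case mirrors the hunk above, the remaining tiers are assumptions
static int timeoutForSize(long sizeBytes) {
    final long MB = 1024 * 1024L;
    if (sizeBytes < 1 * MB) {
        return 60;
    } else if (sizeBytes < 10 * MB) {
        return 5 * 60;
    } else if (sizeBytes < 100 * MB) {
        return 15 * 60;
    }
    return 60 * 60;
}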
@ -448,8 +461,9 @@ public class Ingester {
|
||||
*/
|
||||
static boolean isIngestible(AbstractFile aFile) {
|
||||
TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
|
||||
if (! aType.equals(TSK_DB_FILES_TYPE_ENUM.FS) )
|
||||
if (! aType.equals(TSK_DB_FILES_TYPE_ENUM.FS) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FsContent fsContent = (FsContent) aFile;
|
||||
|
||||
|
@ -20,6 +20,7 @@ package org.sleuthkit.autopsy.keywordsearch;
|
||||
|
||||
import java.awt.event.ActionEvent;
|
||||
import java.awt.event.ActionListener;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@ -40,7 +41,6 @@ import org.netbeans.api.progress.ProgressHandleFactory;
|
||||
import org.openide.util.Cancellable;
|
||||
import org.openide.util.Exceptions;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.ingest.IngestManager;
|
||||
import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
|
||||
import org.sleuthkit.autopsy.ingest.IngestMessage;
|
||||
import org.sleuthkit.autopsy.ingest.IngestMessage.MessageType;
|
||||
@ -95,7 +95,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
private final String hashDBServiceName = "Hash Lookup"; //NOTE this needs to match the HashDB service getName()
|
||||
private SleuthkitCase caseHandle = null;
|
||||
private boolean skipKnown = true;
|
||||
boolean initialized = false;
|
||||
private boolean initialized = false;
|
||||
|
||||
private enum IngestStatus {
|
||||
|
||||
@ -200,6 +200,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
managerProxy.postMessage(IngestMessage.createMessage(++messageID, MessageType.INFO, this, "Completed"));
|
||||
}
|
||||
|
||||
|
||||
//postSummary();
|
||||
}
|
||||
|
||||
@ -224,6 +225,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
runSearcher = false;
|
||||
finalSearcherDone = true;
|
||||
|
||||
|
||||
//commit uncommitted files, don't search again
|
||||
commit();
|
||||
|
||||
@ -498,16 +500,27 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
|
||||
private final Logger logger = Logger.getLogger(Indexer.class.getName());
|
||||
|
||||
private boolean extractAndIngest(AbstractFile aFile) {
|
||||
boolean indexed = false;
|
||||
final FileExtract fe = new FileExtract(KeywordSearchIngestService.this, aFile);
|
||||
try {
|
||||
indexed = fe.index(ingester);
|
||||
} catch (IngesterException ex) {
|
||||
logger.log(Level.WARNING, "Error extracting strings and indexing file: " + aFile.getName(), ex);
|
||||
indexed = false;
|
||||
/**
* Extract strings or text with Tika (by streaming) from the file, divide
* the file into chunks and index the chunks.
*
* @param aFile file to extract strings from, divide into chunks and
* index
* @param stringsOnly true to use string extraction, false to use the Tika
* text extractor
* @return true if the file was indexed, false otherwise
*/
|
||||
private boolean extractIndex(AbstractFile aFile, boolean stringsOnly) throws IngesterException {
|
||||
AbstractFileExtract fileExtract;
|
||||
|
||||
if (stringsOnly) {
|
||||
fileExtract = new AbstractFileStringExtract(aFile);
|
||||
} else {
|
||||
fileExtract = new AbstractFileTikaTextExtract(aFile);
|
||||
}
|
||||
return indexed;
|
||||
|
||||
//divide into chunks and index
|
||||
return fileExtract.index();
|
||||
}
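A hedged illustration of how a caller might drive the dispatch above: indexWithFallback is an invented name, the error handling is simplified, and the real indexFile() method that follows also handles metadata-only and oversized files.

//hypothetical call site: prefer Tika text extraction for supported types,
//fall back to raw string extraction when the type is unsupported or Tika fails
private boolean indexWithFallback(AbstractFile aFile) {
    try {
        if (Ingester.isIngestible(aFile) && extractIndex(aFile, false)) {
            return true; //Tika-extracted text was chunked and indexed
        }
        return extractIndex(aFile, true); //fall back to string extraction
    } catch (IngesterException ex) {
        return false; //simplified: the real code logs and records a SKIPPED status
    }
}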
|
||||
|
||||
private void indexFile(AbstractFile aFile, boolean indexContent) {
|
||||
@ -520,9 +533,10 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
fsContent = (FsContent) aFile;
|
||||
}
|
||||
|
||||
//if alloc fs file and not index content, or a dir, index meta data only
|
||||
final long size = aFile.getSize();
|
||||
//if alloc fs file and not to index content, or a dir, or 0 content, index meta data only
|
||||
if (fsContent != null
|
||||
&& (indexContent == false || fsContent.isDir())) {
|
||||
&& (indexContent == false || fsContent.isDir() || size == 0)) {
|
||||
try {
|
||||
ingester.ingest(fsContent, false); //meta-data only
|
||||
ingestStatus.put(aFile.getId(), IngestStatus.INGESTED_META);
|
||||
@ -536,28 +550,27 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
|
||||
boolean ingestibleFile = Ingester.isIngestible(aFile);
|
||||
|
||||
final long size = aFile.getSize();
|
||||
//if fs file, limit size of entire file, do not limit strings
|
||||
if (fsContent != null && (size == 0 || (ingestibleFile && size > MAX_INDEX_SIZE))) {
|
||||
//if fs file, index meta only, otherwise if unalloc, skip
|
||||
try {
|
||||
ingester.ingest(fsContent, false); //meta-data only
|
||||
ingestStatus.put(aFile.getId(), IngestStatus.INGESTED_META);
|
||||
} catch (IngesterException ex) {
|
||||
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
|
||||
logger.log(Level.WARNING, "Unable to index meta-data for fsContent: " + fsContent.getId(), ex);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
if (fsContent != null && ingestibleFile == true) {
|
||||
//we know it's an allocated fs file (FsContent) with supported content
|
||||
//extract text with Tika, divide into chunks and index with Solr
|
||||
try {
|
||||
//logger.log(Level.INFO, "indexing: " + fsContent.getName());
|
||||
ingester.ingest(fsContent, true);
|
||||
ingestStatus.put(fsContent.getId(), IngestStatus.INGESTED);
|
||||
if (!extractIndex(aFile, false)) {
|
||||
logger.log(Level.WARNING, "Failed to extract Tika text and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").");
|
||||
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
|
||||
//try to extract strings, if a file
|
||||
if (fsContent.isFile() == true) {
|
||||
processNonIngestible(fsContent);
|
||||
}
|
||||
|
||||
} else {
|
||||
ingestStatus.put(aFile.getId(), IngestStatus.INGESTED);
|
||||
|
||||
}
|
||||
|
||||
} catch (IngesterException e) {
|
||||
logger.log(Level.INFO, "Could not extract text with Tika, " + fsContent.getId() + ", "
|
||||
+ fsContent.getName(), e);
|
||||
ingestStatus.put(fsContent.getId(), IngestStatus.SKIPPED);
|
||||
//try to extract strings, if a file
|
||||
if (fsContent.isFile() == true) {
|
||||
@ -565,6 +578,8 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.log(Level.WARNING, "Error extracting text with Tika, " + fsContent.getId() + ", "
|
||||
+ fsContent.getName(), e);
|
||||
ingestStatus.put(fsContent.getId(), IngestStatus.SKIPPED);
|
||||
//try to extract strings if a file
|
||||
if (fsContent.isFile() == true) {
|
||||
@ -578,7 +593,8 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
}
|
||||
|
||||
private boolean processNonIngestible(AbstractFile aFile) {
|
||||
if (!extractAndIngest(aFile)) {
|
||||
try {
|
||||
if (!extractIndex(aFile, true)) {
|
||||
logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").");
|
||||
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
|
||||
return false;
|
||||
@ -586,6 +602,11 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
|
||||
ingestStatus.put(aFile.getId(), IngestStatus.EXTRACTED_INGESTED);
|
||||
return true;
|
||||
}
|
||||
} catch (IngesterException ex) {
|
||||
logger.log(Level.WARNING, "Failed to extract strings and ingest, file '" + aFile.getName() + "' (id: " + aFile.getId() + ").", ex);
|
||||
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -326,7 +326,7 @@ public class LuceneQuery implements KeywordSearchQuery {
|
||||
if (chunkID == 0) {
|
||||
contentIDStr = Long.toString(contentID);
|
||||
} else {
|
||||
contentIDStr = FileExtractedChild.getFileExtractChildId(contentID, chunkID);
|
||||
contentIDStr = Server.getChunkIdString(contentID, chunkID);
|
||||
}
|
||||
|
||||
String idQuery = Server.Schema.ID.toString() + ":" + contentIDStr;
|
||||
|
@ -60,74 +60,63 @@ class Server {
|
||||
public static enum Schema {
|
||||
|
||||
ID {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "id";
|
||||
}
|
||||
},
|
||||
CONTENT {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "content";
|
||||
}
|
||||
},
|
||||
CONTENT_WS {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "content_ws";
|
||||
}
|
||||
},
|
||||
FILE_NAME {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "file_name";
|
||||
}
|
||||
},
|
||||
CTIME {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ctime";
|
||||
}
|
||||
},
|
||||
ATIME {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "atime";
|
||||
}
|
||||
},
|
||||
MTIME {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "mtime";
|
||||
}
|
||||
},
|
||||
CRTIME {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "crtime";
|
||||
}
|
||||
},
|
||||
NUM_CHUNKS {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "num_chunks";
|
||||
}
|
||||
},};
|
||||
|
||||
},
|
||||
};
|
||||
public static final String HL_ANALYZE_CHARS_UNLIMITED = "-1";
|
||||
|
||||
//max content size we can send to Solr
|
||||
public static final long MAX_CONTENT_SIZE = 1L * 1024 * 1024 * 1024;
|
||||
|
||||
private static final Logger logger = Logger.getLogger(Server.class.getName());
|
||||
private static final String DEFAULT_CORE_NAME = "coreCase";
|
||||
// TODO: DEFAULT_CORE_NAME needs to be replaced with unique names to support multiple open cases
|
||||
@ -137,6 +126,8 @@ class Server {
|
||||
private static final int MAX_SOLR_MEM_MB = 512; //TODO set dynamically based on avail. system resources
|
||||
private Process curSolrProcess = null;
|
||||
|
||||
private static Ingester ingester = null;
|
||||
|
||||
public enum CORE_EVT_STATES {
|
||||
|
||||
STOPPED, STARTED
|
||||
@ -148,6 +139,7 @@ class Server {
|
||||
|
||||
/**
|
||||
* New instance for the server at the given URL
|
||||
*
|
||||
* @param url should be something like "http://localhost:8983/solr/"
|
||||
*/
|
||||
Server(String url) {
|
||||
@ -260,8 +252,7 @@ class Server {
|
||||
/**
|
||||
* Tries to stop a Solr instance.
|
||||
*
|
||||
* Waits for the stop command to finish
|
||||
* before returning.
|
||||
* Waits for the stop command to finish before returning.
|
||||
*/
|
||||
synchronized void stop() {
|
||||
try {
|
||||
@ -283,8 +274,11 @@ class Server {
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests if there's a Solr server running by sending it a core-status request.
|
||||
* @return false if the request failed with a connection error, otherwise true
|
||||
* Tests if there's a Solr server running by sending it a core-status
|
||||
* request.
|
||||
*
|
||||
* @return false if the request failed with a connection error, otherwise
|
||||
* true
|
||||
*/
|
||||
synchronized boolean isRunning() {
|
||||
|
||||
@ -311,7 +305,9 @@ class Server {
|
||||
|
||||
return true;
|
||||
}
|
||||
/**** Convenience methods for use while we only open one case at a time ****/
|
||||
/**
|
||||
* ** Convenience methods for use while we only open one case at a time ***
|
||||
*/
|
||||
private volatile Core currentCore = null;
|
||||
|
||||
synchronized void openCore() {
|
||||
@ -331,9 +327,12 @@ class Server {
|
||||
serverAction.putValue(CORE_EVT, CORE_EVT_STATES.STOPPED);
|
||||
}
|
||||
|
||||
/**** end single-case specific methods ****/
|
||||
/**
|
||||
* ** end single-case specific methods ***
|
||||
*/
|
||||
/**
|
||||
* Open a core for the given case
|
||||
*
|
||||
* @param c
|
||||
* @return
|
||||
*/
|
||||
@ -345,6 +344,7 @@ class Server {
|
||||
|
||||
/**
|
||||
* commit current core if it exists
|
||||
*
|
||||
* @throws SolrServerException, NoOpenCoreException
|
||||
*/
|
||||
synchronized void commit() throws SolrServerException, NoOpenCoreException {
|
||||
@ -362,8 +362,10 @@ class Server {
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute query that gets only number of all Solr files indexed
|
||||
* without actually returning the files. The result does not include chunks, only number of actual files.
|
||||
* Execute query that gets only number of all Solr files indexed without
|
||||
* actually returning the files. The result does not include chunks, only
|
||||
* number of actual files.
|
||||
*
|
||||
* @return int representing number of indexed files
|
||||
* @throws SolrServerException
|
||||
*/
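Count-only queries like the one documented here typically ask Solr for zero rows and read numFound from the response. A minimal SolrJ sketch of that idea; the method name is invented, and the filter the real method uses to exclude chunk documents is not shown in this hunk, so it is omitted:

//sketch: count indexed documents without retrieving them
static long countIndexedDocuments(org.apache.solr.client.solrj.SolrServer solr)
        throws org.apache.solr.client.solrj.SolrServerException {
    org.apache.solr.client.solrj.SolrQuery q = new org.apache.solr.client.solrj.SolrQuery("*:*");
    q.setRows(0); //request zero rows; only the total hit count (numFound) is needed
    return solr.query(q).getResults().getNumFound();
}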
|
||||
@ -376,8 +378,9 @@ class Server {
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute query that gets only number of all Solr documents indexed (files and chunks)
|
||||
* without actually returning the documents
|
||||
* Execute query that gets only number of all Solr documents indexed (files
|
||||
* and chunks) without actually returning the documents
|
||||
*
|
||||
* @return int representing number of indexed files (files and chunks)
|
||||
* @throws SolrServerException
|
||||
*/
|
||||
@ -391,6 +394,7 @@ class Server {
|
||||
|
||||
/**
|
||||
* Return true if the file is indexed (either as a whole or as a chunk)
|
||||
*
|
||||
* @param contentID
|
||||
* @return true if it is indexed
|
||||
* @throws SolrServerException, NoOpenCoreException
|
||||
@ -405,8 +409,10 @@ class Server {
|
||||
|
||||
/**
|
||||
* Execute query that gets number of indexed file chunks for a file
|
||||
*
|
||||
* @param fileID file id of the original file broken into chunks and indexed
|
||||
* @return int representing number of indexed file chunks, 0 if there is no chunks
|
||||
* @return int representing number of indexed file chunks, 0 if there are no
* chunks
|
||||
* @throws SolrServerException
|
||||
*/
|
||||
public int queryNumFileChunks(long fileID) throws SolrServerException, NoOpenCoreException {
|
||||
@ -419,6 +425,7 @@ class Server {
|
||||
|
||||
/**
|
||||
* Execute solr query
|
||||
*
|
||||
* @param sq query
|
||||
* @return query response
|
||||
* @throws SolrServerException
|
||||
@ -433,6 +440,7 @@ class Server {
|
||||
|
||||
/**
|
||||
* Execute solr query
|
||||
*
|
||||
* @param sq the query
|
||||
* @param method http method to use
|
||||
* @return query response
|
||||
@ -448,6 +456,7 @@ class Server {
|
||||
|
||||
/**
|
||||
* Execute Solr terms query
|
||||
*
|
||||
* @param sq the query
|
||||
* @return terms response
|
||||
* @throws SolrServerException
|
||||
@ -462,6 +471,7 @@ class Server {
|
||||
|
||||
/**
|
||||
* Execute Solr query to get content text
|
||||
*
|
||||
* @param content to get the text for
|
||||
* @return content text string
|
||||
* @throws SolrServerException
|
||||
@ -476,8 +486,10 @@ class Server {
|
||||
|
||||
/**
|
||||
* Execute Solr query to get content text from content chunk
|
||||
*
|
||||
* @param content to get the text for
|
||||
* @param chunkID chunk number to query (starting at 1), or 0 if there is no chunks for that content
|
||||
* @param chunkID chunk number to query (starting at 1), or 0 if there are no
* chunks for that content
|
||||
* @return content text string
|
||||
* @throws SolrServerException
|
||||
* @throws NoOpenCoreException
|
||||
@ -490,15 +502,28 @@ class Server {
|
||||
}
|
||||
|
||||
/**
|
||||
* factory method to create ingester
|
||||
* @return ingester
|
||||
* Method to return ingester instance
|
||||
*
|
||||
* @return ingester instance
|
||||
*/
|
||||
public Ingester getIngester() {
|
||||
return new Ingester();
|
||||
public static Ingester getIngester() {
|
||||
return Ingester.getDefault();
|
||||
}
|
||||
|
||||
/**
|
||||
* Given file parent id and child chunk ID, return the ID string of the chunk
|
||||
* as stored in Solr, e.g. FILEID_CHUNKID
|
||||
* @param parentID the parent file id (id of the source content)
|
||||
* @param childID the child chunk id
|
||||
* @return formatted string id
|
||||
*/
|
||||
public static String getChunkIdString(long parentID, int childID) {
|
||||
return Long.toString(parentID) + Server.ID_CHUNK_SEP + Integer.toString(childID);
|
||||
}
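For example, assuming ID_CHUNK_SEP is an underscore (its actual value is defined elsewhere in this class and is not shown in this hunk), the third chunk of the file with object id 42 would get the Solr document id below:

String chunkDocId = Server.getChunkIdString(42L, 3); //"42_3" if ID_CHUNK_SEP is "_"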
|
||||
|
||||
/**
|
||||
* Open a new core
|
||||
*
|
||||
* @param coreName name to refer to the core by in Solr
|
||||
* @param dataDir directory to load/store the core data from/to
|
||||
* @return new core
|
||||
@ -574,13 +599,13 @@ class Server {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private String getSolrContent(long contentID, int chunkID) {
|
||||
final SolrQuery q = new SolrQuery();
|
||||
q.setQuery("*:*");
|
||||
String filterQuery = Schema.ID.toString() + ":" + contentID;
|
||||
if (chunkID != 0)
|
||||
if (chunkID != 0) {
|
||||
filterQuery = filterQuery + Server.ID_CHUNK_SEP + chunkID;
|
||||
}
|
||||
q.addFilterQuery(filterQuery);
|
||||
q.setFields(Schema.CONTENT.toString());
|
||||
try {
|
||||
@ -602,10 +627,11 @@ class Server {
|
||||
}
|
||||
|
||||
/**
|
||||
* Execute query that gets only number of all Solr files (not chunks) indexed
|
||||
* without actually returning the files
|
||||
* Execute query that gets only number of all Solr files (not chunks)
|
||||
* indexed without actually returning the files
|
||||
*
|
||||
* @return int representing number of indexed files (entire files, not chunks)
|
||||
* @return int representing number of indexed files (entire files, not
|
||||
* chunks)
|
||||
* @throws SolrServerException
|
||||
*/
|
||||
private int queryNumIndexedFiles() throws SolrServerException {
|
||||
@ -617,10 +643,11 @@ class Server {
|
||||
|
||||
/**
|
||||
* Execute query that gets only number of all Solr documents indexed
|
||||
* without actually returning the documents. Documents include entire indexed files
|
||||
* as well as chunks, which are treated as documents.
|
||||
* without actually returning the documents. Documents include entire
|
||||
* indexed files as well as chunks, which are treated as documents.
|
||||
*
|
||||
* @return int representing number of indexed documents (entire files and chunks)
|
||||
* @return int representing number of indexed documents (entire files
|
||||
* and chunks)
|
||||
* @throws SolrServerException
|
||||
*/
|
||||
private int queryNumIndexedDocuments() throws SolrServerException {
|
||||
@ -631,6 +658,7 @@ class Server {
|
||||
|
||||
/**
|
||||
* Return true if the file is indexed (either as a whole or as a chunk)
|
||||
*
|
||||
* @param contentID
|
||||
* @return true if it is indexed
|
||||
* @throws SolrServerException
|
||||
@ -645,8 +673,11 @@ class Server {
|
||||
|
||||
/**
|
||||
* Execute query that gets number of indexed file chunks for a file
|
||||
* @param contentID file id of the original file broken into chunks and indexed
|
||||
* @return int representing number of indexed file chunks, 0 if there is no chunks
|
||||
*
|
||||
* @param contentID file id of the original file broken into chunks and
|
||||
* indexed
|
||||
* @return int representing number of indexed file chunks, 0 if there are
* no chunks
|
||||
* @throws SolrServerException
|
||||
*/
|
||||
private int queryNumFileChunks(long contentID) throws SolrServerException {
|
||||
|
@ -1,8 +1,8 @@
|
||||
build.xml.data.CRC32=9610d898
|
||||
build.xml.data.CRC32=f8013a8e
|
||||
build.xml.script.CRC32=d323407a
|
||||
build.xml.stylesheet.CRC32=a56c6a5b@1.46.2
|
||||
# This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
|
||||
# Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
|
||||
nbproject/build-impl.xml.data.CRC32=9610d898
|
||||
nbproject/build-impl.xml.data.CRC32=f8013a8e
|
||||
nbproject/build-impl.xml.script.CRC32=aef16a21
|
||||
nbproject/build-impl.xml.stylesheet.CRC32=238281d1@1.46.2
|
||||
|
@ -36,7 +36,7 @@ sub pluginmain {
|
||||
#::logMsg("Launching officedocs v.".$VERSION);
|
||||
# ::rptMsg("officedocs v.".$VERSION); # 20110830 [fpi] + banner
|
||||
# ::rptMsg("(".getHive().") ".getShortDescr()."\n"); # 20110830 [fpi] + banner
|
||||
::rptMsg("<Office>");
|
||||
::rptMsg("<office>");
|
||||
my $reg = Parse::Win32Registry->new($ntuser);
|
||||
my $root_key = $reg->get_root_key;
|
||||
#::rptMsg("officedocs v.".$VERSION);
|
||||
@ -56,6 +56,8 @@ sub pluginmain {
|
||||
#::rptMsg("MSOffice version ".$version." located.");
|
||||
my $key_path = "Software\\Microsoft\\Office\\".$version;
|
||||
my $of_key = $root_key->get_subkey($key_path);
|
||||
::rptMsg("<artifacts>");
|
||||
::rptMsg("<time> ".gmtime($of_key->get_timestamp())."</time>");
|
||||
if ($of_key) {
|
||||
# Attempt to retrieve Word docs
|
||||
my @funcs = ("Open","Save As","File Save");
|
||||
@ -63,11 +65,12 @@ sub pluginmain {
|
||||
my $word = "Common\\Open Find\\Microsoft Office Word\\Settings\\".$func."\\File Name MRU";
|
||||
my $word_key = $of_key->get_subkey($word);
|
||||
if ($word_key) {
|
||||
::rptMsg($word);
|
||||
::rptMsg("<time> ".gmtime($word_key->get_timestamp())."</time><artifacts>");
|
||||
#::rptMsg($word);
|
||||
|
||||
#::rptMsg("");
|
||||
my $value = $word_key->get_value("Value")->get_data();
|
||||
my @data = split(/\00/,$value);
|
||||
::rptMsg("<Word name=\"".$value."\">". @data . "</Word>");
|
||||
#map{::rptMsg("$_");}@data;
|
||||
}
|
||||
else {
|
||||
@ -78,8 +81,8 @@ sub pluginmain {
|
||||
# Attempt to retrieve Excel docs
|
||||
my $excel = 'Excel\\Recent Files';
|
||||
if (my $excel_key = $of_key->get_subkey($excel)) {
|
||||
::rptMsg($key_path."\\".$excel);
|
||||
::rptMsg("LastWrite Time ".gmtime($excel_key->get_timestamp())." (UTC)");
|
||||
#::rptMsg($key_path."\\".$excel);
|
||||
#::rptMsg("LastWrite Time ".gmtime($excel_key->get_timestamp())." (UTC)");
|
||||
my @vals = $excel_key->get_list_of_values();
|
||||
if (scalar(@vals) > 0) {
|
||||
my %files;
|
||||
@ -93,22 +96,22 @@ sub pluginmain {
|
||||
# Print sorted content to report file
|
||||
foreach my $u (sort {$a <=> $b} keys %files) {
|
||||
my ($val,$data) = split(/:/,$files{$u},2);
|
||||
::rptMsg(" ".$val." -> ".$data);
|
||||
::rptMsg("<Excel name=\"".$val."\">".$data . "</Excel>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path.$excel." has no values.");
|
||||
#::rptMsg($key_path.$excel." has no values.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path.$excel." not found.");
|
||||
#::rptMsg($key_path.$excel." not found.");
|
||||
}
|
||||
::rptMsg("");
|
||||
#::rptMsg("");
|
||||
# Attempt to retrieve PowerPoint docs
|
||||
my $ppt = 'PowerPoint\\Recent File List';
|
||||
if (my $ppt_key = $of_key->get_subkey($ppt)) {
|
||||
::rptMsg($key_path."\\".$ppt);
|
||||
::rptMsg("LastWrite Time ".gmtime($ppt_key->get_timestamp())." (UTC)");
|
||||
#::rptMsg($key_path."\\".$ppt);
|
||||
#::rptMsg("LastWrite Time ".gmtime($ppt_key->get_timestamp())." (UTC)");
|
||||
my @vals = $ppt_key->get_list_of_values();
|
||||
if (scalar(@vals) > 0) {
|
||||
my %files;
|
||||
@ -122,7 +125,7 @@ sub pluginmain {
|
||||
# Print sorted content to report file
|
||||
foreach my $u (sort {$a <=> $b} keys %files) {
|
||||
my ($val,$data) = split(/:/,$files{$u},2);
|
||||
::rptMsg(" ".$val." -> ".$data);
|
||||
::rptMsg("<PowerPoint name=\"".$val."\">".$data . "</PowerPoint>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
@ -142,7 +145,7 @@ sub pluginmain {
|
||||
#::logMsg("MSOffice version not found.");
|
||||
#::rptMsg("MSOffice version not found.");
|
||||
}
|
||||
::rptMsg("</artifacts></Office>");
|
||||
::rptMsg("</artifacts></office>");
|
||||
}
|
||||
|
||||
1;
|
@ -69,9 +69,9 @@ my $VERSION = getVersion();
|
||||
sub pluginmain {
|
||||
my $class = shift;
|
||||
my $ntuser = shift;
|
||||
::logMsg("Launching officedocs2010 v.".$VERSION);
|
||||
::rptMsg("officedocs2010 v.".$VERSION); # 20110830 [fpi] + banner
|
||||
::rptMsg("(".getHive().") ".getShortDescr()."\n"); # 20110830 [fpi] + banner
|
||||
#::logMsg("Launching officedocs2010 v.".$VERSION);
|
||||
#::rptMsg("officedocs2010 v.".$VERSION); # 20110830 [fpi] + banner
|
||||
#::rptMsg("(".getHive().") ".getShortDescr()."\n"); # 20110830 [fpi] + banner
|
||||
|
||||
my $reg = Parse::Win32Registry->new($ntuser);
|
||||
my $root_key = $reg->get_root_key;
|
||||
@ -83,15 +83,15 @@ sub pluginmain {
|
||||
}
|
||||
|
||||
if ($tag) {
|
||||
::rptMsg("MSOffice version 2010 located.");
|
||||
#::rptMsg("MSOffice version 2010 located.");
|
||||
my $key_path = "Software\\Microsoft\\Office\\14.0";
|
||||
my $of_key = $root_key->get_subkey($key_path);
|
||||
if ($of_key) {
|
||||
# Attempt to retrieve Word docs
|
||||
my $word = 'Word\\File MRU';
|
||||
if (my $word_key = $of_key->get_subkey($word)) {
|
||||
::rptMsg($key_path."\\".$word);
|
||||
::rptMsg("LastWrite Time ".gmtime($word_key->get_timestamp())." (UTC)");
|
||||
#::rptMsg($key_path."\\".$word);
|
||||
#::rptMsg("LastWrite Time ".gmtime($word_key->get_timestamp())." (UTC)");
|
||||
my @vals = $word_key->get_list_of_values();
|
||||
if (scalar(@vals) > 0) {
|
||||
my %files;
|
||||
@ -106,22 +106,22 @@ sub pluginmain {
|
||||
# Print sorted content to report file
|
||||
foreach my $u (sort {$a <=> $b} keys %files) {
|
||||
my ($val,$data) = split(/:/,$files{$u},2);
|
||||
::rptMsg(" ".$val." -> ".$data);
|
||||
::rptMsg("<Word name=\"".$val."\">".$data . "</Word>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path.$word." has no values.");
|
||||
#::rptMsg($key_path.$word." has no values.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path.$word." not found.");
|
||||
#::rptMsg($key_path.$word." not found.");
|
||||
}
|
||||
::rptMsg("");
|
||||
#::rptMsg("");
|
||||
# Attempt to retrieve Excel docs
|
||||
my $excel = 'Excel\\File MRU';
|
||||
if (my $excel_key = $of_key->get_subkey($excel)) {
|
||||
::rptMsg($key_path."\\".$excel);
|
||||
::rptMsg("LastWrite Time ".gmtime($excel_key->get_timestamp())." (UTC)");
|
||||
#::rptMsg($key_path."\\".$excel);
|
||||
#::rptMsg("LastWrite Time ".gmtime($excel_key->get_timestamp())." (UTC)");
|
||||
my @vals = $excel_key->get_list_of_values();
|
||||
if (scalar(@vals) > 0) {
|
||||
my %files;
|
||||
@ -136,22 +136,22 @@ sub pluginmain {
|
||||
# Print sorted content to report file
|
||||
foreach my $u (sort {$a <=> $b} keys %files) {
|
||||
my ($val,$data) = split(/:/,$files{$u},2);
|
||||
::rptMsg(" ".$val." -> ".$data);
|
||||
::rptMsg("<Excel name=\"".$val."\">".$data . "</Excel>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path.$excel." has no values.");
|
||||
#::rptMsg($key_path.$excel." has no values.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path.$excel." not found.");
|
||||
#::rptMsg($key_path.$excel." not found.");
|
||||
}
|
||||
::rptMsg("");
|
||||
#::rptMsg("");
|
||||
# Attempt to retrieve Access docs
|
||||
my $access = 'Access\\File MRU';
|
||||
if (my $access_key = $of_key->get_subkey($access)) {
|
||||
::rptMsg($key_path."\\".$access);
|
||||
::rptMsg("LastWrite Time ".gmtime($access_key->get_timestamp())." (UTC)");
|
||||
#::rptMsg($key_path."\\".$access);
|
||||
#::rptMsg("LastWrite Time ".gmtime($access_key->get_timestamp())." (UTC)");
|
||||
my @vals = $access_key->get_list_of_values();
|
||||
if (scalar(@vals) > 0) {
|
||||
my %files;
|
||||
@ -166,22 +166,22 @@ sub pluginmain {
|
||||
# Print sorted content to report file
|
||||
foreach my $u (sort {$a <=> $b} keys %files) {
|
||||
my ($val,$data) = split(/:/,$files{$u},2);
|
||||
::rptMsg(" ".$val." -> ".$data);
|
||||
::rptMsg("<Access name=\"".$val."\">".$data . "</Access>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path.$access." has no values.");
|
||||
# ::rptMsg($key_path.$access." has no values.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path.$access." not found.");
|
||||
# ::rptMsg($key_path.$access." not found.");
|
||||
}
|
||||
::rptMsg("");
|
||||
#::rptMsg("");
|
||||
# Attempt to retrieve PowerPoint docs
|
||||
my $ppt = 'PowerPoint\\File MRU';
|
||||
if (my $ppt_key = $of_key->get_subkey($ppt)) {
|
||||
::rptMsg($key_path."\\".$ppt);
|
||||
::rptMsg("LastWrite Time ".gmtime($ppt_key->get_timestamp())." (UTC)");
|
||||
#::rptMsg($key_path."\\".$ppt);
|
||||
#::rptMsg("LastWrite Time ".gmtime($ppt_key->get_timestamp())." (UTC)");
|
||||
my @vals = $ppt_key->get_list_of_values();
|
||||
if (scalar(@vals) > 0) {
|
||||
my %files;
|
||||
@ -196,25 +196,25 @@ sub pluginmain {
|
||||
# Print sorted content to report file
|
||||
foreach my $u (sort {$a <=> $b} keys %files) {
|
||||
my ($val,$data) = split(/:/,$files{$u},2);
|
||||
::rptMsg(" ".$val." -> ".$data);
|
||||
::rptMsg("<PowerPoint name=\"".$val."\">".$data . "</PowerPoint>");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path."\\".$ppt." has no values.");
|
||||
# ::rptMsg($key_path."\\".$ppt." has no values.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg($key_path."\\".$ppt." not found.");
|
||||
# ::rptMsg($key_path."\\".$ppt." not found.");
|
||||
}
|
||||
}
|
||||
else {
|
||||
::rptMsg("Could not access ".$key_path);
|
||||
::logMsg("Could not access ".$key_path);
|
||||
# ::rptMsg("Could not access ".$key_path);
|
||||
# ::logMsg("Could not access ".$key_path);
|
||||
}
|
||||
}
|
||||
else {
|
||||
::logMsg("MSOffice version not found.");
|
||||
::rptMsg("MSOffice version not found.");
|
||||
# ::logMsg("MSOffice version not found.");
|
||||
# ::rptMsg("MSOffice version not found.");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -79,6 +79,7 @@ public class Chrome extends Extract implements IngestServiceImage {
|
||||
int j = 0;
|
||||
if (FFSqlitedb != null && !FFSqlitedb.isEmpty()) {
|
||||
while (j < FFSqlitedb.size()) {
|
||||
|
||||
String temps = currentCase.getTempDirectory() + File.separator + FFSqlitedb.get(j).getName().toString() + j + ".db";
|
||||
try {
|
||||
ContentUtils.writeToFile(FFSqlitedb.get(j), new File(currentCase.getTempDirectory() + File.separator + FFSqlitedb.get(j).getName().toString() + j + ".db"));
|
||||
@ -92,6 +93,7 @@ public class Chrome extends Extract implements IngestServiceImage {
|
||||
break;
|
||||
}
|
||||
List<HashMap<String, Object>> tempList = this.dbConnect(temps, chquery);
|
||||
logger.log(Level.INFO, moduleName + " - Now getting history from " + temps + " with " + tempList.size() + " artifacts identified.");
|
||||
for (HashMap<String, Object> result : tempList) {
|
||||
try {
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
@ -129,6 +131,7 @@ public class Chrome extends Extract implements IngestServiceImage {
|
||||
logger.log(Level.WARNING, "Error while trying to write out a sqlite db.{0}", ex);
|
||||
this.addErrorMessage(this.getName() + ": Error while trying to analyze file:" + FFSqlitedb.get(j).getName());
|
||||
}
|
||||
logger.log(Level.INFO, moduleName + "- Now getting Bookmarks from " + temps);
|
||||
File dbFile = new File(temps);
|
||||
if (controller.isCancelled()) {
|
||||
dbFile.delete();
|
||||
@ -195,6 +198,7 @@ public class Chrome extends Extract implements IngestServiceImage {
|
||||
}
|
||||
|
||||
List<HashMap<String, Object>> tempList = this.dbConnect(temps, chcookiequery);
|
||||
logger.log(Level.INFO, moduleName + " - Now getting cookies from " + temps + " with " + tempList.size() + " artifacts identified.");
|
||||
for (HashMap<String, Object> result : tempList) {
|
||||
try {
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
@ -242,6 +246,7 @@ public class Chrome extends Extract implements IngestServiceImage {
|
||||
}
|
||||
|
||||
List<HashMap<String, Object>> tempList = this.dbConnect(temps, chdownloadquery);
|
||||
logger.log(Level.INFO, moduleName + " - Now getting downloads from " + temps + " with " + tempList.size() + " artifacts identified.");
|
||||
for (HashMap<String, Object> result : tempList) {
|
||||
try {
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
@ -289,6 +294,7 @@ public class Chrome extends Extract implements IngestServiceImage {
|
||||
break;
|
||||
}
|
||||
List<HashMap<String, Object>> tempList = this.dbConnect(temps, chloginquery);
|
||||
logger.log(Level.INFO, moduleName + " - Now getting login information from " + temps + " with " + tempList.size() + " artifacts identified.");
|
||||
for (HashMap<String, Object> result : tempList) {
|
||||
try {
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
|
@ -115,9 +115,9 @@ abstract public class Extract implements IngestServiceImage{
|
||||
* @param query is a sql string query that is to be run
|
||||
* @return list the ArrayList containing the result set information obtained by the query
|
||||
*/
|
||||
public List dbConnect(String path, String query) {
|
||||
public List<HashMap<String,Object>> dbConnect(String path, String query) {
|
||||
ResultSet temprs = null;
|
||||
List list = new ArrayList();
|
||||
List<HashMap<String,Object>> list = new ArrayList<HashMap<String,Object>>();
|
||||
String connectionString = "jdbc:sqlite:" + path;
|
||||
try {
|
||||
dbconnect tempdbconnect = new dbconnect("org.sqlite.JDBC", connectionString);
|
||||
@ -126,7 +126,7 @@ abstract public class Extract implements IngestServiceImage{
|
||||
tempdbconnect.closeConnection();
|
||||
} catch (Exception ex) {
|
||||
logger.log(Level.WARNING, "Error while trying to read into a sqlite db." + connectionString, ex);
|
||||
return new ArrayList();
|
||||
return new ArrayList<HashMap<String,Object>>();
|
||||
}
|
||||
return list;
|
||||
}
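A hedged usage sketch of the now-generic dbConnect from within an Extract subclass; the database path and SQL are invented for illustration.

//hypothetical call site: iterate query results as column-name -> value maps
List<HashMap<String, Object>> rows =
        dbConnect("C:\\temp\\History.db", "SELECT url, title FROM urls");
for (HashMap<String, Object> row : rows) {
    String url = (String) row.get("url");     //keys are the column names from the query
    String title = (String) row.get("title");
    logger.log(Level.INFO, title + " -> " + url);
}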
|
||||
@ -137,12 +137,12 @@ abstract public class Extract implements IngestServiceImage{
|
||||
* @param rs is the resultset that needs to be converted to an arraylist
|
||||
* @return list returns the arraylist built from the converted resultset
|
||||
*/
|
||||
public List<HashMap> resultSetToArrayList(ResultSet rs) throws SQLException {
|
||||
public List<HashMap<String,Object>> resultSetToArrayList(ResultSet rs) throws SQLException {
|
||||
ResultSetMetaData md = rs.getMetaData();
|
||||
int columns = md.getColumnCount();
|
||||
List list = new ArrayList(50);
|
||||
List<HashMap<String,Object>> list = new ArrayList<HashMap<String,Object>>(50);
|
||||
while (rs.next()) {
|
||||
HashMap row = new HashMap(columns);
|
||||
HashMap<String,Object> row = new HashMap<String,Object>(columns);
|
||||
for (int i = 1; i <= columns; ++i) {
|
||||
if (rs.getObject(i) == null) {
|
||||
row.put(md.getColumnName(i), "");
|
||||
|
@ -119,7 +119,7 @@ public class ExtractRegistry extends Extract implements IngestServiceImage {
|
||||
logger.log(Level.WARNING, "Error while trying to read into a sqlite db.{0}", ex);
|
||||
}
|
||||
File regFile = new File(temps);
|
||||
|
||||
logger.log(Level.INFO, moduleName + "- Now getting registry information from " + temps);
|
||||
String txtPath = executeRegRip(temps, j);
|
||||
if (txtPath.length() > 0) {
|
||||
Success = parseReg(txtPath, orgId);
|
||||
@ -219,7 +219,7 @@ public class ExtractRegistry extends Extract implements IngestServiceImage {
|
||||
Long epochtime = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy").parse(etime).getTime();
|
||||
time = epochtime.longValue();
|
||||
String Tempdate = time.toString();
|
||||
time = Long.valueOf(Tempdate)/1000;
|
||||
time = Long.valueOf(Tempdate) / 1000;
|
||||
} catch (ParseException e) {
|
||||
logger.log(Level.WARNING, "RegRipper::Conversion on DateTime -> ", e);
|
||||
}
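The conversion above parses a RegRipper-style timestamp to epoch milliseconds and then divides by 1000 to get seconds, the unit the blackboard time attributes use. The same arithmetic as a compact, self-contained sketch (helper name invented):

//illustrative helper: RegRipper date string -> seconds since the epoch
static long toEpochSeconds(String regRipperDate) throws java.text.ParseException {
    java.text.SimpleDateFormat fmt = new java.text.SimpleDateFormat("EEE MMM d HH:mm:ss yyyy");
    long epochMillis = fmt.parse(regRipperDate).getTime(); //milliseconds since the epoch
    return epochMillis / 1000; //attribute values are stored in seconds
}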
|
||||
@ -266,7 +266,7 @@ public class ExtractRegistry extends Extract implements IngestServiceImage {
|
||||
try {
|
||||
Long epochtime = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy").parse(name).getTime();
|
||||
ftime = epochtime.longValue();
|
||||
ftime = ftime/1000;
|
||||
ftime = ftime / 1000;
|
||||
} catch (ParseException e) {
|
||||
logger.log(Level.WARNING, "RegRipper::Conversion on DateTime -> ", e);
|
||||
}
|
||||
@ -290,7 +290,7 @@ public class ExtractRegistry extends Extract implements IngestServiceImage {
|
||||
Long epochtime = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy").parse(value).getTime();
|
||||
installtime = epochtime.longValue();
|
||||
String Tempdate = installtime.toString();
|
||||
installtime = Long.valueOf(Tempdate)/1000;
|
||||
installtime = Long.valueOf(Tempdate) / 1000;
|
||||
} catch (ParseException e) {
|
||||
logger.log(Level.WARNING, "RegRipper::Conversion on DateTime -> ", e);
|
||||
}
|
||||
@ -299,6 +299,15 @@ public class ExtractRegistry extends Extract implements IngestServiceImage {
|
||||
BlackboardArtifact bbart = tempDb.getContentById(orgId).newArtifact(ARTIFACT_TYPE.TSK_INSTALLED_PROG);
|
||||
bbart.addAttributes(bbattributes);
|
||||
}
|
||||
} else if ("office".equals(context)) {
|
||||
|
||||
BlackboardArtifact bbart = tempDb.getContentById(orgId).newArtifact(ARTIFACT_TYPE.TSK_RECENT_OBJECT);
|
||||
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_LAST_ACCESSED.getTypeID(), "RecentActivity", context, time));
|
||||
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_NAME.getTypeID(), "RecentActivity", context, name));
|
||||
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_VALUE.getTypeID(), "RecentActivity", context, value));
|
||||
bbattributes.add(new BlackboardAttribute(ATTRIBUTE_TYPE.TSK_PROG_NAME.getTypeID(), "RecentActivity", context, artnode.getName()));
|
||||
bbart.addAttributes(bbattributes);
|
||||
|
||||
} else {
|
||||
// BlackboardArtifact bbart = tempDb.getContentById(orgId).newArtifact(sysid);
|
||||
// bbart.addAttributes(bbattributes);
|
||||
@ -330,8 +339,7 @@ public class ExtractRegistry extends Extract implements IngestServiceImage {
|
||||
|
||||
@Override
|
||||
public void stop() {
|
||||
if(JavaSystemCaller.Exec.getProcess() != null)
|
||||
{
|
||||
if (JavaSystemCaller.Exec.getProcess() != null) {
|
||||
JavaSystemCaller.Exec.stop();
|
||||
}
|
||||
}
|
||||
|
@ -87,6 +87,7 @@ public class Firefox extends Extract implements IngestServiceImage {
|
||||
break;
|
||||
}
|
||||
List<HashMap<String, Object>> tempList = this.dbConnect(temps, ffquery);
|
||||
logger.log(Level.INFO, moduleName + " - Now getting history from " + temps + " with " + tempList.size() + " artifacts identified.");
|
||||
for (HashMap<String, Object> result : tempList) {
|
||||
try {
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
@ -130,6 +131,7 @@ public class Firefox extends Extract implements IngestServiceImage {
|
||||
break;
|
||||
}
|
||||
List<HashMap<String, Object>> tempList = this.dbConnect(temps, ffbookmarkquery);
|
||||
logger.log(Level.INFO, moduleName + " - Now getting bookmarks from " + temps + " with " + tempList.size() + " artifacts identified.");
|
||||
for (HashMap<String, Object> result : tempList) {
|
||||
try {
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
@ -180,6 +182,7 @@ public class Firefox extends Extract implements IngestServiceImage {
|
||||
}
|
||||
|
||||
List<HashMap<String, Object>> tempList = this.dbConnect(temps, query);
|
||||
logger.log(Level.INFO, moduleName + " - Now getting cookies from " + temps + " with " + tempList.size() + " artifacts identified.");
|
||||
for (HashMap<String, Object> result : tempList) {
|
||||
try {
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
@ -231,6 +234,7 @@ public class Firefox extends Extract implements IngestServiceImage {
|
||||
}
|
||||
|
||||
List<HashMap<String, Object>> tempList = this.dbConnect(temps, ffdownloadquery);
|
||||
logger.log(Level.INFO, moduleName + " - Now getting downloads from " + temps + " with " + tempList.size() + " artifacts identified.");
|
||||
for (HashMap<String, Object> result : tempList) {
|
||||
try {
|
||||
Collection<BlackboardAttribute> bbattributes = new ArrayList<BlackboardAttribute>();
|
||||
|
@ -213,7 +213,7 @@ public final class ReportAction extends CallableSystemAction implements Presente
|
||||
filterpanel.setComponentOrientation(ComponentOrientation.LEFT_TO_RIGHT);
|
||||
filterpanel.setAlignmentY(Component.TOP_ALIGNMENT);
|
||||
filterpanel.setAlignmentX(Component.LEFT_ALIGNMENT);
|
||||
filterpanel.setSize(300, 100);
|
||||
filterpanel.setSize(300, 200);
|
||||
ButtonGroup previewGroup = new ButtonGroup();
|
||||
for (ReportModule m : Lookup.getDefault().lookupAll(ReportModule.class)) {
|
||||
String name = m.getName();
|
||||
@ -260,6 +260,9 @@ public final class ReportAction extends CallableSystemAction implements Presente
|
||||
|
||||
popUpWindow.pack();
|
||||
popUpWindow.setResizable(false);
|
||||
// Modules need extra room for text to properly show
|
||||
popUpWindow.setSize(popUpWindow.getWidth(),
|
||||
popUpWindow.getHeight()+50);
|
||||
|
||||
// set the location of the popUp Window on the center of the screen
|
||||
Dimension screenDimension = Toolkit.getDefaultToolkit().getScreenSize();
|
||||
|
Report/src/org/sleuthkit/autopsy/report/ReportBodyFile.java (new file, 252 lines)
@ -0,0 +1,252 @@
|
||||
/*
|
||||
*
|
||||
* Autopsy Forensic Browser
|
||||
*
|
||||
* Copyright 2012 42six Solutions.
|
||||
* Contact: aebadirad <at> 42six <dot> com
|
||||
* Project Contact/Architect: carrier <at> sleuthkit <dot> org
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.sleuthkit.autopsy.report;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import org.sleuthkit.autopsy.casemodule.Case;
|
||||
import org.sleuthkit.autopsy.ingest.IngestManager;
|
||||
import org.sleuthkit.datamodel.*;
|
||||
|
||||
/**
|
||||
* ReportBodyFile generates a report in the body file format specified on
|
||||
* The Sleuth Kit wiki as MD5|name|inode|mode_as_string|UID|GID|size|atime|mtime|ctime|crtime.
|
||||
*/
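// Illustrative sketch only (hypothetical values): a single line of the generated body file
// follows the pipe-delimited format described above, for example:
//   d41d8cd98f00b204e9800998ecf8427e|/img.dd/Windows/System32/notepad.exe|1234|r/rrwxrwxrwx|0|0|193536|1344000000|1344000000|1344000000|1344000000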
|
||||
public class ReportBodyFile implements ReportModule {
|
||||
// Declare our publicly accessible formatted report path; this changes every time a report is run
|
||||
private static String bodyFilePath = "";
|
||||
private ReportConfiguration config;
|
||||
private static ReportBodyFile instance = null;
|
||||
private Case currentCase = Case.getCurrentCase(); // get the current case
|
||||
private SleuthkitCase skCase = currentCase.getSleuthkitCase();
|
||||
private static final Logger logger = Logger.getLogger(ReportBodyFile.class.getName());
|
||||
|
||||
ReportBodyFile() {
|
||||
}
|
||||
|
||||
public static synchronized ReportBodyFile getDefault() {
|
||||
if (instance == null) {
|
||||
instance = new ReportBodyFile();
|
||||
}
|
||||
return instance;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a Body File report in the Reports folder of the current case.
|
||||
*
|
||||
* @param reportconfig unused in the body file
|
||||
* @return the path to the generated report
|
||||
* @throws ReportModuleException
|
||||
*/
|
||||
@Override
|
||||
public String generateReport(ReportConfiguration reportconfig) throws ReportModuleException {
|
||||
config = reportconfig;
|
||||
|
||||
// Setup timestamp
|
||||
DateFormat dateFormat = new SimpleDateFormat("MM-dd-yyyy-HH-mm-ss");
|
||||
Date date = new Date();
|
||||
String datenotime = dateFormat.format(date);
|
||||
|
||||
// Get report path
|
||||
bodyFilePath = currentCase.getCaseDirectory() + File.separator + "Reports" +
|
||||
File.separator + currentCase.getName() + "-" + datenotime + ".txt";
|
||||
|
||||
// Run query to get all files
|
||||
ResultSet rs = null;
|
||||
try {
|
||||
// exclude non-fs files/dirs and . and .. files
|
||||
rs = skCase.runQuery("SELECT * FROM tsk_files "
|
||||
+ "WHERE type = '" + TskData.TSK_DB_FILES_TYPE_ENUM.FS.getFileType() + "' "
|
||||
+ "AND name != '.' "
|
||||
+ "AND name != '..'");
|
||||
List<FsContent> fs = skCase.resultSetToFsContents(rs);
|
||||
// Check if ingest finished
|
||||
String ingestwarning = "";
|
||||
if (IngestManager.getDefault().isIngestRunning()) {
|
||||
ingestwarning = "Warning, this report was run before ingest services completed!\n";
|
||||
}
|
||||
// Loop files and write info to report
|
||||
for (FsContent file : fs) {
|
||||
if (ReportFilter.cancel == true) {
|
||||
break;
|
||||
}
|
||||
|
||||
BufferedWriter out = null;
|
||||
try {
|
||||
// MD5|name|inode|mode_as_string|UID|GID|size|atime|mtime|ctime|crtime
|
||||
out = new BufferedWriter(new FileWriter(bodyFilePath, true));
|
||||
out.write(ingestwarning);
|
||||
|
||||
if(file.getMd5Hash()!=null) {
|
||||
out.write(file.getMd5Hash());
|
||||
}
|
||||
out.write("|");
|
||||
if(file.getUniquePath()!=null) {
|
||||
out.write(file.getUniquePath());
|
||||
}
|
||||
out.write("|");
|
||||
out.write(Long.toString(file.getMeta_addr()));
|
||||
out.write("|");
|
||||
if(file.getModeAsString()!=null) {
|
||||
out.write(file.getModeAsString());
|
||||
}
|
||||
out.write("|");
|
||||
out.write(Long.toString(file.getUid()));
|
||||
out.write("|");
|
||||
out.write(Long.toString(file.getGid()));
|
||||
out.write("|");
|
||||
out.write(Long.toString(file.getSize()));
|
||||
out.write("|");
|
||||
out.write(Long.toString(file.getAtime()));
|
||||
out.write("|");
|
||||
out.write(Long.toString(file.getMtime()));
|
||||
out.write("|");
|
||||
out.write(Long.toString(file.getCtime()));
|
||||
out.write("|");
|
||||
out.write(Long.toString(file.getCrtime()));
|
||||
out.write("\n");
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Could not write the temp body file report.", ex);
|
||||
} finally {
|
||||
try {
|
||||
out.flush();
|
||||
out.close();
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Could not flush and close the BufferedWriter.", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch(SQLException ex) {
|
||||
logger.log(Level.WARNING, "Failed to get all file information.", ex);
|
||||
} catch(TskCoreException ex) {
|
||||
logger.log(Level.WARNING, "Failed to get the unique path.", ex);
|
||||
} finally {
|
||||
try {// Close the query
|
||||
if(rs!=null) { skCase.closeRunQuery(rs); }
|
||||
} catch (SQLException ex) {
|
||||
logger.log(Level.WARNING, "Failed to close the query.", ex);
|
||||
}
|
||||
}
|
||||
|
||||
return bodyFilePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
String name = "Body File";
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Save the previously generated report to the given path.
|
||||
* If the report was not generated in generateReport, save will attempt
|
||||
* to regenerate it, then copy the file. If the regeneration fails, the
|
||||
* incident is logged.
|
||||
*/
|
||||
@Override
|
||||
public void save(String path) {
|
||||
File caseFile = new File(bodyFilePath);
|
||||
if(!caseFile.exists()) {
|
||||
logger.log(Level.WARNING, "Body File report does not exist.");
|
||||
try {
|
||||
// Try to generate it again
|
||||
generateReport(config);
|
||||
logger.log(Level.INFO, "Body File report has been regenerated.");
|
||||
} catch (ReportModuleException ex) {
|
||||
logger.log(Level.WARNING, "Failed attempt to regenerate the report.", ex);
|
||||
}
|
||||
}
|
||||
// Check again
|
||||
if(caseFile.exists()) {
|
||||
InputStream in = null;
|
||||
OutputStream out = null;
|
||||
try {
|
||||
in = new FileInputStream(caseFile);
|
||||
out = new FileOutputStream(path);
|
||||
byte[] b = new byte[Integer.parseInt(Long.toString(caseFile.length()))];
|
||||
int len = b.length;
|
||||
int total = 0;
|
||||
int result = 0;
|
||||
|
||||
while ((result = in.read(b, total, len-total)) > 0) {
|
||||
out.write(b, total, result); // write only the bytes read in this pass
|
||||
total += result;
|
||||
}
|
||||
} catch(FileNotFoundException ex) {
|
||||
logger.log(Level.WARNING, "Could find the file specified.", ex);
|
||||
} catch(IOException ex) {
|
||||
logger.log(Level.WARNING, "Could not read from the FileInputStream.", ex);
|
||||
} finally {
|
||||
try {
|
||||
in.close();
|
||||
out.flush();
|
||||
out.close();
|
||||
} catch (IOException ex) {
|
||||
logger.log(Level.WARNING, "Could not close and flush the streams.", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
// Otherwise give up
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getReportType() {
|
||||
String type = "BodyFile";
|
||||
return type;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getExtension() {
|
||||
String ext = ".txt";
|
||||
return ext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ReportConfiguration GetReportConfiguration() {
|
||||
return config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getReportTypeDescription() {
|
||||
String desc = "This is an body file format report.";
|
||||
return desc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getPreview(String path) {
|
||||
BrowserControl.openUrl(path);
|
||||
}
|
||||
}
|
@ -22,6 +22,11 @@
|
||||
<attr name="instanceCreate" methodvalue="org.sleuthkit.autopsy.report.ReportXLS.getDefault"/>
|
||||
<attr name="position" intvalue="902"/>
|
||||
</file>
|
||||
<file name="org-sleuthkit-autopsy-report-ReportBodyFile.instance">
|
||||
<attr name="instanceOf" stringvalue="org.sleuthkit.autopsy.report.ReportModule"/>
|
||||
<attr name="instanceCreate" methodvalue="org.sleuthkit.autopsy.report.ReportBodyFile.getDefault"/>
|
||||
<attr name="position" intvalue="903"/>
|
||||
</file>
|
||||
</folder>
|
||||
<folder name="Toolbars">
|
||||
<folder name="File">
|
||||
|
Testing/script/config.xml (new file, 13 lines)
@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="ASCII"?>
|
||||
<!-- This file is an example config file for regression.py.
|
||||
the following tags are mandatory:
|
||||
1 "indir" tag that specifies where the program can find the hash databases
|
||||
any number of "image" tags that specify where the images are stored. They do not have to be local.
|
||||
invalid paths and images are ignored by the tester.
|
||||
|
||||
in this file, the first three tags are accepted by the tester, while the last two are disregarded as errors-->
|
||||
|
||||
<Properties>
|
||||
<indir name="indir" value="C:\Users\dhurd\Documents\GitHub\autopsy\Testing\script\input" />
|
||||
<!--<image name="image1" value="P:\shared\Testing\script\input\64mb2.img"/>-->
|
||||
</Properties>
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/python
|
||||
#en_US.UTF-8
|
||||
import sys
|
||||
import sqlite3
|
||||
import re
|
||||
@ -6,16 +7,24 @@ import subprocess
|
||||
import os.path
|
||||
import shutil
|
||||
import time
|
||||
import xml
|
||||
from xml.dom.minidom import parse, parseString
|
||||
|
||||
# Usage: ./regression.py [-i FILE] [OPTIONS]
|
||||
|
||||
# Last modified 7/17/12 @5pm
|
||||
# Usage: ./regression.py [-s FILE] OR [-l CONFIG] [OPTIONS]
|
||||
# Run the RegressionTest.java file, and compare the result with a gold standard
|
||||
# When the -s flag is set, this script only tests the image given by FILE.
|
||||
# By default, it tests every image in ./input/
|
||||
# An indexed NSRL database is expected at ./input/nsrl.txt-md5.idx,
|
||||
# and an indexed notable hash database at ./input/notablehashes.txt-md5.idx
|
||||
# In addition, any keywords to search for must be in ./input/notablekeywords.xml
|
||||
# When the -l flag is set, the script looks for a config.xml file of the given name
|
||||
# where images are stored. For usage notes please see the example "config.xml" in
|
||||
# the /script folder.
|
||||
# Options:
|
||||
# -r, --rebuild Rebuild the gold standards from the test results for each image
|
||||
# -i, --ignore Ignores unallocated space when ingesting. Faster, but less accurate results.
|
||||
|
||||
|
||||
hadErrors = False # If any of the tests failed
|
||||
results = {} # Dictionary in which to store map ({imgname}->errors)
|
||||
@ -26,12 +35,18 @@ outDir = os.path.join("output",time.strftime("%Y.%m.%d-%H.%M"))
|
||||
|
||||
|
||||
# Run ingest on all the images in 'input', using notablekeywords.xml and notablehashes.txt-md5.idx
|
||||
def testAddImageIngest(inFile):
|
||||
def testAddImageIngest(inFile, ignoreUnalloc, list):
|
||||
print "================================================"
|
||||
print "Ingesting Image: " + inFile
|
||||
|
||||
# Set up case directory path
|
||||
testCaseName = imageName(inFile)
|
||||
|
||||
#check for flags to append to folder name
|
||||
if ignoreUnalloc:
|
||||
testCaseName+="-i"
|
||||
if list:
|
||||
testCaseName+="-l"
|
||||
if os.path.exists(os.path.join(outDir,testCaseName)):
|
||||
shutil.rmtree(os.path.join(outDir,testCaseName))
|
||||
os.makedirs(os.path.join(outDir,testCaseName))
|
||||
@ -40,10 +55,32 @@ def testAddImageIngest(inFile):
|
||||
|
||||
cwd = wgetcwd()
|
||||
testInFile = wabspath(inFile)
|
||||
|
||||
# NEEDS windows path (backslashes) for .E00 images to work
|
||||
testInFile = testInFile.replace("/", "\\")
|
||||
if list:
|
||||
knownBadPath = os.path.join(inDir, "notablehashes.txt-md5.idx")
|
||||
keywordPath = os.path.join(inDir, "notablekeywords.xml")
|
||||
nsrlPath = os.path.join(inDir, "nsrl.txt-md5.idx")
|
||||
else:
|
||||
knownBadPath = os.path.join(cwd,inDir,"notablehashes.txt-md5.idx")
|
||||
keywordPath = os.path.join(cwd,inDir,"notablekeywords.xml")
|
||||
nsrlPath = os.path.join(cwd,inDir,"nsrl.txt-md5.idx")
|
||||
|
||||
knownBadPath = knownBadPath.replace("/", "\\")
|
||||
keywordPath = keywordPath.replace("/", "\\")
|
||||
nsrlPath = nsrlPath.replace("/", "\\")
|
||||
|
||||
antlog = os.path.join(cwd,outDir,testCaseName,"antlog.txt")
|
||||
antlog = antlog.replace("/", "\\")
|
||||
|
||||
timeout = 24 * 60 * 60 * 1000 # default of 24 hours, just to be safe
|
||||
size = getImageSize(inFile, list) # get the size in bytes
|
||||
timeout = (size / 1000) / 1000 # convert to MB
|
||||
timeout = timeout * 1000 # convert sec to ms
|
||||
timeout = timeout * 1.5 # add a little extra umph
|
||||
timeout = timeout * 25 # decided we needed A LOT extra to be safe
|
||||
|
||||
# set up ant target
|
||||
args = ["ant"]
|
||||
args.append("-q")
|
||||
@ -51,13 +88,15 @@ def testAddImageIngest(inFile):
|
||||
args.append(os.path.join("..","build.xml"))
|
||||
args.append("regression-test")
|
||||
args.append("-l")
|
||||
args.append(os.path.join(cwd,outDir,testCaseName,"antlog.txt"))
|
||||
args.append(antlog)
|
||||
args.append("-Dimg_path=" + testInFile)
|
||||
args.append("-Dknown_bad_path=" + knownBadPath)
|
||||
args.append("-Dkeyword_path=" + keywordPath)
|
||||
args.append("-Dnsrl_path=" + nsrlPath)
|
||||
args.append("-Dgold_path=" + os.path.join(cwd,goldDir))
|
||||
args.append("-Dout_path=" + os.path.join(cwd,outDir,testCaseName))
|
||||
args.append("-Dgold_path=" + os.path.join(cwd,goldDir).replace("/", "\\"))
|
||||
args.append("-Dout_path=" + os.path.join(cwd,outDir,testCaseName).replace("/", "\\"))
|
||||
args.append("-Dignore_unalloc=" + "%s" % ignoreUnalloc)
|
||||
args.append("-Dtest.timeout=" + str(timeout))
|
||||
|
||||
# print the ant testing command
|
||||
print "CMD: " + " ".join(args)
|
||||
@ -68,13 +107,31 @@ def testAddImageIngest(inFile):
|
||||
#fnull.close();
|
||||
subprocess.call(args)
|
||||
|
||||
def testCompareToGold(inFile):
|
||||
def getImageSize(inFile, list):
|
||||
name = imageName(inFile)
|
||||
size = 0
|
||||
if list:
|
||||
size += os.path.getsize(inFile)
|
||||
else:
|
||||
path = os.path.join(".",inDir)
|
||||
|
||||
for files in os.listdir(path):
|
||||
filename = os.path.splitext(files)[0]
|
||||
if filename == name:
|
||||
filepath = os.path.join(path, files)
|
||||
if not os.path.samefile(filepath, inFile):
|
||||
size += os.path.getsize(filepath)
|
||||
size += os.path.getsize(inFile)
|
||||
return size
|
||||
|
||||
def testCompareToGold(inFile, ignore):
|
||||
print "-----------------------------------------------"
|
||||
print "Comparing results for " + inFile + " with gold."
|
||||
|
||||
name = imageName(inFile)
|
||||
if ignore:
|
||||
name += ("-i")
|
||||
cwd = wgetcwd()
|
||||
|
||||
goldFile = os.path.join("./",goldDir,name,"standard.db")
|
||||
testFile = os.path.join("./",outDir,name,"AutopsyTestCase","autopsy.db")
|
||||
if os.path.isfile(goldFile) == False:
|
||||
@ -130,18 +187,105 @@ def testCompareToGold(inFile):
|
||||
else:
|
||||
print("Object counts match!")
|
||||
|
||||
def copyTestToGold(inFile):
|
||||
print "------------------------------------------------"
|
||||
print "Recreating gold standard from results."
|
||||
inFile = imageName(inFile)
|
||||
def clearGoldDir(inFile, ignore, list):
|
||||
cwd = wgetcwd()
|
||||
goldFile = os.path.join("./",goldDir,inFile,"standard.db")
|
||||
testFile = os.path.join("./",outDir,inFile,"AutopsyTestCase","autopsy.db")
|
||||
inFile = imageName(inFile)
|
||||
if ignore:
|
||||
inFile += "-i"
|
||||
if list:
|
||||
inFile += "-l"
|
||||
if os.path.exists(os.path.join(cwd,goldDir,inFile)):
|
||||
shutil.rmtree(os.path.join(cwd,goldDir,inFile))
|
||||
os.makedirs(os.path.join(cwd,goldDir,inFile))
|
||||
|
||||
def copyTestToGold(inFile, ignore, list):
|
||||
print "------------------------------------------------"
|
||||
print "Recreating gold standard from results."
|
||||
inFile = imageName(inFile)
|
||||
if ignore:
|
||||
inFile += "-i"
|
||||
if list:
|
||||
inFile += "-l"
|
||||
cwd = wgetcwd()
|
||||
goldFile = os.path.join("./",goldDir,inFile,"standard.db")
|
||||
testFile = os.path.join("./",outDir,inFile,"AutopsyTestCase","autopsy.db")
|
||||
shutil.copy(testFile, goldFile)
|
||||
|
||||
def copyReportToGold(inFile, ignore, list):
|
||||
print "------------------------------------------------"
|
||||
print "Recreating gold report from results."
|
||||
inFile = imageName(inFile)
|
||||
if ignore:
|
||||
inFile += "-i"
|
||||
if list:
|
||||
inFile += "-l"
|
||||
cwd = wgetcwd()
|
||||
goldReport = os.path.join("./",goldDir,inFile,"report.html")
|
||||
testReportPath = os.path.join("./",outDir,inFile,"AutopsyTestCase","Reports")
|
||||
# Because Java adds a timestamp to the report file, one can't call it
|
||||
# directly, so one must get a list of files in the dir, which are only
|
||||
# reports, then filter for the .html report
|
||||
testReport = None
|
||||
for files in os.listdir(testReportPath):
|
||||
if files.endswith(".html"): # Get the HTML one
|
||||
testReport = os.path.join("./",outDir,inFile,"AutopsyTestCase","Reports",files)
|
||||
if testReport is None:
|
||||
markError("No test report exists", inFile)
|
||||
return
|
||||
else:
|
||||
shutil.copy(testReport, goldReport)
|
||||
|
||||
def testCompareReports(inFile, ignore, list):
|
||||
print "------------------------------------------------"
|
||||
print "Comparing report to golden report."
|
||||
name = imageName(inFile)
|
||||
if ignore:
|
||||
name += "-i"
|
||||
if list:
|
||||
name += "-l"
|
||||
goldReport = os.path.join("./",goldDir,name,"report.html")
|
||||
testReportPath = os.path.join("./",outDir,name,"AutopsyTestCase","Reports")
|
||||
# Because Java adds a timestamp to the report file, one can't call it
|
||||
# directly, so one must get a list of files in the dir, which are only
|
||||
# reports, then filter for the .html report
|
||||
testReport = None
|
||||
for files in os.listdir(testReportPath):
|
||||
if files.endswith(".html"): # Get the HTML one
|
||||
testReport = os.path.join("./",outDir,name,"AutopsyTestCase","Reports",files)
|
||||
if os.path.isfile(goldReport) == False:
|
||||
markError("No gold report exists", inFile)
|
||||
return
|
||||
if testReport is None:
|
||||
markError("No test report exists", inFile)
|
||||
return
|
||||
# Compare the reports
|
||||
goldFile = open(goldReport)
|
||||
testFile = open(testReport)
|
||||
# Search for <ul> because it is first seen in the report
|
||||
# immediately after the unnecessary metadata, styles, and timestamp
|
||||
gold = goldFile.read()
|
||||
test = testFile.read()
|
||||
gold = gold[gold.find("<ul>"):]
|
||||
test = test[test.find("<ul>"):]
|
||||
# Splitting allows for printouts of what the difference is
|
||||
goldList = split(gold, 50)
|
||||
testList = split(test, 50)
|
||||
failed = 0
|
||||
for goldSplit, testSplit in zip(goldList, testList):
|
||||
if goldSplit != testSplit:
|
||||
failed = 1
|
||||
#print "Got: " + testSplit
|
||||
#print "Expected: " + goldSplit
|
||||
break
|
||||
if(failed):
|
||||
errString = "Reports do not match."
|
||||
markError(errString, inFile)
|
||||
else:
|
||||
print "Reports match."
|
||||
|
||||
def split(input, size):
|
||||
return [input[start:start+size] for start in range(0, len(input), size)]
|
||||
|
||||
class ImgType:
|
||||
RAW, ENCASE, SPLIT, UNKNOWN = range(4)
|
||||
|
||||
@ -161,15 +305,20 @@ def imageType(inFile):
|
||||
|
||||
def imageName(inFile):
|
||||
pathEnd = inFile.rfind("/")
|
||||
pathEnd2 = inFile.rfind("\\")
|
||||
extStart = inFile.rfind(".")
|
||||
if(extStart == -1 and extStart == -1):
|
||||
return inFile
|
||||
if(pathEnd2 != -1):
|
||||
return inFile[pathEnd2+1:extStart]
|
||||
elif(extStart == -1):
|
||||
return inFile[pathEnd+1:]
|
||||
elif(pathEnd == -1):
|
||||
return inFile[:extStart]
|
||||
else:
|
||||
elif(pathEnd!=-1 and extStart!=-1):
|
||||
return inFile[pathEnd+1:extStart]
|
||||
else:
|
||||
return inFile[pathEnd2+1:extStart]
|
||||
|
||||
def markError(errString, inFile):
|
||||
global hadErrors
|
||||
@ -185,61 +334,116 @@ def wgetcwd():
|
||||
return out.rstrip()
|
||||
|
||||
def wabspath(inFile):
|
||||
if(inFile[1:2] == ":"):
|
||||
proc = subprocess.Popen(("cygpath", "-m", inFile), stdout=subprocess.PIPE)
|
||||
out,err = proc.communicate()
|
||||
else:
|
||||
proc = subprocess.Popen(("cygpath", "-m", os.path.abspath(inFile)), stdout=subprocess.PIPE)
|
||||
out,err = proc.communicate()
|
||||
return out.rstrip()
|
||||
|
||||
def copyLogs(inFile):
|
||||
def copyLogs(inFile, ignore, list):
|
||||
name = imageName(inFile)
|
||||
if ignore:
|
||||
name +="-i"
|
||||
if list:
|
||||
name+="-l"
|
||||
logDir = os.path.join("..","build","test","qa-functional","work","userdir0","var","log")
|
||||
shutil.copytree(logDir,os.path.join(outDir,imageName(inFile),"logs"))
|
||||
shutil.copytree(logDir,os.path.join(outDir,name,"logs"))
|
||||
|
||||
def testFile(image, rebuild):
|
||||
def testFile(image, rebuild, ignore, list):
|
||||
if imageType(image) != ImgType.UNKNOWN:
|
||||
testAddImageIngest(image)
|
||||
#print imageName(image)
|
||||
copyLogs(image)
|
||||
testAddImageIngest(image, ignore, list)
|
||||
copyLogs(image, ignore, list)
|
||||
if rebuild:
|
||||
copyTestToGold(image)
|
||||
clearGoldDir(image, ignore, list)
|
||||
copyTestToGold(image, ignore, list)
|
||||
copyReportToGold(image, ignore, list)
|
||||
else:
|
||||
testCompareToGold(image)
|
||||
testCompareToGold(image, ignore, list)
|
||||
testCompareReports(image, ignore, list)
|
||||
|
||||
def usage() :
|
||||
def usage():
|
||||
usage = "\
|
||||
Usage: ./regression.py [-i FILE] [OPTIONS] \n\n\
|
||||
Usage: ./regression.py [-s FILE] [OPTIONS] \n\n\
|
||||
Run the RegressionTest.java file, and compare the result with a gold standard \n\n\
|
||||
When the -s flag is set, this script only tests the image given by FILE.\n\
|
||||
By default, it tests every image in ./input/\n\n\
|
||||
An indexed NSRL database is expected at ./input/nsrl.txt-md5.idx,\n\
|
||||
and an indexed notable hash database at ./input/notablehashes.txt-md5.idx\n\
|
||||
In addition, any keywords to search for must be in ./input/notablekeywords.xml\n\n\
|
||||
When the -l flag is set, the script looks for a config.xml file of the given name\n\
|
||||
where images are stored. For usage notes please see the example config.xml in\n\
|
||||
the /script folder.\
|
||||
Options:\n\n\
|
||||
-r, --rebuild\t\tRebuild the gold standards from the test results for each image"
|
||||
-r, --rebuild\t\tRebuild the gold standards from the test results for each image\n\n\
|
||||
-u, --ignore\t\tIgnore unallocated space while ingesting"
|
||||
return usage
|
||||
|
||||
def main():
|
||||
rebuild = False
|
||||
single = False
|
||||
ignore = False
|
||||
list = False
|
||||
test = True
|
||||
argi = 1
|
||||
Config = None #file pointed to by --list
|
||||
imgListB = [] #list of legal images from config
|
||||
cwd = wgetcwd()
|
||||
while argi < len(sys.argv):
|
||||
arg = sys.argv[argi]
|
||||
if arg == "-i" and argi+1 < len(sys.argv):
|
||||
if arg == "-s" and argi+1 < len(sys.argv): #check for single
|
||||
single = True
|
||||
argi+=1
|
||||
image = sys.argv[argi]
|
||||
print "Running on single image: " + image
|
||||
elif (arg == "--rebuild") or (arg == "-r"):
|
||||
if arg == "-l" or arg == "--list": #check for config file
|
||||
list = True
|
||||
argi+=1
|
||||
#check for file in ./
|
||||
if(os.path.isfile(os.path.join("./", sys.argv[argi]))):
|
||||
Config = parse(os.path.join("./", sys.argv[argi]))
|
||||
#else check if it is a specified path
|
||||
elif (os.path.exists(wabspath(sys.argv[argi]))):
|
||||
Config = parse(sys.argv[argi])
|
||||
else:
|
||||
print sys.argv[argi]
|
||||
print wabspath(sys.argv[argi])
|
||||
markError("Ran with " + arg +" but no such file exists", arg)
|
||||
elif (arg == "--rebuild") or (arg == "-r"): #check for rebuild flag
|
||||
rebuild = True
|
||||
print "Running in REBUILD mode"
|
||||
elif (arg == "--ignore") or (arg == "-i"): #check for ignore flag
|
||||
ignore = True
|
||||
print "Ignoring unallocated space"
|
||||
else:
|
||||
test = False
|
||||
print usage()
|
||||
argi+=1
|
||||
if single:
|
||||
testFile(image, rebuild)
|
||||
testFile(image, rebuild, ignore, list)
|
||||
if list:
|
||||
listImages = []
|
||||
errors = 0
|
||||
global inDir
|
||||
out = Config.getElementsByTagName("indir")[0].getAttribute("value").encode() #there should only be one indir element in the config
|
||||
inDir = out
|
||||
for element in Config.getElementsByTagName("image"):
|
||||
elem = element.getAttribute("value").encode()
|
||||
proc2 = subprocess.Popen(("cygpath", "-i", elem), stdout=subprocess.PIPE)
|
||||
out2,err = proc2.communicate()
|
||||
out2 = out2.rstrip()
|
||||
if(os.path.exists(out2) and os.path.isfile(out2)):
|
||||
listImages.append(elem)
|
||||
else:
|
||||
print out2 + " is not a valid path or is not an image"
|
||||
errors+=1
|
||||
print "Illegal files specified: " + str(errors)
|
||||
for image in listImages:
|
||||
testFile(image, rebuild, ignore, list)
|
||||
elif test:
|
||||
for inFile in os.listdir(inDir):
|
||||
testFile(os.path.join(inDir,inFile), rebuild)
|
||||
testFile(os.path.join(inDir,inFile), rebuild, ignore, list)
|
||||
|
||||
if hadErrors == True:
|
||||
print "**********************************************"
|
||||
|
@ -25,6 +25,7 @@ import javax.swing.JDialog;
|
||||
import javax.swing.JTextField;
|
||||
import junit.framework.Test;
|
||||
import org.netbeans.jellytools.JellyTestCase;
|
||||
import org.netbeans.jellytools.MainWindowOperator;
|
||||
import org.netbeans.jellytools.NbDialogOperator;
|
||||
import org.netbeans.jellytools.WizardOperator;
|
||||
import org.netbeans.jemmy.Timeout;
|
||||
@ -44,6 +45,7 @@ import org.sleuthkit.autopsy.ingest.IngestServiceAbstract;
|
||||
* nsrl_path: Path to the nsrl database
|
||||
* known_bad_path: Path to a database of known bad hashes
|
||||
* keyword_path: Path to a keyword list xml file
|
||||
* ignore_unalloc: Boolean whether to ignore unallocated space or not
|
||||
*
|
||||
* Without these properties set, the test will fail to run correctly.
|
||||
* To run this test correctly, you should use the script 'regression.py'
|
||||
@ -61,7 +63,6 @@ public class RegressionTest extends JellyTestCase{
|
||||
|
||||
/** Creates suite from particular test cases. */
|
||||
public static Test suite() {
|
||||
|
||||
// run tests with specific configuration
|
||||
NbModuleSuite.Configuration conf = NbModuleSuite.createConfiguration(RegressionTest.class).
|
||||
clusters(".*").
|
||||
@ -73,13 +74,20 @@ public class RegressionTest extends JellyTestCase{
|
||||
"testConfigureHash",
|
||||
"testConfigureIngest2",
|
||||
"testConfigureSearch",
|
||||
"testIngest");
|
||||
"testConfigureIngest2a",
|
||||
"testIngest",
|
||||
"testGenerateReportToolbar",
|
||||
"testGenerateReportButton"
|
||||
);
|
||||
return NbModuleSuite.create(conf);
|
||||
|
||||
|
||||
}
|
||||
|
||||
/** Method called before each test case. */
|
||||
@Override
|
||||
public void setUp() {
|
||||
|
||||
logger.info("######## " + System.getProperty("img_path") + " #######");
|
||||
}
|
||||
|
||||
@ -189,6 +197,8 @@ public class RegressionTest extends JellyTestCase{
|
||||
JButtonOperator jbo2 = new JButtonOperator(jdo, "OK", 0);
|
||||
jbo2.pushNoBlock();
|
||||
WizardOperator wo = new WizardOperator("Add Image");
|
||||
JCheckBoxOperator jbco0 = new JCheckBoxOperator(wo, "Process Unallocated Space");
|
||||
jbco0.setSelected(Boolean.parseBoolean(System.getProperty("ignore_unalloc"))); //ignore unallocated space or not. Set with Regression.py -u
|
||||
wo.btNext().clickMouse();
|
||||
wo.btFinish().clickMouse();
|
||||
}
|
||||
@ -202,6 +212,7 @@ public class RegressionTest extends JellyTestCase{
|
||||
}
|
||||
logger.info("Enqueue took " + (System.currentTimeMillis()-start) + "ms");
|
||||
while(man.isIngestRunning()) {
|
||||
|
||||
new Timeout("pausing", 1000).sleep(); // give it a second (or five) to process
|
||||
}
|
||||
new Timeout("pausing", 15000).sleep(); // give it a second (or fifteen) to process
|
||||
@ -216,5 +227,40 @@ public class RegressionTest extends JellyTestCase{
|
||||
}
|
||||
logger.info("Ingest (including enqueue) took " + (System.currentTimeMillis()-start) + "ms");
|
||||
new Timeout("pausing", 5000).sleep(); // allow keyword search to finish saving artifacts, just in case
|
||||
|
||||
}
|
||||
|
||||
public void testGenerateReportToolbar() {
|
||||
|
||||
logger.info("Generate Report Toolbars");
|
||||
// Force the action if necessary:
|
||||
//new Action("Tools|Generate Report", null).perform();
|
||||
//new Timeout("pausing", 1000).sleep();
|
||||
MainWindowOperator mwo = MainWindowOperator.getDefault();
|
||||
JButtonOperator jbo = new JButtonOperator(mwo, "Generate Report");
|
||||
jbo.pushNoBlock();
|
||||
new Timeout("pausing", 1000).sleep();
|
||||
}
|
||||
|
||||
public void testGenerateReportButton() {
|
||||
logger.info("Generate Report Button");
|
||||
JDialog reportDialog = JDialogOperator.waitJDialog("Generate Report", false, false);
|
||||
JDialogOperator reportDialogOperator = new JDialogOperator(reportDialog);
|
||||
JCheckBoxOperator jcbo0 = new JCheckBoxOperator(reportDialogOperator, "Excel");
|
||||
jcbo0.doClick();
|
||||
JCheckBoxOperator jcbo1 = new JCheckBoxOperator(reportDialogOperator, "Default XML");
|
||||
jcbo1.doClick();
|
||||
JCheckBoxOperator jcbo2 = new JCheckBoxOperator(reportDialogOperator, "Body File");
|
||||
jcbo2.doClick();
|
||||
JButtonOperator jbo0 = new JButtonOperator(reportDialogOperator, "Generate Report");
|
||||
jbo0.pushNoBlock();
|
||||
new Timeout("pausing", 3000).sleep(); // Give it a few seconds to generate
|
||||
|
||||
JDialog previewDialog = JDialogOperator.waitJDialog("Report Preview", false, false);
|
||||
JDialogOperator previewDialogOperator = new JDialogOperator(previewDialog);
|
||||
JButtonOperator jbo1 = new JButtonOperator(previewDialogOperator, "Close");
|
||||
jbo1.pushNoBlock();
|
||||
new Timeout("pausing", 3000).sleep(); // Give the program a second to idle to be safe
|
||||
}
|
||||
|
||||
}
|
@ -20,13 +20,168 @@ and runs a test for every disk image it finds.
|
||||
regression.py also does regression test result validation by comparing the test result with the golden standards
|
||||
for the image.
|
||||
|
||||
It is assumed that the steps detailed in the Building Autopsy from Source document have been completed and that appropriate versions of the JDK, LIBEWF, etc., are present on the system. Building Autopsy from Source can be found at:
|
||||
https://github.com/sleuthkit/autopsy/blob/master/BUILDING.txt
|
||||
|
||||
|
||||
\section regression_test_setup Setting up regression testing
|
||||
|
||||
1) Install Cygwin
|
||||
|
||||
1a) Download and run the Cygwin setup, available at
|
||||
|
||||
http://www.cygwin.com/setup.exe
|
||||
|
||||
|
||||
1b) From the list of packages to install, select both Database and Python.
|
||||
|
||||
|
||||
2) Setting up Regression.py
|
||||
|
||||
2a) Place all images that you wish to regression test under
|
||||
|
||||
autopsy/Testing/script/input
|
||||
|
||||
|
||||
2b) Ensure that the following files are also under /input
|
||||
|
||||
notablehashes.txt-md5.idx
|
||||
nsrl.txt-md5.idx
|
||||
notablekeywords.xml
|
||||
|
||||
|
||||
2c) Place the gold standard database files ("standard.db") for each image under
|
||||
|
||||
autopsy/Testing/script/gold/{name of image}
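
As a sketch, the expected layout looks like the following (the image name "example.img" is hypothetical; "standard.db" is the gold database file name regression.py looks for):

  autopsy/Testing/script/
      input/
          notablehashes.txt-md5.idx
          nsrl.txt-md5.idx
          notablekeywords.xml
          example.img
      gold/
          example/
              standard.db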
|
||||
|
||||
|
||||
|
||||
\section regression_test_running Running regression testing
|
||||
|
||||
\section regression_test_update Updating golden standards
|
||||
3) Run regression tests
|
||||
|
||||
3a) From the Cygwin terminal, navigate to the /script folder and run "./regression.py". The script will automatically begin Autopsy and run ingestion and analysis on all the images from the /input directory, and will close when finished. The Cygwin terminal will print out whether or not errors were encountered at the end of each image's test.
|
||||
|
||||
|
||||
|
||||
\section regression_test_options Options
|
||||
|
||||
Currently, the script supports three argument parameters:
|
||||
|
||||
-s {imgname} : runs the test with a single image as specified by {imgname}. The path must be included; it does not read from ./input.
|
||||
-r or --rebuild : runs in REBUILD mode, see 4 below
|
||||
-u or --ignore : runs ignoring unallocated space. Useful for verifying that the framework is operational. Appends "-u" to the output and gold folders to distinguish them from others, and automatically compares the corresponding reports.
|
||||
|
||||
These can be run in any combination.
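
For example, a single-image run that rebuilds the gold standards while ignoring unallocated space could be invoked as follows (the image path is hypothetical):

  ./regression.py -s ./input/example.img -r --ignore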
|
||||
|
||||
4) OPTIONAL: Update the standards databases
|
||||
|
||||
4a) From the Cygwin terminal, navigate to
|
||||
|
||||
autopsy/Testing/script
|
||||
|
||||
4b) Run "./regression.py -r". The script will automatically delete pre-existing standard.db files and generate the updated ones in the proper locations (/script/gold/{name of image}).
|
||||
|
||||
Running in -r will also generate a golden report file built from the image. Normal runs of regression.py compare their generated report against the golden one, and report any differences in the file, ignoring the timestamp.
|
||||
|
||||
|
||||
|
||||
\section developers_note_regression_test Developers Notes: Jemmy and RegressionTest.java
|
||||
|
||||
|
||||
For additional details regarding setting up and using Jemmy, please see
|
||||
|
||||
http://platform.netbeans.org/tutorials/nbm-test.html
|
||||
http://wiki.netbeans.org/Writing_JellyTools_Tests_Guide
|
||||
|
||||
|
||||
|
||||
The Jemmy UI framework includes elements such as buttons, frames, dialog boxes and wizards. In order to manipulate these elements programmatically, the associated ContainerOperators must be used. RegressionTest.java makes use of the following major operators:
|
||||
|
||||
JButtonOperator
|
||||
JDialogOperator
|
||||
NbDialogOperator
|
||||
JTableOperator
|
||||
JFileChooserOperator
|
||||
WizardOperator
|
||||
|
||||
|
||||
WizardOperators are for elements that implement the Wizard interface. Wizards specifically have back and next buttons. A WizardOperator can be created by
|
||||
|
||||
WizardOperator wo = new WizardOperator(String title);
|
||||
|
||||
Where title is the display title of the wizard you wish to manipulate.
|
||||
|
||||
In order to use any Jemmy UI element, it must first be found. There are a number of ways to do this, but the most common involves searching by the display name of the element in question. Finding elements is a function of that element's ContainerOperator.
|
||||
For example, to find a JDialog whose display name is the string "Hash Database Configuration", the following code might be used:
|
||||
|
||||
JDialog hashMainDialog = JDialogOperator.waitJDialog("Hash Database Configuration", false, false);
|
||||
|
||||
The two booleans control whether the search must match the exact string (as opposed to substrings) and whether it is case sensitive.
|
||||
|
||||
Note that the method used is called '.waitJDialog', and not '.findJDialog'. This is an important distinction regarding the thoroughness of the find, but the functionality is the same. Refer to the link on Jemmy above for greater detail.
|
||||
|
||||
Once an element has been located, it can be operated upon by creating a new ContainerOperator, with the element as the only argument:
|
||||
|
||||
JDialogOperator hashMainDialogOperator = new JDialogOperator(hashMainDialog);
|
||||
|
||||
|
||||
Selecting the main window:
|
||||
|
||||
In order to select the main window, in this case the main Autopsy frame, the MainWindowOperator must be used. A MainWindowOperator takes no arguments and is created as follows:
|
||||
|
||||
MainWindowOperator mwo = MainWindowOperator.getDefault();
|
||||
|
||||
For further reference regarding ContainerOperators, please see
|
||||
|
||||
http://www.jarvana.com/jarvana/view/org/netbeans/jemmy/2.2.7.5/jemmy-2.2.7.5-javadoc.jar!/org/netbeans/jemmy/operators/ContainerOperator.html
|
||||
|
||||
|
||||
When an element has been selected, the individual components may be manipulated with ContainerOperators.
|
||||
To select a button, use the code below, where cont is one of the ContainerOperators from above, text is the text displayed on the button, and index is the button's position among buttons with the same name (i.e. if there are three buttons labeled "preview", their indices are 0, 1, and 2 in order).
|
||||
|
||||
JButtonOperator jbo = new JButtonOperator(ContainerOperator cont, String text, int index);
|
||||
|
||||
There are many other elements and operators, such as JCheckBoxOperator, JFileChooserOperator, JTextFieldOperator, etc. See http://www.jarvana.com/jarvana/view/org/netbeans/jemmy/2.2.7.5/jemmy-2.2.7.5-javadoc.jar!/org/netbeans/jemmy/operators/JComponentOperator.html for more. Please see their individual Javadocs for action commands that push buttons, fill in forms, etc.
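
As a minimal sketch of how these operators combine in a test method (reusing the "Generate Report" dialog and component labels from RegressionTest.java above):

  MainWindowOperator mwo = MainWindowOperator.getDefault();
  // push the toolbar button that opens the dialog
  new JButtonOperator(mwo, "Generate Report").pushNoBlock();
  // wait for the dialog by its display title, then wrap it in an operator
  JDialog reportDialog = JDialogOperator.waitJDialog("Generate Report", false, false);
  JDialogOperator reportDialogOperator = new JDialogOperator(reportDialog);
  // toggle a checkbox and confirm
  new JCheckBoxOperator(reportDialogOperator, "Excel").doClick();
  new JButtonOperator(reportDialogOperator, "Generate Report").pushNoBlock();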
|
||||
|
||||
If an element cannot be grabbed using a ContainerOperator, a temporary workaround is to invoke the element's action directly:
|
||||
|
||||
new Action(String menuPath, String popupPath).perform();
|
||||
|
||||
where menuPath is the path through the application menu to the action and popupPath is the path through the popup menu (null here, since popup menus are not supported).
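
For instance, the commented-out fallback in RegressionTest.java opens the report dialog through the Tools menu:

  new Action("Tools|Generate Report", null).perform();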
|
||||
|
||||
For more on Actions, see
|
||||
|
||||
http://bits.netbeans.org/dev/javadoc/org-netbeans-modules-jellytools-platform/org/netbeans/jellytools/actions/Action.html
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
*/
@ -1,5 +1,3 @@
|
||||
file.reference.commons-lang-2.4.jar=release/modules/ext/commons-lang-2.4.jar
|
||||
file.reference.tika-core-1.1.jar=release/modules/ext/tika-core-1.1.jar
|
||||
file.reference.tika-parsers-1.1.jar=release/modules/ext/tika-parsers-1.1.jar
|
||||
javac.source=1.6
|
||||
javac.compilerargs=-Xlint -Xlint:-serial
|
||||
|
@ -53,17 +53,13 @@
|
||||
</module-dependencies>
|
||||
<public-packages/>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/tika-core-1.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/tika-core-1.1.jar</binary-origin>
|
||||
<runtime-relative-path>ext/tika-core-0.10.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/tika-core-0.10.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/commons-lang-2.4.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/commons-lang-2.4.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
<class-path-extension>
|
||||
<runtime-relative-path>ext/tika-parsers-1.1.jar</runtime-relative-path>
|
||||
<binary-origin>release/modules/ext/tika-parsers-1.1.jar</binary-origin>
|
||||
</class-path-extension>
|
||||
</data>
|
||||
</configuration>
|
||||
</project>
|
||||
|
thunderbirdparser/release/modules/ext/tika-core-0.10.jar (new binary file)
@ -17,7 +17,7 @@ import org.apache.tika.metadata.*;
|
||||
* @author arivera
|
||||
*/
|
||||
public class ThunderbirdMetadata implements CreativeCommons, DublinCore, Geographic, HttpHeaders,
|
||||
IPTC, Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
|
||||
Message, MSOffice, ClimateForcast, TIFF, TikaMetadataKeys, TikaMimeKeys,
|
||||
Serializable {
|
||||
|
||||
private int strArrCount = 0;
|
||||
|