From 1ca219994565d52d4cca6932f06000e381265a29 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Tue, 15 Jan 2019 17:44:00 -0500 Subject: [PATCH 01/27] Created interface for the ingest job running service --- .../IngestJobRunningServiceInterface.java | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java diff --git a/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java b/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java new file mode 100755 index 0000000000..74b5cf75d0 --- /dev/null +++ b/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java @@ -0,0 +1,35 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2013-2019 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.sleuthkit.autopsy.appservices; + +/** + * Interface to run an ingest job in the background. 
// ELTODO + */ +public interface IngestJobRunningServiceInterface { + + /** + * Starts the service + */ + public void start(); + + /** + * Stops the service + */ + public void stop(); +} From 25e1f944fe12a5cae700fa76021947c2843edfa7 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Tue, 15 Jan 2019 18:42:04 -0500 Subject: [PATCH 02/27] Modified Autopsy startup window to run NBM in IDE --- .../autopsy/casemodule/StartupWindow.java | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java b/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java index 8b94b0d441..1c64983c10 100644 --- a/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java +++ b/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java @@ -21,10 +21,14 @@ package org.sleuthkit.autopsy.casemodule; import java.awt.Dimension; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; +import java.util.Collection; +import java.util.Iterator; import javax.swing.JDialog; +import org.openide.util.Lookup; import org.openide.util.NbBundle; import org.openide.util.lookup.ServiceProvider; import org.openide.windows.WindowManager; +import org.sleuthkit.autopsy.appservices.IngestJobRunningServiceInterface; /** * The default implementation of the Autopsy startup window. @@ -43,6 +47,19 @@ public final class StartupWindow extends JDialog implements StartupWindowInterfa } private void init() { + + // first check whether Autopsy is being run from command line + Collection jobRunningServices = Lookup.getDefault().lookupAll(IngestJobRunningServiceInterface.class); + Iterator it = jobRunningServices.iterator(); + if (it.hasNext()) { + // Autopsy is running from command line + IngestJobRunningServiceInterface processor = it.next(); + //LOGGER.log(Level.INFO, "Autopsy is running from command line. 
No startup window"); //NON-NLS + // ELTODO display some message or panel here + processor.start(); + return; + } + setSize(DIMENSIONS); welcomeWindow = new CueBannerPanel(); welcomeWindow.setCloseButtonActionListener(new ActionListener() { @@ -58,7 +75,9 @@ public final class StartupWindow extends JDialog implements StartupWindowInterfa @Override public void open() { - welcomeWindow.refresh(); + if (welcomeWindow != null) { + welcomeWindow.refresh(); + } setLocationRelativeTo(WindowManager.getDefault().getMainWindow()); setVisible(true); } From 20ddc1be249e84459605f493f9f78c10a2abb6c8 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Thu, 24 Jan 2019 11:21:02 -0500 Subject: [PATCH 03/27] Add a check for the first two bytes of Tika output if audio/mpeg --- .../modules/filetypeid/FileTypeDetector.java | 73 +++++++++++++++---- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java index 8c225381df..dace794e85 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java +++ b/Core/src/org/sleuthkit/autopsy/modules/filetypeid/FileTypeDetector.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; +import java.util.logging.Level; import java.util.stream.Collectors; import org.apache.tika.Tika; import org.apache.tika.io.TikaInputStream; @@ -57,7 +58,7 @@ public class FileTypeDetector { * @return A list of all detectable file types. 
* * @throws FileTypeDetectorInitException If an error occurs while assembling - * the list of types + * the list of types */ public static synchronized SortedSet getDetectedTypes() throws FileTypeDetectorInitException { TreeSet detectedTypes = new TreeSet<>((String string1, String string2) -> { @@ -108,9 +109,7 @@ public class FileTypeDetector { * Tika, and Autopsy file type definitions take precendence over Tika. * * @throws FileTypeDetectorInitException If an initialization error occurs, - * e.g., user-defined file type - * definitions exist but cannot be - * loaded. + * e.g., user-defined file type definitions exist but cannot be loaded. */ public FileTypeDetector() throws FileTypeDetectorInitException { try { @@ -140,7 +139,7 @@ public class FileTypeDetector { * user-defined MIME type by this detector. * * @param customTypes - * @param mimeType The MIME type name (e.g., "text/html"). + * @param mimeType The MIME type name (e.g., "text/html"). * * @return True or false. */ @@ -171,9 +170,9 @@ public class FileTypeDetector { * @param file The file to test. * * @return A MIME type name. If file type could not be detected, or results - * were uncertain, octet-stream is returned. - * - + * were uncertain, octet-stream is returned. + * + * */ public String getMIMEType(AbstractFile file) { /* @@ -235,6 +234,22 @@ public class FileTypeDetector { */ mimeType = removeOptionalParameter(mimeType); + /** + * We cannot trust Tika's audio/mpeg mimetype. Let's verify the + * first two bytes and confirm they are not 0xffff. Details in + * JIRA-4659 + */ + if (mimeType.contains("audio/mpeg")) { + try { + byte[] header = getNBytes(file, 0, 2); + if (byteIs0xFF(header[0]) && byteIs0xFF(header[1])) { + mimeType = MimeTypes.OCTET_STREAM; + } + } catch (TskCoreException ex) { + //Oh well, the mimetype is what it is. 
+ logger.log(Level.WARNING, String.format("Could not verify audio/mpeg mimetype for file %s with id=%d", file.getName(), file.getId()), ex); + } + } } catch (Exception ignored) { /* * This exception is swallowed and not logged rather than @@ -255,6 +270,33 @@ public class FileTypeDetector { return mimeType; } + /** + * Determine if the byte is 255 (0xFF) by examining the last 4 bits and the + * first 4 bits. + * + * @param x byte + * @return Flag indicating the byte is 0xFF + */ + private boolean byteIs0xFF(byte x) { + return (x & 0x0F) == 0x0F && (x & 0xF0) == 0xF0; + } + + /** + * Retrieves N bytes from a file, starting at the given offset. + * + * @param file Abstract file to read + * @param offset Offset to begin reading + * @param n Number of bytes to read + * @return Byte array of size n + * + * @throws TskCoreException + */ + private byte[] getNBytes(AbstractFile file, int offset, int n) throws TskCoreException { + byte[] headerCache = new byte[n]; + file.read(headerCache, offset, n); + return headerCache; + } + /** * Removes the optional parameter from a MIME type string * @@ -280,7 +322,7 @@ public class FileTypeDetector { */ private String detectUserDefinedType(AbstractFile file) { String retValue = null; - + for (FileType fileType : userDefinedFileTypes) { if (fileType.matches(file)) { retValue = fileType.getMimeType(); @@ -291,7 +333,8 @@ public class FileTypeDetector { } /** - * Determines whether or not a file matches a custom file type defined by Autopsy. + * Determines whether or not a file matches a custom file type defined by + * Autopsy. * * @param file The file to test. * @@ -328,7 +371,7 @@ public class FileTypeDetector { * Constructs an exception to throw if an initialization error occurs, * e.g., user-defined file type definitions exist but cannot be loaded. * - * @param message The exception message, + * @param message The exception message, * @param throwable The underlying cause of the exception. 
*/ FileTypeDetectorInitException(String message, Throwable throwable) { @@ -366,7 +409,7 @@ public class FileTypeDetector { * @return A MIME type name. * * @throws TskCoreException if detection is required and there is a problem - * writing the result to the case database. + * writing the result to the case database. * @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType * and AbstractFile.save to save the result to the file object and the * database. @@ -386,10 +429,10 @@ public class FileTypeDetector { * @param file The file. * * @return A MIME type name. If file type could not be detected or results - * were uncertain, octet-stream is returned. + * were uncertain, octet-stream is returned. * * @throws TskCoreException if detection is required and there is a problem - * writing the result to the case database. + * writing the result to the case database. * * @deprecated Use getMIMEType instead, and call AbstractFile.setMIMEType * and AbstractFile.save to save the result to the file object and the @@ -410,7 +453,7 @@ public class FileTypeDetector { * @param file The file to test. * * @return A MIME type name. If file type could not be detected or results - * were uncertain, octet-stream is returned. + * were uncertain, octet-stream is returned. * * @throws TskCoreException * @deprecated Use getMIMEType instead. 
From 6b6044240cc59873d26e9a0a61c193b73574d10e Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Thu, 24 Jan 2019 15:54:35 -0500 Subject: [PATCH 04/27] Improve domain checking capability in NetworkUtils --- Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java b/Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java index 78547f8370..42266745f2 100644 --- a/Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java +++ b/Core/src/org/sleuthkit/autopsy/coreutils/NetworkUtils.java @@ -92,6 +92,12 @@ public class NetworkUtils { if (base.matches(".*[~`!@#$%^&\\*\\(\\)\\+={}\\[\\];:\\?<>,/ ].*")) { return ""; } + + //verify that the base domain actually has a '.', details JIRA-4609 + if(!base.contains(".")) { + return ""; + } + return base; } From 672f0a05f0364bd7b3e05a39c14461b2807f02ab Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 25 Jan 2019 14:01:52 -0500 Subject: [PATCH 05/27] Supporting a startup panel --- .../appservices/IngestJobRunningServiceInterface.java | 11 ++++++++++- .../sleuthkit/autopsy/casemodule/StartupWindow.java | 5 +++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java b/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java index 74b5cf75d0..835fed72c1 100755 --- a/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java +++ b/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java @@ -18,6 +18,8 @@ */ package org.sleuthkit.autopsy.appservices; +import javax.swing.JPanel; + /** * Interface to run an ingest job in the background. 
// ELTODO */ @@ -31,5 +33,12 @@ public interface IngestJobRunningServiceInterface { /** * Stops the service */ - public void stop(); + public void stop(); + + /** + * Returns a panel to be displayed while using this service + * + * @return panel to be displayed while using this service + */ + public JPanel getStartupWindow(); } diff --git a/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java b/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java index 1c64983c10..579400e279 100644 --- a/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java +++ b/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java @@ -54,8 +54,9 @@ public final class StartupWindow extends JDialog implements StartupWindowInterfa if (it.hasNext()) { // Autopsy is running from command line IngestJobRunningServiceInterface processor = it.next(); - //LOGGER.log(Level.INFO, "Autopsy is running from command line. No startup window"); //NON-NLS - // ELTODO display some message or panel here + add(processor.getStartupWindow()); + pack(); + setResizable(false); processor.start(); return; } From e272d30c04f845d456456daff75469d1ff2c5556 Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Fri, 25 Jan 2019 16:07:53 -0500 Subject: [PATCH 06/27] Moved interface out of Autopsy --- .../IngestJobRunningServiceInterface.java | 44 ------------------- .../autopsy/casemodule/StartupWindow.java | 19 +------- 2 files changed, 1 insertion(+), 62 deletions(-) delete mode 100755 Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java diff --git a/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java b/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java deleted file mode 100755 index 835fed72c1..0000000000 --- a/Core/src/org/sleuthkit/autopsy/appservices/IngestJobRunningServiceInterface.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Autopsy Forensic Browser - * - * Copyright 2013-2019 Basis Technology Corp. 
- * Contact: carrier sleuthkit org - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.sleuthkit.autopsy.appservices; - -import javax.swing.JPanel; - -/** - * Interface to run an ingest job in the background. // ELTODO - */ -public interface IngestJobRunningServiceInterface { - - /** - * Starts the service - */ - public void start(); - - /** - * Stops the service - */ - public void stop(); - - /** - * Returns a panel to be displayed while using this service - * - * @return panel to be displayed while using this service - */ - public JPanel getStartupWindow(); -} diff --git a/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java b/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java index 579400e279..d15a8aa915 100644 --- a/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java +++ b/Core/src/org/sleuthkit/autopsy/casemodule/StartupWindow.java @@ -21,14 +21,10 @@ package org.sleuthkit.autopsy.casemodule; import java.awt.Dimension; import java.awt.event.ActionEvent; import java.awt.event.ActionListener; -import java.util.Collection; -import java.util.Iterator; import javax.swing.JDialog; -import org.openide.util.Lookup; import org.openide.util.NbBundle; import org.openide.util.lookup.ServiceProvider; import org.openide.windows.WindowManager; -import org.sleuthkit.autopsy.appservices.IngestJobRunningServiceInterface; /** * The default implementation of the Autopsy startup window. 
@@ -47,20 +43,7 @@ public final class StartupWindow extends JDialog implements StartupWindowInterfa } private void init() { - - // first check whether Autopsy is being run from command line - Collection jobRunningServices = Lookup.getDefault().lookupAll(IngestJobRunningServiceInterface.class); - Iterator it = jobRunningServices.iterator(); - if (it.hasNext()) { - // Autopsy is running from command line - IngestJobRunningServiceInterface processor = it.next(); - add(processor.getStartupWindow()); - pack(); - setResizable(false); - processor.start(); - return; - } - + setSize(DIMENSIONS); welcomeWindow = new CueBannerPanel(); welcomeWindow.setCloseButtonActionListener(new ActionListener() { From 60901d9b27e9b8a3e8db2e1341cce582994722fe Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Tue, 29 Jan 2019 15:09:28 -0500 Subject: [PATCH 07/27] Moved the interface to Experimental --- .../IngestJobRunningServiceInterface.java | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100755 Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningServiceInterface.java diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningServiceInterface.java b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningServiceInterface.java new file mode 100755 index 0000000000..74f47002a3 --- /dev/null +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningServiceInterface.java @@ -0,0 +1,44 @@ +/* + * Autopsy Forensic Browser + * + * Copyright 2011-2018 Basis Technology Corp. + * Contact: carrier sleuthkit org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.sleuthkit.autopsy.experimental.configuration; + +import javax.swing.JPanel; + +/** + * Interface to run an ingest job in the background. + */ +public interface IngestJobRunningServiceInterface { + + /** + * Starts the service + */ + public void start(); + + /** + * Stops the service + */ + public void stop(); + + /** + * Returns a panel to be displayed while using this service + * + * @return panel to be displayed while using this service + */ + public JPanel getStartupWindow(); +} From 964d1aab2ee083fb5974d9019226767bcf17a47f Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Tue, 29 Jan 2019 15:41:23 -0500 Subject: [PATCH 08/27] ensure path variable in diff script is always defined before use --- test/script/tskdbdiff.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 1dbfe2a180..ca44ac9c09 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -654,6 +654,7 @@ def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports # make a copy of files_table and update it with new data from artifacts_table and reports_table mapping = files_table.copy() for k, v in objects_table.items(): + path = "" if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id if k in reports_table.keys(): # For a report we use the report path par_obj_id = v[0] From 566267081444367373c67d1d5e9c3cb349918ffe Mon Sep 17 00:00:00 2001 From: Ann Priestman Date: Wed, 30 Jan 2019 12:20:12 -0500 Subject: [PATCH 09/27] Avoid NPE if an image has no 
paths --- .../autopsy/contentviewers/Metadata.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/contentviewers/Metadata.java b/Core/src/org/sleuthkit/autopsy/contentviewers/Metadata.java index 6bd42367e2..4adae57b8d 100644 --- a/Core/src/org/sleuthkit/autopsy/contentviewers/Metadata.java +++ b/Core/src/org/sleuthkit/autopsy/contentviewers/Metadata.java @@ -138,7 +138,8 @@ public class Metadata extends javax.swing.JPanel implements DataContentViewer { "Metadata.tableRowTitle.timezone=Time Zone", "Metadata.tableRowTitle.deviceId=Device ID", "Metadata.tableRowTitle.acquisitionDetails=Acquisition Details", - "Metadata.nodeText.unknown=Unknown"}) + "Metadata.nodeText.unknown=Unknown", + "Metadata.nodeText.none=None"}) @Override public void setNode(Node node) { AbstractFile file = node.getLookup().lookup(AbstractFile.class); @@ -251,15 +252,20 @@ public class Metadata extends javax.swing.JPanel implements DataContentViewer { // Add all the data source paths to the "Local Path" value cell. String[] imagePaths = image.getPaths(); - StringBuilder pathValues = new StringBuilder("
"); - pathValues.append(imagePaths[0]); - pathValues.append("
"); - for (int i=1; i < imagePaths.length; i++) { - pathValues.append("
"); - pathValues.append(imagePaths[i]); + if (imagePaths.length > 0) { + StringBuilder pathValues = new StringBuilder("
"); + pathValues.append(imagePaths[0]); pathValues.append("
"); + for (int i=1; i < imagePaths.length; i++) { + pathValues.append("
"); + pathValues.append(imagePaths[i]); + pathValues.append("
"); + } + addRow(sb, NbBundle.getMessage(this.getClass(), "Metadata.tableRowTitle.localPath"), pathValues.toString()); + } else { + addRow(sb, NbBundle.getMessage(this.getClass(), "Metadata.tableRowTitle.localPath"), + NbBundle.getMessage(this.getClass(), "Metadata.nodeText.none")); } - addRow(sb, NbBundle.getMessage(this.getClass(), "Metadata.tableRowTitle.localPath"), pathValues.toString()); } setText(sb.toString()); From 2c257c9e2c1f9f067c1abed23dfa54497b4b7126 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Thu, 31 Jan 2019 10:01:40 -0500 Subject: [PATCH 10/27] Fixed typo in pop-up --- .../sleuthkit/autopsy/corecomponents/DataContentViewerHex.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java index 82c7df497a..f83b52c0b9 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java @@ -359,7 +359,7 @@ public class DataContentViewerHex extends javax.swing.JPanel implements DataCont }//GEN-LAST:event_goToOffsetTextFieldActionPerformed @NbBundle.Messages({"DataContentViewerHex.launchError=Unable to launch HxD Editor. 
" - + "Please set-up the HdX install location in Tools -> Options -> External Viewer"}) + + "Please set-up the HxD install location in Tools -> Options -> External Viewer"}) private void launchHxDButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_launchHxDButtonActionPerformed try { File HdXExecutable = new File(UserPreferences.getExternalHexEditorPath()); From dbdd5b333c4eaf3a1bb01ed97c07fd0604ab05e7 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Thu, 31 Jan 2019 12:46:58 -0500 Subject: [PATCH 11/27] Moved file copying off the EDT, added a cancellable progress bar --- .../corecomponents/DataContentViewerHex.java | 87 +++++++++++++------ 1 file changed, 60 insertions(+), 27 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java index f83b52c0b9..d1750a6c52 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java @@ -31,8 +31,10 @@ import org.openide.util.NbBundle; import org.sleuthkit.autopsy.coreutils.Logger; import javax.swing.JMenuItem; import javax.swing.JOptionPane; +import javax.swing.SwingWorker; import javax.swing.text.BadLocationException; import javax.swing.text.Utilities; +import org.netbeans.api.progress.ProgressHandle; import org.openide.nodes.Node; import org.openide.util.lookup.ServiceProvider; import org.sleuthkit.autopsy.casemodule.Case; @@ -359,36 +361,67 @@ public class DataContentViewerHex extends javax.swing.JPanel implements DataCont }//GEN-LAST:event_goToOffsetTextFieldActionPerformed @NbBundle.Messages({"DataContentViewerHex.launchError=Unable to launch HxD Editor. 
" - + "Please set-up the HxD install location in Tools -> Options -> External Viewer"}) + + "Please set-up the HxD install location in Tools -> Options -> External Viewer", + "DataContentViewerHex.copyingFile=Copying file to open in HxD..."}) private void launchHxDButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_launchHxDButtonActionPerformed - try { - File HdXExecutable = new File(UserPreferences.getExternalHexEditorPath()); - if(!HdXExecutable.exists() || !HdXExecutable.canExecute()) { - JOptionPane.showMessageDialog(null, DataContentViewerHex_launchError()); - return; - } - - String tempDirectory = Case.getCurrentCaseThrows().getTempDirectory(); - File dataSourceInTempDirectory = Paths.get(tempDirectory, - FileUtil.escapeFileName(dataSource.getId() + dataSource.getName())).toFile(); - ContentUtils.writeToFile(dataSource, dataSourceInTempDirectory); - - try { - ProcessBuilder launchHdXExecutable = new ProcessBuilder(); - launchHdXExecutable.command(String.format("\"%s\" \"%s\"", - HdXExecutable.getAbsolutePath(), - dataSourceInTempDirectory.getAbsolutePath())); - launchHdXExecutable.start(); - } catch (IOException ex) { - JOptionPane.showMessageDialog(null, DataContentViewerHex_launchError()); - dataSourceInTempDirectory.delete(); - } - } catch (NoCurrentCaseException | IOException ex) { - logger.log(Level.SEVERE, "Unable to copy file into temp directory", ex); - JOptionPane.showMessageDialog(null, DataContentViewerHex_launchError()); - } + new BackgroundFileCopyTask().execute(); }//GEN-LAST:event_launchHxDButtonActionPerformed + private class BackgroundFileCopyTask extends SwingWorker { + private boolean wasCancelled = false; + + @Override + public Void doInBackground() throws InterruptedException { + ProgressHandle progress = ProgressHandle.createHandle(DataContentViewerHex_copyingFile(), () -> { + //Cancel the swing worker (which will interrupt the ContentUtils call) + this.cancel(true); + wasCancelled = true; + return true; + }); + + 
try { + File HxDExecutable = new File(UserPreferences.getExternalHexEditorPath()); + if(!HxDExecutable.exists() || !HxDExecutable.canExecute()) { + JOptionPane.showMessageDialog(null, DataContentViewerHex_launchError()); + return null; + } + + String tempDirectory = Case.getCurrentCaseThrows().getTempDirectory(); + File tempFile = Paths.get(tempDirectory, + FileUtil.escapeFileName(dataSource.getId() + dataSource.getName())).toFile(); + + progress.start(100); + ContentUtils.writeToFile(dataSource, tempFile, progress, this, true); + + if(wasCancelled) { + tempFile.delete(); + progress.finish(); + return null; + } + + try { + ProcessBuilder launchHxDExecutable = new ProcessBuilder(); + launchHxDExecutable.command(String.format("\"%s\" \"%s\"", + HxDExecutable.getAbsolutePath(), + tempFile.getAbsolutePath())); + launchHxDExecutable.start(); + } catch (IOException ex) { + logger.log(Level.WARNING, "Unsuccessful attempt to launch HxD", ex); + JOptionPane.showMessageDialog(null, DataContentViewerHex_launchError()); + tempFile.delete(); + } + } catch (NoCurrentCaseException | IOException ex) { + logger.log(Level.SEVERE, "Unable to copy file into temp directory", ex); + JOptionPane.showMessageDialog(null, DataContentViewerHex_launchError()); + } + + progress.finish(); + return null; + } + } + + + // Variables declaration - do not modify//GEN-BEGIN:variables private javax.swing.JMenuItem copyMenuItem; private javax.swing.JLabel currentPageLabel; From 9aa7d5eb98119bca377933ea4face1f8322c17fb Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Thu, 31 Jan 2019 12:49:57 -0500 Subject: [PATCH 12/27] Added comments --- .../autopsy/corecomponents/DataContentViewerHex.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java index d1750a6c52..63aa9279c2 100644 --- 
a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java @@ -367,13 +367,17 @@ public class DataContentViewerHex extends javax.swing.JPanel implements DataCont new BackgroundFileCopyTask().execute(); }//GEN-LAST:event_launchHxDButtonActionPerformed + /** + * Performs the file copying and process launching in a SwingWorker so that the + * UI is not blocked when opening large files. + */ private class BackgroundFileCopyTask extends SwingWorker { private boolean wasCancelled = false; @Override public Void doInBackground() throws InterruptedException { ProgressHandle progress = ProgressHandle.createHandle(DataContentViewerHex_copyingFile(), () -> { - //Cancel the swing worker (which will interrupt the ContentUtils call) + //Cancel the swing worker (which will interrupt the ContentUtils call below) this.cancel(true); wasCancelled = true; return true; From ab7fc19a692ac25ffe87d5630a3ad4bfe5c779e2 Mon Sep 17 00:00:00 2001 From: Ann Priestman Date: Thu, 31 Jan 2019 12:56:43 -0500 Subject: [PATCH 13/27] Remove leading slash and normalize the rest --- .../MSOfficeEmbeddedContentExtractor.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java index e7e959e175..16c2b0e3e8 100644 --- a/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/modules/embeddedfileextractor/MSOfficeEmbeddedContentExtractor.java @@ -516,8 +516,7 @@ class MSOfficeEmbeddedContentExtractor { * @return */ private String getFileRelativePath(String fileName) { - // Used explicit FWD slashes to maintain DB consistency across operating systems. 
- return "/" + moduleDirRelative + "/" + this.parentFileName + "/" + fileName; //NON-NLS + return Paths.get(moduleDirRelative, this.parentFileName, fileName).toString(); } /** From 4580b8fd515adfe904d0d21b6689bdf61768a65b Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Fri, 1 Feb 2019 10:08:54 -0500 Subject: [PATCH 14/27] Made Tika use the cached mimetype for the file (since this may be overridden by autopsy) --- .../textextractors/TikaTextExtractor.java | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index 0a05a238c4..2ca55b0307 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -50,6 +50,8 @@ import org.apache.tika.parser.ParsingReader; import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.ocr.TesseractOCRConfig; import org.apache.tika.parser.pdf.PDFParserConfig; +import org.apache.tika.detect.Detector; +import org.apache.tika.mime.MediaType; import org.openide.util.NbBundle; import org.openide.modules.InstalledFileLocator; import org.openide.util.Lookup; @@ -125,7 +127,7 @@ final class TikaTextExtractor implements TextExtractor { private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory); private static final String SQLITE_MIMETYPE = "application/x-sqlite3"; - private final AutoDetectParser parser = new AutoDetectParser(); + private final AutoDetectParser parser; private final Content content; private boolean tesseractOCREnabled; @@ -145,6 +147,19 @@ final class TikaTextExtractor implements TextExtractor { public TikaTextExtractor(Content content) { this.content = content; + if(content instanceof AbstractFile) { + //Override the detector in Tika and use the already computed + //mimetype. 
This also saves on unneccessary file reads. + parser = new AutoDetectParser(new Detector() { + @Override + public MediaType detect(InputStream in, Metadata mtdt) throws IOException { + return MediaType.parse(AbstractFile.class.cast(content).getMIMEType()); + } + + }); + } else { + parser = new AutoDetectParser(); + } } /** From 0d54d68baad344309b8f6d202d01542ab537a2de Mon Sep 17 00:00:00 2001 From: Eugene Livis Date: Tue, 5 Feb 2019 16:05:29 -0500 Subject: [PATCH 15/27] Addressed code review feedback --- ...ServiceInterface.java => IngestJobRunningService.java} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename Experimental/src/org/sleuthkit/autopsy/experimental/configuration/{IngestJobRunningServiceInterface.java => IngestJobRunningService.java} (88%) diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningServiceInterface.java b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningService.java similarity index 88% rename from Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningServiceInterface.java rename to Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningService.java index 74f47002a3..67fe15136a 100755 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningServiceInterface.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/configuration/IngestJobRunningService.java @@ -23,22 +23,22 @@ import javax.swing.JPanel; /** * Interface to run an ingest job in the background. 
*/ -public interface IngestJobRunningServiceInterface { +public interface IngestJobRunningService { /** * Starts the service */ - public void start(); + void start(); /** * Stops the service */ - public void stop(); + void stop(); /** * Returns a panel to be displayed while using this service * * @return panel to be displayed while using this service */ - public JPanel getStartupWindow(); + JPanel getStartupWindow(); } From c0d3067a3efb4ccfa644a435aa178bcab8103e98 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 7 Feb 2019 15:42:49 -0500 Subject: [PATCH 16/27] 4726 add image names table and lookup for normalizing data source names --- test/script/tskdbdiff.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index e0df3e3bce..8c47acddb5 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -320,7 +320,8 @@ class TskDbDiff(object): id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser) id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser) id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser) - id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table) + id_images_table = build_id_images_names_table(conn.cursor(), isMultiUser) + id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table) if isMultiUser: # Use PostgreSQL os.environ['PGPASSWORD']=pgSettings.password @@ -340,7 +341,7 @@ class TskDbDiff(object): continue else: dump_line += line - dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table) + dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, 
id_images_table) db_log.write('%s\n' % dump_line) dump_line = '' postgreSQL_db.close() @@ -354,7 +355,7 @@ class TskDbDiff(object): for line in conn.iterdump(): if 'INSERT INTO "image_gallery_groups_seen"' in line: continue - line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table) + line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table) db_log.write('%s\n' % line) # Now sort the file srtcmdlst = ["sort", dump_file, "-o", dump_file] @@ -406,7 +407,7 @@ class PGSettings(object): return self.password -def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table): +def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table, images_table): """ Make testing more consistent and reasonable by doctoring certain db entries. 
Args: @@ -515,6 +516,8 @@ def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info parent_path = vs_info_table[parent_id] elif parent_id in fs_info_table.keys(): parent_path = fs_info_table[parent_id] + elif parent_id in images_table.keys(): + parent_path = images_table[parent_id] elif parent_id == 'NULL': parent_path = "NULL" @@ -615,11 +618,23 @@ def build_id_objects_table(db_cursor, isPostgreSQL): Args: db_cursor: the database cursor """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary + # for each row in the db, take the object id, device_id, then create a tuple in the dictionary # with the object id as the key and par_obj_id, type as the value mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) return mapping +def build_id_images_names_table(db_cursor, isPostgreSQL): + """Build the map of object ids to name. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id and name then create a tuple in the dictionary + # with the object id as the key and name, type as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, name FROM tsk_image_names WHERE sequence=0")]) + #data_sources which are logical file sets will be found in the files table + return mapping + def build_id_artifact_types_table(db_cursor, isPostgreSQL): """Build the map of object ids to artifact ids. @@ -642,7 +657,7 @@ def build_id_reports_table(db_cursor, isPostgreSQL): return mapping -def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table): +def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table, images_table): """Build the map of object ids to artifact ids. 
Args: @@ -666,6 +681,8 @@ def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports path = mapping[par_obj_id] elif par_obj_id in reports_table.keys(): path = reports_table[par_obj_id] + elif par_obj_id in images_table.keys(): + path = images_table[par_obj_id] mapping[k] = path + "/" + artifacts_table[k] elif v[0] not in mapping.keys(): if v[0] in artifacts_table.keys(): From c591cd47a32e05309d5037a889a22d7c735fb4de Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 7 Feb 2019 15:56:55 -0500 Subject: [PATCH 17/27] 4726 changed name of function to build table --- test/script/tskdbdiff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 8c47acddb5..0efaf2f603 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -320,7 +320,7 @@ class TskDbDiff(object): id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser) id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser) id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser) - id_images_table = build_id_images_names_table(conn.cursor(), isMultiUser) + id_images_table = build_id_image_names_table(conn.cursor(), isMultiUser) id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table) if isMultiUser: # Use PostgreSQL @@ -623,7 +623,7 @@ def build_id_objects_table(db_cursor, isPostgreSQL): mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) return mapping -def build_id_images_names_table(db_cursor, isPostgreSQL): +def build_id_image_names_table(db_cursor, isPostgreSQL): """Build the map of object ids to name. 
Args: From 8bc0417ba8cda7cd385bda37d3c9259ca9514234 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 7 Feb 2019 17:33:33 -0500 Subject: [PATCH 18/27] 4726 fix comment mistake --- test/script/tskdbdiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 0efaf2f603..3e1101a76a 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -618,7 +618,7 @@ def build_id_objects_table(db_cursor, isPostgreSQL): Args: db_cursor: the database cursor """ - # for each row in the db, take the object id, device_id, then create a tuple in the dictionary + # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary # with the object id as the key and par_obj_id, type as the value mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) return mapping From e464da53fa88d490d50c787d53f0efc559e5b903 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Fri, 8 Feb 2019 12:57:30 -0500 Subject: [PATCH 19/27] Removed the image_galley_group_seen inserts in the multi-user dumps --- test/script/tskdbdiff.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 54b48a1987..3f44af5786 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -333,13 +333,17 @@ class TskDbDiff(object): for line in postgreSQL_db: line = line.strip('\r\n ') # Deal with pg_dump result file - if (line.startswith('--') or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line or 'INSERT INTO "image_gallery_groups_seen"' in line): # It's comment or alter statement or catalog entry or set idle entry or empty line + if (line.startswith('--') or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): # It's 
comment or alter statement or catalog entry or set idle entry or empty line continue elif not line.endswith(';'): # Statement not finished dump_line += line continue else: dump_line += line + + if 'INSERT INTO image_gallery_groups_seen' in dump_line: + dump_line = '' + continue; dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table) db_log.write('%s\n' % dump_line) dump_line = '' From 9c7f2dd1417df44aa68e235d20b4214af217dfd5 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Mon, 11 Feb 2019 11:08:58 -0500 Subject: [PATCH 20/27] Fixed typo --- .../sleuthkit/autopsy/corecomponents/DataContentViewerHex.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java index 63aa9279c2..601d74293d 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentViewerHex.java @@ -361,7 +361,7 @@ public class DataContentViewerHex extends javax.swing.JPanel implements DataCont }//GEN-LAST:event_goToOffsetTextFieldActionPerformed @NbBundle.Messages({"DataContentViewerHex.launchError=Unable to launch HxD Editor. 
" - + "Please set-up the HxD install location in Tools -> Options -> External Viewer", + + "Please specify the HxD install location in Tools -> Options -> External Viewer", "DataContentViewerHex.copyingFile=Copying file to open in HxD..."}) private void launchHxDButtonActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_launchHxDButtonActionPerformed new BackgroundFileCopyTask().execute(); From d28a85759920b0edb4c4ee7a5203ad03f0a36da0 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Mon, 11 Feb 2019 13:18:36 -0500 Subject: [PATCH 21/27] Solved the edge case of no mime-type --- .../textextractors/TikaTextExtractor.java | 51 +++++++++++-------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index 2ca55b0307..420c25a1a4 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -127,7 +127,7 @@ final class TikaTextExtractor implements TextExtractor { private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory); private static final String SQLITE_MIMETYPE = "application/x-sqlite3"; - private final AutoDetectParser parser; + private AutoDetectParser parser; private final Content content; private boolean tesseractOCREnabled; @@ -136,7 +136,7 @@ final class TikaTextExtractor implements TextExtractor { private static final File TESSERACT_PATH = locateTesseractExecutable(); private String languagePacks = formatLanguagePacks(PlatformUtil.getOcrLanguagePacks()); private static final String TESSERACT_OUTPUT_FILE_NAME = "tess_output"; //NON-NLS - + private ProcessTerminator processTerminator; private static final List TIKA_SUPPORTED_TYPES @@ -147,25 +147,32 @@ final class TikaTextExtractor implements TextExtractor { public TikaTextExtractor(Content content) { 
this.content = content; - if(content instanceof AbstractFile) { - //Override the detector in Tika and use the already computed - //mimetype. This also saves on unneccessary file reads. + + if (!(content instanceof AbstractFile)) { + parser = new AutoDetectParser(); + return; + } + + AbstractFile file = (AbstractFile) content; + if (file.getMIMEType() == null) { + parser = new AutoDetectParser(); + } else { parser = new AutoDetectParser(new Detector() { + /** + * Set the Tika logic to use the pre-computed mime type + */ @Override public MediaType detect(InputStream in, Metadata mtdt) throws IOException { - return MediaType.parse(AbstractFile.class.cast(content).getMIMEType()); + return MediaType.parse(file.getMIMEType()); } - - }); - } else { - parser = new AutoDetectParser(); + }); } } /** * If Tesseract has been installed and is set to be used through - * configuration, then ocr is enabled. OCR can only currently be run on - * 64 bit Windows OS. + * configuration, then ocr is enabled. OCR can only currently be run on 64 + * bit Windows OS. * * @return Flag indicating if OCR is set to be used. 
*/ @@ -214,7 +221,7 @@ final class TikaTextExtractor implements TextExtractor { TesseractOCRConfig ocrConfig = new TesseractOCRConfig(); String tesseractFolder = TESSERACT_PATH.getParent(); ocrConfig.setTesseractPath(tesseractFolder); - + ocrConfig.setLanguage(languagePacks); ocrConfig.setTessdataPath(PlatformUtil.getOcrLanguagePacksPath()); parseContext.set(TesseractOCRConfig.class, ocrConfig); @@ -284,7 +291,7 @@ final class TikaTextExtractor implements TextExtractor { File outputFile = null; try { String tempDirectory = Case.getCurrentCaseThrows().getTempDirectory(); - + //Appending file id makes the name unique String tempFileName = FileUtil.escapeFileName(file.getId() + file.getName()); inputFile = Paths.get(tempDirectory, tempFileName).toFile(); @@ -325,7 +332,7 @@ final class TikaTextExtractor implements TextExtractor { } } } - + /** * Wraps the creation of a TikaReader into a Future so that it can be * cancelled. @@ -437,11 +444,11 @@ final class TikaTextExtractor implements TextExtractor { */ @Override public boolean isSupported() { - if(!(content instanceof AbstractFile)) { + if (!(content instanceof AbstractFile)) { return false; } - - String detectedType = ((AbstractFile)content).getMIMEType(); + + String detectedType = ((AbstractFile) content).getMIMEType(); if (detectedType == null || BINARY_MIME_TYPES.contains(detectedType) //any binary unstructured blobs (string extraction will be used) || ARCHIVE_MIME_TYPES.contains(detectedType) @@ -450,7 +457,7 @@ final class TikaTextExtractor implements TextExtractor { ) { return false; } - + return TIKA_SUPPORTED_TYPES.contains(detectedType); } @@ -500,11 +507,11 @@ final class TikaTextExtractor implements TextExtractor { if (context != null) { ImageConfig configInstance = context.lookup(ImageConfig.class); if (configInstance != null) { - if(Objects.nonNull(configInstance.getOCREnabled())) { + if (Objects.nonNull(configInstance.getOCREnabled())) { this.tesseractOCREnabled = configInstance.getOCREnabled(); } - - 
if(Objects.nonNull(configInstance.getOCRLanguages())) { + + if (Objects.nonNull(configInstance.getOCRLanguages())) { this.languagePacks = formatLanguagePacks(configInstance.getOCRLanguages()); } } From dd17770a05b06dbdec7a1fe9604e231d7e11d2ac Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Mon, 11 Feb 2019 13:35:15 -0500 Subject: [PATCH 22/27] Cleaned up the logic to be less verbose --- .../textextractors/TikaTextExtractor.java | 27 +++++++------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index 420c25a1a4..a75ccab260 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -147,25 +147,16 @@ final class TikaTextExtractor implements TextExtractor { public TikaTextExtractor(Content content) { this.content = content; + + parser = new AutoDetectParser(); - if (!(content instanceof AbstractFile)) { - parser = new AutoDetectParser(); - return; - } - - AbstractFile file = (AbstractFile) content; - if (file.getMIMEType() == null) { - parser = new AutoDetectParser(); - } else { - parser = new AutoDetectParser(new Detector() { - /** - * Set the Tika logic to use the pre-computed mime type - */ - @Override - public MediaType detect(InputStream in, Metadata mtdt) throws IOException { - return MediaType.parse(file.getMIMEType()); - } - }); + if (content instanceof AbstractFile) { + AbstractFile file = (AbstractFile) content; + if(file.getMIMEType() != null) { + //Set the Tika logic to use the pre-computed mime type + parser.setDetector((InputStream inStream, Metadata metaData) -> + MediaType.parse(file.getMIMEType())); + } } } From a445d799a45eb7eee61e70edb4d98685f849b31c Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Mon, 11 Feb 2019 13:36:29 -0500 Subject: [PATCH 23/27] Comments --- 
.../org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index a75ccab260..4d4fad6a6a 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -153,7 +153,7 @@ final class TikaTextExtractor implements TextExtractor { if (content instanceof AbstractFile) { AbstractFile file = (AbstractFile) content; if(file.getMIMEType() != null) { - //Set the Tika logic to use the pre-computed mime type + //Force Tika to use our pre-computed mime type during detection parser.setDetector((InputStream inStream, Metadata metaData) -> MediaType.parse(file.getMIMEType())); } From ef98f2e2bffa23441788f20e30c6a31dfe756720 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Mon, 11 Feb 2019 13:36:52 -0500 Subject: [PATCH 24/27] Marked variable as final --- .../org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index 4d4fad6a6a..464d1725dc 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -127,7 +127,7 @@ final class TikaTextExtractor implements TextExtractor { private final ExecutorService executorService = Executors.newSingleThreadExecutor(tikaThreadFactory); private static final String SQLITE_MIMETYPE = "application/x-sqlite3"; - private AutoDetectParser parser; + private final AutoDetectParser parser; private final Content content; private boolean tesseractOCREnabled; From 1244110ab8bd837433f7dd02ebbf9e383a9d2fdb Mon Sep 17 00:00:00 2001 From: 
"U-BASIS\\dsmyda" Date: Mon, 11 Feb 2019 13:37:31 -0500 Subject: [PATCH 25/27] Ran source formatter on code --- .../autopsy/textextractors/TikaTextExtractor.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index 464d1725dc..46ccf44a28 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -147,15 +147,15 @@ final class TikaTextExtractor implements TextExtractor { public TikaTextExtractor(Content content) { this.content = content; - + parser = new AutoDetectParser(); if (content instanceof AbstractFile) { AbstractFile file = (AbstractFile) content; - if(file.getMIMEType() != null) { + if (file.getMIMEType() != null) { //Force Tika to use our pre-computed mime type during detection - parser.setDetector((InputStream inStream, Metadata metaData) -> - MediaType.parse(file.getMIMEType())); + parser.setDetector((InputStream inStream, Metadata metaData) + -> MediaType.parse(file.getMIMEType())); } } } From e4f4491148a5980d3cc67d72f88e89dfb1582093 Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Mon, 11 Feb 2019 16:16:38 -0500 Subject: [PATCH 26/27] Added edge case for empty mime type --- .../org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index 46ccf44a28..e6185512b7 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -152,7 +152,7 @@ final class TikaTextExtractor implements TextExtractor { if (content instanceof AbstractFile) { AbstractFile file = (AbstractFile) content; - if 
(file.getMIMEType() != null) { + if (file.getMIMEType() != null && !file.getMIMEType().isEmpty()) { //Force Tika to use our pre-computed mime type during detection parser.setDetector((InputStream inStream, Metadata metaData) -> MediaType.parse(file.getMIMEType())); From 3d0d70b036197faeff6cc93f12d6697fc103b3fa Mon Sep 17 00:00:00 2001 From: "U-BASIS\\dsmyda" Date: Tue, 12 Feb 2019 09:29:32 -0500 Subject: [PATCH 27/27] removed unused import --- .../org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java | 1 - 1 file changed, 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java index e6185512b7..9491e75783 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TikaTextExtractor.java @@ -50,7 +50,6 @@ import org.apache.tika.parser.ParsingReader; import org.apache.tika.parser.microsoft.OfficeParserConfig; import org.apache.tika.parser.ocr.TesseractOCRConfig; import org.apache.tika.parser.pdf.PDFParserConfig; -import org.apache.tika.detect.Detector; import org.apache.tika.mime.MediaType; import org.openide.util.NbBundle; import org.openide.modules.InstalledFileLocator;