diff --git a/Experimental/build.xml b/Experimental/build.xml
index a304da0a5d..442a08bd55 100644
--- a/Experimental/build.xml
+++ b/Experimental/build.xml
@@ -19,8 +19,12 @@
+
+
+
+
-
+
diff --git a/Experimental/nbproject/project.properties b/Experimental/nbproject/project.properties
index c5203cbba5..731535afff 100644
--- a/Experimental/nbproject/project.properties
+++ b/Experimental/nbproject/project.properties
@@ -1,5 +1,6 @@
file.reference.c3p0-0.9.5.jar=release/modules/ext/c3p0-0.9.5.jar
file.reference.jackson-core-2.7.0.jar=release/modules/ext/jackson-core-2.7.0.jar
+file.reference.jtidy-r938.jar=release/modules/ext/jtidy-r938.jar
file.reference.LGoodDatePicker-10.3.1.jar=release/modules/ext/LGoodDatePicker-10.3.1.jar
file.reference.mchange-commons-java-0.2.9.jar=release/modules/ext/mchange-commons-java-0.2.9.jar
file.reference.postgresql-9.4-1201-jdbc41.jar=release/modules/ext/postgresql-9.4-1201-jdbc41.jar
diff --git a/Experimental/nbproject/project.xml b/Experimental/nbproject/project.xml
index 953292b3c5..3259799a2a 100644
--- a/Experimental/nbproject/project.xml
+++ b/Experimental/nbproject/project.xml
@@ -170,6 +170,10 @@
org.sleuthkit.autopsy.experimental.autoingest
org.sleuthkit.autopsy.experimental.configuration
+
+ ext/jtidy-r938.jar
+ release/modules/ext/jtidy-r938.jar
+
ext/LGoodDatePicker-10.3.1.jar
release/modules/ext/LGoodDatePicker-10.3.1.jar
diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutopsyManifestFileParser.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutopsyManifestFileParser.java
index 93e6e5956c..14e9d3e1b7 100644
--- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutopsyManifestFileParser.java
+++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutopsyManifestFileParser.java
@@ -51,56 +51,106 @@ public final class AutopsyManifestFileParser implements ManifestFileParser {
@Override
public boolean fileIsManifest(Path filePath) {
boolean fileIsManifest = false;
- try {
- Path fileName = filePath.getFileName();
- if (fileName.toString().toUpperCase().endsWith(MANIFEST_FILE_NAME_SIGNATURE)) {
- Document doc = this.createManifestDOM(filePath);
- Element docElement = doc.getDocumentElement();
- fileIsManifest = docElement.getTagName().equals(ROOT_ELEM_TAG_NAME);
+
+ Path fileName = filePath.getFileName();
+ if (fileName.toString().toUpperCase().endsWith(MANIFEST_FILE_NAME_SIGNATURE)) {
+ try {
+ fileIsManifest = isAutopsyManifestFile(filePath);
+ if (!fileIsManifest) {
+ // If the check above returned false, the manifest file may be
+ // malformed; try to tidy it and check again.
+ Path tempPath = ManifestFileParser.makeTidyManifestFile(filePath);
+ fileIsManifest = isAutopsyManifestFile(tempPath);
+ tempPath.toFile().delete();
+ }
+ } catch (Exception unused) {
+ fileIsManifest = false;
}
- } catch (Exception unused) {
- fileIsManifest = false;
}
+
return fileIsManifest;
}
@Override
public Manifest parse(Path filePath) throws ManifestFileParserException {
+ Path tempPath = null;
try {
BasicFileAttributes attrs = Files.readAttributes(filePath, BasicFileAttributes.class);
Date dateFileCreated = new Date(attrs.creationTime().toMillis());
- Document doc = this.createManifestDOM(filePath);
+ Document doc;
+ try {
+ doc = createManifestDOM(filePath);
+ } catch (Exception ex) {
+ // If the above call to createManifestDOM threw an exception
+ // try to fix the given XML file.
+ tempPath = ManifestFileParser.makeTidyManifestFile(filePath);
+ doc = createManifestDOM(tempPath);
+ }
+
XPath xpath = XPathFactory.newInstance().newXPath();
-
+
XPathExpression expr = xpath.compile(CASE_NAME_XPATH);
String caseName = (String) expr.evaluate(doc, XPathConstants.STRING);
if (caseName.isEmpty()) {
throw new ManifestFileParserException("Case name not found, manifest is invalid");
}
-
+
expr = xpath.compile(DEVICE_ID_XPATH);
String deviceId = (String) expr.evaluate(doc, XPathConstants.STRING);
if (deviceId.isEmpty()) {
deviceId = UUID.randomUUID().toString();
}
-
+
expr = xpath.compile(DATA_SOURCE_NAME_XPATH);
String dataSourceName = (String) expr.evaluate(doc, XPathConstants.STRING);
if (dataSourceName.isEmpty()) {
- throw new ManifestFileParserException("Data source path not found, manifest is invalid");
+ throw new ManifestFileParserException("Data source path not found, manifest is invalid");
}
Path dataSourcePath = filePath.getParent().resolve(dataSourceName);
-
+
return new Manifest(filePath, dateFileCreated, caseName, deviceId, dataSourcePath, new HashMap<>());
} catch (Exception ex) {
throw new ManifestFileParserException(String.format("Error parsing manifest %s", filePath), ex);
+ } finally {
+ if (tempPath != null) {
+ tempPath.toFile().delete();
+ }
}
}
+ /**
+ * Creates the DOM object for the file at the given path.
+ *
+ * @param manifestFilePath Path to XML file.
+ *
+ * @return DOM object for the given XML file.
+ *
+ * @throws ParserConfigurationException
+ * @throws SAXException
+ * @throws IOException
+ */
private Document createManifestDOM(Path manifestFilePath) throws ParserConfigurationException, SAXException, IOException {
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
return docBuilder.parse(manifestFilePath.toFile());
}
+ /**
+ * Checks whether the given file is an Autopsy auto ingest manifest file by
+ * testing whether its root element is ROOT_ELEM_TAG_NAME.
+ *
+ * @param filePath Path to the manifest file.
+ *
+ * @return True if this is a well-formed Autopsy auto ingest manifest file.
+ */
+ private boolean isAutopsyManifestFile(Path filePath) {
+ try {
+ Document doc = this.createManifestDOM(filePath);
+ Element docElement = doc.getDocumentElement();
+ return docElement.getTagName().equals(ROOT_ELEM_TAG_NAME);
+ } catch (Exception unused) {
+ return false;
+ }
+ }
+
}
diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/ManifestFileParser.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/ManifestFileParser.java
index 863155afcc..a1b7df9f20 100644
--- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/ManifestFileParser.java
+++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/ManifestFileParser.java
@@ -18,32 +18,65 @@
*/
package org.sleuthkit.autopsy.experimental.autoingest;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
import java.nio.file.Path;
+import java.nio.file.Paths;
+import org.w3c.tidy.Tidy;
/**
- * Responsible for parsing the manifest files that
- * describe cases, devices, and data sources.
- * These are used by autoingest to create cases and add
- * data sources to the correct case.
+ * Responsible for parsing the manifest files that describe cases, devices, and
+ * data sources. These are used by autoingest to create cases and add data
+ * sources to the correct case.
*/
public interface ManifestFileParser {
-
+
/**
* Checks if a file is this type of manifest file
+ *
* @param filePath Path to potential manifest file
+ *
* @return True if the file is a manifest that this parser supports
- */
+ */
boolean fileIsManifest(Path filePath);
-
+
/**
- * Parses the given file. Will only be called if
- * fileIsManifest() previously returned true.
+ * Parses the given file. Will only be called if fileIsManifest() previously
+ * returned true.
+ *
* @param filePath Path to manifest file
+ *
* @return Parsed results
- * @throws org.sleuthkit.autopsy.experimental.autoingest.ManifestFileParser.ManifestFileParserException
+ *
+ * @throws
+ * org.sleuthkit.autopsy.experimental.autoingest.ManifestFileParser.ManifestFileParserException
*/
Manifest parse(Path filePath) throws ManifestFileParserException;
-
+
+ /**
+ * Creates a "tidy" version of the given XML file in the same parent directory.
+ *
+ * @param filePath Path to original XML file.
+ *
+ * @return Path to the newly created tidy version of the file.
+ *
+ * @throws IOException
+ */
+ static Path makeTidyManifestFile(Path filePath) throws IOException {
+ File tempFile = File.createTempFile("mani", "tdy", filePath.getParent().toFile());
+
+ try (FileInputStream br = new FileInputStream(filePath.toFile()); FileOutputStream out = new FileOutputStream(tempFile);) {
+ Tidy tidy = new Tidy();
+ tidy.setXmlOut(true);
+ tidy.setXmlTags(true);
+ tidy.parseDOM(br, out);
+ }
+
+ return Paths.get(tempFile.toString());
+ }
+
public final static class ManifestFileParserException extends Exception {
private static final long serialVersionUID = 1L;
@@ -67,5 +100,5 @@ public interface ManifestFileParser {
super(message, cause);
}
}
-
+
}
diff --git a/thirdparty/JTidy/LICENSE.txt b/thirdparty/JTidy/LICENSE.txt
new file mode 100755
index 0000000000..10b3a0fe80
--- /dev/null
+++ b/thirdparty/JTidy/LICENSE.txt
@@ -0,0 +1,53 @@
+/**
+* Java HTML Tidy - JTidy
+* HTML parser and pretty printer
+*
+* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
+* Institute of Technology, Institut National de Recherche en
+* Informatique et en Automatique, Keio University). All Rights
+* Reserved.
+*
+* Contributing Author(s):
+*
+* Dave Raggett
+* Andy Quick (translation to Java)
+* Gary L Peskin (Java development)
+* Sami Lempinen (release management)
+* Fabrizio Giustina
+*
+* The contributing author(s) would like to thank all those who
+* helped with testing, bug fixes, and patience. This wouldn't
+* have been possible without all of you.
+*
+* COPYRIGHT NOTICE:
+*
+* This software and documentation is provided "as is," and
+* the copyright holders and contributing author(s) make no
+* representations or warranties, express or implied, including
+* but not limited to, warranties of merchantability or fitness
+* for any particular purpose or that the use of the software or
+* documentation will not infringe any third party patents,
+* copyrights, trademarks or other rights.
+*
+* The copyright holders and contributing author(s) will not be
+* liable for any direct, indirect, special or consequential damages
+* arising out of any use of the software or documentation, even if
+* advised of the possibility of such damage.
+*
+* Permission is hereby granted to use, copy, modify, and distribute
+* this source code, or portions hereof, documentation and executables,
+* for any purpose, without fee, subject to the following restrictions:
+*
+* 1. The origin of this source code must not be misrepresented.
+* 2. Altered versions must be plainly marked as such and must
+* not be misrepresented as being the original source.
+* 3. This Copyright notice may not be removed or altered from any
+* source or altered source distribution.
+*
+* The copyright holders and contributing author(s) specifically
+* permit, without fee, and encourage the use of this source code
+* as a component for supporting the Hypertext Markup Language in
+* commercial products. If you use this source code in a product,
+* acknowledgment is not required but would be appreciated.
+*
+*/
\ No newline at end of file
diff --git a/thirdparty/JTidy/jtidy-r938.jar b/thirdparty/JTidy/jtidy-r938.jar
new file mode 100755
index 0000000000..efde902f3e
Binary files /dev/null and b/thirdparty/JTidy/jtidy-r938.jar differ