diff --git a/Experimental/build.xml b/Experimental/build.xml index a304da0a5d..442a08bd55 100644 --- a/Experimental/build.xml +++ b/Experimental/build.xml @@ -19,8 +19,12 @@ + + + + - + diff --git a/Experimental/nbproject/project.properties b/Experimental/nbproject/project.properties index c5203cbba5..731535afff 100644 --- a/Experimental/nbproject/project.properties +++ b/Experimental/nbproject/project.properties @@ -1,5 +1,6 @@ file.reference.c3p0-0.9.5.jar=release/modules/ext/c3p0-0.9.5.jar file.reference.jackson-core-2.7.0.jar=release/modules/ext/jackson-core-2.7.0.jar +file.reference.jtidy-r938.jar=release/modules/ext/jtidy-r938.jar file.reference.LGoodDatePicker-10.3.1.jar=release/modules/ext/LGoodDatePicker-10.3.1.jar file.reference.mchange-commons-java-0.2.9.jar=release/modules/ext/mchange-commons-java-0.2.9.jar file.reference.postgresql-9.4-1201-jdbc41.jar=release/modules/ext/postgresql-9.4-1201-jdbc41.jar diff --git a/Experimental/nbproject/project.xml b/Experimental/nbproject/project.xml index 953292b3c5..3259799a2a 100644 --- a/Experimental/nbproject/project.xml +++ b/Experimental/nbproject/project.xml @@ -170,6 +170,10 @@ org.sleuthkit.autopsy.experimental.autoingest org.sleuthkit.autopsy.experimental.configuration + + ext/jtidy-r938.jar + release/modules/ext/jtidy-r938.jar + ext/LGoodDatePicker-10.3.1.jar release/modules/ext/LGoodDatePicker-10.3.1.jar diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutopsyManifestFileParser.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutopsyManifestFileParser.java index 93e6e5956c..14e9d3e1b7 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutopsyManifestFileParser.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/AutopsyManifestFileParser.java @@ -51,56 +51,106 @@ public final class AutopsyManifestFileParser implements ManifestFileParser { @Override public boolean fileIsManifest(Path filePath) { boolean 
fileIsManifest = false; - try { - Path fileName = filePath.getFileName(); - if (fileName.toString().toUpperCase().endsWith(MANIFEST_FILE_NAME_SIGNATURE)) { - Document doc = this.createManifestDOM(filePath); - Element docElement = doc.getDocumentElement(); - fileIsManifest = docElement.getTagName().equals(ROOT_ELEM_TAG_NAME); + + Path fileName = filePath.getFileName(); + if (fileName.toString().toUpperCase().endsWith(MANIFEST_FILE_NAME_SIGNATURE)) { + try { + fileIsManifest = isAutopsyManifestFile(filePath); + if (!fileIsManifest) { + // If the check above returned false, tidy the manifest file to + // try to fix the issue and check again. + Path tempPath = ManifestFileParser.makeTidyManifestFile(filePath); + fileIsManifest = isAutopsyManifestFile(tempPath); + tempPath.toFile().delete(); + } + } catch (Exception unused) { + fileIsManifest = false; } - } catch (Exception unused) { - fileIsManifest = false; } + return fileIsManifest; } @Override public Manifest parse(Path filePath) throws ManifestFileParserException { + Path tempPath = null; try { BasicFileAttributes attrs = Files.readAttributes(filePath, BasicFileAttributes.class); Date dateFileCreated = new Date(attrs.creationTime().toMillis()); - Document doc = this.createManifestDOM(filePath); + Document doc; + try { + doc = createManifestDOM(filePath); + } catch (Exception ex) { + // If the above call to createManifestDOM threw an exception, + // try to fix the given XML file. 
+ tempPath = ManifestFileParser.makeTidyManifestFile(filePath); + doc = createManifestDOM(tempPath); + } + XPath xpath = XPathFactory.newInstance().newXPath(); - + XPathExpression expr = xpath.compile(CASE_NAME_XPATH); String caseName = (String) expr.evaluate(doc, XPathConstants.STRING); if (caseName.isEmpty()) { throw new ManifestFileParserException("Case name not found, manifest is invalid"); } - + expr = xpath.compile(DEVICE_ID_XPATH); String deviceId = (String) expr.evaluate(doc, XPathConstants.STRING); if (deviceId.isEmpty()) { deviceId = UUID.randomUUID().toString(); } - + expr = xpath.compile(DATA_SOURCE_NAME_XPATH); String dataSourceName = (String) expr.evaluate(doc, XPathConstants.STRING); if (dataSourceName.isEmpty()) { - throw new ManifestFileParserException("Data source path not found, manifest is invalid"); + throw new ManifestFileParserException("Data source path not found, manifest is invalid"); } Path dataSourcePath = filePath.getParent().resolve(dataSourceName); - + return new Manifest(filePath, dateFileCreated, caseName, deviceId, dataSourcePath, new HashMap<>()); } catch (Exception ex) { throw new ManifestFileParserException(String.format("Error parsing manifest %s", filePath), ex); + } finally { + if (tempPath != null) { + tempPath.toFile().delete(); + } } } + /** + * Creates the DOM object for the file at the given path. + * + * @param manifestFilePath Path to XML file. + * + * @return DOM object for the given XML file. 
+ * + * @throws ParserConfigurationException If a DocumentBuilder cannot be created. + * @throws SAXException If the file cannot be parsed as XML. + * @throws IOException If an I/O error occurs while reading the file. + */ private Document createManifestDOM(Path manifestFilePath) throws ParserConfigurationException, SAXException, IOException { DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); return docBuilder.parse(manifestFilePath.toFile()); } + /** + * Checks whether the given file is an Autopsy auto ingest manifest file by + * testing whether its root element is ROOT_ELEM_TAG_NAME. + * + * @param filePath Path to the manifest file. + * + * @return True if this is a well-formed Autopsy auto ingest manifest file. + */ + private boolean isAutopsyManifestFile(Path filePath) { + try { + Document doc = this.createManifestDOM(filePath); + Element docElement = doc.getDocumentElement(); + return docElement.getTagName().equals(ROOT_ELEM_TAG_NAME); + } catch (Exception unused) { + return false; + } + } + } diff --git a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/ManifestFileParser.java b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/ManifestFileParser.java index 863155afcc..a1b7df9f20 100644 --- a/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/ManifestFileParser.java +++ b/Experimental/src/org/sleuthkit/autopsy/experimental/autoingest/ManifestFileParser.java @@ -18,32 +18,65 @@ */ package org.sleuthkit.autopsy.experimental.autoingest; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; import java.nio.file.Path; +import java.nio.file.Paths; +import org.w3c.tidy.Tidy; /** - * Responsible for parsing the manifest files that - * describe cases, devices, and data sources. - * These are used by autoingest to create cases and add - * data sources to the correct case. + * Responsible for parsing the manifest files that describe cases, devices, and + * data sources. 
These are used by autoingest to create cases and add data + * sources to the correct case. */ public interface ManifestFileParser { - + /** * Checks if a file is this type of manifest file + * * @param filePath Path to potential manifest file + * * @return True if the file is a manifest that this parser supports - */ + */ boolean fileIsManifest(Path filePath); - + /** - * Parses the given file. Will only be called if - * fileIsManifest() previously returned true. + * Parses the given file. Will only be called if fileIsManifest() previously + * returned true. + * * @param filePath Path to manifest file + * * @return Parsed results - * @throws org.sleuthkit.autopsy.experimental.autoingest.ManifestFileParser.ManifestFileParserException + * + * @throws + * org.sleuthkit.autopsy.experimental.autoingest.ManifestFileParser.ManifestFileParserException */ Manifest parse(Path filePath) throws ManifestFileParserException; - + + /** + * Creates a "tidy" version of the given XML file in the same parent directory. + * + * @param filePath Path to original XML file. + * + * @return Path to the newly created tidy version of the file. 
+ * + * @throws IOException + */ + static Path makeTidyManifestFile(Path filePath) throws IOException { + File tempFile = File.createTempFile("mani", "tdy", filePath.getParent().toFile()); + + try (FileInputStream br = new FileInputStream(filePath.toFile()); FileOutputStream out = new FileOutputStream(tempFile);) { + Tidy tidy = new Tidy(); + tidy.setXmlOut(true); + tidy.setXmlTags(true); + tidy.parseDOM(br, out); + } + + return Paths.get(tempFile.toString()); + } + public final static class ManifestFileParserException extends Exception { private static final long serialVersionUID = 1L; @@ -67,5 +100,5 @@ public interface ManifestFileParser { super(message, cause); } } - + } diff --git a/thirdparty/JTidy/LICENSE.txt b/thirdparty/JTidy/LICENSE.txt new file mode 100755 index 0000000000..10b3a0fe80 --- /dev/null +++ b/thirdparty/JTidy/LICENSE.txt @@ -0,0 +1,53 @@ +/** +* Java HTML Tidy - JTidy +* HTML parser and pretty printer +* +* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts +* Institute of Technology, Institut National de Recherche en +* Informatique et en Automatique, Keio University). All Rights +* Reserved. +* +* Contributing Author(s): +* +* Dave Raggett +* Andy Quick (translation to Java) +* Gary L Peskin (Java development) +* Sami Lempinen (release management) +* Fabrizio Giustina +* +* The contributing author(s) would like to thank all those who +* helped with testing, bug fixes, and patience. This wouldn't +* have been possible without all of you. +* +* COPYRIGHT NOTICE: +* +* This software and documentation is provided "as is," and +* the copyright holders and contributing author(s) make no +* representations or warranties, express or implied, including +* but not limited to, warranties of merchantability or fitness +* for any particular purpose or that the use of the software or +* documentation will not infringe any third party patents, +* copyrights, trademarks or other rights. 
+* +* The copyright holders and contributing author(s) will not be +* liable for any direct, indirect, special or consequential damages +* arising out of any use of the software or documentation, even if +* advised of the possibility of such damage. +* +* Permission is hereby granted to use, copy, modify, and distribute +* this source code, or portions hereof, documentation and executables, +* for any purpose, without fee, subject to the following restrictions: +* +* 1. The origin of this source code must not be misrepresented. +* 2. Altered versions must be plainly marked as such and must +* not be misrepresented as being the original source. +* 3. This Copyright notice may not be removed or altered from any +* source or altered source distribution. +* +* The copyright holders and contributing author(s) specifically +* permit, without fee, and encourage the use of this source code +* as a component for supporting the Hypertext Markup Language in +* commercial products. If you use this source code in a product, +* acknowledgment is not required but would be appreciated. +* +*/ \ No newline at end of file diff --git a/thirdparty/JTidy/jtidy-r938.jar b/thirdparty/JTidy/jtidy-r938.jar new file mode 100755 index 0000000000..efde902f3e Binary files /dev/null and b/thirdparty/JTidy/jtidy-r938.jar differ