Added code to cleanup auto ingest manifest files with bad chars

This commit is contained in:
Kelly Kelly 2020-11-20 17:39:08 -05:00
parent 69462b3385
commit 05a56282eb
7 changed files with 172 additions and 27 deletions

View File

@ -20,7 +20,11 @@
<ivy:retrieve conf="experimental" pattern="${basedir}/release/modules/ext/[artifact]-[revision](-[classifier]).[ext]" />
</target>
<target name="retrieve-all" depends="resolve">
<target name="get-thirdparty-dependencies">
<copy file="${thirdparty.dir}/JTidy/jtidy-r938.jar" todir="${ext.dir}" />
</target>
<target name="retrieve-all" depends="resolve, get-thirdparty-dependencies">
<ivy:retrieve conf="*" pattern="${basedir}/release/modules/ext/[artifact]-[revision](-[classifier]).[ext]" />
</target>

View File

@ -1,5 +1,6 @@
file.reference.c3p0-0.9.5.jar=release/modules/ext/c3p0-0.9.5.jar
file.reference.jackson-core-2.7.0.jar=release/modules/ext/jackson-core-2.7.0.jar
file.reference.jtidy-r938.jar=release/modules/ext/jtidy-r938.jar
file.reference.LGoodDatePicker-10.3.1.jar=release/modules/ext/LGoodDatePicker-10.3.1.jar
file.reference.mchange-commons-java-0.2.9.jar=release/modules/ext/mchange-commons-java-0.2.9.jar
file.reference.postgresql-9.4-1201-jdbc41.jar=release/modules/ext/postgresql-9.4-1201-jdbc41.jar

View File

@ -170,6 +170,10 @@
<package>org.sleuthkit.autopsy.experimental.autoingest</package>
<package>org.sleuthkit.autopsy.experimental.configuration</package>
</public-packages>
<class-path-extension>
<runtime-relative-path>ext/jtidy-r938.jar</runtime-relative-path>
<binary-origin>release/modules/ext/jtidy-r938.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/LGoodDatePicker-10.3.1.jar</runtime-relative-path>
<binary-origin>release/modules/ext/LGoodDatePicker-10.3.1.jar</binary-origin>

View File

@ -51,25 +51,42 @@ public final class AutopsyManifestFileParser implements ManifestFileParser {
@Override
public boolean fileIsManifest(Path filePath) {
boolean fileIsManifest = false;
try {
Path fileName = filePath.getFileName();
if (fileName.toString().toUpperCase().endsWith(MANIFEST_FILE_NAME_SIGNATURE)) {
Document doc = this.createManifestDOM(filePath);
Element docElement = doc.getDocumentElement();
fileIsManifest = docElement.getTagName().equals(ROOT_ELEM_TAG_NAME);
try {
fileIsManifest = isAutopsyManifestFile(filePath);
if (!fileIsManifest) {
// The file was not recognized as a manifest as-is. Create a
// tidied copy of it and run the same check against that copy.
Path tempPath = ManifestFileParser.makeTidyManifestFile(filePath);
fileIsManifest = isAutopsyManifestFile(tempPath);
// The tidied copy is only needed for the check above.
tempPath.toFile().delete();
}
} catch (Exception unused) {
fileIsManifest = false;
}
}
return fileIsManifest;
}
@Override
public Manifest parse(Path filePath) throws ManifestFileParserException {
Path tempPath = null;
try {
BasicFileAttributes attrs = Files.readAttributes(filePath, BasicFileAttributes.class);
Date dateFileCreated = new Date(attrs.creationTime().toMillis());
Document doc = this.createManifestDOM(filePath);
Document doc;
try {
doc = createManifestDOM(filePath);
} catch (Exception ex) {
// If the above call to createManifestDOM threw an exception
// try to fix the given XML file.
tempPath = ManifestFileParser.makeTidyManifestFile(filePath);
doc = createManifestDOM(tempPath);
}
XPath xpath = XPathFactory.newInstance().newXPath();
XPathExpression expr = xpath.compile(CASE_NAME_XPATH);
@ -94,13 +111,46 @@ public final class AutopsyManifestFileParser implements ManifestFileParser {
return new Manifest(filePath, dateFileCreated, caseName, deviceId, dataSourcePath, new HashMap<>());
} catch (Exception ex) {
throw new ManifestFileParserException(String.format("Error parsing manifest %s", filePath), ex);
} finally {
if (tempPath != null) {
tempPath.toFile().delete();
}
}
}
/**
 * Parses the XML file at the given path into a DOM document.
 *
 * @param manifestFilePath Path to the XML file to parse.
 *
 * @return The DOM document built from the contents of the file.
 *
 * @throws ParserConfigurationException If a document builder cannot be
 *                                      created.
 * @throws SAXException                 If the file cannot be parsed as XML.
 * @throws IOException                  If the file cannot be read.
 */
private Document createManifestDOM(Path manifestFilePath) throws ParserConfigurationException, SAXException, IOException {
    // A fresh factory and builder are created for every call.
    final DocumentBuilder xmlParser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    return xmlParser.parse(manifestFilePath.toFile());
}
/**
 * Determines whether the given file is an Autopsy auto ingest manifest file
 * by parsing it as XML and comparing the tag name of its root element with
 * ROOT_ELEM_TAG_NAME.
 *
 * @param filePath Path to the candidate manifest file.
 *
 * @return True if the file parses as XML and its root element is named
 *         ROOT_ELEM_TAG_NAME; false otherwise, including when parsing
 *         fails for any reason.
 */
private boolean isAutopsyManifestFile(Path filePath) {
    boolean rootTagMatches;
    try {
        final Element rootElement = this.createManifestDOM(filePath).getDocumentElement();
        final String rootTagName = rootElement.getTagName();
        rootTagMatches = rootTagName.equals(ROOT_ELEM_TAG_NAME);
    } catch (Exception ignored) {
        // Any failure to read or parse the file means it is not a manifest.
        rootTagMatches = false;
    }
    return rootTagMatches;
}
}

View File

@ -18,32 +18,65 @@
*/
package org.sleuthkit.autopsy.experimental.autoingest;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.w3c.tidy.Tidy;
/**
* Responsible for parsing the manifest files that
* describe cases, devices, and data sources.
* These are used by autoingest to create cases and add
* data sources to the correct case.
* Responsible for parsing the manifest files that describe cases, devices, and
* data sources. These are used by autoingest to create cases and add data
* sources to the correct case.
*/
public interface ManifestFileParser {
/**
 * Checks whether the given file is a manifest file of the type handled by
 * this parser.
 *
 * @param filePath Path to the potential manifest file.
 *
 * @return True if the file is a manifest that this parser supports, false
 *         otherwise.
 */
boolean fileIsManifest(Path filePath);
/**
* Parses the given file. Will only be called if
* fileIsManifest() previously returned true.
* Parses the given file. Will only be called if fileIsManifest() previously
* returned true.
*
* @param filePath Path to manifest file
*
* @return Parsed results
* @throws org.sleuthkit.autopsy.experimental.autoingest.ManifestFileParser.ManifestFileParserException
*
* @throws
* org.sleuthkit.autopsy.experimental.autoingest.ManifestFileParser.ManifestFileParserException
*/
Manifest parse(Path filePath) throws ManifestFileParserException;
/**
 * Creates a "tidy" version of the given XML file in the same parent
 * directory as the original. The original file is run through JTidy in XML
 * mode and the result is written to a newly created temporary file. The
 * caller is responsible for deleting the returned file when done with it.
 *
 * If tidying fails, the partially written temporary file is deleted before
 * the exception is propagated, so no stray files are left behind.
 *
 * @param filePath Path to original XML file.
 *
 * @return Path to the newly created tidy version of the file.
 *
 * @throws IOException If the temporary file cannot be created, or if the
 *                     original file or the temporary file cannot be opened.
 */
static Path makeTidyManifestFile(Path filePath) throws IOException {
    // Resolve to an absolute path first: getParent() returns null for a bare
    // file name, which previously caused a NullPointerException here.
    Path parentDir = filePath.toAbsolutePath().getParent();
    // A null directory makes createTempFile fall back to the default
    // temporary-file directory (only possible when filePath is a root).
    File tempFile = File.createTempFile("mani", "tdy", parentDir != null ? parentDir.toFile() : null);
    try (FileInputStream in = new FileInputStream(filePath.toFile()); FileOutputStream out = new FileOutputStream(tempFile)) {
        Tidy tidy = new Tidy();
        tidy.setXmlOut(true);
        tidy.setXmlTags(true);
        tidy.parseDOM(in, out);
    } catch (IOException | RuntimeException ex) {
        // Both streams are already closed at this point, so the temporary
        // file can be removed instead of being leaked next to the manifest.
        tempFile.delete();
        throw ex;
    }
    return tempFile.toPath();
}
public final static class ManifestFileParserException extends Exception {
private static final long serialVersionUID = 1L;

53
thirdparty/JTidy/LICENSE.txt vendored Executable file
View File

@ -0,0 +1,53 @@
/**
* Java HTML Tidy - JTidy
* HTML parser and pretty printer
*
* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
* Institute of Technology, Institut National de Recherche en
* Informatique et en Automatique, Keio University). All Rights
* Reserved.
*
* Contributing Author(s):
*
* Dave Raggett <dsr@w3.org>
* Andy Quick <ac.quick@sympatico.ca> (translation to Java)
* Gary L Peskin <garyp@firstech.com> (Java development)
* Sami Lempinen <sami@lempinen.net> (release management)
* Fabrizio Giustina <fgiust at users.sourceforge.net>
*
* The contributing author(s) would like to thank all those who
* helped with testing, bug fixes, and patience. This wouldn't
* have been possible without all of you.
*
* COPYRIGHT NOTICE:
*
* This software and documentation is provided "as is," and
* the copyright holders and contributing author(s) make no
* representations or warranties, express or implied, including
* but not limited to, warranties of merchantability or fitness
* for any particular purpose or that the use of the software or
* documentation will not infringe any third party patents,
* copyrights, trademarks or other rights.
*
* The copyright holders and contributing author(s) will not be
* liable for any direct, indirect, special or consequential damages
* arising out of any use of the software or documentation, even if
* advised of the possibility of such damage.
*
* Permission is hereby granted to use, copy, modify, and distribute
* this source code, or portions hereof, documentation and executables,
* for any purpose, without fee, subject to the following restrictions:
*
* 1. The origin of this source code must not be misrepresented.
* 2. Altered versions must be plainly marked as such and must
* not be misrepresented as being the original source.
* 3. This Copyright notice may not be removed or altered from any
* source or altered source distribution.
*
* The copyright holders and contributing author(s) specifically
* permit, without fee, and encourage the use of this source code
* as a component for supporting the Hypertext Markup Language in
* commercial products. If you use this source code in a product,
* acknowledgment is not required but would be appreciated.
*
*/

BIN
thirdparty/JTidy/jtidy-r938.jar vendored Executable file

Binary file not shown.