5078-HTML-viewer-not-correctly-interpreting-Unicode

Determine the encoding of the HTML file and display the file using that encoding.
This commit is contained in:
Mark McKinnon 2019-10-29 22:13:23 -04:00
parent 364e2dc19f
commit fcd4dace0a
4 changed files with 37 additions and 6 deletions

View File

@ -44,6 +44,7 @@
<dependency conf="core->default" org="org.apache.opennlp" name="opennlp-tools" rev="1.9.1"/>
<dependency org="org.sejda.webp-imageio" name="webp-imageio-sejda" rev="0.1.0"/>
<dependency org="net.sourceforge.jchardet" name="jchardet" rev="1.0"/>
<dependency conf="core->default" org="commons-validator" name="commons-validator" rev="1.6"/>
<dependency conf="core->default" org="net.htmlparser.jericho" name="jericho-html" rev="3.3"/>

View File

@ -35,6 +35,7 @@ file.reference.java-libpst-0.8.1.jar=release\\modules\\ext\\java-libpst-0.8.1.ja
file.reference.javax.activation-1.2.0.jar=release\\modules\\ext\\javax.activation-1.2.0.jar
file.reference.javax.annotation-api-1.3.2.jar=release\\modules\\ext\\javax.annotation-api-1.3.2.jar
file.reference.jbig2-imageio-3.0.2.jar=release\\modules\\ext\\jbig2-imageio-3.0.2.jar
file.reference.jchardet-1.0.jar=release/modules/ext/jchardet-1.0.jar
file.reference.jcl-over-slf4j-1.7.25.jar=release\\modules\\ext\\jcl-over-slf4j-1.7.25.jar
file.reference.jdom-2.0.5-contrib.jar=release/modules/ext/jdom-2.0.5-contrib.jar
file.reference.jdom-2.0.5.jar=release/modules/ext/jdom-2.0.5.jar

View File

@ -517,6 +517,14 @@
<runtime-relative-path>ext/google-http-client-1.29.0.jar</runtime-relative-path>
<binary-origin>release/modules/ext/google-http-client-1.29.0.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/sleuthkit-postgresql-4.7.0.jar</runtime-relative-path>
<binary-origin>release/modules/ext/sleuthkit-postgresql-4.7.0.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/jchardet-1.0.jar</runtime-relative-path>
<binary-origin>release/modules/ext/jchardet-1.0.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/bcpkix-jdk15on-1.60.jar</runtime-relative-path>
<binary-origin>release\modules\ext\bcpkix-jdk15on-1.60.jar</binary-origin>
@ -605,10 +613,6 @@
<runtime-relative-path>ext/jbig2-imageio-3.0.2.jar</runtime-relative-path>
<binary-origin>release\modules\ext\jbig2-imageio-3.0.2.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/sleuthkit-postgresql-4.7.0.jar</runtime-relative-path>
<binary-origin>release/modules/ext/sleuthkit-postgresql-4.7.0.jar</binary-origin>
</class-path-extension>
<class-path-extension>
<runtime-relative-path>ext/apache-mime4j-dom-0.8.2.jar</runtime-relative-path>
<binary-origin>release\modules\ext\apache-mime4j-dom-0.8.2.jar</binary-origin>

View File

@ -20,9 +20,11 @@ package org.sleuthkit.autopsy.contentviewers;
import java.awt.Component;
import java.awt.Cursor;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.List;
import java.util.logging.Level;
import org.mozilla.universalchardet.UniversalDetector;
import org.openide.util.NbBundle;
import org.openide.windows.WindowManager;
import org.sleuthkit.autopsy.coreutils.Logger;
@ -65,14 +67,37 @@ final class HtmlViewer extends javax.swing.JPanel implements FileTypeViewer {
int fileSize = (int) abstractFile.getSize();
byte[] buffer = new byte[fileSize];
abstractFile.read(buffer, 0, fileSize);
String encoding = determineEncoding(buffer);
if (encoding != null) {
return new String(buffer, encoding);
} else {
return new String(buffer);
} catch (TskCoreException ex) {
}
} catch (TskCoreException | UnsupportedEncodingException ex) {
logger.log(Level.SEVERE, String.format("Unable to read from file '%s' (id=%d).",
abstractFile.getName(), abstractFile.getId()), ex);
return String.format("<p>%s</p>", Bundle.HtmlViewer_file_error());
}
}
/**
 * Attempts to determine the character encoding of an html file based on
 * its raw contents.
 *
 * @param buffer byte array holding the contents of the html file to check
 *
 * @return name of the detected encoding, null if the encoding could not be
 *         determined
 */
private String determineEncoding(byte[] buffer) {
    UniversalDetector detector = new UniversalDetector(null);
    // The third argument is a byte count, not an end index; pass the full
    // length so the final byte of the file is also fed to the detector.
    detector.handleData(buffer, 0, buffer.length);
    detector.dataEnd();
    String encoding = detector.getDetectedCharset();
    detector.reset();
    return encoding;
}
/**
* This method is called from within the constructor to initialize the form.
* WARNING: Do NOT modify this code. The content of this method is always