From fcd4dace0ae89a39f96fa5b09a7b51c1c0069d70 Mon Sep 17 00:00:00 2001 From: Mark McKinnon Date: Tue, 29 Oct 2019 22:13:23 -0400 Subject: [PATCH] 5078-HTML-viewer-not-correctly-interpreting-Unicode Determine encoding of html file and display html file using encoding. --- Core/ivy.xml | 1 + Core/nbproject/project.properties | 1 + Core/nbproject/project.xml | 12 +++++--- .../autopsy/contentviewers/HtmlViewer.java | 29 +++++++++++++++++-- 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/Core/ivy.xml b/Core/ivy.xml index 9ba4a3c371..8ff512127b 100644 --- a/Core/ivy.xml +++ b/Core/ivy.xml @@ -44,6 +44,7 @@ + diff --git a/Core/nbproject/project.properties b/Core/nbproject/project.properties index 2a952926ba..a04f1516f5 100644 --- a/Core/nbproject/project.properties +++ b/Core/nbproject/project.properties @@ -35,6 +35,7 @@ file.reference.java-libpst-0.8.1.jar=release\\modules\\ext\\java-libpst-0.8.1.ja file.reference.javax.activation-1.2.0.jar=release\\modules\\ext\\javax.activation-1.2.0.jar file.reference.javax.annotation-api-1.3.2.jar=release\\modules\\ext\\javax.annotation-api-1.3.2.jar file.reference.jbig2-imageio-3.0.2.jar=release\\modules\\ext\\jbig2-imageio-3.0.2.jar +file.reference.jchardet-1.0.jar=release/modules/ext/jchardet-1.0.jar file.reference.jcl-over-slf4j-1.7.25.jar=release\\modules\\ext\\jcl-over-slf4j-1.7.25.jar file.reference.jdom-2.0.5-contrib.jar=release/modules/ext/jdom-2.0.5-contrib.jar file.reference.jdom-2.0.5.jar=release/modules/ext/jdom-2.0.5.jar diff --git a/Core/nbproject/project.xml b/Core/nbproject/project.xml index c33dabfbc7..9a473f40e7 100644 --- a/Core/nbproject/project.xml +++ b/Core/nbproject/project.xml @@ -517,6 +517,14 @@ ext/google-http-client-1.29.0.jar release/modules/ext/google-http-client-1.29.0.jar + + ext/sleuthkit-postgresql-4.7.0.jar + release/modules/ext/sleuthkit-postgresql-4.7.0.jar + + + ext/jchardet-1.0.jar + release/modules/ext/jchardet-1.0.jar + ext/bcpkix-jdk15on-1.60.jar release\modules\ext\bcpkix-jdk15on-1.60.jar @@ -605,10 +613,6 @@ ext/jbig2-imageio-3.0.2.jar release\modules\ext\jbig2-imageio-3.0.2.jar - - ext/sleuthkit-postgresql-4.7.0.jar - release/modules/ext/sleuthkit-postgresql-4.7.0.jar - ext/apache-mime4j-dom-0.8.2.jar release\modules\ext\apache-mime4j-dom-0.8.2.jar diff --git a/Core/src/org/sleuthkit/autopsy/contentviewers/HtmlViewer.java b/Core/src/org/sleuthkit/autopsy/contentviewers/HtmlViewer.java index fb88ed9312..c9e4411437 100755 --- a/Core/src/org/sleuthkit/autopsy/contentviewers/HtmlViewer.java +++ b/Core/src/org/sleuthkit/autopsy/contentviewers/HtmlViewer.java @@ -20,9 +20,11 @@ package org.sleuthkit.autopsy.contentviewers; import java.awt.Component; import java.awt.Cursor; +import java.io.UnsupportedEncodingException; import java.util.Arrays; import java.util.List; import java.util.logging.Level; +import org.mozilla.universalchardet.UniversalDetector; import org.openide.util.NbBundle; import org.openide.windows.WindowManager; import org.sleuthkit.autopsy.coreutils.Logger; @@ -65,14 +67,37 @@ final class HtmlViewer extends javax.swing.JPanel implements FileTypeViewer { int fileSize = (int) abstractFile.getSize(); byte[] buffer = new byte[fileSize]; abstractFile.read(buffer, 0, fileSize); - return new String(buffer); - } catch (TskCoreException ex) { + String encoding = determineEncoding(buffer); + if (encoding != null) { + return new String(buffer, encoding); + } else { + return new String(buffer); + } + } catch (TskCoreException | UnsupportedEncodingException ex) { logger.log(Level.SEVERE, String.format("Unable to read from file '%s' (id=%d).", abstractFile.getName(), abstractFile.getId()), ex); return String.format("

%s

", Bundle.HtmlViewer_file_error()); } } + /** + * This method will try and determine the encoding of the html file based on its contents + * + * @param buffer byte array of the html file to check + * + * @return encoding type, null if encoding could not be determined + */ + private String determineEncoding(byte[] buffer) { + UniversalDetector detector = new UniversalDetector(null); + + detector.handleData(buffer, 0, buffer.length - 1); + detector.dataEnd(); + + String encoding = detector.getDetectedCharset(); + detector.reset(); + return encoding; + } + /** * This method is called from within the constructor to initialize the form. * WARNING: Do NOT modify this code. The content of this method is always