From 76d7292ea2cf9272254f8be236b0ed873398f52a Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 7 Apr 2020 16:13:02 -0400 Subject: [PATCH 1/6] initial implementation --- .../textextractors/TextFileExtractor.java | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java index 4a89b74d3c..710a7d3508 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java @@ -25,8 +25,10 @@ import java.io.Reader; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.logging.Level; +import org.apache.commons.lang.StringUtils; import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.textutils.EncodingUtils; +import org.sleuthkit.autopsy.modules.filetypeid.FileTypeDetector; import org.sleuthkit.datamodel.AbstractFile; import org.sleuthkit.datamodel.ReadContentInputStream; import org.sleuthkit.datamodel.TskCoreException; @@ -38,6 +40,7 @@ public final class TextFileExtractor implements TextExtractor { private static final Logger logger = Logger.getLogger(TextFileExtractor.class.getName()); private final AbstractFile file; + private static final String PLAIN_TEXT_MIME_TYPE = "text/plain"; private Charset encoding = null; @@ -74,6 +77,28 @@ public final class TextFileExtractor implements TextExtractor { @Override public boolean isSupported() { - return file.getMIMEType().equals("text/plain"); + // if file is null, it is not supported + if (file == null) + return false; + + // get the MIME type + String mimeType = file.getMIMEType(); + + // if it is not present, attempt to use the FileTypeDetector to determine + if (StringUtils.isEmpty(mimeType)) { + FileTypeDetector fileTypeDetector = null; + try { + fileTypeDetector = new FileTypeDetector(); + } catch (FileTypeDetector.FileTypeDetectorInitException ex) { + logger.log(Level.WARNING, "Unable to create file type detector for determining MIME type."); + } + mimeType = fileTypeDetector.getMIMEType(file); + + // if able to determine mime type, + if (!StringUtils.isEmpty(mimeType)) + file.setMIMEType(mimeType); + } + + return (StringUtils.isEmpty(mimeType)) ? false : mimeType.equals(PLAIN_TEXT_MIME_TYPE); } } From 3c47c8b5f69448f29f231b7370358c90e9eb0069 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 7 Apr 2020 16:13:20 -0400 Subject: [PATCH 2/6] initial implementation --- .../textextractors/TextFileExtractorTest.java | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 Core/test/unit/src/org/sleuthkit/autopsy/textextractors/TextFileExtractorTest.java diff --git a/Core/test/unit/src/org/sleuthkit/autopsy/textextractors/TextFileExtractorTest.java b/Core/test/unit/src/org/sleuthkit/autopsy/textextractors/TextFileExtractorTest.java new file mode 100644 index 0000000000..9f9dc8493c --- /dev/null +++ b/Core/test/unit/src/org/sleuthkit/autopsy/textextractors/TextFileExtractorTest.java @@ -0,0 +1,23 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package org.sleuthkit.autopsy.textextractors; + +import junit.framework.Assert; +import org.junit.Test; + + +/** + * Tests methods present in the TextFileExtractor + */ +public class TextFileExtractorTest { + + @Test + public void testIsSupported() { + Assert.assertFalse(new TextFileExtractor(null).isSupported()); + } + + +} From 5a89c431a6ec1905fe20b359edc04edf8ddeca91 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 8 Apr 2020 09:13:02 -0400 Subject: [PATCH 3/6] updates to TextFileExtractor --- .../autopsy/textextractors/TextFileExtractor.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java index 710a7d3508..33c7c04a4e 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java @@ -76,11 +76,7 @@ public final class TextFileExtractor implements TextExtractor { } @Override - public boolean isSupported() { - // if file is null, it is not supported - if (file == null) - return false; - + public boolean isSupported() { // get the MIME type String mimeType = file.getMIMEType(); @@ -91,6 +87,7 @@ public final class TextFileExtractor implements TextExtractor { fileTypeDetector = new FileTypeDetector(); } catch (FileTypeDetector.FileTypeDetectorInitException ex) { logger.log(Level.WARNING, "Unable to create file type detector for determining MIME type."); + return false; } mimeType = fileTypeDetector.getMIMEType(file); @@ -99,6 +96,6 @@ public final class TextFileExtractor implements TextExtractor { file.setMIMEType(mimeType); } - return (StringUtils.isEmpty(mimeType)) ? false : mimeType.equals(PLAIN_TEXT_MIME_TYPE); + return PLAIN_TEXT_MIME_TYPE.equals(mimeType); } } From 98933985031dc8ad8759959b716fbb91f48bb76a Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 8 Apr 2020 11:02:29 -0400 Subject: [PATCH 4/6] cleanup --- .../textextractors/TextFileExtractorTest.java | 23 ------------------- 1 file changed, 23 deletions(-) delete mode 100644 Core/test/unit/src/org/sleuthkit/autopsy/textextractors/TextFileExtractorTest.java diff --git a/Core/test/unit/src/org/sleuthkit/autopsy/textextractors/TextFileExtractorTest.java b/Core/test/unit/src/org/sleuthkit/autopsy/textextractors/TextFileExtractorTest.java deleted file mode 100644 index 9f9dc8493c..0000000000 --- a/Core/test/unit/src/org/sleuthkit/autopsy/textextractors/TextFileExtractorTest.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package org.sleuthkit.autopsy.textextractors; - -import junit.framework.Assert; -import org.junit.Test; - - -/** - * Tests methods present in the TextFileExtractor - */ -public class TextFileExtractorTest { - - @Test - public void testIsSupported() { - Assert.assertFalse(new TextFileExtractor(null).isSupported()); - } - - -} From 1b238183046b0f25e83d7f4322e314076d59e4a8 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 8 Apr 2020 13:51:25 -0400 Subject: [PATCH 5/6] update log entry to severe --- .../org/sleuthkit/autopsy/textextractors/TextFileExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java index 33c7c04a4e..dda59cdcfd 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java @@ -86,7 +86,7 @@ public final class TextFileExtractor implements TextExtractor { try { fileTypeDetector = new FileTypeDetector(); } catch (FileTypeDetector.FileTypeDetectorInitException ex) { - logger.log(Level.WARNING, "Unable to create file type detector for determining MIME type."); + logger.log(Level.SEVERE, "Unable to create file type detector for determining MIME type."); return false; } mimeType = fileTypeDetector.getMIMEType(file); From 797b4a99060ac555b8f0c09feb5af6b228fbe819 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 9 Apr 2020 07:54:25 -0400 Subject: [PATCH 6/6] remove in-memory mime type change --- .../sleuthkit/autopsy/textextractors/TextFileExtractor.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java b/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java index dda59cdcfd..af31797b52 100644 --- a/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java +++ b/Core/src/org/sleuthkit/autopsy/textextractors/TextFileExtractor.java @@ -90,10 +90,6 @@ public final class TextFileExtractor implements TextExtractor { return false; } mimeType = fileTypeDetector.getMIMEType(file); - - // if able to determine mime type, - if (!StringUtils.isEmpty(mimeType)) - file.setMIMEType(mimeType); } return PLAIN_TEXT_MIME_TYPE.equals(mimeType);