Merge pull request #4031 from sleuthkit/release-4.8.0

Merge in updated release 4.8.0
This commit is contained in:
Richard Cordovano 2018-08-08 14:49:25 -04:00 committed by GitHub
commit 7983b872e3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 29 additions and 29 deletions

View File

@ -570,7 +570,9 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT); putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
} }
if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt"))) { if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
//Carved Files should be the only type of unallocated files capable of a txt extension and
//should be ignored by the TextFileExtractor because they may contain more than one text encoding
try { try {
if (Ingester.getDefault().indexText(txtFileExtractor, aFile, context)) { if (Ingester.getDefault().indexText(txtFileExtractor, aFile, context)) {
putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED); putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);

View File

@ -17,8 +17,9 @@
* limitations under the License. * limitations under the License.
*/ */
package org.sleuthkit.autopsy.keywordsearch; package org.sleuthkit.autopsy.keywordsearch;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.io.BufferedInputStream;
import java.io.Reader; import java.io.Reader;
import java.util.logging.Level; import java.util.logging.Level;
import org.apache.tika.parser.txt.CharsetDetector; import org.apache.tika.parser.txt.CharsetDetector;
@ -53,15 +54,16 @@ final class TextFileExtractor extends ContentTextExtractor {
@Override @Override
public Reader getReader(Content source) throws TextExtractorException { public Reader getReader(Content source) throws TextExtractorException {
CharsetDetector detector = new CharsetDetector(); CharsetDetector detector = new CharsetDetector();
ReadContentInputStream stream = new ReadContentInputStream(source); //wrap stream in a BufferedInputStream so that it supports the mark/reset methods necessary for the CharsetDetector
InputStream stream = new BufferedInputStream(new ReadContentInputStream(source));
try { try {
detector.setText(stream); detector.setText(stream);
} catch (IOException ex) { } catch (IOException ex) {
throw new TextExtractorException("Unable to get string from detected text in UnicodeTextExtractor", ex); throw new TextExtractorException("Unable to get string from detected text in TextFileExtractor", ex);
} }
CharsetMatch match = detector.detect(); CharsetMatch match = detector.detect();
if (match.getConfidence() < MIN_MATCH_CONFIDENCE) { if (match.getConfidence() < MIN_MATCH_CONFIDENCE) {
throw new TextExtractorException("Text does not match any character set with a high enough confidence for UnicodeTextExtractor"); throw new TextExtractorException("Text does not match any character set with a high enough confidence for TextFileExtractor");
} }
return match.getReader(); return match.getReader();

View File

@ -1,29 +1,25 @@
---------------- VERSION 4.8.0 -------------- ---------------- VERSION 4.8.0 --------------
New Features: New Features:
- The case tree view can now be grouped by data source. - Data Source Grouping:
- Added a common files search tool that finds all instances of a file in a case. -- The case tree view can now be grouped by data source.
- Text extraction optionally includes optical character recognition (OCR). -- Keyword and file search can now be restricted to a data source.
- Data source(s) filter added to ad hoc keyword search and file search by - Central Repository / Correlation:
attributes. -- New common files search feature that finds files that exist in multiple devices in the same case.
- SQLite tables can now be exported to CSV files. -- The Other Occurrences content viewer now shows matches in the current case (in addition to the central repository).
- User defined tags now appear first in tagging menus. -- Central repository options panel now shows cases that are in the repository.
- Eliminated one tagging sub menu layer for faster tagging. - A comment about a file can be created and saved in the central repository so that future cases can see it.
- Added Replace Tag item to tagging menus (shortcut for delete tag, add tag). - Keyword Search:
- The Other Occurrences content viewer now shows matches in the current case. -- Can enable OCR text extraction of PDF and JPG files using Tesseract.
- A listing of cases in the central repository is displayed by the -- Keyword search module normalizes Unicode text.
central repository options panel. -- Keyword search module uses ICU to convert text files that do not have a BOM.
- An interesting file artifact is now created when a "zip bomb" is detected. - Tagging:
- Text and queries sent to Solr are now normalized to handle diacritics, -- Tagging menu changed to have user defined tags at top and "quick tag" removed one level of menus.
ligatures, narrow and wide width Japanese characters, etc. -- New "Replace Tag" feature to change the tag on an item.
- An object detection ingest module that uses OpenCV and user-supplied - Other:
classifiers has been added to the "experimental" Net Beans Module (NBM). -- SQLite tables can now be exported to CSV files.
- A data source processor that runs Volatility on a memory image has been -- An interesting file artifact is now created when a "zip bomb" is detected.
added to the "experimental" NBM. -- An object detection ingest module was added to the Experimental module. It requires an OpenCV trained model.
- Comments can be added to all files (file correlation properties) recorded
in the central repository using a results view context menu item.
- Comments can be added to all correlation properties recorded
in the central repository using an Other Occurrences results content viewer
context menu item.
Bug Fixes: Bug Fixes:
- Expanding the case tree is more efficient. - Expanding the case tree is more efficient.