mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 10:17:41 +00:00
Clean up in TextFileExtractor
This commit is contained in:
parent
f9445ff7d8
commit
ad7e0ceaff
@ -69,7 +69,8 @@ public final class TextFileExtractor implements TextExtractor {
|
|||||||
// detection library to use. If CharsetDetector's own confidence is at least
|
// detection library to use. If CharsetDetector's own confidence is at least
|
||||||
// MIN_MATCH_CONFIDENCE, CharsetDetector's result will be used for decoding.
|
// MIN_MATCH_CONFIDENCE, CharsetDetector's result will be used for decoding.
|
||||||
// Otherwise, Decodetect will be used.
|
// Otherwise, Decodetect will be used.
|
||||||
// NOte: We initially used a confidence of 35, but it was causing some
|
//
|
||||||
|
// Note: We initially used a confidence of 35, but it was causing some
|
||||||
// Chrome Cache files to get flagged as UTF-16 with confidence 40.
|
// Chrome Cache files to get flagged as UTF-16 with confidence 40.
|
||||||
// These files had a small amount of binary data and then ASCII.
|
// These files had a small amount of binary data and then ASCII.
|
||||||
static final private int MIN_CHARSETDETECT_MATCH_CONFIDENCE = 41;
|
static final private int MIN_CHARSETDETECT_MATCH_CONFIDENCE = 41;
|
||||||
@ -124,7 +125,7 @@ public final class TextFileExtractor implements TextExtractor {
|
|||||||
|
|
||||||
// Encoding detection is hard. We use several libraries since the data passed in is often messy.
|
// Encoding detection is hard. We use several libraries since the data passed in is often messy.
|
||||||
// First try CharsetDetector (from Tika / ICU4J).
|
// First try CharsetDetector (from Tika / ICU4J).
|
||||||
// It is a rule-baesd detection approach.
|
// It is a rule-based detection approach.
|
||||||
try (InputStream stream = new BufferedInputStream(new ReadContentInputStream(file))) {
|
try (InputStream stream = new BufferedInputStream(new ReadContentInputStream(file))) {
|
||||||
CharsetDetector detector = new CharsetDetector();
|
CharsetDetector detector = new CharsetDetector();
|
||||||
detector.setText(stream);
|
detector.setText(stream);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user