mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-17 18:17:43 +00:00
Merge pull request #4026 from wschaeferB/4088-TextFileExtractorFix-4.8
4088 text file extractor fix 4.8
This commit is contained in:
commit
9078338963
@ -570,7 +570,9 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
|
putIngestStatus(jobId, aFile.getId(), IngestStatus.SKIPPED_ERROR_TEXTEXTRACT);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt"))) {
|
if ((wasTextAdded == false) && (aFile.getNameExtension().equalsIgnoreCase("txt") && !(aFile.getType().equals(TskData.TSK_DB_FILES_TYPE_ENUM.CARVED)))) {
|
||||||
|
//Carved Files should be the only type of unallocated files capable of a txt extension and
|
||||||
|
//should be ignored by the TextFileExtractor because they may contain more than one text encoding
|
||||||
try {
|
try {
|
||||||
if (Ingester.getDefault().indexText(txtFileExtractor, aFile, context)) {
|
if (Ingester.getDefault().indexText(txtFileExtractor, aFile, context)) {
|
||||||
putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
|
putIngestStatus(jobId, aFile.getId(), IngestStatus.TEXT_INGESTED);
|
||||||
|
@ -17,8 +17,9 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.keywordsearch;
|
package org.sleuthkit.autopsy.keywordsearch;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
import org.apache.tika.parser.txt.CharsetDetector;
|
import org.apache.tika.parser.txt.CharsetDetector;
|
||||||
@ -53,15 +54,16 @@ final class TextFileExtractor extends ContentTextExtractor {
|
|||||||
@Override
|
@Override
|
||||||
public Reader getReader(Content source) throws TextExtractorException {
|
public Reader getReader(Content source) throws TextExtractorException {
|
||||||
CharsetDetector detector = new CharsetDetector();
|
CharsetDetector detector = new CharsetDetector();
|
||||||
ReadContentInputStream stream = new ReadContentInputStream(source);
|
//wrap stream in a BufferedInputStream so that it supports the mark/reset methods necessary for the CharsetDetector
|
||||||
|
InputStream stream = new BufferedInputStream(new ReadContentInputStream(source));
|
||||||
try {
|
try {
|
||||||
detector.setText(stream);
|
detector.setText(stream);
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
throw new TextExtractorException("Unable to get string from detected text in UnicodeTextExtractor", ex);
|
throw new TextExtractorException("Unable to get string from detected text in TextFileExtractor", ex);
|
||||||
}
|
}
|
||||||
CharsetMatch match = detector.detect();
|
CharsetMatch match = detector.detect();
|
||||||
if (match.getConfidence() < MIN_MATCH_CONFIDENCE) {
|
if (match.getConfidence() < MIN_MATCH_CONFIDENCE) {
|
||||||
throw new TextExtractorException("Text does not match any character set with a high enough confidence for UnicodeTextExtractor");
|
throw new TextExtractorException("Text does not match any character set with a high enough confidence for TextFileExtractor");
|
||||||
}
|
}
|
||||||
|
|
||||||
return match.getReader();
|
return match.getReader();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user