mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-12 16:06:15 +00:00
Further refactoring
This commit is contained in:
parent
5f144cdbc6
commit
a914e4b76e
@ -18,21 +18,10 @@
|
||||
*/
|
||||
package org.sleuthkit.autopsy.textextractors;
|
||||
|
||||
import com.ethteck.decodetect.core.Decodetect;
|
||||
import com.ethteck.decodetect.core.DecodetectResult;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.openide.util.Lookup;
|
||||
import org.sleuthkit.datamodel.Content;
|
||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||
|
||||
/**
|
||||
* Extracts the text out of Content instances and exposes them as a Reader.
|
||||
|
@ -89,6 +89,7 @@ public class TextExtractorFactory {
|
||||
*/
|
||||
private static List<TextExtractor> getFileExtractors(AbstractFile content, Lookup context) {
|
||||
List<TextExtractor> fileExtractors = Arrays.asList(
|
||||
new TextFileExtractor(content),
|
||||
new HtmlTextExtractor(content),
|
||||
new SqliteTextExtractor(content),
|
||||
new TikaTextExtractor(content));
|
||||
|
@ -35,9 +35,9 @@ import org.sleuthkit.datamodel.Content;
|
||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||
|
||||
/**
|
||||
* Extract text from .txt files
|
||||
* Extract text from text files
|
||||
*/
|
||||
public final class TextFileExtractor {
|
||||
public final class TextFileExtractor implements TextExtractor {
|
||||
public static Charset UNKNOWN_CHARSET = new Charset("unknown", null) {
|
||||
@Override
|
||||
public boolean contains(Charset cs) {
|
||||
@ -55,18 +55,29 @@ public final class TextFileExtractor {
|
||||
}
|
||||
};
|
||||
|
||||
public Reader getReader(AbstractFile source) throws TextFileExtractorException {
|
||||
Charset encoding = getEncoding(source);
|
||||
private final AbstractFile file;
|
||||
|
||||
public TextFileExtractor(AbstractFile file) {
|
||||
this.file = file;
|
||||
}
|
||||
|
||||
public Reader getReader() {
|
||||
Charset encoding = getEncoding(file);
|
||||
if (encoding == UNKNOWN_CHARSET) {
|
||||
encoding = StandardCharsets.UTF_8;
|
||||
}
|
||||
return getReader(source, encoding);
|
||||
return getReader(encoding);
|
||||
}
|
||||
|
||||
public Reader getReader(AbstractFile source, Charset encoding) throws TextFileExtractorException {
|
||||
return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(source)), encoding);
|
||||
public Reader getReader(Charset encoding) {
|
||||
return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(file)), encoding);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isSupported() {
|
||||
return file.getMIMEType().equals("text/plain");
|
||||
}
|
||||
|
||||
public class TextFileExtractorException extends Exception {
|
||||
public TextFileExtractorException(String msg, Throwable ex) {
|
||||
super(msg, ex);
|
||||
|
@ -682,8 +682,8 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
||||
*/
|
||||
private boolean indexTextFile(AbstractFile aFile) {
|
||||
try {
|
||||
TextFileExtractor textFileExtractor = new TextFileExtractor();
|
||||
Reader textReader = textFileExtractor.getReader(aFile);
|
||||
TextFileExtractor textFileExtractor = new TextFileExtractor(aFile);
|
||||
Reader textReader = textFileExtractor.getReader();
|
||||
if (textReader == null) {
|
||||
logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
|
||||
} else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
|
||||
@ -692,8 +692,6 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
||||
}
|
||||
} catch (IngesterException ex) {
|
||||
logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex);
|
||||
} catch (TextFileExtractorException ex) {
|
||||
logger.log(Level.INFO, "Could not extract text with TextFileExtractor", ex);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user