mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-13 08:26:15 +00:00
Further refactoring
This commit is contained in:
parent
5f144cdbc6
commit
a914e4b76e
@ -18,21 +18,10 @@
|
|||||||
*/
|
*/
|
||||||
package org.sleuthkit.autopsy.textextractors;
|
package org.sleuthkit.autopsy.textextractors;
|
||||||
|
|
||||||
import com.ethteck.decodetect.core.Decodetect;
|
|
||||||
import com.ethteck.decodetect.core.DecodetectResult;
|
|
||||||
import java.io.BufferedInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.nio.charset.Charset;
|
|
||||||
import java.nio.charset.CharsetDecoder;
|
|
||||||
import java.nio.charset.CharsetEncoder;
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import org.openide.util.Lookup;
|
import org.openide.util.Lookup;
|
||||||
import org.sleuthkit.datamodel.Content;
|
|
||||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extracts the text out of Content instances and exposes them as a Reader.
|
* Extracts the text out of Content instances and exposes them as a Reader.
|
||||||
|
@ -89,6 +89,7 @@ public class TextExtractorFactory {
|
|||||||
*/
|
*/
|
||||||
private static List<TextExtractor> getFileExtractors(AbstractFile content, Lookup context) {
|
private static List<TextExtractor> getFileExtractors(AbstractFile content, Lookup context) {
|
||||||
List<TextExtractor> fileExtractors = Arrays.asList(
|
List<TextExtractor> fileExtractors = Arrays.asList(
|
||||||
|
new TextFileExtractor(content),
|
||||||
new HtmlTextExtractor(content),
|
new HtmlTextExtractor(content),
|
||||||
new SqliteTextExtractor(content),
|
new SqliteTextExtractor(content),
|
||||||
new TikaTextExtractor(content));
|
new TikaTextExtractor(content));
|
||||||
|
@ -35,9 +35,9 @@ import org.sleuthkit.datamodel.Content;
|
|||||||
import org.sleuthkit.datamodel.ReadContentInputStream;
|
import org.sleuthkit.datamodel.ReadContentInputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Extract text from .txt files
|
* Extract text from text files
|
||||||
*/
|
*/
|
||||||
public final class TextFileExtractor {
|
public final class TextFileExtractor implements TextExtractor {
|
||||||
public static Charset UNKNOWN_CHARSET = new Charset("unknown", null) {
|
public static Charset UNKNOWN_CHARSET = new Charset("unknown", null) {
|
||||||
@Override
|
@Override
|
||||||
public boolean contains(Charset cs) {
|
public boolean contains(Charset cs) {
|
||||||
@ -55,16 +55,27 @@ public final class TextFileExtractor {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
public Reader getReader(AbstractFile source) throws TextFileExtractorException {
|
private final AbstractFile file;
|
||||||
Charset encoding = getEncoding(source);
|
|
||||||
|
public TextFileExtractor(AbstractFile file) {
|
||||||
|
this.file = file;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Reader getReader() {
|
||||||
|
Charset encoding = getEncoding(file);
|
||||||
if (encoding == UNKNOWN_CHARSET) {
|
if (encoding == UNKNOWN_CHARSET) {
|
||||||
encoding = StandardCharsets.UTF_8;
|
encoding = StandardCharsets.UTF_8;
|
||||||
}
|
}
|
||||||
return getReader(source, encoding);
|
return getReader(encoding);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Reader getReader(AbstractFile source, Charset encoding) throws TextFileExtractorException {
|
public Reader getReader(Charset encoding) {
|
||||||
return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(source)), encoding);
|
return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(file)), encoding);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isSupported() {
|
||||||
|
return file.getMIMEType().equals("text/plain");
|
||||||
}
|
}
|
||||||
|
|
||||||
public class TextFileExtractorException extends Exception {
|
public class TextFileExtractorException extends Exception {
|
||||||
|
@ -682,8 +682,8 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
*/
|
*/
|
||||||
private boolean indexTextFile(AbstractFile aFile) {
|
private boolean indexTextFile(AbstractFile aFile) {
|
||||||
try {
|
try {
|
||||||
TextFileExtractor textFileExtractor = new TextFileExtractor();
|
TextFileExtractor textFileExtractor = new TextFileExtractor(aFile);
|
||||||
Reader textReader = textFileExtractor.getReader(aFile);
|
Reader textReader = textFileExtractor.getReader();
|
||||||
if (textReader == null) {
|
if (textReader == null) {
|
||||||
logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
|
logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
|
||||||
} else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
|
} else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
|
||||||
@ -692,8 +692,6 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
|
|||||||
}
|
}
|
||||||
} catch (IngesterException ex) {
|
} catch (IngesterException ex) {
|
||||||
logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex);
|
logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex);
|
||||||
} catch (TextFileExtractorException ex) {
|
|
||||||
logger.log(Level.INFO, "Could not extract text with TextFileExtractor", ex);
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user