Further refactoring

This commit is contained in:
Ethan Roseman 2019-11-21 10:42:17 +09:00
parent 5f144cdbc6
commit a914e4b76e
4 changed files with 22 additions and 23 deletions

View File

@ -18,21 +18,10 @@
*/ */
package org.sleuthkit.autopsy.textextractors; package org.sleuthkit.autopsy.textextractors;
import com.ethteck.decodetect.core.Decodetect;
import com.ethteck.decodetect.core.DecodetectResult;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader; import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.util.Collections; import java.util.Collections;
import java.util.List;
import java.util.Map; import java.util.Map;
import org.openide.util.Lookup; import org.openide.util.Lookup;
import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.ReadContentInputStream;
/** /**
* Extracts the text out of Content instances and exposes them as a Reader. * Extracts the text out of Content instances and exposes them as a Reader.

View File

@ -89,6 +89,7 @@ public class TextExtractorFactory {
*/ */
private static List<TextExtractor> getFileExtractors(AbstractFile content, Lookup context) { private static List<TextExtractor> getFileExtractors(AbstractFile content, Lookup context) {
List<TextExtractor> fileExtractors = Arrays.asList( List<TextExtractor> fileExtractors = Arrays.asList(
new TextFileExtractor(content),
new HtmlTextExtractor(content), new HtmlTextExtractor(content),
new SqliteTextExtractor(content), new SqliteTextExtractor(content),
new TikaTextExtractor(content)); new TikaTextExtractor(content));

View File

@ -35,9 +35,9 @@ import org.sleuthkit.datamodel.Content;
import org.sleuthkit.datamodel.ReadContentInputStream; import org.sleuthkit.datamodel.ReadContentInputStream;
/** /**
* Extract text from .txt files * Extract text from text files
*/ */
public final class TextFileExtractor { public final class TextFileExtractor implements TextExtractor {
public static Charset UNKNOWN_CHARSET = new Charset("unknown", null) { public static Charset UNKNOWN_CHARSET = new Charset("unknown", null) {
@Override @Override
public boolean contains(Charset cs) { public boolean contains(Charset cs) {
@ -55,18 +55,29 @@ public final class TextFileExtractor {
} }
}; };
public Reader getReader(AbstractFile source) throws TextFileExtractorException { private final AbstractFile file;
Charset encoding = getEncoding(source);
/**
 * Creates an extractor bound to a single file.
 *
 * The file is stored as-is (no validation is performed here) and is the
 * content that this instance's other methods operate on.
 *
 * @param file the file whose text this extractor will read
 */
public TextFileExtractor(AbstractFile file) {
this.file = file;
}
public Reader getReader() {
Charset encoding = getEncoding(file);
if (encoding == UNKNOWN_CHARSET) { if (encoding == UNKNOWN_CHARSET) {
encoding = StandardCharsets.UTF_8; encoding = StandardCharsets.UTF_8;
} }
return getReader(source, encoding); return getReader(encoding);
} }
public Reader getReader(AbstractFile source, Charset encoding) throws TextFileExtractorException { public Reader getReader(Charset encoding) {
return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(source)), encoding); return new InputStreamReader(new BufferedInputStream(new ReadContentInputStream(file)), encoding);
} }
/**
 * Reports whether this extractor can handle the wrapped file.
 *
 * @return {@code true} only when the file's MIME type is exactly
 *         {@code "text/plain"}; {@code false} otherwise, including when
 *         no MIME type is available
 */
@Override
public boolean isSupported() {
    // Constant-first comparison: getMIMEType() can return null (per the
    // Sleuth Kit datamodel docs, when the type has not been determined),
    // and an unknown type should mean "not supported", not a
    // NullPointerException.
    return "text/plain".equals(file.getMIMEType());
}
public class TextFileExtractorException extends Exception { public class TextFileExtractorException extends Exception {
public TextFileExtractorException(String msg, Throwable ex) { public TextFileExtractorException(String msg, Throwable ex) {
super(msg, ex); super(msg, ex);

View File

@ -682,8 +682,8 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
*/ */
private boolean indexTextFile(AbstractFile aFile) { private boolean indexTextFile(AbstractFile aFile) {
try { try {
TextFileExtractor textFileExtractor = new TextFileExtractor(); TextFileExtractor textFileExtractor = new TextFileExtractor(aFile);
Reader textReader = textFileExtractor.getReader(aFile); Reader textReader = textFileExtractor.getReader();
if (textReader == null) { if (textReader == null) {
logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName()); logger.log(Level.INFO, "Unable to extract with TextFileExtractor, Reader was null for file: {0}", aFile.getName());
} else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) { } else if (Ingester.getDefault().indexText(textReader, aFile.getId(), aFile.getName(), aFile, context)) {
@ -692,8 +692,6 @@ public final class KeywordSearchIngestModule implements FileIngestModule {
} }
} catch (IngesterException ex) { } catch (IngesterException ex) {
logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex); logger.log(Level.WARNING, "Unable to index " + aFile.getName(), ex);
} catch (TextFileExtractorException ex) {
logger.log(Level.INFO, "Could not extract text with TextFileExtractor", ex);
} }
return false; return false;
} }