Merge branch 'develop' of https://www.github.com/sleuthkit/autopsy into develop

alexjacks92 2014-04-17 10:23:59 -04:00
commit c089afb7a7
10 changed files with 61 additions and 67 deletions

View File

@@ -122,7 +122,7 @@ public class IngestManager {
*
* @return True if any ingest jobs are in progress, false otherwise
*/
public boolean isIngestRunning() {
public synchronized boolean isIngestRunning() {
return (ingestJobs.isEmpty() == false);
}
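
Note: making isIngestRunning() synchronized means the check takes the IngestManager monitor, ordering it with whatever synchronized code adds and removes entries in ingestJobs, so a caller never reads the collection mid-update. A minimal sketch of the pattern, with hypothetical class and field names rather than the Autopsy API:

    import java.util.HashMap;
    import java.util.Map;

    // Sketch: every access to the jobs map goes through a synchronized
    // method, so readers never observe it between a put and a remove.
    class JobTracker {
        private final Map<Long, String> jobs = new HashMap<>(); // guarded by "this"

        public synchronized void jobStarted(long id, String name) {
            jobs.put(id, name);
        }

        public synchronized void jobFinished(long id) {
            jobs.remove(id);
        }

        public synchronized boolean isAnyJobRunning() {
            return !jobs.isEmpty();
        }
    }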

View File

@@ -125,8 +125,7 @@ public final class SevenZipIngestModule extends IngestModuleAdapter implements F
}
}
// if first instance of this module for this job then check 7zip init
if (refCounter.incrementAndGet(jobId) == 1) {
if (!SevenZip.isInitializedSuccessfully() && (SevenZip.getLastInitializationException() == null)) {
try {
SevenZip.initSevenZipFromPlatformJAR();
String platform = SevenZip.getUsedPlatform();
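
Note: the guard above runs the 7-Zip-JBinding native initialization only for the first module instance of a given ingest job, and skips it when a previous attempt already recorded a failure. A rough sketch of a per-job counter supporting that first-caller check (hypothetical helper, not the project's reference-counter class):

    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.atomic.AtomicLong;

    // Sketch: incrementAndGet(jobId) returns 1 only for the first caller
    // of a given job, so one-time setup can key off the return value.
    class PerJobRefCounter {
        private final ConcurrentHashMap<Long, AtomicLong> counters = new ConcurrentHashMap<>();

        long incrementAndGet(long jobId) {
            counters.putIfAbsent(jobId, new AtomicLong(0));
            return counters.get(jobId).incrementAndGet();
        }
    }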

View File

@@ -40,17 +40,18 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
class AbstractFileHtmlExtract implements AbstractFileExtract {
private static final Logger logger = Logger.getLogger(AbstractFileHtmlExtract.class.getName());
private static Ingester ingester;
static final Charset outCharset = Server.DEFAULT_INDEXED_TEXT_CHARSET;
static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;
private static final int SINGLE_READ_CHARS = 1024;
private static final int EXTRA_CHARS = 128; //for whitespace
private static final char[] TEXT_CHUNK_BUF = new char[MAX_EXTR_TEXT_CHARS];
private static final int EXTRA_CHARS = 128; //for whitespace
private static final int MAX_SIZE = 50000000;
private KeywordSearchIngestModule module;
private Ingester ingester;
//private static final String UTF16BOM = "\uFEFF"; disabled prepending of BOM
private final char[] textChunkBuf = new char[MAX_EXTR_TEXT_CHARS];
private KeywordSearchIngestModule module;
private AbstractFile sourceFile;
private int numChunks = 0;
//private static final String UTF16BOM = "\uFEFF"; disabled prepending of BOM
static final List<String> WEB_MIME_TYPES = Arrays.asList(
"application/javascript",
"application/xhtml+xml",
@@ -98,7 +99,7 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
@Override
public boolean index(AbstractFile sourceFile) throws IngesterException {
this.sourceFile = sourceFile;
this.numChunks = 0; //unknown until indexing is done
numChunks = 0; //unknown until indexing is done
boolean success = false;
Reader reader = null;
@@ -122,12 +123,12 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
long totalRead = 0;
boolean eof = false;
//we read max 1024 chars at a time; this seems to be the most this Reader will return
while (!eof && (readSize = reader.read(TEXT_CHUNK_BUF, 0, SINGLE_READ_CHARS)) != -1) {
while (!eof && (readSize = reader.read(textChunkBuf, 0, SINGLE_READ_CHARS)) != -1) {
totalRead += readSize;
//consume more bytes to fill entire chunk (leave EXTRA_CHARS to end the word)
while ((totalRead < MAX_EXTR_TEXT_CHARS - SINGLE_READ_CHARS - EXTRA_CHARS)
&& (readSize = reader.read(TEXT_CHUNK_BUF, (int) totalRead, SINGLE_READ_CHARS)) != -1) {
&& (readSize = reader.read(textChunkBuf, (int) totalRead, SINGLE_READ_CHARS)) != -1) {
totalRead += readSize;
}
if (readSize == -1) {
@@ -136,8 +137,8 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
} else {
//try to read until whitespace to not break words
while ((totalRead < MAX_EXTR_TEXT_CHARS - 1)
&& !Character.isWhitespace(TEXT_CHUNK_BUF[(int) totalRead - 1])
&& (readSize = reader.read(TEXT_CHUNK_BUF, (int) totalRead, 1)) != -1) {
&& !Character.isWhitespace(textChunkBuf[(int) totalRead - 1])
&& (readSize = reader.read(textChunkBuf, (int) totalRead, 1)) != -1) {
totalRead += readSize;
}
if (readSize == -1) {
@@ -156,9 +157,9 @@ import org.sleuthkit.datamodel.ReadContentInputStream;
//inject BOM here (saves byte buffer realloc later), will be converted to specific encoding BOM
//sb.append(UTF16BOM); disabled BOM, not needing as bypassing Tika
if (totalRead < MAX_EXTR_TEXT_CHARS) {
sb.append(TEXT_CHUNK_BUF, 0, (int) totalRead);
sb.append(textChunkBuf, 0, (int) totalRead);
} else {
sb.append(TEXT_CHUNK_BUF);
sb.append(textChunkBuf);
}
//reset for next chunk
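
Note: the changes in this file replace the shared static TEXT_CHUNK_BUF with a per-instance textChunkBuf, so two HTML extractors running on different ingest threads no longer write into the same scratch array. A minimal sketch of the difference, with the constant copied from the diff and an illustrative class name:

    // Sketch: a static buffer is shared by every extractor instance and is
    // only safe if extraction is confined to a single thread; a final
    // instance field gives each extractor its own scratch space.
    class ChunkedExtractor {
        private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;

        // unsafe once extractors can run concurrently:
        // private static final char[] TEXT_CHUNK_BUF = new char[MAX_EXTR_TEXT_CHARS];

        // safe: one buffer per extractor instance
        private final char[] textChunkBuf = new char[MAX_EXTR_TEXT_CHARS];
    }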

View File

@@ -36,34 +36,32 @@ import org.sleuthkit.datamodel.AbstractFile;
* the original source file) up to 1MB and then indexes chunks as text with Solr
*/
class AbstractFileStringExtract implements AbstractFileExtract {
private KeywordSearchIngestModule module;
private Ingester ingester;
private int numChunks;
private static Ingester ingester;
private static final Logger logger = Logger.getLogger(AbstractFileStringExtract.class.getName());
static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
private AbstractFile sourceFile;
//single static buffer for all extractions. Safe, indexing can only happen in one thread
private static final byte[] STRING_CHUNK_BUF = new byte[(int) MAX_STRING_CHUNK_SIZE];
private static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
//private static final int BOM_LEN = 3;
private static final int BOM_LEN = 0; //disabled prepending of BOM
private static final Charset INDEX_CHARSET = Server.DEFAULT_INDEXED_TEXT_CHARSET;
private static final SCRIPT DEFAULT_SCRIPT = SCRIPT.LATIN_2;
private final byte[] stringChunkBuf = new byte[(int) MAX_STRING_CHUNK_SIZE];
private KeywordSearchIngestModule module;
private AbstractFile sourceFile;
private int numChunks = 0;
private final List<SCRIPT> extractScripts = new ArrayList<SCRIPT>();
private Map<String, String> extractOptions = new HashMap<String, String>();
private Map<String, String> extractOptions = new HashMap<String, String>();
//disabled prepending of BOM
//static {
//prepend UTF-8 BOM to start of the buffer
//STRING_CHUNK_BUF[0] = (byte) 0xEF;
//STRING_CHUNK_BUF[1] = (byte) 0xBB;
//STRING_CHUNK_BUF[2] = (byte) 0xBF;
//stringChunkBuf[0] = (byte) 0xEF;
//stringChunkBuf[1] = (byte) 0xBB;
//stringChunkBuf[2] = (byte) 0xBF;
//}
public AbstractFileStringExtract(KeywordSearchIngestModule module) {
this.module = module;
this.ingester = Server.getIngester();
this.extractScripts.add(DEFAULT_SCRIPT);
ingester = Server.getIngester();
extractScripts.add(DEFAULT_SCRIPT);
}
@Override
@@ -132,14 +130,14 @@ class AbstractFileStringExtract implements AbstractFileExtract {
//break input stream into chunks
long readSize = 0;
while ((readSize = stringStream.read(STRING_CHUNK_BUF, BOM_LEN, (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) {
while ((readSize = stringStream.read(stringChunkBuf, BOM_LEN, (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) {
//FileOutputStream debug = new FileOutputStream("c:\\temp\\" + sourceFile.getName() + Integer.toString(this.numChunks+1));
//debug.write(STRING_CHUNK_BUF, 0, (int)readSize);
//debug.write(stringChunkBuf, 0, (int)readSize);
AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1);
try {
chunk.index(ingester, STRING_CHUNK_BUF, readSize + BOM_LEN, INDEX_CHARSET);
chunk.index(ingester, stringChunkBuf, readSize + BOM_LEN, INDEX_CHARSET);
++this.numChunks;
} catch (IngesterException ingEx) {
success = false;
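
Note: the loop above reads the string stream into the per-instance stringChunkBuf and indexes one chunk per read; BOM_LEN is 0, so the reserved BOM prefix is currently a no-op. A simplified sketch of the read-and-index loop against a stand-in sink interface (not the Ingester API):

    import java.io.IOException;
    import java.io.InputStream;

    // Sketch: read the stream in fixed-size chunks and hand each filled
    // chunk (plus any reserved prefix bytes) to an indexing sink.
    class StringChunkIndexer {
        interface Sink {
            void index(byte[] buf, long len) throws IOException;
        }

        private static final long MAX_STRING_CHUNK_SIZE = 1 * 1024 * 1024L;
        private static final int BOM_LEN = 0; // BOM prepending disabled
        private final byte[] stringChunkBuf = new byte[(int) MAX_STRING_CHUNK_SIZE];
        private int numChunks = 0;

        int indexAll(InputStream stringStream, Sink sink) throws IOException {
            long readSize;
            while ((readSize = stringStream.read(stringChunkBuf, BOM_LEN,
                    (int) MAX_STRING_CHUNK_SIZE - BOM_LEN)) != -1) {
                sink.index(stringChunkBuf, readSize + BOM_LEN);
                ++numChunks;
            }
            return numChunks;
        }
    }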

View File

@@ -34,15 +34,14 @@ import org.sleuthkit.datamodel.TskCoreException;
* object, extract international strings from the file and read output as a
* stream of UTF-8 strings as encoded bytes.
*
* Currently not-thread safe (reusing static buffers for efficiency)
*/
class AbstractFileStringIntStream extends InputStream {
private static final Logger logger = Logger.getLogger(AbstractFileStringIntStream.class.getName());
private static final int FILE_BUF_SIZE = 1024 * 1024;
private AbstractFile content;
private final byte[] oneCharBuf = new byte[1];
private final StringExtract stringExtractor;
private static final int FILE_BUF_SIZE = 1024 * 1024;
private static final byte[] fileReadBuff = new byte[FILE_BUF_SIZE]; //NOTE: need to run all stream extraction in same thread
private final StringExtract stringExtractor;
private final byte[] fileReadBuff = new byte[FILE_BUF_SIZE];
private long fileReadOffset = 0L;
private byte[] convertBuff; //stores extracted string encoded as bytes, before returned to user
private int convertBuffOffset = 0; //offset to start returning data to user on next read()
@@ -51,7 +50,7 @@ import org.sleuthkit.datamodel.TskCoreException;
private boolean extractUTF8;
private boolean extractUTF16;
private Charset outCharset;
private static final Logger logger = Logger.getLogger(AbstractFileStringIntStream.class.getName());
private StringExtractResult lastExtractResult;
/**

View File

@@ -42,9 +42,11 @@ import org.sleuthkit.datamodel.TskException;
private AbstractFile content;
private Charset outputCharset;
//internal data
private long contentOffset = 0; //offset in fscontent read into curReadBuf
private static final Logger logger = Logger.getLogger(AbstractFileStringStream.class.getName());
private static final String NLS = Character.toString((char) 10); //new line
private static final int READ_BUF_SIZE = 256;
private static final byte[] curReadBuf = new byte[READ_BUF_SIZE];
private long contentOffset = 0; //offset in fscontent read into curReadBuf
private final byte[] curReadBuf = new byte[READ_BUF_SIZE];
private int bytesInReadBuf = 0;
private int readBufOffset = 0; //offset in read buf processed
private StringBuilder curString = new StringBuilder();
@@ -55,10 +57,8 @@ import org.sleuthkit.datamodel.TskException;
private boolean stringAtTempBoundary = false; //if temp has part of string that didn't make it in previous read()
private boolean stringAtBufBoundary = false; //if read buffer has string being processed, continue as string from prev read() in next read()
private boolean inString = false; //if current temp has min chars required
private static final byte[] oneCharBuf = new byte[1];
private final byte[] oneCharBuf = new byte[1];
private final int MIN_PRINTABLE_CHARS = 4; //num. of chars needed to qualify as a char string
private static final String NLS = Character.toString((char) 10); //new line
private static final Logger logger = Logger.getLogger(AbstractFileStringStream.class.getName());
/**
* Construct new string stream from FsContent

View File

@@ -57,17 +57,16 @@ import org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException;
class AbstractFileTikaTextExtract implements AbstractFileExtract {
private static final Logger logger = Logger.getLogger(AbstractFileTikaTextExtract.class.getName());
private static Ingester ingester;
private static final Charset OUTPUT_CHARSET = Server.DEFAULT_INDEXED_TEXT_CHARSET;
static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;
private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;
private static final int SINGLE_READ_CHARS = 1024;
private static final int EXTRA_CHARS = 128; //for whitespace
private static final char[] TEXT_CHUNK_BUF = new char[MAX_EXTR_TEXT_CHARS];
//private Tika tika;
private KeywordSearchIngestModule module;
private static Ingester ingester;
private AbstractFile sourceFile; //currently processed file
private int numChunks = 0;
//private static final String UTF16BOM = "\uFEFF"; disabled prepending of BOM
private final char[] textChunkBuf = new char[MAX_EXTR_TEXT_CHARS];
private KeywordSearchIngestModule module;
private AbstractFile sourceFile; //currently processed file
private int numChunks = 0;
private final ExecutorService tikaParseExecutor = Executors.newSingleThreadExecutor();
private final List<String> TIKA_SUPPORTED_TYPES = new ArrayList<>();
@@ -80,7 +79,6 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
TIKA_SUPPORTED_TYPES.add(mt.getType() + "/" + mt.getSubtype());
}
logger.log(Level.INFO, "Tika supported media types: {0}", TIKA_SUPPORTED_TYPES);
}
@Override
@@ -115,7 +113,7 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
@Override
public boolean index(AbstractFile sourceFile) throws Ingester.IngesterException {
this.sourceFile = sourceFile;
this.numChunks = 0; //unknown until indexing is done
numChunks = 0; //unknown until indexing is done
boolean success = false;
Reader reader = null;
@@ -156,12 +154,12 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
long totalRead = 0;
boolean eof = false;
//we read max 1024 chars at a time; this seems to be the most this Reader will return
while (!eof && (readSize = reader.read(TEXT_CHUNK_BUF, 0, SINGLE_READ_CHARS)) != -1) {
while (!eof && (readSize = reader.read(textChunkBuf, 0, SINGLE_READ_CHARS)) != -1) {
totalRead += readSize;
//consume more bytes to fill entire chunk (leave EXTRA_CHARS to end the word)
while ((totalRead < MAX_EXTR_TEXT_CHARS - SINGLE_READ_CHARS - EXTRA_CHARS)
&& (readSize = reader.read(TEXT_CHUNK_BUF, (int) totalRead, SINGLE_READ_CHARS)) != -1) {
&& (readSize = reader.read(textChunkBuf, (int) totalRead, SINGLE_READ_CHARS)) != -1) {
totalRead += readSize;
}
if (readSize == -1) {
@@ -170,8 +168,8 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
} else {
//try to read char-by-char until whitespace to not break words
while ((totalRead < MAX_EXTR_TEXT_CHARS - 1)
&& !Character.isWhitespace(TEXT_CHUNK_BUF[(int) totalRead - 1])
&& (readSize = reader.read(TEXT_CHUNK_BUF, (int) totalRead, 1)) != -1) {
&& !Character.isWhitespace(textChunkBuf[(int) totalRead - 1])
&& (readSize = reader.read(textChunkBuf, (int) totalRead, 1)) != -1) {
totalRead += readSize;
}
if (readSize == -1) {
@@ -189,9 +187,9 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
//inject BOM here (saves byte buffer realloc later), will be converted to specific encoding BOM
//sb.append(UTF16BOM); disabled prepending of BOM
if (totalRead < MAX_EXTR_TEXT_CHARS) {
sb.append(TEXT_CHUNK_BUF, 0, (int) totalRead);
sb.append(textChunkBuf, 0, (int) totalRead);
} else {
sb.append(TEXT_CHUNK_BUF);
sb.append(textChunkBuf);
}
//reset for next chunk
@@ -211,6 +209,7 @@ class AbstractFileTikaTextExtract implements AbstractFileExtract {
extracted = sb.toString();
//converts BOM automatically to charSet encoding
byte[] encodedBytes = extracted.getBytes(OUTPUT_CHARSET);
AbstractFileChunk chunk = new AbstractFileChunk(this, this.numChunks + 1);
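
Note: the chunking loop in this file (and the near-identical one in AbstractFileHtmlExtract) fills textChunkBuf with reads of up to SINGLE_READ_CHARS, and once the chunk is nearly full it reads one character at a time until it hits whitespace so a word is never split across two chunks. A condensed sketch of that boundary logic, reusing the constant names from the diff but with simplified control flow:

    import java.io.IOException;
    import java.io.Reader;

    // Sketch: fill a chunk buffer with bulk reads, then finish the chunk
    // on a whitespace boundary so words are kept whole.
    class WordBoundaryChunker {
        private static final int MAX_EXTR_TEXT_CHARS = 512 * 1024;
        private static final int SINGLE_READ_CHARS = 1024;
        private static final int EXTRA_CHARS = 128; // slack left to finish the last word
        private final char[] textChunkBuf = new char[MAX_EXTR_TEXT_CHARS];

        /** Returns the number of chars placed in textChunkBuf, or -1 at end of stream. */
        int readChunk(Reader reader) throws IOException {
            int totalRead = 0;
            int readSize = 0;
            // bulk reads until only EXTRA_CHARS of slack remain in the chunk
            while (totalRead < MAX_EXTR_TEXT_CHARS - SINGLE_READ_CHARS - EXTRA_CHARS
                    && (readSize = reader.read(textChunkBuf, totalRead, SINGLE_READ_CHARS)) != -1) {
                totalRead += readSize;
            }
            if (totalRead == 0) {
                return -1; // nothing left in the stream
            }
            // char-by-char until whitespace so the chunk ends on a word boundary
            while (totalRead < MAX_EXTR_TEXT_CHARS - 1
                    && !Character.isWhitespace(textChunkBuf[totalRead - 1])
                    && (readSize = reader.read(textChunkBuf, totalRead, 1)) != -1) {
                totalRead += readSize;
            }
            return totalRead;
        }
    }

The original also tracks end-of-file across chunks so the caller knows when to stop; the sketch collapses that into the -1 return value.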

View File

@@ -395,7 +395,7 @@ class Ingester {
try {
solrServer.closeCore();
} catch (KeywordSearchModuleException ex) {
logger.log(Level.WARNING, "Cannot close core while restating", ex);
logger.log(Level.WARNING, "Cannot close core", ex);
}
solrServer.stop();
@@ -403,7 +403,7 @@
try {
solrServer.start();
} catch (KeywordSearchModuleException ex) {
logger.log(Level.WARNING, "Cannot start while restating", ex);
logger.log(Level.WARNING, "Cannot start", ex);
} catch (SolrServerNoPortException ex) {
logger.log(Level.WARNING, "Cannot start server with this port", ex);
}
@@ -411,7 +411,7 @@
try {
solrServer.openCore();
} catch (KeywordSearchModuleException ex) {
logger.log(Level.WARNING, "Cannot open core while restating", ex);
logger.log(Level.WARNING, "Cannot open core", ex);
}
}
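
Note: the hunks above only trim the warning messages in the Solr restart path; the close, start, and open steps each keep their own try/catch, so a failure in one step is logged and the remaining steps still run. A sketch of that keep-going restart pattern against a stand-in server interface (not the Autopsy Server class):

    import java.util.logging.Level;
    import java.util.logging.Logger;

    // Sketch: attempt every restart step even if an earlier one failed,
    // logging each failure instead of aborting the whole sequence.
    class SolrRestart {
        interface CoreServer {
            void closeCore() throws Exception;
            void stop() throws Exception;
            void start() throws Exception;
            void openCore() throws Exception;
        }

        private static final Logger logger = Logger.getLogger(SolrRestart.class.getName());

        void restart(CoreServer server) {
            try {
                server.closeCore();
            } catch (Exception ex) {
                logger.log(Level.WARNING, "Cannot close core", ex);
            }
            try {
                server.stop();
            } catch (Exception ex) {
                logger.log(Level.WARNING, "Cannot stop server", ex);
            }
            try {
                server.start();
            } catch (Exception ex) {
                logger.log(Level.WARNING, "Cannot start server", ex);
            }
            try {
                server.openCore();
            } catch (Exception ex) {
                logger.log(Level.WARNING, "Cannot open core", ex);
            }
        }
    }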

View File

@@ -57,7 +57,7 @@ class KeywordSearchPanel extends javax.swing.JPanel {
/**
* @return the default instance KeywordSearchPanel
*/
public static KeywordSearchPanel getDefault() {
public synchronized static KeywordSearchPanel getDefault() {
if (instance == null) {
instance = new KeywordSearchPanel();
}
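
Note: adding synchronized to getDefault() makes the lazily created panel singleton safe to request from multiple threads; without the lock, two threads could both see a null instance and each construct a panel. A minimal sketch of the pattern with a generic class name:

    // Sketch: lazy singleton behind a synchronized factory method; the
    // monitor guarantees a single construction and that later callers see
    // the fully initialized instance.
    class LazySingleton {
        private static LazySingleton instance = null;

        private LazySingleton() {
        }

        public static synchronized LazySingleton getDefault() {
            if (instance == null) {
                instance = new LazySingleton();
            }
            return instance;
        }
    }

An initialization-on-demand holder class would avoid the per-call lock, but the synchronized accessor is the smallest change to existing lazy-init code.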

View File

@@ -57,7 +57,7 @@ public final class SearchRunner {
private static final Logger logger = Logger.getLogger(SearchRunner.class.getName());
private static SearchRunner instance = null;
private IngestServices services = IngestServices.getInstance();
private Ingester ingester = null; //guarded by "ingester"
private Ingester ingester = null;
private volatile boolean updateTimerRunning = false;
private Timer updateTimer;
private Map<Long, SearchJobInfo> jobs = new HashMap<>(); //guarded by "this"
@@ -173,9 +173,7 @@
* Commits index and notifies listeners of index update
*/
private void commit() {
synchronized(ingester) {
ingester.commit();
}
ingester.commit();
// Signal a potential change in number of text_ingested files
try {