Merge pull request #5350 from sleuthkit/develop

Merge develop branch into data-src-deletion branch
This commit is contained in:
Richard Cordovano 2019-10-17 12:57:06 -04:00 committed by GitHub
commit ab19fd7dc2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 172 additions and 96 deletions

View File

@ -22,6 +22,9 @@ import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.InvalidPathException;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.util.ArrayList; import java.util.ArrayList;
@ -57,6 +60,7 @@ import org.openide.util.NbBundle;
import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException; import org.sleuthkit.autopsy.casemodule.NoCurrentCaseException;
import org.sleuthkit.autopsy.casemodule.services.FileManager; import org.sleuthkit.autopsy.casemodule.services.FileManager;
import static org.sleuthkit.autopsy.coreutils.FileUtil.escapeFileName;
import org.sleuthkit.autopsy.coreutils.Logger; import org.sleuthkit.autopsy.coreutils.Logger;
import org.sleuthkit.autopsy.ingest.IngestJobContext; import org.sleuthkit.autopsy.ingest.IngestJobContext;
import org.sleuthkit.autopsy.ingest.IngestServices; import org.sleuthkit.autopsy.ingest.IngestServices;
@ -159,7 +163,8 @@ class DocumentEmbeddedContentExtractor {
void extractEmbeddedContent(AbstractFile abstractFile) { void extractEmbeddedContent(AbstractFile abstractFile) {
List<ExtractedFile> listOfExtractedImages = null; List<ExtractedFile> listOfExtractedImages = null;
List<AbstractFile> listOfExtractedImageAbstractFiles = null; List<AbstractFile> listOfExtractedImageAbstractFiles = null;
this.parentFileName = EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile); //save the parent file name with out illegal windows characters
this.parentFileName = utf8SanitizeFileName(EmbeddedFileExtractorIngestModule.getUniqueName(abstractFile));
// Skip files that already have been unpacked. // Skip files that already have been unpacked.
try { try {
@ -289,7 +294,6 @@ class DocumentEmbeddedContentExtractor {
// These get thrown in certain images. The reason is unknown. It is // These get thrown in certain images. The reason is unknown. It is
// likely due to problems with the file formats that POI is poorly // likely due to problems with the file formats that POI is poorly
// handling. // handling.
//Any runtime exception escaping //Any runtime exception escaping
LOGGER.log(Level.WARNING, "Word document container could not be initialized. Reason: {0}", ex.getMessage()); //NON-NLS LOGGER.log(Level.WARNING, "Word document container could not be initialized. Reason: {0}", ex.getMessage()); //NON-NLS
return null; return null;
@ -308,7 +312,7 @@ class DocumentEmbeddedContentExtractor {
byte[] data = null; byte[] data = null;
int pictureNumber = 0; //added to ensure uniqueness in cases where suggestFullFileName returns duplicates int pictureNumber = 0; //added to ensure uniqueness in cases where suggestFullFileName returns duplicates
for (Picture picture : listOfAllPictures) { for (Picture picture : listOfAllPictures) {
String fileName = UNKNOWN_IMAGE_NAME_PREFIX +pictureNumber +"."+ picture.suggestFileExtension(); String fileName = UNKNOWN_IMAGE_NAME_PREFIX + pictureNumber + "." + picture.suggestFileExtension();
try { try {
data = picture.getContent(); data = picture.getContent();
} catch (Exception ex) { } catch (Exception ex) {
@ -479,6 +483,7 @@ class DocumentEmbeddedContentExtractor {
* Extracts embedded attachments from PDF files. * Extracts embedded attachments from PDF files.
* *
* @param abstractFile Input PDF file * @param abstractFile Input PDF file
*
* @return List of extracted files to be made into derived file instances. * @return List of extracted files to be made into derived file instances.
*/ */
private List<ExtractedFile> extractEmbeddedContentFromPDF(AbstractFile abstractFile) { private List<ExtractedFile> extractEmbeddedContentFromPDF(AbstractFile abstractFile) {
@ -501,8 +506,8 @@ class DocumentEmbeddedContentExtractor {
}); });
return extractedFiles; return extractedFiles;
} catch (IOException | SAXException | TikaException ex) { } catch (IOException | SAXException | TikaException | InvalidPathException ex) {
LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs", ex); //NON-NLS LOGGER.log(Level.WARNING, "Error attempting to extract attachments from PDFs for file Name: " + abstractFile.getName() + " ID: " + abstractFile.getId(), ex); //NON-NLS
} }
return Collections.emptyList(); return Collections.emptyList();
} }
@ -557,6 +562,19 @@ class DocumentEmbeddedContentExtractor {
return Paths.get(moduleDirRelative, this.parentFileName, fileName).toString(); return Paths.get(moduleDirRelative, this.parentFileName, fileName).toString();
} }
/**
 * Escapes a file name (or a single component of a file name) and then
 * round-trips the escaped text through the UTF-8 charset.
 *
 * @param fileName The file name, or file name component, to sanitize.
 *
 * @return The escaped name after it has been encoded to UTF-8 and decoded
 *         back into a string.
 */
private static String utf8SanitizeFileName(String fileName) {
    final Charset utf8 = StandardCharsets.UTF_8;
    // Escape first; the exact escaping rules live in FileUtil.escapeFileName.
    final String escapedName = escapeFileName(fileName);
    // Encoding and decoding again yields a string that is valid UTF-8.
    return utf8.decode(utf8.encode(escapedName)).toString();
}
/** /**
* Represents a file extracted using either Tika or POI methods. Currently, * Represents a file extracted using either Tika or POI methods. Currently,
* POI is not capable of extracting ctime, crtime, mtime, and atime; these * POI is not capable of extracting ctime, crtime, mtime, and atime; these
@ -669,6 +687,8 @@ class DocumentEmbeddedContentExtractor {
//that might be included in the name) and make sure //that might be included in the name) and make sure
//to normalize the name //to normalize the name
name = FilenameUtils.normalize(FilenameUtils.getName(name)); name = FilenameUtils.normalize(FilenameUtils.getName(name));
//remove any illegal characters from name
name = utf8SanitizeFileName(name);
} }
// Get the suggested extension based on mime type. // Get the suggested extension based on mime type.

View File

@ -45,33 +45,69 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
private static final Charset UTF_8 = StandardCharsets.UTF_8; private static final Charset UTF_8 = StandardCharsets.UTF_8;
//Chunking algorithm parameters-------------------------------------// //Chunking algorithm parameters-------------------------------------//
/** the maximum size of a chunk, including the window. */ /**
* the maximum size of a chunk, including the window.
*/
private static final int MAX_TOTAL_CHUNK_SIZE = 32760; //bytes private static final int MAX_TOTAL_CHUNK_SIZE = 32760; //bytes
/** the minimum to read before we start the process of looking for /**
* whitespace to break at and creating an overlapping window. */ * the minimum to read before we start the process of looking for whitespace
* to break at and creating an overlapping window.
*/
private static final int MINIMUM_BASE_CHUNK_SIZE = 30 * 1024; //bytes private static final int MINIMUM_BASE_CHUNK_SIZE = 30 * 1024; //bytes
/** The maximum size of the chunk, before the overlapping window, even if we /**
* couldn't find whitespace to break at. */ * The maximum size of the chunk, before the overlapping window, even if we
* couldn't find whitespace to break at.
*/
private static final int MAXIMUM_BASE_CHUNK_SIZE = 31 * 1024; //bytes private static final int MAXIMUM_BASE_CHUNK_SIZE = 31 * 1024; //bytes
/** The amount of text we will read through before we give up on finding /**
* whitespace to break the chunk/window at. */ * The amount of text we will read through before we give up on finding
* whitespace to break the chunk/window at.
*/
private static final int WHITE_SPACE_BUFFER_SIZE = 512; //bytes private static final int WHITE_SPACE_BUFFER_SIZE = 512; //bytes
/** The number of characters to read in one go from the Reader. */ /**
* The number of characters to read in one go from the Reader.
*/
private static final int READ_CHARS_BUFFER_SIZE = 512; //chars private static final int READ_CHARS_BUFFER_SIZE = 512; //chars
/**
* When toLowerCase() is called on a character, the lower cased output
* can be different in size than the original input. I have seen a single
* input character turn into 3 characters (and 5 bytes) after lowercasing.
* I could not find any info as to what is the upper limit of how much a
* character can "increase in size" during lower casing. I'm guesstimating
* and setting that limit at 10 bytes.
*/
private static final int MAX_CHAR_SIZE_INCREASE_IN_BYTES = 10; //bytes
////chunker state--------------------------------------------/// ////chunker state--------------------------------------------///
/** The Reader that this chunk reads from, and divides into chunks. It must /**
* be a buffered reader to ensure that mark/reset are supported. */ * The Reader that this chunk reads from, and divides into chunks. It must
* be a buffered reader to ensure that mark/reset are supported.
*/
private final PushbackReader reader; private final PushbackReader reader;
/** The local buffer of characters read from the Reader. */ /**
* The local buffer of characters read from the Reader.
*/
private final char[] tempChunkBuf = new char[READ_CHARS_BUFFER_SIZE]; private final char[] tempChunkBuf = new char[READ_CHARS_BUFFER_SIZE];
/** the size in bytes of the chunk (so far). */ /**
* the size in bytes of the chunk (so far).
*/
private int chunkSizeBytes = 0; private int chunkSizeBytes = 0;
/** Has the chunker reached the end of the Reader? If so, there are no more
* chunks, and the current chunk does not need a window. */ /**
* the size in bytes of the lowercased chunk (so far). Note that lowercasing
* in Java can change the size of the string so we need to make sure the
* lowercased string also fits in MAX_TOTAL_CHUNK_SIZE.
*/
private int lowerCasedChunkSizeBytes = 0;
/**
* Has the chunker reached the end of the Reader? If so, there are no more
* chunks, and the current chunk does not need a window.
*/
private boolean endOfReaderReached = false; private boolean endOfReaderReached = false;
/** Store any exception encountered reading from the Reader. */ /**
* Store any exception encountered reading from the Reader.
*/
private Exception ex; private Exception ex;
/** /**
@ -140,7 +176,7 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
* @param s The string to cleanup. * @param s The string to cleanup.
* *
* @return A StringBuilder with the same content as s but where all invalid * @return A StringBuilder with the same content as s but where all invalid
* code * points have been replaced. * code * points have been replaced.
*/ */
private static StringBuilder replaceInvalidUTF16(String s) { private static StringBuilder replaceInvalidUTF16(String s) {
/* encode the string to UTF-16 which does the replacement, see /* encode the string to UTF-16 which does the replacement, see
@ -162,16 +198,18 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
//reset state for the next chunk //reset state for the next chunk
chunkSizeBytes = 0; chunkSizeBytes = 0;
lowerCasedChunkSizeBytes = 0;
int baseChunkSizeChars = 0; int baseChunkSizeChars = 0;
StringBuilder currentChunk = new StringBuilder(); StringBuilder currentChunk = new StringBuilder();
StringBuilder currentWindow = new StringBuilder(); StringBuilder currentWindow = new StringBuilder();
StringBuilder lowerCasedChunk = new StringBuilder();
try { try {
currentChunk.append(readBaseChunk()); readBaseChunk(currentChunk, lowerCasedChunk);
baseChunkSizeChars = currentChunk.length(); //save the base chunk length baseChunkSizeChars = currentChunk.length(); //save the base chunk length
currentWindow.append(readWindow()); readWindow(currentWindow, lowerCasedChunk);
//add the window text to the current chunk. //add the window text to the current chunk.
currentChunk.append(currentWindow); currentChunk.append(currentWindow);
if (endOfReaderReached) { if (endOfReaderReached) {
/* if we have reached the end of the content,we won't make /* if we have reached the end of the content,we won't make
* another overlapping chunk, so the length of the base chunk * another overlapping chunk, so the length of the base chunk
@ -188,7 +226,7 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
} }
//sanitize the text and return a Chunk object, that includes the base chunk length. //sanitize the text and return a Chunk object, that includes the base chunk length.
return new Chunk(currentChunk, baseChunkSizeChars, chunkSizeBytes); return new Chunk(currentChunk, baseChunkSizeChars, lowerCasedChunk);
} }
/** /**
@ -196,14 +234,12 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
* *
* @throws IOException if there is a problem reading from the reader. * @throws IOException if there is a problem reading from the reader.
*/ */
private StringBuilder readBaseChunk() throws IOException { private void readBaseChunk(StringBuilder currentChunk, StringBuilder lowerCasedChunk) throws IOException {
StringBuilder currentChunk = new StringBuilder();
//read the chunk until the minimum base chunk size //read the chunk until the minimum base chunk size
readHelper(MINIMUM_BASE_CHUNK_SIZE, currentChunk); readHelper(MINIMUM_BASE_CHUNK_SIZE, currentChunk, lowerCasedChunk);
//keep reading until the maximum base chunk size or white space is reached. //keep reading until the maximum base chunk size or white space is reached.
readToWhiteSpaceHelper(MAXIMUM_BASE_CHUNK_SIZE, currentChunk); readToWhiteSpaceHelper(MAXIMUM_BASE_CHUNK_SIZE, currentChunk, lowerCasedChunk);
return currentChunk;
} }
/** /**
@ -211,14 +247,12 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
* *
* @throws IOException if there is a problem reading from the reader. * @throws IOException if there is a problem reading from the reader.
*/ */
private StringBuilder readWindow() throws IOException { private void readWindow(StringBuilder currentChunk, StringBuilder lowerCasedChunk) throws IOException {
StringBuilder currentWindow = new StringBuilder();
//read the window, leaving some room to look for white space to break at. //read the window, leaving some room to look for white space to break at.
readHelper(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, currentWindow); readHelper(MAX_TOTAL_CHUNK_SIZE - WHITE_SPACE_BUFFER_SIZE, currentChunk, lowerCasedChunk);
//keep reading until the max chunk size, or until whitespace is reached. //keep reading until the max chunk size, or until whitespace is reached.
readToWhiteSpaceHelper(MAX_TOTAL_CHUNK_SIZE, currentWindow); readToWhiteSpaceHelper(MAX_TOTAL_CHUNK_SIZE, currentChunk, lowerCasedChunk);
return currentWindow;
} }
/** /**
@ -229,10 +263,10 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
* *
* @throws IOException * @throws IOException
*/ */
private void readHelper(int maxBytes, StringBuilder currentSegment) throws IOException { private void readHelper(int maxBytes, StringBuilder currentSegment, StringBuilder currentLowerCasedSegment) throws IOException {
int charsRead = 0; int charsRead = 0;
//read chars up to maxBytes, or the end of the reader. //read chars up to maxBytes, or the end of the reader.
while ((chunkSizeBytes < maxBytes) while ((chunkSizeBytes < maxBytes) && (lowerCasedChunkSizeBytes < maxBytes)
&& (endOfReaderReached == false)) { && (endOfReaderReached == false)) {
charsRead = reader.read(tempChunkBuf, 0, READ_CHARS_BUFFER_SIZE); charsRead = reader.read(tempChunkBuf, 0, READ_CHARS_BUFFER_SIZE);
if (-1 == charsRead) { if (-1 == charsRead) {
@ -253,11 +287,19 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
//get the length in utf8 bytes of the read chars //get the length in utf8 bytes of the read chars
int segmentSize = chunkSegment.toString().getBytes(UTF_8).length; int segmentSize = chunkSegment.toString().getBytes(UTF_8).length;
// lower case the string and get its size. NOTE: lower casing can
// change the size of the string!
String lowerCasedSegment = chunkSegment.toString().toLowerCase();
int lowerCasedSegmentSize = lowerCasedSegment.getBytes(UTF_8).length;
//if it will not put us past maxBytes //if it will not put us past maxBytes
if (chunkSizeBytes + segmentSize < maxBytes) { if ((chunkSizeBytes + segmentSize < maxBytes) && (lowerCasedChunkSizeBytes + lowerCasedSegmentSize < maxBytes)) {
//add it to the chunk //add it to the chunk
currentSegment.append(chunkSegment); currentSegment.append(chunkSegment);
chunkSizeBytes += segmentSize; chunkSizeBytes += segmentSize;
currentLowerCasedSegment.append(lowerCasedSegment);
lowerCasedChunkSizeBytes += lowerCasedSegmentSize;
} else { } else {
//unread it, and break out of read loop. //unread it, and break out of read loop.
reader.unread(tempChunkBuf, 0, charsRead); reader.unread(tempChunkBuf, 0, charsRead);
@ -275,11 +317,12 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
* *
* @throws IOException * @throws IOException
*/ */
private void readToWhiteSpaceHelper(int maxBytes, StringBuilder currentChunk) throws IOException { private void readToWhiteSpaceHelper(int maxBytes, StringBuilder currentChunk, StringBuilder lowerCasedChunk) throws IOException {
int charsRead = 0; int charsRead = 0;
boolean whitespaceFound = false; boolean whitespaceFound = false;
//read 1 char at a time up to maxBytes, whitespaceFound, or we reach the end of the reader. //read 1 char at a time up to maxBytes, whitespaceFound, or we reach the end of the reader.
while ((chunkSizeBytes < maxBytes) while ((chunkSizeBytes < maxBytes - MAX_CHAR_SIZE_INCREASE_IN_BYTES)
&& (lowerCasedChunkSizeBytes < maxBytes - MAX_CHAR_SIZE_INCREASE_IN_BYTES)
&& (whitespaceFound == false) && (whitespaceFound == false)
&& (endOfReaderReached == false)) { && (endOfReaderReached == false)) {
charsRead = reader.read(tempChunkBuf, 0, 1); charsRead = reader.read(tempChunkBuf, 0, 1);
@ -314,6 +357,12 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
//add read chars to the chunk and update the length. //add read chars to the chunk and update the length.
currentChunk.append(sanitizedChunkSegment); currentChunk.append(sanitizedChunkSegment);
chunkSizeBytes += sanitizedChunkSegment.toString().getBytes(UTF_8).length; chunkSizeBytes += sanitizedChunkSegment.toString().getBytes(UTF_8).length;
// lower case the string and get its size. NOTE: lower casing can
// change the size of the string.
String lowerCasedSegment = sanitizedChunkSegment.toString().toLowerCase();
lowerCasedChunk.append(lowerCasedSegment);
lowerCasedChunkSizeBytes += lowerCasedSegment.getBytes(UTF_8).length;
} }
} }
} }
@ -326,16 +375,16 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
private final StringBuilder sb; private final StringBuilder sb;
private final int baseChunkSizeChars; private final int baseChunkSizeChars;
private final int chunkSizeBytes; private final StringBuilder lowerCasedChunk;
Chunk(StringBuilder sb, int baseChunkSizeChars, int chunkSizeBytes) { Chunk(StringBuilder sb, int baseChunkSizeChars, StringBuilder lowerCasedChunk) {
this.sb = sb; this.sb = sb;
this.baseChunkSizeChars = baseChunkSizeChars; this.baseChunkSizeChars = baseChunkSizeChars;
this.chunkSizeBytes = chunkSizeBytes; this.lowerCasedChunk = lowerCasedChunk;
} }
/** /**
* Get the content of the chunk. * Get the content of the original (non-lower cased) chunk.
* *
* @return The content of the chunk. * @return The content of the chunk.
*/ */
@ -345,16 +394,16 @@ class Chunker implements Iterator<Chunk>, Iterable<Chunk> {
} }
/** /**
* Get the size in bytes of the utf-8 encoding of the entire chunk. * Get the content of the lower cased chunk.
* *
* @return the size in bytes of the utf-8 encoding of the entire chunk * @return The content of the chunk.
*/ */
public int getChunkSizeBytes() { public String geLowerCasedChunk() {
return chunkSizeBytes; return lowerCasedChunk.toString();
} }
/** /**
* Get the length of the base chunk in java chars. * Get the length of the original (non-lower cased) base chunk in java chars.
* *
* @return the length of the base chunk in java chars. * @return the length of the base chunk in java chars.
*/ */

View File

@ -391,7 +391,7 @@ class HighlightedText implements IndexedText {
} }
//tune the highlighter //tune the highlighter
if (shouldUseOriginalHighlighter(contentIdStr)) { if (shouldUseOriginalHighlighter(filterQuery)) {
// use original highlighter // use original highlighter
q.setParam("hl.useFastVectorHighlighter", "off"); q.setParam("hl.useFastVectorHighlighter", "off");
q.setParam("hl.simple.pre", HIGHLIGHT_PRE); q.setParam("hl.simple.pre", HIGHLIGHT_PRE);
@ -618,11 +618,13 @@ class HighlightedText implements IndexedText {
* > <em></em><em>降っ</em>ています * > <em></em><em>降っ</em>ています
* Unified highlighter (from Solr 6.4) handles the case as expected: * Unified highlighter (from Solr 6.4) handles the case as expected:
* > <em>雨が降っ</em>ています * > <em>雨が降っ</em>ています
*
* @param filterQuery An already properly escaped filter query.
*/ */
private boolean shouldUseOriginalHighlighter(String contentID) throws NoOpenCoreException, KeywordSearchModuleException { private boolean shouldUseOriginalHighlighter(String filterQuery) throws NoOpenCoreException, KeywordSearchModuleException {
final SolrQuery q = new SolrQuery(); final SolrQuery q = new SolrQuery();
q.setQuery("*:*"); q.setQuery("*:*");
q.addFilterQuery(Server.Schema.ID.toString() + ":" + contentID); q.addFilterQuery(filterQuery);
q.setFields(Server.Schema.LANGUAGE.toString()); q.setFields(Server.Schema.LANGUAGE.toString());
QueryResponse response = solrServer.query(q, METHOD.POST); QueryResponse response = solrServer.query(q, METHOD.POST);

View File

@ -97,7 +97,7 @@ class Ingester {
* file, but the Solr server is probably fine. * file, but the Solr server is probably fine.
*/ */
void indexMetaDataOnly(AbstractFile file) throws IngesterException { void indexMetaDataOnly(AbstractFile file) throws IngesterException {
indexChunk("", file.getName().toLowerCase(), new HashMap<>(getContentFields(file))); indexChunk("", "", file.getName().toLowerCase(), new HashMap<>(getContentFields(file)));
} }
/** /**
@ -111,7 +111,7 @@ class Ingester {
* artifact, but the Solr server is probably fine. * artifact, but the Solr server is probably fine.
*/ */
void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException { void indexMetaDataOnly(BlackboardArtifact artifact, String sourceName) throws IngesterException {
indexChunk("", sourceName, new HashMap<>(getContentFields(artifact))); indexChunk("", "", sourceName, new HashMap<>(getContentFields(artifact)));
} }
/** /**
@ -166,7 +166,7 @@ class Ingester {
language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang)); language.ifPresent(lang -> languageSpecificContentIndexingHelper.updateLanguageSpecificFields(fields, chunk, lang));
try { try {
//add the chunk text to Solr index //add the chunk text to Solr index
indexChunk(chunk.toString(), sourceName, fields); indexChunk(chunk.toString(), chunk.geLowerCasedChunk(), sourceName, fields);
// add mini chunk when there's a language specific field // add mini chunk when there's a language specific field
if (chunker.hasNext() && language.isPresent()) { if (chunker.hasNext() && language.isPresent()) {
languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get()); languageSpecificContentIndexingHelper.indexMiniChunk(chunk, sourceName, new HashMap<>(contentFields), chunkId, language.get());
@ -197,7 +197,7 @@ class Ingester {
fields.put(Server.Schema.ID.toString(), Long.toString(sourceID)); fields.put(Server.Schema.ID.toString(), Long.toString(sourceID));
//"parent" docs don't have chunk_size //"parent" docs don't have chunk_size
fields.remove(Server.Schema.CHUNK_SIZE.toString()); fields.remove(Server.Schema.CHUNK_SIZE.toString());
indexChunk(null, sourceName, fields); indexChunk(null, null, sourceName, fields);
} }
} }
return true; return true;
@ -211,12 +211,13 @@ class Ingester {
* 4.0.0), see if possible to stream with UpdateRequestHandler * 4.0.0), see if possible to stream with UpdateRequestHandler
* *
* @param chunk The chunk content as a string, or null for metadata only * @param chunk The chunk content as a string, or null for metadata only
* @param lowerCasedChunk The lower cased chunk content as a string, or null for metadata only
* @param fields * @param fields
* @param size * @param size
* *
* @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException * @throws org.sleuthkit.autopsy.keywordsearch.Ingester.IngesterException
*/ */
private void indexChunk(String chunk, String sourceName, Map<String, Object> fields) throws IngesterException { private void indexChunk(String chunk, String lowerCasedChunk, String sourceName, Map<String, Object> fields) throws IngesterException {
if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) { if (fields.get(Server.Schema.IMAGE_ID.toString()) == null) {
//JMTODO: actually if the we couldn't get the image id it is set to -1, //JMTODO: actually if the we couldn't get the image id it is set to -1,
// but does this really mean we don't want to index it? // but does this really mean we don't want to index it?
@ -245,7 +246,7 @@ class Ingester {
// insensitive substring/regular expression search. // insensitive substring/regular expression search.
double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion()); double indexSchemaVersion = NumberUtils.toDouble(solrServer.getIndexInfo().getSchemaVersion());
if (indexSchemaVersion >= 2.1) { if (indexSchemaVersion >= 2.1) {
updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? "" : chunk.toLowerCase())); updateDoc.addField(Server.Schema.CONTENT_STR.toString(), ((chunk == null) ? "" : lowerCasedChunk));
} }
TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk"); TimingMetric metric = HealthMonitor.getTimingMetric("Solr: Index chunk");

View File

@ -72,11 +72,11 @@ GStreamer for viewing video files
- Web page: http://gstreamer.freedesktop.org/ - Web page: http://gstreamer.freedesktop.org/
- License: http://www.gnu.org/licenses/lgpl.html - License: http://www.gnu.org/licenses/lgpl.html
GStreamer-java for viewing video files GStreamer 1.x Java Core for viewing video files
- Web page: http://code.google.com/p/gstreamer-java/ - Web page: https://github.com/gstreamer-java/gst1-java-core
- License: http://www.gnu.org/licenses/lgpl.html - License: https://github.com/gstreamer-java/gst1-java-core/blob/master/LICENSE.md
Regripper for pulling recently activity Regripper for pulling recent activity
(Including custom plugins) (Including custom plugins)
- Web page: http://regripper.wordpress.com/ - Web page: http://regripper.wordpress.com/
- License: http://www.gnu.org/licenses/gpl.html - License: http://www.gnu.org/licenses/gpl.html

View File

@ -12,19 +12,23 @@ The following need to be done at least once. They do not need to be repeated for
- Install a Java 8 JRE and JavaFX 8 and set JAVA_HOME. - Install a Java 8 JRE and JavaFX 8 and set JAVA_HOME.
-- Linux: Any Java 8 version of OpenJDK/OpenJFX distribution should suffice. The following instructions use the Zulu Community distribution. -- Linux: Any Java 8 version of OpenJDK/OpenJFX distribution should suffice. The following instructions use the Zulu Community distribution.
1. Download a 64 bit Java 8 JRE for your specific platform from https://www.azul.com/downloads/zulu-community 1. Download a 64 bit Java 8 JRE for your specific platform from https://www.azul.com/downloads/zulu-community
2. Install the JRE. e.g. 'sudo apt install ./zulu8.40.0.25-ca-jre8.0.222-linux_amd64.deb' 2. Install the JRE. e.g. % sudo apt install ./zulu8.40.0.25-ca-jre8.0.222-linux_amd64.deb
3. Download a 64 bit Java 8 JavaFX for your specific platform from the same location. 3. Download a 64 bit Java 8 JavaFX for your specific platform from the same location.
4. Extract the contents of the JavaFX archive into the folder where the JRE was installed. 4. Extract the contents of the JavaFX archive into the folder where the JRE was installed.
e.g. 'cd /usr/lib/jvm/zre-8-amd64; sudo tar xzf ~/Downloads/zulu8.40.0.25-ca-fx-jre8.0.222-linux_x64.tar.gz --strip-components=1' e.g. % sudo tar xzf ~/Downloads/zulu8.40.0.25-ca-fx-jre8.0.222-linux_x64.tar.gz -C /usr/lib/jvm/zre-8-amd64 --strip-components=1
5. Confirm Java 8 is being found by running 'java -version'
6. Set JAVA_HOME environment variable to location of JRE installation (e.g. /usr/lib/jvm/zre-8-amd64)
NOTE: You may need to log out and back in again after setting JAVA_HOME before the Autopsy NOTE: You may need to log out and back in again after setting JAVA_HOME before the Autopsy
unix_setup.sh script can see the value. unix_setup.sh script can see the value.
-- OS X: Use The Oracle website: https://www.java.com/ -- OS X: Any Java 8 version of OpenJDK/OpenJFX distribution should suffice.
Set JAVA_HOME with something like: export JAVA_HOME=`/usr/libexec/java_home` in .bash_profile 1. Install a 64 bit Java 8 JRE.
% brew tap adoptopenjdk/openjdk
% brew cask install adoptopenjdk8
2. Download a 64 bit Java 8 JavaFX for macOS from https://www.azul.com/downloads/zulu-community
3. Extract the contents of the JavaFX archive into the folder where the JRE was installed.
e.g. % sudo tar xf ~/Downloads/zulu8.40.0.25-ca-fx-jre8.0.222-macosx_x64.tar.gz -C /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home --strip-components=1
4. Confirm Java 8 is being found by running 'java -version'
5. Set JAVA_HOME environment variable to location of JRE installation.
* Install The Sleuth Kit Java Bindings * * Install The Sleuth Kit Java Bindings *

View File

@ -3,7 +3,7 @@
If you find that you need to debug some of the C/C++ code from The Sleuth Kit (TSK), then here are the steps to follow: If you find that you need to debug some of the C/C++ code from The Sleuth Kit (TSK), then here are the steps to follow:
-# Ensure that you have the Debug version of the TSK JNI dll built (both 32-bit and 64-bit to be safe). This assumes you built TSK from source and are not simply using the developer platform. You may have to build the libtsk_jni twice because sometimes it complains about not being able to find a .map file. -# Ensure that you have the Debug version of the TSK JNI dll built (both 32-bit and 64-bit to be safe). This assumes you built TSK from source and are not simply using the developer platform. You may have to build the libtsk_jni twice because sometimes it complains about not being able to find a .map file.
-# Run the 'dist-debug' target for the TSK DataModel project. This copies the debug versions of the dll into the JAR file. If you run the 'dist' target, then you will get Release versions of the dll and you won't have the needed symbols for debugging. -# Run the 'Debug-PostgreSQL' target for the TSK DataModel project. This copies the debug versions of the dll into the JAR file. If you run the 'dist' target, then you will get Release versions of the dll and you won't have the needed symbols for debugging.
-# Build the Autopsy suite so that it copies the new JAR file with the debug dlls. -# Build the Autopsy suite so that it copies the new JAR file with the debug dlls.
-# Set your breakpoints in the TSK source. -# Set your breakpoints in the TSK source.
-# Run Autopsy in the debugger. -# Run Autopsy in the debugger.

View File

@ -98,15 +98,15 @@ class ContactsDbIngestModule(DataSourceIngestModule):
# Where any setup and configuration is done # Where any setup and configuration is done
# 'context' is an instance of org.sleuthkit.autopsy.ingest.IngestJobContext. # 'context' is an instance of org.sleuthkit.autopsy.ingest.IngestJobContext.
# See: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_ingest_job_context.html # See: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_ingest_job_context.html
def startUp(self, context): def startUp(self, context):
self.context = context self.context = context
# Where the analysis is done. # Where the analysis is done.
# The 'dataSource' object being passed in is of type org.sleuthkit.datamodel.Content. # The 'dataSource' object being passed in is of type org.sleuthkit.datamodel.Content.
# See: http://www.sleuthkit.org/sleuthkit/docs/jni-docs/4.6.0/interfaceorg_1_1sleuthkit_1_1datamodel_1_1_content.html # See: http://www.sleuthkit.org/sleuthkit/docs/jni-docs/4.13.0/interfaceorg_1_1sleuthkit_1_1datamodel_1_1_content.html
# 'progressBar' is of type org.sleuthkit.autopsy.ingest.DataSourceIngestModuleProgress # 'progressBar' is of type org.sleuthkit.autopsy.ingest.DataSourceIngestModuleProgress
# See: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_data_source_ingest_module_progress.html # See: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_data_source_ingest_module_progress.html
def process(self, dataSource, progressBar): def process(self, dataSource, progressBar):
# we don't know how much work there is yet # we don't know how much work there is yet

View File

@ -92,7 +92,7 @@ class FindBigRoundFilesIngestModule(FileIngestModule):
# Where any setup and configuration is done # Where any setup and configuration is done
# 'context' is an instance of org.sleuthkit.autopsy.ingest.IngestJobContext. # 'context' is an instance of org.sleuthkit.autopsy.ingest.IngestJobContext.
# See: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_ingest_job_context.html # See: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_ingest_job_context.html
# TODO: Add any setup code that you need here. # TODO: Add any setup code that you need here.
def startUp(self, context): def startUp(self, context):
self.filesFound = 0 self.filesFound = 0
@ -103,7 +103,7 @@ class FindBigRoundFilesIngestModule(FileIngestModule):
# Where the analysis is done. Each file will be passed into here. # Where the analysis is done. Each file will be passed into here.
# The 'file' object being passed in is of type org.sleuthkit.datamodel.AbstractFile. # The 'file' object being passed in is of type org.sleuthkit.datamodel.AbstractFile.
# See: http://www.sleuthkit.org/sleuthkit/docs/jni-docs/4.6.0/classorg_1_1sleuthkit_1_1datamodel_1_1_abstract_file.html # See: http://www.sleuthkit.org/sleuthkit/docs/jni-docs/4.13.0/classorg_1_1sleuthkit_1_1datamodel_1_1_abstract_file.html
def process(self, file): def process(self, file):
# Use blackboard class to index blackboard artifacts for keyword search # Use blackboard class to index blackboard artifacts for keyword search

View File

@ -27,7 +27,7 @@
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE. # OTHER DEALINGS IN THE SOFTWARE.
# See http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/index.html for documentation # See http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/index.html for documentation
# Simple report module for Autopsy. # Simple report module for Autopsy.
# Used as part of Python tutorials from Basis Technology - September 2015 # Used as part of Python tutorials from Basis Technology - September 2015
@ -71,7 +71,7 @@ class CSVReportModule(GeneralReportModuleAdapter):
# TODO: Update this method to make a report # TODO: Update this method to make a report
# The 'baseReportDir' object being passed in is a string with the directory that reports are being stored in. Report should go into baseReportDir + getRelativeFilePath(). # The 'baseReportDir' object being passed in is a string with the directory that reports are being stored in. Report should go into baseReportDir + getRelativeFilePath().
# The 'progressBar' object is of type ReportProgressPanel. # The 'progressBar' object is of type ReportProgressPanel.
# See: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1report_1_1_report_progress_panel.html # See: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1report_1_1_report_progress_panel.html
def generateReport(self, baseReportDir, progressBar): def generateReport(self, baseReportDir, progressBar):
# Open the output file. # Open the output file.

View File

@ -29,7 +29,7 @@
# Simple data source-level ingest module for Autopsy. # Simple data source-level ingest module for Autopsy.
# Search for TODO for the things that you need to change # Search for TODO for the things that you need to change
# See http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/index.html for documentation # See http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/index.html for documentation
import jarray import jarray
import inspect import inspect
@ -94,7 +94,7 @@ class SampleJythonDataSourceIngestModule(DataSourceIngestModule):
# Where any setup and configuration is done # Where any setup and configuration is done
# 'context' is an instance of org.sleuthkit.autopsy.ingest.IngestJobContext. # 'context' is an instance of org.sleuthkit.autopsy.ingest.IngestJobContext.
# See: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_ingest_job_context.html # See: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_ingest_job_context.html
# TODO: Add any setup code that you need here. # TODO: Add any setup code that you need here.
def startUp(self, context): def startUp(self, context):
@ -104,9 +104,9 @@ class SampleJythonDataSourceIngestModule(DataSourceIngestModule):
# Where the analysis is done. # Where the analysis is done.
# The 'dataSource' object being passed in is of type org.sleuthkit.datamodel.Content. # The 'dataSource' object being passed in is of type org.sleuthkit.datamodel.Content.
# See: http://www.sleuthkit.org/sleuthkit/docs/jni-docs/4.6.0/interfaceorg_1_1sleuthkit_1_1datamodel_1_1_content.html # See: http://www.sleuthkit.org/sleuthkit/docs/jni-docs/4.13.0/interfaceorg_1_1sleuthkit_1_1datamodel_1_1_content.html
# 'progressBar' is of type org.sleuthkit.autopsy.ingest.DataSourceIngestModuleProgress # 'progressBar' is of type org.sleuthkit.autopsy.ingest.DataSourceIngestModuleProgress
# See: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_data_source_ingest_module_progress.html # See: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_data_source_ingest_module_progress.html
# TODO: Add your analysis code in here. # TODO: Add your analysis code in here.
def process(self, dataSource, progressBar): def process(self, dataSource, progressBar):
@ -119,7 +119,7 @@ class SampleJythonDataSourceIngestModule(DataSourceIngestModule):
# For our example, we will use FileManager to get all # For our example, we will use FileManager to get all
# files with the word "test" # files with the word "test"
# in the name and then count and read them # in the name and then count and read them
# FileManager API: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1casemodule_1_1services_1_1_file_manager.html # FileManager API: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1casemodule_1_1services_1_1_file_manager.html
fileManager = Case.getCurrentCase().getServices().getFileManager() fileManager = Case.getCurrentCase().getServices().getFileManager()
files = fileManager.findFiles(dataSource, "%test%") files = fileManager.findFiles(dataSource, "%test%")

View File

@ -29,7 +29,7 @@
# Simple file-level ingest module for Autopsy. # Simple file-level ingest module for Autopsy.
# Search for TODO for the things that you need to change # Search for TODO for the things that you need to change
# See http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/index.html for documentation # See http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/index.html for documentation
import jarray import jarray
import inspect import inspect
@ -94,7 +94,7 @@ class SampleJythonFileIngestModule(FileIngestModule):
# Where any setup and configuration is done # Where any setup and configuration is done
# 'context' is an instance of org.sleuthkit.autopsy.ingest.IngestJobContext. # 'context' is an instance of org.sleuthkit.autopsy.ingest.IngestJobContext.
# See: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_ingest_job_context.html # See: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1ingest_1_1_ingest_job_context.html
# TODO: Add any setup code that you need here. # TODO: Add any setup code that you need here.
def startUp(self, context): def startUp(self, context):
self.filesFound = 0 self.filesFound = 0
@ -105,7 +105,7 @@ class SampleJythonFileIngestModule(FileIngestModule):
# Where the analysis is done. Each file will be passed into here. # Where the analysis is done. Each file will be passed into here.
# The 'file' object being passed in is of type org.sleuthkit.datamodel.AbstractFile. # The 'file' object being passed in is of type org.sleuthkit.datamodel.AbstractFile.
# See: http://www.sleuthkit.org/sleuthkit/docs/jni-docs/4.6.0/classorg_1_1sleuthkit_1_1datamodel_1_1_abstract_file.html # See: http://www.sleuthkit.org/sleuthkit/docs/jni-docs/4.13.0/classorg_1_1sleuthkit_1_1datamodel_1_1_abstract_file.html
# TODO: Add your analysis code in here. # TODO: Add your analysis code in here.
def process(self, file): def process(self, file):
# Skip non-files # Skip non-files

View File

@ -35,7 +35,7 @@
# don't need a configuration UI, start with the other sample module. # don't need a configuration UI, start with the other sample module.
# #
# Search for TODO for the things that you need to change # Search for TODO for the things that you need to change
# See http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/index.html for documentation # See http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/index.html for documentation
import jarray import jarray

View File

@ -31,7 +31,7 @@
# Sample report module for Autopsy. Use as a starting point for new modules. # Sample report module for Autopsy. Use as a starting point for new modules.
# #
# Search for TODO for the things that you need to change # Search for TODO for the things that you need to change
# See http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/index.html for documentation # See http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/index.html for documentation
import os import os
from java.lang import System from java.lang import System
@ -69,7 +69,7 @@ class SampleGeneralReportModule(GeneralReportModuleAdapter):
# TODO: Update this method to make a report # TODO: Update this method to make a report
# The 'baseReportDir' object being passed in is a string with the directory that reports are being stored in. Report should go into baseReportDir + getRelativeFilePath(). # The 'baseReportDir' object being passed in is a string with the directory that reports are being stored in. Report should go into baseReportDir + getRelativeFilePath().
# The 'progressBar' object is of type ReportProgressPanel. # The 'progressBar' object is of type ReportProgressPanel.
# See: http://sleuthkit.org/autopsy/docs/api-docs/4.6.0/classorg_1_1sleuthkit_1_1autopsy_1_1report_1_1_report_progress_panel.html # See: http://sleuthkit.org/autopsy/docs/api-docs/4.13.0/classorg_1_1sleuthkit_1_1autopsy_1_1report_1_1_report_progress_panel.html
def generateReport(self, baseReportDir, progressBar): def generateReport(self, baseReportDir, progressBar):
# For an example, we write a file with the number of files created in the past 2 weeks # For an example, we write a file with the number of files created in the past 2 weeks