Always index metadata of known files (skip content), and 0 byte files

This commit is contained in:
adam-m 2012-06-28 13:34:08 -04:00
parent 8ba8775931
commit ed9dceb502
4 changed files with 117 additions and 76 deletions

View File

@ -71,7 +71,7 @@ class GetAllFilesContentVisitor extends GetFilesContentVisitor {
StringBuilder queryB = new StringBuilder(); StringBuilder queryB = new StringBuilder();
queryB.append("SELECT * FROM tsk_files WHERE ( (fs_obj_id = ").append(fs.getId()); queryB.append("SELECT * FROM tsk_files WHERE ( (fs_obj_id = ").append(fs.getId());
queryB.append(") OR (fs_obj_id = NULL) ) AND (size > 0)"); queryB.append(") OR (fs_obj_id = NULL) )");
queryB.append(" AND ( (meta_type = ").append(TskData.TSK_FS_META_TYPE_ENUM.TSK_FS_META_TYPE_REG.getMetaType()); queryB.append(" AND ( (meta_type = ").append(TskData.TSK_FS_META_TYPE_ENUM.TSK_FS_META_TYPE_REG.getMetaType());
queryB.append(") OR (meta_type = ").append(TskData.TSK_FS_META_TYPE_ENUM.TSK_FS_META_TYPE_DIR.getMetaType()); queryB.append(") OR (meta_type = ").append(TskData.TSK_FS_META_TYPE_ENUM.TSK_FS_META_TYPE_DIR.getMetaType());
queryB.append( " AND (name != '.') AND (name != '..')"); queryB.append( " AND (name != '.') AND (name != '..')");

View File

@ -365,6 +365,8 @@ public class ExtractedContentViewer implements DataContentViewer {
return false; return false;
} }
if (content.getSize() == 0)
return false;
final Server solrServer = KeywordSearch.getServer(); final Server solrServer = KeywordSearch.getServer();
@ -375,8 +377,6 @@ public class ExtractedContentViewer implements DataContentViewer {
final long contentID = content.getId(); final long contentID = content.getId();
try { try {
return solrServer.queryIsIndexed(contentID); return solrServer.queryIsIndexed(contentID);
} catch (NoOpenCoreException ex) { } catch (NoOpenCoreException ex) {

View File

@ -139,13 +139,15 @@ public class Ingester {
/** /**
* Sends a file to Solr to have its content extracted and added to the * Sends a file to Solr to have its content extracted and added to the
* index. commit() should be called once you're done ingesting files. * index. commit() should be called once you're done ingesting files.
* If the file is a directory or ingestContent is set to false, the file name is indexed only.
* *
* @param f File to ingest * @param fsContent File to ingest
* @param ingestContent if true, index the file and the content, otherwise index metadata only
* @throws IngesterException if there was an error processing a specific * @throws IngesterException if there was an error processing a specific
* file, but the Solr server is probably fine. * file, but the Solr server is probably fine.
*/ */
void ingest(FsContent fsContent) throws IngesterException { void ingest(FsContent fsContent, boolean ingestContent) throws IngesterException {
if (fsContent.isDir() ) { if (fsContent.isDir() || ingestContent == false ) {
ingest(new NullContentStream(fsContent), getContentFields(fsContent), 0); ingest(new NullContentStream(fsContent), getContentFields(fsContent), 0);
} }
else { else {
@ -438,25 +440,20 @@ public class Ingester {
} }
/** /**
* Determine if the file is ingestible/indexable by keyword search * Determine if the file content is ingestible/indexable by keyword search
* Ingestible abstract file is either a directory, or an allocated file with supported extensions. * Ingestible abstract file is either a directory, or an allocated file with supported extensions.
* Note: currently only checks by extension and abstract type, it does not check actual file content. * Note: currently only checks by extension and abstract type, it does not check actual file content.
* @param aFile * @param aFile
* @return true if it is ingestible, false otherwise * @return true if it is ingestible, false otherwise
*/ */
static boolean isIngestible(AbstractFile aFile) { static boolean isIngestible(AbstractFile aFile) {
boolean isIngestible = false;
TSK_DB_FILES_TYPE_ENUM aType = aFile.getType(); TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
if (aType.equals(TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS) if (! aType.equals(TSK_DB_FILES_TYPE_ENUM.FS) )
|| aType.equals(TSK_DB_FILES_TYPE_ENUM.UNUSED_BLOCKS)) return false;
return isIngestible;
FsContent fsContent = (FsContent) aFile; FsContent fsContent = (FsContent) aFile;
if (fsContent.isDir())
//we index dir name, not content
return true;
boolean isIngestible = false;
final String fileName = fsContent.getName(); final String fileName = fsContent.getName();
for (final String ext : ingestibleExtensions) { for (final String ext : ingestibleExtensions) {
if (fileName.toLowerCase().endsWith(ext)) { if (fileName.toLowerCase().endsWith(ext)) {

View File

@ -38,6 +38,7 @@ import org.apache.solr.client.solrj.SolrServerException;
import org.netbeans.api.progress.ProgressHandle; import org.netbeans.api.progress.ProgressHandle;
import org.netbeans.api.progress.ProgressHandleFactory; import org.netbeans.api.progress.ProgressHandleFactory;
import org.openide.util.Cancellable; import org.openide.util.Cancellable;
import org.openide.util.Exceptions;
import org.sleuthkit.autopsy.casemodule.Case; import org.sleuthkit.autopsy.casemodule.Case;
import org.sleuthkit.autopsy.ingest.IngestManager; import org.sleuthkit.autopsy.ingest.IngestManager;
import org.sleuthkit.autopsy.ingest.IngestManagerProxy; import org.sleuthkit.autopsy.ingest.IngestManagerProxy;
@ -55,14 +56,13 @@ import org.sleuthkit.datamodel.SleuthkitCase;
import org.sleuthkit.datamodel.TskData; import org.sleuthkit.datamodel.TskData;
/** /**
* An ingest service on a file level * An ingest service on a file level Performs indexing of allocated and Solr
* Performs indexing of allocated and Solr supported files, * supported files, string extraction and indexing of unallocated and not Solr
* string extraction and indexing of unallocated and not Solr supported files * supported files Index commit is done periodically (determined by user set
* Index commit is done periodically (determined by user set ingest update interval) * ingest update interval) Runs a periodic keyword / regular expression search
* Runs a periodic keyword / regular expression search on currently configured lists for ingest * on currently configured lists for ingest and writes results to blackboard
* and writes results to blackboard
* Reports interesting events to Inbox and to viewers * Reports interesting events to Inbox and to viewers
* *
* Registered as a service in layer.xml * Registered as a service in layer.xml
*/ */
public final class KeywordSearchIngestService implements IngestServiceAbstractFile { public final class KeywordSearchIngestService implements IngestServiceAbstractFile {
@ -92,19 +92,20 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
private volatile int messageID = 0; private volatile int messageID = 0;
private boolean processedFiles; private boolean processedFiles;
private volatile boolean finalSearcherDone = true; private volatile boolean finalSearcherDone = true;
private final String hashDBServiceName = "Hash Lookup"; private final String hashDBServiceName = "Hash Lookup"; //NOTE this needs to match the HashDB service getName()
private SleuthkitCase caseHandle = null; private SleuthkitCase caseHandle = null;
private boolean skipKnown = true; private boolean skipKnown = true;
boolean initialized = false; boolean initialized = false;
private enum IngestStatus { private enum IngestStatus {
INGESTED, EXTRACTED_INGESTED, SKIPPED, INGESTED, EXTRACTED_INGESTED, SKIPPED, INGESTED_META
}; };
private Map<Long, IngestStatus> ingestStatus; private Map<Long, IngestStatus> ingestStatus;
/** /**
* Returns singleton instance of the service, creates one if needed * Returns singleton instance of the service, creates one if needed
*
* @return instance of the service * @return instance of the service
*/ */
public static synchronized KeywordSearchIngestService getDefault() { public static synchronized KeywordSearchIngestService getDefault() {
@ -115,10 +116,12 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
} }
/** /**
* Starts processing of every file provided by IngestManager. * Starts processing of every file provided by IngestManager. Checks if it
* Checks if it is time to commit and run search * is time to commit and run search
*
* @param abstractFile file/unallocated file/directory to process * @param abstractFile file/unallocated file/directory to process
* @return ProcessResult.OK in most cases and ERROR only if error in the pipeline, otherwise does not advise to stop the pipeline * @return ProcessResult.OK in most cases and ERROR only if error in the
* pipeline, otherwise does not advise to stop the pipeline
*/ */
@Override @Override
public ProcessResult process(AbstractFile abstractFile) { public ProcessResult process(AbstractFile abstractFile) {
@ -133,8 +136,12 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
IngestServiceAbstractFile.ProcessResult hashDBResult = managerProxy.getAbstractFileServiceResult(hashDBServiceName); IngestServiceAbstractFile.ProcessResult hashDBResult = managerProxy.getAbstractFileServiceResult(hashDBServiceName);
//logger.log(Level.INFO, "hashdb result: " + hashDBResult + "file: " + AbstractFile.getName()); //logger.log(Level.INFO, "hashdb result: " + hashDBResult + "file: " + AbstractFile.getName());
if (hashDBResult == IngestServiceAbstractFile.ProcessResult.COND_STOP && skipKnown) { if (hashDBResult == IngestServiceAbstractFile.ProcessResult.COND_STOP && skipKnown) {
//index meta-data only
indexer.indexFile(abstractFile, false);
return ProcessResult.OK; return ProcessResult.OK;
} else if (hashDBResult == IngestServiceAbstractFile.ProcessResult.ERROR) { } else if (hashDBResult == IngestServiceAbstractFile.ProcessResult.ERROR) {
//index meta-data only
indexer.indexFile(abstractFile, false);
//notify depending service that keyword search (would) encountered error for this file //notify depending service that keyword search (would) encountered error for this file
return ProcessResult.ERROR; return ProcessResult.ERROR;
} }
@ -145,7 +152,8 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
checkRunCommitSearch(); checkRunCommitSearch();
indexer.indexFile(abstractFile); //index the file and content (if the content is supported)
indexer.indexFile(abstractFile, true);
return ProcessResult.OK; return ProcessResult.OK;
} }
@ -196,8 +204,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
} }
/** /**
* Handle stop event (ingest interrupted) * Handle stop event (ingest interrupted) Cleanup resources, threads, timers
* Cleanup resources, threads, timers
*/ */
@Override @Override
public void stop() { public void stop() {
@ -234,9 +241,10 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
} }
/** /**
* Initializes the service for new ingest run * Initializes the service for new ingest run Sets up threads, timers,
* Sets up threads, timers, retrieves settings, keyword lists to run on * retrieves settings, keyword lists to run on
* @param managerProxy *
* @param managerProxy
*/ */
@Override @Override
public void init(IngestManagerProxy managerProxy) { public void init(IngestManagerProxy managerProxy) {
@ -320,9 +328,11 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
} }
/** /**
* The service maintains background threads, return true if background threads are running * The service maintains background threads, return true if background
* or there are pending tasks to be run in the future, such as the final search post-ingest completion * threads are running or there are pending tasks to be run in the future,
* @return * such as the final search post-ingest completion
*
* @return
*/ */
@Override @Override
public boolean hasBackgroundJobsRunning() { public boolean hasBackgroundJobsRunning() {
@ -353,6 +363,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
*/ */
private void postIndexSummary() { private void postIndexSummary() {
int indexed = 0; int indexed = 0;
int indexed_meta = 0;
int indexed_extr = 0; int indexed_extr = 0;
int skipped = 0; int skipped = 0;
for (IngestStatus s : ingestStatus.values()) { for (IngestStatus s : ingestStatus.values()) {
@ -360,6 +371,9 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
case INGESTED: case INGESTED:
++indexed; ++indexed;
break; break;
case INGESTED_META:
++indexed_meta;
break;
case EXTRACTED_INGESTED: case EXTRACTED_INGESTED:
++indexed_extr; ++indexed_extr;
break; break;
@ -373,6 +387,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
StringBuilder msg = new StringBuilder(); StringBuilder msg = new StringBuilder();
msg.append("Indexed files: ").append(indexed).append("<br />Indexed strings: ").append(indexed_extr); msg.append("Indexed files: ").append(indexed).append("<br />Indexed strings: ").append(indexed_extr);
msg.append("<br />Indexed meta-data only: ").append(indexed_meta).append("<br />");
msg.append("<br />Skipped files: ").append(skipped).append("<br />"); msg.append("<br />Skipped files: ").append(skipped).append("<br />");
String indexStats = msg.toString(); String indexStats = msg.toString();
logger.log(Level.INFO, "Keyword Indexing Completed: " + indexStats); logger.log(Level.INFO, "Keyword Indexing Completed: " + indexStats);
@ -423,8 +438,8 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
} }
/** /**
* Check if time to commit, if so, run commit. * Check if time to commit, if so, run commit. Then run search if search
* Then run search if search timer is also set. * timer is also set.
*/ */
void checkRunCommitSearch() { void checkRunCommitSearch() {
if (commitIndex) { if (commitIndex) {
@ -446,8 +461,8 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
} }
/** /**
* CommitTimerAction to run by commitTimer * CommitTimerAction to run by commitTimer Sets a flag to indicate we are
* Sets a flag to indicate we are ready for commit * ready for commit
*/ */
private class CommitTimerAction implements ActionListener { private class CommitTimerAction implements ActionListener {
@ -461,8 +476,8 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
} }
/** /**
* SearchTimerAction to run by searchTimer * SearchTimerAction to run by searchTimer Sets a flag to indicate we are
* Sets a flag to indicate we are ready to search * ready to search
*/ */
private class SearchTimerAction implements ActionListener { private class SearchTimerAction implements ActionListener {
@ -477,7 +492,7 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
/** /**
* File indexer, processes and indexes known/allocated files, * File indexer, processes and indexes known/allocated files,
* unknown/unallocated files and directories accordingly * unknown/unallocated files and directories accordingly
*/ */
private class Indexer { private class Indexer {
@ -495,42 +510,70 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
return indexed; return indexed;
} }
private void indexFile(AbstractFile aFile) { private void indexFile(AbstractFile aFile, boolean indexContent) {
//logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName()); //logger.log(Level.INFO, "Processing AbstractFile: " + abstractFile.getName());
boolean ingestibleFile = Ingester.isIngestible(aFile);
final long size = aFile.getSize(); FsContent fsContent = null;
//limit size of entire file, do not limit strings //check if alloc fs file or dir
if (size == 0 || (ingestibleFile && size > MAX_INDEX_SIZE)) { TskData.TSK_DB_FILES_TYPE_ENUM aType = aFile.getType();
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED); if (aType.equals(TskData.TSK_DB_FILES_TYPE_ENUM.FS)) {
fsContent = (FsContent) aFile;
}
//if alloc fs file and not index content, or a dir, index meta data only
if (fsContent != null
&& (indexContent == false || fsContent.isDir())) {
try {
ingester.ingest(fsContent, false); //meta-data only
ingestStatus.put(aFile.getId(), IngestStatus.INGESTED_META);
} catch (IngesterException ex) {
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
logger.log(Level.WARNING, "Unable to index meta-data for fsContent: " + fsContent.getId(), ex);
}
return; return;
} }
if (ingestibleFile == true) { boolean ingestibleFile = Ingester.isIngestible(aFile);
//we know it's an allocated file or dir (FsContent)
FsContent fileDir = (FsContent) aFile; final long size = aFile.getSize();
//if fs file, limit size of entire file, do not limit strings
if (fsContent != null && (size == 0 || (ingestibleFile && size > MAX_INDEX_SIZE))) {
//if fs file, index meta only, otherwise if unalloc, skip
try {
ingester.ingest(fsContent, false); //meta-data only
ingestStatus.put(aFile.getId(), IngestStatus.INGESTED_META);
} catch (IngesterException ex) {
ingestStatus.put(aFile.getId(), IngestStatus.SKIPPED);
logger.log(Level.WARNING, "Unable to index meta-data for fsContent: " + fsContent.getId(), ex);
}
return;
}
if (fsContent != null && ingestibleFile == true) {
//we know it's an allocated fs file (FsContent) with supported content
try { try {
//logger.log(Level.INFO, "indexing: " + fsContent.getName()); //logger.log(Level.INFO, "indexing: " + fsContent.getName());
ingester.ingest(fileDir); ingester.ingest(fsContent, true);
ingestStatus.put(fileDir.getId(), IngestStatus.INGESTED); ingestStatus.put(fsContent.getId(), IngestStatus.INGESTED);
} catch (IngesterException e) { } catch (IngesterException e) {
ingestStatus.put(fileDir.getId(), IngestStatus.SKIPPED); ingestStatus.put(fsContent.getId(), IngestStatus.SKIPPED);
//try to extract strings if not a dir //try to extract strings, if a file
if (fileDir.isFile() == true) { if (fsContent.isFile() == true) {
processNonIngestible(fileDir); processNonIngestible(fsContent);
} }
} catch (Exception e) { } catch (Exception e) {
ingestStatus.put(fileDir.getId(), IngestStatus.SKIPPED); ingestStatus.put(fsContent.getId(), IngestStatus.SKIPPED);
//try to extract strings if not a dir //try to extract strings if a file
if (fileDir.isFile() == true) { if (fsContent.isFile() == true) {
processNonIngestible(fileDir); processNonIngestible(fsContent);
} }
} }
} else { } else {
//unallocated or unsupported type by Solr //unallocated file or unsupported content type by Solr
processNonIngestible(aFile); processNonIngestible(aFile);
} }
} }
@ -547,10 +590,10 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
} }
/** /**
* Searcher responsible for searching the current index and writing results to blackboard * Searcher responsible for searching the current index and writing results
* and the inbox. Also, posts results to listeners as Ingest data events. * to blackboard and the inbox. Also, posts results to listeners as Ingest
* Searches entire index, and keeps track of only new results to report and save. * data events. Searches entire index, and keeps track of only new results
* Runs as a background thread. * to report and save. Runs as a background thread.
*/ */
private class Searcher extends SwingWorker<Object, Void> { private class Searcher extends SwingWorker<Object, Void> {
@ -574,7 +617,6 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
final String displayName = "Keyword Search" + (finalRun ? " - Finalizing" : ""); final String displayName = "Keyword Search" + (finalRun ? " - Finalizing" : "");
progress = ProgressHandleFactory.createHandle(displayName + (" (Pending)"), new Cancellable() { progress = ProgressHandleFactory.createHandle(displayName + (" (Pending)"), new Cancellable() {
@Override @Override
public boolean cancel() { public boolean cancel() {
logger.log(Level.INFO, "Cancelling the searcher by user."); logger.log(Level.INFO, "Cancelling the searcher by user.");
@ -833,14 +875,14 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
//without relying on done() method that is not guaranteed to run after background thread completes //without relying on done() method that is not guaranteed to run after background thread completes
//NEED to call this method always right before doInBackground() returns //NEED to call this method always right before doInBackground() returns
/** /**
* Performs the cleanup that needs to be done right AFTER doInBackground() returns * Performs the cleanup that needs to be done right AFTER
* without relying on done() method that is not guaranteed to run after background thread completes * doInBackground() returns without relying on done() method that is not
* REQUIRED to call this method always right before doInBackground() returns * guaranteed to run after background thread completes REQUIRED to call
* this method always right before doInBackground() returns
*/ */
private void finalizeSearcher() { private void finalizeSearcher() {
logger.log(Level.INFO, "Searcher finalizing"); logger.log(Level.INFO, "Searcher finalizing");
SwingUtilities.invokeLater(new Runnable() { SwingUtilities.invokeLater(new Runnable() {
@Override @Override
public void run() { public void run() {
progress.finish(); progress.finish();
@ -871,9 +913,9 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
/** /**
* Checks if the content has already been hit previously * Checks if the content has already been hit previously
* *
* @param previousHits the previous hits to check against * @param previousHits the previous hits to check against
* @param hit a hit to check for, that potentially had already been hit * @param hit a hit to check for, that potentially had already been hit
* @return true if the potential hit has already been hit, false otherwise * @return true if the potential hit has already been hit, false otherwise
*/ */
private static boolean previouslyHit(List<ContentHit> previousHits, ContentHit hit) { private static boolean previouslyHit(List<ContentHit> previousHits, ContentHit hit) {
@ -890,7 +932,9 @@ public final class KeywordSearchIngestService implements IngestServiceAbstractFi
/** /**
* Set the skip known files setting on the service * Set the skip known files setting on the service
* @param skip true if skip, otherwise, will process known files as well, as reported by HashDB service *
* @param skip true if skip, otherwise, will process known files as well, as
* reported by HashDB service
*/ */
void setSkipKnown(boolean skip) { void setSkipKnown(boolean skip) {
this.skipKnown = skip; this.skipKnown = skip;