Update IngestScheduler to also support imageless Content objects such as local files; make the file-counting code (used for progress) also work for non-image sources

This commit is contained in:
adam-m 2013-05-09 10:57:29 -04:00
parent 3523baccb9
commit a04de7afa9

View File

@ -56,7 +56,8 @@ import org.sleuthkit.datamodel.TskData.TSK_FS_META_TYPE_ENUM;
* *
* Currently a singleton object only. * Currently a singleton object only.
* *
* Contains internal schedulers for content objects into image and file ingest pipelines. * Contains internal schedulers for content objects into image and file ingest
* pipelines.
* *
*/ */
class IngestScheduler { class IngestScheduler {
@ -90,7 +91,6 @@ class IngestScheduler {
return fileScheduler; return fileScheduler;
} }
/** /**
* FileScheduler ingest scheduler * FileScheduler ingest scheduler
* *
@ -157,7 +157,7 @@ class IngestScheduler {
/** /**
* query num files enqueued total num of files to be enqueued. * query num files enqueued total num of files to be enqueued.
* *
* Counts all files for all the images currently in the queues. * Counts all files for all the sources currently in the queues.
* *
* @return approx. total num of files enqueued (or to be enqueued) * @return approx. total num of files enqueued (or to be enqueued)
*/ */
@ -165,12 +165,11 @@ class IngestScheduler {
int totalFiles = 0; int totalFiles = 0;
List<Content> contents = this.getSourceContent(); List<Content> contents = this.getSourceContent();
final GetImageFilesCountVisitor countVisitor = final GetFilesCountVisitor countVisitor =
new GetImageFilesCountVisitor(); new GetFilesCountVisitor();
for (Content content : contents) { for (Content content : contents) {
totalFiles += content.accept(countVisitor); totalFiles += content.accept(countVisitor);
} }
//TODO revise for imageless LocalFiles enqueued
logger.log(Level.INFO, "Total files to queue up: " + totalFiles); logger.log(Level.INFO, "Total files to queue up: " + totalFiles);
@ -178,7 +177,7 @@ class IngestScheduler {
} }
/** /**
* get total est. number of files to be enqueued for current images in * get total est. number of files to be enqueued for current ingest input sources in
* queues * queues
* *
* @return total number of files * @return total number of files
@ -352,8 +351,8 @@ class IngestScheduler {
* as the parent origin file. * as the parent origin file.
* *
* @param file file to be scheduled * @param file file to be scheduled
* @param originalContext original image schedule context that was used to * @param originalContext original image schedule context that was used
* schedule the parent origin file, with the modules, settings, etc. * to schedule the parent origin file, with the modules, settings, etc.
*/ */
synchronized void schedule(AbstractFile file, PipelineContext originalContext) { synchronized void schedule(AbstractFile file, PipelineContext originalContext) {
ScheduledTask originalTask = originalContext.getScheduledTask(); ScheduledTask originalTask = originalContext.getScheduledTask();
@ -373,7 +372,8 @@ class IngestScheduler {
} }
/** /**
* Schedule new Content object for a file ingest with associated modules. * Schedule new Content object for a file ingest with associated
* modules.
* *
* @param task image schedule task with image and associated modules * @param task image schedule task with image and associated modules
*/ */
@ -403,9 +403,10 @@ class IngestScheduler {
//adds and resorts the tasks //adds and resorts the tasks
this.rootProcessTasks.addAll(rootTasks); this.rootProcessTasks.addAll(rootTasks);
//update approx count of files to process in queues
this.filesEnqueuedEst = this.queryNumFilesinEnqueuedContents(); this.filesEnqueuedEst = this.queryNumFilesinEnqueuedContents();
//update the dir and file level queues if needed //reshuffle/update the dir and file level queues if needed
updateQueues(); updateQueues();
} }
@ -511,31 +512,26 @@ class IngestScheduler {
} }
/** /**
* Return list of contents associated with the file/dir objects in the * Return list of input source contents associated with the file/dir
* queue scheduler to be processed. * objects in the queue scheduler to be processed.
* *
* Helpful to determine whether ingest * Helpful to determine whether ingest for particular input Content is
* for particular input Content is active * active
* *
* @return list of parent source content objects for files currently enqueued * @return list of parent source content objects for files currently
* enqueued
*/ */
synchronized List<Content> getSourceContent() { synchronized List<Content> getSourceContent() {
Set<Content> contentSet = new HashSet<Content>(); final Set<Content> contentSet = new HashSet<Content>();
try { for (ProcessTask task : rootProcessTasks) {
for (ProcessTask task : rootProcessTasks) { contentSet.add(task.context.getScheduledTask().getContent());
contentSet.add(task.file.getImage()); }
} for (ProcessTask task : curDirProcessTasks) {
for (ProcessTask task : curDirProcessTasks) { contentSet.add(task.context.getScheduledTask().getContent());
contentSet.add(task.file.getImage()); }
} for (ProcessTask task : curFileProcessTasks) {
for (ProcessTask task : curFileProcessTasks) { contentSet.add(task.context.getScheduledTask().getContent());
contentSet.add(task.file.getImage());
}
//TODO do we need to handle LocalFiles separately that have no image
} catch (TskCoreException e) {
logger.log(Level.SEVERE, "Could not get images for files scheduled for ingest", e);
} }
return new ArrayList<Content>(contentSet); return new ArrayList<Content>(contentSet);
@ -579,8 +575,8 @@ class IngestScheduler {
* Check if the file meets criteria to be enqueued, or is a special file * Check if the file meets criteria to be enqueued, or is a special file
* that we should skip * that we should skip
* *
* @param processTask a task whose file to check if should be queued * @param processTask a task whose file to check if should be queued of
* of skipped * skipped
* @return true if should be enqueued, false otherwise * @return true if should be enqueued, false otherwise
*/ */
private static boolean shouldEnqueueTask(final ProcessTask processTask) { private static boolean shouldEnqueueTask(final ProcessTask processTask) {
@ -589,15 +585,14 @@ class IngestScheduler {
//if it's unalloc file, skip if so scheduled //if it's unalloc file, skip if so scheduled
if (processTask.context.isProcessUnalloc() == false if (processTask.context.isProcessUnalloc() == false
&& aFile.getType().equals(TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS //unalloc files && aFile.getType().equals(TSK_DB_FILES_TYPE_ENUM.UNALLOC_BLOCKS //unalloc files
) ) { )) {
return false; return false;
} }
String fileName = aFile.getName(); String fileName = aFile.getName();
if (fileName.equals(".") || fileName.equals("..")) { if (fileName.equals(".") || fileName.equals("..")) {
return false; return false;
} } else if (aFile instanceof org.sleuthkit.datamodel.File) {
else if (aFile instanceof org.sleuthkit.datamodel.File ) {
final org.sleuthkit.datamodel.File f = (File) aFile; final org.sleuthkit.datamodel.File f = (File) aFile;
//skip files in root dir, starting with $, containing : (not default attributes) //skip files in root dir, starting with $, containing : (not default attributes)
@ -750,13 +745,12 @@ class IngestScheduler {
} }
/** /**
* Get counts of ingestable files/dirs for image/filesystem Only call * Get counts of ingestable files/dirs for the image input source.
* accept() for Image object Do not use on any other objects
* *
* Includes counts of all unalloc files (for the fs, image, volume) even * Includes counts of all unalloc files (for the fs, image, volume) even
* if ingest didn't ask for them * if ingest didn't ask for them
*/ */
static class GetImageFilesCountVisitor extends ContentVisitor.Default<Long> { static class GetFilesCountVisitor extends ContentVisitor.Default<Long> {
@Override @Override
public Long visit(FileSystem fs) { public Long visit(FileSystem fs) {
@ -797,12 +791,16 @@ class IngestScheduler {
private long getCountFromChildren(Content content) { private long getCountFromChildren(Content content) {
long count = 0; long count = 0;
try { try {
for (Content child : content.getChildren()) { List<Content> children = content.getChildren();
count += child.accept(this); if (children.size() > 0) {
for (Content child : children) {
count += child.accept(this);
}
} else {
count = 1;
} }
} catch (TskCoreException ex) { } catch (TskCoreException ex) {
Exceptions.printStackTrace(ex); logger.log(Level.WARNING, "Could not get count of objects from children to get num of total files to be ingested", ex);
return 0L;
} }
return count; return count;
} }
@ -817,8 +815,8 @@ class IngestScheduler {
/** /**
* Visitor that gets a collection of top level objects to be scheduled, * Visitor that gets a collection of top level objects to be scheduled,
* such as root Dirs (if there is FS) or * such as root Dirs (if there is FS) or LayoutFiles and virtual
* LayoutFiles and virtual directories, also if there is no FS. * directories, also if there is no FS.
*/ */
static class GetRootDirVisitor extends GetFilesContentVisitor { static class GetRootDirVisitor extends GetFilesContentVisitor {