7529 KWS artifact ingest module

Richard Cordovano 2021-11-20 06:23:48 -05:00
parent 08e5ca61ac
commit 75463dc9f0
9 changed files with 98 additions and 46 deletions

View File

@@ -93,6 +93,7 @@ IngestJobTableModel.colName.rootQueued=Roots Queued
IngestJobTableModel.colName.streamingQueued=Streamed Files Queued
IngestJobTableModel.colName.dsQueued=DS Queued
IngestJobTableModel.colName.artifactsQueued=Artifacts Queued
IngestJobTableModel.colName.resultsQueued=Results Queued
ModuleTableModel.colName.module=Module
ModuleTableModel.colName.duration=Duration
IngestJobSettingsPanel.jButtonSelectAll.text=Select All

View File

@@ -1,5 +1,7 @@
CTL_RunIngestAction=Run Ingest
FileIngestPipeline_SaveResults_Activity=Saving Results
# {0} - data source name
IngestJob.progress.analysisResultIngest.displayName=Analyzing data artifacts from {0}
IngestJobSettingsPanel.IngestModulesTableRenderer.info.message=A previous version of this ingest module has been run before on this data source.
IngestJobSettingsPanel.IngestModulesTableRenderer.warning.message=This ingest module has been run before on this data source.
IngestJobSettingsPanel.noPerRunSettings=The selected module has no per-run settings.
@@ -109,6 +111,7 @@ IngestJobTableModel.colName.rootQueued=Roots Queued
IngestJobTableModel.colName.streamingQueued=Streamed Files Queued
IngestJobTableModel.colName.dsQueued=DS Queued
IngestJobTableModel.colName.artifactsQueued=Artifacts Queued
IngestJobTableModel.colName.resultsQueued=Results Queued
ModuleTableModel.colName.module=Module
ModuleTableModel.colName.duration=Duration
IngestJobSettingsPanel.jButtonSelectAll.text=Select All
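The "# {0} - data source name" line above follows the NetBeans convention for documenting a message placeholder. As a minimal sketch (not part of this commit, and assuming a hypothetical caller class located in the same package as this bundle), the new progress display-name key would typically be resolved like this:

import org.openide.util.NbBundle;

final class ProgressDisplayNameSketch {
    static String displayNameFor(String dataSourceName) {
        // NbBundle looks up Bundle.properties in this class's package and
        // substitutes {0} with dataSourceName, yielding e.g.
        // "Analyzing data artifacts from image1.dd".
        return NbBundle.getMessage(ProgressDisplayNameSketch.class,
                "IngestJob.progress.analysisResultIngest.displayName",
                dataSourceName);
    }
}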

View File

@@ -1006,7 +1006,7 @@ final class IngestJobExecutor {
if (stage == IngestJobStage.STREAMED_FILE_ANALYSIS_ONLY) {
return;
}
if (taskScheduler.currentTasksAreCompleted(this)) {
if (taskScheduler.currentTasksAreCompleted(getIngestJobId())) {
switch (stage) {
case FILE_AND_HIGH_PRIORITY_DATA_SRC_LEVEL_ANALYSIS:
finishFileAndHighPriorityDataSrcAnalysis();
@@ -1307,7 +1307,7 @@ final class IngestJobExecutor {
void addFiles(List<AbstractFile> files) {
if (stage.equals(IngestJobStage.STREAMED_FILE_ANALYSIS_ONLY)
|| stage.equals(IngestJobStage.FILE_AND_HIGH_PRIORITY_DATA_SRC_LEVEL_ANALYSIS)) {
taskScheduler.fastTrackFileIngestTasks(this, files);
taskScheduler.scheduleHighPriorityFileIngestTasks(this, files);
} else {
logErrorMessage(Level.SEVERE, "Adding streaming files to job during stage " + stage.toString() + " not supported");
}
@@ -1683,12 +1683,20 @@ final class IngestJobExecutor {
tasksSnapshot = taskScheduler.getTasksSnapshotForJob(getIngestJobId());
}
return new Snapshot(dataSource.getName(),
getIngestJobId(), createTime,
return new Snapshot(
dataSource.getName(),
getIngestJobId(),
createTime,
getCurrentDataSourceIngestModule(),
fileIngestRunning, fileIngestStartTime,
jobCancelled, cancellationReason, cancelledDataSourceIngestModules,
processedFilesCount, estimatedFilesToProcessCount, snapShotTime, tasksSnapshot);
fileIngestRunning,
fileIngestStartTime,
jobCancelled,
cancellationReason,
cancelledDataSourceIngestModules,
processedFilesCount,
estimatedFilesToProcessCount,
snapShotTime,
tasksSnapshot);
}
/**

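A small illustration of the direction this file's changes take (hypothetical, simplified classes, not the actual Autopsy code): the task scheduler keys its bookkeeping by ingest job ID, so the executor passes getIngestJobId() instead of a reference to itself, and the scheduler can answer completion queries without knowing anything about the executor.

import java.util.HashMap;
import java.util.Map;

final class TaskSchedulerSketch {
    // Queued task counts keyed by ingest job ID rather than by executor object.
    private final Map<Long, Integer> queuedTaskCounts = new HashMap<>();

    synchronized boolean currentTasksAreCompleted(long ingestJobId) {
        return queuedTaskCounts.getOrDefault(ingestJobId, 0) == 0;
    }
}

final class ExecutorSketch {
    private final TaskSchedulerSketch taskScheduler = new TaskSchedulerSketch();
    private final long ingestJobId = 1L;

    void checkForStageCompleted() {
        // The executor hands over only its job ID, as in the change above.
        if (taskScheduler.currentTasksAreCompleted(ingestJobId)) {
            // advance to the next ingest stage here
        }
    }
}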
View File

@@ -179,7 +179,8 @@ class IngestProgressSnapshotPanel extends javax.swing.JPanel {
NbBundle.getMessage(this.getClass(), "IngestJobTableModel.colName.rootQueued"),
NbBundle.getMessage(this.getClass(), "IngestJobTableModel.colName.streamingQueued"),
NbBundle.getMessage(this.getClass(), "IngestJobTableModel.colName.dsQueued"),
NbBundle.getMessage(this.getClass(), "IngestJobTableModel.colName.artifactsQueued")};
NbBundle.getMessage(this.getClass(), "IngestJobTableModel.colName.artifactsQueued"),
NbBundle.getMessage(this.getClass(), "IngestJobTableModel.colName.resultsQueued")};
private List<Snapshot> jobSnapshots;
@@ -249,6 +250,9 @@ class IngestProgressSnapshotPanel extends javax.swing.JPanel {
case 11:
cellValue = snapShot.getArtifactTasksQueueSize();
break;
case 12:
cellValue = snapShot.getResultTasksQueueSize();
break;
default:
cellValue = null;
break;
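A minimal sketch of the mapping this hunk extends (hypothetical stand-in types, not the actual panel code): the table model's getValueAt-style switch ties column 11 to the artifacts queue and the new column 12 to the results queue exposed by Snapshot.

final class QueueColumnSketch {

    // Stand-in for the two Snapshot getters used by the snapshot panel.
    interface SnapshotView {
        long getArtifactTasksQueueSize();
        long getResultTasksQueueSize();
    }

    Object cellValueFor(SnapshotView snapShot, int columnIndex) {
        switch (columnIndex) {
            case 11:
                return snapShot.getArtifactTasksQueueSize(); // "Artifacts Queued"
            case 12:
                return snapShot.getResultTasksQueueSize();   // new "Results Queued" column
            default:
                return null;
        }
    }
}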

View File

@@ -148,6 +148,9 @@ final class IngestTasksScheduler {
* settings that causes the ingest task scheduler to discard all of the file
* tasks.
*
* RJCTODO: Return a count of scheduled tasks or even just a boolean; let
* the caller know if file filters, etc., caused no tasks to be scheduled.
*
* @param executor The ingest job executor that will execute the scheduled
* tasks. A reference to the executor is added to each task
* so that when the task is dequeued by an ingest thread,
@@ -164,7 +167,7 @@
}
if (executor.hasDataArtifactIngestModules()) {
scheduleDataArtifactIngestTasks(executor);
}
}
if (executor.hasAnalysisResultIngestModules()) {
scheduleAnalysisResultIngestTasks(executor);
}
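The RJCTODO in the preceding hunk suggests having task scheduling report whether anything was actually queued. A minimal sketch of that idea (hypothetical, not part of this commit): return a boolean so the caller can tell when file filters or module selection left nothing to schedule.

import java.util.ArrayDeque;
import java.util.Collection;
import java.util.Deque;

final class SchedulingResultSketch {
    private final Deque<String> fileTasksQueue = new ArrayDeque<>();

    synchronized boolean scheduleFileTasks(Collection<String> candidateFiles) {
        int sizeBefore = fileTasksQueue.size();
        for (String file : candidateFiles) {
            if (passesFileFilter(file)) { // stand-in for the ingest file filters
                fileTasksQueue.add(file);
            }
        }
        // True only if at least one task was actually enqueued.
        return fileTasksQueue.size() > sizeBefore;
    }

    private boolean passesFileFilter(String fileName) {
        return !fileName.isEmpty(); // placeholder filter logic
    }
}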
@@ -194,8 +197,8 @@
}
/**
* Schedules file tasks for either all the files, or a given subset of the
* files, for a data source. The data source is obtained from the ingest
* Schedules file tasks for either all of the files, or a given subset of
* the files, for a data source. The data source is obtained from the ingest
* ingest job executor passed in.
*
* @param executor The ingest job executor that will execute the scheduled
@@ -204,7 +207,7 @@
* the task can pass its target item to the executor for
* processing by the executor's ingest module pipelines.
* @param files A subset of the files from the data source; if empty,
* then all if the files from the data source are candidates
* then all of the files from the data source are candidates
* for scheduling.
*/
synchronized void scheduleFileIngestTasks(IngestJobExecutor executor, Collection<AbstractFile> files) {
@@ -267,7 +270,7 @@
* processing by the executor's ingest module pipelines.
* @param files The files.
*/
synchronized void fastTrackFileIngestTasks(IngestJobExecutor executor, Collection<AbstractFile> files) {
synchronized void scheduleHighPriorityFileIngestTasks(IngestJobExecutor executor, Collection<AbstractFile> files) {
if (!executor.isCancelled()) {
/*
* Put the files directly into the queue for the file ingest
@@ -450,8 +453,7 @@
*
* @return True or false.
*/
synchronized boolean currentTasksAreCompleted(IngestJobExecutor executor) {
long ingestJobId = executor.getIngestJobId();
synchronized boolean currentTasksAreCompleted(Long ingestJobId) {
return !(dataSourceIngestTasksQueue.hasTasksForJob(ingestJobId)
|| hasTasksForJob(topLevelFileIngestTasksQueue, ingestJobId)
|| hasTasksForJob(batchedFileIngestTasksQueue, ingestJobId)
@@ -498,7 +500,9 @@
List<AbstractFile> topLevelFiles = new ArrayList<>();
Collection<AbstractFile> rootObjects = dataSource.accept(new GetRootDirectoryVisitor());
if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) {
// The data source is itself a file to be processed.
/*
* The data source is itself a file to be processed.
*/
topLevelFiles.add((AbstractFile) dataSource);
} else {
for (AbstractFile root : rootObjects) {
@@ -506,12 +510,17 @@
try {
children = root.getChildren();
if (children.isEmpty()) {
// Add the root object itself, it could be an unallocated
// space file, or a child of a volume or an image.
/*
* Add the root object itself, it could be an
* unallocated space file, or a child of a volume or an
* image.
*/
topLevelFiles.add(root);
} else {
// The root object is a file system root directory, get
// the files within it.
/*
* The root object is a file system root directory, get
* the files within it.
*/
for (Content child : children) {
if (child instanceof AbstractFile) {
topLevelFiles.add((AbstractFile) child);
@@ -623,7 +632,8 @@
AbstractFile file = null;
try {
file = nextTask.getFile();
for (Content child : file.getChildren()) {
List<Content> children = file.getChildren();
for (Content child : children) {
if (child instanceof AbstractFile) {
AbstractFile childFile = (AbstractFile) child;
FileIngestTask childTask = new FileIngestTask(nextTask.getIngestJobExecutor(), childFile);
@@ -663,8 +673,10 @@
return false;
}
// Skip the task if the file is actually the pseudo-file for the parent
// or current directory.
/*
* Skip the task if the file is actually the pseudo-file for the parent
* or current directory.
*/
String fileName = file.getName();
if (fileName.equals(".") || fileName.equals("..")) {
@@ -687,12 +699,16 @@
return false;
}
// Skip the task if the file is one of a select group of special, large
// NTFS or FAT file system files.
/*
* Skip the task if the file is one of a select group of special, large
* NTFS or FAT file system files.
*/
if (file instanceof org.sleuthkit.datamodel.File) {
final org.sleuthkit.datamodel.File f = (org.sleuthkit.datamodel.File) file;
// Get the type of the file system, if any, that owns the file.
/*
* Get the type of the file system, if any, that owns the file.
*/
TskData.TSK_FS_TYPE_ENUM fsType = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_UNSUPP;
try {
FileSystem fs = f.getFileSystem();
@@ -703,12 +719,16 @@
logger.log(Level.SEVERE, "Error querying file system for " + f, ex); //NON-NLS
}
// If the file system is not NTFS or FAT, don't skip the file.
/*
* If the file system is not NTFS or FAT, don't skip the file.
*/
if ((fsType.getValue() & FAT_NTFS_FLAGS) == 0) {
return true;
}
// Find out whether the file is in a root directory.
/*
* Find out whether the file is in a root directory.
*/
boolean isInRootDir = false;
try {
AbstractFile parent = f.getParentDirectory();
@@ -721,9 +741,11 @@
logger.log(Level.WARNING, "Error querying parent directory for" + f.getName(), ex); //NON-NLS
}
// If the file is in the root directory of an NTFS or FAT file
// system, check its meta-address and check its name for the '$'
// character and a ':' character (not a default attribute).
/*
* If the file is in the root directory of an NTFS or FAT file
* system, check its meta-address and check its name for the '$'
* character and a ':' character (not a default attribute).
*/
if (isInRootDir && f.getMetaAddr() < 32) {
String name = f.getName();
if (name.length() > 0 && name.charAt(0) == '$' && name.contains(":")) {
@@ -839,7 +861,7 @@
fileIngestTasksQueue.countQueuedTasksForJob(ingestJobId),
countTasksForJob(streamedFileIngestTasksQueue, ingestJobId),
artifactIngestTasksQueue.countQueuedTasksForJob(ingestJobId),
artifactIngestTasksQueue.countQueuedTasksForJob(ingestJobId),
resultIngestTasksQueue.countQueuedTasksForJob(ingestJobId),
dataSourceIngestTasksQueue.countRunningTasksForJob(ingestJobId) + fileIngestTasksQueue.countRunningTasksForJob(ingestJobId) + artifactIngestTasksQueue.countRunningTasksForJob(ingestJobId) + resultIngestTasksQueue.countRunningTasksForJob(ingestJobId)
);
}
@@ -852,20 +874,27 @@
@Override
public int compare(FileIngestTask q1, FileIngestTask q2) {
// In practice the case where one or both calls to getFile() fails
// should never occur since such tasks would not be added to the queue.
/*
* In practice the case where one or both calls to getFile() fails
* should never occur since such tasks would not be added to the
* queue.
*/
AbstractFile file1 = null;
AbstractFile file2 = null;
try {
file1 = q1.getFile();
} catch (TskCoreException ex) {
// Do nothing - the exception has been logged elsewhere
/*
* Do nothing - the exception has been logged elsewhere
*/
}
try {
file2 = q2.getFile();
} catch (TskCoreException ex) {
// Do nothing - the exception has been logged elsewhere
/*
* Do nothing - the exception has been logged elsewhere
*/
}
if (file1 == null) {
@@ -910,15 +939,11 @@
static final List<Pattern> HIGH_PRI_PATHS = new ArrayList<>();
/*
* prioritize root directory folders based on the assumption that we
* Prioritize root directory folders based on the assumption that we
* are looking for user content. Other types of investigations may
* want different priorities.
*/
static /*
* prioritize root directory folders based on the assumption that we
* are looking for user content. Other types of investigations may
* want different priorities.
*/ {
static {
// these files have no structure, so they go last
//unalloc files are handled as virtual files in getPriority()
//LAST_PRI_PATHS.schedule(Pattern.compile("^\\$Unalloc", Pattern.CASE_INSENSITIVE));
@@ -1170,16 +1195,16 @@
* @param resultsQueueSize The number of queued ingest tasks for
* analysis results.
*/
IngestTasksSnapshot(long ingestJobId, long dataSourceQueueSize, long rootQueueSize, long dirQueueSize, long fileQueueSize, long inProgressListSize, long streamedFileQueueSize, long artifactsQueueSize, long resultsQueueSize) {
IngestTasksSnapshot(long ingestJobId, long dataSourceQueueSize, long rootQueueSize, long dirQueueSize, long fileQueueSize, long streamedFileQueueSize, long artifactsQueueSize, long resultsQueueSize, long inProgressListSize) {
this.ingestJobId = ingestJobId;
this.dataSourceQueueSize = dataSourceQueueSize;
this.rootQueueSize = rootQueueSize;
this.dirQueueSize = dirQueueSize;
this.fileQueueSize = fileQueueSize;
this.inProgressListSize = inProgressListSize;
this.streamedFileQueueSize = streamedFileQueueSize;
this.artifactsQueueSize = artifactsQueueSize;
this.resultsQueueSize = resultsQueueSize;
this.inProgressListSize = inProgressListSize;
}
/**

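One design note on the IngestTasksSnapshot change just above: every constructor argument is a long, so reordering the positional parameters (moving inProgressListSize to the end) is only safe because the call site is updated in the same commit. As a hedged alternative sketch (hypothetical, not the project's code), a small builder makes each value explicit and immune to ordering mistakes:

final class TasksSnapshotSketch {
    final long artifactsQueueSize;
    final long resultsQueueSize;
    final long inProgressListSize;

    private TasksSnapshotSketch(Builder b) {
        this.artifactsQueueSize = b.artifactsQueueSize;
        this.resultsQueueSize = b.resultsQueueSize;
        this.inProgressListSize = b.inProgressListSize;
    }

    static final class Builder {
        private long artifactsQueueSize;
        private long resultsQueueSize;
        private long inProgressListSize;

        Builder artifactsQueueSize(long n) { this.artifactsQueueSize = n; return this; }
        Builder resultsQueueSize(long n) { this.resultsQueueSize = n; return this; }
        Builder inProgressListSize(long n) { this.inProgressListSize = n; return this; }

        TasksSnapshotSketch build() { return new TasksSnapshotSketch(this); }
    }
}

// Usage: new TasksSnapshotSketch.Builder().artifactsQueueSize(3).resultsQueueSize(5).inProgressListSize(1).build();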
View File

@@ -200,6 +200,13 @@ public final class Snapshot implements Serializable {
return tasksSnapshot.getArtifactsQueueSize();
}
long getResultTasksQueueSize() {
if (tasksSnapshot == null) {
return 0;
}
return tasksSnapshot.getResultsQueueSize();
}
boolean isCancelled() {
return this.jobCancelled;
}

View File

@@ -65,15 +65,19 @@ DayOfTheWeekRenderer_Tuesday_Label=Tuesday
DayOfTheWeekRenderer_Wednesday_Label=Wednesday
GeneralOptionsPanelController.moduleErr.msg=A module caused an error listening to GeneralOptionsPanelController updates. See log to determine which module. Some data could be incomplete.
GeneralOptionsPanelController.moduleErr=Module Error
# {0} - errorMessage
MultiUserTestTool.criticalError=Critical error running data source processor on test data source: {0}
MultiUserTestTool.errorStartingIngestJob=Ingest manager error while starting ingest job
# {0} - cancellationReason
MultiUserTestTool.ingestCancelled=Ingest cancelled due to {0}
MultiUserTestTool.ingestSettingsError=Failed to analyze data source due to ingest settings errors
MultiUserTestTool.noContent=Test data source failed to produce content
# {0} - serviceName
MultiUserTestTool.serviceDown=Multi User service is down: {0}
MultiUserTestTool.startupError=Failed to analyze data source due to ingest job startup error
MultiUserTestTool.unableAddFileAsDataSource=Unable to add test file as data source to case
MultiUserTestTool.unableCreatFile=Unable to create a file in case output directory
# {0} - serviceName
MultiUserTestTool.unableToCheckService=Unable to check Multi User service state: {0}
MultiUserTestTool.unableToCreateCase=Unable to create case
MultiUserTestTool.unableToInitializeDatabase=Case database was not successfully initialized

View File

@@ -1,5 +1,5 @@
#Updated by build script
#Thu, 30 Sep 2021 19:36:31 -0400
#Fri, 19 Nov 2021 17:01:30 -0500
LBL_splash_window_title=Starting Autopsy
SPLASH_HEIGHT=314
SPLASH_WIDTH=538

View File

@@ -1,4 +1,4 @@
#Updated by build script
#Thu, 30 Sep 2021 19:36:31 -0400
#Fri, 19 Nov 2021 17:01:30 -0500
CTL_MainWindow_Title=Autopsy 4.19.2
CTL_MainWindow_Title_No_Project=Autopsy 4.19.2