mirror of
https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-15 09:17:42 +00:00
Merge pull request #3579 from rcordovano/2103-ingest-task-sched-concurrency
2103 ingest concurrency bug fixes
This commit is contained in:
commit
c988090e9b
@ -518,7 +518,7 @@ final class DataSourceIngestJob {
|
|||||||
*/
|
*/
|
||||||
if (this.hasFirstStageDataSourceIngestPipeline() && this.hasFileIngestPipeline()) {
|
if (this.hasFirstStageDataSourceIngestPipeline() && this.hasFileIngestPipeline()) {
|
||||||
logger.log(Level.INFO, "Scheduling first stage data source and file level analysis tasks for {0} (jobId={1})", new Object[]{dataSource.getName(), this.id}); //NON-NLS
|
logger.log(Level.INFO, "Scheduling first stage data source and file level analysis tasks for {0} (jobId={1})", new Object[]{dataSource.getName(), this.id}); //NON-NLS
|
||||||
DataSourceIngestJob.taskScheduler.scheduleIngestTasks(this, this.files);
|
DataSourceIngestJob.taskScheduler.scheduleIngestTasks(this);
|
||||||
} else if (this.hasFirstStageDataSourceIngestPipeline()) {
|
} else if (this.hasFirstStageDataSourceIngestPipeline()) {
|
||||||
logger.log(Level.INFO, "Scheduling first stage data source level analysis tasks for {0} (jobId={1}), no file level analysis configured", new Object[]{dataSource.getName(), this.id}); //NON-NLS
|
logger.log(Level.INFO, "Scheduling first stage data source level analysis tasks for {0} (jobId={1}), no file level analysis configured", new Object[]{dataSource.getName(), this.id}); //NON-NLS
|
||||||
DataSourceIngestJob.taskScheduler.scheduleDataSourceIngestTask(this);
|
DataSourceIngestJob.taskScheduler.scheduleDataSourceIngestTask(this);
|
||||||
@ -827,7 +827,7 @@ final class DataSourceIngestJob {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds more files from the data source for this job to the job, i.e., adds
|
* Adds more files from the data source for this job to the job, e.g., adds
|
||||||
* extracted or carved files. Not currently supported for the second stage
|
* extracted or carved files. Not currently supported for the second stage
|
||||||
* of the job.
|
* of the job.
|
||||||
*
|
*
|
||||||
@ -835,9 +835,7 @@ final class DataSourceIngestJob {
|
|||||||
*/
|
*/
|
||||||
void addFiles(List<AbstractFile> files) {
|
void addFiles(List<AbstractFile> files) {
|
||||||
if (DataSourceIngestJob.Stages.FIRST == this.stage) {
|
if (DataSourceIngestJob.Stages.FIRST == this.stage) {
|
||||||
for (AbstractFile file : files) {
|
DataSourceIngestJob.taskScheduler.scheduleFileIngestTasks(this, files);
|
||||||
DataSourceIngestJob.taskScheduler.scheduleFastTrackedFileIngestTask(this, file);
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
DataSourceIngestJob.logger.log(Level.SEVERE, "Adding files during second stage not supported"); //NON-NLS
|
DataSourceIngestJob.logger.log(Level.SEVERE, "Adding files during second stage not supported"); //NON-NLS
|
||||||
}
|
}
|
||||||
|
@ -121,7 +121,7 @@ public class IngestManager {
|
|||||||
private final AtomicLong nextIngestManagerTaskId = new AtomicLong(0L);
|
private final AtomicLong nextIngestManagerTaskId = new AtomicLong(0L);
|
||||||
private final ExecutorService startIngestJobsExecutor = Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("IM-start-ingest-jobs-%d").build()); //NON-NLS;
|
private final ExecutorService startIngestJobsExecutor = Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("IM-start-ingest-jobs-%d").build()); //NON-NLS;
|
||||||
private final Map<Long, Future<Void>> startIngestJobFutures = new ConcurrentHashMap<>();
|
private final Map<Long, Future<Void>> startIngestJobFutures = new ConcurrentHashMap<>();
|
||||||
private final Map<Long, IngestJob> ingestJobsById = new ConcurrentHashMap<>();
|
private final Map<Long, IngestJob> ingestJobsById = new HashMap<>();
|
||||||
private final ExecutorService dataSourceLevelIngestJobTasksExecutor = Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("IM-data-source-ingest-%d").build()); //NON-NLS;
|
private final ExecutorService dataSourceLevelIngestJobTasksExecutor = Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("IM-data-source-ingest-%d").build()); //NON-NLS;
|
||||||
private final ExecutorService fileLevelIngestJobTasksExecutor;
|
private final ExecutorService fileLevelIngestJobTasksExecutor;
|
||||||
private final ExecutorService eventPublishingExecutor = Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("IM-ingest-events-%d").build()); //NON-NLS;
|
private final ExecutorService eventPublishingExecutor = Executors.newSingleThreadExecutor(new ThreadFactoryBuilder().setNameFormat("IM-ingest-events-%d").build()); //NON-NLS;
|
||||||
@ -399,13 +399,17 @@ public class IngestManager {
|
|||||||
ingestMonitor.start();
|
ingestMonitor.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
ingestJobsById.put(job.getId(), job);
|
synchronized (ingestJobsById) {
|
||||||
|
ingestJobsById.put(job.getId(), job);
|
||||||
|
}
|
||||||
errors = job.start();
|
errors = job.start();
|
||||||
if (errors.isEmpty()) {
|
if (errors.isEmpty()) {
|
||||||
this.fireIngestJobStarted(job.getId());
|
this.fireIngestJobStarted(job.getId());
|
||||||
IngestManager.logger.log(Level.INFO, "Ingest job {0} started", job.getId()); //NON-NLS
|
IngestManager.logger.log(Level.INFO, "Ingest job {0} started", job.getId()); //NON-NLS
|
||||||
} else {
|
} else {
|
||||||
this.ingestJobsById.remove(job.getId());
|
synchronized (ingestJobsById) {
|
||||||
|
this.ingestJobsById.remove(job.getId());
|
||||||
|
}
|
||||||
for (IngestModuleError error : errors) {
|
for (IngestModuleError error : errors) {
|
||||||
logger.log(Level.SEVERE, String.format("Error starting %s ingest module for job %d", error.getModuleDisplayName(), job.getId()), error.getThrowable()); //NON-NLS
|
logger.log(Level.SEVERE, String.format("Error starting %s ingest module for job %d", error.getModuleDisplayName(), job.getId()), error.getThrowable()); //NON-NLS
|
||||||
}
|
}
|
||||||
@ -438,7 +442,9 @@ public class IngestManager {
|
|||||||
*/
|
*/
|
||||||
void finishIngestJob(IngestJob job) {
|
void finishIngestJob(IngestJob job) {
|
||||||
long jobId = job.getId();
|
long jobId = job.getId();
|
||||||
ingestJobsById.remove(jobId);
|
synchronized (ingestJobsById) {
|
||||||
|
ingestJobsById.remove(jobId);
|
||||||
|
}
|
||||||
if (!job.isCancelled()) {
|
if (!job.isCancelled()) {
|
||||||
IngestManager.logger.log(Level.INFO, "Ingest job {0} completed", jobId); //NON-NLS
|
IngestManager.logger.log(Level.INFO, "Ingest job {0} completed", jobId); //NON-NLS
|
||||||
fireIngestJobCompleted(jobId);
|
fireIngestJobCompleted(jobId);
|
||||||
@ -455,7 +461,9 @@ public class IngestManager {
|
|||||||
* @return True or false.
|
* @return True or false.
|
||||||
*/
|
*/
|
||||||
public boolean isIngestRunning() {
|
public boolean isIngestRunning() {
|
||||||
return !ingestJobsById.isEmpty();
|
synchronized (ingestJobsById) {
|
||||||
|
return !ingestJobsById.isEmpty();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -467,9 +475,11 @@ public class IngestManager {
|
|||||||
startIngestJobFutures.values().forEach((handle) -> {
|
startIngestJobFutures.values().forEach((handle) -> {
|
||||||
handle.cancel(true);
|
handle.cancel(true);
|
||||||
});
|
});
|
||||||
this.ingestJobsById.values().forEach((job) -> {
|
synchronized (ingestJobsById) {
|
||||||
job.cancel(reason);
|
this.ingestJobsById.values().forEach((job) -> {
|
||||||
});
|
job.cancel(reason);
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -770,9 +780,11 @@ public class IngestManager {
|
|||||||
*/
|
*/
|
||||||
List<DataSourceIngestJob.Snapshot> getIngestJobSnapshots() {
|
List<DataSourceIngestJob.Snapshot> getIngestJobSnapshots() {
|
||||||
List<DataSourceIngestJob.Snapshot> snapShots = new ArrayList<>();
|
List<DataSourceIngestJob.Snapshot> snapShots = new ArrayList<>();
|
||||||
ingestJobsById.values().forEach((job) -> {
|
synchronized (ingestJobsById) {
|
||||||
snapShots.addAll(job.getDataSourceIngestJobSnapshots());
|
ingestJobsById.values().forEach((job) -> {
|
||||||
});
|
snapShots.addAll(job.getDataSourceIngestJobSnapshots());
|
||||||
|
});
|
||||||
|
}
|
||||||
return snapShots;
|
return snapShots;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -808,7 +820,9 @@ public class IngestManager {
|
|||||||
public Void call() {
|
public Void call() {
|
||||||
try {
|
try {
|
||||||
if (Thread.currentThread().isInterrupted()) {
|
if (Thread.currentThread().isInterrupted()) {
|
||||||
ingestJobsById.remove(job.getId());
|
synchronized (ingestJobsById) {
|
||||||
|
ingestJobsById.remove(job.getId());
|
||||||
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,12 +21,13 @@ package org.sleuthkit.autopsy.ingest;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.HashSet;
|
import java.util.Deque;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
import java.util.concurrent.BlockingDeque;
|
import java.util.concurrent.BlockingDeque;
|
||||||
|
import java.util.concurrent.BlockingQueue;
|
||||||
import java.util.concurrent.LinkedBlockingDeque;
|
import java.util.concurrent.LinkedBlockingDeque;
|
||||||
import java.util.concurrent.LinkedBlockingQueue;
|
import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.logging.Level;
|
import java.util.logging.Level;
|
||||||
@ -40,64 +41,20 @@ import org.sleuthkit.datamodel.TskCoreException;
|
|||||||
import org.sleuthkit.datamodel.TskData;
|
import org.sleuthkit.datamodel.TskData;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates ingest tasks for ingest jobs, queuing the tasks in priority order for
|
* Creates ingest tasks for data source ingest jobs, queueing the tasks in
|
||||||
* execution by the ingest manager's ingest threads.
|
* priority order for execution by the ingest manager's ingest threads.
|
||||||
*/
|
*/
|
||||||
final class IngestTasksScheduler {
|
final class IngestTasksScheduler {
|
||||||
|
|
||||||
private static final Logger logger = Logger.getLogger(IngestTasksScheduler.class.getName());
|
|
||||||
private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue();
|
private static final int FAT_NTFS_FLAGS = TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT12.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT16.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_FAT32.getValue() | TskData.TSK_FS_TYPE_ENUM.TSK_FS_TYPE_NTFS.getValue();
|
||||||
|
private static final Logger logger = Logger.getLogger(IngestTasksScheduler.class.getName());
|
||||||
private static IngestTasksScheduler instance;
|
private static IngestTasksScheduler instance;
|
||||||
|
private final DataSourceIngestTaskQueue dataSourceTaskQueueForIngestThreads;
|
||||||
/**
|
private final List<DataSourceIngestTask> queuedAndRunningDataSourceTasks;
|
||||||
* Scheduling of data source ingest tasks is accomplished by putting them in
|
private final TreeSet<FileIngestTask> rootFileTaskQueue;
|
||||||
* a FIFO queue to be consumed by the ingest threads, so the queue is
|
private final Deque<FileIngestTask> directoryFileTaskQueue;
|
||||||
* wrapped in a "dispenser" that implements the IngestTaskQueue interface
|
private final FileIngestTaskQueue fileTaskQueueForIngestThreads;
|
||||||
* and is exposed via a getter method.
|
private final List<FileIngestTask> queuedAndRunningFileTasks;
|
||||||
*/
|
|
||||||
private final LinkedBlockingQueue<DataSourceIngestTask> pendingDataSourceTasks;
|
|
||||||
private final DataSourceIngestTaskQueue dataSourceTasksDispenser;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Scheduling of file ingest tasks is accomplished by "shuffling" them
|
|
||||||
* through a sequence of internal queues that allows for the interleaving of
|
|
||||||
* tasks from different ingest jobs based on priority. These scheduling
|
|
||||||
* queues are:
|
|
||||||
*
|
|
||||||
* 1. Root directory tasks (priority queue)
|
|
||||||
*
|
|
||||||
* 2. Directory tasks (FIFO queue)
|
|
||||||
*
|
|
||||||
* 3. Pending file tasks (LIFO queue).
|
|
||||||
*
|
|
||||||
* The pending file tasks queue is LIFO to handle large numbers of files
|
|
||||||
* extracted from archive files. At least one image has been processed that
|
|
||||||
* had a folder full of archive files. The queue grew to have thousands of
|
|
||||||
* entries, as each successive archive file was expanded, so now extracted
|
|
||||||
* files get added to the front of the queue so that in such a scenario they
|
|
||||||
* would be processed before the expansion of the next archive file.
|
|
||||||
*
|
|
||||||
* Tasks in the pending file tasks queue are ready to be consumed by the
|
|
||||||
* ingest threads, so the queue is wrapped in a "dispenser" that implements
|
|
||||||
* the IngestTaskQueue interface and is exposed via a getter method.
|
|
||||||
*/
|
|
||||||
private final TreeSet<FileIngestTask> rootDirectoryTasks;
|
|
||||||
private final List<FileIngestTask> directoryTasks;
|
|
||||||
private final BlockingDeque<FileIngestTask> pendingFileTasks;
|
|
||||||
private final FileIngestTaskQueue fileTasksDispenser;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The ingest tasks scheduler allows ingest jobs to query it to see if there
|
|
||||||
* are any tasks in progress for the job. To make this possible, the ingest
|
|
||||||
* tasks scheduler needs to keep track not only of the tasks in its queues,
|
|
||||||
* but also of the tasks that have been handed out for processing by the
|
|
||||||
* ingest threads. Therefore all ingest tasks are added to this list when
|
|
||||||
* they are created and are not removed when an ingest thread takes an
|
|
||||||
* ingest task. Instead, the ingest thread calls back into the scheduler
|
|
||||||
* when the task is completed, at which time the task will be removed from
|
|
||||||
* this list.
|
|
||||||
*/
|
|
||||||
private final Set<IngestTask> tasksInProgress;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the ingest tasks scheduler singleton.
|
* Gets the ingest tasks scheduler singleton.
|
||||||
@ -113,52 +70,53 @@ final class IngestTasksScheduler {
|
|||||||
* Constructs an ingest tasks scheduler.
|
* Constructs an ingest tasks scheduler.
|
||||||
*/
|
*/
|
||||||
private IngestTasksScheduler() {
|
private IngestTasksScheduler() {
|
||||||
this.pendingDataSourceTasks = new LinkedBlockingQueue<>();
|
this.queuedAndRunningDataSourceTasks = new LinkedList<>();
|
||||||
this.dataSourceTasksDispenser = new DataSourceIngestTaskQueue();
|
this.dataSourceTaskQueueForIngestThreads = new DataSourceIngestTaskQueue();
|
||||||
this.rootDirectoryTasks = new TreeSet<>(new RootDirectoryTaskComparator());
|
this.rootFileTaskQueue = new TreeSet<>(new RootDirectoryTaskComparator());
|
||||||
this.directoryTasks = new ArrayList<>();
|
this.directoryFileTaskQueue = new LinkedList<>();
|
||||||
this.pendingFileTasks = new LinkedBlockingDeque<>();
|
this.queuedAndRunningFileTasks = new LinkedList<>();
|
||||||
this.fileTasksDispenser = new FileIngestTaskQueue();
|
this.fileTaskQueueForIngestThreads = new FileIngestTaskQueue();
|
||||||
this.tasksInProgress = new HashSet<>();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets this ingest task scheduler's implementation of the IngestTaskQueue
|
* Gets the data source level ingest tasks queue. This queue is a blocking
|
||||||
* interface for data source ingest tasks.
|
* queue intended for use by the ingest manager's data source ingest
|
||||||
|
* threads.
|
||||||
*
|
*
|
||||||
* @return The data source ingest tasks queue.
|
* @return The queue.
|
||||||
*/
|
*/
|
||||||
IngestTaskQueue getDataSourceIngestTaskQueue() {
|
IngestTaskQueue getDataSourceIngestTaskQueue() {
|
||||||
return this.dataSourceTasksDispenser;
|
return this.dataSourceTaskQueueForIngestThreads;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets this ingest task scheduler's implementation of the IngestTaskQueue
|
* Gets the file level ingest tasks queue. This queue is a blocking queue
|
||||||
* interface for file ingest tasks.
|
* intended for use by the ingest manager's file ingest threads.
|
||||||
*
|
*
|
||||||
* @return The file ingest tasks queue.
|
* @return The queue.
|
||||||
*/
|
*/
|
||||||
IngestTaskQueue getFileIngestTaskQueue() {
|
IngestTaskQueue getFileIngestTaskQueue() {
|
||||||
return this.fileTasksDispenser;
|
return this.fileTaskQueueForIngestThreads;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Schedules a data source level ingest task and file level ingest tasks for
|
* Schedules a data source level ingest task and file level ingest tasks for
|
||||||
* an ingest job. Either all of the files in the data source or a given
|
* a data source ingest job.
|
||||||
* subset of the files will be scheduled.
|
|
||||||
*
|
*
|
||||||
* @param job The data source ingest job.
|
* @param job The data source ingest job.
|
||||||
* @param files A subset of the files for the data source.
|
|
||||||
*/
|
*/
|
||||||
synchronized void scheduleIngestTasks(DataSourceIngestJob job, List<AbstractFile> files) {
|
synchronized void scheduleIngestTasks(DataSourceIngestJob job) {
|
||||||
if (!job.isCancelled()) {
|
if (!job.isCancelled()) {
|
||||||
// Scheduling of both a data source ingest task and file ingest tasks
|
/*
|
||||||
// for a job must be an atomic operation. Otherwise, the data source
|
* Scheduling of both the data source ingest task and the initial
|
||||||
// task might be completed before the file tasks are scheduled,
|
* file ingest tasks for a job must be an atomic operation.
|
||||||
// resulting in a potential false positive when another thread checks
|
* Otherwise, the data source task might be completed before the
|
||||||
// whether or not all the tasks for the job are completed.
|
* file tasks are scheduled, resulting in a potential false positive
|
||||||
|
* when another thread checks whether or not all the tasks for the
|
||||||
|
* job are completed.
|
||||||
|
*/
|
||||||
this.scheduleDataSourceIngestTask(job);
|
this.scheduleDataSourceIngestTask(job);
|
||||||
this.scheduleFileIngestTasks(job, files);
|
this.scheduleFileIngestTasks(job);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -170,41 +128,29 @@ final class IngestTasksScheduler {
|
|||||||
synchronized void scheduleDataSourceIngestTask(DataSourceIngestJob job) {
|
synchronized void scheduleDataSourceIngestTask(DataSourceIngestJob job) {
|
||||||
if (!job.isCancelled()) {
|
if (!job.isCancelled()) {
|
||||||
DataSourceIngestTask task = new DataSourceIngestTask(job);
|
DataSourceIngestTask task = new DataSourceIngestTask(job);
|
||||||
this.tasksInProgress.add(task);
|
this.queuedAndRunningDataSourceTasks.add(task);
|
||||||
try {
|
try {
|
||||||
this.pendingDataSourceTasks.put(task);
|
this.dataSourceTaskQueueForIngestThreads.add(task);
|
||||||
} catch (InterruptedException ex) {
|
} catch (InterruptedException ex) {
|
||||||
/**
|
IngestTasksScheduler.logger.log(Level.INFO, "Ingest cancelled while a data source ingest thread was blocked on a full queue", ex);
|
||||||
* The current thread was interrupted while blocked on a full
|
this.queuedAndRunningDataSourceTasks.remove(task);
|
||||||
* queue. Discard the task and reset the interrupted flag.
|
|
||||||
*/
|
|
||||||
this.tasksInProgress.remove(task);
|
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Schedules file level ingest tasks for a data source ingest job. Either
|
* Schedules file level ingest tasks for a data source ingest job.
|
||||||
* all of the files in the data source or a given subset of the files will
|
|
||||||
* be scheduled.
|
|
||||||
*
|
*
|
||||||
* @param job The data source ingest job.
|
* @param job The data source ingest job.
|
||||||
* @param files A subset of the files for the data source.
|
|
||||||
*/
|
*/
|
||||||
synchronized void scheduleFileIngestTasks(DataSourceIngestJob job, List<AbstractFile> files) {
|
synchronized void scheduleFileIngestTasks(DataSourceIngestJob job) {
|
||||||
if (!job.isCancelled()) {
|
if (!job.isCancelled()) {
|
||||||
List<AbstractFile> candidateFiles = new ArrayList<>();
|
List<AbstractFile> candidateFiles = getTopLevelFiles(job.getDataSource());
|
||||||
if (files.isEmpty()) {
|
for (AbstractFile file : candidateFiles) {
|
||||||
getTopLevelFiles(job.getDataSource(), candidateFiles);
|
FileIngestTask task = new FileIngestTask(job, file);
|
||||||
} else {
|
|
||||||
candidateFiles.addAll(files);
|
|
||||||
}
|
|
||||||
for (AbstractFile firstLevelFile : candidateFiles) {
|
|
||||||
FileIngestTask task = new FileIngestTask(job, firstLevelFile);
|
|
||||||
if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
|
if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
|
||||||
this.tasksInProgress.add(task);
|
this.rootFileTaskQueue.add(task);
|
||||||
this.rootDirectoryTasks.add(task);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
shuffleFileTaskQueues();
|
shuffleFileTaskQueues();
|
||||||
@ -212,73 +158,119 @@ final class IngestTasksScheduler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Schedules a file ingest task for a data source ingest job. The task that
|
* Schedules file level ingest tasks for a subset of the files for a data
|
||||||
* is created is added directly to the pending file tasks queues, i.e., it
|
* source ingest job.
|
||||||
* is "fast tracked."
|
|
||||||
*
|
*
|
||||||
* @param job The data source ingest job.
|
* @param job The data source ingest job.
|
||||||
* @param file A file.
|
* @param files A subset of the files for the data source.
|
||||||
*/
|
*/
|
||||||
synchronized void scheduleFastTrackedFileIngestTask(DataSourceIngestJob job, AbstractFile file) {
|
synchronized void scheduleFileIngestTasks(DataSourceIngestJob job, Collection<AbstractFile> files) {
|
||||||
if (!job.isCancelled()) {
|
if (!job.isCancelled()) {
|
||||||
FileIngestTask task = new FileIngestTask(job, file);
|
List<FileIngestTask> newTasksForFileIngestThreads = new LinkedList<>();
|
||||||
if (IngestTasksScheduler.shouldEnqueueFileTask(task)) {
|
for (AbstractFile file : files) {
|
||||||
this.tasksInProgress.add(task);
|
/*
|
||||||
addToPendingFileTasksQueue(task);
|
* Put the file directly into the queue for the file ingest
|
||||||
|
* threads, if it passes the filter for the job. The file is
|
||||||
|
* added to the queue for the ingest threads BEFORE the other
|
||||||
|
* queued tasks because the primary use case for this method is
|
||||||
|
* adding derived files from a higher priority task that
|
||||||
|
* preceded the tasks currently in the queue.
|
||||||
|
*/
|
||||||
|
FileIngestTask task = new FileIngestTask(job, file);
|
||||||
|
if (shouldEnqueueFileTask(task)) {
|
||||||
|
newTasksForFileIngestThreads.add(task);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the file or directory that was just queued has children,
|
||||||
|
* try to queue tasks for the children. Each child task will go
|
||||||
|
* into either the directory queue if it is a directory, or
|
||||||
|
* directly into the queue for the file ingest threads, if it
|
||||||
|
* passes the filter for the job.
|
||||||
|
*/
|
||||||
|
try {
|
||||||
|
for (Content child : file.getChildren()) {
|
||||||
|
if (child instanceof AbstractFile) {
|
||||||
|
AbstractFile childFile = (AbstractFile) child;
|
||||||
|
FileIngestTask childTask = new FileIngestTask(job, childFile);
|
||||||
|
if (childFile.hasChildren()) {
|
||||||
|
this.directoryFileTaskQueue.add(childTask);
|
||||||
|
} else if (shouldEnqueueFileTask(childTask)) {
|
||||||
|
newTasksForFileIngestThreads.add(task);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (TskCoreException ex) {
|
||||||
|
logger.log(Level.SEVERE, String.format("Error getting the children of %s (objId=%d)", file.getName(), file.getId()), ex); //NON-NLS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The files are added to the queue for the ingest threads BEFORE
|
||||||
|
* the other queued tasks because the primary use case for this
|
||||||
|
* method is adding derived files from a higher priority task that
|
||||||
|
* preceded the tasks currently in the queue.
|
||||||
|
*/
|
||||||
|
for (FileIngestTask newTask : newTasksForFileIngestThreads) {
|
||||||
|
try {
|
||||||
|
this.queuedAndRunningFileTasks.add(newTask);
|
||||||
|
this.fileTaskQueueForIngestThreads.addFirst(newTask);
|
||||||
|
} catch (InterruptedException ex) {
|
||||||
|
this.queuedAndRunningFileTasks.remove(newTask);
|
||||||
|
IngestTasksScheduler.logger.log(Level.INFO, "Ingest cancelled while blocked on a full file ingest threads queue", ex);
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows an ingest thread to notify this ingest task scheduler that a task
|
* Allows an ingest thread to notify this ingest task scheduler that a data
|
||||||
* has been completed.
|
* source level task has been completed.
|
||||||
*
|
*
|
||||||
* @param task The completed task.
|
* @param task The completed task.
|
||||||
*/
|
*/
|
||||||
synchronized void notifyTaskCompleted(IngestTask task) {
|
synchronized void notifyTaskCompleted(DataSourceIngestTask task) {
|
||||||
tasksInProgress.remove(task);
|
this.queuedAndRunningDataSourceTasks.remove(task);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Queries the task scheduler to determine whether or not all current ingest
|
* Allows an ingest thread to notify this ingest task scheduler that a file
|
||||||
* tasks for an ingest job are completed.
|
* level task has been completed.
|
||||||
*
|
*
|
||||||
* @param job The job for which the query is to be performed.
|
* @param task The completed task.
|
||||||
|
*/
|
||||||
|
synchronized void notifyTaskCompleted(FileIngestTask task) {
|
||||||
|
this.queuedAndRunningFileTasks.remove(task);
|
||||||
|
shuffleFileTaskQueues();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Queries the task scheduler to determine whether or not all of the ingest
|
||||||
|
* tasks for a data source ingest job have been completed.
|
||||||
|
*
|
||||||
|
* @param job The data source ingest job.
|
||||||
*
|
*
|
||||||
* @return True or false.
|
* @return True or false.
|
||||||
*/
|
*/
|
||||||
synchronized boolean tasksForJobAreCompleted(DataSourceIngestJob job) {
|
synchronized boolean tasksForJobAreCompleted(DataSourceIngestJob job) {
|
||||||
for (IngestTask task : tasksInProgress) {
|
return !hasTasksForJob(this.queuedAndRunningDataSourceTasks, job)
|
||||||
if (task.getIngestJob().getId() == job.getId()) {
|
&& !hasTasksForJob(this.rootFileTaskQueue, job)
|
||||||
return false;
|
&& !hasTasksForJob(this.directoryFileTaskQueue, job)
|
||||||
}
|
&& !hasTasksForJob(this.queuedAndRunningFileTasks, job);
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clears the "upstream" task scheduling queues for an ingest job, but does
|
* Clears the "upstream" task scheduling queues for a data source ingest
|
||||||
* nothing about tasks that have already been shuffled into the concurrently
|
* job, but does nothing about tasks that have already been moved into the
|
||||||
* accessed blocking queues shared with the ingest threads. Note that tasks
|
* queue that is consumed by the file ingest threads.
|
||||||
* in the "downstream" queues or already taken by the ingest threads will be
|
|
||||||
* flushed out when the ingest threads call back with their task completed
|
|
||||||
* notifications.
|
|
||||||
*
|
*
|
||||||
* @param job The job for which the tasks are to to canceled.
|
* @param job The data source ingest job.
|
||||||
*/
|
*/
|
||||||
synchronized void cancelPendingTasksForIngestJob(DataSourceIngestJob job) {
|
synchronized void cancelPendingTasksForIngestJob(DataSourceIngestJob job) {
|
||||||
/**
|
this.removeTasksForJob(this.rootFileTaskQueue, job);
|
||||||
* This code should not flush the blocking queues that are concurrently
|
this.removeTasksForJob(this.directoryFileTaskQueue, job);
|
||||||
* accessed by the ingest threads. This is because the "lock striping"
|
|
||||||
* and "weakly consistent" iterators of these collections make it so
|
|
||||||
* that this code could have a different view of the queues than the
|
|
||||||
* ingest threads. It does clean out the directory level tasks before
|
|
||||||
* they are exploded into file tasks.
|
|
||||||
*/
|
|
||||||
long jobId = job.getId();
|
|
||||||
this.removeTasksForJob(this.rootDirectoryTasks, jobId);
|
|
||||||
this.removeTasksForJob(this.directoryTasks, jobId);
|
|
||||||
this.shuffleFileTaskQueues();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -286,10 +278,12 @@ final class IngestTasksScheduler {
|
|||||||
* files and virtual directories for a data source. Used to create file
|
* files and virtual directories for a data source. Used to create file
|
||||||
* tasks to put into the root directories queue.
|
* tasks to put into the root directories queue.
|
||||||
*
|
*
|
||||||
* @param dataSource The data source.
|
* @param dataSource The data source.
|
||||||
* @param topLevelFiles The top level files are added to this list.
|
*
|
||||||
|
* @return The top level files.
|
||||||
*/
|
*/
|
||||||
private static void getTopLevelFiles(Content dataSource, List<AbstractFile> topLevelFiles) {
|
private static List<AbstractFile> getTopLevelFiles(Content dataSource) {
|
||||||
|
List<AbstractFile> topLevelFiles = new ArrayList<>();
|
||||||
Collection<AbstractFile> rootObjects = dataSource.accept(new GetRootDirectoryVisitor());
|
Collection<AbstractFile> rootObjects = dataSource.accept(new GetRootDirectoryVisitor());
|
||||||
if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) {
|
if (rootObjects.isEmpty() && dataSource instanceof AbstractFile) {
|
||||||
// The data source is itself a file to be processed.
|
// The data source is itself a file to be processed.
|
||||||
@ -317,74 +311,113 @@ final class IngestTasksScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return topLevelFiles;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* "Shuffles" the file task queues to ensure that there is at least one task
|
* Schedules file ingest tasks for the ingest manager's file ingest threads
|
||||||
* in the pending file ingest tasks queue, as long as there are still file
|
* by "shuffling" them through a sequence of three queues that allows for
|
||||||
* ingest tasks to be performed.
|
* the interleaving of tasks from different data source ingest jobs based on
|
||||||
|
* priority. The sequence of queues is:
|
||||||
|
*
|
||||||
|
* 1. The root file tasks priority queue, which contains file tasks for the
|
||||||
|
* root objects of the data sources that are being analyzed. For example,
|
||||||
|
* the root tasks for a disk image data source are typically the tasks for
|
||||||
|
* the contents of the root directories of the file systems. This queue is a
|
||||||
|
* priority queue that attempts to ensure that user directory content is
|
||||||
|
* analyzed before general file system content. It feeds into the directory
|
||||||
|
* tasks queue.
|
||||||
|
*
|
||||||
|
* 2. The directory file tasks queue, which contains root file tasks
|
||||||
|
* shuffled out of the root tasks queue, plus directory tasks discovered in
|
||||||
|
* the descent from the root tasks to the final leaf tasks in the content
|
||||||
|
* trees that are being analyzed for the data source ingest jobs. This queue
|
||||||
|
* is a FIFO queue. It feeds into the file tasks queue for the ingest
|
||||||
|
* manager's file ingest threads.
|
||||||
|
*
|
||||||
|
* 3. The file tasks queue for the ingest manager's file ingest threads.
|
||||||
|
* This queue is a blocking deque that is FIFO during a shuffle to maintain
|
||||||
|
* task prioritization, but LIFO when adding derived files to it directly
|
||||||
|
* during ingest. The reason for the LIFO additions is to give priority
|
||||||
|
* derived files of priority files.
|
||||||
|
*
|
||||||
|
* There is a fourth collection of file tasks, a "tracking" list, that keeps
|
||||||
|
* track of the file tasks that are either in the tasks queue for the file
|
||||||
|
* ingest threads, or are in the process of being analyzed in a file ingest
|
||||||
|
* thread. This queue is vital to the ingest task scheduler's ability to
|
||||||
|
* determine when all of the ingest tasks for a data source ingest job have
|
||||||
|
* been completed. It is also used to drive this shuffling algorithm -
|
||||||
|
* whenever this list is empty, the two "upstream" queues are "shuffled" to
|
||||||
|
* queue more tasks for the file ingest threads.
|
||||||
*/
|
*/
|
||||||
synchronized private void shuffleFileTaskQueues() {
|
synchronized private void shuffleFileTaskQueues() {
|
||||||
// This is synchronized because it is called both by synchronized
|
List<FileIngestTask> newTasksForFileIngestThreads = new LinkedList<>();
|
||||||
// methods of this ingest scheduler and an unsynchronized method of its
|
while (this.queuedAndRunningFileTasks.isEmpty()) {
|
||||||
// file tasks "dispenser".
|
/*
|
||||||
while (true) {
|
* If the directory file task queue is empty, move the highest
|
||||||
// Loop until either the pending file tasks queue is NOT empty
|
* priority root file task, if there is one, into it. If both the
|
||||||
// or the upstream queues that feed into it ARE empty.
|
* root and the directory file task queues are empty, there is
|
||||||
if (!this.pendingFileTasks.isEmpty()) {
|
* nothing left to shuffle, so exit.
|
||||||
// There are file tasks ready to be consumed, exit.
|
*/
|
||||||
return;
|
if (this.directoryFileTaskQueue.isEmpty()) {
|
||||||
}
|
if (!this.rootFileTaskQueue.isEmpty()) {
|
||||||
if (this.directoryTasks.isEmpty()) {
|
this.directoryFileTaskQueue.add(this.rootFileTaskQueue.pollFirst());
|
||||||
if (this.rootDirectoryTasks.isEmpty()) {
|
|
||||||
// There are no root directory tasks to move into the
|
|
||||||
// directory queue, exit.
|
|
||||||
return;
|
|
||||||
} else {
|
} else {
|
||||||
// Move the next root directory task into the
|
return;
|
||||||
// directories queue. Note that the task was already
|
|
||||||
// added to the tasks in progress list when the task was
|
|
||||||
// created in scheduleFileIngestTasks().
|
|
||||||
this.directoryTasks.add(this.rootDirectoryTasks.pollFirst());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Try to add the most recently added directory from the
|
/*
|
||||||
// directory tasks queue to the pending file tasks queue.
|
* Try to move the next task from the directory task queue into the
|
||||||
FileIngestTask directoryTask = this.directoryTasks.remove(this.directoryTasks.size() - 1);
|
* queue for the file ingest threads, if it passes the filter for
|
||||||
|
* the job. The file is added to the queue for the ingest threads
|
||||||
|
* AFTER the higher priority tasks that preceded it.
|
||||||
|
*/
|
||||||
|
final FileIngestTask directoryTask = this.directoryFileTaskQueue.pollLast();
|
||||||
if (shouldEnqueueFileTask(directoryTask)) {
|
if (shouldEnqueueFileTask(directoryTask)) {
|
||||||
addToPendingFileTasksQueue(directoryTask);
|
newTasksForFileIngestThreads.add(directoryTask);
|
||||||
} else {
|
this.queuedAndRunningFileTasks.add(directoryTask);
|
||||||
this.tasksInProgress.remove(directoryTask);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the directory contains subdirectories or files, try to
|
/*
|
||||||
// enqueue tasks for them as well.
|
* If the directory (or root level file) that was just queued has
|
||||||
|
* children, try to queue tasks for the children. Each child task
|
||||||
|
* will go into either the directory queue if it is a directory, or
|
||||||
|
* into the queue for the file ingest threads, if it passes the
|
||||||
|
* filter for the job. The file is added to the queue for the ingest
|
||||||
|
* threads AFTER the higher priority tasks that preceded it.
|
||||||
|
*/
|
||||||
final AbstractFile directory = directoryTask.getFile();
|
final AbstractFile directory = directoryTask.getFile();
|
||||||
try {
|
try {
|
||||||
for (Content child : directory.getChildren()) {
|
for (Content child : directory.getChildren()) {
|
||||||
if (child instanceof AbstractFile) {
|
if (child instanceof AbstractFile) {
|
||||||
AbstractFile file = (AbstractFile) child;
|
AbstractFile childFile = (AbstractFile) child;
|
||||||
FileIngestTask childTask = new FileIngestTask(directoryTask.getIngestJob(), file);
|
FileIngestTask childTask = new FileIngestTask(directoryTask.getIngestJob(), childFile);
|
||||||
if (file.hasChildren()) {
|
if (childFile.hasChildren()) {
|
||||||
// Found a subdirectory, put the task in the
|
this.directoryFileTaskQueue.add(childTask);
|
||||||
// pending directory tasks queue. Note the
|
|
||||||
// addition of the task to the tasks in progress
|
|
||||||
// list. This is necessary because this is the
|
|
||||||
// first appearance of this task in the queues.
|
|
||||||
this.tasksInProgress.add(childTask);
|
|
||||||
this.directoryTasks.add(childTask);
|
|
||||||
} else if (shouldEnqueueFileTask(childTask)) {
|
} else if (shouldEnqueueFileTask(childTask)) {
|
||||||
// Found a file, put the task directly into the
|
newTasksForFileIngestThreads.add(childTask);
|
||||||
// pending file tasks queue.
|
this.queuedAndRunningFileTasks.add(childTask);
|
||||||
this.tasksInProgress.add(childTask);
|
|
||||||
addToPendingFileTasksQueue(childTask);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (TskCoreException ex) {
|
} catch (TskCoreException ex) {
|
||||||
String errorMessage = String.format("An error occurred getting the children of %s", directory.getName()); //NON-NLS
|
logger.log(Level.SEVERE, String.format("Error getting the children of %s (objId=%d)", directory.getName(), directory.getId()), ex); //NON-NLS
|
||||||
logger.log(Level.SEVERE, errorMessage, ex);
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The files are added to the queue for the ingest threads AFTER the
|
||||||
|
* higher priority tasks that preceded them.
|
||||||
|
*/
|
||||||
|
for (FileIngestTask newTask : newTasksForFileIngestThreads) {
|
||||||
|
try {
|
||||||
|
this.fileTaskQueueForIngestThreads.addFirst(newTask);
|
||||||
|
} catch (InterruptedException ex) {
|
||||||
|
this.queuedAndRunningFileTasks.remove(newTask);
|
||||||
|
IngestTasksScheduler.logger.log(Level.INFO, "Ingest cancelled while blocked on a full file ingest threads queue", ex);
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -463,44 +496,44 @@ final class IngestTasksScheduler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a file ingest task to the blocking pending tasks queue.
|
* Checks whether or not a collection of ingest tasks includes a task for a
|
||||||
|
* given data source ingest job.
|
||||||
*
|
*
|
||||||
* @param task The task to add.
|
* @param tasks The tasks.
|
||||||
|
* @param job The data source ingest job.
|
||||||
|
*
|
||||||
|
* @return True if there are no tasks for the job, false otherwise.
|
||||||
*/
|
*/
|
||||||
synchronized private void addToPendingFileTasksQueue(FileIngestTask task) {
|
synchronized private boolean hasTasksForJob(Collection<? extends IngestTask> tasks, DataSourceIngestJob job) {
|
||||||
try {
|
long jobId = job.getId();
|
||||||
this.pendingFileTasks.putFirst(task);
|
for (IngestTask task : tasks) {
|
||||||
} catch (InterruptedException ex) {
|
if (task.getIngestJob().getId() == jobId) {
|
||||||
/**
|
return true;
|
||||||
* The current thread was interrupted while blocked on a full queue.
|
}
|
||||||
* Discard the task and reset the interrupted flag.
|
|
||||||
*/
|
|
||||||
this.tasksInProgress.remove(task);
|
|
||||||
Thread.currentThread().interrupt();
|
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Removes all of the ingest tasks associated with an ingest job from a
|
* Removes all of the ingest tasks associated with a data source ingest job
|
||||||
* tasks queue. The task is removed from the the tasks in progress list as
|
* from a tasks collection.
|
||||||
* well.
|
|
||||||
*
|
*
|
||||||
* @param taskQueue The queue from which to remove the tasks.
|
* @param tasks The collection from which to remove the tasks.
|
||||||
* @param jobId The id of the job for which the tasks are to be removed.
|
* @param job THe data source ingest job.
|
||||||
*/
|
*/
|
||||||
synchronized private void removeTasksForJob(Collection<? extends IngestTask> taskQueue, long jobId) {
|
synchronized private void removeTasksForJob(Collection<? extends IngestTask> tasks, DataSourceIngestJob job) {
|
||||||
Iterator<? extends IngestTask> iterator = taskQueue.iterator();
|
long jobId = job.getId();
|
||||||
|
Iterator<? extends IngestTask> iterator = tasks.iterator();
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
IngestTask task = iterator.next();
|
IngestTask task = iterator.next();
|
||||||
if (task.getIngestJob().getId() == jobId) {
|
if (task.getIngestJob().getId() == jobId) {
|
||||||
this.tasksInProgress.remove(task);
|
|
||||||
iterator.remove();
|
iterator.remove();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Counts the number of ingest tasks in a task queue for a given job.
|
* Counts the number of ingest tasks in a tasks collection for a given job.
|
||||||
*
|
*
|
||||||
* @param queue The queue for which to count tasks.
|
* @param queue The queue for which to count tasks.
|
||||||
* @param jobId The id of the job for which the tasks are to be counted.
|
* @param jobId The id of the job for which the tasks are to be counted.
|
||||||
@ -511,7 +544,7 @@ final class IngestTasksScheduler {
|
|||||||
Iterator<? extends IngestTask> iterator = queue.iterator();
|
Iterator<? extends IngestTask> iterator = queue.iterator();
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while (iterator.hasNext()) {
|
while (iterator.hasNext()) {
|
||||||
IngestTask task = (IngestTask) iterator.next();
|
IngestTask task = iterator.next();
|
||||||
if (task.getIngestJob().getId() == jobId) {
|
if (task.getIngestJob().getId() == jobId) {
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
@ -549,8 +582,15 @@ final class IngestTasksScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used to prioritize file ingest tasks in the root tasks queue so that
|
||||||
|
* user content is processed first.
|
||||||
|
*/
|
||||||
private static class AbstractFilePriority {
|
private static class AbstractFilePriority {
|
||||||
|
|
||||||
|
private AbstractFilePriority() {
|
||||||
|
}
|
||||||
|
|
||||||
enum Priority {
|
enum Priority {
|
||||||
|
|
||||||
LAST, LOW, MEDIUM, HIGH
|
LAST, LOW, MEDIUM, HIGH
|
||||||
@ -642,28 +682,43 @@ final class IngestTasksScheduler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wraps access to pending data source ingest tasks in the interface
|
* A blocking queue of data source ingest tasks for the ingest manager's
|
||||||
* required by the ingest threads.
|
* data source ingest threads.
|
||||||
*/
|
*/
|
||||||
private final class DataSourceIngestTaskQueue implements IngestTaskQueue {
|
private final class DataSourceIngestTaskQueue implements IngestTaskQueue {
|
||||||
|
|
||||||
|
private final BlockingQueue<DataSourceIngestTask> tasks = new LinkedBlockingQueue<>();
|
||||||
|
|
||||||
|
private void add(DataSourceIngestTask task) throws InterruptedException {
|
||||||
|
this.tasks.put(task);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IngestTask getNextTask() throws InterruptedException {
|
public IngestTask getNextTask() throws InterruptedException {
|
||||||
return IngestTasksScheduler.this.pendingDataSourceTasks.take();
|
return tasks.take();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Wraps access to pending file ingest tasks in the interface required by
|
* A blocking, LIFO queue of data source ingest tasks for the ingest
|
||||||
* the ingest threads.
|
* manager's data source ingest threads.
|
||||||
*/
|
*/
|
||||||
private final class FileIngestTaskQueue implements IngestTaskQueue {
|
private final class FileIngestTaskQueue implements IngestTaskQueue {
|
||||||
|
|
||||||
|
private final BlockingDeque<FileIngestTask> tasks = new LinkedBlockingDeque<>();
|
||||||
|
|
||||||
|
private void addFirst(FileIngestTask task) throws InterruptedException {
|
||||||
|
this.tasks.putFirst(task);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addLast(FileIngestTask task) throws InterruptedException {
|
||||||
|
this.tasks.putLast(task);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public IngestTask getNextTask() throws InterruptedException {
|
public IngestTask getNextTask() throws InterruptedException {
|
||||||
FileIngestTask task = IngestTasksScheduler.this.pendingFileTasks.takeFirst();
|
return tasks.takeFirst();
|
||||||
shuffleFileTaskQueues();
|
|
||||||
return task;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -674,10 +729,10 @@ final class IngestTasksScheduler {
|
|||||||
class IngestJobTasksSnapshot {
|
class IngestJobTasksSnapshot {
|
||||||
|
|
||||||
private final long jobId;
|
private final long jobId;
|
||||||
|
private final long dsQueueSize;
|
||||||
private final long rootQueueSize;
|
private final long rootQueueSize;
|
||||||
private final long dirQueueSize;
|
private final long dirQueueSize;
|
||||||
private final long fileQueueSize;
|
private final long fileQueueSize;
|
||||||
private final long dsQueueSize;
|
|
||||||
private final long runningListSize;
|
private final long runningListSize;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -687,11 +742,11 @@ final class IngestTasksScheduler {
|
|||||||
*/
|
*/
|
||||||
IngestJobTasksSnapshot(long jobId) {
|
IngestJobTasksSnapshot(long jobId) {
|
||||||
this.jobId = jobId;
|
this.jobId = jobId;
|
||||||
this.rootQueueSize = countTasksForJob(IngestTasksScheduler.this.rootDirectoryTasks, jobId);
|
this.rootQueueSize = countTasksForJob(IngestTasksScheduler.this.rootFileTaskQueue, jobId);
|
||||||
this.dirQueueSize = countTasksForJob(IngestTasksScheduler.this.directoryTasks, jobId);
|
this.dirQueueSize = countTasksForJob(IngestTasksScheduler.this.directoryFileTaskQueue, jobId);
|
||||||
this.fileQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingFileTasks, jobId);
|
this.fileQueueSize = countTasksForJob(IngestTasksScheduler.this.fileTaskQueueForIngestThreads.tasks, jobId);
|
||||||
this.dsQueueSize = countTasksForJob(IngestTasksScheduler.this.pendingDataSourceTasks, jobId);
|
this.dsQueueSize = countTasksForJob(IngestTasksScheduler.this.dataSourceTaskQueueForIngestThreads.tasks, jobId);
|
||||||
this.runningListSize = countTasksForJob(IngestTasksScheduler.this.tasksInProgress, jobId);
|
this.runningListSize = countTasksForJob(IngestTasksScheduler.this.queuedAndRunningDataSourceTasks, jobId) + countTasksForJob(IngestTasksScheduler.this.queuedAndRunningFileTasks, jobId);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
Loading…
x
Reference in New Issue
Block a user