Improvements

This commit is contained in:
Eugene Livis 2022-04-20 15:34:33 -04:00
parent e99bb63995
commit 0527d3c62e

View File

@ -41,9 +41,11 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Observable;
import java.util.Set;
import java.util.UUID;
@ -1981,76 +1983,95 @@ final class AutoIngestManager extends Observable implements PropertyChangeListen
AutoIngestCleanup cleanup = cleanups.iterator().next();
sysLogger.log(Level.INFO, "CleanupSchedulingTask - trying to get ingest job lock");
// NOTE1: Make a copy of the completed jobs list. There is no need to hold the jobs
// lock during the entire very lengthy cleanup operation. Jobs lock is also used
// to process incoming messages from other nodes so we don't want to hold it for hours.
// NOTE2: Create a map of cases and data sources, so that we only attempt to clean
// each case once. Otherwise, if there are many completed jobs for a case
// that has jobs being processed by other AINs, we will be stuck attempting to clean
// that case over and over again, and unable to get locks.
Map<Path, List<Path>> casesToJobsMap = new HashMap<>();
synchronized (jobsLock) {
sysLogger.log(Level.INFO, "CleanupSchedulingTask - got ingest job lock");
String deletedCaseName = "";
for (AutoIngestJob job : completedJobs) {
// do cleanup for each job
Path casePath = job.getCaseDirectoryPath();
Path dsPath = job.getManifest().getDataSourcePath();
boolean success = true;
if (casePath.toFile().exists()) {
sysLogger.log(Level.INFO, "Cleaning up case {0} for job {1}", new Object[]{casePath.toString(), dsPath.toString()});
success = cleanup.runCleanupTask(casePath, AutoIngestCleanup.DeleteOptions.DELETE_INPUT_AND_OUTPUT, new DoNothingProgressIndicator());
} else {
// case directory has been deleted, likely during cleanup of previous completed job.
// make sure data source is deleted as well because we will never be able to run automated
// cleanup on a case directory that has been deleted.
List<Path> list = casesToJobsMap.get(casePath);
if (list == null) {
list = new ArrayList<>();
casesToJobsMap.put(casePath, list);
}
list.add(dsPath);
}
}
sysLogger.log(Level.INFO, "CleanupSchedulingTask - got ingest job lock");
String deletedCaseName = "";
for (Map.Entry<Path, List<Path>> caseData : casesToJobsMap.entrySet()) {
// do cleanup for each case and data source of the case
Path casePath = caseData.getKey();
boolean success = true;
if (casePath.toFile().exists()) {
sysLogger.log(Level.INFO, "Cleaning up case {0} ", casePath.toString());
success = cleanup.runCleanupTask(casePath, AutoIngestCleanup.DeleteOptions.DELETE_INPUT_AND_OUTPUT, new DoNothingProgressIndicator());
} else {
// case directory has been deleted. make sure data source is deleted as well
// because we will never be able to run automated cleanup on a case directory
// that has been deleted.
for (Path dsPath : caseData.getValue()) {
File dsFile = dsPath.toFile();
if (dsFile.exists()) {
sysLogger.log(Level.INFO, "Cleaning up data source {0} for deleted case {1}", new Object[]{dsPath.toString(), casePath.toString()});
if (!FileUtil.deleteFileDir(dsFile)) {
sysLogger.log(Level.SEVERE, String.format("Failed to delete data source file at %s for %s", dsPath.toString(), job.getManifest().getCaseName()));
// there might be some file handles that remain open, try again after next job.
continue;
sysLogger.log(Level.SEVERE, String.format("Failed to delete data source file at %s ", dsPath.toString()));
}
} else {
// the data source has already been deleted
continue;
}
}
}
if (success) {
sysLogger.log(Level.INFO, "Cleanup task successfully completed for job: {0}", dsPath.toString());
} else {
sysLogger.log(Level.WARNING, "Cleanup task failed for job: {0}", dsPath.toString());
continue;
}
if (success) {
sysLogger.log(Level.INFO, "Cleanup task successfully completed for case: {0}", casePath.toString());
} else {
sysLogger.log(Level.WARNING, "Cleanup task failed for case: {0}", casePath.toString());
continue;
}
// verify that the data source and case directory have indeed been deleted
// NOTE: the code below assumes that case directory and all data sources are being deleted
// during cleanup. This may not be the case in future implementations of AutoIngestCleanup
// verify that the data source and case directory have indeed been deleted
for (Path dsPath : caseData.getValue()) {
if (dsPath.toFile().exists()) {
// data source has NOT been deleted - keep the completed job so that we
// attempt the cleanup again later
// data source has NOT been deleted
sysLogger.log(Level.SEVERE, "Data source has not been deleted during cleanup: {0}", dsPath.toString());
continue;
}
if (casePath.toFile().exists()) {
// case output directory has NOT been deleted - keep the completed job so that we
// attempt the cleanup again later
sysLogger.log(Level.SEVERE, "Case directory has not been deleted during cleanup: {0}", casePath.toString());
continue;
}
deletedCaseName = job.getManifest().getCaseName();
}
if (!deletedCaseName.isEmpty()) {
// send message that at least one case has been deleted. This message triggers input directory
// re-scan on other AINs so only send one message after all cleanup is complete
final String name = deletedCaseName;
new Thread(() -> {
eventPublisher.publishRemotely(new AutoIngestCaseDeletedEvent(LOCAL_HOST_NAME, name,
getSystemUserNameProperty()));
}).start();
// trigger input scan which will update the ZK nodes and tables
scanInputDirsNow();
if (casePath.toFile().exists()) {
// case output directory has NOT been deleted, or at least some contents of the
// case directory remain
sysLogger.log(Level.SEVERE, "Case directory has not been deleted during cleanup: {0}", casePath.toString());
}
deletedCaseName = casePath.toString();
}
if (!deletedCaseName.isEmpty()) {
// send message that at least one case has been deleted. This message triggers input directory
// re-scan on other AINs so only send one message after all cleanup is complete. The actual
// case name is not relevant either and is not being tracked on the receiving side.
final String name = deletedCaseName;
new Thread(() -> {
eventPublisher.publishRemotely(new AutoIngestCaseDeletedEvent(LOCAL_HOST_NAME, name,
getSystemUserNameProperty()));
}).start();
// trigger input scan which will update the ZK nodes and tables
scanInputDirsNow();
}
}
} catch (Exception ex) {
sysLogger.log(Level.SEVERE, "Unexpected exception in CleanupSchedulingTask", ex); //NON-NLS
}