import subprocess
import os.path
import shutil
import time
import datetime
import xml
from xml.dom.minidom import parse, parseString


#-------------------------------------------------------------#
# Parses argv and stores booleans to match command line input #
#-------------------------------------------------------------#
class Args:
    """Command line options of the regression script.

    Every flag starts out disabled; parse() consumes sys.argv and switches
    on the attributes matching the options it finds.
    """

    def __init__(self):
        # -f FILE: run on a single file
        self.single = False
        self.single_file = ""
        # -r / --rebuild
        self.rebuild = False
        # -l / --list CONFIG: run from a configuration file
        self.list = False
        self.config_file = ""
        # -u / --unallocated
        self.unallocated = False
        # -i / --ignore
        self.ignore = False
        # -d / --delete
        self.delete = False
        # -v / --verbose
        self.verbose = False
        # -e / --exception STRING
        self.exception = False
        self.exception_string = ""

    def parse(self):
        """Consume sys.argv and record the options found on this object.

        sys.argv is emptied as a side effect: the program name is dropped
        first, then every argument is popped as it is handled.

        Returns:
            True when all arguments were understood. False when help was
            requested, an unknown argument was seen, or -f / -l was given
            without its value.
        """
        sys.argv.pop(0)
        while sys.argv:
            arg = sys.argv.pop(0)
            if arg == "-f":
                # A missing value is detected explicitly rather than with a
                # bare "except:", so failures inside path_fix()/printout()
                # are no longer misreported as a missing file.
                if not sys.argv:
                    printerror("Error: No single file given.\n")
                    return False
                arg = sys.argv.pop(0)
                printout("Running on a single file:")
                printout(path_fix(arg) + "\n")
                self.single = True
                self.single_file = path_fix(arg)
            elif arg in ("-r", "--rebuild"):
                printout("Running in rebuild mode.\n")
                self.rebuild = True
            elif arg in ("-l", "--list"):
                if not sys.argv:
                    printerror("Error: No configuration file given.\n")
                    return False
                arg = sys.argv.pop(0)
                printout("Running from configuration file:")
                printout(arg + "\n")
                self.list = True
                self.config_file = arg
            elif arg in ("-u", "--unallocated"):
                printout("Ignoring unallocated space.\n")
                self.unallocated = True
            elif arg in ("-i", "--ignore"):
                printout("Ignoring the ./input directory.\n")
                self.ignore = True
            elif arg in ("-d", "--delete"):
                printout("Deleting Solr index after ingest.\n")
                self.delete = True
            elif arg in ("-v", "--verbose"):
                printout("Running in verbose mode:")
                printout("Printing all thrown exceptions.\n")
                self.verbose = True
            elif arg in ("-e", "--exception"):
                if not sys.argv:
                    # Unlike -f and -l, a missing value here is reported
                    # but does not abort parsing (behavior kept as-is).
                    printerror("Error: No exception string given.")
                else:
                    arg = sys.argv.pop(0)
                    printout("Running in exception mode: ")
                    printout("Printing all exceptions with the string '" + arg + "'\n")
                    self.exception = True
                    self.exception_string = arg
            else:
                # -h / --help and any unrecognized argument: show the usage
                # text and stop.
                printout(usage())
                return False
        # Report that the args were successfully parsed
        return True


#-----------------------------------------------------#
# Holds all global variables for each individual test #
#-----------------------------------------------------#
class TestAutopsy:
    """State shared by the steps of one test.

    reset() restores the per-test fields to their defaults. The fields
    assigned directly in __init__ (output_dir, common_log, csv, html_log,
    autopsy_version) are not touched by reset().
    """

    def __init__(self):
        # Fields that reset() leaves alone
        self.output_dir = ""
        self.common_log = ""
        self.csv = ""
        self.html_log = ""
        self.autopsy_version = ""
        # Everything else starts from the same defaults reset() restores
        self.reset()

    def get_image_name(self, image_file):
        """Return the file name of image_file without directory or extension.

        Both "/" and "\\" count as directory separators, whichever comes
        last. Only the text after the last "." of the file name itself is
        removed, so dots in directory names are ignored and a name without
        an extension is returned whole.
        """
        last_separator = max(image_file.rfind("/"), image_file.rfind("\\"))
        # rfind() yields -1 when no separator exists, so +1 starts at 0
        base = image_file[last_separator + 1:]
        ext_start = base.rfind(".")
        if ext_start == -1:
            return base
        return base[:ext_start]

    def ant_to_string(self):
        """Return the ant arguments as one string, each followed by a space."""
        return "".join(arg + " " for arg in self.ant)

    def reset(self):
        """Restore the per-test fields to their default values."""
        # Paths:
        self.input_dir = make_local_path("input")
        self.gold = "gold"
        # Logs:
        self.antlog_dir = ""
        # Error tracking
        self.printerror = []
        self.printout = []
        self.report_passed = False
        # Image info:
        self.image_file = ""
        self.image_name = ""
        # Ant info:
        self.known_bad_path = ""
        self.keyword_path = ""
        self.nsrl_path = ""
        # Case info
        self.start_date = ""
        self.end_date = ""
        self.total_test_time = ""
        self.total_ingest_time = ""
        self.heap_space = ""
        self.service_times = ""

        # Set the timeout to something huge
        # The entire tester should not timeout before this number in ms
        # However it only seems to take about half this time
        # And it's very buggy, so we're being careful
        # NOTE(review): 24 hours in ms is 24 * 60 * 60 * 1000; the extra
        # factor of 1000 looks like deliberate padding -- confirm.
        self.timeout = 24 * 60 * 60 * 1000 * 1000
        self.ant = []
#---------------------------------------------------------#
# Holds all database information from querying autopsy.db #
# and standard.db. Initialized when the autopsy.db file   #
# is compared to the gold standard.                       #
#---------------------------------------------------------#
class Database:
    """Counts and comparison messages for the gold and autopsy databases."""

    def __init__(self):
        # A new instance starts from the same empty state clear() restores.
        self.clear()

    def clear(self):
        """Reset every count and comparison list to its empty default."""
        self.gold_artifacts = []
        self.autopsy_artifacts = []
        self.gold_attributes = 0
        self.autopsy_attributes = 0
        self.gold_objects = 0
        self.autopsy_objects = 0
        self.artifact_comparison = []
        self.attribute_comparison = []

    def get_artifacts_count(self):
        """Return the sum of the entries in autopsy_artifacts (0 if empty)."""
        return sum(self.autopsy_artifacts)

    def get_artifact_comparison(self):
        """Return the artifact comparison messages joined with "; ".

        Returns "All counts matched" when no message was recorded.
        """
        if not self.artifact_comparison:
            return "All counts matched"
        return "; ".join(self.artifact_comparison)

    def get_attribute_comparison(self):
        """Return the attribute comparison messages joined with ";".

        Returns "All counts matched" when no message was recorded.
        """
        if not self.attribute_comparison:
            return "All counts matched"
        # NOTE(review): the separator here is ";" while
        # get_artifact_comparison uses "; " -- kept as-is so the emitted
        # text does not change.
        return ";".join(self.attribute_comparison)
os.path.join(inDir, "nsrl.txt-md5.idx") - else: - knownBadPath = os.path.join(cwd,inDir,"notablehashes.txt-md5.idx") - keywordPath = os.path.join(cwd,inDir,"notablekeywords.xml") - nsrlPath = os.path.join(cwd,inDir,"nsrl.txt-md5.idx") - - knownBadPath = knownBadPath.replace("/", "\\") - keywordPath = keywordPath.replace("/", "\\") - nsrlPath = nsrlPath.replace("/", "\\") +#----------------------------------# +# Main testing functions # +#----------------------------------# - antlog = os.path.join(cwd,outDir,testCaseName,"antlog.txt") - antlog = antlog.replace("/", "\\") - - timeout = 24 * 60 * 60 * 1000 # default of 24 hours, just to be safe - size = getImageSize(inFile,ignoreUnalloc, list) # get the size in bytes - timeout = (size / 1000) / 1000 # convert to MB - timeout = timeout * 1000 # convert sec to ms - timeout = timeout * 1.5 # add a little extra umph - timeout = timeout * 25 # decided we needed A LOT extra to be safe - - # set up ant target - args = ["ant"] - args.append("-q") - args.append("-f") - args.append(os.path.join("..","build.xml")) - args.append("regression-test") - args.append("-l") - args.append(antlog) - args.append("-Dimg_path=" + testInFile) - args.append("-Dknown_bad_path=" + knownBadPath) - args.append("-Dkeyword_path=" + keywordPath) - args.append("-Dnsrl_path=" + nsrlPath) - args.append("-Dgold_path=" + os.path.join(cwd,goldDir).replace("/", "\\")) - args.append("-Dout_path=" + os.path.join(cwd,outDir,testCaseName).replace("/", "\\")) - args.append("-Dignore_unalloc=" + "%s" % ignoreUnalloc) - args.append("-Dtest.timeout=" + str(timeout)) - - # print the ant testing command - print "CMD: " + " ".join(args) - - print "Starting test..." 
- # thread.start_new_thread(AutopsyCrash(inFile, ignoreUnalloc, list)) - # thread.start_new_thread(subprocess.call(args)) - subprocess.call(args) - - -def getImageSize(inFile, ignoreUnalloc, list): - name = imageName(inFile, ignoreUnalloc, list) - size = 0 - if list: - size += os.path.getsize(inFile) - else: - path = os.path.join(".",inDir) - - for files in os.listdir(path): - filename = os.path.splitext(files)[0] - if filename == name: - filepath = os.path.join(path, files) - if not os.path.samefile(filepath, inFile): - size += os.path.getsize(filepath) - size += os.path.getsize(inFile) - return size - -def testCompareToGold(inFile, ignoreUnalloc, list): - - global CommonLog - cwd = wgetcwd() - - print "-----------------------------------------------" - print "Comparing results for " + inFile + " with gold." - - name = imageName(inFile, ignoreUnalloc, list) - - goldFile = os.path.join("./",goldDir,name,"standard.db") - testFile = os.path.join("./",outDir,name,"AutopsyTestCase","autopsy.db") - if os.path.isfile(goldFile) == False: - markError("No gold standard exists", inFile) - return - if os.path.isfile(testFile) == False: - markError("No database exists", inFile) - return - - # For now, comparing size of blackboard_artifacts, - # blackboard_attributes, - # and tsk_objects. 
- goldConn = sqlite3.connect(goldFile) - goldC = goldConn.cursor() - testConn = sqlite3.connect(testFile) - testC = testConn.cursor() - - CommonLog.write("Comparing Artifacts: \n\r") - print("Comparing Artifacts: ") - - # Keep range in sync with number of items in ARTIFACT_TYPE enum - for type_id in range(1, 13): - goldC.execute("select count(*) from blackboard_artifacts where artifact_type_id=%d" % type_id) - goldArtifacts = goldC.fetchone()[0] - testC.execute("select count(*) from blackboard_artifacts where artifact_type_id=%d" % type_id) - testArtifacts = testC.fetchone()[0] - if(goldArtifacts != testArtifacts): - errString = str("Artifact counts do not match for type id %d!: " % type_id) - errString += str("Gold: %d, Test: %d" % (goldArtifacts, testArtifacts)) - CommonLog.write(errString + "\n\r") - markError(errString, inFile) - else: - CommonLog.write("Artifact counts for artifact type id %d match!" % type_id + "\n\r") - print("Artifact counts for artifact type id %d match!" % type_id) - - CommonLog.write("Comparing Attributes: \n\r") - print("Comparing Attributes: ") - goldC.execute("select count(*) from blackboard_attributes") - goldAttributes = goldC.fetchone()[0] - testC.execute("select count(*) from blackboard_attributes") - testAttributes = testC.fetchone()[0] - if(goldAttributes != testAttributes): - errString = "Attribute counts do not match!: " - errString += str("Gold: %d, Test: %d" % (goldAttributes, testAttributes)) - CommonLog.write(errString + "\n\r") - markError(errString, inFile) - else: - print("Attribute counts match!") - print("Comparing TSK Objects: ") - goldC.execute("select count(*) from tsk_objects") - goldObjects = goldC.fetchone()[0] - testC.execute("select count(*) from tsk_objects") - testObjects = testC.fetchone()[0] - if(goldObjects != testObjects): - errString = "TSK Object counts do not match!: " - errString += str("Gold: %d, Test: %d" % (goldObjects, testObjects)) - CommonLog.write(errString + "\n\r") - markError(errString, 
inFile) - else: - CommonLog.write("Object counts match!" + "\n\r") - print("Object counts match!") - -def clearGoldDir(inFile, ignoreUnalloc, list): - - cwd = wgetcwd() - inFile = imageName(inFile, ignoreUnalloc, list) - if os.path.exists(os.path.join(cwd,goldDir,inFile)): - shutil.rmtree(os.path.join(cwd,goldDir,inFile)) - os.makedirs(os.path.join(cwd,goldDir,inFile)) - print "Clearing gold directory: " + os.path.join(cwd,goldDir,inFile) - -def copyTestToGold(inFile, ignoreUnalloc, list): - print "------------------------------------------------" - print "Recreating gold standard from results." - inFile = imageName(inFile, ignoreUnalloc, list) - cwd = wgetcwd() - goldFile = os.path.join("./",goldDir,inFile,"standard.db") - testFile = os.path.join("./",outDir,inFile,"AutopsyTestCase","autopsy.db") - shutil.copy(testFile, goldFile) - print "Recreated gold standards" - -def copyReportToGold(inFile, ignoreUnalloc, list): - print "------------------------------------------------" - print "Recreating gold report from results." 
- inFile = imageName(inFile, ignoreUnalloc, list) - cwd = wgetcwd() - goldReport = os.path.join("./",goldDir,inFile,"report.html") - testReportPath = os.path.join("./",outDir,inFile,"AutopsyTestCase","Reports") - # Because Java adds a timestamp to the report file, one can't call it - # directly, so one must get a list of files in the dir, which are only - # reports, then filter for the .html report - testReport = None - for files in os.listdir(testReportPath): - if files.endswith(".html"): # Get the HTML one - testReport = os.path.join("./",outDir,inFile,"AutopsyTestCase","Reports",files) - if testReport is None: - markError("No test report exists", inFile) - return - else: - shutil.copy(testReport, goldReport) - print "Report copied" - -def deleteKeywordFiles(inFile, ignoreUnalloc, list): - print "------------------------------------------------" - print "Deleting Keyword Search files" - inFile = imageName(inFile, ignoreUnalloc, list) - cwd = wgetcwd() - shutil.rmtree(os.path.join("./", outDir, inFile, "AutopsyTestCase", "KeywordSearch")) - -def testCompareReports(inFile, ignoreUnalloc, list): - print "------------------------------------------------" - print "Comparing report to golden report." 
- name = imageName(inFile, ignoreUnalloc, list) - goldReport = os.path.join("./",goldDir,name,"report.html") - testReportPath = os.path.join("./",outDir,name,"AutopsyTestCase","Reports") - # Because Java adds a timestamp to the report file, one can't call it - # directly, so one must get a list of files in the dir, which are only - # reports, then filter for the .html report - testReport = None - for files in os.listdir(testReportPath): - if files.endswith(".html"): # Get the HTML one - testReport = os.path.join("./",outDir,name,"AutopsyTestCase","Reports",files) - if os.path.isfile(goldReport) == False: - markError("No gold report exists", inFile) - return - if testReport is None: - markError("No test report exists", inFile) - return - # Compare the reports - goldFile = open(goldReport) - testFile = open(testReport) - # Search for