#!/usr/bin/python
#en_US.UTF-8
import sys
import sqlite3
import re
import subprocess
import os.path
import shutil
import time
import xml
#import thread
#import wmi
# import win32gui
from xml.dom.minidom import parse, parseString

# Last modified 7/25/12 @2:30pm
# Usage: ./regression.py [-f FILE] OR [-l CONFIG] [OPTIONS]
# Run the RegressionTest.java file, and compare the result with a gold standard
# When the -f flag is set, this script only tests the image given by FILE.
# An indexed NSRL database is expected at ./input/nsrl.txt-md5.idx,
# and an indexed notable hash database at ./input/notablehashes.txt-md5.idx
# In addition, any keywords to search for must be in ./input/notablekeywords.xml
# When the -l flag is set, the script looks for a config.xml file of the given name
# where images are stored. The above input files can be outsourced to different locations
# from the config.xml. For usage notes please see the example "config.xml" in
# the /script folder.
# Options:
#   -r, --rebuild      Rebuild the gold standards from the test results for each image
#   -i, --ignore       Ignores the ./input directory when searching for files
#   -u, --unallocated  Ignores unallocated space when ingesting. Faster, but less accurate results.
#   -d, --delete       Disables the deletion of Solr indexing directory generated by Ingest. Uses more disk space.
#   -v, --verbose      Prints logged warnings after each ingest
#   -e, --exception    When followed by a string, will only print out the exceptions that occurred that contain the string. Case sensitive.

hadErrors = False   # If any of the tests failed
results = {}        # Dictionary in which to store map ({imgname}->errors)
goldDir = "gold"    # Directory for gold standards (files should be ./gold/{imgname}/standard.db)
inDir = "input"     # Image files, hash dbs, and keywords.
# Results will be in ./output/{datetime}/{imgname}/ outDir = os.path.join("output",time.strftime("%Y.%m.%d-%H.%M")) CommonLog = "" # def AutopsyCrash(image, ignoreUnalloc, list): # cwd = wgetcwd() # x = 20 #seconds to wait between passes # name = imageName(image, ignoreUnalloc, list) # TestFolder = os.path.join(cwd, outDir, name, "AutopsyTestCase") # y = True #default return of 'Crashed' # i = 0 #number of passes to run # while(i < 2): # print "Sleeping background process for %s seconds" %(str(x)) # time.sleep(x) # if(os.path.exists(TestFolder)): # y = False # 'Did not Crash' # break # else: # i+=1 # if y: # print "Autopsy failed to initialize properly, restarting from last image..." # c = wmi.WMI() # for proc in c.Win32_Process(): # if proc.name == "NetBeans Platform 7.2": # proc.kill() # break # testAddImageIngest(image, ignoreUnalloc, list) # return 1 # Run ingest on all the images in 'input', using notablekeywords.xml and notablehashes.txt-md5.idx def testAddImageIngest(inFile, ignoreUnalloc, list): print "================================================" print "Ingesting Image: " + inFile # Set up case directory path testCaseName = imageName(inFile, ignoreUnalloc, list) if os.path.exists(os.path.join(outDir,testCaseName)): shutil.rmtree(os.path.join(outDir,testCaseName)) os.makedirs(os.path.join(outDir,testCaseName)) if not os.path.exists(inDir): markError("input dir does not exist", inFile) cwd = wgetcwd() testInFile = wabspath(inFile) global CommonLog CommonLog = open(os.path.join(cwd, outDir, imageName(inFile, ignoreUnalloc, list), "CommonLog.txt"), "w") #In this function, because it must be after the makedirs # NEEDS windows path (backslashes) for .E00 images to work testInFile = testInFile.replace("/", "\\") if list: knownBadPath = os.path.join(inDir, "notablehashes.txt-md5.idx") keywordPath = os.path.join(inDir, "notablekeywords.xml") nsrlPath = os.path.join(inDir, "nsrl.txt-md5.idx") else: knownBadPath = 
os.path.join(cwd,inDir,"notablehashes.txt-md5.idx") keywordPath = os.path.join(cwd,inDir,"notablekeywords.xml") nsrlPath = os.path.join(cwd,inDir,"nsrl.txt-md5.idx") knownBadPath = knownBadPath.replace("/", "\\") keywordPath = keywordPath.replace("/", "\\") nsrlPath = nsrlPath.replace("/", "\\") antlog = os.path.join(cwd,outDir,testCaseName,"antlog.txt") antlog = antlog.replace("/", "\\") timeout = 24 * 60 * 60 * 1000 # default of 24 hours, just to be safe size = getImageSize(inFile,ignoreUnalloc, list) # get the size in bytes timeout = (size / 1000) / 1000 # convert to MB timeout = timeout * 1000 # convert sec to ms timeout = timeout * 1.5 # add a little extra umph timeout = timeout * 25 # decided we needed A LOT extra to be safe # set up ant target args = ["ant"] args.append("-q") args.append("-f") args.append(os.path.join("..","build.xml")) args.append("regression-test") args.append("-l") args.append(antlog) args.append("-Dimg_path=" + testInFile) args.append("-Dknown_bad_path=" + knownBadPath) args.append("-Dkeyword_path=" + keywordPath) args.append("-Dnsrl_path=" + nsrlPath) args.append("-Dgold_path=" + os.path.join(cwd,goldDir).replace("/", "\\")) args.append("-Dout_path=" + os.path.join(cwd,outDir,testCaseName).replace("/", "\\")) args.append("-Dignore_unalloc=" + "%s" % ignoreUnalloc) args.append("-Dtest.timeout=" + str(timeout)) # print the ant testing command print "CMD: " + " ".join(args) print "Starting test..." 
# thread.start_new_thread(AutopsyCrash(inFile, ignoreUnalloc, list)) # thread.start_new_thread(subprocess.call(args)) subprocess.call(args) def getImageSize(inFile, ignoreUnalloc, list): name = imageName(inFile, ignoreUnalloc, list) size = 0 if list: size += os.path.getsize(inFile) else: path = os.path.join(".",inDir) for files in os.listdir(path): filename = os.path.splitext(files)[0] if filename == name: filepath = os.path.join(path, files) if not os.path.samefile(filepath, inFile): size += os.path.getsize(filepath) size += os.path.getsize(inFile) return size def testCompareToGold(inFile, ignoreUnalloc, list): global CommonLog cwd = wgetcwd() print "-----------------------------------------------" print "Comparing results for " + inFile + " with gold." name = imageName(inFile, ignoreUnalloc, list) goldFile = os.path.join("./",goldDir,name,"standard.db") testFile = os.path.join("./",outDir,name,"AutopsyTestCase","autopsy.db") if os.path.isfile(goldFile) == False: markError("No gold standard exists", inFile) return if os.path.isfile(testFile) == False: markError("No database exists", inFile) return # For now, comparing size of blackboard_artifacts, # blackboard_attributes, # and tsk_objects. 
goldConn = sqlite3.connect(goldFile) goldC = goldConn.cursor() testConn = sqlite3.connect(testFile) testC = testConn.cursor() CommonLog.write("Comparing Artifacts: \n\r") print("Comparing Artifacts: ") # Keep range in sync with number of items in ARTIFACT_TYPE enum for type_id in range(1, 13): goldC.execute("select count(*) from blackboard_artifacts where artifact_type_id=%d" % type_id) goldArtifacts = goldC.fetchone()[0] testC.execute("select count(*) from blackboard_artifacts where artifact_type_id=%d" % type_id) testArtifacts = testC.fetchone()[0] if(goldArtifacts != testArtifacts): errString = str("Artifact counts do not match for type id %d!: " % type_id) errString += str("Gold: %d, Test: %d" % (goldArtifacts, testArtifacts)) CommonLog.write(errString + "\n\r") markError(errString, inFile) else: CommonLog.write("Artifact counts for artifact type id %d match!" % type_id + "\n\r") print("Artifact counts for artifact type id %d match!" % type_id) CommonLog.write("Comparing Attributes: \n\r") print("Comparing Attributes: ") goldC.execute("select count(*) from blackboard_attributes") goldAttributes = goldC.fetchone()[0] testC.execute("select count(*) from blackboard_attributes") testAttributes = testC.fetchone()[0] if(goldAttributes != testAttributes): errString = "Attribute counts do not match!: " errString += str("Gold: %d, Test: %d" % (goldAttributes, testAttributes)) CommonLog.write(errString + "\n\r") markError(errString, inFile) else: print("Attribute counts match!") print("Comparing TSK Objects: ") goldC.execute("select count(*) from tsk_objects") goldObjects = goldC.fetchone()[0] testC.execute("select count(*) from tsk_objects") testObjects = testC.fetchone()[0] if(goldObjects != testObjects): errString = "TSK Object counts do not match!: " errString += str("Gold: %d, Test: %d" % (goldObjects, testObjects)) CommonLog.write(errString + "\n\r") markError(errString, inFile) else: CommonLog.write("Object counts match!" 
+ "\n\r") print("Object counts match!") def clearGoldDir(inFile, ignoreUnalloc, list): cwd = wgetcwd() inFile = imageName(inFile, ignoreUnalloc, list) if os.path.exists(os.path.join(cwd,goldDir,inFile)): shutil.rmtree(os.path.join(cwd,goldDir,inFile)) os.makedirs(os.path.join(cwd,goldDir,inFile)) print "Clearing gold directory: " + os.path.join(cwd,goldDir,inFile) def copyTestToGold(inFile, ignoreUnalloc, list): print "------------------------------------------------" print "Recreating gold standard from results." inFile = imageName(inFile, ignoreUnalloc, list) cwd = wgetcwd() goldFile = os.path.join("./",goldDir,inFile,"standard.db") testFile = os.path.join("./",outDir,inFile,"AutopsyTestCase","autopsy.db") shutil.copy(testFile, goldFile) print "Recreated gold standards" def copyReportToGold(inFile, ignoreUnalloc, list): print "------------------------------------------------" print "Recreating gold report from results." inFile = imageName(inFile, ignoreUnalloc, list) cwd = wgetcwd() goldReport = os.path.join("./",goldDir,inFile,"report.html") testReportPath = os.path.join("./",outDir,inFile,"AutopsyTestCase","Reports") # Because Java adds a timestamp to the report file, one can't call it # directly, so one must get a list of files in the dir, which are only # reports, then filter for the .html report testReport = None for files in os.listdir(testReportPath): if files.endswith(".html"): # Get the HTML one testReport = os.path.join("./",outDir,inFile,"AutopsyTestCase","Reports",files) if testReport is None: markError("No test report exists", inFile) return else: shutil.copy(testReport, goldReport) print "Report copied" def deleteKeywordFiles(inFile, ignoreUnalloc, list): print "------------------------------------------------" print "Deleting Keyword Search files" inFile = imageName(inFile, ignoreUnalloc, list) cwd = wgetcwd() shutil.rmtree(os.path.join("./", outDir, inFile, "AutopsyTestCase", "KeywordSearch")) def testCompareReports(inFile, ignoreUnalloc, list): 
print "------------------------------------------------" print "Comparing report to golden report." name = imageName(inFile, ignoreUnalloc, list) goldReport = os.path.join("./",goldDir,name,"report.html") testReportPath = os.path.join("./",outDir,name,"AutopsyTestCase","Reports") # Because Java adds a timestamp to the report file, one can't call it # directly, so one must get a list of files in the dir, which are only # reports, then filter for the .html report testReport = None for files in os.listdir(testReportPath): if files.endswith(".html"): # Get the HTML one testReport = os.path.join("./",outDir,name,"AutopsyTestCase","Reports",files) if os.path.isfile(goldReport) == False: markError("No gold report exists", inFile) return if testReport is None: markError("No test report exists", inFile) return # Compare the reports goldFile = open(goldReport) testFile = open(testReport) # Search for