#!/usr/bin/python
#en_US.UTF-8
import sys
import sqlite3
import re
import subprocess
import os.path
import shutil
import time
import xml
from xml.dom.minidom import parse, parseString

# Last modified 7/17/12 @5pm
# Usage: ./regression.py [-s FILE] OR [-l CONFIG] [OPTIONS]
#  Run the RegressionTest.java file, and compare the result with a gold standard
#  When the -s flag is set, this script only tests the image given by FILE.
#  An indexed NSRL database is expected at ./input/nsrl.txt-md5.idx,
#  and an indexed notable hash database at ./input/notablehashes.txt-md5.idx
#  In addition, any keywords to search for must be in ./input/notablekeywords.xml
#  When the -l flag is set, the script looks for a config.xml file of the given name
#  where images are stored. For usage notes please see the example "config.xml" in
#  the /script folder.
# Options:
#  -r, --rebuild    Rebuild the gold standards from the test results for each image
#  -i, --ignore     Ignores unallocated space when ingesting. Faster, but less accurate results.

hadErrors = False  # If any of the tests failed
results = {}       # Dictionary in which to store map ({imgname}->errors)
goldDir = "gold"   # Directory for gold standards (files should be ./gold/{imgname}/standard.db)
inDir = "input"    # Image files, hash dbs, and keywords.
# Results will be in ./output/{datetime}/{imgname}/ outDir = os.path.join("output",time.strftime("%Y.%m.%d-%H.%M")) # Run ingest on all the images in 'input', using notablekeywords.xml and notablehashes.txt-md5.idx def testAddImageIngest(inFile, ignoreUnalloc, list): print "================================================" print "Ingesting Image: " + inFile # Set up case directory path testCaseName = imageName(inFile) #check for flags to append to folder name if ignoreUnalloc: testCaseName+="-i" if list: testCaseName+="-l" if os.path.exists(os.path.join(outDir,testCaseName)): shutil.rmtree(os.path.join(outDir,testCaseName)) os.makedirs(os.path.join(outDir,testCaseName)) if not os.path.exists(inDir): markError("input dir does not exist", inFile) cwd = wgetcwd() testInFile = wabspath(inFile) # NEEDS windows path (backslashes) for .E00 images to work testInFile = testInFile.replace("/", "\\") if list: knownBadPath = os.path.join(inDir, "notablehashes.txt-md5.idx") keywordPath = os.path.join(inDir, "notablekeywords.xml") nsrlPath = os.path.join(inDir, "nsrl.txt-md5.idx") else: knownBadPath = os.path.join(cwd,inDir,"notablehashes.txt-md5.idx") keywordPath = os.path.join(cwd,inDir,"notablekeywords.xml") nsrlPath = os.path.join(cwd,inDir,"nsrl.txt-md5.idx") knownBadPath = knownBadPath.replace("/", "\\") keywordPath = keywordPath.replace("/", "\\") nsrlPath = nsrlPath.replace("/", "\\") antlog = os.path.join(cwd,outDir,testCaseName,"antlog.txt") antlog = antlog.replace("/", "\\") timeout = 24 * 60 * 60 * 1000 # default of 24 hours, just to be safe size = getImageSize(inFile, list) # get the size in bytes timeout = (size / 1000) / 1000 # convert to MB timeout = timeout * 1000 # convert sec to ms timeout = timeout * 1.5 # add a little extra umph timeout = timeout * 25 # decided we needed A LOT extra to be safe # set up ant target args = ["ant"] args.append("-q") args.append("-f") args.append(os.path.join("..","build.xml")) args.append("regression-test") args.append("-l") 
args.append(antlog) args.append("-Dimg_path=" + testInFile) args.append("-Dknown_bad_path=" + knownBadPath) args.append("-Dkeyword_path=" + keywordPath) args.append("-Dnsrl_path=" + nsrlPath) args.append("-Dgold_path=" + os.path.join(cwd,goldDir).replace("/", "\\")) args.append("-Dout_path=" + os.path.join(cwd,outDir,testCaseName).replace("/", "\\")) args.append("-Dignore_unalloc=" + "%s" % ignoreUnalloc) args.append("-Dtest.timeout=" + str(timeout)) # print the ant testing command print "CMD: " + " ".join(args) print "Starting test..." #fnull = open(os.devnull, 'w') #subprocess.call(args, stderr=subprocess.STDOUT, stdout=fnull) #fnull.close(); subprocess.call(args) def getImageSize(inFile, list): name = imageName(inFile) size = 0 if list: size += os.path.getsize(inFile) else: path = os.path.join(".",inDir) for files in os.listdir(path): filename = os.path.splitext(files)[0] if filename == name: filepath = os.path.join(path, files) if not os.path.samefile(filepath, inFile): size += os.path.getsize(filepath) size += os.path.getsize(inFile) return size def testCompareToGold(inFile, ignore): print "-----------------------------------------------" print "Comparing results for " + inFile + " with gold." name = imageName(inFile) if ignore: name += ("-i") cwd = wgetcwd() goldFile = os.path.join("./",goldDir,name,"standard.db") testFile = os.path.join("./",outDir,name,"AutopsyTestCase","autopsy.db") if os.path.isfile(goldFile) == False: markError("No gold standard exists", inFile) return if os.path.isfile(testFile) == False: markError("No database exists", inFile) return # For now, comparing size of blackboard_artifacts, # blackboard_attributes, # and tsk_objects. 
goldConn = sqlite3.connect(goldFile) goldC = goldConn.cursor() testConn = sqlite3.connect(testFile) testC = testConn.cursor() print("Comparing Artifacts: ") # Keep range in sync with number of items in ARTIFACT_TYPE enum for type_id in range(1, 13): goldC.execute("select count(*) from blackboard_artifacts where artifact_type_id=%d" % type_id) goldArtifacts = goldC.fetchone()[0] testC.execute("select count(*) from blackboard_artifacts where artifact_type_id=%d" % type_id) testArtifacts = testC.fetchone()[0] if(goldArtifacts != testArtifacts): errString = str("Artifact counts do not match for type id %d!: " % type_id) errString += str("Gold: %d, Test: %d" % (goldArtifacts, testArtifacts)) markError(errString, inFile) else: print("Artifact counts for artifact type id %d match!" % type_id) print("Comparing Attributes: ") goldC.execute("select count(*) from blackboard_attributes") goldAttributes = goldC.fetchone()[0] testC.execute("select count(*) from blackboard_attributes") testAttributes = testC.fetchone()[0] if(goldAttributes != testAttributes): errString = "Attribute counts do not match!: " errString += str("Gold: %d, Test: %d" % (goldAttributes, testAttributes)) markError(errString, inFile) else: print("Attribute counts match!") print("Comparing TSK Objects: ") goldC.execute("select count(*) from tsk_objects") goldObjects = goldC.fetchone()[0] testC.execute("select count(*) from tsk_objects") testObjects = testC.fetchone()[0] if(goldObjects != testObjects): errString = "TSK Object counts do not match!: " errString += str("Gold: %d, Test: %d" % (goldObjects, testObjects)) markError(errString, inFile) else: print("Object counts match!") def clearGoldDir(inFile, ignore, list): cwd = wgetcwd() inFile = imageName(inFile) if ignore: inFile += "-i" if list: inFile += "-l" if os.path.exists(os.path.join(cwd,goldDir,inFile)): shutil.rmtree(os.path.join(cwd,goldDir,inFile)) os.makedirs(os.path.join(cwd,goldDir,inFile)) def copyTestToGold(inFile, ignore, list): print 
"------------------------------------------------" print "Recreating gold standard from results." inFile = imageName(inFile) if ignore: inFile += "-i" if list: inFile += "-l" cwd = wgetcwd() goldFile = os.path.join("./",goldDir,inFile,"standard.db") testFile = os.path.join("./",outDir,inFile,"AutopsyTestCase","autopsy.db") shutil.copy(testFile, goldFile) def copyReportToGold(inFile, ignore, list): print "------------------------------------------------" print "Recreating gold report from results." inFile = imageName(inFile) if ignore: inFile += "-i" if list: inFile += "-l" cwd = wgetcwd() goldReport = os.path.join("./",goldDir,inFile,"report.html") testReportPath = os.path.join("./",outDir,inFile,"AutopsyTestCase","Reports") # Because Java adds a timestamp to the report file, one can't call it # directly, so one must get a list of files in the dir, which are only # reports, then filter for the .html report testReport = None for files in os.listdir(testReportPath): if files.endswith(".html"): # Get the HTML one testReport = os.path.join("./",outDir,inFile,"AutopsyTestCase","Reports",files) if testReport is None: markError("No test report exists", inFile) return else: shutil.copy(testReport, goldReport) def testCompareReports(inFile, ignore, list): print "------------------------------------------------" print "Comparing report to golden report." 
name = imageName(inFile) if ignore: name += "-i" if list: name += "-l" goldReport = os.path.join("./",goldDir,name,"report.html") testReportPath = os.path.join("./",outDir,name,"AutopsyTestCase","Reports") # Because Java adds a timestamp to the report file, one can't call it # directly, so one must get a list of files in the dir, which are only # reports, then filter for the .html report testReport = None for files in os.listdir(testReportPath): if files.endswith(".html"): # Get the HTML one testReport = os.path.join("./",outDir,name,"AutopsyTestCase","Reports",files) if os.path.isfile(goldReport) == False: markError("No gold report exists", inFile) return if testReport is None: markError("No test report exists", inFile) return # Compare the reports goldFile = open(goldReport) testFile = open(testReport) # Search for