From 0a5107ddd51b6160eb5e33f7afb8a9a79aa83788 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 16 Apr 2021 12:35:37 -0400 Subject: [PATCH 01/30] get table and column definitions for database --- test/script/dbaccesstest.py | 45 ++ test/script/tskdbdiff2.py | 969 ++++++++++++++++++++++++++++++++++++ 2 files changed, 1014 insertions(+) create mode 100644 test/script/dbaccesstest.py create mode 100644 test/script/tskdbdiff2.py diff --git a/test/script/dbaccesstest.py b/test/script/dbaccesstest.py new file mode 100644 index 0000000000..cfe026395e --- /dev/null +++ b/test/script/dbaccesstest.py @@ -0,0 +1,45 @@ +from typing import List, Dict + +import psycopg2 +import sqlite3 + + +def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: + cur = conn.cursor() + cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") + tables = list([table[0] for table in cur.fetchall()]) + cur.close() + + to_ret = {} + for table in tables: + cur = conn.cursor() + cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) + to_ret[table] = list([col[0] for col in cur.fetchall()]) + + return to_ret + + +def get_pg_table_columns(conn) -> Dict[str, List[str]]: + cursor = conn.cursor() + cursor.execute(""" + SELECT cols.table_name, cols.column_name + FROM information_schema.columns cols + WHERE cols.column_name IS NOT NULL + AND cols.table_name IS NOT NULL + AND cols.table_name IN ( + SELECT tables.tablename FROM pg_catalog.pg_tables tables + WHERE LOWER(schemaname) = 'public' + ) + ORDER by cols.table_name, cols.ordinal_position; + """) + mapping = {} + for row in cursor: + mapping.setdefault(row[0], []).append(row[1]) + + cursor.close() + conn.close() + return mapping + +#for key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items(): +#for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items(): +# print(f"{key}: {val}") \ No newline at end of file diff --git a/test/script/tskdbdiff2.py b/test/script/tskdbdiff2.py new file mode 100644 index 0000000000..7ff02d0c30 --- /dev/null +++ b/test/script/tskdbdiff2.py @@ -0,0 +1,969 @@ +# Requires python3 + +import re +import sqlite3 +import subprocess +import shutil +import os +import codecs +import datetime +import sys +from typing import Dict, List + +import psycopg2 +import psycopg2.extras +import socket +import csv + +class TskDbDiff(object): + """Compares two TSK/Autospy SQLite databases. + + Attributes: + gold_artifacts: + autopsy_artifacts: + gold_attributes: + autopsy_attributes: + gold_objects: + autopsy_objects: + artifact_comparison: + attribute_comparision: + report_errors: a listof_listof_String, the error messages that will be + printed to screen in the run_diff method + passed: a boolean, did the diff pass? + autopsy_db_file: + gold_db_file: + """ + def __init__(self, output_db, gold_db, output_dir=None, gold_bb_dump=None, gold_dump=None, verbose=False, isMultiUser=False, pgSettings=None): + """Constructor for TskDbDiff. + + Args: + output_db_path: path to output database (non-gold standard) + gold_db_path: path to gold database + output_dir: (optional) Path to folder where generated files will be put. + gold_bb_dump: (optional) path to file where the gold blackboard dump is located + gold_dump: (optional) path to file where the gold non-blackboard dump is located + verbose: (optional) a boolean, if true, diff results are sent to stdout. 
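+            isMultiUser: (optional) a boolean, if true, the output and gold databases are PostgreSQL (multi-user) databases rather than SQLite.
+            pgSettings: (optional) a PGSettings object holding the PostgreSQL host, port, username and password; required when isMultiUser is true.
+
+        Example (paths are placeholders; this mirrors how main() below drives the class):
+            db_diff = TskDbDiff("output/autopsy.db", "gold/autopsy.db", output_dir=".")
+            dump_passed, bb_dump_passed = db_diff.run_diff()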
+ """ + + self.output_db_file = output_db + self.gold_db_file = gold_db + self.output_dir = output_dir + self.gold_bb_dump = gold_bb_dump + self.gold_dump = gold_dump + self._generate_gold_dump = False + self._generate_gold_bb_dump = False + self._bb_dump_diff = "" + self._dump_diff = "" + self._bb_dump = "" + self._dump = "" + self.verbose = verbose + self.isMultiUser = isMultiUser + self.pgSettings = pgSettings + + if self.isMultiUser and not self.pgSettings: + print("Missing PostgreSQL database connection settings data.") + sys.exit(1) + + if self.gold_bb_dump is None: + self._generate_gold_bb_dump = True + if self.gold_dump is None: + self._generate_gold_dump = True + + def run_diff(self): + """Compare the databases. + + Raises: + TskDbDiffException: if an error occurs while diffing or dumping the database + """ + + self._init_diff() + id_obj_path_table = -1 + # generate the gold database dumps if necessary + if self._generate_gold_dump: + id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.gold_db_file, self.gold_dump, self.isMultiUser, self.pgSettings) + if self._generate_gold_bb_dump: + TskDbDiff._dump_output_db_bb(self.gold_db_file, self.gold_bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table) + + # generate the output database dumps (both DB and BB) + id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.output_db_file, self._dump, self.isMultiUser, self.pgSettings) + TskDbDiff._dump_output_db_bb(self.output_db_file, self._bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table) + + # Compare non-BB + dump_diff_pass = self._diff(self._dump, self.gold_dump, self._dump_diff) + + # Compare BB + bb_dump_diff_pass = self._diff(self._bb_dump, self.gold_bb_dump, self._bb_dump_diff) + + self._cleanup_diff() + return dump_diff_pass, bb_dump_diff_pass + + + def _init_diff(self): + """Set up the necessary files based on the arguments given at construction""" + if self.output_dir is None: + # No stored files + self._bb_dump = TskDbDiff._get_tmp_file("BlackboardDump", ".txt") + self._bb_dump_diff = TskDbDiff._get_tmp_file("BlackboardDump-Diff", ".txt") + self._dump = TskDbDiff._get_tmp_file("DBDump", ".txt") + self._dump_diff = TskDbDiff._get_tmp_file("DBDump-Diff", ".txt") + else: + self._bb_dump = os.path.join(self.output_dir, "BlackboardDump.txt") + self._bb_dump_diff = os.path.join(self.output_dir, "BlackboardDump-Diff.txt") + self._dump = os.path.join(self.output_dir, "DBDump.txt") + self._dump_diff = os.path.join(self.output_dir, "DBDump-Diff.txt") + + # Sorting gold before comparing (sort behaves differently in different environments) + new_bb = TskDbDiff._get_tmp_file("GoldBlackboardDump", ".txt") + new_db = TskDbDiff._get_tmp_file("GoldDBDump", ".txt") + if self.gold_bb_dump is not None: + srtcmdlst = ["sort", self.gold_bb_dump, "-o", new_bb] + subprocess.call(srtcmdlst) + srtcmdlst = ["sort", self.gold_dump, "-o", new_db] + subprocess.call(srtcmdlst) + self.gold_bb_dump = new_bb + self.gold_dump = new_db + + + def _cleanup_diff(self): + if self.output_dir is None: + #cleanup temp files + os.remove(self._dump) + os.remove(self._bb_dump) + if os.path.isfile(self._dump_diff): + os.remove(self._dump_diff) + if os.path.isfile(self._bb_dump_diff): + os.remove(self._bb_dump_diff) + + if self.gold_bb_dump is None: + os.remove(self.gold_bb_dump) + os.remove(self.gold_dump) + + + def _diff(self, output_file, gold_file, diff_path): + """Compare two text files. 
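+        If the files differ, the diff output is written to diff_path and a copy of the gold file
+        (prefixed with "Gold-") is placed next to the output file so both versions end up in the report output folder.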
+ + Args: + output_file: a pathto_File, the latest text file + gold_file: a pathto_File, the gold text file + diff_path: The file to write the differences to + Returns False if different + """ + + if (not os.path.isfile(output_file)): + return False + + if (not os.path.isfile(gold_file)): + return False + + # It is faster to read the contents in and directly compare + output_data = codecs.open(output_file, "r", "utf_8").read() + gold_data = codecs.open(gold_file, "r", "utf_8").read() + if (gold_data == output_data): + return True + + # If they are different, invoke 'diff' + diff_file = codecs.open(diff_path, "wb", "utf_8") + # Gold needs to be passed in as 1st arg and output as 2nd + dffcmdlst = ["diff", gold_file, output_file] + subprocess.call(dffcmdlst, stdout = diff_file) + + # create file path for gold files inside output folder. In case of diff, both gold and current run files + # are available in the report output folder. Prefix Gold- is added to the filename. + gold_file_in_output_dir = output_file[:output_file.rfind("/")] + "/Gold-" + output_file[output_file.rfind("/")+1:] + shutil.copy(gold_file, gold_file_in_output_dir) + + return False + + + def _dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table): + """Dumps sorted text results to the given output location. + + Smart method that deals with a blackboard comparison to avoid issues + with different IDs based on when artifacts were created. + + Args: + db_file: a pathto_File, the output database. + bb_dump_file: a pathto_File, the sorted dump file to write to + """ + + unsorted_dump = TskDbDiff._get_tmp_file("dump_data", ".txt") + if isMultiUser: + conn, unused_db = db_connect(db_file, isMultiUser, pgSettings) + artifact_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + else: # Use Sqlite + conn = sqlite3.connect(db_file) + conn.text_factory = lambda x: x.decode("utf-8", "ignore") + conn.row_factory = sqlite3.Row + artifact_cursor = conn.cursor() + # Get the list of all artifacts (along with type and associated file) + # @@@ Could add a SORT by parent_path in here since that is how we are going to later sort it. 
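+        # The query joins each artifact to its type display name and to its source file in tsk_files, so the
+        # dump can be keyed by file path rather than by artifact_id values that change between runs.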
+ artifact_cursor.execute("SELECT tsk_files.parent_path, tsk_files.name, blackboard_artifact_types.display_name, blackboard_artifacts.artifact_id FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id") + database_log = codecs.open(unsorted_dump, "wb", "utf_8") + row = artifact_cursor.fetchone() + appnd = False + counter = 0 + artifact_count = 0 + artifact_fail = 0 + + # Cycle through artifacts + try: + while (row != None): + + # File Name and artifact type + # Remove parent object ID from Unalloc file name + normalizedName = re.sub('^Unalloc_[0-9]+_', 'Unalloc_', row["name"]) + if(row["parent_path"] != None): + database_log.write(row["parent_path"] + normalizedName + ' ') + else: + database_log.write(normalizedName + ' ') + + if isMultiUser: + attribute_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + else: + attribute_cursor = conn.cursor() + looptry = True + artifact_count += 1 + try: + art_id = "" + art_id = str(row["artifact_id"]) + + # Get attributes for this artifact + if isMultiUser: + attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id = %s ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id]) + else: + attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id =? 
ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id]) + + attributes = attribute_cursor.fetchall() + + # Print attributes + if (len(attributes) == 0): + # @@@@ This should be + database_log.write(' \n') + row = artifact_cursor.fetchone() + continue + + src = attributes[0][0] + for attr in attributes: + numvals = 0 + for x in range(3, 6): + if(attr[x] != None): + numvals += 1 + if(numvals > 1): + msg = "There were too many values for attribute type: " + attr["display_name"] + " for artifact with id #" + str(row["artifact_id"]) + ".\n" + + if(not attr["source"] == src): + msg = "There were inconsistent sources for artifact with id #" + str(row["artifact_id"]) + ".\n" + + try: + if attr["value_type"] == 0: + attr_value_as_string = str(attr["value_text"]) + elif attr["value_type"] == 1: + attr_value_as_string = str(attr["value_int32"]) + elif attr["value_type"] == 2: + attr_value_as_string = str(attr["value_int64"]) + if attr["attribute_type_id"] == 36 and id_obj_path_table != -1 and int(attr_value_as_string) > 0: #normalize positive TSK_PATH_IDs from being object id to a path if the obj_id_path_table was generated + attr_value_as_string = id_obj_path_table[int(attr_value_as_string)] + elif attr["value_type"] == 3: + attr_value_as_string = "%20.10f" % float((attr["value_double"])) #use exact format from db schema to avoid python auto format double value to (0E-10) scientific style + elif attr["value_type"] == 4: + attr_value_as_string = "bytes" + elif attr["value_type"] == 5: + attr_value_as_string = str(attr["value_int64"]) + if attr["display_name"] == "Associated Artifact": + attr_value_as_string = getAssociatedArtifactType(attribute_cursor, attr_value_as_string, isMultiUser) + patrn = re.compile("[\n\0\a\b\r\f]") + attr_value_as_string = re.sub(patrn, ' ', attr_value_as_string) + if attr["source"] == "Keyword Search" and attr["display_name"] == "Keyword Preview": + attr_value_as_string = "" + database_log.write('') + except IOError as e: + print("IO error") + raise TskDbDiffException("Unexpected IO error while writing to database log." 
+ str(e)) + + except sqlite3.Error as e: + msg = "Attributes in artifact id (in output DB)# " + str(row["artifact_id"]) + " encountered an error: " + str(e) +" .\n" + print("Attributes in artifact id (in output DB)# ", str(row["artifact_id"]), " encountered an error: ", str(e)) + print() + looptry = False + artifact_fail += 1 + database_log.write('Error Extracting Attributes') + database_log.close() + raise TskDbDiffException(msg) + finally: + attribute_cursor.close() + + + # @@@@ This should be + database_log.write(' \n') + row = artifact_cursor.fetchone() + + if(artifact_fail > 0): + msg ="There were " + str(artifact_count) + " artifacts and " + str(artifact_fail) + " threw an exception while loading.\n" + except Exception as e: + raise TskDbDiffException("Unexpected error while dumping blackboard database: " + str(e)) + finally: + database_log.close() + artifact_cursor.close() + conn.close() + + # Now sort the file + srtcmdlst = ["sort", unsorted_dump, "-o", bb_dump_file] + subprocess.call(srtcmdlst) + + + # for key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items(): + # for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items(): + # print(f"{key}: {val}") + + + + + + def _dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings): + """Dumps a database to a text file. + + Does not dump the artifact and attributes. + + Args: + db_file: a pathto_File, the database file to dump + dump_file: a pathto_File, the location to dump the non-blackboard database items + """ + + conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings) + id_files_table = build_id_files_table(conn.cursor(), isMultiUser) + id_vs_parts_table = build_id_vs_parts_table(conn.cursor(), isMultiUser) + id_vs_info_table = build_id_vs_info_table(conn.cursor(), isMultiUser) + id_fs_info_table = build_id_fs_info_table(conn.cursor(), isMultiUser) + id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser) + id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser) + id_legacy_artifact_types = build_id_legacy_artifact_types_table(conn.cursor(), isMultiUser) + id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser) + id_images_table = build_id_image_names_table(conn.cursor(), isMultiUser) + id_accounts_table = build_id_accounts_table(conn.cursor(), isMultiUser) + id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table, id_accounts_table) + + if isMultiUser: # Use PostgreSQL + os.environ['PGPASSWORD']=pgSettings.password + pgDump = ["pg_dump", "--inserts", "-U", pgSettings.username, "-h", pgSettings.pgHost, "-p", pgSettings.pgPort, "-d", db_file, "-E", "utf-8", "-T", "blackboard_artifacts", "-T", "blackboard_attributes", "-f", "postgreSQLDump.sql"] + subprocess.call(pgDump) + postgreSQL_db = codecs.open("postgreSQLDump.sql", "r", "utf-8") + # Write to the database dump + with codecs.open(dump_file, "wb", "utf_8") as db_log: + dump_line = '' + for line in postgreSQL_db: + line = line.strip('\r\n ') + # Deal with pg_dump result file + if (line.startswith('--') or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): # It's comment or alter statement or catalog entry or set idle entry or empty line + continue + elif not line.endswith(';'): # Statement not finished + dump_line 
+= line + continue + else: + dump_line += line + if 'INSERT INTO image_gallery_groups_seen' in dump_line: + dump_line = '' + continue; + dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) + db_log.write('%s\n' % dump_line) + dump_line = '' + postgreSQL_db.close() + else: # use Sqlite + # Delete the blackboard tables + conn.text_factory = lambda x: x.decode("utf-8", "ignore") + conn.execute("DROP TABLE blackboard_artifacts") + conn.execute("DROP TABLE blackboard_attributes") + # Write to the database dump + with codecs.open(dump_file, "wb", "utf_8") as db_log: + for line in conn.iterdump(): + if 'INSERT INTO "image_gallery_groups_seen"' in line: + continue + line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) + db_log.write('%s\n' % line) + # Now sort the file + srtcmdlst = ["sort", dump_file, "-o", dump_file] + subprocess.call(srtcmdlst) + + conn.close() + # cleanup the backup + if backup_db_file: + os.remove(backup_db_file) + return id_obj_path_table + + + def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings): + """Dumps the given database to text files for later comparison. + + Args: + db_file: a pathto_File, the database file to dump + dump_file: a pathto_File, the location to dump the non-blackboard database items + bb_dump_file: a pathto_File, the location to dump the blackboard database items + """ + id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings) + TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table) + + + def _get_tmp_file(base, ext): + time = datetime.datetime.now().time().strftime("%H%M%f") + return os.path.join(os.environ['TMP'], base + time + ext) + + +class TskDbDiffException(Exception): + pass + +class PGSettings(object): + def __init__(self, pgHost=None, pgPort=5432, user=None, password=None): + self.pgHost = pgHost + self.pgPort = pgPort + self.username = user + self.password = password + + def get_pgHost(self): + return self.pgHost + + def get_pgPort(self): + return self.pgPort + + def get_username(self): + return self.username + + def get_password(self): + return self.password + + + + + +def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: + """ + Retrieves the sqlite public tables and columns from a sqlite connection. + Args: + conn: The sqlite connection. + + Returns: The mapping of table names to a list of column names in that table where the list is in ordinal value. + """ + cur = conn.cursor() + cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") + tables = list([table[0] for table in cur.fetchall()]) + cur.close() + + to_ret = {} + for table in tables: + cur = conn.cursor() + cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) + to_ret[table] = list([col[0] for col in cur.fetchall()]) + cur.close() + + return to_ret + + +def get_pg_table_columns(conn) -> Dict[str, List[str]]: + """ + Retrieves the postgres public tables and columns from a pg connection. + Args: + conn: The pg connection. + + Returns: The mapping of table names to a list of column names in that table where the list is in ordinal value. 
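+
+    Example (connection parameters are placeholders; actual column lists depend on the schema version):
+        conn = psycopg2.connect(dbname="casedb", user="postgres", password="...")
+        table_columns = get_pg_table_columns(conn)
+        # e.g. table_columns["tsk_objects"] -> ['obj_id', 'par_obj_id', 'type']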
+ """ + cursor = conn.cursor() + cursor.execute(""" + SELECT cols.table_name, cols.column_name + FROM information_schema.columns cols + WHERE cols.column_name IS NOT NULL + AND cols.table_name IS NOT NULL + AND cols.table_name IN ( + SELECT tables.tablename FROM pg_catalog.pg_tables tables + WHERE LOWER(schemaname) = 'public' + ) + ORDER by cols.table_name, cols.ordinal_position; + """) + mapping = {} + for row in cursor: + mapping.setdefault(row[0], []).append(row[1]) + + cursor.close() + return mapping + + +def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table, images_table, artifact_table, accounts_table): + """ Make testing more consistent and reasonable by doctoring certain db entries. + + Args: + line: a String, the line to remove the object id from. + files_table: a map from object ids to file paths. + """ + + # Sqlite statement use double quotes for table name, PostgreSQL doesn't. We check both databases results for normalization. + files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 + path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 + object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 + vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 + report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 + layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 + data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find('INSERT INTO data_source_info ') > -1 + event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find('INSERT INTO tsk_event_descriptions ') > -1 + events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 + ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 + examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 + ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find('INSERT INTO image_gallery_groups ') > -1 + ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find('INSERT INTO image_gallery_groups_seen ') > -1 + os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 + os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find('INSERT INTO tsk_os_account_attributes') > -1 + os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find('INSERT INTO tsk_os_account_instances') > -1 + data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find('INSERT INTO tsk_data_artifacts') > -1 + + parens = line[line.find('(') + 1 : line.rfind(')')] + no_space_parens = parens.replace(" ", "") + fields_list = list(csv.reader([no_space_parens], quotechar="'"))[0] + #Add back in the quotechar for values that were originally wrapped (csv reader consumes this character) + fields_list_with_quotes = [] + ptr = 0 + for field in fields_list: + if(len(field) == 0): + field = "'" + field + "'" + else: + start = no_space_parens.find(field, ptr) + if((start - 1) >= 0 and 
no_space_parens[start - 1] == '\''): + if((start + len(field)) < len(no_space_parens) and no_space_parens[start + len(field)] == '\''): + field = "'" + field + "'" + fields_list_with_quotes.append(field) + if(ptr > 0): + #Add one for each comma that is used to separate values in the original string + ptr+=1 + ptr += len(field) + + fields_list = fields_list_with_quotes + + # remove object ID + if files_index: + + # Ignore TIFF size and hash if extracted from PDFs. + # See JIRA-6951 for more details. + # index -3 = 3rd from the end, which is extension + # index -5 = 5th from the end, which is the parent path. + if fields_list[-3] == "'tif'" and fields_list[-5].endswith(".pdf/'"): + fields_list[15] = "'SIZE_IGNORED'" + fields_list[23] = "'MD5_IGNORED'" + fields_list[24] = "'SHA256_IGNORED'" + newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:-1]) + ');') #leave off first (object id) and last (os_account_id) field + # Remove object ID from Unalloc file name + newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) + return newLine + # remove object ID + elif vs_parts_index: + newLine = ('INSERT INTO "tsk_vs_parts" VALUES(' + ', '.join(fields_list[1:]) + ');') + return newLine + # remove group ID + elif ig_groups_index: + newLine = ('INSERT INTO "image_gallery_groups" VALUES(' + ', '.join(fields_list[1:]) + ');') + return newLine + #remove id field + elif ig_groups_seen_index: + # Only removing the id and group_id fields for now. May need to care about examiner_id and seen fields in future. + newLine = ('INSERT INTO "image_gallery_groups_seen" VALUES(' + ', '.join(fields_list[2:]) + ');') + return newLine + # remove object ID + elif path_index: + obj_id = int(fields_list[0]) + objValue = files_table[obj_id] + # remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory + idx_pre = fields_list[1].find('EmbeddedFileExtractor') + len('EmbeddedFileExtractor') + if idx_pre > -1: + idx_pos = fields_list[1].find('\\', idx_pre + 2) + dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file seperator + dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')] + pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:] + else: + pathValue = fields_list[1] + # remove localhost from postgres par_obj_name + multiOutput_idx = pathValue.find('ModuleOutput') + if multiOutput_idx > -1: + pathValue = "'" + pathValue[pathValue.find('ModuleOutput'):] #postgres par_obj_name include losthost + + newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');') + return newLine + # remove object ID + elif layout_index: + obj_id = fields_list[0] + path= files_table[int(obj_id)] + newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', '.join(fields_list[1:]) + ');') + # Remove object ID from Unalloc file name + newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) + return newLine + # remove object ID + elif object_index: + obj_id = fields_list[0] + parent_id = fields_list[1] + newLine = 'INSERT INTO "tsk_objects" VALUES(' + path = None + parent_path = None + + #if obj_id or parent_id is invalid literal, we simple return the values as it is + try: + obj_id = int(obj_id) + if parent_id != 'NULL': + parent_id = int(parent_id) + except Exception as e: + print(obj_id, parent_id) + return line + + if obj_id in files_table.keys(): + path = files_table[obj_id] + elif obj_id in vs_parts_table.keys(): + path = vs_parts_table[obj_id] + elif obj_id in vs_info_table.keys(): + 
path = vs_info_table[obj_id] + elif obj_id in fs_info_table.keys(): + path = fs_info_table[obj_id] + elif obj_id in reports_table.keys(): + path = reports_table[obj_id] + # remove host name (for multi-user) and dates/times from path for reports + if path is not None: + if 'ModuleOutput' in path: + # skip past the host name (if any) + path = path[path.find('ModuleOutput'):] + if 'BulkExtractor' in path or 'Smirk' in path: + # chop off the last folder (which contains a date/time) + path = path[:path.rfind('\\')] + if 'Reports\\AutopsyTestCase HTML Report' in path: + path = 'Reports\\AutopsyTestCase HTML Report' + + if parent_id in files_table.keys(): + parent_path = files_table[parent_id] + elif parent_id in vs_parts_table.keys(): + parent_path = vs_parts_table[parent_id] + elif parent_id in vs_info_table.keys(): + parent_path = vs_info_table[parent_id] + elif parent_id in fs_info_table.keys(): + parent_path = fs_info_table[parent_id] + elif parent_id in images_table.keys(): + parent_path = images_table[parent_id] + elif parent_id in accounts_table.keys(): + parent_path = accounts_table[parent_id] + elif parent_id == 'NULL': + parent_path = "NULL" + + # Remove host name (for multi-user) from parent_path + if parent_path is not None: + if 'ModuleOutput' in parent_path: + # skip past the host name (if any) + parent_path = parent_path[parent_path.find('ModuleOutput'):] + + if path and parent_path: + # Remove object ID from Unalloc file names and regripper output + path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', path) + path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', path) + parent_path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', parent_path) + parent_path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', parent_path) + return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');' + else: + return newLine + '"OBJECT IDS OMITTED", ' + ', '.join(fields_list[2:]) + ');' #omit parent object id and object id when we cant annonymize them + # remove time-based information, ie Test_6/11/14 -> Test + elif report_index: + fields_list[1] = "AutopsyTestCase" + fields_list[2] = "0" + newLine = ('INSERT INTO "reports" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id + return newLine + elif data_source_info_index: + fields_list[1] = "{device id}" + fields_list[4] = "{dateTime}" + newLine = ('INSERT INTO "data_source_info" VALUES(' + ','.join(fields_list) + ');') + return newLine + elif ingest_job_index: + fields_list[2] = "{host_name}" + start_time = int(fields_list[3]) + end_time = int(fields_list[4]) + if (start_time <= end_time): + fields_list[3] = "0" + fields_list[4] = "0" + newLine = ('INSERT INTO "ingest_jobs" VALUES(' + ','.join(fields_list) + ');') + return newLine + elif examiners_index: + fields_list[1] = "{examiner_name}" + newLine = ('INSERT INTO "tsk_examiners" VALUES(' + ','.join(fields_list) + ');') + return newLine + # remove all timing dependent columns from events table + elif events_index: + newLine = ('INSERT INTO "tsk_events" VALUES(' + ','.join(fields_list[1:2]) + ');') + return newLine + # remove object ids from event description table + elif event_description_index: + # replace object ids with information that is deterministic + file_obj_id = int(fields_list[5]) + object_id = int(fields_list[4]) + legacy_artifact_id = 'NULL' + if (fields_list[6] != 'NULL'): + legacy_artifact_id = int(fields_list[6]) + if file_obj_id != 'NULL' and file_obj_id in files_table.keys(): + fields_list[5] = files_table[file_obj_id] + if object_id != 'NULL' and 
object_id in files_table.keys(): + fields_list[4] = files_table[object_id] + if legacy_artifact_id != 'NULL' and legacy_artifact_id in artifact_table.keys(): + fields_list[6] = artifact_table[legacy_artifact_id] + if fields_list[1] == fields_list[2] and fields_list[1] == fields_list[3]: + fields_list[1] = cleanupEventDescription(fields_list[1]) + fields_list[2] = cleanupEventDescription(fields_list[2]) + fields_list[3] = cleanupEventDescription(fields_list[3]) + newLine = ('INSERT INTO "tsk_event_descriptions" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id + return newLine + elif os_account_index: + newLine = ('INSERT INTO "tsk_os_accounts" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id since value that would be substituted is in diff line already + return newLine + elif os_account_attr_index: + #substitue the account object id for a non changing value + os_account_id = int(fields_list[1]) + fields_list[1] = accounts_table[os_account_id] + #substitue the source object id for a non changing value + source_obj_id = int(fields_list[3]) + if source_obj_id in files_table.keys(): + fields_list[3] = files_table[source_obj_id] + elif source_obj_id in vs_parts_table.keys(): + fields_list[3] = vs_parts_table[source_obj_id] + elif source_obj_id in vs_info_table.keys(): + fields_list[3] = vs_info_table[source_obj_id] + elif source_obj_id in fs_info_table.keys(): + fields_list[3] = fs_info_table[source_obj_id] + elif source_obj_id in images_table.keys(): + fields_list[3] = images_table[source_obj_id] + elif source_obj_id in accounts_table.keys(): + fields_list[3] = accounts_table[source_obj_id] + elif source_obj_id == 'NULL': + fields_list[3] = "NULL" + newLine = ('INSERT INTO "tsk_os_account_attributes" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id + return newLine + elif os_account_instances_index: + os_account_id = int(fields_list[1]) + fields_list[1] = accounts_table[os_account_id] + newLine = ('INSERT INTO "tsk_os_account_instances" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id + return newLine + elif data_artifacts_index: + art_obj_id = int(fields_list[0]) + if art_obj_id in files_table.keys(): + fields_list[0] = files_table[art_obj_id] + else: + fields_list[0] = 'Artifact Object ID Omitted' + account_obj_id = int(fields_list[1]) + if account_obj_id in files_table.keys(): + fields_list[1] = files_table[account_obj_id] + else: + fields_list[1] = 'Account Object ID Omitted' + newLine = ('INSERT INTO "tsk_data_artifacts" VALUES(' + ','.join(fields_list[:]) + ');') # remove ids + return newLine + else: + return line + +def cleanupEventDescription(description): + test = re.search("^'\D+:\d+'$", description) + if test is not None: + return re.sub(":\d+", ":", description) + else: + return description + +def getAssociatedArtifactType(cur, artifact_id, isMultiUser): + if isMultiUser: + cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s",[artifact_id]) + else: + cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE 
artifact_id=?",[artifact_id]) + + info = cur.fetchone() + + return "File path: " + info[0] + " Artifact Type: " + info[1] + +def build_id_files_table(db_cursor, isPostgreSQL): + """Build the map of object ids to file paths. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, parent path, and name, then create a tuple in the dictionary + # with the object id as the key and the full file path (parent + name) as the value + mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, parent_path, name FROM tsk_files")]) + return mapping + +def build_id_vs_parts_table(db_cursor, isPostgreSQL): + """Build the map of object ids to vs_parts. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, addr, and start, then create a tuple in the dictionary + # with the object id as the key and (addr + start) as the value + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, addr, start FROM tsk_vs_parts")]) + return mapping + +def build_id_vs_info_table(db_cursor, isPostgreSQL): + """Build the map of object ids to vs_info. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, vs_type, and img_offset, then create a tuple in the dictionary + # with the object id as the key and (vs_type + img_offset) as the value + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")]) + return mapping + + +def build_id_fs_info_table(db_cursor, isPostgreSQL): + """Build the map of object ids to fs_info. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, img_offset, and fs_type, then create a tuple in the dictionary + # with the object id as the key and (img_offset + fs_type) as the value + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")]) + return mapping + +def build_id_objects_table(db_cursor, isPostgreSQL): + """Build the map of object ids to par_id. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary + # with the object id as the key and par_obj_id, type as the value + mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) + return mapping + +def build_id_image_names_table(db_cursor, isPostgreSQL): + """Build the map of object ids to name. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id and name then create a tuple in the dictionary + # with the object id as the key and name, type as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, name FROM tsk_image_names WHERE sequence=0")]) + #data_sources which are logical file sets will be found in the files table + return mapping + +def build_id_artifact_types_table(db_cursor, isPostgreSQL): + """Build the map of object ids to artifact ids. 
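+    (More precisely, maps each artifact's artifact_obj_id to its artifact type name from blackboard_artifact_types.)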
+ + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary + # with the object id as the key and artifact type as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) + return mapping + +def build_id_legacy_artifact_types_table(db_cursor, isPostgreSQL): + """Build the map of legacy artifact ids to artifact type. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the legacy artifact id then create a tuple in the dictionary + # with the artifact id as the key and artifact type as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) + return mapping + +def build_id_reports_table(db_cursor, isPostgreSQL): + """Build the map of report object ids to report path. + + Args: + db_cursor: the database cursor + """ + # for each row in the reports table in the db, create an obj_id -> path map + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, path FROM reports")]) + return mapping + +def build_id_accounts_table(db_cursor, isPostgreSQL): + """Build the map of object ids to OS account SIDs. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id and account SID then creates a tuple in the dictionary + # with the object id as the key and the OS Account's SID as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT os_account_obj_id, addr FROM tsk_os_accounts")]) + return mapping + +def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table, images_table, accounts_table): + """Build the map of object ids to artifact ids. 
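+    (Merges the per-table maps into a single object id -> path/name map: file paths for files, report paths for
+    reports, "<parent path>/<artifact type name>" for artifacts, and account SIDs for OS accounts.)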
+ + Args: + files_table: obj_id, path + objects_table: obj_id, par_obj_id, type + artifacts_table: obj_id, artifact_type_name + reports_table: obj_id, path + images_table: obj_id, name + accounts_table: obj_id, addr + """ + # make a copy of files_table and update it with new data from artifacts_table and reports_table + mapping = files_table.copy() + for k, v in objects_table.items(): + path = "" + if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id + if k in reports_table.keys(): # For a report we use the report path + par_obj_id = v[0] + if par_obj_id is not None: + mapping[k] = reports_table[k] + elif k in artifacts_table.keys(): # For an artifact we use it's par_obj_id's path+name plus it's artifact_type name + par_obj_id = v[0] # The parent of an artifact can be a file or a report + if par_obj_id in mapping.keys(): + path = mapping[par_obj_id] + elif par_obj_id in reports_table.keys(): + path = reports_table[par_obj_id] + elif par_obj_id in images_table.keys(): + path = images_table[par_obj_id] + mapping[k] = path + "/" + artifacts_table[k] + elif k in accounts_table.keys(): # For an OS Account object ID we use its addr field which is the account SID + mapping[k] = accounts_table[k] + elif v[0] not in mapping.keys(): + if v[0] in artifacts_table.keys(): + par_obj_id = objects_table[v[0]] + path = mapping[par_obj_id] + mapping[k] = path + "/" + artifacts_table[v[0]] + return mapping + +def db_connect(db_file, isMultiUser, pgSettings=None): + if isMultiUser: # use PostgreSQL + try: + return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + pgSettings.pgHost + " password=" + pgSettings.password), None + except: + print("Failed to connect to the database: " + db_file) + else: # Sqlite + # Make a copy that we can modify + backup_db_file = TskDbDiff._get_tmp_file("tsk_backup_db", ".db") + shutil.copy(db_file, backup_db_file) + # We sometimes get situations with messed up permissions + os.chmod (backup_db_file, 0o777) + return sqlite3.connect(backup_db_file), backup_db_file + +def sql_select_execute(cursor, isPostgreSQL, sql_stmt): + if isPostgreSQL: + cursor.execute(sql_stmt) + return cursor.fetchall() + else: + return cursor.execute(sql_stmt) + +def main(): + try: + sys.argv.pop(0) + output_db = sys.argv.pop(0) + gold_db = sys.argv.pop(0) + except: + print("usage: tskdbdiff [OUTPUT DB PATH] [GOLD DB PATH]") + sys.exit(1) + + db_diff = TskDbDiff(output_db, gold_db, output_dir=".") + dump_passed, bb_dump_passed = db_diff.run_diff() + + if dump_passed and bb_dump_passed: + print("Database comparison passed.") + if not dump_passed: + print("Non blackboard database comparison failed.") + if not bb_dump_passed: + print("Blackboard database comparison failed.") + + sys.exit(0) + + +if __name__ == "__main__": + if sys.hexversion < 0x03000000: + print("Python 3 required") + sys.exit(1) + + main() + From 24582c42ffba28c31b4d4f29c978886412b862f7 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 16 Apr 2021 15:36:46 -0400 Subject: [PATCH 02/30] write to sql statement --- test/script/dbaccesstest.py | 47 +++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/test/script/dbaccesstest.py b/test/script/dbaccesstest.py index cfe026395e..380998b793 100644 --- a/test/script/dbaccesstest.py +++ b/test/script/dbaccesstest.py @@ -37,9 +37,48 @@ def get_pg_table_columns(conn) -> Dict[str, List[str]]: mapping.setdefault(row[0], []).append(row[1]) cursor.close() - conn.close() return mapping -#for 
key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items(): -#for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items(): -# print(f"{key}: {val}") \ No newline at end of file + +def get_sql_insert_value(val) -> str: + if not val: + return "NULL" + + if isinstance(val, str): + escaped_val = val.replace('\n', '\\n').replace("'", "''") + return f"'{escaped_val}'" + + return str(val) + + +def write_normalized(output_file, db_conn, table: str, column_names: List[str], normalizer=None): + cursor = db_conn.cursor() + + joined_columns = ",".join([col for col in column_names]) + cursor.execute(f"SELECT {joined_columns} FROM {table}") + for row in cursor: + if len(row) != len(column_names): + print(f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}") + continue + + row_dict = {} + for col_idx in range(0, len(column_names)): + row_dict[column_names[col_idx]] = row[col_idx] + + if normalizer: + row_dict = normalizer(table, row_dict) + + values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) + insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n' + output_file.write(insert_statement) + + + + +#with sqlite3.connect(r"C:\Users\gregd\Desktop\autopsy_412.db") as conn, \ +with psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345") as conn, \ + open(r"C:\Users\gregd\Desktop\dbdump.sql", mode="w", encoding='utf-8') as output_file: + + for table, cols in get_pg_table_columns(conn).items(): + # for table, cols in get_sqlite_table_columns(conn).items(): + write_normalized(output_file, conn, table, cols) From 2a4d3c0c8f42d8f1b0af7fa393bdbe436ac90679 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 19 Apr 2021 15:33:45 -0400 Subject: [PATCH 03/30] mapping --- test/script/dbaccesstest.py | 124 +++++++++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 1 deletion(-) diff --git a/test/script/dbaccesstest.py b/test/script/dbaccesstest.py index 380998b793..7e48bcdaf4 100644 --- a/test/script/dbaccesstest.py +++ b/test/script/dbaccesstest.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Dict, Callable, Union import psycopg2 import sqlite3 @@ -19,6 +19,128 @@ def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: return to_ret +IGNORE_TABLE = "IGNORE_TABLE" + + +class TskDbEnvironment: + pass + + +class MaskRow: + row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Dict[str, any]] + + def __init__(self, row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Union[Dict[str, any], None]]): + self.row_masker = row_masker + + def mask(self, db_env: TskDbEnvironment, row: Dict[str, any]) -> Union[Dict[str, any], None]: + return self.row_masker(db_env, row) + + +class MaskColumns(MaskRow): + @classmethod + def _mask_col_vals(cls, + col_mask: Dict[str, Union[any, Callable[[TskDbEnvironment, any], any]]], + db_env: TskDbEnvironment, + row: Dict[str, any]): + + row_copy = dict.copy() + for key, val in col_mask: + # only replace values if present in row + if key in row_copy: + # if a column replacing function, call with original value + if isinstance(val, Callable): + row_copy[key] = val(db_env, row[key]) + # otherwise, just replace with mask value + else: + row_copy[key] = val + + return row_copy + + def __init__(self, col_mask: Dict[str, Union[any, 
Callable[[any], any]]]): + super().__init__(lambda db_env, row: MaskColumns._mask_col_vals(col_mask, db_env, row)) + + +TableNormalization = Union[IGNORE_TABLE, MaskRow] + + +MASKED_OBJ_ID = "MASKED_OBJ_ID" +MASKED_ID = "MASKED_ID" + +table_masking: Dict[str, TableNormalization] = { + "tsk_files": MaskColumns({ + # TODO + }), + + "tsk_vs_parts": MaskColumns({ + "obj_id": MASKED_OBJ_ID + }), + "image_gallery_groups": MaskColumns({ + "obj_id": MASKED_OBJ_ID + }), + "image_gallery_groups_seen": IGNORE_TABLE, + # NOTE there was code in normalization for this, but the table is ignored? + # "image_gallery_groups_seen": MaskColumns({ + # "id": MASKED_ID, + # "group_id": MASKED_ID, + # }), + # TODO + "tsk_files_path": None, + # TODO + "tsk_file_layout": None, + "tsk_objects": None, + "reports": MaskColumns({ + "obj_id": MASKED_OBJ_ID, + "path": "AutopsyTestCase", + "crtime": 0 + }), + "data_source_info": MaskColumns({ + "device_id": "{device id}", + "added_date_time": "{dateTime}" + }), + # TODO + "ingest_jobs": None, + "tsk_examiners": MaskColumns({ + "login_name": "{examiner_name}" + }), + "tsk_events": MaskColumns({ + "event_id": "MASKED_EVENT_ID", + "time": 0, + }), + # TODO + "event_description_index": None, + "tsk_os_accounts": MaskColumns({ + "os_account_obj_id": MASKED_OBJ_ID + }), + # TODO + "tsk_data_artifacts": None +} + + +# files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 +# path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 +# object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 +# vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 +# report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 +# layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 +# data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find( +# 'INSERT INTO data_source_info ') > -1 +# event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find( +# 'INSERT INTO tsk_event_descriptions ') > -1 +# events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 +# ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 +# examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 +# ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find( +# 'INSERT INTO image_gallery_groups ') > -1 +# ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find( +# 'INSERT INTO image_gallery_groups_seen ') > -1 +# os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 +# os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find( +# 'INSERT INTO tsk_os_account_attributes') > -1 +# os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find( +# 'INSERT INTO tsk_os_account_instances') > -1 +# data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find( +# 'INSERT INTO tsk_data_artifacts') > -1 + def get_pg_table_columns(conn) -> Dict[str, List[str]]: cursor = conn.cursor() cursor.execute(""" From 
9d30b408467a7b69a3b88fb9b3440d21fa7ba271 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 20 Apr 2021 21:16:16 -0400 Subject: [PATCH 04/30] integrated into tskdbdiff --- test/script/dbaccesstest.py | 206 ------- test/script/tskdbdiff.py | 1150 ++++++++++++++++++++--------------- test/script/tskdbdiff2.py | 969 ----------------------------- 3 files changed, 662 insertions(+), 1663 deletions(-) delete mode 100644 test/script/dbaccesstest.py delete mode 100644 test/script/tskdbdiff2.py diff --git a/test/script/dbaccesstest.py b/test/script/dbaccesstest.py deleted file mode 100644 index 7e48bcdaf4..0000000000 --- a/test/script/dbaccesstest.py +++ /dev/null @@ -1,206 +0,0 @@ -from typing import List, Dict, Callable, Union - -import psycopg2 -import sqlite3 - - -def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: - cur = conn.cursor() - cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") - tables = list([table[0] for table in cur.fetchall()]) - cur.close() - - to_ret = {} - for table in tables: - cur = conn.cursor() - cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) - to_ret[table] = list([col[0] for col in cur.fetchall()]) - - return to_ret - - -IGNORE_TABLE = "IGNORE_TABLE" - - -class TskDbEnvironment: - pass - - -class MaskRow: - row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Dict[str, any]] - - def __init__(self, row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Union[Dict[str, any], None]]): - self.row_masker = row_masker - - def mask(self, db_env: TskDbEnvironment, row: Dict[str, any]) -> Union[Dict[str, any], None]: - return self.row_masker(db_env, row) - - -class MaskColumns(MaskRow): - @classmethod - def _mask_col_vals(cls, - col_mask: Dict[str, Union[any, Callable[[TskDbEnvironment, any], any]]], - db_env: TskDbEnvironment, - row: Dict[str, any]): - - row_copy = dict.copy() - for key, val in col_mask: - # only replace values if present in row - if key in row_copy: - # if a column replacing function, call with original value - if isinstance(val, Callable): - row_copy[key] = val(db_env, row[key]) - # otherwise, just replace with mask value - else: - row_copy[key] = val - - return row_copy - - def __init__(self, col_mask: Dict[str, Union[any, Callable[[any], any]]]): - super().__init__(lambda db_env, row: MaskColumns._mask_col_vals(col_mask, db_env, row)) - - -TableNormalization = Union[IGNORE_TABLE, MaskRow] - - -MASKED_OBJ_ID = "MASKED_OBJ_ID" -MASKED_ID = "MASKED_ID" - -table_masking: Dict[str, TableNormalization] = { - "tsk_files": MaskColumns({ - # TODO - }), - - "tsk_vs_parts": MaskColumns({ - "obj_id": MASKED_OBJ_ID - }), - "image_gallery_groups": MaskColumns({ - "obj_id": MASKED_OBJ_ID - }), - "image_gallery_groups_seen": IGNORE_TABLE, - # NOTE there was code in normalization for this, but the table is ignored? 
- # "image_gallery_groups_seen": MaskColumns({ - # "id": MASKED_ID, - # "group_id": MASKED_ID, - # }), - # TODO - "tsk_files_path": None, - # TODO - "tsk_file_layout": None, - "tsk_objects": None, - "reports": MaskColumns({ - "obj_id": MASKED_OBJ_ID, - "path": "AutopsyTestCase", - "crtime": 0 - }), - "data_source_info": MaskColumns({ - "device_id": "{device id}", - "added_date_time": "{dateTime}" - }), - # TODO - "ingest_jobs": None, - "tsk_examiners": MaskColumns({ - "login_name": "{examiner_name}" - }), - "tsk_events": MaskColumns({ - "event_id": "MASKED_EVENT_ID", - "time": 0, - }), - # TODO - "event_description_index": None, - "tsk_os_accounts": MaskColumns({ - "os_account_obj_id": MASKED_OBJ_ID - }), - # TODO - "tsk_data_artifacts": None -} - - -# files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 -# path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 -# object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 -# vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 -# report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 -# layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 -# data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find( -# 'INSERT INTO data_source_info ') > -1 -# event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find( -# 'INSERT INTO tsk_event_descriptions ') > -1 -# events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 -# ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 -# examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 -# ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find( -# 'INSERT INTO image_gallery_groups ') > -1 -# ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find( -# 'INSERT INTO image_gallery_groups_seen ') > -1 -# os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 -# os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find( -# 'INSERT INTO tsk_os_account_attributes') > -1 -# os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find( -# 'INSERT INTO tsk_os_account_instances') > -1 -# data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find( -# 'INSERT INTO tsk_data_artifacts') > -1 - -def get_pg_table_columns(conn) -> Dict[str, List[str]]: - cursor = conn.cursor() - cursor.execute(""" - SELECT cols.table_name, cols.column_name - FROM information_schema.columns cols - WHERE cols.column_name IS NOT NULL - AND cols.table_name IS NOT NULL - AND cols.table_name IN ( - SELECT tables.tablename FROM pg_catalog.pg_tables tables - WHERE LOWER(schemaname) = 'public' - ) - ORDER by cols.table_name, cols.ordinal_position; - """) - mapping = {} - for row in cursor: - mapping.setdefault(row[0], []).append(row[1]) - - cursor.close() - return mapping - - -def get_sql_insert_value(val) -> str: - if not val: - return "NULL" - - if isinstance(val, str): - escaped_val = val.replace('\n', 
'\\n').replace("'", "''") - return f"'{escaped_val}'" - - return str(val) - - -def write_normalized(output_file, db_conn, table: str, column_names: List[str], normalizer=None): - cursor = db_conn.cursor() - - joined_columns = ",".join([col for col in column_names]) - cursor.execute(f"SELECT {joined_columns} FROM {table}") - for row in cursor: - if len(row) != len(column_names): - print(f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}") - continue - - row_dict = {} - for col_idx in range(0, len(column_names)): - row_dict[column_names[col_idx]] = row[col_idx] - - if normalizer: - row_dict = normalizer(table, row_dict) - - values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) - insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n' - output_file.write(insert_statement) - - - - -#with sqlite3.connect(r"C:\Users\gregd\Desktop\autopsy_412.db") as conn, \ -with psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345") as conn, \ - open(r"C:\Users\gregd\Desktop\dbdump.sql", mode="w", encoding='utf-8') as output_file: - - for table, cols in get_pg_table_columns(conn).items(): - # for table, cols in get_sqlite_table_columns(conn).items(): - write_normalized(output_file, conn, table, cols) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index cec54316d2..3bd516801c 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -8,6 +8,8 @@ import os import codecs import datetime import sys +from typing import Callable, Dict, Union, List + import psycopg2 import psycopg2.extras import socket @@ -319,83 +321,32 @@ class TskDbDiff(object): dump_file: a pathto_File, the location to dump the non-blackboard database items """ - conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings) - id_files_table = build_id_files_table(conn.cursor(), isMultiUser) - id_vs_parts_table = build_id_vs_parts_table(conn.cursor(), isMultiUser) - id_vs_info_table = build_id_vs_info_table(conn.cursor(), isMultiUser) - id_fs_info_table = build_id_fs_info_table(conn.cursor(), isMultiUser) - id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser) - id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser) - id_legacy_artifact_types = build_id_legacy_artifact_types_table(conn.cursor(), isMultiUser) - id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser) - id_images_table = build_id_image_names_table(conn.cursor(), isMultiUser) - id_accounts_table = build_id_accounts_table(conn.cursor(), isMultiUser) - id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table, id_accounts_table) + conn, output_file = db_connect(db_file, isMultiUser, pgSettings) + guid_utils = TskGuidUtils.create(conn) - if isMultiUser: # Use PostgreSQL - os.environ['PGPASSWORD']=pgSettings.password - pgDump = ["pg_dump", "--inserts", "-U", pgSettings.username, "-h", pgSettings.pgHost, "-p", pgSettings.pgPort, "-d", db_file, "-E", "utf-8", "-T", "blackboard_artifacts", "-T", "blackboard_attributes", "-f", "postgreSQLDump.sql"] - subprocess.call(pgDump) - postgreSQL_db = codecs.open("postgreSQLDump.sql", "r", "utf-8") - # Write to the database dump - with codecs.open(dump_file, "wb", "utf_8") as db_log: - dump_line = '' - for line in postgreSQL_db: - line = line.strip('\r\n ') - # Deal with pg_dump result file - if (line.startswith('--') 
or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): # It's comment or alter statement or catalog entry or set idle entry or empty line - continue - elif not line.endswith(';'): # Statement not finished - dump_line += line - continue - else: - dump_line += line - if 'INSERT INTO image_gallery_groups_seen' in dump_line: - dump_line = '' - continue; - dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) - db_log.write('%s\n' % dump_line) - dump_line = '' - postgreSQL_db.close() - else: # use Sqlite - # Delete the blackboard tables - conn.text_factory = lambda x: x.decode("utf-8", "ignore") - conn.execute("DROP TABLE blackboard_artifacts") - conn.execute("DROP TABLE blackboard_attributes") - # Write to the database dump - with codecs.open(dump_file, "wb", "utf_8") as db_log: - for line in conn.iterdump(): - if 'INSERT INTO "image_gallery_groups_seen"' in line: - continue - line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) - db_log.write('%s\n' % line) - # Now sort the file - srtcmdlst = ["sort", dump_file, "-o", dump_file] - subprocess.call(srtcmdlst) + if isMultiUser: + table_cols = get_pg_table_columns(conn) + schema = get_pg_schema(pgSettings.username, pgSettings.password, pgSettings.pgHost, pgSettings.pgPort) + else: + table_cols = get_sqlite_table_columns(conn) + schema = get_sqlite_schema(conn) + + output_file.write(schema + "\n") + for table, cols in sorted(table_cols.items(), key=lambda pr: pr[0]): + normalizer = TABLE_NORMALIZATIONS[table] if table in TABLE_NORMALIZATIONS else None + write_normalized(guid_utils, output_file, conn, table, cols, normalizer) + + # Now sort the file + # srtcmdlst = ["sort", dump_file, "-o", dump_file] + # subprocess.call(srtcmdlst) conn.close() # cleanup the backup - if backup_db_file: - os.remove(backup_db_file) - return id_obj_path_table + # if backup_db_file: + # os.remove(backup_db_file) + return guid_utils.obj_id_guids - def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings): - """Dumps the given database to text files for later comparison. 
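# --- Editor's note: illustrative sketch, not part of the patch. The new dump path above first
# writes the sanitized schema, then dumps every table in sorted name order so the text output is
# stable across runs, dispatching each table to write_normalized with its TABLE_NORMALIZATIONS
# entry (or None). The lookup "TABLE_NORMALIZATIONS[table] if table in TABLE_NORMALIZATIONS else
# None" is equivalent to dict.get, as sketched here using the same local names as the function above:
for table, cols in sorted(table_cols.items()):
    write_normalized(guid_utils, output_file, conn, table, cols, TABLE_NORMALIZATIONS.get(table))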
- - Args: - db_file: a pathto_File, the database file to dump - dump_file: a pathto_File, the location to dump the non-blackboard database items - bb_dump_file: a pathto_File, the location to dump the blackboard database items - """ - id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings) - TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table) - - - def _get_tmp_file(base, ext): - time = datetime.datetime.now().time().strftime("%H%M%f") - return os.path.join(os.environ['TMP'], base + time + ext) - class TskDbDiffException(Exception): pass @@ -407,451 +358,680 @@ class PGSettings(object): self.username = user self.password = password - def get_pgHost(): + def get_pgHost(self): return self.pgHost - def get_pgPort(): + def get_pgPort(self): return self.pgPort - def get_username(): + def get_username(self): return self.username - def get_password(): + def get_password(self): return self.password -def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table, images_table, artifact_table, accounts_table): - """ Make testing more consistent and reasonable by doctoring certain db entries. - - Args: - line: a String, the line to remove the object id from. - files_table: a map from object ids to file paths. +class TskGuidUtils: + """ + This class provides guids for potentially volatile data. """ - # Sqlite statement use double quotes for table name, PostgreSQL doesn't. We check both databases results for normalization. - files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 - path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 - object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 - vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 - report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 - layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 - data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find('INSERT INTO data_source_info ') > -1 - event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find('INSERT INTO tsk_event_descriptions ') > -1 - events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 - ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 - examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 - ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find('INSERT INTO image_gallery_groups ') > -1 - ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find('INSERT INTO image_gallery_groups_seen ') > -1 - os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 - os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find('INSERT INTO tsk_os_account_attributes') > -1 - os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find('INSERT INTO tsk_os_account_instances') > -1 - data_artifacts_index = line.find('INSERT INTO 
"tsk_data_artifacts"') > -1 or line.find('INSERT INTO tsk_data_artifacts') > -1 - - parens = line[line.find('(') + 1 : line.rfind(')')] - no_space_parens = parens.replace(" ", "") - fields_list = list(csv.reader([no_space_parens], quotechar="'"))[0] - #Add back in the quotechar for values that were originally wrapped (csv reader consumes this character) - fields_list_with_quotes = [] - ptr = 0 - for field in fields_list: - if(len(field) == 0): - field = "'" + field + "'" - else: - start = no_space_parens.find(field, ptr) - if((start - 1) >= 0 and no_space_parens[start - 1] == '\''): - if((start + len(field)) < len(no_space_parens) and no_space_parens[start + len(field)] == '\''): - field = "'" + field + "'" - fields_list_with_quotes.append(field) - if(ptr > 0): - #Add one for each comma that is used to separate values in the original string - ptr+=1 - ptr += len(field) + @staticmethod + def _get_guid_dict(db_conn, select_statement, delim=""): + """ + Retrieves a dictionary mapping the first item selected to a concatenation of the remaining values. + Args: + db_conn: The database connection. + select_statement: The select statement. + delim: The delimiter for how row data from index 1 to end shall be concatenated. - fields_list = fields_list_with_quotes + Returns: A dictionary mapping the key (the first item in the select statement) to a concatenation of the remaining values. - # remove object ID - if files_index: - - # Ignore TIFF size and hash if extracted from PDFs. - # See JIRA-6951 for more details. - # index -3 = 3rd from the end, which is extension - # index -5 = 5th from the end, which is the parent path. - if fields_list[-3] == "'tif'" and fields_list[-5].endswith(".pdf/'"): - fields_list[15] = "'SIZE_IGNORED'" - fields_list[23] = "'MD5_IGNORED'" - fields_list[24] = "'SHA256_IGNORED'" - newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:-1]) + ');') #leave off first (object id) and last (os_account_id) field - # Remove object ID from Unalloc file name - newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) - return newLine - # remove object ID - elif vs_parts_index: - newLine = ('INSERT INTO "tsk_vs_parts" VALUES(' + ', '.join(fields_list[1:]) + ');') - return newLine - # remove group ID - elif ig_groups_index: - newLine = ('INSERT INTO "image_gallery_groups" VALUES(' + ', '.join(fields_list[1:]) + ');') - return newLine - #remove id field - elif ig_groups_seen_index: - # Only removing the id and group_id fields for now. May need to care about examiner_id and seen fields in future. 
- newLine = ('INSERT INTO "image_gallery_groups_seen" VALUES(' + ', '.join(fields_list[2:]) + ');') - return newLine - # remove object ID - elif path_index: - obj_id = int(fields_list[0]) - objValue = files_table[obj_id] - # remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory - idx_pre = fields_list[1].find('EmbeddedFileExtractor') + len('EmbeddedFileExtractor') - if idx_pre > -1: - idx_pos = fields_list[1].find('\\', idx_pre + 2) - dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file seperator - dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')] - pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:] - else: - pathValue = fields_list[1] - # remove localhost from postgres par_obj_name - multiOutput_idx = pathValue.find('ModuleOutput') - if multiOutput_idx > -1: - pathValue = "'" + pathValue[pathValue.find('ModuleOutput'):] #postgres par_obj_name include losthost + """ + cursor = db_conn.cursor() + cursor.execute(select_statement) + ret_dict = {} + for row in cursor: + ret_dict[row[0]] = delim.join([str(col) for col in row[1:]]) - newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');') - return newLine - # remove object ID - elif layout_index: - obj_id = fields_list[0] - path= files_table[int(obj_id)] - newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', '.join(fields_list[1:]) + ');') - # Remove object ID from Unalloc file name - newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) - return newLine - # remove object ID - elif object_index: - obj_id = fields_list[0] - parent_id = fields_list[1] - newLine = 'INSERT INTO "tsk_objects" VALUES(' - path = None - parent_path = None + return ret_dict - #if obj_id or parent_id is invalid literal, we simple return the values as it is - try: - obj_id = int(obj_id) - if parent_id != 'NULL': - parent_id = int(parent_id) - except Exception as e: - print(obj_id, parent_id) - return line + @staticmethod + def create(db_conn): + """ + Creates an instance of this class by querying for relevant guid data. + Args: + db_conn: The database connection. - if obj_id in files_table.keys(): - path = files_table[obj_id] - elif obj_id in vs_parts_table.keys(): - path = vs_parts_table[obj_id] - elif obj_id in vs_info_table.keys(): - path = vs_info_table[obj_id] - elif obj_id in fs_info_table.keys(): - path = fs_info_table[obj_id] - elif obj_id in reports_table.keys(): - path = reports_table[obj_id] - # remove host name (for multi-user) and dates/times from path for reports - if path is not None: - if 'ModuleOutput' in path: - # skip past the host name (if any) - path = path[path.find('ModuleOutput'):] - if 'BulkExtractor' in path or 'Smirk' in path: - # chop off the last folder (which contains a date/time) - path = path[:path.rfind('\\')] - if 'Reports\\AutopsyTestCase HTML Report' in path: - path = 'Reports\\AutopsyTestCase HTML Report' + Returns: The instance of this class. 
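# --- Editor's note: illustrative sketch, not part of the patch. TskGuidUtils._get_guid_dict maps
# the first selected column to the remaining columns joined with the delimiter; the in-memory
# table and row below are made-up sample data.
import sqlite3

demo_conn = sqlite3.connect(":memory:")
demo_conn.execute("CREATE TABLE tsk_files (obj_id INTEGER, parent_path TEXT, name TEXT)")
demo_conn.execute("INSERT INTO tsk_files VALUES (2, '/img/', 'file.txt')")

# -> {2: '/img/file.txt'}
print(TskGuidUtils._get_guid_dict(demo_conn, "SELECT obj_id, parent_path, name FROM tsk_files"))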
- if parent_id in files_table.keys(): - parent_path = files_table[parent_id] - elif parent_id in vs_parts_table.keys(): - parent_path = vs_parts_table[parent_id] - elif parent_id in vs_info_table.keys(): - parent_path = vs_info_table[parent_id] - elif parent_id in fs_info_table.keys(): - parent_path = fs_info_table[parent_id] - elif parent_id in images_table.keys(): - parent_path = images_table[parent_id] - elif parent_id in accounts_table.keys(): - parent_path = accounts_table[parent_id] - elif parent_id == 'NULL': - parent_path = "NULL" - - # Remove host name (for multi-user) from parent_path - if parent_path is not None: - if 'ModuleOutput' in parent_path: - # skip past the host name (if any) - parent_path = parent_path[parent_path.find('ModuleOutput'):] + """ + guid_files = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, parent_path, name FROM tsk_files") + guid_vs_parts = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, addr, start FROM tsk_vs_parts", "_") + guid_fs_info = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info", "_") + guid_image_names = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, name FROM tsk_image_names " + "WHERE sequence=0") + guid_os_accounts = TskGuidUtils._get_guid_dict(db_conn, "SELECT os_account_obj_id, addr FROM tsk_os_accounts") + guid_reports = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, path FROM reports") - if path and parent_path: - # Remove object ID from Unalloc file names and regripper output - path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', path) - path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', path) - parent_path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', parent_path) - parent_path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', parent_path) - return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');' - else: - return newLine + '"OBJECT IDS OMITTED", ' + ', '.join(fields_list[2:]) + ');' #omit parent object id and object id when we cant annonymize them - # remove time-based information, ie Test_6/11/14 -> Test - elif report_index: - fields_list[1] = "AutopsyTestCase" - fields_list[2] = "0" - newLine = ('INSERT INTO "reports" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id - return newLine - elif data_source_info_index: - fields_list[1] = "{device id}" - fields_list[4] = "{dateTime}" - newLine = ('INSERT INTO "data_source_info" VALUES(' + ','.join(fields_list) + ');') - return newLine - elif ingest_job_index: - fields_list[2] = "{host_name}" - start_time = int(fields_list[3]) - end_time = int(fields_list[4]) - if (start_time <= end_time): - fields_list[3] = "0" - fields_list[4] = "0" - newLine = ('INSERT INTO "ingest_jobs" VALUES(' + ','.join(fields_list) + ');') - return newLine - elif examiners_index: - fields_list[1] = "{examiner_name}" - newLine = ('INSERT INTO "tsk_examiners" VALUES(' + ','.join(fields_list) + ');') - return newLine - # remove all timing dependent columns from events table - elif events_index: - newLine = ('INSERT INTO "tsk_events" VALUES(' + ','.join(fields_list[1:2]) + ');') - return newLine - # remove object ids from event description table - elif event_description_index: - # replace object ids with information that is deterministic - file_obj_id = int(fields_list[5]) - object_id = int(fields_list[4]) - legacy_artifact_id = 'NULL' - if (fields_list[6] != 'NULL'): - legacy_artifact_id = int(fields_list[6]) - if file_obj_id != 'NULL' and file_obj_id in files_table.keys(): - fields_list[5] = 
files_table[file_obj_id] - if object_id != 'NULL' and object_id in files_table.keys(): - fields_list[4] = files_table[object_id] - if legacy_artifact_id != 'NULL' and legacy_artifact_id in artifact_table.keys(): - fields_list[6] = artifact_table[legacy_artifact_id] - if fields_list[1] == fields_list[2] and fields_list[1] == fields_list[3]: - fields_list[1] = cleanupEventDescription(fields_list[1]) - fields_list[2] = cleanupEventDescription(fields_list[2]) - fields_list[3] = cleanupEventDescription(fields_list[3]) - newLine = ('INSERT INTO "tsk_event_descriptions" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id - return newLine - elif os_account_index: - newLine = ('INSERT INTO "tsk_os_accounts" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id since value that would be substituted is in diff line already - return newLine - elif os_account_attr_index: - #substitue the account object id for a non changing value - os_account_id = int(fields_list[1]) - fields_list[1] = accounts_table[os_account_id] - #substitue the source object id for a non changing value - source_obj_id = int(fields_list[3]) - if source_obj_id in files_table.keys(): - fields_list[3] = files_table[source_obj_id] - elif source_obj_id in vs_parts_table.keys(): - fields_list[3] = vs_parts_table[source_obj_id] - elif source_obj_id in vs_info_table.keys(): - fields_list[3] = vs_info_table[source_obj_id] - elif source_obj_id in fs_info_table.keys(): - fields_list[3] = fs_info_table[source_obj_id] - elif source_obj_id in images_table.keys(): - fields_list[3] = images_table[source_obj_id] - elif source_obj_id in accounts_table.keys(): - fields_list[3] = accounts_table[source_obj_id] - elif source_obj_id == 'NULL': - fields_list[3] = "NULL" - newLine = ('INSERT INTO "tsk_os_account_attributes" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id - return newLine - elif os_account_instances_index: - os_account_id = int(fields_list[1]) - fields_list[1] = accounts_table[os_account_id] - newLine = ('INSERT INTO "tsk_os_account_instances" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id - return newLine - elif data_artifacts_index: - art_obj_id = int(fields_list[0]) - if art_obj_id in files_table.keys(): - fields_list[0] = files_table[art_obj_id] - else: - fields_list[0] = 'Artifact Object ID Omitted' - account_obj_id = int(fields_list[1]) - if account_obj_id in files_table.keys(): - fields_list[1] = files_table[account_obj_id] - else: - fields_list[1] = 'Account Object ID Omitted' - newLine = ('INSERT INTO "tsk_data_artifacts" VALUES(' + ','.join(fields_list[:]) + ');') # remove ids - return newLine + objid_artifacts = TskGuidUtils._get_guid_dict(db_conn, + "SELECT " + "blackboard_artifacts.artifact_obj_id, " + "blackboard_artifact_types.type_name FROM " + "blackboard_artifacts INNER JOIN blackboard_artifact_types " + "ON blackboard_artifact_types.artifact_type_id = " + "blackboard_artifacts.artifact_type_id") + + cursor = db_conn.cursor() + cursor.execute("SELECT obj_id, par_obj_id FROM tsk_objects") + par_obj_objects = dict([(row[0], row[1]) for row in cursor]) + + guid_artifacts = {} + for k, v in objid_artifacts.items(): + if k in par_obj_objects: + par_obj_id = par_obj_objects[k] + + # check for artifact parent in files, images, reports + path = '' + for artifact_parent_dict in [guid_files, guid_image_names, guid_reports]: + if par_obj_id in artifact_parent_dict: + path = artifact_parent_dict[par_obj_id] + break + + guid_artifacts[par_obj_id] = "/".join([path, v]) + + return TskGuidUtils( + 
obj_id_guids={**guid_files, **guid_reports, **guid_os_accounts, **guid_vs_parts, + **guid_fs_info, **guid_fs_info, **guid_image_names}, + artifact_types=objid_artifacts) + + artifact_types: Dict[int, str] + obj_id_guids: Dict[int, any] + + def __init__(self, obj_id_guids: Dict[int, any], artifact_types: Dict[int, str]): + """ + Main constructor. + Args: + obj_id_guids: A dictionary mapping object ids to their guids. + artifact_types: A dictionary mapping artifact ids to their types. + """ + self.artifact_types = artifact_types + self.obj_id_guids = obj_id_guids + + def get_guid_for_objid(self, obj_id, omitted_value: Union[str, None] = 'Object ID Omitted'): + """ + Returns the guid for the specified object id or returns omitted value if the object id is not found. + Args: + obj_id: The object id. + omitted_value: The value if no object id mapping is found. + + Returns: The relevant guid or the omitted_value. + + """ + return self.obj_id_guids[obj_id] if obj_id in self.obj_id_guids else omitted_value + + def get_guid_for_file_objid(self, obj_id, omitted_value: Union[str, None] = 'Object ID Omitted'): + # TODO this is just an alias; could probably be removed + return self.get_guid_for_objid(obj_id, omitted_value) + + def get_guid_for_accountid(self, account_id, omitted_value: Union[str, None] = 'Account ID Omitted'): + # TODO this is just an alias; could probably be removed + return self.get_guid_for_objid(account_id, omitted_value) + + def get_guid_for_artifactid(self, artifact_id, omitted_value: Union[str, None] = 'Artifact ID Omitted'): + """ + Returns the guid for the specified artifact id or returns omitted value if the artifact id is not found. + Args: + artifact_id: The artifact id. + omitted_value: The value if no object id mapping is found. + + Returns: The relevant guid or the omitted_value. + """ + return self.artifact_types[artifact_id] if artifact_id in self.artifact_types else omitted_value + + +class NormalizeRow: + """ + Given a dictionary representing a row (i.e. column name mapped to value), returns a normalized representation of + that row such that the values should be less volatile from run to run. + """ + row_masker: Callable[[TskGuidUtils, Dict[str, any]], Dict[str, any]] + + def __init__(self, row_masker: Callable[[TskGuidUtils, Dict[str, any]], Union[Dict[str, any], None]]): + """ + Main constructor. + Args: + row_masker: The function to be called to mask the specified row. + """ + self.row_masker = row_masker + + def normalize(self, guid_util: TskGuidUtils, row: Dict[str, any]) -> Union[Dict[str, any], None]: + """ + Normalizes a row such that the values should be less volatile from run to run. + Args: + guid_util: The TskGuidUtils instance providing guids for volatile ids. + row: The row values mapping column name to value. + + Returns: The normalized row or None if the row should be ignored. + + """ + return self.row_masker(guid_util, row) + + +class NormalizeColumns(NormalizeRow): + """ + Utility for normalizing specific column values of a row so they are not volatile values that will change from run + to run. + """ + + @classmethod + def _normalize_col_vals(cls, + col_mask: Dict[str, Union[any, Callable[[TskGuidUtils, any], any]]], + guid_util: TskGuidUtils, + row: Dict[str, any]): + """ + Normalizes column values for each column rule provided. + Args: + col_mask: A dictionary mapping columns to either the replacement value or a function to retrieve the + replacement value given the TskGuidUtils instance and original value as arguments. 
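# --- Editor's note: illustrative sketch, not part of the patch. The guid lookups above fall back
# to a fixed placeholder when an id is unknown, so dumps stay diffable even when an object cannot
# be resolved. The sample mappings below are made up.
demo_guids = TskGuidUtils(obj_id_guids={2: '/img/file.txt'}, artifact_types={10: 'TSK_WEB_HISTORY'})

print(demo_guids.get_guid_for_objid(2))        # -> /img/file.txt
print(demo_guids.get_guid_for_objid(99))       # -> Object ID Omitted
print(demo_guids.get_guid_for_artifactid(10))  # -> TSK_WEB_HISTORY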
+ guid_util: The TskGuidUtil used to provide guids for volatile values. + row: The dictionary representing the row mapping column names to values. + + Returns: The new row representation. + + """ + row_copy = row.copy() + for key, val in col_mask.items(): + # only replace values if present in row + if key in row_copy: + # if a column replacing function, call with original value + if isinstance(val, Callable): + row_copy[key] = val(guid_util, row[key]) + # otherwise, just replace with mask value + else: + row_copy[key] = val + + return row_copy + + def __init__(self, col_mask: Dict[str, Union[any, Callable[[any], any]]]): + super().__init__(lambda guid_util, row: NormalizeColumns._normalize_col_vals(col_mask, guid_util, row)) + + +def get_path_segs(path: Union[str, None]) -> Union[List[str], None]: + """ + Breaks a path string into its folders and filenames. + Args: + path: The path string or None. + + Returns: The path segments or None. + + """ + if path: + return list(filter(lambda x: len(x.strip()) > 0, [path for path in os.path.normpath(path).split(os.sep)])) else: - return line - -def cleanupEventDescription(description): - test = re.search("^'\D+:\d+'$", description) - if test is not None: - return re.sub(":\d+", ":", description) + return None + + +def index_of(lst, search_item) -> int: + """ + Returns the index of the item in the list or -1. + Args: + lst: The list. + search_item: The item to search for. + + Returns: The index in the list of the item or -1. + + """ + for idx, item in enumerate(lst): + if item == search_item: + return idx + + return -1 + + +def get_sql_insert_value(val) -> str: + """ + Returns the value that would appear in a sql insert statement (i.e. string becomes 'string', None becomes NULL) + Args: + val: The original value. + + Returns: The sql insert equivalent value. + + """ + if val is None: + return "NULL" + + if isinstance(val, str): + escaped_val = val.replace('\n', '\\n').replace("'", "''") + return f"'{escaped_val}'" + + return str(val) + + +def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: + """ + Retrieves a dictionary mapping table names to a list of all the columns for that table + where the columns are in ordinal value. + Args: + conn: The database connection. + + Returns: A dictionary of the form { table_name: [col_name1, col_name2...col_nameN] } + + """ + cur = conn.cursor() + cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") + tables = list([table[0] for table in cur.fetchall()]) + cur.close() + + to_ret = {} + for table in tables: + cur = conn.cursor() + cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) + to_ret[table] = list([col[0] for col in cur.fetchall()]) + + return to_ret + + +def get_pg_table_columns(conn) -> Dict[str, List[str]]: + """ + Returns a dictionary mapping table names to the list of their columns in ordinal order. + Args: + conn: The pg database connection. + + Returns: The dictionary of tables mapped to a list of their ordinal-orderd column names. 
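# --- Editor's note: illustrative sanity checks, not part of the patch, for the small helpers above.
print(get_path_segs("ModuleOutput/Embedded File Extractor/f_000001.jpg"))
# -> ['ModuleOutput', 'Embedded File Extractor', 'f_000001.jpg']   (works with either path separator)

print(get_sql_insert_value(None))    # -> NULL
print(get_sql_insert_value(42))      # -> 42
print(get_sql_insert_value("it's"))  # -> 'it''s'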
+ """ + cursor = conn.cursor() + cursor.execute(""" + SELECT cols.table_name, cols.column_name + FROM information_schema.columns cols + WHERE cols.column_name IS NOT NULL + AND cols.table_name IS NOT NULL + AND cols.table_name IN ( + SELECT tables.tablename FROM pg_catalog.pg_tables tables + WHERE LOWER(schemaname) = 'public' + ) + ORDER by cols.table_name, cols.ordinal_position; + """) + mapping = {} + for row in cursor: + mapping.setdefault(row[0], []).append(row[1]) + + cursor.close() + return mapping + + +def sanitize_schema(original: str) -> str: + """ + Sanitizes sql script representing table/index creations. + Args: + original: The original sql schema creation script. + + Returns: The sanitized schema. + """ + sanitized_lines = [] + dump_line = '' + for line in original.splitlines(): + line = line.strip('\r\n ') + # It's comment or alter statement or catalog entry or set idle entry or empty line + if (line.startswith('--') or line.lower().startswith( + 'alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): + continue + elif line.endswith(';'): # Statement not finished + dump_line += line + sanitized_lines.append(dump_line) + dump_line = '' + else: + dump_line += line + + if len(dump_line.strip()) > 0: + sanitized_lines.append(dump_line) + + return "\n".join(sanitized_lines) + + +def get_pg_schema(pg_username: str, pg_pword: str, pg_host: str, pg_port: str): + """ + Gets the schema to be added to the dump text from the postgres database. + Args: + pg_username: The postgres user name. + pg_pword: The postgres password. + pg_host: The postgres host. + pg_port: The postgres port. + + Returns: The normalized schema. + + """ + os.environ['PGPASSWORD'] = pg_pword + pg_dump = ["pg_dump", "--inserts", "-U", pg_username, "-h", pg_host, "-p", pg_port, + "-T", "blackboard_artifacts", "-T", "blackboard_attributes"] + output = subprocess.check_output(pg_dump) + return sanitize_schema(output) + + +def get_sqlite_schema(db_conn): + """ + Gets the schema to be added to the dump text from the sqlite database. + Args: + db_conn: The database connection. + + Returns: The normalized schema. + + """ + cursor = db_conn.cursor() + query = "SELECT sql FROM sqlite_master " \ + "WHERE type IN ('table', 'index') AND sql IS NOT NULL " \ + "ORDER BY type DESC, tbl_name ASC" + + cursor.execute(query) + schema = '\n'.join([str(row[0]) + ';' for row in cursor]) + return sanitize_schema(schema) + + +def _mask_event_desc(desc: str) -> str: + """ + Masks dynamic event descriptions of the form ":" so the artifact id is no longer + present. + Args: + desc: The original description. + + Returns: The normalized description. + + """ + match = re.search(r"^\s*(\D+):\d+\s*$", desc.strip()) + if match: + return f"{match.group(1)}:" + + return desc + + +def normalize_tsk_event_descriptions(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes event description rows masking possibly changing column values. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized event description row. 
+ """ + row_copy = row.copy() + # replace object ids with information that is deterministic + row_copy['content_obj_id'] = guid_util.get_guid_for_file_objid(row['content_obj_id']) + row_copy['data_source_obj_id'] = guid_util.get_guid_for_file_objid(row['data_source_obj_id']) + row_copy['artifact_id'] = guid_util.get_guid_for_artifactid(row['artifact_id']) + + if row['full_description'] == row['med_description'] == row['short_description']: + row_copy['full_description'] = _mask_event_desc(row['full_description']) + row_copy['med_description'] = _mask_event_desc(row['med_description']) + row_copy['short_description'] = _mask_event_desc(row['short_description']) + + return row_copy + + +def normalize_ingest_jobs(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes ingest jobs table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized ingest job row. + + """ + row_copy = row.copy() + row_copy['host_name'] = "{host_name}" + + start_time = row['start_date_time'] + end_time = row['end_date_time'] + if start_time <= end_time: + row_copy['start_date_time'] = 0 + row_copy['end_date_time'] = 0 + + return row_copy + + +def normalize_unalloc_files(path_str: Union[str, None]) -> Union[str, None]: + """ + Normalizes a path string removing timestamps from unalloc files. + Args: + path_str: The original path string. + + Returns: The path string where timestamps are removed from unalloc strings. + + """ + return re.sub('Unalloc_[0-9]+_', 'Unalloc_', path_str) if path_str else None + + +def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: + """ + Normalizes a path string removing timestamps from regripper files. + Args: + path_str: The original path string. + + Returns: The path string where timestamps are removed from regripper paths. + + """ + return re.sub(r'regripper\-[0-9]+\-full', 'regripper-full', path_str) if path_str else None + + +def normalize_tsk_files(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes files table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized files table row. + + """ + # Ignore TIFF size and hash if extracted from PDFs. + # See JIRA-6951 for more details. + row_copy = row.copy() + if row['extension'] and row['extension'].strip().lower() == 'tif' and \ + row['parent_path'] and row['parent_path'].strip().lower().endswith('.pdf/'): + row_copy['size'] = "SIZE_IGNORED" + row_copy['md5'] = "MD5_IGNORED" + row_copy['sha256'] = "SHA256_IGNORED" + + row_copy['obj_id'] = MASKED_OBJ_ID + row_copy['os_account_obj_id'] = 'MASKED_OS_ACCOUNT_OBJ_ID' + row_copy['parent_path'] = normalize_unalloc_files(row['parent_path']) + row_copy['name'] = normalize_unalloc_files(row['name']) + return row_copy + + +def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes file path table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized file path table row. 
+ """ + row_copy = row.copy() + path = row['path'] + if path: + path_parts = get_path_segs(path) + module_output_idx = index_of(path_parts, 'ModuleOutput') + if module_output_idx >= 0: + # remove everything up to and including ModuleOutput if ModuleOutput present + path_parts = path_parts[module_output_idx:] + if len(path_parts) > 1 and path_parts[0] == 'Embedded File Extractor': + match = re.match(r'^(.+?)_[0-9]*$', path_parts[1]) + if match: + path_parts[1] = match.group(1) + + row_copy['path'] = os.path.join(*path_parts) if len(path_parts) > 0 else '/' + + row_copy['obj_id'] = guid_util.get_guid_for_file_objid(row['obj_id']) + return row_copy + + +def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes object table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized object table row. + """ + parent_id = row['par_obj_id'] + path = guid_util.get_guid_for_objid(row['obj_id'], omitted_value=None) + row_copy = row.copy() + + # remove host name (for multi-user) and dates/times from path for reports + if path is not None: + path_parts = get_path_segs(path) + module_output_idx = index_of(path_parts, 'ModuleOutput') + if module_output_idx >= 0: + # remove everything up to and including ModuleOutput if ModuleOutput present + path_parts = path_parts[module_output_idx:] + + if "BulkExtractor" in path_parts or "Smirk" in path_parts: + # chop off the last folder (which contains a date/time) + path_parts = path_parts[:-1] + + for idx in range(0, len(path_parts) - 1): + if path_parts[idx] == "Reports" and path_parts[idx + 1] == "AutopsyTestCase HTML Report": + path_parts = ["Reports", "AutopsyTestCase HTML Report"] + + path = os.path.join(*path_parts) if len(path_parts) > 0 else '/' + + parent_path = guid_util.get_guid_for_objid(parent_id, omitted_value=None) + + # Remove host name (for multi-user) from parent_path + if parent_path is not None: + parent_path_parts = get_path_segs(parent_path) + module_output_idx = index_of(parent_path_parts, 'ModuleOutput') + if module_output_idx >= 0: + parent_path_parts = parent_path_parts[module_output_idx:] + + parent_path = os.path.join(*parent_path_parts) if len(parent_path_parts) > 0 else '/' + + # handle regripper and unalloc file replacements + if path and parent_path: + row_copy['obj_id'] = normalize_regripper_files(normalize_unalloc_files(path)) + row_copy['par_obj_id'] = normalize_regripper_files(normalize_unalloc_files(parent_path)) else: - return description + row_copy['obj_id'] = MASKED_OBJ_ID + row_copy['par_obj_id'] = "MASKED_PARENT_OBJ_ID" -def getAssociatedArtifactType(cur, artifact_id, isMultiUser): - if isMultiUser: - cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s",[artifact_id]) - else: - cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=?",[artifact_id]) + return row_copy - info = cur.fetchone() - - return "File path: " + info[0] + " Artifact Type: 
" + info[1] -def build_id_files_table(db_cursor, isPostgreSQL): - """Build the map of object ids to file paths. +MASKED_OBJ_ID = "MASKED_OBJ_ID" +MASKED_ID = "MASKED_ID" - Args: - db_cursor: the database cursor +IGNORE_TABLE = "IGNORE_TABLE" + +TableNormalization = Union[IGNORE_TABLE, NormalizeRow] + +""" +This dictionary maps tables where data should be specially handled to how they should be handled. +""" +TABLE_NORMALIZATIONS: Dict[str, TableNormalization] = { + "image_gallery_groups_seen": IGNORE_TABLE, + "blackboard_artifacts": IGNORE_TABLE, + "blackboard_attributes": IGNORE_TABLE, + "tsk_files": NormalizeRow(normalize_tsk_files), + "tsk_vs_parts": NormalizeColumns({ + "obj_id": MASKED_OBJ_ID + }), + "image_gallery_groups": NormalizeColumns({ + "obj_id": MASKED_OBJ_ID + }), + "tsk_files_path": NormalizeRow(normalize_tsk_files_path), + "tsk_file_layout": NormalizeColumns({ + "obj_id": lambda guid_util, col: guid_util.get_guid_for_file_objid(col) + }), + "tsk_objects": NormalizeRow(normalize_tsk_objects), + "reports": NormalizeColumns({ + "obj_id": MASKED_OBJ_ID, + "path": "AutopsyTestCase", + "crtime": 0 + }), + "data_source_info": NormalizeColumns({ + "device_id": "{device id}", + "added_date_time": "{dateTime}" + }), + "ingest_jobs": NormalizeRow(normalize_ingest_jobs), + "tsk_examiners": NormalizeColumns({ + "login_name": "{examiner_name}" + }), + "tsk_events": NormalizeColumns({ + "event_id": "MASKED_EVENT_ID", + "event_description_id": None, + "time": None, + }), + "tsk_event_descriptions": NormalizeRow(normalize_tsk_event_descriptions), + "tsk_os_accounts": NormalizeColumns({ + "os_account_obj_id": MASKED_OBJ_ID + }), + "tsk_os_account_attributes": NormalizeColumns({ + "id": MASKED_ID, + "os_account_obj_id": lambda guid_util, col: guid_util.get_guid_for_accountid(col), + "source_obj_id": lambda guid_util, col: guid_util.get_guid_for_objid(col) + }), + "tsk_os_account_instances": NormalizeColumns({ + "id": MASKED_ID, + "os_account_obj_id": lambda guid_util, col: guid_util.get_guid_for_accountid(col) + }), + "tsk_data_artifacts": NormalizeColumns({ + "artifact_obj_id": + lambda guid_util, col: guid_util.get_guid_for_file_objid(col, omitted_value="Artifact Object ID Omitted"), + "os_account_obj_id": + lambda guid_util, col: guid_util.get_guid_for_file_objid(col, omitted_value="Account Object ID Omitted"), + }) +} + + +def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, column_names: List[str], + normalizer: Union[TableNormalization, None] = None): """ - # for each row in the db, take the object id, parent path, and name, then create a tuple in the dictionary - # with the object id as the key and the full file path (parent + name) as the value - mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, parent_path, name FROM tsk_files")]) - return mapping - -def build_id_vs_parts_table(db_cursor, isPostgreSQL): - """Build the map of object ids to vs_parts. - + Outputs rows of a file as their normalized values (where values should not change from run to run). Args: - db_cursor: the database cursor + guid_utils: Provides guids to replace values that would potentially change from run to run. + output_file: The file where the normalized dump will be written. + db_conn: The database connection. + table: The name of the table. + column_names: The name of the columns in the table in ordinal order. + normalizer: The normalizer (if any) to use so that data is properly normalized. 
""" - # for each row in the db, take the object id, addr, and start, then create a tuple in the dictionary - # with the object id as the key and (addr + start) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, addr, start FROM tsk_vs_parts")]) - return mapping + if normalizer == IGNORE_TABLE: + return -def build_id_vs_info_table(db_cursor, isPostgreSQL): - """Build the map of object ids to vs_info. + cursor = db_conn.cursor() - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, vs_type, and img_offset, then create a tuple in the dictionary - # with the object id as the key and (vs_type + img_offset) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")]) - return mapping + joined_columns = ",".join([col for col in column_names]) + cursor.execute(f"SELECT {joined_columns} FROM {table}") + for row in cursor: + if len(row) != len(column_names): + print( + f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}") + continue - -def build_id_fs_info_table(db_cursor, isPostgreSQL): - """Build the map of object ids to fs_info. + row_dict = {} + for col_idx in range(0, len(column_names)): + row_dict[column_names[col_idx]] = row[col_idx] - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, img_offset, and fs_type, then create a tuple in the dictionary - # with the object id as the key and (img_offset + fs_type) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")]) - return mapping + if normalizer and isinstance(normalizer, NormalizeRow): + row_masker: NormalizeRow = normalizer + row_dict = row_masker.normalize(guid_utils, row_dict) -def build_id_objects_table(db_cursor, isPostgreSQL): - """Build the map of object ids to par_id. + if row_dict is not None: + # entries = [] + # for idx in range(0, len(column_names)): + # column = column_names[idx] + # value = get_sql_insert_value(row_dict[column] if column in row_dict else None) + # entries.append((column, value)) + # insert_values = ", ".join([f"{pr[0]}: {pr[1]}" for pr in entries]) + # insert_statement = f"{table}: {{{insert_values}}}\n" - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and par_obj_id, type as the value - mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) - return mapping + values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) + insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n' + output_file.write(insert_statement) -def build_id_image_names_table(db_cursor, isPostgreSQL): - """Build the map of object ids to name. 
- - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id and name then create a tuple in the dictionary - # with the object id as the key and name, type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, name FROM tsk_image_names WHERE sequence=0")]) - #data_sources which are logical file sets will be found in the files table - return mapping - -def build_id_artifact_types_table(db_cursor, isPostgreSQL): - """Build the map of object ids to artifact ids. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and artifact type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) - return mapping - -def build_id_legacy_artifact_types_table(db_cursor, isPostgreSQL): - """Build the map of legacy artifact ids to artifact type. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the legacy artifact id then create a tuple in the dictionary - # with the artifact id as the key and artifact type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) - return mapping - -def build_id_reports_table(db_cursor, isPostgreSQL): - """Build the map of report object ids to report path. - - Args: - db_cursor: the database cursor - """ - # for each row in the reports table in the db, create an obj_id -> path map - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, path FROM reports")]) - return mapping - -def build_id_accounts_table(db_cursor, isPostgreSQL): - """Build the map of object ids to OS account SIDs. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id and account SID then creates a tuple in the dictionary - # with the object id as the key and the OS Account's SID as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT os_account_obj_id, addr FROM tsk_os_accounts")]) - return mapping - -def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table, images_table, accounts_table): - """Build the map of object ids to artifact ids. 
- - Args: - files_table: obj_id, path - objects_table: obj_id, par_obj_id, type - artifacts_table: obj_id, artifact_type_name - reports_table: obj_id, path - images_table: obj_id, name - accounts_table: obj_id, addr - """ - # make a copy of files_table and update it with new data from artifacts_table and reports_table - mapping = files_table.copy() - for k, v in objects_table.items(): - path = "" - if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id - if k in reports_table.keys(): # For a report we use the report path - par_obj_id = v[0] - if par_obj_id is not None: - mapping[k] = reports_table[k] - elif k in artifacts_table.keys(): # For an artifact we use it's par_obj_id's path+name plus it's artifact_type name - par_obj_id = v[0] # The parent of an artifact can be a file or a report - if par_obj_id in mapping.keys(): - path = mapping[par_obj_id] - elif par_obj_id in reports_table.keys(): - path = reports_table[par_obj_id] - elif par_obj_id in images_table.keys(): - path = images_table[par_obj_id] - mapping[k] = path + "/" + artifacts_table[k] - elif k in accounts_table.keys(): # For an OS Account object ID we use its addr field which is the account SID - mapping[k] = accounts_table[k] - elif v[0] not in mapping.keys(): - if v[0] in artifacts_table.keys(): - par_obj_id = objects_table[v[0]] - path = mapping[par_obj_id] - mapping[k] = path + "/" + artifacts_table[v[0]] - return mapping def db_connect(db_file, isMultiUser, pgSettings=None): if isMultiUser: # use PostgreSQL @@ -867,12 +1047,6 @@ def db_connect(db_file, isMultiUser, pgSettings=None): os.chmod (backup_db_file, 0o777) return sqlite3.connect(backup_db_file), backup_db_file -def sql_select_execute(cursor, isPostgreSQL, sql_stmt): - if isPostgreSQL: - cursor.execute(sql_stmt) - return cursor.fetchall() - else: - return cursor.execute(sql_stmt) def main(): try: diff --git a/test/script/tskdbdiff2.py b/test/script/tskdbdiff2.py deleted file mode 100644 index 7ff02d0c30..0000000000 --- a/test/script/tskdbdiff2.py +++ /dev/null @@ -1,969 +0,0 @@ -# Requires python3 - -import re -import sqlite3 -import subprocess -import shutil -import os -import codecs -import datetime -import sys -from typing import Dict, List - -import psycopg2 -import psycopg2.extras -import socket -import csv - -class TskDbDiff(object): - """Compares two TSK/Autospy SQLite databases. - - Attributes: - gold_artifacts: - autopsy_artifacts: - gold_attributes: - autopsy_attributes: - gold_objects: - autopsy_objects: - artifact_comparison: - attribute_comparision: - report_errors: a listof_listof_String, the error messages that will be - printed to screen in the run_diff method - passed: a boolean, did the diff pass? - autopsy_db_file: - gold_db_file: - """ - def __init__(self, output_db, gold_db, output_dir=None, gold_bb_dump=None, gold_dump=None, verbose=False, isMultiUser=False, pgSettings=None): - """Constructor for TskDbDiff. - - Args: - output_db_path: path to output database (non-gold standard) - gold_db_path: path to gold database - output_dir: (optional) Path to folder where generated files will be put. - gold_bb_dump: (optional) path to file where the gold blackboard dump is located - gold_dump: (optional) path to file where the gold non-blackboard dump is located - verbose: (optional) a boolean, if true, diff results are sent to stdout. 
- """ - - self.output_db_file = output_db - self.gold_db_file = gold_db - self.output_dir = output_dir - self.gold_bb_dump = gold_bb_dump - self.gold_dump = gold_dump - self._generate_gold_dump = False - self._generate_gold_bb_dump = False - self._bb_dump_diff = "" - self._dump_diff = "" - self._bb_dump = "" - self._dump = "" - self.verbose = verbose - self.isMultiUser = isMultiUser - self.pgSettings = pgSettings - - if self.isMultiUser and not self.pgSettings: - print("Missing PostgreSQL database connection settings data.") - sys.exit(1) - - if self.gold_bb_dump is None: - self._generate_gold_bb_dump = True - if self.gold_dump is None: - self._generate_gold_dump = True - - def run_diff(self): - """Compare the databases. - - Raises: - TskDbDiffException: if an error occurs while diffing or dumping the database - """ - - self._init_diff() - id_obj_path_table = -1 - # generate the gold database dumps if necessary - if self._generate_gold_dump: - id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.gold_db_file, self.gold_dump, self.isMultiUser, self.pgSettings) - if self._generate_gold_bb_dump: - TskDbDiff._dump_output_db_bb(self.gold_db_file, self.gold_bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table) - - # generate the output database dumps (both DB and BB) - id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.output_db_file, self._dump, self.isMultiUser, self.pgSettings) - TskDbDiff._dump_output_db_bb(self.output_db_file, self._bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table) - - # Compare non-BB - dump_diff_pass = self._diff(self._dump, self.gold_dump, self._dump_diff) - - # Compare BB - bb_dump_diff_pass = self._diff(self._bb_dump, self.gold_bb_dump, self._bb_dump_diff) - - self._cleanup_diff() - return dump_diff_pass, bb_dump_diff_pass - - - def _init_diff(self): - """Set up the necessary files based on the arguments given at construction""" - if self.output_dir is None: - # No stored files - self._bb_dump = TskDbDiff._get_tmp_file("BlackboardDump", ".txt") - self._bb_dump_diff = TskDbDiff._get_tmp_file("BlackboardDump-Diff", ".txt") - self._dump = TskDbDiff._get_tmp_file("DBDump", ".txt") - self._dump_diff = TskDbDiff._get_tmp_file("DBDump-Diff", ".txt") - else: - self._bb_dump = os.path.join(self.output_dir, "BlackboardDump.txt") - self._bb_dump_diff = os.path.join(self.output_dir, "BlackboardDump-Diff.txt") - self._dump = os.path.join(self.output_dir, "DBDump.txt") - self._dump_diff = os.path.join(self.output_dir, "DBDump-Diff.txt") - - # Sorting gold before comparing (sort behaves differently in different environments) - new_bb = TskDbDiff._get_tmp_file("GoldBlackboardDump", ".txt") - new_db = TskDbDiff._get_tmp_file("GoldDBDump", ".txt") - if self.gold_bb_dump is not None: - srtcmdlst = ["sort", self.gold_bb_dump, "-o", new_bb] - subprocess.call(srtcmdlst) - srtcmdlst = ["sort", self.gold_dump, "-o", new_db] - subprocess.call(srtcmdlst) - self.gold_bb_dump = new_bb - self.gold_dump = new_db - - - def _cleanup_diff(self): - if self.output_dir is None: - #cleanup temp files - os.remove(self._dump) - os.remove(self._bb_dump) - if os.path.isfile(self._dump_diff): - os.remove(self._dump_diff) - if os.path.isfile(self._bb_dump_diff): - os.remove(self._bb_dump_diff) - - if self.gold_bb_dump is None: - os.remove(self.gold_bb_dump) - os.remove(self.gold_dump) - - - def _diff(self, output_file, gold_file, diff_path): - """Compare two text files. 
- - Args: - output_file: a pathto_File, the latest text file - gold_file: a pathto_File, the gold text file - diff_path: The file to write the differences to - Returns False if different - """ - - if (not os.path.isfile(output_file)): - return False - - if (not os.path.isfile(gold_file)): - return False - - # It is faster to read the contents in and directly compare - output_data = codecs.open(output_file, "r", "utf_8").read() - gold_data = codecs.open(gold_file, "r", "utf_8").read() - if (gold_data == output_data): - return True - - # If they are different, invoke 'diff' - diff_file = codecs.open(diff_path, "wb", "utf_8") - # Gold needs to be passed in as 1st arg and output as 2nd - dffcmdlst = ["diff", gold_file, output_file] - subprocess.call(dffcmdlst, stdout = diff_file) - - # create file path for gold files inside output folder. In case of diff, both gold and current run files - # are available in the report output folder. Prefix Gold- is added to the filename. - gold_file_in_output_dir = output_file[:output_file.rfind("/")] + "/Gold-" + output_file[output_file.rfind("/")+1:] - shutil.copy(gold_file, gold_file_in_output_dir) - - return False - - - def _dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table): - """Dumps sorted text results to the given output location. - - Smart method that deals with a blackboard comparison to avoid issues - with different IDs based on when artifacts were created. - - Args: - db_file: a pathto_File, the output database. - bb_dump_file: a pathto_File, the sorted dump file to write to - """ - - unsorted_dump = TskDbDiff._get_tmp_file("dump_data", ".txt") - if isMultiUser: - conn, unused_db = db_connect(db_file, isMultiUser, pgSettings) - artifact_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) - else: # Use Sqlite - conn = sqlite3.connect(db_file) - conn.text_factory = lambda x: x.decode("utf-8", "ignore") - conn.row_factory = sqlite3.Row - artifact_cursor = conn.cursor() - # Get the list of all artifacts (along with type and associated file) - # @@@ Could add a SORT by parent_path in here since that is how we are going to later sort it. 
- artifact_cursor.execute("SELECT tsk_files.parent_path, tsk_files.name, blackboard_artifact_types.display_name, blackboard_artifacts.artifact_id FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id") - database_log = codecs.open(unsorted_dump, "wb", "utf_8") - row = artifact_cursor.fetchone() - appnd = False - counter = 0 - artifact_count = 0 - artifact_fail = 0 - - # Cycle through artifacts - try: - while (row != None): - - # File Name and artifact type - # Remove parent object ID from Unalloc file name - normalizedName = re.sub('^Unalloc_[0-9]+_', 'Unalloc_', row["name"]) - if(row["parent_path"] != None): - database_log.write(row["parent_path"] + normalizedName + ' ') - else: - database_log.write(normalizedName + ' ') - - if isMultiUser: - attribute_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) - else: - attribute_cursor = conn.cursor() - looptry = True - artifact_count += 1 - try: - art_id = "" - art_id = str(row["artifact_id"]) - - # Get attributes for this artifact - if isMultiUser: - attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id = %s ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id]) - else: - attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id =? 
ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id]) - - attributes = attribute_cursor.fetchall() - - # Print attributes - if (len(attributes) == 0): - # @@@@ This should be - database_log.write(' \n') - row = artifact_cursor.fetchone() - continue - - src = attributes[0][0] - for attr in attributes: - numvals = 0 - for x in range(3, 6): - if(attr[x] != None): - numvals += 1 - if(numvals > 1): - msg = "There were too many values for attribute type: " + attr["display_name"] + " for artifact with id #" + str(row["artifact_id"]) + ".\n" - - if(not attr["source"] == src): - msg = "There were inconsistent sources for artifact with id #" + str(row["artifact_id"]) + ".\n" - - try: - if attr["value_type"] == 0: - attr_value_as_string = str(attr["value_text"]) - elif attr["value_type"] == 1: - attr_value_as_string = str(attr["value_int32"]) - elif attr["value_type"] == 2: - attr_value_as_string = str(attr["value_int64"]) - if attr["attribute_type_id"] == 36 and id_obj_path_table != -1 and int(attr_value_as_string) > 0: #normalize positive TSK_PATH_IDs from being object id to a path if the obj_id_path_table was generated - attr_value_as_string = id_obj_path_table[int(attr_value_as_string)] - elif attr["value_type"] == 3: - attr_value_as_string = "%20.10f" % float((attr["value_double"])) #use exact format from db schema to avoid python auto format double value to (0E-10) scientific style - elif attr["value_type"] == 4: - attr_value_as_string = "bytes" - elif attr["value_type"] == 5: - attr_value_as_string = str(attr["value_int64"]) - if attr["display_name"] == "Associated Artifact": - attr_value_as_string = getAssociatedArtifactType(attribute_cursor, attr_value_as_string, isMultiUser) - patrn = re.compile("[\n\0\a\b\r\f]") - attr_value_as_string = re.sub(patrn, ' ', attr_value_as_string) - if attr["source"] == "Keyword Search" and attr["display_name"] == "Keyword Preview": - attr_value_as_string = "" - database_log.write('') - except IOError as e: - print("IO error") - raise TskDbDiffException("Unexpected IO error while writing to database log." 
+ str(e)) - - except sqlite3.Error as e: - msg = "Attributes in artifact id (in output DB)# " + str(row["artifact_id"]) + " encountered an error: " + str(e) +" .\n" - print("Attributes in artifact id (in output DB)# ", str(row["artifact_id"]), " encountered an error: ", str(e)) - print() - looptry = False - artifact_fail += 1 - database_log.write('Error Extracting Attributes') - database_log.close() - raise TskDbDiffException(msg) - finally: - attribute_cursor.close() - - - # @@@@ This should be - database_log.write(' \n') - row = artifact_cursor.fetchone() - - if(artifact_fail > 0): - msg ="There were " + str(artifact_count) + " artifacts and " + str(artifact_fail) + " threw an exception while loading.\n" - except Exception as e: - raise TskDbDiffException("Unexpected error while dumping blackboard database: " + str(e)) - finally: - database_log.close() - artifact_cursor.close() - conn.close() - - # Now sort the file - srtcmdlst = ["sort", unsorted_dump, "-o", bb_dump_file] - subprocess.call(srtcmdlst) - - - # for key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items(): - # for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items(): - # print(f"{key}: {val}") - - - - - - def _dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings): - """Dumps a database to a text file. - - Does not dump the artifact and attributes. - - Args: - db_file: a pathto_File, the database file to dump - dump_file: a pathto_File, the location to dump the non-blackboard database items - """ - - conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings) - id_files_table = build_id_files_table(conn.cursor(), isMultiUser) - id_vs_parts_table = build_id_vs_parts_table(conn.cursor(), isMultiUser) - id_vs_info_table = build_id_vs_info_table(conn.cursor(), isMultiUser) - id_fs_info_table = build_id_fs_info_table(conn.cursor(), isMultiUser) - id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser) - id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser) - id_legacy_artifact_types = build_id_legacy_artifact_types_table(conn.cursor(), isMultiUser) - id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser) - id_images_table = build_id_image_names_table(conn.cursor(), isMultiUser) - id_accounts_table = build_id_accounts_table(conn.cursor(), isMultiUser) - id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table, id_accounts_table) - - if isMultiUser: # Use PostgreSQL - os.environ['PGPASSWORD']=pgSettings.password - pgDump = ["pg_dump", "--inserts", "-U", pgSettings.username, "-h", pgSettings.pgHost, "-p", pgSettings.pgPort, "-d", db_file, "-E", "utf-8", "-T", "blackboard_artifacts", "-T", "blackboard_attributes", "-f", "postgreSQLDump.sql"] - subprocess.call(pgDump) - postgreSQL_db = codecs.open("postgreSQLDump.sql", "r", "utf-8") - # Write to the database dump - with codecs.open(dump_file, "wb", "utf_8") as db_log: - dump_line = '' - for line in postgreSQL_db: - line = line.strip('\r\n ') - # Deal with pg_dump result file - if (line.startswith('--') or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): # It's comment or alter statement or catalog entry or set idle entry or empty line - continue - elif not line.endswith(';'): # Statement not finished - dump_line 
+= line - continue - else: - dump_line += line - if 'INSERT INTO image_gallery_groups_seen' in dump_line: - dump_line = '' - continue; - dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) - db_log.write('%s\n' % dump_line) - dump_line = '' - postgreSQL_db.close() - else: # use Sqlite - # Delete the blackboard tables - conn.text_factory = lambda x: x.decode("utf-8", "ignore") - conn.execute("DROP TABLE blackboard_artifacts") - conn.execute("DROP TABLE blackboard_attributes") - # Write to the database dump - with codecs.open(dump_file, "wb", "utf_8") as db_log: - for line in conn.iterdump(): - if 'INSERT INTO "image_gallery_groups_seen"' in line: - continue - line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) - db_log.write('%s\n' % line) - # Now sort the file - srtcmdlst = ["sort", dump_file, "-o", dump_file] - subprocess.call(srtcmdlst) - - conn.close() - # cleanup the backup - if backup_db_file: - os.remove(backup_db_file) - return id_obj_path_table - - - def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings): - """Dumps the given database to text files for later comparison. - - Args: - db_file: a pathto_File, the database file to dump - dump_file: a pathto_File, the location to dump the non-blackboard database items - bb_dump_file: a pathto_File, the location to dump the blackboard database items - """ - id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings) - TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table) - - - def _get_tmp_file(base, ext): - time = datetime.datetime.now().time().strftime("%H%M%f") - return os.path.join(os.environ['TMP'], base + time + ext) - - -class TskDbDiffException(Exception): - pass - -class PGSettings(object): - def __init__(self, pgHost=None, pgPort=5432, user=None, password=None): - self.pgHost = pgHost - self.pgPort = pgPort - self.username = user - self.password = password - - def get_pgHost(self): - return self.pgHost - - def get_pgPort(self): - return self.pgPort - - def get_username(self): - return self.username - - def get_password(self): - return self.password - - - - - -def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: - """ - Retrieves the sqlite public tables and columns from a sqlite connection. - Args: - conn: The sqlite connection. - - Returns: The mapping of table names to a list of column names in that table where the list is in ordinal value. - """ - cur = conn.cursor() - cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") - tables = list([table[0] for table in cur.fetchall()]) - cur.close() - - to_ret = {} - for table in tables: - cur = conn.cursor() - cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) - to_ret[table] = list([col[0] for col in cur.fetchall()]) - cur.close() - - return to_ret - - -def get_pg_table_columns(conn) -> Dict[str, List[str]]: - """ - Retrieves the postgres public tables and columns from a pg connection. - Args: - conn: The pg connection. - - Returns: The mapping of table names to a list of column names in that table where the list is in ordinal value. 
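A small usage sketch for these two helpers, in the spirit of the commented-out calls above. It assumes both functions are in scope (or importable from this module); every connection parameter and path below is a placeholder and would need to point at a real case database:

import sqlite3
import psycopg2

# PostgreSQL (multi-user) case database; credentials are placeholders.
pg_conn = psycopg2.connect(dbname="case_db", user="postgres",
                           password="<password>", host="localhost", port="5432")
for table, columns in get_pg_table_columns(pg_conn).items():
    print(f"{table}: {columns}")
pg_conn.close()

# SQLite (single-user) case database; the path is a placeholder.
sqlite_conn = sqlite3.connect(r"C:\path\to\autopsy.db")
for table, columns in get_sqlite_table_columns(sqlite_conn).items():
    print(f"{table}: {columns}")
sqlite_conn.close()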
- """ - cursor = conn.cursor() - cursor.execute(""" - SELECT cols.table_name, cols.column_name - FROM information_schema.columns cols - WHERE cols.column_name IS NOT NULL - AND cols.table_name IS NOT NULL - AND cols.table_name IN ( - SELECT tables.tablename FROM pg_catalog.pg_tables tables - WHERE LOWER(schemaname) = 'public' - ) - ORDER by cols.table_name, cols.ordinal_position; - """) - mapping = {} - for row in cursor: - mapping.setdefault(row[0], []).append(row[1]) - - cursor.close() - return mapping - - -def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table, images_table, artifact_table, accounts_table): - """ Make testing more consistent and reasonable by doctoring certain db entries. - - Args: - line: a String, the line to remove the object id from. - files_table: a map from object ids to file paths. - """ - - # Sqlite statement use double quotes for table name, PostgreSQL doesn't. We check both databases results for normalization. - files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 - path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 - object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 - vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 - report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 - layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 - data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find('INSERT INTO data_source_info ') > -1 - event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find('INSERT INTO tsk_event_descriptions ') > -1 - events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 - ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 - examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 - ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find('INSERT INTO image_gallery_groups ') > -1 - ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find('INSERT INTO image_gallery_groups_seen ') > -1 - os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 - os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find('INSERT INTO tsk_os_account_attributes') > -1 - os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find('INSERT INTO tsk_os_account_instances') > -1 - data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find('INSERT INTO tsk_data_artifacts') > -1 - - parens = line[line.find('(') + 1 : line.rfind(')')] - no_space_parens = parens.replace(" ", "") - fields_list = list(csv.reader([no_space_parens], quotechar="'"))[0] - #Add back in the quotechar for values that were originally wrapped (csv reader consumes this character) - fields_list_with_quotes = [] - ptr = 0 - for field in fields_list: - if(len(field) == 0): - field = "'" + field + "'" - else: - start = no_space_parens.find(field, ptr) - if((start - 1) >= 0 and 
no_space_parens[start - 1] == '\''): - if((start + len(field)) < len(no_space_parens) and no_space_parens[start + len(field)] == '\''): - field = "'" + field + "'" - fields_list_with_quotes.append(field) - if(ptr > 0): - #Add one for each comma that is used to separate values in the original string - ptr+=1 - ptr += len(field) - - fields_list = fields_list_with_quotes - - # remove object ID - if files_index: - - # Ignore TIFF size and hash if extracted from PDFs. - # See JIRA-6951 for more details. - # index -3 = 3rd from the end, which is extension - # index -5 = 5th from the end, which is the parent path. - if fields_list[-3] == "'tif'" and fields_list[-5].endswith(".pdf/'"): - fields_list[15] = "'SIZE_IGNORED'" - fields_list[23] = "'MD5_IGNORED'" - fields_list[24] = "'SHA256_IGNORED'" - newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:-1]) + ');') #leave off first (object id) and last (os_account_id) field - # Remove object ID from Unalloc file name - newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) - return newLine - # remove object ID - elif vs_parts_index: - newLine = ('INSERT INTO "tsk_vs_parts" VALUES(' + ', '.join(fields_list[1:]) + ');') - return newLine - # remove group ID - elif ig_groups_index: - newLine = ('INSERT INTO "image_gallery_groups" VALUES(' + ', '.join(fields_list[1:]) + ');') - return newLine - #remove id field - elif ig_groups_seen_index: - # Only removing the id and group_id fields for now. May need to care about examiner_id and seen fields in future. - newLine = ('INSERT INTO "image_gallery_groups_seen" VALUES(' + ', '.join(fields_list[2:]) + ');') - return newLine - # remove object ID - elif path_index: - obj_id = int(fields_list[0]) - objValue = files_table[obj_id] - # remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory - idx_pre = fields_list[1].find('EmbeddedFileExtractor') + len('EmbeddedFileExtractor') - if idx_pre > -1: - idx_pos = fields_list[1].find('\\', idx_pre + 2) - dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file seperator - dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')] - pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:] - else: - pathValue = fields_list[1] - # remove localhost from postgres par_obj_name - multiOutput_idx = pathValue.find('ModuleOutput') - if multiOutput_idx > -1: - pathValue = "'" + pathValue[pathValue.find('ModuleOutput'):] #postgres par_obj_name include losthost - - newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');') - return newLine - # remove object ID - elif layout_index: - obj_id = fields_list[0] - path= files_table[int(obj_id)] - newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', '.join(fields_list[1:]) + ');') - # Remove object ID from Unalloc file name - newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) - return newLine - # remove object ID - elif object_index: - obj_id = fields_list[0] - parent_id = fields_list[1] - newLine = 'INSERT INTO "tsk_objects" VALUES(' - path = None - parent_path = None - - #if obj_id or parent_id is invalid literal, we simple return the values as it is - try: - obj_id = int(obj_id) - if parent_id != 'NULL': - parent_id = int(parent_id) - except Exception as e: - print(obj_id, parent_id) - return line - - if obj_id in files_table.keys(): - path = files_table[obj_id] - elif obj_id in vs_parts_table.keys(): - path = vs_parts_table[obj_id] - elif obj_id in vs_info_table.keys(): - 
path = vs_info_table[obj_id] - elif obj_id in fs_info_table.keys(): - path = fs_info_table[obj_id] - elif obj_id in reports_table.keys(): - path = reports_table[obj_id] - # remove host name (for multi-user) and dates/times from path for reports - if path is not None: - if 'ModuleOutput' in path: - # skip past the host name (if any) - path = path[path.find('ModuleOutput'):] - if 'BulkExtractor' in path or 'Smirk' in path: - # chop off the last folder (which contains a date/time) - path = path[:path.rfind('\\')] - if 'Reports\\AutopsyTestCase HTML Report' in path: - path = 'Reports\\AutopsyTestCase HTML Report' - - if parent_id in files_table.keys(): - parent_path = files_table[parent_id] - elif parent_id in vs_parts_table.keys(): - parent_path = vs_parts_table[parent_id] - elif parent_id in vs_info_table.keys(): - parent_path = vs_info_table[parent_id] - elif parent_id in fs_info_table.keys(): - parent_path = fs_info_table[parent_id] - elif parent_id in images_table.keys(): - parent_path = images_table[parent_id] - elif parent_id in accounts_table.keys(): - parent_path = accounts_table[parent_id] - elif parent_id == 'NULL': - parent_path = "NULL" - - # Remove host name (for multi-user) from parent_path - if parent_path is not None: - if 'ModuleOutput' in parent_path: - # skip past the host name (if any) - parent_path = parent_path[parent_path.find('ModuleOutput'):] - - if path and parent_path: - # Remove object ID from Unalloc file names and regripper output - path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', path) - path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', path) - parent_path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', parent_path) - parent_path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', parent_path) - return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');' - else: - return newLine + '"OBJECT IDS OMITTED", ' + ', '.join(fields_list[2:]) + ');' #omit parent object id and object id when we cant annonymize them - # remove time-based information, ie Test_6/11/14 -> Test - elif report_index: - fields_list[1] = "AutopsyTestCase" - fields_list[2] = "0" - newLine = ('INSERT INTO "reports" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id - return newLine - elif data_source_info_index: - fields_list[1] = "{device id}" - fields_list[4] = "{dateTime}" - newLine = ('INSERT INTO "data_source_info" VALUES(' + ','.join(fields_list) + ');') - return newLine - elif ingest_job_index: - fields_list[2] = "{host_name}" - start_time = int(fields_list[3]) - end_time = int(fields_list[4]) - if (start_time <= end_time): - fields_list[3] = "0" - fields_list[4] = "0" - newLine = ('INSERT INTO "ingest_jobs" VALUES(' + ','.join(fields_list) + ');') - return newLine - elif examiners_index: - fields_list[1] = "{examiner_name}" - newLine = ('INSERT INTO "tsk_examiners" VALUES(' + ','.join(fields_list) + ');') - return newLine - # remove all timing dependent columns from events table - elif events_index: - newLine = ('INSERT INTO "tsk_events" VALUES(' + ','.join(fields_list[1:2]) + ');') - return newLine - # remove object ids from event description table - elif event_description_index: - # replace object ids with information that is deterministic - file_obj_id = int(fields_list[5]) - object_id = int(fields_list[4]) - legacy_artifact_id = 'NULL' - if (fields_list[6] != 'NULL'): - legacy_artifact_id = int(fields_list[6]) - if file_obj_id != 'NULL' and file_obj_id in files_table.keys(): - fields_list[5] = files_table[file_obj_id] - if object_id != 'NULL' and 
object_id in files_table.keys(): - fields_list[4] = files_table[object_id] - if legacy_artifact_id != 'NULL' and legacy_artifact_id in artifact_table.keys(): - fields_list[6] = artifact_table[legacy_artifact_id] - if fields_list[1] == fields_list[2] and fields_list[1] == fields_list[3]: - fields_list[1] = cleanupEventDescription(fields_list[1]) - fields_list[2] = cleanupEventDescription(fields_list[2]) - fields_list[3] = cleanupEventDescription(fields_list[3]) - newLine = ('INSERT INTO "tsk_event_descriptions" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id - return newLine - elif os_account_index: - newLine = ('INSERT INTO "tsk_os_accounts" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id since value that would be substituted is in diff line already - return newLine - elif os_account_attr_index: - #substitue the account object id for a non changing value - os_account_id = int(fields_list[1]) - fields_list[1] = accounts_table[os_account_id] - #substitue the source object id for a non changing value - source_obj_id = int(fields_list[3]) - if source_obj_id in files_table.keys(): - fields_list[3] = files_table[source_obj_id] - elif source_obj_id in vs_parts_table.keys(): - fields_list[3] = vs_parts_table[source_obj_id] - elif source_obj_id in vs_info_table.keys(): - fields_list[3] = vs_info_table[source_obj_id] - elif source_obj_id in fs_info_table.keys(): - fields_list[3] = fs_info_table[source_obj_id] - elif source_obj_id in images_table.keys(): - fields_list[3] = images_table[source_obj_id] - elif source_obj_id in accounts_table.keys(): - fields_list[3] = accounts_table[source_obj_id] - elif source_obj_id == 'NULL': - fields_list[3] = "NULL" - newLine = ('INSERT INTO "tsk_os_account_attributes" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id - return newLine - elif os_account_instances_index: - os_account_id = int(fields_list[1]) - fields_list[1] = accounts_table[os_account_id] - newLine = ('INSERT INTO "tsk_os_account_instances" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id - return newLine - elif data_artifacts_index: - art_obj_id = int(fields_list[0]) - if art_obj_id in files_table.keys(): - fields_list[0] = files_table[art_obj_id] - else: - fields_list[0] = 'Artifact Object ID Omitted' - account_obj_id = int(fields_list[1]) - if account_obj_id in files_table.keys(): - fields_list[1] = files_table[account_obj_id] - else: - fields_list[1] = 'Account Object ID Omitted' - newLine = ('INSERT INTO "tsk_data_artifacts" VALUES(' + ','.join(fields_list[:]) + ');') # remove ids - return newLine - else: - return line - -def cleanupEventDescription(description): - test = re.search("^'\D+:\d+'$", description) - if test is not None: - return re.sub(":\d+", ":", description) - else: - return description - -def getAssociatedArtifactType(cur, artifact_id, isMultiUser): - if isMultiUser: - cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s",[artifact_id]) - else: - cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE 
artifact_id=?",[artifact_id]) - - info = cur.fetchone() - - return "File path: " + info[0] + " Artifact Type: " + info[1] - -def build_id_files_table(db_cursor, isPostgreSQL): - """Build the map of object ids to file paths. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, parent path, and name, then create a tuple in the dictionary - # with the object id as the key and the full file path (parent + name) as the value - mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, parent_path, name FROM tsk_files")]) - return mapping - -def build_id_vs_parts_table(db_cursor, isPostgreSQL): - """Build the map of object ids to vs_parts. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, addr, and start, then create a tuple in the dictionary - # with the object id as the key and (addr + start) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, addr, start FROM tsk_vs_parts")]) - return mapping - -def build_id_vs_info_table(db_cursor, isPostgreSQL): - """Build the map of object ids to vs_info. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, vs_type, and img_offset, then create a tuple in the dictionary - # with the object id as the key and (vs_type + img_offset) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")]) - return mapping - - -def build_id_fs_info_table(db_cursor, isPostgreSQL): - """Build the map of object ids to fs_info. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, img_offset, and fs_type, then create a tuple in the dictionary - # with the object id as the key and (img_offset + fs_type) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")]) - return mapping - -def build_id_objects_table(db_cursor, isPostgreSQL): - """Build the map of object ids to par_id. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and par_obj_id, type as the value - mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) - return mapping - -def build_id_image_names_table(db_cursor, isPostgreSQL): - """Build the map of object ids to name. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id and name then create a tuple in the dictionary - # with the object id as the key and name, type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, name FROM tsk_image_names WHERE sequence=0")]) - #data_sources which are logical file sets will be found in the files table - return mapping - -def build_id_artifact_types_table(db_cursor, isPostgreSQL): - """Build the map of object ids to artifact ids. 
- - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and artifact type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) - return mapping - -def build_id_legacy_artifact_types_table(db_cursor, isPostgreSQL): - """Build the map of legacy artifact ids to artifact type. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the legacy artifact id then create a tuple in the dictionary - # with the artifact id as the key and artifact type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) - return mapping - -def build_id_reports_table(db_cursor, isPostgreSQL): - """Build the map of report object ids to report path. - - Args: - db_cursor: the database cursor - """ - # for each row in the reports table in the db, create an obj_id -> path map - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, path FROM reports")]) - return mapping - -def build_id_accounts_table(db_cursor, isPostgreSQL): - """Build the map of object ids to OS account SIDs. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id and account SID then creates a tuple in the dictionary - # with the object id as the key and the OS Account's SID as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT os_account_obj_id, addr FROM tsk_os_accounts")]) - return mapping - -def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table, images_table, accounts_table): - """Build the map of object ids to artifact ids. 
- - Args: - files_table: obj_id, path - objects_table: obj_id, par_obj_id, type - artifacts_table: obj_id, artifact_type_name - reports_table: obj_id, path - images_table: obj_id, name - accounts_table: obj_id, addr - """ - # make a copy of files_table and update it with new data from artifacts_table and reports_table - mapping = files_table.copy() - for k, v in objects_table.items(): - path = "" - if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id - if k in reports_table.keys(): # For a report we use the report path - par_obj_id = v[0] - if par_obj_id is not None: - mapping[k] = reports_table[k] - elif k in artifacts_table.keys(): # For an artifact we use it's par_obj_id's path+name plus it's artifact_type name - par_obj_id = v[0] # The parent of an artifact can be a file or a report - if par_obj_id in mapping.keys(): - path = mapping[par_obj_id] - elif par_obj_id in reports_table.keys(): - path = reports_table[par_obj_id] - elif par_obj_id in images_table.keys(): - path = images_table[par_obj_id] - mapping[k] = path + "/" + artifacts_table[k] - elif k in accounts_table.keys(): # For an OS Account object ID we use its addr field which is the account SID - mapping[k] = accounts_table[k] - elif v[0] not in mapping.keys(): - if v[0] in artifacts_table.keys(): - par_obj_id = objects_table[v[0]] - path = mapping[par_obj_id] - mapping[k] = path + "/" + artifacts_table[v[0]] - return mapping - -def db_connect(db_file, isMultiUser, pgSettings=None): - if isMultiUser: # use PostgreSQL - try: - return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + pgSettings.pgHost + " password=" + pgSettings.password), None - except: - print("Failed to connect to the database: " + db_file) - else: # Sqlite - # Make a copy that we can modify - backup_db_file = TskDbDiff._get_tmp_file("tsk_backup_db", ".db") - shutil.copy(db_file, backup_db_file) - # We sometimes get situations with messed up permissions - os.chmod (backup_db_file, 0o777) - return sqlite3.connect(backup_db_file), backup_db_file - -def sql_select_execute(cursor, isPostgreSQL, sql_stmt): - if isPostgreSQL: - cursor.execute(sql_stmt) - return cursor.fetchall() - else: - return cursor.execute(sql_stmt) - -def main(): - try: - sys.argv.pop(0) - output_db = sys.argv.pop(0) - gold_db = sys.argv.pop(0) - except: - print("usage: tskdbdiff [OUTPUT DB PATH] [GOLD DB PATH]") - sys.exit(1) - - db_diff = TskDbDiff(output_db, gold_db, output_dir=".") - dump_passed, bb_dump_passed = db_diff.run_diff() - - if dump_passed and bb_dump_passed: - print("Database comparison passed.") - if not dump_passed: - print("Non blackboard database comparison failed.") - if not bb_dump_passed: - print("Blackboard database comparison failed.") - - sys.exit(0) - - -if __name__ == "__main__": - if sys.hexversion < 0x03000000: - print("Python 3 required") - sys.exit(1) - - main() - From 6b86cb53b1ab42c78b0063fe9ed9e7d93e7d4646 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 20 Apr 2021 21:45:26 -0400 Subject: [PATCH 05/30] updates --- test/script/tskdbdiff.py | 42 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 3bd516801c..0b0371db2b 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -169,12 +169,29 @@ class TskDbDiff(object): # create file path for gold files inside output folder. In case of diff, both gold and current run files # are available in the report output folder. 
Prefix Gold- is added to the filename. - gold_file_in_output_dir = output_file[:output_file.rfind("/")] + "/Gold-" + output_file[output_file.rfind("/")+1:] + gold_file_in_output_dir = os.path.join(os.path.dirname(output_file), "Gold-" + os.path.basename(output_file)) shutil.copy(gold_file, gold_file_in_output_dir) return False + @staticmethod + def _get_associated_artifact_type(cur, artifact_id, isMultiUser): + if isMultiUser: + cur.execute( + "SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s", + [artifact_id]) + else: + cur.execute( + "SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=?", + [artifact_id]) + + info = cur.fetchone() + + return "File path: " + info[0] + " Artifact Type: " + info[1] + + + @staticmethod def _dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table): """Dumps sorted text results to the given output location. @@ -270,7 +287,7 @@ class TskDbDiff(object): elif attr["value_type"] == 5: attr_value_as_string = str(attr["value_int64"]) if attr["display_name"] == "Associated Artifact": - attr_value_as_string = getAssociatedArtifactType(attribute_cursor, attr_value_as_string, isMultiUser) + attr_value_as_string = TskDbDiff._get_associated_artifact_type(attribute_cursor, attr_value_as_string, isMultiUser) patrn = re.compile("[\n\0\a\b\r\f]") attr_value_as_string = re.sub(patrn, ' ', attr_value_as_string) if attr["source"] == "Keyword Search" and attr["display_name"] == "Keyword Preview": @@ -310,7 +327,7 @@ class TskDbDiff(object): srtcmdlst = ["sort", unsorted_dump, "-o", bb_dump_file] subprocess.call(srtcmdlst) - + @staticmethod def _dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings): """Dumps a database to a text file. 
@@ -321,7 +338,7 @@ class TskDbDiff(object): dump_file: a pathto_File, the location to dump the non-blackboard database items """ - conn, output_file = db_connect(db_file, isMultiUser, pgSettings) + conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings) guid_utils = TskGuidUtils.create(conn) if isMultiUser: @@ -331,14 +348,15 @@ class TskDbDiff(object): table_cols = get_sqlite_table_columns(conn) schema = get_sqlite_schema(conn) - output_file.write(schema + "\n") - for table, cols in sorted(table_cols.items(), key=lambda pr: pr[0]): - normalizer = TABLE_NORMALIZATIONS[table] if table in TABLE_NORMALIZATIONS else None - write_normalized(guid_utils, output_file, conn, table, cols, normalizer) + with codecs.open(dump_file, "wb", "utf_8") as output_file: + output_file.write(schema + "\n") + for table, cols in sorted(table_cols.items(), key=lambda pr: pr[0]): + normalizer = TABLE_NORMALIZATIONS[table] if table in TABLE_NORMALIZATIONS else None + write_normalized(guid_utils, output_file, conn, table, cols, normalizer) # Now sort the file - # srtcmdlst = ["sort", dump_file, "-o", dump_file] - # subprocess.call(srtcmdlst) + srtcmdlst = ["sort", dump_file, "-o", dump_file] + subprocess.call(srtcmdlst) conn.close() # cleanup the backup @@ -346,6 +364,10 @@ class TskDbDiff(object): # os.remove(backup_db_file) return guid_utils.obj_id_guids + @staticmethod + def _get_tmp_file(base, ext): + time = datetime.datetime.now().time().strftime("%H%M%f") + return os.path.join(os.environ['TMP'], base + time + ext) class TskDbDiffException(Exception): From 35ad98ca30bc6cbb823e28af621eec9ec4b53437 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 21 Apr 2021 09:40:03 -0400 Subject: [PATCH 06/30] bug fixes --- test/script/tskdbdiff.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 0b0371db2b..21807bf136 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -414,7 +414,7 @@ class TskGuidUtils: cursor.execute(select_statement) ret_dict = {} for row in cursor: - ret_dict[row[0]] = delim.join([str(col) for col in row[1:]]) + ret_dict[row[0]] = delim.join([str(col) if col else '' for col in row[1:]]) return ret_dict @@ -460,11 +460,11 @@ class TskGuidUtils: path = artifact_parent_dict[par_obj_id] break - guid_artifacts[par_obj_id] = "/".join([path, v]) + guid_artifacts[k] = "/".join([path, v]) return TskGuidUtils( obj_id_guids={**guid_files, **guid_reports, **guid_os_accounts, **guid_vs_parts, - **guid_fs_info, **guid_fs_info, **guid_image_names}, + **guid_fs_info, **guid_fs_info, **guid_image_names, **guid_artifacts}, artifact_types=objid_artifacts) artifact_types: Dict[int, str] @@ -777,9 +777,9 @@ def normalize_tsk_event_descriptions(guid_util: TskGuidUtils, row: Dict[str, any """ row_copy = row.copy() # replace object ids with information that is deterministic + row_copy['event_description_id'] = MASKED_ID row_copy['content_obj_id'] = guid_util.get_guid_for_file_objid(row['content_obj_id']) - row_copy['data_source_obj_id'] = guid_util.get_guid_for_file_objid(row['data_source_obj_id']) - row_copy['artifact_id'] = guid_util.get_guid_for_artifactid(row['artifact_id']) + row_copy['artifact_id'] = guid_util.get_guid_for_artifactid(row['artifact_id']) if row['artifact_id'] else None if row['full_description'] == row['med_description'] == row['short_description']: row_copy['full_description'] = _mask_event_desc(row['full_description']) @@ -961,11 +961,11 @@ TABLE_NORMALIZATIONS: Dict[str, 
TableNormalization] = { "obj_id": MASKED_OBJ_ID }), "image_gallery_groups": NormalizeColumns({ - "obj_id": MASKED_OBJ_ID + "group_id": MASKED_ID }), "tsk_files_path": NormalizeRow(normalize_tsk_files_path), "tsk_file_layout": NormalizeColumns({ - "obj_id": lambda guid_util, col: guid_util.get_guid_for_file_objid(col) + "obj_id": lambda guid_util, col: normalize_unalloc_files(guid_util.get_guid_for_file_objid(col)) }), "tsk_objects": NormalizeRow(normalize_tsk_objects), "reports": NormalizeColumns({ @@ -1042,6 +1042,7 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, row_dict = row_masker.normalize(guid_utils, row_dict) if row_dict is not None: + # NOTE: This is an alternate approach to representing values as json-like lines # entries = [] # for idx in range(0, len(column_names)): # column = column_names[idx] @@ -1051,7 +1052,7 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, # insert_statement = f"{table}: {{{insert_values}}}\n" values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) - insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n' + insert_statement = f'INSERT INTO "{table}" VALUES({values_statement});\n' output_file.write(insert_statement) From 991d1985c371ccf5965de305113736c7016fb9b8 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 21 Apr 2021 09:52:18 -0400 Subject: [PATCH 07/30] bug fix --- test/script/tskdbdiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 21807bf136..5c944efb88 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -727,7 +727,7 @@ def get_pg_schema(pg_username: str, pg_pword: str, pg_host: str, pg_port: str): pg_dump = ["pg_dump", "--inserts", "-U", pg_username, "-h", pg_host, "-p", pg_port, "-T", "blackboard_artifacts", "-T", "blackboard_attributes"] output = subprocess.check_output(pg_dump) - return sanitize_schema(output) + return sanitize_schema(str(output)) def get_sqlite_schema(db_conn): From 37e3087f5b0a6ac2288d72fb072d8a9cd47cc891 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 21 Apr 2021 11:40:04 -0400 Subject: [PATCH 08/30] bug fixes --- test/script/tskdbdiff.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 5c944efb88..a5fffe72e7 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -589,7 +589,7 @@ def get_path_segs(path: Union[str, None]) -> Union[List[str], None]: """ if path: - return list(filter(lambda x: len(x.strip()) > 0, [path for path in os.path.normpath(path).split(os.sep)])) + return list(filter(lambda x: len(x.strip()) > 0, [s for s in re.split(r"[\\/]", path)])) else: return None @@ -759,7 +759,7 @@ def _mask_event_desc(desc: str) -> str: Returns: The normalized description. 
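Roughly, the masking turns a description that ends in a numeric id into its textual prefix and leaves everything else alone. A stand-in sketch of that behavior (mask_desc and the sample descriptions are illustrative only):

import re

def mask_desc(desc):
    # Strip a trailing ": <number>" so ids that change from run to run do not cause diffs.
    match = re.search(r"^\s*(.+?)\s*:\s*\d+\s*$", desc.strip())
    return f"{match.group(1)}:" if match else desc

print(mask_desc("Shell Bags: 30840"))   # -> "Shell Bags:"
print(mask_desc("Recent Documents"))    # -> "Recent Documents" (unchanged)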
""" - match = re.search(r"^\s*(\D+):\d+\s*$", desc.strip()) + match = re.search(r"^\s*(.+?)\s*:\s*\d+\s*$", desc.strip()) if match: return f"{match.group(1)}:" @@ -878,10 +878,10 @@ def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Di if module_output_idx >= 0: # remove everything up to and including ModuleOutput if ModuleOutput present path_parts = path_parts[module_output_idx:] - if len(path_parts) > 1 and path_parts[0] == 'Embedded File Extractor': - match = re.match(r'^(.+?)_[0-9]*$', path_parts[1]) + if len(path_parts) > 1 and path_parts[1] == 'Embedded File Extractor': + match = re.match(r'^(.+?)_\d*$', path_parts[2]) if match: - path_parts[1] = match.group(1) + path_parts[2] = match.group(1) row_copy['path'] = os.path.join(*path_parts) if len(path_parts) > 0 else '/' From 6c3920cf69dbb4b988a4b2efc4ec6722ba3ba0ca Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 23 Apr 2021 13:00:14 -0400 Subject: [PATCH 09/30] comment update --- test/script/tskdbdiff.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index a5fffe72e7..d82a9d110b 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -1044,12 +1044,13 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, if row_dict is not None: # NOTE: This is an alternate approach to representing values as json-like lines # entries = [] - # for idx in range(0, len(column_names)): - # column = column_names[idx] - # value = get_sql_insert_value(row_dict[column] if column in row_dict else None) - # entries.append((column, value)) + # for column in column_names: + # value = get_sql_insert_value(row_dict[column] if column in row_dict and row_dict[column] else None) + # if value: + # entries.append((column, value)) # insert_values = ", ".join([f"{pr[0]}: {pr[1]}" for pr in entries]) # insert_statement = f"{table}: {{{insert_values}}}\n" + # output_file.write(insert_statement) values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) insert_statement = f'INSERT INTO "{table}" VALUES({values_statement});\n' From 9e4289c4535bde62baa9b9d1a38ef2b40e226795 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 4 May 2021 16:50:47 -0400 Subject: [PATCH 10/30] add missing method --- test/script/tskdbdiff.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index d82a9d110b..3e067476d8 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -364,6 +364,18 @@ class TskDbDiff(object): # os.remove(backup_db_file) return guid_utils.obj_id_guids + @staticmethod + def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings): + """Dumps the given database to text files for later comparison. 
+ + Args: + db_file: a pathto_File, the database file to dump + dump_file: a pathto_File, the location to dump the non-blackboard database items + bb_dump_file: a pathto_File, the location to dump the blackboard database items + """ + id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings) + TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table) + @staticmethod def _get_tmp_file(base, ext): time = datetime.datetime.now().time().strftime("%H%M%f") From c42314308e9d622e8d2e09ada58330499c2c0093 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 5 May 2021 11:41:33 -0400 Subject: [PATCH 11/30] commenting on regex --- test/script/tskdbdiff.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 3e067476d8..bb10177b06 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -601,6 +601,7 @@ def get_path_segs(path: Union[str, None]) -> Union[List[str], None]: """ if path: + # split on backslash or forward slash return list(filter(lambda x: len(x.strip()) > 0, [s for s in re.split(r"[\\/]", path)])) else: return None @@ -771,6 +772,8 @@ def _mask_event_desc(desc: str) -> str: Returns: The normalized description. """ + + # Takes a string like "Shell Bags: 30840" and replaces with "ShellBags:" match = re.search(r"^\s*(.+?)\s*:\s*\d+\s*$", desc.strip()) if match: return f"{match.group(1)}:" @@ -832,6 +835,9 @@ def normalize_unalloc_files(path_str: Union[str, None]) -> Union[str, None]: Returns: The path string where timestamps are removed from unalloc strings. """ + + # takes a file name like "Unalloc_30580_7466496_2980941312" and removes the object id to become + # "Unalloc_7466496_2980941312" return re.sub('Unalloc_[0-9]+_', 'Unalloc_', path_str) if path_str else None @@ -844,6 +850,7 @@ def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: Returns: The path string where timestamps are removed from regripper paths. 
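Both filename normalizations are single re.sub calls; using the sample names from the comments, they behave like this:

import re

# "Unalloc_30580_7466496_2980941312" -> "Unalloc_7466496_2980941312" (object id dropped)
print(re.sub(r'Unalloc_[0-9]+_', 'Unalloc_', 'Unalloc_30580_7466496_2980941312'))

# "regripper-12345-full" -> "regripper-full" (id dropped)
print(re.sub(r'regripper-[0-9]+-full', 'regripper-full', 'regripper-12345-full'))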
""" + # takes a file name like "regripper-12345-full" and removes the id to become "regripper-full" return re.sub(r'regripper\-[0-9]+\-full', 'regripper-full', path_str) if path_str else None @@ -891,6 +898,9 @@ def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Di # remove everything up to and including ModuleOutput if ModuleOutput present path_parts = path_parts[module_output_idx:] if len(path_parts) > 1 and path_parts[1] == 'Embedded File Extractor': + # Takes a folder like ModuleOutput\Embedded File Extractor/f_000168_4435\f_000168 + # and fixes the folder after 'Embedded File Extractor', 'f_000168_4435' to remove the last number + # to become 'f_000168' match = re.match(r'^(.+?)_\d*$', path_parts[2]) if match: path_parts[2] = match.group(1) From 6cdb168a050b85871a0730984ce4dc64627d7827 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 6 May 2021 08:23:54 -0400 Subject: [PATCH 12/30] pg_dump fix --- test/script/tskdbdiff.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index bb10177b06..073aeef88f 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -343,7 +343,8 @@ class TskDbDiff(object): if isMultiUser: table_cols = get_pg_table_columns(conn) - schema = get_pg_schema(pgSettings.username, pgSettings.password, pgSettings.pgHost, pgSettings.pgPort) + schema = get_pg_schema(db_file, pgSettings.username, pgSettings.password, + pgSettings.pgHost, pgSettings.pgPort) else: table_cols = get_sqlite_table_columns(conn) schema = get_sqlite_schema(conn) @@ -707,9 +708,14 @@ def sanitize_schema(original: str) -> str: dump_line = '' for line in original.splitlines(): line = line.strip('\r\n ') + lower_line = line.lower() # It's comment or alter statement or catalog entry or set idle entry or empty line - if (line.startswith('--') or line.lower().startswith( - 'alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): + if (not line or + line.startswith('--') or + lower_line.startswith('set') or + lower_line.startswith('alter') or + "pg_catalog" in line or + "idle_in_transaction_session_timeout" in line): continue elif line.endswith(';'): # Statement not finished dump_line += line @@ -724,10 +730,11 @@ def sanitize_schema(original: str) -> str: return "\n".join(sanitized_lines) -def get_pg_schema(pg_username: str, pg_pword: str, pg_host: str, pg_port: str): +def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg_port: str): """ Gets the schema to be added to the dump text from the postgres database. Args: + dbname: The name of the database. pg_username: The postgres user name. pg_pword: The postgres password. pg_host: The postgres host. 
@@ -737,10 +744,11 @@ def get_pg_schema(pg_username: str, pg_pword: str, pg_host: str, pg_port: str): """ os.environ['PGPASSWORD'] = pg_pword - pg_dump = ["pg_dump", "--inserts", "-U", pg_username, "-h", pg_host, "-p", pg_port, - "-T", "blackboard_artifacts", "-T", "blackboard_attributes"] + pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", pg_port, "--schema-only", "-d", dbname, "-t", + "public.*"] output = subprocess.check_output(pg_dump) - return sanitize_schema(str(output)) + output_str = output.decode('UTF-8') + return sanitize_schema(output_str) def get_sqlite_schema(db_conn): @@ -1052,7 +1060,8 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, for row in cursor: if len(row) != len(column_names): print( - f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}") + f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are" + f" {len(column_names)} with {str(column_names)}") continue row_dict = {} @@ -1082,7 +1091,8 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, def db_connect(db_file, isMultiUser, pgSettings=None): if isMultiUser: # use PostgreSQL try: - return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + pgSettings.pgHost + " password=" + pgSettings.password), None + return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + + pgSettings.pgHost + " password=" + pgSettings.password), None except: print("Failed to connect to the database: " + db_file) else: # Sqlite @@ -1122,4 +1132,3 @@ if __name__ == "__main__": sys.exit(1) main() - From 46c4017fe7ffbdbbcc52bb9b6b2d405094f374ff Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 6 May 2021 11:20:02 -0400 Subject: [PATCH 13/30] small fix --- test/script/tskdbdiff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 073aeef88f..108c168b88 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -744,8 +744,8 @@ def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg """ os.environ['PGPASSWORD'] = pg_pword - pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", pg_port, "--schema-only", "-d", dbname, "-t", - "public.*"] + pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", pg_port, + "--schema-only", "-d", dbname, "-t", "public.*"] output = subprocess.check_output(pg_dump) output_str = output.decode('UTF-8') return sanitize_schema(output_str) From e2c0a08ac4533bfe292e740ad01c33bc744e4a3d Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 7 May 2021 11:23:12 -0400 Subject: [PATCH 14/30] formatting, guid fixes --- test/script/tskdbdiff.py | 42 +++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 108c168b88..76b3cb5693 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -443,6 +443,7 @@ class TskGuidUtils: """ guid_files = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, parent_path, name FROM tsk_files") guid_vs_parts = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, addr, start FROM tsk_vs_parts", "_") + guid_vs_info = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info", "_") guid_fs_info = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info", 
"_") guid_image_names = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, name FROM tsk_image_names " "WHERE sequence=0") @@ -450,13 +451,21 @@ class TskGuidUtils: guid_reports = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, path FROM reports") objid_artifacts = TskGuidUtils._get_guid_dict(db_conn, - "SELECT " - "blackboard_artifacts.artifact_obj_id, " - "blackboard_artifact_types.type_name FROM " - "blackboard_artifacts INNER JOIN blackboard_artifact_types " + "SELECT blackboard_artifacts.artifact_obj_id, " + "blackboard_artifact_types.type_name " + "FROM blackboard_artifacts " + "INNER JOIN blackboard_artifact_types " "ON blackboard_artifact_types.artifact_type_id = " "blackboard_artifacts.artifact_type_id") + artifact_objid_artifacts = TskGuidUtils._get_guid_dict(db_conn, + "SELECT blackboard_artifacts.artifact_id, " + "blackboard_artifact_types.type_name " + "FROM blackboard_artifacts " + "INNER JOIN blackboard_artifact_types " + "ON blackboard_artifact_types.artifact_type_id = " + "blackboard_artifacts.artifact_type_id") + cursor = db_conn.cursor() cursor.execute("SELECT obj_id, par_obj_id FROM tsk_objects") par_obj_objects = dict([(row[0], row[1]) for row in cursor]) @@ -476,9 +485,10 @@ class TskGuidUtils: guid_artifacts[k] = "/".join([path, v]) return TskGuidUtils( - obj_id_guids={**guid_files, **guid_reports, **guid_os_accounts, **guid_vs_parts, + # aggregate all the object id dictionaries together + obj_id_guids={**guid_files, **guid_reports, **guid_os_accounts, **guid_vs_parts, **guid_vs_info, **guid_fs_info, **guid_fs_info, **guid_image_names, **guid_artifacts}, - artifact_types=objid_artifacts) + artifact_types=artifact_objid_artifacts) artifact_types: Dict[int, str] obj_id_guids: Dict[int, any] @@ -506,11 +516,11 @@ class TskGuidUtils: return self.obj_id_guids[obj_id] if obj_id in self.obj_id_guids else omitted_value def get_guid_for_file_objid(self, obj_id, omitted_value: Union[str, None] = 'Object ID Omitted'): - # TODO this is just an alias; could probably be removed + # this method is just an alias for get_guid_for_objid return self.get_guid_for_objid(obj_id, omitted_value) def get_guid_for_accountid(self, account_id, omitted_value: Union[str, None] = 'Account ID Omitted'): - # TODO this is just an alias; could probably be removed + # this method is just an alias for get_guid_for_objid return self.get_guid_for_objid(account_id, omitted_value) def get_guid_for_artifactid(self, artifact_id, omitted_value: Union[str, None] = 'Artifact ID Omitted'): @@ -859,7 +869,7 @@ def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: """ # takes a file name like "regripper-12345-full" and removes the id to become "regripper-full" - return re.sub(r'regripper\-[0-9]+\-full', 'regripper-full', path_str) if path_str else None + return re.sub(r'regripper-[0-9]+-full', 'regripper-full', path_str) if path_str else None def normalize_tsk_files(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: @@ -1088,19 +1098,19 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, output_file.write(insert_statement) -def db_connect(db_file, isMultiUser, pgSettings=None): - if isMultiUser: # use PostgreSQL +def db_connect(db_file, is_multi_user, pg_settings=None): + if is_multi_user: # use PostgreSQL try: - return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + - pgSettings.pgHost + " password=" + pgSettings.password), None + return psycopg2.connect("dbname=" + db_file + " user=" + 
pg_settings.username + " host=" + + pg_settings.pgHost + " password=" + pg_settings.password), None except: print("Failed to connect to the database: " + db_file) - else: # Sqlite + else: # Sqlite # Make a copy that we can modify backup_db_file = TskDbDiff._get_tmp_file("tsk_backup_db", ".db") shutil.copy(db_file, backup_db_file) # We sometimes get situations with messed up permissions - os.chmod (backup_db_file, 0o777) + os.chmod(backup_db_file, 0o777) return sqlite3.connect(backup_db_file), backup_db_file @@ -1113,7 +1123,7 @@ def main(): print("usage: tskdbdiff [OUTPUT DB PATH] [GOLD DB PATH]") sys.exit(1) - db_diff = TskDbDiff(output_db, gold_db, output_dir=".") + db_diff = TskDbDiff(output_db, gold_db, output_dir=".") dump_passed, bb_dump_passed = db_diff.run_diff() if dump_passed and bb_dump_passed: From 32f4492a7b042ff94d85c4f771411777be82de46 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 7 May 2021 14:39:57 -0400 Subject: [PATCH 15/30] tsk_objects fix --- test/script/tskdbdiff.py | 54 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 76b3cb5693..9ce406a5ce 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -929,21 +929,24 @@ def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Di return row_copy -def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: +def normalize_tsk_objects_path(guid_util: TskGuidUtils, objid: int, + no_path_placeholder: Union[str, None]) -> Union[str, None]: """ - Normalizes object table rows. + Returns a normalized path to be used in a tsk_objects table row. Args: - guid_util: Provides guids for ids that may change from run to run. - row: A dictionary mapping column names to values. + guid_util: The utility for fetching guids. + objid: The object id of the item. + no_path_placeholder: text to return if no path value found. + + Returns: The 'no_path_placeholder' text if no path. Otherwise, the normalized path. - Returns: The normalized object table row. 
""" - parent_id = row['par_obj_id'] - path = guid_util.get_guid_for_objid(row['obj_id'], omitted_value=None) - row_copy = row.copy() + path = guid_util.get_guid_for_objid(objid, omitted_value=None) - # remove host name (for multi-user) and dates/times from path for reports - if path is not None: + if not path: + return no_path_placeholder + else: + # remove host name (for multi-user) and dates/times from path for reports path_parts = get_path_segs(path) module_output_idx = index_of(path_parts, 'ModuleOutput') if module_output_idx >= 0: @@ -955,30 +958,27 @@ def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[ path_parts = path_parts[:-1] for idx in range(0, len(path_parts) - 1): - if path_parts[idx] == "Reports" and path_parts[idx + 1] == "AutopsyTestCase HTML Report": + if path_parts[idx].lower() == "reports" and \ + path_parts[idx + 1].lower().startswith("autopsytestcase html report"): path_parts = ["Reports", "AutopsyTestCase HTML Report"] path = os.path.join(*path_parts) if len(path_parts) > 0 else '/' - parent_path = guid_util.get_guid_for_objid(parent_id, omitted_value=None) + return normalize_regripper_files(normalize_unalloc_files(path)) - # Remove host name (for multi-user) from parent_path - if parent_path is not None: - parent_path_parts = get_path_segs(parent_path) - module_output_idx = index_of(parent_path_parts, 'ModuleOutput') - if module_output_idx >= 0: - parent_path_parts = parent_path_parts[module_output_idx:] - parent_path = os.path.join(*parent_path_parts) if len(parent_path_parts) > 0 else '/' - - # handle regripper and unalloc file replacements - if path and parent_path: - row_copy['obj_id'] = normalize_regripper_files(normalize_unalloc_files(path)) - row_copy['par_obj_id'] = normalize_regripper_files(normalize_unalloc_files(parent_path)) - else: - row_copy['obj_id'] = MASKED_OBJ_ID - row_copy['par_obj_id'] = "MASKED_PARENT_OBJ_ID" +def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes object table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + Returns: The normalized object table row. 
+ """ + row_copy = row.copy() + row_copy['obj_id'] = normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) + row_copy['par_obj_id'] = normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') return row_copy From 971c1d54b3b788fcedffb3fd712f21d5c2722b34 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 10 May 2021 12:56:10 -0400 Subject: [PATCH 16/30] bug fixes --- test/script/tskdbdiff.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 9ce406a5ce..aa28181f8f 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -723,16 +723,24 @@ def sanitize_schema(original: str) -> str: if (not line or line.startswith('--') or lower_line.startswith('set') or - lower_line.startswith('alter') or - "pg_catalog" in line or - "idle_in_transaction_session_timeout" in line): + " set default nextval" in lower_line or + " owner to " in lower_line or + " owned by " in lower_line or + "pg_catalog" in lower_line or + "idle_in_transaction_session_timeout" in lower_line): continue - elif line.endswith(';'): # Statement not finished - dump_line += line + + # if there is no white space or parenthesis delimiter, add a space + if re.match(r'^.+?[^\s()]$', dump_line) and re.match(r'^[^\s()]', line): + dump_line += ' ' + + # append the line to the outputted line + dump_line += line + + # if line ends with ';' then this will be one statement in diff + if line.endswith(';'): sanitized_lines.append(dump_line) dump_line = '' - else: - dump_line += line if len(dump_line.strip()) > 0: sanitized_lines.append(dump_line) @@ -740,7 +748,7 @@ def sanitize_schema(original: str) -> str: return "\n".join(sanitized_lines) -def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg_port: str): +def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg_port: Union[str, int]): """ Gets the schema to be added to the dump text from the postgres database. 
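For reference, the pg_dump invocation built here can be reproduced on its own; a minimal sketch using the same flags as the patch (the database name, user, host, port, and password below are placeholders, and error handling is omitted):

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    public class PgSchemaDumpSketch {
        public static void main(String[] args) throws IOException, InterruptedException {
            ProcessBuilder pb = new ProcessBuilder(
                    "pg_dump", "-U", "postgres", "-h", "localhost", "-p", "5432",
                    "--schema-only", "-d", "casedb", "-t", "public.*");
            // Same approach as the script: hand the password to pg_dump via PGPASSWORD.
            pb.environment().put("PGPASSWORD", "password");
            pb.redirectErrorStream(true);
            Process proc = pb.start();
            String schema = new String(proc.getInputStream().readAllBytes(), StandardCharsets.UTF_8);
            proc.waitFor();
            System.out.println(schema);
        }
    }
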
Args: @@ -754,7 +762,7 @@ def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg """ os.environ['PGPASSWORD'] = pg_pword - pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", pg_port, + pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", str(pg_port), "--schema-only", "-d", dbname, "-t", "public.*"] output = subprocess.check_output(pg_dump) output_str = output.decode('UTF-8') @@ -957,10 +965,12 @@ def normalize_tsk_objects_path(guid_util: TskGuidUtils, objid: int, # chop off the last folder (which contains a date/time) path_parts = path_parts[:-1] - for idx in range(0, len(path_parts) - 1): - if path_parts[idx].lower() == "reports" and \ - path_parts[idx + 1].lower().startswith("autopsytestcase html report"): - path_parts = ["Reports", "AutopsyTestCase HTML Report"] + if path_parts and len(path_parts) >= 2: + for idx in range(0, len(path_parts) - 1): + if path_parts[idx].lower() == "reports" and \ + path_parts[idx + 1].lower().startswith("autopsytestcase html report"): + path_parts = ["Reports", "AutopsyTestCase HTML Report"] + break path = os.path.join(*path_parts) if len(path_parts) > 0 else '/' From 9ffe631a955b6ceb981228cc9462dcf29a9f53f7 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 10 May 2021 13:53:52 -0400 Subject: [PATCH 17/30] null fix --- test/script/tskdbdiff.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index aa28181f8f..9113059e1d 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -987,8 +987,9 @@ def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[ Returns: The normalized object table row. """ row_copy = row.copy() - row_copy['obj_id'] = normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) - row_copy['par_obj_id'] = normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') + row_copy['obj_id'] = normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) if row['obj_id'] else None + row_copy['par_obj_id'] = normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') \ + if row['par_obj_id'] else None return row_copy From a9fea75770de9149185e442bbb965c818cd92e0f Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 10 May 2021 14:12:27 -0400 Subject: [PATCH 18/30] hashsetCountFix --- Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java | 1 - 1 file changed, 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java b/Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java index 5318a99a00..5906b01a78 100644 --- a/Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java +++ b/Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java @@ -179,7 +179,6 @@ public class HashsetHits implements AutopsyVisitableItem { TSK_HASHSET_HIT); super.setName(HASHSET_HITS); - super.setDisplayName(DISPLAY_NAME); this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/hashset_hits.png"); //NON-NLS } From 4a1f3259b39bed16145d37d2bc55387752d61934 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 10 May 2021 14:53:43 -0400 Subject: [PATCH 19/30] artifact type constructor deprecation --- .../ArtifactSelectionDialog.java | 18 ++++-------------- .../infrastructure/ReportVisualPanel2.java | 18 ++++-------------- .../infrastructure/TableReportGenerator.java | 18 ++++-------------- .../datamodel/DataSourceInfoUtilitiesTest.java | 2 +- 4 files changed, 13 insertions(+), 43 deletions(-) diff --git 
a/Core/src/org/sleuthkit/autopsy/report/infrastructure/ArtifactSelectionDialog.java b/Core/src/org/sleuthkit/autopsy/report/infrastructure/ArtifactSelectionDialog.java index 78ae4479a5..715e18eb81 100644 --- a/Core/src/org/sleuthkit/autopsy/report/infrastructure/ArtifactSelectionDialog.java +++ b/Core/src/org/sleuthkit/autopsy/report/infrastructure/ArtifactSelectionDialog.java @@ -72,20 +72,10 @@ class ArtifactSelectionDialog extends javax.swing.JDialog { private void populateList() { try { ArrayList doNotReport = new ArrayList<>(); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getDisplayName())); // output is too unstructured for table review - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getDisplayName())); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT)); // output is too unstructured for table review + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT)); artifactTypes = Case.getCurrentCaseThrows().getSleuthkitCase().getArtifactTypesInUse(); artifactTypes.removeAll(doNotReport); diff --git a/Core/src/org/sleuthkit/autopsy/report/infrastructure/ReportVisualPanel2.java b/Core/src/org/sleuthkit/autopsy/report/infrastructure/ReportVisualPanel2.java index c7dbcfd5b1..a596eb6c38 100644 --- a/Core/src/org/sleuthkit/autopsy/report/infrastructure/ReportVisualPanel2.java +++ b/Core/src/org/sleuthkit/autopsy/report/infrastructure/ReportVisualPanel2.java @@ -200,20 +200,10 @@ final class ReportVisualPanel2 extends JPanel { try { Case openCase = Case.getCurrentCaseThrows(); ArrayList doNotReport = new ArrayList<>(); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getDisplayName())); // output is too unstructured for table review - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getTypeID(), - 
BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getDisplayName())); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT)); // output is too unstructured for table review + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT)); // get artifact types that exist in the current case artifacts = openCase.getSleuthkitCase().getArtifactTypesInUse(); diff --git a/Core/src/org/sleuthkit/autopsy/report/infrastructure/TableReportGenerator.java b/Core/src/org/sleuthkit/autopsy/report/infrastructure/TableReportGenerator.java index ca6722911e..34186c13b2 100644 --- a/Core/src/org/sleuthkit/autopsy/report/infrastructure/TableReportGenerator.java +++ b/Core/src/org/sleuthkit/autopsy/report/infrastructure/TableReportGenerator.java @@ -102,20 +102,10 @@ class TableReportGenerator { private void getAllExistingArtiactTypes() throws NoCurrentCaseException, TskCoreException { // get all possible artifact types ArrayList doNotReport = new ArrayList<>(); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getDisplayName())); // output is too unstructured for table review - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getDisplayName())); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT)); // output is too unstructured for table review + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT)); Case.getCurrentCaseThrows().getSleuthkitCase().getArtifactTypes().forEach(artifactTypes::add); artifactTypes.removeAll(doNotReport); diff --git a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java index 28b55155c1..7d2453c221 100644 --- a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java +++ b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java @@ -287,7 +287,7 @@ public class DataSourceInfoUtilitiesTest { @Test public void getArtifacts_failOnBytes() throws TskCoreException { testFailOnBadAttrType( - new BlackboardArtifact.Type(999, 
"BYTE_ARRAY_TYPE", "Byte Array Type"), + new BlackboardArtifact.Type(999, "BYTE_ARRAY_TYPE", "Byte Array Type", BlackboardArtifact.Category.DATA_ARTIFACT), new BlackboardAttribute.Type(999, "BYTE_ARR_ATTR_TYPE", "Byte Array Attribute Type", TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.BYTE), new byte[]{0x0, 0x1, 0x2}, BlackboardAttribute::new); From 234c6f34e3d80040154705006060a95d17d7115a Mon Sep 17 00:00:00 2001 From: Kelly Kelly Date: Tue, 11 May 2021 13:33:59 -0400 Subject: [PATCH 20/30] Moved the calls to isSupport and isPreferred to a Swingworker in DataContentPanel --- .../corecomponents/DataContentPanel.java | 212 +++++++++++++----- 1 file changed, 160 insertions(+), 52 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentPanel.java b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentPanel.java index 8ffabd27e6..eef00e31a3 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentPanel.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentPanel.java @@ -1,15 +1,15 @@ /* * Autopsy Forensic Browser - * + * * Copyright 2011-2018 Basis Technology Corp. * Contact: carrier sleuthkit org - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,8 +23,10 @@ import java.beans.PropertyChangeEvent; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.concurrent.ExecutionException; import java.util.logging.Level; import javax.swing.JTabbedPane; +import javax.swing.SwingWorker; import javax.swing.event.ChangeEvent; import javax.swing.event.ChangeListener; import org.openide.nodes.Node; @@ -49,6 +51,8 @@ public class DataContentPanel extends javax.swing.JPanel implements DataContent, private final boolean isMain; private boolean listeningToTabbedPane = false; + private DataContentPanelWorker workerThread; + /** * Creates new DataContentPanel panel The main data content panel can only * be created by the data content top component, thus this constructor is @@ -132,43 +136,54 @@ public class DataContentPanel extends javax.swing.JPanel implements DataContent, public void setNode(Node selectedNode) { // change the cursor to "waiting cursor" for this operation this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); - try { - String defaultName = NbBundle.getMessage(DataContentTopComponent.class, "CTL_DataContentTopComponent"); - // set the file path - if (selectedNode == null) { - setName(defaultName); - } else { - Content content = selectedNode.getLookup().lookup(Content.class); - if (content != null) { - //String path = DataConversion.getformattedPath(ContentUtils.getDisplayPath(selectedNode.getLookup().lookup(Content.class)), 0); - String path = defaultName; - try { - path = content.getUniquePath(); - } catch (TskCoreException ex) { - logger.log(Level.SEVERE, "Exception while calling Content.getUniquePath() for {0}", content); //NON-NLS - } - setName(path); - } else { - setName(defaultName); + // Reset everything + for (int index = 0; index < jTabbedPane1.getTabCount(); index++) { + jTabbedPane1.setEnabledAt(index, false); + viewers.get(index).resetComponent(); + } + + String defaultName = 
NbBundle.getMessage(DataContentTopComponent.class, "CTL_DataContentTopComponent"); + // set the file path + if (selectedNode == null) { + setName(defaultName); + } else { + Content content = selectedNode.getLookup().lookup(Content.class); + if (content != null) { + //String path = DataConversion.getformattedPath(ContentUtils.getDisplayPath(selectedNode.getLookup().lookup(Content.class)), 0); + String path = defaultName; + try { + path = content.getUniquePath(); + } catch (TskCoreException ex) { + logger.log(Level.SEVERE, "Exception while calling Content.getUniquePath() for {0}", content); //NON-NLS } + setName(path); + } else { + setName(defaultName); } + } - currentNode = selectedNode; + currentNode = selectedNode; - setupTabs(selectedNode); - } finally { - this.setCursor(null); + if (workerThread != null) { + workerThread.cancel(true); + } + + if (selectedNode != null) { + workerThread = new DataContentPanelWorker(currentNode); + workerThread.execute(); } } /** - * Resets the tabs based on the selected Node. If the selected node is null - * or not supported, disable that tab as well. + * Update the state of the tabs based on the given data. * - * @param selectedNode the selected content Node + * @param selectedNode The currently selected node. + * @param supportedIndices The indices of the tabs that are supported by + * this node type. + * @param preferredIndex The index of the tab which is preferred. */ - public void setupTabs(Node selectedNode) { + private void updateTabs(Node selectedNode, List supportedIndices, int preferredIndex) { // Deferring becoming a listener to the tabbed pane until this point // eliminates handling a superfluous stateChanged event during construction. if (listeningToTabbedPane == false) { @@ -176,31 +191,12 @@ public class DataContentPanel extends javax.swing.JPanel implements DataContent, listeningToTabbedPane = true; } - int currTabIndex = jTabbedPane1.getSelectedIndex(); - int totalTabs = jTabbedPane1.getTabCount(); - int maxPreferred = 0; - int preferredViewerIndex = 0; - for (int i = 0; i < totalTabs; ++i) { - UpdateWrapper dcv = viewers.get(i); - dcv.resetComponent(); - - // disable an unsupported tab (ex: picture viewer) - if ((selectedNode == null) || (dcv.isSupported(selectedNode) == false)) { - jTabbedPane1.setEnabledAt(i, false); - } else { - jTabbedPane1.setEnabledAt(i, true); - - // remember the viewer with the highest preference value - int currentPreferred = dcv.isPreferred(selectedNode); - if (currentPreferred > maxPreferred) { - preferredViewerIndex = i; - maxPreferred = currentPreferred; - } - } + for (Integer index : supportedIndices) { + jTabbedPane1.setEnabledAt(index, true); } // let the user decide if we should stay with the current viewer - int tabIndex = UserPreferences.keepPreferredContentViewer() ? currTabIndex : preferredViewerIndex; + int tabIndex = UserPreferences.keepPreferredContentViewer() ? jTabbedPane1.getSelectedIndex() : preferredIndex; UpdateWrapper dcv = viewers.get(tabIndex); // this is really only needed if no tabs were enabled @@ -272,4 +268,116 @@ public class DataContentPanel extends javax.swing.JPanel implements DataContent, } } + /** + * SwingWorker class to determine which tabs should be enabled for the given + * node. + */ + private class DataContentPanelWorker extends SwingWorker { + + private final Node node; + + /** + * Worker constructor. 
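The new DataContentPanelWorker follows the usual SwingWorker division of labor: the potentially slow isSupported/isPreferred checks run in doInBackground() off the event dispatch thread, and done() applies the result back on the EDT. A generic, self-contained sketch of that pattern (the class, field, and label names here are invented for illustration and are not Autopsy types):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.ExecutionException;
    import javax.swing.JLabel;
    import javax.swing.SwingWorker;

    class SupportedTabsWorker extends SwingWorker<List<Integer>, Void> {

        private final List<String> tabNames;   // stand-in for the list of content viewers
        private final JLabel statusLabel;      // stand-in for the UI that gets updated

        SupportedTabsWorker(List<String> tabNames, JLabel statusLabel) {
            this.tabNames = tabNames;
            this.statusLabel = statusLabel;
        }

        @Override
        protected List<Integer> doInBackground() {
            // Runs off the event dispatch thread, so slow checks do not freeze the UI.
            List<Integer> supported = new ArrayList<>();
            for (int i = 0; i < tabNames.size() && !isCancelled(); i++) {
                if (tabNames.get(i).startsWith("supported")) {
                    supported.add(i);
                }
            }
            return supported;
        }

        @Override
        protected void done() {
            if (isCancelled()) {
                return;
            }
            try {
                // Back on the EDT: safe to touch Swing components.
                statusLabel.setText("Enabled tabs: " + get());
            } catch (InterruptedException | ExecutionException ex) {
                statusLabel.setText("Could not determine supported tabs");
            }
        }
    }

A caller constructs the worker with the current selection and calls execute(), cancelling any still-running worker first, which is the same sequence setNode() uses in the patched panel.
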
+ * + * @param node + */ + DataContentPanelWorker(Node node) { + this.node = node; + } + + @Override + protected WorkerResults doInBackground() throws Exception { + if (node == null) { + return null; + } + + List supportedViewers = new ArrayList<>(); + int preferredViewerIndex = 0; + int maxPreferred = 0; + + for (int index = 0; index < viewers.size(); index++) { + UpdateWrapper dcv = viewers.get(index); + if (dcv.isSupported(node)) { + supportedViewers.add(index); + + int currentPreferred = dcv.isPreferred(node); + if (currentPreferred > maxPreferred) { + preferredViewerIndex = index; + maxPreferred = currentPreferred; + } + } + + if (this.isCancelled()) { + return null; + } + + } + + return new WorkerResults(node, supportedViewers, preferredViewerIndex); + } + + @Override + protected void done() { + // Do nothing if the thread was cancelled. + if (isCancelled()) { + return; + } + + try { + WorkerResults results = get(); + + if (results != null) { + updateTabs(results.getNode(), results.getSupportedIndices(), results.getPreferredViewerIndex()); + } + + } catch (InterruptedException | ExecutionException ex) { + logger.log(Level.SEVERE, "Failed to updated data content panel for node " + node.getName(), ex); + } finally { + setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR)); + } + } + } + + /** + * Utility class to store all of the data the SwingWorker collected. + */ + private class WorkerResults { + + private final Node node; + private final List supportedViewerIndices; + private final int preferredViewerIndex; + + WorkerResults(Node node, List supportedViewerIndices, int preferredViewerIndex) { + this.node = node; + this.supportedViewerIndices = supportedViewerIndices; + this.preferredViewerIndex = preferredViewerIndex; + } + + /** + * Returns the selected node. + * + * @return + */ + Node getNode() { + return node; + } + + /** + * A list of tab indices that are supported by this node type. + * + * @return A list of indices. + */ + List getSupportedIndices() { + return supportedViewerIndices; + } + + /** + * Returns the preferred tab index for the given node type. + * + * @return A valid tab index. + */ + int getPreferredViewerIndex() { + return preferredViewerIndex; + } + } } From 2e6fca968cf497a3080fbde09805700c9fc30b25 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 12 May 2021 16:36:14 -0400 Subject: [PATCH 21/30] is none changes --- test/script/tskdbdiff.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 9113059e1d..a6f24e1695 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -427,7 +427,8 @@ class TskGuidUtils: cursor.execute(select_statement) ret_dict = {} for row in cursor: - ret_dict[row[0]] = delim.join([str(col) if col else '' for col in row[1:]]) + # concatenate value rows with delimiter filtering out any null values. 
+ ret_dict[row[0]] = delim.join(filter(lambda col: col is not None, [str(col) for col in row[1:]])) return ret_dict @@ -864,7 +865,7 @@ def normalize_unalloc_files(path_str: Union[str, None]) -> Union[str, None]: # takes a file name like "Unalloc_30580_7466496_2980941312" and removes the object id to become # "Unalloc_7466496_2980941312" - return re.sub('Unalloc_[0-9]+_', 'Unalloc_', path_str) if path_str else None + return None if path_str is None else re.sub('Unalloc_[0-9]+_', 'Unalloc_', path_str) def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: @@ -877,7 +878,7 @@ def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: """ # takes a file name like "regripper-12345-full" and removes the id to become "regripper-full" - return re.sub(r'regripper-[0-9]+-full', 'regripper-full', path_str) if path_str else None + return None if path_str is None else re.sub(r'regripper-[0-9]+-full', 'regripper-full', path_str) def normalize_tsk_files(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: @@ -893,8 +894,8 @@ def normalize_tsk_files(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[st # Ignore TIFF size and hash if extracted from PDFs. # See JIRA-6951 for more details. row_copy = row.copy() - if row['extension'] and row['extension'].strip().lower() == 'tif' and \ - row['parent_path'] and row['parent_path'].strip().lower().endswith('.pdf/'): + if row['extension'] is not None and row['extension'].strip().lower() == 'tif' and \ + row['parent_path'] is not None and row['parent_path'].strip().lower().endswith('.pdf/'): row_copy['size'] = "SIZE_IGNORED" row_copy['md5'] = "MD5_IGNORED" row_copy['sha256'] = "SHA256_IGNORED" @@ -917,7 +918,7 @@ def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Di """ row_copy = row.copy() path = row['path'] - if path: + if path is not None: path_parts = get_path_segs(path) module_output_idx = index_of(path_parts, 'ModuleOutput') if module_output_idx >= 0: @@ -951,7 +952,7 @@ def normalize_tsk_objects_path(guid_util: TskGuidUtils, objid: int, """ path = guid_util.get_guid_for_objid(objid, omitted_value=None) - if not path: + if path is None: return no_path_placeholder else: # remove host name (for multi-user) and dates/times from path for reports @@ -987,9 +988,12 @@ def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[ Returns: The normalized object table row. 
""" row_copy = row.copy() - row_copy['obj_id'] = normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) if row['obj_id'] else None - row_copy['par_obj_id'] = normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') \ - if row['par_obj_id'] else None + row_copy['obj_id'] = None if row['obj_id'] is None else \ + normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) + + row_copy['par_obj_id'] = None if row['par_obj_id'] is None else \ + normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') + return row_copy From 4463592c7a1395701b780b9fbb707630d5ea35ed Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 12 May 2021 16:47:38 -0400 Subject: [PATCH 22/30] fix --- test/script/tskdbdiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index a6f24e1695..7e22f5009a 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -428,7 +428,7 @@ class TskGuidUtils: ret_dict = {} for row in cursor: # concatenate value rows with delimiter filtering out any null values. - ret_dict[row[0]] = delim.join(filter(lambda col: col is not None, [str(col) for col in row[1:]])) + ret_dict[row[0]] = delim.join([str(col) for col in filter(lambda col: col is not None, row[1:])]) return ret_dict From 7479aeec9855ca17a6428bd9157c24eb1f6ab238 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 13 May 2021 11:05:23 -0400 Subject: [PATCH 23/30] json-like output --- test/script/tskdbdiff.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 7e22f5009a..ac0f4cb044 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -1098,18 +1098,14 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, row_dict = row_masker.normalize(guid_utils, row_dict) if row_dict is not None: - # NOTE: This is an alternate approach to representing values as json-like lines - # entries = [] - # for column in column_names: - # value = get_sql_insert_value(row_dict[column] if column in row_dict and row_dict[column] else None) - # if value: - # entries.append((column, value)) - # insert_values = ", ".join([f"{pr[0]}: {pr[1]}" for pr in entries]) - # insert_statement = f"{table}: {{{insert_values}}}\n" - # output_file.write(insert_statement) - - values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) - insert_statement = f'INSERT INTO "{table}" VALUES({values_statement});\n' + # show row as json-like value + entries = [] + for column in column_names: + value = get_sql_insert_value(row_dict[column] if column in row_dict and row_dict[column] else None) + if value is not None: + entries.append((column, value)) + insert_values = ", ".join([f"{pr[0]}: {pr[1]}" for pr in entries]) + insert_statement = f"{table}: {{{insert_values}}}\n" output_file.write(insert_statement) From 1042f2844ffc71b07ea1215af859d7b2196bd649 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 13 May 2021 17:09:04 -0400 Subject: [PATCH 24/30] 7608 fix other occurrences viewer --- .../contentviewer/OtherOccurrencesPanel.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index d6588c2313..12d676200e 100644 --- 
a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -380,13 +380,12 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { int totalCount = 0; Set dataSources = new HashSet<>(); if (CentralRepository.isEnabled()) { - try { - List instances; - instances = CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value); + correlationAttributes.addAll(CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value)); HashMap nodeDataMap = new HashMap<>(); String caseUUID = Case.getCurrentCase().getName(); - for (CorrelationAttributeInstance artifactInstance : instances) { + // get the attributes we can correlate on + for (CorrelationAttributeInstance artifactInstance : correlationAttributes) { // Only add the attribute if it isn't the object the user selected. // We consider it to be a different object if at least one of the following is true: @@ -395,10 +394,9 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { // - the data source device ID is different // - the file path is different if (artifactInstance.getCorrelationCase().getCaseUUID().equals(caseUUID) - || (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) - || (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) - || (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { - correlationAttributes.add(artifactInstance); + && (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) + && (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) + && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { continue; } OtherOccurrenceNodeInstanceData newNode = new OtherOccurrenceNodeInstanceData(artifactInstance, aType, value); @@ -510,7 +508,7 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { * artifact. If the central repo is not enabled, this will only return files * from the current case with matching MD5 hashes. 
* - * @param corAttr CorrelationAttribute to query for + * @param corAttr CorrelationAttribute to query for * * @return A collection of correlated artifact instances */ @@ -533,9 +531,9 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { // - the data source device ID is different // - the file path is different if (artifactInstance.getCorrelationCase().getCaseUUID().equals(caseUUID) - || (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) - || (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) - || (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { + && (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) + && (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) + && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { continue; } OtherOccurrenceNodeInstanceData newNode = new OtherOccurrenceNodeInstanceData(artifactInstance, corAttr.getCorrelationType(), corAttr.getCorrelationValue()); From abfbc3106e15cf86c5ef66ce625f903713bdcf88 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 13 May 2021 17:34:54 -0400 Subject: [PATCH 25/30] 7608 fix discovery use case and add comment --- .../contentviewer/OtherOccurrencesPanel.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index 12d676200e..b0d24cad06 100644 --- a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -381,11 +381,12 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { Set dataSources = new HashSet<>(); if (CentralRepository.isEnabled()) { try { - correlationAttributes.addAll(CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value)); + List instances; + instances = CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value); HashMap nodeDataMap = new HashMap<>(); String caseUUID = Case.getCurrentCase().getName(); // get the attributes we can correlate on - for (CorrelationAttributeInstance artifactInstance : correlationAttributes) { + for (CorrelationAttributeInstance artifactInstance : instances) { // Only add the attribute if it isn't the object the user selected. 
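The substance of this fix is the switch from ||-joined to &&-joined checks: an occurrence is treated as the item the user selected (and skipped) only when case, data source name, device id, and file path all match, rather than when any one of them matches. A small illustrative sketch of the difference (the method and parameter names are made up, not the Autopsy API):

    class SameObjectCheckSketch {

        // Old behaviour: skip when ANY attribute matches, so an instance in the same
        // case but from a different data source or path was dropped from the results.
        static boolean skipAnyMatch(boolean sameCase, boolean sameDataSource,
                                    boolean sameDevice, boolean samePath) {
            return sameCase || sameDataSource || sameDevice || samePath;
        }

        // Fixed behaviour: skip only when EVERY attribute matches, i.e. it really is
        // the selected item itself.
        static boolean skipAllMatch(boolean sameCase, boolean sameDataSource,
                                    boolean sameDevice, boolean samePath) {
            return sameCase && sameDataSource && sameDevice && samePath;
        }

        public static void main(String[] args) {
            // Same case, different data source: should still be listed as an other occurrence.
            System.out.println(skipAnyMatch(true, false, false, false)); // true  -> wrongly skipped
            System.out.println(skipAllMatch(true, false, false, false)); // false -> kept
        }
    }
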
// We consider it to be a different object if at least one of the following is true: @@ -396,7 +397,9 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { if (artifactInstance.getCorrelationCase().getCaseUUID().equals(caseUUID) && (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) && (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) - && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { + && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { + //because we are only correlating on one type we can add that only when everything is the same + correlationAttributes.add(artifactInstance); continue; } OtherOccurrenceNodeInstanceData newNode = new OtherOccurrenceNodeInstanceData(artifactInstance, aType, value); From f5b017362c43c3ec681d1497cb4a5e65b7d42498 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 13 May 2021 18:44:03 -0400 Subject: [PATCH 26/30] 7608 fix the discovery version --- .../contentviewer/OtherOccurrencesPanel.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index b0d24cad06..f1e724c092 100644 --- a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -397,11 +397,10 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { if (artifactInstance.getCorrelationCase().getCaseUUID().equals(caseUUID) && (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) && (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) - && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { - //because we are only correlating on one type we can add that only when everything is the same - correlationAttributes.add(artifactInstance); + && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { continue; } + correlationAttributes.add(artifactInstance); OtherOccurrenceNodeInstanceData newNode = new OtherOccurrenceNodeInstanceData(artifactInstance, aType, value); UniquePathKey uniquePathKey = new UniquePathKey(newNode); nodeDataMap.put(uniquePathKey, newNode); From 9ecf7c77ddb17eed8d200edeb4a50a07c9524aa8 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 13 May 2021 18:45:55 -0400 Subject: [PATCH 27/30] 7608 remove wrong comment --- .../centralrepository/contentviewer/OtherOccurrencesPanel.java | 1 - 1 file changed, 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index f1e724c092..1be181bff2 100644 --- a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -385,7 +385,6 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { instances = 
CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value); HashMap nodeDataMap = new HashMap<>(); String caseUUID = Case.getCurrentCase().getName(); - // get the attributes we can correlate on for (CorrelationAttributeInstance artifactInstance : instances) { // Only add the attribute if it isn't the object the user selected. From a914ab6295f771809dcd02cb95f77071f4498c11 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Fri, 14 May 2021 10:50:40 -0400 Subject: [PATCH 28/30] 7613 add pop up menu back --- .../centralrepository/contentviewer/OtherOccurrencesPanel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index d6588c2313..45bbceed2b 100644 --- a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -130,7 +130,7 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { exportToCSVMenuItem.addActionListener(actList); showCaseDetailsMenuItem.addActionListener(actList); showCommonalityMenuItem.addActionListener(actList); - + filesTable.setComponentPopupMenu(rightClickPopupMenu); // Configure column sorting. TableRowSorter sorter = new TableRowSorter<>(filesTable.getModel()); filesTable.setRowSorter(sorter); From 2f5790c2fee67a749a374b9e723c68ebaa80dfa6 Mon Sep 17 00:00:00 2001 From: Mark McKinnon Date: Sat, 15 May 2021 22:19:41 -0400 Subject: [PATCH 29/30] Update ExtractRegistry.java Install date from regripper is UTC time, when it is parsed it does not recognize a timezone so it defaults to current timezone of pc, or that appears to happen. When UTC timezone added to parse it stores the epoch time correctly. 
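In other words, without an explicit offset SimpleDateFormat parses the RegRipper timestamp in the machine's local time zone, so the stored epoch value shifts from machine to machine; appending "+0000" and adding Z to the pattern pins the parse to UTC. A small sketch of the difference (the timestamp value below is an example, not taken from a real hive):

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Locale;

    public class InstallDateParseSketch {
        public static void main(String[] args) throws ParseException {
            String value = "Wed Mar 10 14:00:00 2021";  // RegRipper-style InstallDate, actually UTC

            // No zone in the pattern: the result depends on the local machine's time zone.
            long localInterpretation = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy", Locale.US)
                    .parse(value).getTime() / 1000;

            // Explicit +0000 offset, as in the patch: always the UTC epoch seconds.
            long utcInterpretation = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyyZ", Locale.US)
                    .parse(value + "+0000").getTime() / 1000;

            System.out.println(localInterpretation + " vs " + utcInterpretation);
        }
    }
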
--- .../org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java index b0d7fe08c3..7875dd794a 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java @@ -631,7 +631,7 @@ class ExtractRegistry extends Extract { case "InstallDate": //NON-NLS if (value != null && !value.isEmpty()) { try { - installtime = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy", US).parse(value).getTime(); + installtime = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyyZ", US).parse(value+"+0000").getTime(); String Tempdate = installtime.toString(); installtime = Long.valueOf(Tempdate) / MS_IN_SEC; } catch (ParseException e) { From cc66187e22b7f915eaa36574a7dd79d1c919dbdc Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 18 May 2021 08:06:39 -0400 Subject: [PATCH 30/30] unit test fixes for changes in blackboard artifact type constructors --- .../datamodel/DataSourceInfoUtilitiesTest.java | 2 +- .../datasourcesummary/datamodel/UserActivitySummaryTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java index 7d2453c221..1b5d809b1f 100644 --- a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java +++ b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java @@ -287,7 +287,7 @@ public class DataSourceInfoUtilitiesTest { @Test public void getArtifacts_failOnBytes() throws TskCoreException { testFailOnBadAttrType( - new BlackboardArtifact.Type(999, "BYTE_ARRAY_TYPE", "Byte Array Type", BlackboardArtifact.Category.DATA_ARTIFACT), + BlackboardArtifact.Type.TSK_YARA_HIT, new BlackboardAttribute.Type(999, "BYTE_ARR_ATTR_TYPE", "Byte Array Attribute Type", TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.BYTE), new byte[]{0x0, 0x1, 0x2}, BlackboardAttribute::new); diff --git a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/UserActivitySummaryTest.java b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/UserActivitySummaryTest.java index 572d4f86b3..2b6a1c6600 100644 --- a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/UserActivitySummaryTest.java +++ b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/UserActivitySummaryTest.java @@ -353,7 +353,7 @@ public class UserActivitySummaryTest { List results = summary.getRecentDevices(dataSource, 10); Assert.assertEquals(1, results.size()); - Assert.assertEquals((long) (DAY_SECONDS + 2), results.get(0).getLastAccessed().getTime() / 1000); + Assert.assertEquals((DAY_SECONDS + 2), results.get(0).getLastAccessed().getTime() / 1000); Assert.assertTrue("ID1".equalsIgnoreCase(results.get(0).getDeviceId())); Assert.assertTrue("MAKE1".equalsIgnoreCase(results.get(0).getDeviceMake())); Assert.assertTrue("MODEL1".equalsIgnoreCase(results.get(0).getDeviceModel()));
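Viewed together, the artifact-type changes in patches 19 and 30 follow one pattern: stop spelling out type id, label, and display name by hand and use the enum-based constructor or a predefined Type constant instead. A brief before/after sketch, limited to constructors and constants that already appear in the diffs above:

    import org.sleuthkit.datamodel.BlackboardArtifact;

    class ArtifactTypeMigrationSketch {
        static void examples() {
            // Older three-argument form that the patches replace (repeats the enum's metadata):
            BlackboardArtifact.Type oldStyle = new BlackboardArtifact.Type(
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getTypeID(),
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getLabel(),
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getDisplayName());

            // Forms the patches switch to: construct from the enum value, or use a
            // predefined Type constant where one exists.
            BlackboardArtifact.Type fromEnum =
                    new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO);
            BlackboardArtifact.Type yaraHit = BlackboardArtifact.Type.TSK_YARA_HIT;
        }
    }
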