From 0a5107ddd51b6160eb5e33f7afb8a9a79aa83788 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 16 Apr 2021 12:35:37 -0400 Subject: [PATCH 01/30] get table and column definitions for database --- test/script/dbaccesstest.py | 45 ++ test/script/tskdbdiff2.py | 969 ++++++++++++++++++++++++++++++++++++ 2 files changed, 1014 insertions(+) create mode 100644 test/script/dbaccesstest.py create mode 100644 test/script/tskdbdiff2.py diff --git a/test/script/dbaccesstest.py b/test/script/dbaccesstest.py new file mode 100644 index 0000000000..cfe026395e --- /dev/null +++ b/test/script/dbaccesstest.py @@ -0,0 +1,45 @@ +from typing import List, Dict + +import psycopg2 +import sqlite3 + + +def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: + cur = conn.cursor() + cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") + tables = list([table[0] for table in cur.fetchall()]) + cur.close() + + to_ret = {} + for table in tables: + cur = conn.cursor() + cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) + to_ret[table] = list([col[0] for col in cur.fetchall()]) + + return to_ret + + +def get_pg_table_columns(conn) -> Dict[str, List[str]]: + cursor = conn.cursor() + cursor.execute(""" + SELECT cols.table_name, cols.column_name + FROM information_schema.columns cols + WHERE cols.column_name IS NOT NULL + AND cols.table_name IS NOT NULL + AND cols.table_name IN ( + SELECT tables.tablename FROM pg_catalog.pg_tables tables + WHERE LOWER(schemaname) = 'public' + ) + ORDER by cols.table_name, cols.ordinal_position; + """) + mapping = {} + for row in cursor: + mapping.setdefault(row[0], []).append(row[1]) + + cursor.close() + conn.close() + return mapping + +#for key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items(): +#for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items(): +# print(f"{key}: {val}") \ No newline at end of file diff --git a/test/script/tskdbdiff2.py b/test/script/tskdbdiff2.py new file mode 100644 index 0000000000..7ff02d0c30 --- /dev/null +++ b/test/script/tskdbdiff2.py @@ -0,0 +1,969 @@ +# Requires python3 + +import re +import sqlite3 +import subprocess +import shutil +import os +import codecs +import datetime +import sys +from typing import Dict, List + +import psycopg2 +import psycopg2.extras +import socket +import csv + +class TskDbDiff(object): + """Compares two TSK/Autospy SQLite databases. + + Attributes: + gold_artifacts: + autopsy_artifacts: + gold_attributes: + autopsy_attributes: + gold_objects: + autopsy_objects: + artifact_comparison: + attribute_comparision: + report_errors: a listof_listof_String, the error messages that will be + printed to screen in the run_diff method + passed: a boolean, did the diff pass? + autopsy_db_file: + gold_db_file: + """ + def __init__(self, output_db, gold_db, output_dir=None, gold_bb_dump=None, gold_dump=None, verbose=False, isMultiUser=False, pgSettings=None): + """Constructor for TskDbDiff. + + Args: + output_db_path: path to output database (non-gold standard) + gold_db_path: path to gold database + output_dir: (optional) Path to folder where generated files will be put. + gold_bb_dump: (optional) path to file where the gold blackboard dump is located + gold_dump: (optional) path to file where the gold non-blackboard dump is located + verbose: (optional) a boolean, if true, diff results are sent to stdout. 
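+            isMultiUser: (optional) a boolean, if true, the output and gold databases are PostgreSQL (multi-user) databases rather than SQLite.
+            pgSettings: (optional) a PGSettings object holding the PostgreSQL host, port, username and password; required when isMultiUser is true.
+
+        Example (paths are placeholders; this mirrors how main() below drives the class):
+            db_diff = TskDbDiff("output/autopsy.db", "gold/autopsy.db", output_dir=".")
+            dump_passed, bb_dump_passed = db_diff.run_diff()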
+ """ + + self.output_db_file = output_db + self.gold_db_file = gold_db + self.output_dir = output_dir + self.gold_bb_dump = gold_bb_dump + self.gold_dump = gold_dump + self._generate_gold_dump = False + self._generate_gold_bb_dump = False + self._bb_dump_diff = "" + self._dump_diff = "" + self._bb_dump = "" + self._dump = "" + self.verbose = verbose + self.isMultiUser = isMultiUser + self.pgSettings = pgSettings + + if self.isMultiUser and not self.pgSettings: + print("Missing PostgreSQL database connection settings data.") + sys.exit(1) + + if self.gold_bb_dump is None: + self._generate_gold_bb_dump = True + if self.gold_dump is None: + self._generate_gold_dump = True + + def run_diff(self): + """Compare the databases. + + Raises: + TskDbDiffException: if an error occurs while diffing or dumping the database + """ + + self._init_diff() + id_obj_path_table = -1 + # generate the gold database dumps if necessary + if self._generate_gold_dump: + id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.gold_db_file, self.gold_dump, self.isMultiUser, self.pgSettings) + if self._generate_gold_bb_dump: + TskDbDiff._dump_output_db_bb(self.gold_db_file, self.gold_bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table) + + # generate the output database dumps (both DB and BB) + id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.output_db_file, self._dump, self.isMultiUser, self.pgSettings) + TskDbDiff._dump_output_db_bb(self.output_db_file, self._bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table) + + # Compare non-BB + dump_diff_pass = self._diff(self._dump, self.gold_dump, self._dump_diff) + + # Compare BB + bb_dump_diff_pass = self._diff(self._bb_dump, self.gold_bb_dump, self._bb_dump_diff) + + self._cleanup_diff() + return dump_diff_pass, bb_dump_diff_pass + + + def _init_diff(self): + """Set up the necessary files based on the arguments given at construction""" + if self.output_dir is None: + # No stored files + self._bb_dump = TskDbDiff._get_tmp_file("BlackboardDump", ".txt") + self._bb_dump_diff = TskDbDiff._get_tmp_file("BlackboardDump-Diff", ".txt") + self._dump = TskDbDiff._get_tmp_file("DBDump", ".txt") + self._dump_diff = TskDbDiff._get_tmp_file("DBDump-Diff", ".txt") + else: + self._bb_dump = os.path.join(self.output_dir, "BlackboardDump.txt") + self._bb_dump_diff = os.path.join(self.output_dir, "BlackboardDump-Diff.txt") + self._dump = os.path.join(self.output_dir, "DBDump.txt") + self._dump_diff = os.path.join(self.output_dir, "DBDump-Diff.txt") + + # Sorting gold before comparing (sort behaves differently in different environments) + new_bb = TskDbDiff._get_tmp_file("GoldBlackboardDump", ".txt") + new_db = TskDbDiff._get_tmp_file("GoldDBDump", ".txt") + if self.gold_bb_dump is not None: + srtcmdlst = ["sort", self.gold_bb_dump, "-o", new_bb] + subprocess.call(srtcmdlst) + srtcmdlst = ["sort", self.gold_dump, "-o", new_db] + subprocess.call(srtcmdlst) + self.gold_bb_dump = new_bb + self.gold_dump = new_db + + + def _cleanup_diff(self): + if self.output_dir is None: + #cleanup temp files + os.remove(self._dump) + os.remove(self._bb_dump) + if os.path.isfile(self._dump_diff): + os.remove(self._dump_diff) + if os.path.isfile(self._bb_dump_diff): + os.remove(self._bb_dump_diff) + + if self.gold_bb_dump is None: + os.remove(self.gold_bb_dump) + os.remove(self.gold_dump) + + + def _diff(self, output_file, gold_file, diff_path): + """Compare two text files. 
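+        If the files differ, the diff output is written to diff_path and a copy of the gold file
+        (prefixed with "Gold-") is placed next to the output file so both versions end up in the report output folder.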
+ + Args: + output_file: a pathto_File, the latest text file + gold_file: a pathto_File, the gold text file + diff_path: The file to write the differences to + Returns False if different + """ + + if (not os.path.isfile(output_file)): + return False + + if (not os.path.isfile(gold_file)): + return False + + # It is faster to read the contents in and directly compare + output_data = codecs.open(output_file, "r", "utf_8").read() + gold_data = codecs.open(gold_file, "r", "utf_8").read() + if (gold_data == output_data): + return True + + # If they are different, invoke 'diff' + diff_file = codecs.open(diff_path, "wb", "utf_8") + # Gold needs to be passed in as 1st arg and output as 2nd + dffcmdlst = ["diff", gold_file, output_file] + subprocess.call(dffcmdlst, stdout = diff_file) + + # create file path for gold files inside output folder. In case of diff, both gold and current run files + # are available in the report output folder. Prefix Gold- is added to the filename. + gold_file_in_output_dir = output_file[:output_file.rfind("/")] + "/Gold-" + output_file[output_file.rfind("/")+1:] + shutil.copy(gold_file, gold_file_in_output_dir) + + return False + + + def _dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table): + """Dumps sorted text results to the given output location. + + Smart method that deals with a blackboard comparison to avoid issues + with different IDs based on when artifacts were created. + + Args: + db_file: a pathto_File, the output database. + bb_dump_file: a pathto_File, the sorted dump file to write to + """ + + unsorted_dump = TskDbDiff._get_tmp_file("dump_data", ".txt") + if isMultiUser: + conn, unused_db = db_connect(db_file, isMultiUser, pgSettings) + artifact_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + else: # Use Sqlite + conn = sqlite3.connect(db_file) + conn.text_factory = lambda x: x.decode("utf-8", "ignore") + conn.row_factory = sqlite3.Row + artifact_cursor = conn.cursor() + # Get the list of all artifacts (along with type and associated file) + # @@@ Could add a SORT by parent_path in here since that is how we are going to later sort it. 
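+        # The query joins each artifact to its type display name and to its source file in tsk_files, so the
+        # dump can be keyed by file path rather than by artifact_id values that change between runs.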
+ artifact_cursor.execute("SELECT tsk_files.parent_path, tsk_files.name, blackboard_artifact_types.display_name, blackboard_artifacts.artifact_id FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id") + database_log = codecs.open(unsorted_dump, "wb", "utf_8") + row = artifact_cursor.fetchone() + appnd = False + counter = 0 + artifact_count = 0 + artifact_fail = 0 + + # Cycle through artifacts + try: + while (row != None): + + # File Name and artifact type + # Remove parent object ID from Unalloc file name + normalizedName = re.sub('^Unalloc_[0-9]+_', 'Unalloc_', row["name"]) + if(row["parent_path"] != None): + database_log.write(row["parent_path"] + normalizedName + ' ') + else: + database_log.write(normalizedName + ' ') + + if isMultiUser: + attribute_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + else: + attribute_cursor = conn.cursor() + looptry = True + artifact_count += 1 + try: + art_id = "" + art_id = str(row["artifact_id"]) + + # Get attributes for this artifact + if isMultiUser: + attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id = %s ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id]) + else: + attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id =? 
ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id]) + + attributes = attribute_cursor.fetchall() + + # Print attributes + if (len(attributes) == 0): + # @@@@ This should be + database_log.write(' \n') + row = artifact_cursor.fetchone() + continue + + src = attributes[0][0] + for attr in attributes: + numvals = 0 + for x in range(3, 6): + if(attr[x] != None): + numvals += 1 + if(numvals > 1): + msg = "There were too many values for attribute type: " + attr["display_name"] + " for artifact with id #" + str(row["artifact_id"]) + ".\n" + + if(not attr["source"] == src): + msg = "There were inconsistent sources for artifact with id #" + str(row["artifact_id"]) + ".\n" + + try: + if attr["value_type"] == 0: + attr_value_as_string = str(attr["value_text"]) + elif attr["value_type"] == 1: + attr_value_as_string = str(attr["value_int32"]) + elif attr["value_type"] == 2: + attr_value_as_string = str(attr["value_int64"]) + if attr["attribute_type_id"] == 36 and id_obj_path_table != -1 and int(attr_value_as_string) > 0: #normalize positive TSK_PATH_IDs from being object id to a path if the obj_id_path_table was generated + attr_value_as_string = id_obj_path_table[int(attr_value_as_string)] + elif attr["value_type"] == 3: + attr_value_as_string = "%20.10f" % float((attr["value_double"])) #use exact format from db schema to avoid python auto format double value to (0E-10) scientific style + elif attr["value_type"] == 4: + attr_value_as_string = "bytes" + elif attr["value_type"] == 5: + attr_value_as_string = str(attr["value_int64"]) + if attr["display_name"] == "Associated Artifact": + attr_value_as_string = getAssociatedArtifactType(attribute_cursor, attr_value_as_string, isMultiUser) + patrn = re.compile("[\n\0\a\b\r\f]") + attr_value_as_string = re.sub(patrn, ' ', attr_value_as_string) + if attr["source"] == "Keyword Search" and attr["display_name"] == "Keyword Preview": + attr_value_as_string = "" + database_log.write('') + except IOError as e: + print("IO error") + raise TskDbDiffException("Unexpected IO error while writing to database log." 
+ str(e)) + + except sqlite3.Error as e: + msg = "Attributes in artifact id (in output DB)# " + str(row["artifact_id"]) + " encountered an error: " + str(e) +" .\n" + print("Attributes in artifact id (in output DB)# ", str(row["artifact_id"]), " encountered an error: ", str(e)) + print() + looptry = False + artifact_fail += 1 + database_log.write('Error Extracting Attributes') + database_log.close() + raise TskDbDiffException(msg) + finally: + attribute_cursor.close() + + + # @@@@ This should be + database_log.write(' \n') + row = artifact_cursor.fetchone() + + if(artifact_fail > 0): + msg ="There were " + str(artifact_count) + " artifacts and " + str(artifact_fail) + " threw an exception while loading.\n" + except Exception as e: + raise TskDbDiffException("Unexpected error while dumping blackboard database: " + str(e)) + finally: + database_log.close() + artifact_cursor.close() + conn.close() + + # Now sort the file + srtcmdlst = ["sort", unsorted_dump, "-o", bb_dump_file] + subprocess.call(srtcmdlst) + + + # for key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items(): + # for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items(): + # print(f"{key}: {val}") + + + + + + def _dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings): + """Dumps a database to a text file. + + Does not dump the artifact and attributes. + + Args: + db_file: a pathto_File, the database file to dump + dump_file: a pathto_File, the location to dump the non-blackboard database items + """ + + conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings) + id_files_table = build_id_files_table(conn.cursor(), isMultiUser) + id_vs_parts_table = build_id_vs_parts_table(conn.cursor(), isMultiUser) + id_vs_info_table = build_id_vs_info_table(conn.cursor(), isMultiUser) + id_fs_info_table = build_id_fs_info_table(conn.cursor(), isMultiUser) + id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser) + id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser) + id_legacy_artifact_types = build_id_legacy_artifact_types_table(conn.cursor(), isMultiUser) + id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser) + id_images_table = build_id_image_names_table(conn.cursor(), isMultiUser) + id_accounts_table = build_id_accounts_table(conn.cursor(), isMultiUser) + id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table, id_accounts_table) + + if isMultiUser: # Use PostgreSQL + os.environ['PGPASSWORD']=pgSettings.password + pgDump = ["pg_dump", "--inserts", "-U", pgSettings.username, "-h", pgSettings.pgHost, "-p", pgSettings.pgPort, "-d", db_file, "-E", "utf-8", "-T", "blackboard_artifacts", "-T", "blackboard_attributes", "-f", "postgreSQLDump.sql"] + subprocess.call(pgDump) + postgreSQL_db = codecs.open("postgreSQLDump.sql", "r", "utf-8") + # Write to the database dump + with codecs.open(dump_file, "wb", "utf_8") as db_log: + dump_line = '' + for line in postgreSQL_db: + line = line.strip('\r\n ') + # Deal with pg_dump result file + if (line.startswith('--') or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): # It's comment or alter statement or catalog entry or set idle entry or empty line + continue + elif not line.endswith(';'): # Statement not finished + dump_line 
+= line + continue + else: + dump_line += line + if 'INSERT INTO image_gallery_groups_seen' in dump_line: + dump_line = '' + continue; + dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) + db_log.write('%s\n' % dump_line) + dump_line = '' + postgreSQL_db.close() + else: # use Sqlite + # Delete the blackboard tables + conn.text_factory = lambda x: x.decode("utf-8", "ignore") + conn.execute("DROP TABLE blackboard_artifacts") + conn.execute("DROP TABLE blackboard_attributes") + # Write to the database dump + with codecs.open(dump_file, "wb", "utf_8") as db_log: + for line in conn.iterdump(): + if 'INSERT INTO "image_gallery_groups_seen"' in line: + continue + line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) + db_log.write('%s\n' % line) + # Now sort the file + srtcmdlst = ["sort", dump_file, "-o", dump_file] + subprocess.call(srtcmdlst) + + conn.close() + # cleanup the backup + if backup_db_file: + os.remove(backup_db_file) + return id_obj_path_table + + + def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings): + """Dumps the given database to text files for later comparison. + + Args: + db_file: a pathto_File, the database file to dump + dump_file: a pathto_File, the location to dump the non-blackboard database items + bb_dump_file: a pathto_File, the location to dump the blackboard database items + """ + id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings) + TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table) + + + def _get_tmp_file(base, ext): + time = datetime.datetime.now().time().strftime("%H%M%f") + return os.path.join(os.environ['TMP'], base + time + ext) + + +class TskDbDiffException(Exception): + pass + +class PGSettings(object): + def __init__(self, pgHost=None, pgPort=5432, user=None, password=None): + self.pgHost = pgHost + self.pgPort = pgPort + self.username = user + self.password = password + + def get_pgHost(self): + return self.pgHost + + def get_pgPort(self): + return self.pgPort + + def get_username(self): + return self.username + + def get_password(self): + return self.password + + + + + +def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: + """ + Retrieves the sqlite public tables and columns from a sqlite connection. + Args: + conn: The sqlite connection. + + Returns: The mapping of table names to a list of column names in that table where the list is in ordinal value. + """ + cur = conn.cursor() + cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") + tables = list([table[0] for table in cur.fetchall()]) + cur.close() + + to_ret = {} + for table in tables: + cur = conn.cursor() + cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) + to_ret[table] = list([col[0] for col in cur.fetchall()]) + cur.close() + + return to_ret + + +def get_pg_table_columns(conn) -> Dict[str, List[str]]: + """ + Retrieves the postgres public tables and columns from a pg connection. + Args: + conn: The pg connection. + + Returns: The mapping of table names to a list of column names in that table where the list is in ordinal value. 
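+
+    Example (connection parameters are placeholders; actual column lists depend on the schema version):
+        conn = psycopg2.connect(dbname="casedb", user="postgres", password="...")
+        table_columns = get_pg_table_columns(conn)
+        # e.g. table_columns["tsk_objects"] -> ['obj_id', 'par_obj_id', 'type']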
+ """ + cursor = conn.cursor() + cursor.execute(""" + SELECT cols.table_name, cols.column_name + FROM information_schema.columns cols + WHERE cols.column_name IS NOT NULL + AND cols.table_name IS NOT NULL + AND cols.table_name IN ( + SELECT tables.tablename FROM pg_catalog.pg_tables tables + WHERE LOWER(schemaname) = 'public' + ) + ORDER by cols.table_name, cols.ordinal_position; + """) + mapping = {} + for row in cursor: + mapping.setdefault(row[0], []).append(row[1]) + + cursor.close() + return mapping + + +def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table, images_table, artifact_table, accounts_table): + """ Make testing more consistent and reasonable by doctoring certain db entries. + + Args: + line: a String, the line to remove the object id from. + files_table: a map from object ids to file paths. + """ + + # Sqlite statement use double quotes for table name, PostgreSQL doesn't. We check both databases results for normalization. + files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 + path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 + object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 + vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 + report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 + layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 + data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find('INSERT INTO data_source_info ') > -1 + event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find('INSERT INTO tsk_event_descriptions ') > -1 + events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 + ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 + examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 + ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find('INSERT INTO image_gallery_groups ') > -1 + ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find('INSERT INTO image_gallery_groups_seen ') > -1 + os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 + os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find('INSERT INTO tsk_os_account_attributes') > -1 + os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find('INSERT INTO tsk_os_account_instances') > -1 + data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find('INSERT INTO tsk_data_artifacts') > -1 + + parens = line[line.find('(') + 1 : line.rfind(')')] + no_space_parens = parens.replace(" ", "") + fields_list = list(csv.reader([no_space_parens], quotechar="'"))[0] + #Add back in the quotechar for values that were originally wrapped (csv reader consumes this character) + fields_list_with_quotes = [] + ptr = 0 + for field in fields_list: + if(len(field) == 0): + field = "'" + field + "'" + else: + start = no_space_parens.find(field, ptr) + if((start - 1) >= 0 and 
no_space_parens[start - 1] == '\''): + if((start + len(field)) < len(no_space_parens) and no_space_parens[start + len(field)] == '\''): + field = "'" + field + "'" + fields_list_with_quotes.append(field) + if(ptr > 0): + #Add one for each comma that is used to separate values in the original string + ptr+=1 + ptr += len(field) + + fields_list = fields_list_with_quotes + + # remove object ID + if files_index: + + # Ignore TIFF size and hash if extracted from PDFs. + # See JIRA-6951 for more details. + # index -3 = 3rd from the end, which is extension + # index -5 = 5th from the end, which is the parent path. + if fields_list[-3] == "'tif'" and fields_list[-5].endswith(".pdf/'"): + fields_list[15] = "'SIZE_IGNORED'" + fields_list[23] = "'MD5_IGNORED'" + fields_list[24] = "'SHA256_IGNORED'" + newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:-1]) + ');') #leave off first (object id) and last (os_account_id) field + # Remove object ID from Unalloc file name + newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) + return newLine + # remove object ID + elif vs_parts_index: + newLine = ('INSERT INTO "tsk_vs_parts" VALUES(' + ', '.join(fields_list[1:]) + ');') + return newLine + # remove group ID + elif ig_groups_index: + newLine = ('INSERT INTO "image_gallery_groups" VALUES(' + ', '.join(fields_list[1:]) + ');') + return newLine + #remove id field + elif ig_groups_seen_index: + # Only removing the id and group_id fields for now. May need to care about examiner_id and seen fields in future. + newLine = ('INSERT INTO "image_gallery_groups_seen" VALUES(' + ', '.join(fields_list[2:]) + ');') + return newLine + # remove object ID + elif path_index: + obj_id = int(fields_list[0]) + objValue = files_table[obj_id] + # remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory + idx_pre = fields_list[1].find('EmbeddedFileExtractor') + len('EmbeddedFileExtractor') + if idx_pre > -1: + idx_pos = fields_list[1].find('\\', idx_pre + 2) + dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file seperator + dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')] + pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:] + else: + pathValue = fields_list[1] + # remove localhost from postgres par_obj_name + multiOutput_idx = pathValue.find('ModuleOutput') + if multiOutput_idx > -1: + pathValue = "'" + pathValue[pathValue.find('ModuleOutput'):] #postgres par_obj_name include losthost + + newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');') + return newLine + # remove object ID + elif layout_index: + obj_id = fields_list[0] + path= files_table[int(obj_id)] + newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', '.join(fields_list[1:]) + ');') + # Remove object ID from Unalloc file name + newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) + return newLine + # remove object ID + elif object_index: + obj_id = fields_list[0] + parent_id = fields_list[1] + newLine = 'INSERT INTO "tsk_objects" VALUES(' + path = None + parent_path = None + + #if obj_id or parent_id is invalid literal, we simple return the values as it is + try: + obj_id = int(obj_id) + if parent_id != 'NULL': + parent_id = int(parent_id) + except Exception as e: + print(obj_id, parent_id) + return line + + if obj_id in files_table.keys(): + path = files_table[obj_id] + elif obj_id in vs_parts_table.keys(): + path = vs_parts_table[obj_id] + elif obj_id in vs_info_table.keys(): + 
path = vs_info_table[obj_id] + elif obj_id in fs_info_table.keys(): + path = fs_info_table[obj_id] + elif obj_id in reports_table.keys(): + path = reports_table[obj_id] + # remove host name (for multi-user) and dates/times from path for reports + if path is not None: + if 'ModuleOutput' in path: + # skip past the host name (if any) + path = path[path.find('ModuleOutput'):] + if 'BulkExtractor' in path or 'Smirk' in path: + # chop off the last folder (which contains a date/time) + path = path[:path.rfind('\\')] + if 'Reports\\AutopsyTestCase HTML Report' in path: + path = 'Reports\\AutopsyTestCase HTML Report' + + if parent_id in files_table.keys(): + parent_path = files_table[parent_id] + elif parent_id in vs_parts_table.keys(): + parent_path = vs_parts_table[parent_id] + elif parent_id in vs_info_table.keys(): + parent_path = vs_info_table[parent_id] + elif parent_id in fs_info_table.keys(): + parent_path = fs_info_table[parent_id] + elif parent_id in images_table.keys(): + parent_path = images_table[parent_id] + elif parent_id in accounts_table.keys(): + parent_path = accounts_table[parent_id] + elif parent_id == 'NULL': + parent_path = "NULL" + + # Remove host name (for multi-user) from parent_path + if parent_path is not None: + if 'ModuleOutput' in parent_path: + # skip past the host name (if any) + parent_path = parent_path[parent_path.find('ModuleOutput'):] + + if path and parent_path: + # Remove object ID from Unalloc file names and regripper output + path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', path) + path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', path) + parent_path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', parent_path) + parent_path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', parent_path) + return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');' + else: + return newLine + '"OBJECT IDS OMITTED", ' + ', '.join(fields_list[2:]) + ');' #omit parent object id and object id when we cant annonymize them + # remove time-based information, ie Test_6/11/14 -> Test + elif report_index: + fields_list[1] = "AutopsyTestCase" + fields_list[2] = "0" + newLine = ('INSERT INTO "reports" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id + return newLine + elif data_source_info_index: + fields_list[1] = "{device id}" + fields_list[4] = "{dateTime}" + newLine = ('INSERT INTO "data_source_info" VALUES(' + ','.join(fields_list) + ');') + return newLine + elif ingest_job_index: + fields_list[2] = "{host_name}" + start_time = int(fields_list[3]) + end_time = int(fields_list[4]) + if (start_time <= end_time): + fields_list[3] = "0" + fields_list[4] = "0" + newLine = ('INSERT INTO "ingest_jobs" VALUES(' + ','.join(fields_list) + ');') + return newLine + elif examiners_index: + fields_list[1] = "{examiner_name}" + newLine = ('INSERT INTO "tsk_examiners" VALUES(' + ','.join(fields_list) + ');') + return newLine + # remove all timing dependent columns from events table + elif events_index: + newLine = ('INSERT INTO "tsk_events" VALUES(' + ','.join(fields_list[1:2]) + ');') + return newLine + # remove object ids from event description table + elif event_description_index: + # replace object ids with information that is deterministic + file_obj_id = int(fields_list[5]) + object_id = int(fields_list[4]) + legacy_artifact_id = 'NULL' + if (fields_list[6] != 'NULL'): + legacy_artifact_id = int(fields_list[6]) + if file_obj_id != 'NULL' and file_obj_id in files_table.keys(): + fields_list[5] = files_table[file_obj_id] + if object_id != 'NULL' and 
object_id in files_table.keys(): + fields_list[4] = files_table[object_id] + if legacy_artifact_id != 'NULL' and legacy_artifact_id in artifact_table.keys(): + fields_list[6] = artifact_table[legacy_artifact_id] + if fields_list[1] == fields_list[2] and fields_list[1] == fields_list[3]: + fields_list[1] = cleanupEventDescription(fields_list[1]) + fields_list[2] = cleanupEventDescription(fields_list[2]) + fields_list[3] = cleanupEventDescription(fields_list[3]) + newLine = ('INSERT INTO "tsk_event_descriptions" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id + return newLine + elif os_account_index: + newLine = ('INSERT INTO "tsk_os_accounts" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id since value that would be substituted is in diff line already + return newLine + elif os_account_attr_index: + #substitue the account object id for a non changing value + os_account_id = int(fields_list[1]) + fields_list[1] = accounts_table[os_account_id] + #substitue the source object id for a non changing value + source_obj_id = int(fields_list[3]) + if source_obj_id in files_table.keys(): + fields_list[3] = files_table[source_obj_id] + elif source_obj_id in vs_parts_table.keys(): + fields_list[3] = vs_parts_table[source_obj_id] + elif source_obj_id in vs_info_table.keys(): + fields_list[3] = vs_info_table[source_obj_id] + elif source_obj_id in fs_info_table.keys(): + fields_list[3] = fs_info_table[source_obj_id] + elif source_obj_id in images_table.keys(): + fields_list[3] = images_table[source_obj_id] + elif source_obj_id in accounts_table.keys(): + fields_list[3] = accounts_table[source_obj_id] + elif source_obj_id == 'NULL': + fields_list[3] = "NULL" + newLine = ('INSERT INTO "tsk_os_account_attributes" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id + return newLine + elif os_account_instances_index: + os_account_id = int(fields_list[1]) + fields_list[1] = accounts_table[os_account_id] + newLine = ('INSERT INTO "tsk_os_account_instances" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id + return newLine + elif data_artifacts_index: + art_obj_id = int(fields_list[0]) + if art_obj_id in files_table.keys(): + fields_list[0] = files_table[art_obj_id] + else: + fields_list[0] = 'Artifact Object ID Omitted' + account_obj_id = int(fields_list[1]) + if account_obj_id in files_table.keys(): + fields_list[1] = files_table[account_obj_id] + else: + fields_list[1] = 'Account Object ID Omitted' + newLine = ('INSERT INTO "tsk_data_artifacts" VALUES(' + ','.join(fields_list[:]) + ');') # remove ids + return newLine + else: + return line + +def cleanupEventDescription(description): + test = re.search("^'\D+:\d+'$", description) + if test is not None: + return re.sub(":\d+", ":", description) + else: + return description + +def getAssociatedArtifactType(cur, artifact_id, isMultiUser): + if isMultiUser: + cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s",[artifact_id]) + else: + cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE 
artifact_id=?",[artifact_id]) + + info = cur.fetchone() + + return "File path: " + info[0] + " Artifact Type: " + info[1] + +def build_id_files_table(db_cursor, isPostgreSQL): + """Build the map of object ids to file paths. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, parent path, and name, then create a tuple in the dictionary + # with the object id as the key and the full file path (parent + name) as the value + mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, parent_path, name FROM tsk_files")]) + return mapping + +def build_id_vs_parts_table(db_cursor, isPostgreSQL): + """Build the map of object ids to vs_parts. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, addr, and start, then create a tuple in the dictionary + # with the object id as the key and (addr + start) as the value + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, addr, start FROM tsk_vs_parts")]) + return mapping + +def build_id_vs_info_table(db_cursor, isPostgreSQL): + """Build the map of object ids to vs_info. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, vs_type, and img_offset, then create a tuple in the dictionary + # with the object id as the key and (vs_type + img_offset) as the value + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")]) + return mapping + + +def build_id_fs_info_table(db_cursor, isPostgreSQL): + """Build the map of object ids to fs_info. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, img_offset, and fs_type, then create a tuple in the dictionary + # with the object id as the key and (img_offset + fs_type) as the value + mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")]) + return mapping + +def build_id_objects_table(db_cursor, isPostgreSQL): + """Build the map of object ids to par_id. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary + # with the object id as the key and par_obj_id, type as the value + mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) + return mapping + +def build_id_image_names_table(db_cursor, isPostgreSQL): + """Build the map of object ids to name. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id and name then create a tuple in the dictionary + # with the object id as the key and name, type as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, name FROM tsk_image_names WHERE sequence=0")]) + #data_sources which are logical file sets will be found in the files table + return mapping + +def build_id_artifact_types_table(db_cursor, isPostgreSQL): + """Build the map of object ids to artifact ids. 
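+    (More precisely, maps each artifact's artifact_obj_id to its artifact type name from blackboard_artifact_types.)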
+ + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary + # with the object id as the key and artifact type as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) + return mapping + +def build_id_legacy_artifact_types_table(db_cursor, isPostgreSQL): + """Build the map of legacy artifact ids to artifact type. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the legacy artifact id then create a tuple in the dictionary + # with the artifact id as the key and artifact type as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) + return mapping + +def build_id_reports_table(db_cursor, isPostgreSQL): + """Build the map of report object ids to report path. + + Args: + db_cursor: the database cursor + """ + # for each row in the reports table in the db, create an obj_id -> path map + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, path FROM reports")]) + return mapping + +def build_id_accounts_table(db_cursor, isPostgreSQL): + """Build the map of object ids to OS account SIDs. + + Args: + db_cursor: the database cursor + """ + # for each row in the db, take the object id and account SID then creates a tuple in the dictionary + # with the object id as the key and the OS Account's SID as the value + mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT os_account_obj_id, addr FROM tsk_os_accounts")]) + return mapping + +def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table, images_table, accounts_table): + """Build the map of object ids to artifact ids. 
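+    (Merges the per-table maps into a single object id -> path/name map: file paths for files, report paths for
+    reports, "<parent path>/<artifact type name>" for artifacts, and account SIDs for OS accounts.)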
+ + Args: + files_table: obj_id, path + objects_table: obj_id, par_obj_id, type + artifacts_table: obj_id, artifact_type_name + reports_table: obj_id, path + images_table: obj_id, name + accounts_table: obj_id, addr + """ + # make a copy of files_table and update it with new data from artifacts_table and reports_table + mapping = files_table.copy() + for k, v in objects_table.items(): + path = "" + if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id + if k in reports_table.keys(): # For a report we use the report path + par_obj_id = v[0] + if par_obj_id is not None: + mapping[k] = reports_table[k] + elif k in artifacts_table.keys(): # For an artifact we use it's par_obj_id's path+name plus it's artifact_type name + par_obj_id = v[0] # The parent of an artifact can be a file or a report + if par_obj_id in mapping.keys(): + path = mapping[par_obj_id] + elif par_obj_id in reports_table.keys(): + path = reports_table[par_obj_id] + elif par_obj_id in images_table.keys(): + path = images_table[par_obj_id] + mapping[k] = path + "/" + artifacts_table[k] + elif k in accounts_table.keys(): # For an OS Account object ID we use its addr field which is the account SID + mapping[k] = accounts_table[k] + elif v[0] not in mapping.keys(): + if v[0] in artifacts_table.keys(): + par_obj_id = objects_table[v[0]] + path = mapping[par_obj_id] + mapping[k] = path + "/" + artifacts_table[v[0]] + return mapping + +def db_connect(db_file, isMultiUser, pgSettings=None): + if isMultiUser: # use PostgreSQL + try: + return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + pgSettings.pgHost + " password=" + pgSettings.password), None + except: + print("Failed to connect to the database: " + db_file) + else: # Sqlite + # Make a copy that we can modify + backup_db_file = TskDbDiff._get_tmp_file("tsk_backup_db", ".db") + shutil.copy(db_file, backup_db_file) + # We sometimes get situations with messed up permissions + os.chmod (backup_db_file, 0o777) + return sqlite3.connect(backup_db_file), backup_db_file + +def sql_select_execute(cursor, isPostgreSQL, sql_stmt): + if isPostgreSQL: + cursor.execute(sql_stmt) + return cursor.fetchall() + else: + return cursor.execute(sql_stmt) + +def main(): + try: + sys.argv.pop(0) + output_db = sys.argv.pop(0) + gold_db = sys.argv.pop(0) + except: + print("usage: tskdbdiff [OUTPUT DB PATH] [GOLD DB PATH]") + sys.exit(1) + + db_diff = TskDbDiff(output_db, gold_db, output_dir=".") + dump_passed, bb_dump_passed = db_diff.run_diff() + + if dump_passed and bb_dump_passed: + print("Database comparison passed.") + if not dump_passed: + print("Non blackboard database comparison failed.") + if not bb_dump_passed: + print("Blackboard database comparison failed.") + + sys.exit(0) + + +if __name__ == "__main__": + if sys.hexversion < 0x03000000: + print("Python 3 required") + sys.exit(1) + + main() + From 24582c42ffba28c31b4d4f29c978886412b862f7 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 16 Apr 2021 15:36:46 -0400 Subject: [PATCH 02/30] write to sql statement --- test/script/dbaccesstest.py | 47 +++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/test/script/dbaccesstest.py b/test/script/dbaccesstest.py index cfe026395e..380998b793 100644 --- a/test/script/dbaccesstest.py +++ b/test/script/dbaccesstest.py @@ -37,9 +37,48 @@ def get_pg_table_columns(conn) -> Dict[str, List[str]]: mapping.setdefault(row[0], []).append(row[1]) cursor.close() - conn.close() return mapping -#for 
key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items(): -#for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items(): -# print(f"{key}: {val}") \ No newline at end of file + +def get_sql_insert_value(val) -> str: + if not val: + return "NULL" + + if isinstance(val, str): + escaped_val = val.replace('\n', '\\n').replace("'", "''") + return f"'{escaped_val}'" + + return str(val) + + +def write_normalized(output_file, db_conn, table: str, column_names: List[str], normalizer=None): + cursor = db_conn.cursor() + + joined_columns = ",".join([col for col in column_names]) + cursor.execute(f"SELECT {joined_columns} FROM {table}") + for row in cursor: + if len(row) != len(column_names): + print(f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}") + continue + + row_dict = {} + for col_idx in range(0, len(column_names)): + row_dict[column_names[col_idx]] = row[col_idx] + + if normalizer: + row_dict = normalizer(table, row_dict) + + values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) + insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n' + output_file.write(insert_statement) + + + + +#with sqlite3.connect(r"C:\Users\gregd\Desktop\autopsy_412.db") as conn, \ +with psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345") as conn, \ + open(r"C:\Users\gregd\Desktop\dbdump.sql", mode="w", encoding='utf-8') as output_file: + + for table, cols in get_pg_table_columns(conn).items(): + # for table, cols in get_sqlite_table_columns(conn).items(): + write_normalized(output_file, conn, table, cols) From 2a4d3c0c8f42d8f1b0af7fa393bdbe436ac90679 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 19 Apr 2021 15:33:45 -0400 Subject: [PATCH 03/30] mapping --- test/script/dbaccesstest.py | 124 +++++++++++++++++++++++++++++++++++- 1 file changed, 123 insertions(+), 1 deletion(-) diff --git a/test/script/dbaccesstest.py b/test/script/dbaccesstest.py index 380998b793..7e48bcdaf4 100644 --- a/test/script/dbaccesstest.py +++ b/test/script/dbaccesstest.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Dict, Callable, Union import psycopg2 import sqlite3 @@ -19,6 +19,128 @@ def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: return to_ret +IGNORE_TABLE = "IGNORE_TABLE" + + +class TskDbEnvironment: + pass + + +class MaskRow: + row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Dict[str, any]] + + def __init__(self, row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Union[Dict[str, any], None]]): + self.row_masker = row_masker + + def mask(self, db_env: TskDbEnvironment, row: Dict[str, any]) -> Union[Dict[str, any], None]: + return self.row_masker(db_env, row) + + +class MaskColumns(MaskRow): + @classmethod + def _mask_col_vals(cls, + col_mask: Dict[str, Union[any, Callable[[TskDbEnvironment, any], any]]], + db_env: TskDbEnvironment, + row: Dict[str, any]): + + row_copy = dict.copy() + for key, val in col_mask: + # only replace values if present in row + if key in row_copy: + # if a column replacing function, call with original value + if isinstance(val, Callable): + row_copy[key] = val(db_env, row[key]) + # otherwise, just replace with mask value + else: + row_copy[key] = val + + return row_copy + + def __init__(self, col_mask: Dict[str, Union[any, 
Callable[[any], any]]]): + super().__init__(lambda db_env, row: MaskColumns._mask_col_vals(col_mask, db_env, row)) + + +TableNormalization = Union[IGNORE_TABLE, MaskRow] + + +MASKED_OBJ_ID = "MASKED_OBJ_ID" +MASKED_ID = "MASKED_ID" + +table_masking: Dict[str, TableNormalization] = { + "tsk_files": MaskColumns({ + # TODO + }), + + "tsk_vs_parts": MaskColumns({ + "obj_id": MASKED_OBJ_ID + }), + "image_gallery_groups": MaskColumns({ + "obj_id": MASKED_OBJ_ID + }), + "image_gallery_groups_seen": IGNORE_TABLE, + # NOTE there was code in normalization for this, but the table is ignored? + # "image_gallery_groups_seen": MaskColumns({ + # "id": MASKED_ID, + # "group_id": MASKED_ID, + # }), + # TODO + "tsk_files_path": None, + # TODO + "tsk_file_layout": None, + "tsk_objects": None, + "reports": MaskColumns({ + "obj_id": MASKED_OBJ_ID, + "path": "AutopsyTestCase", + "crtime": 0 + }), + "data_source_info": MaskColumns({ + "device_id": "{device id}", + "added_date_time": "{dateTime}" + }), + # TODO + "ingest_jobs": None, + "tsk_examiners": MaskColumns({ + "login_name": "{examiner_name}" + }), + "tsk_events": MaskColumns({ + "event_id": "MASKED_EVENT_ID", + "time": 0, + }), + # TODO + "event_description_index": None, + "tsk_os_accounts": MaskColumns({ + "os_account_obj_id": MASKED_OBJ_ID + }), + # TODO + "tsk_data_artifacts": None +} + + +# files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 +# path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 +# object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 +# vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 +# report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 +# layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 +# data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find( +# 'INSERT INTO data_source_info ') > -1 +# event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find( +# 'INSERT INTO tsk_event_descriptions ') > -1 +# events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 +# ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 +# examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 +# ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find( +# 'INSERT INTO image_gallery_groups ') > -1 +# ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find( +# 'INSERT INTO image_gallery_groups_seen ') > -1 +# os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 +# os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find( +# 'INSERT INTO tsk_os_account_attributes') > -1 +# os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find( +# 'INSERT INTO tsk_os_account_instances') > -1 +# data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find( +# 'INSERT INTO tsk_data_artifacts') > -1 + def get_pg_table_columns(conn) -> Dict[str, List[str]]: cursor = conn.cursor() cursor.execute(""" From 
9d30b408467a7b69a3b88fb9b3440d21fa7ba271 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 20 Apr 2021 21:16:16 -0400 Subject: [PATCH 04/30] integrated into tskdbdiff --- test/script/dbaccesstest.py | 206 ------- test/script/tskdbdiff.py | 1150 ++++++++++++++++++++--------------- test/script/tskdbdiff2.py | 969 ----------------------------- 3 files changed, 662 insertions(+), 1663 deletions(-) delete mode 100644 test/script/dbaccesstest.py delete mode 100644 test/script/tskdbdiff2.py diff --git a/test/script/dbaccesstest.py b/test/script/dbaccesstest.py deleted file mode 100644 index 7e48bcdaf4..0000000000 --- a/test/script/dbaccesstest.py +++ /dev/null @@ -1,206 +0,0 @@ -from typing import List, Dict, Callable, Union - -import psycopg2 -import sqlite3 - - -def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: - cur = conn.cursor() - cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") - tables = list([table[0] for table in cur.fetchall()]) - cur.close() - - to_ret = {} - for table in tables: - cur = conn.cursor() - cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) - to_ret[table] = list([col[0] for col in cur.fetchall()]) - - return to_ret - - -IGNORE_TABLE = "IGNORE_TABLE" - - -class TskDbEnvironment: - pass - - -class MaskRow: - row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Dict[str, any]] - - def __init__(self, row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Union[Dict[str, any], None]]): - self.row_masker = row_masker - - def mask(self, db_env: TskDbEnvironment, row: Dict[str, any]) -> Union[Dict[str, any], None]: - return self.row_masker(db_env, row) - - -class MaskColumns(MaskRow): - @classmethod - def _mask_col_vals(cls, - col_mask: Dict[str, Union[any, Callable[[TskDbEnvironment, any], any]]], - db_env: TskDbEnvironment, - row: Dict[str, any]): - - row_copy = dict.copy() - for key, val in col_mask: - # only replace values if present in row - if key in row_copy: - # if a column replacing function, call with original value - if isinstance(val, Callable): - row_copy[key] = val(db_env, row[key]) - # otherwise, just replace with mask value - else: - row_copy[key] = val - - return row_copy - - def __init__(self, col_mask: Dict[str, Union[any, Callable[[any], any]]]): - super().__init__(lambda db_env, row: MaskColumns._mask_col_vals(col_mask, db_env, row)) - - -TableNormalization = Union[IGNORE_TABLE, MaskRow] - - -MASKED_OBJ_ID = "MASKED_OBJ_ID" -MASKED_ID = "MASKED_ID" - -table_masking: Dict[str, TableNormalization] = { - "tsk_files": MaskColumns({ - # TODO - }), - - "tsk_vs_parts": MaskColumns({ - "obj_id": MASKED_OBJ_ID - }), - "image_gallery_groups": MaskColumns({ - "obj_id": MASKED_OBJ_ID - }), - "image_gallery_groups_seen": IGNORE_TABLE, - # NOTE there was code in normalization for this, but the table is ignored? 
- # "image_gallery_groups_seen": MaskColumns({ - # "id": MASKED_ID, - # "group_id": MASKED_ID, - # }), - # TODO - "tsk_files_path": None, - # TODO - "tsk_file_layout": None, - "tsk_objects": None, - "reports": MaskColumns({ - "obj_id": MASKED_OBJ_ID, - "path": "AutopsyTestCase", - "crtime": 0 - }), - "data_source_info": MaskColumns({ - "device_id": "{device id}", - "added_date_time": "{dateTime}" - }), - # TODO - "ingest_jobs": None, - "tsk_examiners": MaskColumns({ - "login_name": "{examiner_name}" - }), - "tsk_events": MaskColumns({ - "event_id": "MASKED_EVENT_ID", - "time": 0, - }), - # TODO - "event_description_index": None, - "tsk_os_accounts": MaskColumns({ - "os_account_obj_id": MASKED_OBJ_ID - }), - # TODO - "tsk_data_artifacts": None -} - - -# files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 -# path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 -# object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 -# vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 -# report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 -# layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 -# data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find( -# 'INSERT INTO data_source_info ') > -1 -# event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find( -# 'INSERT INTO tsk_event_descriptions ') > -1 -# events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 -# ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 -# examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 -# ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find( -# 'INSERT INTO image_gallery_groups ') > -1 -# ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find( -# 'INSERT INTO image_gallery_groups_seen ') > -1 -# os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 -# os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find( -# 'INSERT INTO tsk_os_account_attributes') > -1 -# os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find( -# 'INSERT INTO tsk_os_account_instances') > -1 -# data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find( -# 'INSERT INTO tsk_data_artifacts') > -1 - -def get_pg_table_columns(conn) -> Dict[str, List[str]]: - cursor = conn.cursor() - cursor.execute(""" - SELECT cols.table_name, cols.column_name - FROM information_schema.columns cols - WHERE cols.column_name IS NOT NULL - AND cols.table_name IS NOT NULL - AND cols.table_name IN ( - SELECT tables.tablename FROM pg_catalog.pg_tables tables - WHERE LOWER(schemaname) = 'public' - ) - ORDER by cols.table_name, cols.ordinal_position; - """) - mapping = {} - for row in cursor: - mapping.setdefault(row[0], []).append(row[1]) - - cursor.close() - return mapping - - -def get_sql_insert_value(val) -> str: - if not val: - return "NULL" - - if isinstance(val, str): - escaped_val = val.replace('\n', 
'\\n').replace("'", "''") - return f"'{escaped_val}'" - - return str(val) - - -def write_normalized(output_file, db_conn, table: str, column_names: List[str], normalizer=None): - cursor = db_conn.cursor() - - joined_columns = ",".join([col for col in column_names]) - cursor.execute(f"SELECT {joined_columns} FROM {table}") - for row in cursor: - if len(row) != len(column_names): - print(f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}") - continue - - row_dict = {} - for col_idx in range(0, len(column_names)): - row_dict[column_names[col_idx]] = row[col_idx] - - if normalizer: - row_dict = normalizer(table, row_dict) - - values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) - insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n' - output_file.write(insert_statement) - - - - -#with sqlite3.connect(r"C:\Users\gregd\Desktop\autopsy_412.db") as conn, \ -with psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345") as conn, \ - open(r"C:\Users\gregd\Desktop\dbdump.sql", mode="w", encoding='utf-8') as output_file: - - for table, cols in get_pg_table_columns(conn).items(): - # for table, cols in get_sqlite_table_columns(conn).items(): - write_normalized(output_file, conn, table, cols) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index cec54316d2..3bd516801c 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -8,6 +8,8 @@ import os import codecs import datetime import sys +from typing import Callable, Dict, Union, List + import psycopg2 import psycopg2.extras import socket @@ -319,83 +321,32 @@ class TskDbDiff(object): dump_file: a pathto_File, the location to dump the non-blackboard database items """ - conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings) - id_files_table = build_id_files_table(conn.cursor(), isMultiUser) - id_vs_parts_table = build_id_vs_parts_table(conn.cursor(), isMultiUser) - id_vs_info_table = build_id_vs_info_table(conn.cursor(), isMultiUser) - id_fs_info_table = build_id_fs_info_table(conn.cursor(), isMultiUser) - id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser) - id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser) - id_legacy_artifact_types = build_id_legacy_artifact_types_table(conn.cursor(), isMultiUser) - id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser) - id_images_table = build_id_image_names_table(conn.cursor(), isMultiUser) - id_accounts_table = build_id_accounts_table(conn.cursor(), isMultiUser) - id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table, id_accounts_table) + conn, output_file = db_connect(db_file, isMultiUser, pgSettings) + guid_utils = TskGuidUtils.create(conn) - if isMultiUser: # Use PostgreSQL - os.environ['PGPASSWORD']=pgSettings.password - pgDump = ["pg_dump", "--inserts", "-U", pgSettings.username, "-h", pgSettings.pgHost, "-p", pgSettings.pgPort, "-d", db_file, "-E", "utf-8", "-T", "blackboard_artifacts", "-T", "blackboard_attributes", "-f", "postgreSQLDump.sql"] - subprocess.call(pgDump) - postgreSQL_db = codecs.open("postgreSQLDump.sql", "r", "utf-8") - # Write to the database dump - with codecs.open(dump_file, "wb", "utf_8") as db_log: - dump_line = '' - for line in postgreSQL_db: - line = line.strip('\r\n ') - # Deal with pg_dump result file - if (line.startswith('--') 
or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): # It's comment or alter statement or catalog entry or set idle entry or empty line - continue - elif not line.endswith(';'): # Statement not finished - dump_line += line - continue - else: - dump_line += line - if 'INSERT INTO image_gallery_groups_seen' in dump_line: - dump_line = '' - continue; - dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) - db_log.write('%s\n' % dump_line) - dump_line = '' - postgreSQL_db.close() - else: # use Sqlite - # Delete the blackboard tables - conn.text_factory = lambda x: x.decode("utf-8", "ignore") - conn.execute("DROP TABLE blackboard_artifacts") - conn.execute("DROP TABLE blackboard_attributes") - # Write to the database dump - with codecs.open(dump_file, "wb", "utf_8") as db_log: - for line in conn.iterdump(): - if 'INSERT INTO "image_gallery_groups_seen"' in line: - continue - line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) - db_log.write('%s\n' % line) - # Now sort the file - srtcmdlst = ["sort", dump_file, "-o", dump_file] - subprocess.call(srtcmdlst) + if isMultiUser: + table_cols = get_pg_table_columns(conn) + schema = get_pg_schema(pgSettings.username, pgSettings.password, pgSettings.pgHost, pgSettings.pgPort) + else: + table_cols = get_sqlite_table_columns(conn) + schema = get_sqlite_schema(conn) + + output_file.write(schema + "\n") + for table, cols in sorted(table_cols.items(), key=lambda pr: pr[0]): + normalizer = TABLE_NORMALIZATIONS[table] if table in TABLE_NORMALIZATIONS else None + write_normalized(guid_utils, output_file, conn, table, cols, normalizer) + + # Now sort the file + # srtcmdlst = ["sort", dump_file, "-o", dump_file] + # subprocess.call(srtcmdlst) conn.close() # cleanup the backup - if backup_db_file: - os.remove(backup_db_file) - return id_obj_path_table + # if backup_db_file: + # os.remove(backup_db_file) + return guid_utils.obj_id_guids - def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings): - """Dumps the given database to text files for later comparison. 
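# --- Editor's note: illustrative sketch, not part of the patch. The new dump path above first
# writes the sanitized schema, then dumps every table in sorted name order so the text output is
# stable across runs, dispatching each table to write_normalized with its TABLE_NORMALIZATIONS
# entry (or None). The lookup "TABLE_NORMALIZATIONS[table] if table in TABLE_NORMALIZATIONS else
# None" is equivalent to dict.get, as sketched here using the same local names as the function above:
for table, cols in sorted(table_cols.items()):
    write_normalized(guid_utils, output_file, conn, table, cols, TABLE_NORMALIZATIONS.get(table))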
- - Args: - db_file: a pathto_File, the database file to dump - dump_file: a pathto_File, the location to dump the non-blackboard database items - bb_dump_file: a pathto_File, the location to dump the blackboard database items - """ - id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings) - TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table) - - - def _get_tmp_file(base, ext): - time = datetime.datetime.now().time().strftime("%H%M%f") - return os.path.join(os.environ['TMP'], base + time + ext) - class TskDbDiffException(Exception): pass @@ -407,451 +358,680 @@ class PGSettings(object): self.username = user self.password = password - def get_pgHost(): + def get_pgHost(self): return self.pgHost - def get_pgPort(): + def get_pgPort(self): return self.pgPort - def get_username(): + def get_username(self): return self.username - def get_password(): + def get_password(self): return self.password -def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table, images_table, artifact_table, accounts_table): - """ Make testing more consistent and reasonable by doctoring certain db entries. - - Args: - line: a String, the line to remove the object id from. - files_table: a map from object ids to file paths. +class TskGuidUtils: + """ + This class provides guids for potentially volatile data. """ - # Sqlite statement use double quotes for table name, PostgreSQL doesn't. We check both databases results for normalization. - files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 - path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 - object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 - vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 - report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 - layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 - data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find('INSERT INTO data_source_info ') > -1 - event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find('INSERT INTO tsk_event_descriptions ') > -1 - events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 - ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 - examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 - ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find('INSERT INTO image_gallery_groups ') > -1 - ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find('INSERT INTO image_gallery_groups_seen ') > -1 - os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 - os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find('INSERT INTO tsk_os_account_attributes') > -1 - os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find('INSERT INTO tsk_os_account_instances') > -1 - data_artifacts_index = line.find('INSERT INTO 
"tsk_data_artifacts"') > -1 or line.find('INSERT INTO tsk_data_artifacts') > -1 - - parens = line[line.find('(') + 1 : line.rfind(')')] - no_space_parens = parens.replace(" ", "") - fields_list = list(csv.reader([no_space_parens], quotechar="'"))[0] - #Add back in the quotechar for values that were originally wrapped (csv reader consumes this character) - fields_list_with_quotes = [] - ptr = 0 - for field in fields_list: - if(len(field) == 0): - field = "'" + field + "'" - else: - start = no_space_parens.find(field, ptr) - if((start - 1) >= 0 and no_space_parens[start - 1] == '\''): - if((start + len(field)) < len(no_space_parens) and no_space_parens[start + len(field)] == '\''): - field = "'" + field + "'" - fields_list_with_quotes.append(field) - if(ptr > 0): - #Add one for each comma that is used to separate values in the original string - ptr+=1 - ptr += len(field) + @staticmethod + def _get_guid_dict(db_conn, select_statement, delim=""): + """ + Retrieves a dictionary mapping the first item selected to a concatenation of the remaining values. + Args: + db_conn: The database connection. + select_statement: The select statement. + delim: The delimiter for how row data from index 1 to end shall be concatenated. - fields_list = fields_list_with_quotes + Returns: A dictionary mapping the key (the first item in the select statement) to a concatenation of the remaining values. - # remove object ID - if files_index: - - # Ignore TIFF size and hash if extracted from PDFs. - # See JIRA-6951 for more details. - # index -3 = 3rd from the end, which is extension - # index -5 = 5th from the end, which is the parent path. - if fields_list[-3] == "'tif'" and fields_list[-5].endswith(".pdf/'"): - fields_list[15] = "'SIZE_IGNORED'" - fields_list[23] = "'MD5_IGNORED'" - fields_list[24] = "'SHA256_IGNORED'" - newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:-1]) + ');') #leave off first (object id) and last (os_account_id) field - # Remove object ID from Unalloc file name - newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) - return newLine - # remove object ID - elif vs_parts_index: - newLine = ('INSERT INTO "tsk_vs_parts" VALUES(' + ', '.join(fields_list[1:]) + ');') - return newLine - # remove group ID - elif ig_groups_index: - newLine = ('INSERT INTO "image_gallery_groups" VALUES(' + ', '.join(fields_list[1:]) + ');') - return newLine - #remove id field - elif ig_groups_seen_index: - # Only removing the id and group_id fields for now. May need to care about examiner_id and seen fields in future. 
- newLine = ('INSERT INTO "image_gallery_groups_seen" VALUES(' + ', '.join(fields_list[2:]) + ');') - return newLine - # remove object ID - elif path_index: - obj_id = int(fields_list[0]) - objValue = files_table[obj_id] - # remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory - idx_pre = fields_list[1].find('EmbeddedFileExtractor') + len('EmbeddedFileExtractor') - if idx_pre > -1: - idx_pos = fields_list[1].find('\\', idx_pre + 2) - dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file seperator - dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')] - pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:] - else: - pathValue = fields_list[1] - # remove localhost from postgres par_obj_name - multiOutput_idx = pathValue.find('ModuleOutput') - if multiOutput_idx > -1: - pathValue = "'" + pathValue[pathValue.find('ModuleOutput'):] #postgres par_obj_name include losthost + """ + cursor = db_conn.cursor() + cursor.execute(select_statement) + ret_dict = {} + for row in cursor: + ret_dict[row[0]] = delim.join([str(col) for col in row[1:]]) - newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');') - return newLine - # remove object ID - elif layout_index: - obj_id = fields_list[0] - path= files_table[int(obj_id)] - newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', '.join(fields_list[1:]) + ');') - # Remove object ID from Unalloc file name - newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) - return newLine - # remove object ID - elif object_index: - obj_id = fields_list[0] - parent_id = fields_list[1] - newLine = 'INSERT INTO "tsk_objects" VALUES(' - path = None - parent_path = None + return ret_dict - #if obj_id or parent_id is invalid literal, we simple return the values as it is - try: - obj_id = int(obj_id) - if parent_id != 'NULL': - parent_id = int(parent_id) - except Exception as e: - print(obj_id, parent_id) - return line + @staticmethod + def create(db_conn): + """ + Creates an instance of this class by querying for relevant guid data. + Args: + db_conn: The database connection. - if obj_id in files_table.keys(): - path = files_table[obj_id] - elif obj_id in vs_parts_table.keys(): - path = vs_parts_table[obj_id] - elif obj_id in vs_info_table.keys(): - path = vs_info_table[obj_id] - elif obj_id in fs_info_table.keys(): - path = fs_info_table[obj_id] - elif obj_id in reports_table.keys(): - path = reports_table[obj_id] - # remove host name (for multi-user) and dates/times from path for reports - if path is not None: - if 'ModuleOutput' in path: - # skip past the host name (if any) - path = path[path.find('ModuleOutput'):] - if 'BulkExtractor' in path or 'Smirk' in path: - # chop off the last folder (which contains a date/time) - path = path[:path.rfind('\\')] - if 'Reports\\AutopsyTestCase HTML Report' in path: - path = 'Reports\\AutopsyTestCase HTML Report' + Returns: The instance of this class. 
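# --- Editor's note: illustrative sketch, not part of the patch. TskGuidUtils._get_guid_dict maps
# the first selected column to the remaining columns joined with the delimiter; the in-memory
# table and row below are made-up sample data.
import sqlite3

demo_conn = sqlite3.connect(":memory:")
demo_conn.execute("CREATE TABLE tsk_files (obj_id INTEGER, parent_path TEXT, name TEXT)")
demo_conn.execute("INSERT INTO tsk_files VALUES (2, '/img/', 'file.txt')")

# -> {2: '/img/file.txt'}
print(TskGuidUtils._get_guid_dict(demo_conn, "SELECT obj_id, parent_path, name FROM tsk_files"))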
- if parent_id in files_table.keys(): - parent_path = files_table[parent_id] - elif parent_id in vs_parts_table.keys(): - parent_path = vs_parts_table[parent_id] - elif parent_id in vs_info_table.keys(): - parent_path = vs_info_table[parent_id] - elif parent_id in fs_info_table.keys(): - parent_path = fs_info_table[parent_id] - elif parent_id in images_table.keys(): - parent_path = images_table[parent_id] - elif parent_id in accounts_table.keys(): - parent_path = accounts_table[parent_id] - elif parent_id == 'NULL': - parent_path = "NULL" - - # Remove host name (for multi-user) from parent_path - if parent_path is not None: - if 'ModuleOutput' in parent_path: - # skip past the host name (if any) - parent_path = parent_path[parent_path.find('ModuleOutput'):] + """ + guid_files = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, parent_path, name FROM tsk_files") + guid_vs_parts = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, addr, start FROM tsk_vs_parts", "_") + guid_fs_info = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info", "_") + guid_image_names = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, name FROM tsk_image_names " + "WHERE sequence=0") + guid_os_accounts = TskGuidUtils._get_guid_dict(db_conn, "SELECT os_account_obj_id, addr FROM tsk_os_accounts") + guid_reports = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, path FROM reports") - if path and parent_path: - # Remove object ID from Unalloc file names and regripper output - path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', path) - path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', path) - parent_path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', parent_path) - parent_path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', parent_path) - return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');' - else: - return newLine + '"OBJECT IDS OMITTED", ' + ', '.join(fields_list[2:]) + ');' #omit parent object id and object id when we cant annonymize them - # remove time-based information, ie Test_6/11/14 -> Test - elif report_index: - fields_list[1] = "AutopsyTestCase" - fields_list[2] = "0" - newLine = ('INSERT INTO "reports" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id - return newLine - elif data_source_info_index: - fields_list[1] = "{device id}" - fields_list[4] = "{dateTime}" - newLine = ('INSERT INTO "data_source_info" VALUES(' + ','.join(fields_list) + ');') - return newLine - elif ingest_job_index: - fields_list[2] = "{host_name}" - start_time = int(fields_list[3]) - end_time = int(fields_list[4]) - if (start_time <= end_time): - fields_list[3] = "0" - fields_list[4] = "0" - newLine = ('INSERT INTO "ingest_jobs" VALUES(' + ','.join(fields_list) + ');') - return newLine - elif examiners_index: - fields_list[1] = "{examiner_name}" - newLine = ('INSERT INTO "tsk_examiners" VALUES(' + ','.join(fields_list) + ');') - return newLine - # remove all timing dependent columns from events table - elif events_index: - newLine = ('INSERT INTO "tsk_events" VALUES(' + ','.join(fields_list[1:2]) + ');') - return newLine - # remove object ids from event description table - elif event_description_index: - # replace object ids with information that is deterministic - file_obj_id = int(fields_list[5]) - object_id = int(fields_list[4]) - legacy_artifact_id = 'NULL' - if (fields_list[6] != 'NULL'): - legacy_artifact_id = int(fields_list[6]) - if file_obj_id != 'NULL' and file_obj_id in files_table.keys(): - fields_list[5] = 
files_table[file_obj_id] - if object_id != 'NULL' and object_id in files_table.keys(): - fields_list[4] = files_table[object_id] - if legacy_artifact_id != 'NULL' and legacy_artifact_id in artifact_table.keys(): - fields_list[6] = artifact_table[legacy_artifact_id] - if fields_list[1] == fields_list[2] and fields_list[1] == fields_list[3]: - fields_list[1] = cleanupEventDescription(fields_list[1]) - fields_list[2] = cleanupEventDescription(fields_list[2]) - fields_list[3] = cleanupEventDescription(fields_list[3]) - newLine = ('INSERT INTO "tsk_event_descriptions" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id - return newLine - elif os_account_index: - newLine = ('INSERT INTO "tsk_os_accounts" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id since value that would be substituted is in diff line already - return newLine - elif os_account_attr_index: - #substitue the account object id for a non changing value - os_account_id = int(fields_list[1]) - fields_list[1] = accounts_table[os_account_id] - #substitue the source object id for a non changing value - source_obj_id = int(fields_list[3]) - if source_obj_id in files_table.keys(): - fields_list[3] = files_table[source_obj_id] - elif source_obj_id in vs_parts_table.keys(): - fields_list[3] = vs_parts_table[source_obj_id] - elif source_obj_id in vs_info_table.keys(): - fields_list[3] = vs_info_table[source_obj_id] - elif source_obj_id in fs_info_table.keys(): - fields_list[3] = fs_info_table[source_obj_id] - elif source_obj_id in images_table.keys(): - fields_list[3] = images_table[source_obj_id] - elif source_obj_id in accounts_table.keys(): - fields_list[3] = accounts_table[source_obj_id] - elif source_obj_id == 'NULL': - fields_list[3] = "NULL" - newLine = ('INSERT INTO "tsk_os_account_attributes" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id - return newLine - elif os_account_instances_index: - os_account_id = int(fields_list[1]) - fields_list[1] = accounts_table[os_account_id] - newLine = ('INSERT INTO "tsk_os_account_instances" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id - return newLine - elif data_artifacts_index: - art_obj_id = int(fields_list[0]) - if art_obj_id in files_table.keys(): - fields_list[0] = files_table[art_obj_id] - else: - fields_list[0] = 'Artifact Object ID Omitted' - account_obj_id = int(fields_list[1]) - if account_obj_id in files_table.keys(): - fields_list[1] = files_table[account_obj_id] - else: - fields_list[1] = 'Account Object ID Omitted' - newLine = ('INSERT INTO "tsk_data_artifacts" VALUES(' + ','.join(fields_list[:]) + ');') # remove ids - return newLine + objid_artifacts = TskGuidUtils._get_guid_dict(db_conn, + "SELECT " + "blackboard_artifacts.artifact_obj_id, " + "blackboard_artifact_types.type_name FROM " + "blackboard_artifacts INNER JOIN blackboard_artifact_types " + "ON blackboard_artifact_types.artifact_type_id = " + "blackboard_artifacts.artifact_type_id") + + cursor = db_conn.cursor() + cursor.execute("SELECT obj_id, par_obj_id FROM tsk_objects") + par_obj_objects = dict([(row[0], row[1]) for row in cursor]) + + guid_artifacts = {} + for k, v in objid_artifacts.items(): + if k in par_obj_objects: + par_obj_id = par_obj_objects[k] + + # check for artifact parent in files, images, reports + path = '' + for artifact_parent_dict in [guid_files, guid_image_names, guid_reports]: + if par_obj_id in artifact_parent_dict: + path = artifact_parent_dict[par_obj_id] + break + + guid_artifacts[par_obj_id] = "/".join([path, v]) + + return TskGuidUtils( + 
obj_id_guids={**guid_files, **guid_reports, **guid_os_accounts, **guid_vs_parts, + **guid_fs_info, **guid_fs_info, **guid_image_names}, + artifact_types=objid_artifacts) + + artifact_types: Dict[int, str] + obj_id_guids: Dict[int, any] + + def __init__(self, obj_id_guids: Dict[int, any], artifact_types: Dict[int, str]): + """ + Main constructor. + Args: + obj_id_guids: A dictionary mapping object ids to their guids. + artifact_types: A dictionary mapping artifact ids to their types. + """ + self.artifact_types = artifact_types + self.obj_id_guids = obj_id_guids + + def get_guid_for_objid(self, obj_id, omitted_value: Union[str, None] = 'Object ID Omitted'): + """ + Returns the guid for the specified object id or returns omitted value if the object id is not found. + Args: + obj_id: The object id. + omitted_value: The value if no object id mapping is found. + + Returns: The relevant guid or the omitted_value. + + """ + return self.obj_id_guids[obj_id] if obj_id in self.obj_id_guids else omitted_value + + def get_guid_for_file_objid(self, obj_id, omitted_value: Union[str, None] = 'Object ID Omitted'): + # TODO this is just an alias; could probably be removed + return self.get_guid_for_objid(obj_id, omitted_value) + + def get_guid_for_accountid(self, account_id, omitted_value: Union[str, None] = 'Account ID Omitted'): + # TODO this is just an alias; could probably be removed + return self.get_guid_for_objid(account_id, omitted_value) + + def get_guid_for_artifactid(self, artifact_id, omitted_value: Union[str, None] = 'Artifact ID Omitted'): + """ + Returns the guid for the specified artifact id or returns omitted value if the artifact id is not found. + Args: + artifact_id: The artifact id. + omitted_value: The value if no object id mapping is found. + + Returns: The relevant guid or the omitted_value. + """ + return self.artifact_types[artifact_id] if artifact_id in self.artifact_types else omitted_value + + +class NormalizeRow: + """ + Given a dictionary representing a row (i.e. column name mapped to value), returns a normalized representation of + that row such that the values should be less volatile from run to run. + """ + row_masker: Callable[[TskGuidUtils, Dict[str, any]], Dict[str, any]] + + def __init__(self, row_masker: Callable[[TskGuidUtils, Dict[str, any]], Union[Dict[str, any], None]]): + """ + Main constructor. + Args: + row_masker: The function to be called to mask the specified row. + """ + self.row_masker = row_masker + + def normalize(self, guid_util: TskGuidUtils, row: Dict[str, any]) -> Union[Dict[str, any], None]: + """ + Normalizes a row such that the values should be less volatile from run to run. + Args: + guid_util: The TskGuidUtils instance providing guids for volatile ids. + row: The row values mapping column name to value. + + Returns: The normalized row or None if the row should be ignored. + + """ + return self.row_masker(guid_util, row) + + +class NormalizeColumns(NormalizeRow): + """ + Utility for normalizing specific column values of a row so they are not volatile values that will change from run + to run. + """ + + @classmethod + def _normalize_col_vals(cls, + col_mask: Dict[str, Union[any, Callable[[TskGuidUtils, any], any]]], + guid_util: TskGuidUtils, + row: Dict[str, any]): + """ + Normalizes column values for each column rule provided. + Args: + col_mask: A dictionary mapping columns to either the replacement value or a function to retrieve the + replacement value given the TskGuidUtils instance and original value as arguments. 
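# --- Editor's note: illustrative sketch, not part of the patch. The guid lookups above fall back
# to a fixed placeholder when an id is unknown, so dumps stay diffable even when an object cannot
# be resolved. The sample mappings below are made up.
demo_guids = TskGuidUtils(obj_id_guids={2: '/img/file.txt'}, artifact_types={10: 'TSK_WEB_HISTORY'})

print(demo_guids.get_guid_for_objid(2))        # -> /img/file.txt
print(demo_guids.get_guid_for_objid(99))       # -> Object ID Omitted
print(demo_guids.get_guid_for_artifactid(10))  # -> TSK_WEB_HISTORY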
+ guid_util: The TskGuidUtil used to provide guids for volatile values. + row: The dictionary representing the row mapping column names to values. + + Returns: The new row representation. + + """ + row_copy = row.copy() + for key, val in col_mask.items(): + # only replace values if present in row + if key in row_copy: + # if a column replacing function, call with original value + if isinstance(val, Callable): + row_copy[key] = val(guid_util, row[key]) + # otherwise, just replace with mask value + else: + row_copy[key] = val + + return row_copy + + def __init__(self, col_mask: Dict[str, Union[any, Callable[[any], any]]]): + super().__init__(lambda guid_util, row: NormalizeColumns._normalize_col_vals(col_mask, guid_util, row)) + + +def get_path_segs(path: Union[str, None]) -> Union[List[str], None]: + """ + Breaks a path string into its folders and filenames. + Args: + path: The path string or None. + + Returns: The path segments or None. + + """ + if path: + return list(filter(lambda x: len(x.strip()) > 0, [path for path in os.path.normpath(path).split(os.sep)])) else: - return line - -def cleanupEventDescription(description): - test = re.search("^'\D+:\d+'$", description) - if test is not None: - return re.sub(":\d+", ":", description) + return None + + +def index_of(lst, search_item) -> int: + """ + Returns the index of the item in the list or -1. + Args: + lst: The list. + search_item: The item to search for. + + Returns: The index in the list of the item or -1. + + """ + for idx, item in enumerate(lst): + if item == search_item: + return idx + + return -1 + + +def get_sql_insert_value(val) -> str: + """ + Returns the value that would appear in a sql insert statement (i.e. string becomes 'string', None becomes NULL) + Args: + val: The original value. + + Returns: The sql insert equivalent value. + + """ + if val is None: + return "NULL" + + if isinstance(val, str): + escaped_val = val.replace('\n', '\\n').replace("'", "''") + return f"'{escaped_val}'" + + return str(val) + + +def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: + """ + Retrieves a dictionary mapping table names to a list of all the columns for that table + where the columns are in ordinal value. + Args: + conn: The database connection. + + Returns: A dictionary of the form { table_name: [col_name1, col_name2...col_nameN] } + + """ + cur = conn.cursor() + cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") + tables = list([table[0] for table in cur.fetchall()]) + cur.close() + + to_ret = {} + for table in tables: + cur = conn.cursor() + cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) + to_ret[table] = list([col[0] for col in cur.fetchall()]) + + return to_ret + + +def get_pg_table_columns(conn) -> Dict[str, List[str]]: + """ + Returns a dictionary mapping table names to the list of their columns in ordinal order. + Args: + conn: The pg database connection. + + Returns: The dictionary of tables mapped to a list of their ordinal-orderd column names. 
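# --- Editor's note: illustrative sanity checks, not part of the patch, for the small helpers above.
print(get_path_segs("ModuleOutput/Embedded File Extractor/f_000001.jpg"))
# -> ['ModuleOutput', 'Embedded File Extractor', 'f_000001.jpg']   (works with either path separator)

print(get_sql_insert_value(None))    # -> NULL
print(get_sql_insert_value(42))      # -> 42
print(get_sql_insert_value("it's"))  # -> 'it''s'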
+ """ + cursor = conn.cursor() + cursor.execute(""" + SELECT cols.table_name, cols.column_name + FROM information_schema.columns cols + WHERE cols.column_name IS NOT NULL + AND cols.table_name IS NOT NULL + AND cols.table_name IN ( + SELECT tables.tablename FROM pg_catalog.pg_tables tables + WHERE LOWER(schemaname) = 'public' + ) + ORDER by cols.table_name, cols.ordinal_position; + """) + mapping = {} + for row in cursor: + mapping.setdefault(row[0], []).append(row[1]) + + cursor.close() + return mapping + + +def sanitize_schema(original: str) -> str: + """ + Sanitizes sql script representing table/index creations. + Args: + original: The original sql schema creation script. + + Returns: The sanitized schema. + """ + sanitized_lines = [] + dump_line = '' + for line in original.splitlines(): + line = line.strip('\r\n ') + # It's comment or alter statement or catalog entry or set idle entry or empty line + if (line.startswith('--') or line.lower().startswith( + 'alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): + continue + elif line.endswith(';'): # Statement not finished + dump_line += line + sanitized_lines.append(dump_line) + dump_line = '' + else: + dump_line += line + + if len(dump_line.strip()) > 0: + sanitized_lines.append(dump_line) + + return "\n".join(sanitized_lines) + + +def get_pg_schema(pg_username: str, pg_pword: str, pg_host: str, pg_port: str): + """ + Gets the schema to be added to the dump text from the postgres database. + Args: + pg_username: The postgres user name. + pg_pword: The postgres password. + pg_host: The postgres host. + pg_port: The postgres port. + + Returns: The normalized schema. + + """ + os.environ['PGPASSWORD'] = pg_pword + pg_dump = ["pg_dump", "--inserts", "-U", pg_username, "-h", pg_host, "-p", pg_port, + "-T", "blackboard_artifacts", "-T", "blackboard_attributes"] + output = subprocess.check_output(pg_dump) + return sanitize_schema(output) + + +def get_sqlite_schema(db_conn): + """ + Gets the schema to be added to the dump text from the sqlite database. + Args: + db_conn: The database connection. + + Returns: The normalized schema. + + """ + cursor = db_conn.cursor() + query = "SELECT sql FROM sqlite_master " \ + "WHERE type IN ('table', 'index') AND sql IS NOT NULL " \ + "ORDER BY type DESC, tbl_name ASC" + + cursor.execute(query) + schema = '\n'.join([str(row[0]) + ';' for row in cursor]) + return sanitize_schema(schema) + + +def _mask_event_desc(desc: str) -> str: + """ + Masks dynamic event descriptions of the form ":" so the artifact id is no longer + present. + Args: + desc: The original description. + + Returns: The normalized description. + + """ + match = re.search(r"^\s*(\D+):\d+\s*$", desc.strip()) + if match: + return f"{match.group(1)}:" + + return desc + + +def normalize_tsk_event_descriptions(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes event description rows masking possibly changing column values. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized event description row. 
+ """ + row_copy = row.copy() + # replace object ids with information that is deterministic + row_copy['content_obj_id'] = guid_util.get_guid_for_file_objid(row['content_obj_id']) + row_copy['data_source_obj_id'] = guid_util.get_guid_for_file_objid(row['data_source_obj_id']) + row_copy['artifact_id'] = guid_util.get_guid_for_artifactid(row['artifact_id']) + + if row['full_description'] == row['med_description'] == row['short_description']: + row_copy['full_description'] = _mask_event_desc(row['full_description']) + row_copy['med_description'] = _mask_event_desc(row['med_description']) + row_copy['short_description'] = _mask_event_desc(row['short_description']) + + return row_copy + + +def normalize_ingest_jobs(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes ingest jobs table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized ingest job row. + + """ + row_copy = row.copy() + row_copy['host_name'] = "{host_name}" + + start_time = row['start_date_time'] + end_time = row['end_date_time'] + if start_time <= end_time: + row_copy['start_date_time'] = 0 + row_copy['end_date_time'] = 0 + + return row_copy + + +def normalize_unalloc_files(path_str: Union[str, None]) -> Union[str, None]: + """ + Normalizes a path string removing timestamps from unalloc files. + Args: + path_str: The original path string. + + Returns: The path string where timestamps are removed from unalloc strings. + + """ + return re.sub('Unalloc_[0-9]+_', 'Unalloc_', path_str) if path_str else None + + +def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: + """ + Normalizes a path string removing timestamps from regripper files. + Args: + path_str: The original path string. + + Returns: The path string where timestamps are removed from regripper paths. + + """ + return re.sub(r'regripper\-[0-9]+\-full', 'regripper-full', path_str) if path_str else None + + +def normalize_tsk_files(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes files table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized files table row. + + """ + # Ignore TIFF size and hash if extracted from PDFs. + # See JIRA-6951 for more details. + row_copy = row.copy() + if row['extension'] and row['extension'].strip().lower() == 'tif' and \ + row['parent_path'] and row['parent_path'].strip().lower().endswith('.pdf/'): + row_copy['size'] = "SIZE_IGNORED" + row_copy['md5'] = "MD5_IGNORED" + row_copy['sha256'] = "SHA256_IGNORED" + + row_copy['obj_id'] = MASKED_OBJ_ID + row_copy['os_account_obj_id'] = 'MASKED_OS_ACCOUNT_OBJ_ID' + row_copy['parent_path'] = normalize_unalloc_files(row['parent_path']) + row_copy['name'] = normalize_unalloc_files(row['name']) + return row_copy + + +def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes file path table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized file path table row. 
+ """ + row_copy = row.copy() + path = row['path'] + if path: + path_parts = get_path_segs(path) + module_output_idx = index_of(path_parts, 'ModuleOutput') + if module_output_idx >= 0: + # remove everything up to and including ModuleOutput if ModuleOutput present + path_parts = path_parts[module_output_idx:] + if len(path_parts) > 1 and path_parts[0] == 'Embedded File Extractor': + match = re.match(r'^(.+?)_[0-9]*$', path_parts[1]) + if match: + path_parts[1] = match.group(1) + + row_copy['path'] = os.path.join(*path_parts) if len(path_parts) > 0 else '/' + + row_copy['obj_id'] = guid_util.get_guid_for_file_objid(row['obj_id']) + return row_copy + + +def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes object table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + + Returns: The normalized object table row. + """ + parent_id = row['par_obj_id'] + path = guid_util.get_guid_for_objid(row['obj_id'], omitted_value=None) + row_copy = row.copy() + + # remove host name (for multi-user) and dates/times from path for reports + if path is not None: + path_parts = get_path_segs(path) + module_output_idx = index_of(path_parts, 'ModuleOutput') + if module_output_idx >= 0: + # remove everything up to and including ModuleOutput if ModuleOutput present + path_parts = path_parts[module_output_idx:] + + if "BulkExtractor" in path_parts or "Smirk" in path_parts: + # chop off the last folder (which contains a date/time) + path_parts = path_parts[:-1] + + for idx in range(0, len(path_parts) - 1): + if path_parts[idx] == "Reports" and path_parts[idx + 1] == "AutopsyTestCase HTML Report": + path_parts = ["Reports", "AutopsyTestCase HTML Report"] + + path = os.path.join(*path_parts) if len(path_parts) > 0 else '/' + + parent_path = guid_util.get_guid_for_objid(parent_id, omitted_value=None) + + # Remove host name (for multi-user) from parent_path + if parent_path is not None: + parent_path_parts = get_path_segs(parent_path) + module_output_idx = index_of(parent_path_parts, 'ModuleOutput') + if module_output_idx >= 0: + parent_path_parts = parent_path_parts[module_output_idx:] + + parent_path = os.path.join(*parent_path_parts) if len(parent_path_parts) > 0 else '/' + + # handle regripper and unalloc file replacements + if path and parent_path: + row_copy['obj_id'] = normalize_regripper_files(normalize_unalloc_files(path)) + row_copy['par_obj_id'] = normalize_regripper_files(normalize_unalloc_files(parent_path)) else: - return description + row_copy['obj_id'] = MASKED_OBJ_ID + row_copy['par_obj_id'] = "MASKED_PARENT_OBJ_ID" -def getAssociatedArtifactType(cur, artifact_id, isMultiUser): - if isMultiUser: - cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s",[artifact_id]) - else: - cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=?",[artifact_id]) + return row_copy - info = cur.fetchone() - - return "File path: " + info[0] + " Artifact Type: 
" + info[1] -def build_id_files_table(db_cursor, isPostgreSQL): - """Build the map of object ids to file paths. +MASKED_OBJ_ID = "MASKED_OBJ_ID" +MASKED_ID = "MASKED_ID" - Args: - db_cursor: the database cursor +IGNORE_TABLE = "IGNORE_TABLE" + +TableNormalization = Union[IGNORE_TABLE, NormalizeRow] + +""" +This dictionary maps tables where data should be specially handled to how they should be handled. +""" +TABLE_NORMALIZATIONS: Dict[str, TableNormalization] = { + "image_gallery_groups_seen": IGNORE_TABLE, + "blackboard_artifacts": IGNORE_TABLE, + "blackboard_attributes": IGNORE_TABLE, + "tsk_files": NormalizeRow(normalize_tsk_files), + "tsk_vs_parts": NormalizeColumns({ + "obj_id": MASKED_OBJ_ID + }), + "image_gallery_groups": NormalizeColumns({ + "obj_id": MASKED_OBJ_ID + }), + "tsk_files_path": NormalizeRow(normalize_tsk_files_path), + "tsk_file_layout": NormalizeColumns({ + "obj_id": lambda guid_util, col: guid_util.get_guid_for_file_objid(col) + }), + "tsk_objects": NormalizeRow(normalize_tsk_objects), + "reports": NormalizeColumns({ + "obj_id": MASKED_OBJ_ID, + "path": "AutopsyTestCase", + "crtime": 0 + }), + "data_source_info": NormalizeColumns({ + "device_id": "{device id}", + "added_date_time": "{dateTime}" + }), + "ingest_jobs": NormalizeRow(normalize_ingest_jobs), + "tsk_examiners": NormalizeColumns({ + "login_name": "{examiner_name}" + }), + "tsk_events": NormalizeColumns({ + "event_id": "MASKED_EVENT_ID", + "event_description_id": None, + "time": None, + }), + "tsk_event_descriptions": NormalizeRow(normalize_tsk_event_descriptions), + "tsk_os_accounts": NormalizeColumns({ + "os_account_obj_id": MASKED_OBJ_ID + }), + "tsk_os_account_attributes": NormalizeColumns({ + "id": MASKED_ID, + "os_account_obj_id": lambda guid_util, col: guid_util.get_guid_for_accountid(col), + "source_obj_id": lambda guid_util, col: guid_util.get_guid_for_objid(col) + }), + "tsk_os_account_instances": NormalizeColumns({ + "id": MASKED_ID, + "os_account_obj_id": lambda guid_util, col: guid_util.get_guid_for_accountid(col) + }), + "tsk_data_artifacts": NormalizeColumns({ + "artifact_obj_id": + lambda guid_util, col: guid_util.get_guid_for_file_objid(col, omitted_value="Artifact Object ID Omitted"), + "os_account_obj_id": + lambda guid_util, col: guid_util.get_guid_for_file_objid(col, omitted_value="Account Object ID Omitted"), + }) +} + + +def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, column_names: List[str], + normalizer: Union[TableNormalization, None] = None): """ - # for each row in the db, take the object id, parent path, and name, then create a tuple in the dictionary - # with the object id as the key and the full file path (parent + name) as the value - mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, parent_path, name FROM tsk_files")]) - return mapping - -def build_id_vs_parts_table(db_cursor, isPostgreSQL): - """Build the map of object ids to vs_parts. - + Outputs rows of a file as their normalized values (where values should not change from run to run). Args: - db_cursor: the database cursor + guid_utils: Provides guids to replace values that would potentially change from run to run. + output_file: The file where the normalized dump will be written. + db_conn: The database connection. + table: The name of the table. + column_names: The name of the columns in the table in ordinal order. + normalizer: The normalizer (if any) to use so that data is properly normalized. 
""" - # for each row in the db, take the object id, addr, and start, then create a tuple in the dictionary - # with the object id as the key and (addr + start) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, addr, start FROM tsk_vs_parts")]) - return mapping + if normalizer == IGNORE_TABLE: + return -def build_id_vs_info_table(db_cursor, isPostgreSQL): - """Build the map of object ids to vs_info. + cursor = db_conn.cursor() - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, vs_type, and img_offset, then create a tuple in the dictionary - # with the object id as the key and (vs_type + img_offset) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")]) - return mapping + joined_columns = ",".join([col for col in column_names]) + cursor.execute(f"SELECT {joined_columns} FROM {table}") + for row in cursor: + if len(row) != len(column_names): + print( + f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}") + continue - -def build_id_fs_info_table(db_cursor, isPostgreSQL): - """Build the map of object ids to fs_info. + row_dict = {} + for col_idx in range(0, len(column_names)): + row_dict[column_names[col_idx]] = row[col_idx] - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, img_offset, and fs_type, then create a tuple in the dictionary - # with the object id as the key and (img_offset + fs_type) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")]) - return mapping + if normalizer and isinstance(normalizer, NormalizeRow): + row_masker: NormalizeRow = normalizer + row_dict = row_masker.normalize(guid_utils, row_dict) -def build_id_objects_table(db_cursor, isPostgreSQL): - """Build the map of object ids to par_id. + if row_dict is not None: + # entries = [] + # for idx in range(0, len(column_names)): + # column = column_names[idx] + # value = get_sql_insert_value(row_dict[column] if column in row_dict else None) + # entries.append((column, value)) + # insert_values = ", ".join([f"{pr[0]}: {pr[1]}" for pr in entries]) + # insert_statement = f"{table}: {{{insert_values}}}\n" - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and par_obj_id, type as the value - mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) - return mapping + values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) + insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n' + output_file.write(insert_statement) -def build_id_image_names_table(db_cursor, isPostgreSQL): - """Build the map of object ids to name. 
- - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id and name then create a tuple in the dictionary - # with the object id as the key and name, type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, name FROM tsk_image_names WHERE sequence=0")]) - #data_sources which are logical file sets will be found in the files table - return mapping - -def build_id_artifact_types_table(db_cursor, isPostgreSQL): - """Build the map of object ids to artifact ids. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and artifact type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) - return mapping - -def build_id_legacy_artifact_types_table(db_cursor, isPostgreSQL): - """Build the map of legacy artifact ids to artifact type. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the legacy artifact id then create a tuple in the dictionary - # with the artifact id as the key and artifact type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) - return mapping - -def build_id_reports_table(db_cursor, isPostgreSQL): - """Build the map of report object ids to report path. - - Args: - db_cursor: the database cursor - """ - # for each row in the reports table in the db, create an obj_id -> path map - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, path FROM reports")]) - return mapping - -def build_id_accounts_table(db_cursor, isPostgreSQL): - """Build the map of object ids to OS account SIDs. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id and account SID then creates a tuple in the dictionary - # with the object id as the key and the OS Account's SID as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT os_account_obj_id, addr FROM tsk_os_accounts")]) - return mapping - -def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table, images_table, accounts_table): - """Build the map of object ids to artifact ids. 
- - Args: - files_table: obj_id, path - objects_table: obj_id, par_obj_id, type - artifacts_table: obj_id, artifact_type_name - reports_table: obj_id, path - images_table: obj_id, name - accounts_table: obj_id, addr - """ - # make a copy of files_table and update it with new data from artifacts_table and reports_table - mapping = files_table.copy() - for k, v in objects_table.items(): - path = "" - if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id - if k in reports_table.keys(): # For a report we use the report path - par_obj_id = v[0] - if par_obj_id is not None: - mapping[k] = reports_table[k] - elif k in artifacts_table.keys(): # For an artifact we use it's par_obj_id's path+name plus it's artifact_type name - par_obj_id = v[0] # The parent of an artifact can be a file or a report - if par_obj_id in mapping.keys(): - path = mapping[par_obj_id] - elif par_obj_id in reports_table.keys(): - path = reports_table[par_obj_id] - elif par_obj_id in images_table.keys(): - path = images_table[par_obj_id] - mapping[k] = path + "/" + artifacts_table[k] - elif k in accounts_table.keys(): # For an OS Account object ID we use its addr field which is the account SID - mapping[k] = accounts_table[k] - elif v[0] not in mapping.keys(): - if v[0] in artifacts_table.keys(): - par_obj_id = objects_table[v[0]] - path = mapping[par_obj_id] - mapping[k] = path + "/" + artifacts_table[v[0]] - return mapping def db_connect(db_file, isMultiUser, pgSettings=None): if isMultiUser: # use PostgreSQL @@ -867,12 +1047,6 @@ def db_connect(db_file, isMultiUser, pgSettings=None): os.chmod (backup_db_file, 0o777) return sqlite3.connect(backup_db_file), backup_db_file -def sql_select_execute(cursor, isPostgreSQL, sql_stmt): - if isPostgreSQL: - cursor.execute(sql_stmt) - return cursor.fetchall() - else: - return cursor.execute(sql_stmt) def main(): try: diff --git a/test/script/tskdbdiff2.py b/test/script/tskdbdiff2.py deleted file mode 100644 index 7ff02d0c30..0000000000 --- a/test/script/tskdbdiff2.py +++ /dev/null @@ -1,969 +0,0 @@ -# Requires python3 - -import re -import sqlite3 -import subprocess -import shutil -import os -import codecs -import datetime -import sys -from typing import Dict, List - -import psycopg2 -import psycopg2.extras -import socket -import csv - -class TskDbDiff(object): - """Compares two TSK/Autospy SQLite databases. - - Attributes: - gold_artifacts: - autopsy_artifacts: - gold_attributes: - autopsy_attributes: - gold_objects: - autopsy_objects: - artifact_comparison: - attribute_comparision: - report_errors: a listof_listof_String, the error messages that will be - printed to screen in the run_diff method - passed: a boolean, did the diff pass? - autopsy_db_file: - gold_db_file: - """ - def __init__(self, output_db, gold_db, output_dir=None, gold_bb_dump=None, gold_dump=None, verbose=False, isMultiUser=False, pgSettings=None): - """Constructor for TskDbDiff. - - Args: - output_db_path: path to output database (non-gold standard) - gold_db_path: path to gold database - output_dir: (optional) Path to folder where generated files will be put. - gold_bb_dump: (optional) path to file where the gold blackboard dump is located - gold_dump: (optional) path to file where the gold non-blackboard dump is located - verbose: (optional) a boolean, if true, diff results are sent to stdout. 
- """ - - self.output_db_file = output_db - self.gold_db_file = gold_db - self.output_dir = output_dir - self.gold_bb_dump = gold_bb_dump - self.gold_dump = gold_dump - self._generate_gold_dump = False - self._generate_gold_bb_dump = False - self._bb_dump_diff = "" - self._dump_diff = "" - self._bb_dump = "" - self._dump = "" - self.verbose = verbose - self.isMultiUser = isMultiUser - self.pgSettings = pgSettings - - if self.isMultiUser and not self.pgSettings: - print("Missing PostgreSQL database connection settings data.") - sys.exit(1) - - if self.gold_bb_dump is None: - self._generate_gold_bb_dump = True - if self.gold_dump is None: - self._generate_gold_dump = True - - def run_diff(self): - """Compare the databases. - - Raises: - TskDbDiffException: if an error occurs while diffing or dumping the database - """ - - self._init_diff() - id_obj_path_table = -1 - # generate the gold database dumps if necessary - if self._generate_gold_dump: - id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.gold_db_file, self.gold_dump, self.isMultiUser, self.pgSettings) - if self._generate_gold_bb_dump: - TskDbDiff._dump_output_db_bb(self.gold_db_file, self.gold_bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table) - - # generate the output database dumps (both DB and BB) - id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.output_db_file, self._dump, self.isMultiUser, self.pgSettings) - TskDbDiff._dump_output_db_bb(self.output_db_file, self._bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table) - - # Compare non-BB - dump_diff_pass = self._diff(self._dump, self.gold_dump, self._dump_diff) - - # Compare BB - bb_dump_diff_pass = self._diff(self._bb_dump, self.gold_bb_dump, self._bb_dump_diff) - - self._cleanup_diff() - return dump_diff_pass, bb_dump_diff_pass - - - def _init_diff(self): - """Set up the necessary files based on the arguments given at construction""" - if self.output_dir is None: - # No stored files - self._bb_dump = TskDbDiff._get_tmp_file("BlackboardDump", ".txt") - self._bb_dump_diff = TskDbDiff._get_tmp_file("BlackboardDump-Diff", ".txt") - self._dump = TskDbDiff._get_tmp_file("DBDump", ".txt") - self._dump_diff = TskDbDiff._get_tmp_file("DBDump-Diff", ".txt") - else: - self._bb_dump = os.path.join(self.output_dir, "BlackboardDump.txt") - self._bb_dump_diff = os.path.join(self.output_dir, "BlackboardDump-Diff.txt") - self._dump = os.path.join(self.output_dir, "DBDump.txt") - self._dump_diff = os.path.join(self.output_dir, "DBDump-Diff.txt") - - # Sorting gold before comparing (sort behaves differently in different environments) - new_bb = TskDbDiff._get_tmp_file("GoldBlackboardDump", ".txt") - new_db = TskDbDiff._get_tmp_file("GoldDBDump", ".txt") - if self.gold_bb_dump is not None: - srtcmdlst = ["sort", self.gold_bb_dump, "-o", new_bb] - subprocess.call(srtcmdlst) - srtcmdlst = ["sort", self.gold_dump, "-o", new_db] - subprocess.call(srtcmdlst) - self.gold_bb_dump = new_bb - self.gold_dump = new_db - - - def _cleanup_diff(self): - if self.output_dir is None: - #cleanup temp files - os.remove(self._dump) - os.remove(self._bb_dump) - if os.path.isfile(self._dump_diff): - os.remove(self._dump_diff) - if os.path.isfile(self._bb_dump_diff): - os.remove(self._bb_dump_diff) - - if self.gold_bb_dump is None: - os.remove(self.gold_bb_dump) - os.remove(self.gold_dump) - - - def _diff(self, output_file, gold_file, diff_path): - """Compare two text files. 
- - Args: - output_file: a pathto_File, the latest text file - gold_file: a pathto_File, the gold text file - diff_path: The file to write the differences to - Returns False if different - """ - - if (not os.path.isfile(output_file)): - return False - - if (not os.path.isfile(gold_file)): - return False - - # It is faster to read the contents in and directly compare - output_data = codecs.open(output_file, "r", "utf_8").read() - gold_data = codecs.open(gold_file, "r", "utf_8").read() - if (gold_data == output_data): - return True - - # If they are different, invoke 'diff' - diff_file = codecs.open(diff_path, "wb", "utf_8") - # Gold needs to be passed in as 1st arg and output as 2nd - dffcmdlst = ["diff", gold_file, output_file] - subprocess.call(dffcmdlst, stdout = diff_file) - - # create file path for gold files inside output folder. In case of diff, both gold and current run files - # are available in the report output folder. Prefix Gold- is added to the filename. - gold_file_in_output_dir = output_file[:output_file.rfind("/")] + "/Gold-" + output_file[output_file.rfind("/")+1:] - shutil.copy(gold_file, gold_file_in_output_dir) - - return False - - - def _dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table): - """Dumps sorted text results to the given output location. - - Smart method that deals with a blackboard comparison to avoid issues - with different IDs based on when artifacts were created. - - Args: - db_file: a pathto_File, the output database. - bb_dump_file: a pathto_File, the sorted dump file to write to - """ - - unsorted_dump = TskDbDiff._get_tmp_file("dump_data", ".txt") - if isMultiUser: - conn, unused_db = db_connect(db_file, isMultiUser, pgSettings) - artifact_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) - else: # Use Sqlite - conn = sqlite3.connect(db_file) - conn.text_factory = lambda x: x.decode("utf-8", "ignore") - conn.row_factory = sqlite3.Row - artifact_cursor = conn.cursor() - # Get the list of all artifacts (along with type and associated file) - # @@@ Could add a SORT by parent_path in here since that is how we are going to later sort it. 
- artifact_cursor.execute("SELECT tsk_files.parent_path, tsk_files.name, blackboard_artifact_types.display_name, blackboard_artifacts.artifact_id FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id") - database_log = codecs.open(unsorted_dump, "wb", "utf_8") - row = artifact_cursor.fetchone() - appnd = False - counter = 0 - artifact_count = 0 - artifact_fail = 0 - - # Cycle through artifacts - try: - while (row != None): - - # File Name and artifact type - # Remove parent object ID from Unalloc file name - normalizedName = re.sub('^Unalloc_[0-9]+_', 'Unalloc_', row["name"]) - if(row["parent_path"] != None): - database_log.write(row["parent_path"] + normalizedName + ' ') - else: - database_log.write(normalizedName + ' ') - - if isMultiUser: - attribute_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) - else: - attribute_cursor = conn.cursor() - looptry = True - artifact_count += 1 - try: - art_id = "" - art_id = str(row["artifact_id"]) - - # Get attributes for this artifact - if isMultiUser: - attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id = %s ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id]) - else: - attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id =? 
ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id]) - - attributes = attribute_cursor.fetchall() - - # Print attributes - if (len(attributes) == 0): - # @@@@ This should be - database_log.write(' \n') - row = artifact_cursor.fetchone() - continue - - src = attributes[0][0] - for attr in attributes: - numvals = 0 - for x in range(3, 6): - if(attr[x] != None): - numvals += 1 - if(numvals > 1): - msg = "There were too many values for attribute type: " + attr["display_name"] + " for artifact with id #" + str(row["artifact_id"]) + ".\n" - - if(not attr["source"] == src): - msg = "There were inconsistent sources for artifact with id #" + str(row["artifact_id"]) + ".\n" - - try: - if attr["value_type"] == 0: - attr_value_as_string = str(attr["value_text"]) - elif attr["value_type"] == 1: - attr_value_as_string = str(attr["value_int32"]) - elif attr["value_type"] == 2: - attr_value_as_string = str(attr["value_int64"]) - if attr["attribute_type_id"] == 36 and id_obj_path_table != -1 and int(attr_value_as_string) > 0: #normalize positive TSK_PATH_IDs from being object id to a path if the obj_id_path_table was generated - attr_value_as_string = id_obj_path_table[int(attr_value_as_string)] - elif attr["value_type"] == 3: - attr_value_as_string = "%20.10f" % float((attr["value_double"])) #use exact format from db schema to avoid python auto format double value to (0E-10) scientific style - elif attr["value_type"] == 4: - attr_value_as_string = "bytes" - elif attr["value_type"] == 5: - attr_value_as_string = str(attr["value_int64"]) - if attr["display_name"] == "Associated Artifact": - attr_value_as_string = getAssociatedArtifactType(attribute_cursor, attr_value_as_string, isMultiUser) - patrn = re.compile("[\n\0\a\b\r\f]") - attr_value_as_string = re.sub(patrn, ' ', attr_value_as_string) - if attr["source"] == "Keyword Search" and attr["display_name"] == "Keyword Preview": - attr_value_as_string = "" - database_log.write('') - except IOError as e: - print("IO error") - raise TskDbDiffException("Unexpected IO error while writing to database log." 
+ str(e)) - - except sqlite3.Error as e: - msg = "Attributes in artifact id (in output DB)# " + str(row["artifact_id"]) + " encountered an error: " + str(e) +" .\n" - print("Attributes in artifact id (in output DB)# ", str(row["artifact_id"]), " encountered an error: ", str(e)) - print() - looptry = False - artifact_fail += 1 - database_log.write('Error Extracting Attributes') - database_log.close() - raise TskDbDiffException(msg) - finally: - attribute_cursor.close() - - - # @@@@ This should be - database_log.write(' \n') - row = artifact_cursor.fetchone() - - if(artifact_fail > 0): - msg ="There were " + str(artifact_count) + " artifacts and " + str(artifact_fail) + " threw an exception while loading.\n" - except Exception as e: - raise TskDbDiffException("Unexpected error while dumping blackboard database: " + str(e)) - finally: - database_log.close() - artifact_cursor.close() - conn.close() - - # Now sort the file - srtcmdlst = ["sort", unsorted_dump, "-o", bb_dump_file] - subprocess.call(srtcmdlst) - - - # for key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items(): - # for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items(): - # print(f"{key}: {val}") - - - - - - def _dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings): - """Dumps a database to a text file. - - Does not dump the artifact and attributes. - - Args: - db_file: a pathto_File, the database file to dump - dump_file: a pathto_File, the location to dump the non-blackboard database items - """ - - conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings) - id_files_table = build_id_files_table(conn.cursor(), isMultiUser) - id_vs_parts_table = build_id_vs_parts_table(conn.cursor(), isMultiUser) - id_vs_info_table = build_id_vs_info_table(conn.cursor(), isMultiUser) - id_fs_info_table = build_id_fs_info_table(conn.cursor(), isMultiUser) - id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser) - id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser) - id_legacy_artifact_types = build_id_legacy_artifact_types_table(conn.cursor(), isMultiUser) - id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser) - id_images_table = build_id_image_names_table(conn.cursor(), isMultiUser) - id_accounts_table = build_id_accounts_table(conn.cursor(), isMultiUser) - id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table, id_accounts_table) - - if isMultiUser: # Use PostgreSQL - os.environ['PGPASSWORD']=pgSettings.password - pgDump = ["pg_dump", "--inserts", "-U", pgSettings.username, "-h", pgSettings.pgHost, "-p", pgSettings.pgPort, "-d", db_file, "-E", "utf-8", "-T", "blackboard_artifacts", "-T", "blackboard_attributes", "-f", "postgreSQLDump.sql"] - subprocess.call(pgDump) - postgreSQL_db = codecs.open("postgreSQLDump.sql", "r", "utf-8") - # Write to the database dump - with codecs.open(dump_file, "wb", "utf_8") as db_log: - dump_line = '' - for line in postgreSQL_db: - line = line.strip('\r\n ') - # Deal with pg_dump result file - if (line.startswith('--') or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): # It's comment or alter statement or catalog entry or set idle entry or empty line - continue - elif not line.endswith(';'): # Statement not finished - dump_line 
+= line - continue - else: - dump_line += line - if 'INSERT INTO image_gallery_groups_seen' in dump_line: - dump_line = '' - continue; - dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) - db_log.write('%s\n' % dump_line) - dump_line = '' - postgreSQL_db.close() - else: # use Sqlite - # Delete the blackboard tables - conn.text_factory = lambda x: x.decode("utf-8", "ignore") - conn.execute("DROP TABLE blackboard_artifacts") - conn.execute("DROP TABLE blackboard_attributes") - # Write to the database dump - with codecs.open(dump_file, "wb", "utf_8") as db_log: - for line in conn.iterdump(): - if 'INSERT INTO "image_gallery_groups_seen"' in line: - continue - line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table) - db_log.write('%s\n' % line) - # Now sort the file - srtcmdlst = ["sort", dump_file, "-o", dump_file] - subprocess.call(srtcmdlst) - - conn.close() - # cleanup the backup - if backup_db_file: - os.remove(backup_db_file) - return id_obj_path_table - - - def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings): - """Dumps the given database to text files for later comparison. - - Args: - db_file: a pathto_File, the database file to dump - dump_file: a pathto_File, the location to dump the non-blackboard database items - bb_dump_file: a pathto_File, the location to dump the blackboard database items - """ - id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings) - TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table) - - - def _get_tmp_file(base, ext): - time = datetime.datetime.now().time().strftime("%H%M%f") - return os.path.join(os.environ['TMP'], base + time + ext) - - -class TskDbDiffException(Exception): - pass - -class PGSettings(object): - def __init__(self, pgHost=None, pgPort=5432, user=None, password=None): - self.pgHost = pgHost - self.pgPort = pgPort - self.username = user - self.password = password - - def get_pgHost(self): - return self.pgHost - - def get_pgPort(self): - return self.pgPort - - def get_username(self): - return self.username - - def get_password(self): - return self.password - - - - - -def get_sqlite_table_columns(conn) -> Dict[str, List[str]]: - """ - Retrieves the sqlite public tables and columns from a sqlite connection. - Args: - conn: The sqlite connection. - - Returns: The mapping of table names to a list of column names in that table where the list is in ordinal value. - """ - cur = conn.cursor() - cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'") - tables = list([table[0] for table in cur.fetchall()]) - cur.close() - - to_ret = {} - for table in tables: - cur = conn.cursor() - cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table]) - to_ret[table] = list([col[0] for col in cur.fetchall()]) - cur.close() - - return to_ret - - -def get_pg_table_columns(conn) -> Dict[str, List[str]]: - """ - Retrieves the postgres public tables and columns from a pg connection. - Args: - conn: The pg connection. - - Returns: The mapping of table names to a list of column names in that table where the list is in ordinal value. 
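A small usage sketch for these two helpers, in the spirit of the commented-out calls above. It assumes both functions are in scope (or importable from this module); every connection parameter and path below is a placeholder and would need to point at a real case database:

import sqlite3
import psycopg2

# PostgreSQL (multi-user) case database; credentials are placeholders.
pg_conn = psycopg2.connect(dbname="case_db", user="postgres",
                           password="<password>", host="localhost", port="5432")
for table, columns in get_pg_table_columns(pg_conn).items():
    print(f"{table}: {columns}")
pg_conn.close()

# SQLite (single-user) case database; the path is a placeholder.
sqlite_conn = sqlite3.connect(r"C:\path\to\autopsy.db")
for table, columns in get_sqlite_table_columns(sqlite_conn).items():
    print(f"{table}: {columns}")
sqlite_conn.close()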
- """ - cursor = conn.cursor() - cursor.execute(""" - SELECT cols.table_name, cols.column_name - FROM information_schema.columns cols - WHERE cols.column_name IS NOT NULL - AND cols.table_name IS NOT NULL - AND cols.table_name IN ( - SELECT tables.tablename FROM pg_catalog.pg_tables tables - WHERE LOWER(schemaname) = 'public' - ) - ORDER by cols.table_name, cols.ordinal_position; - """) - mapping = {} - for row in cursor: - mapping.setdefault(row[0], []).append(row[1]) - - cursor.close() - return mapping - - -def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table, images_table, artifact_table, accounts_table): - """ Make testing more consistent and reasonable by doctoring certain db entries. - - Args: - line: a String, the line to remove the object id from. - files_table: a map from object ids to file paths. - """ - - # Sqlite statement use double quotes for table name, PostgreSQL doesn't. We check both databases results for normalization. - files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1 - path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1 - object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1 - vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1 - report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1 - layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1 - data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find('INSERT INTO data_source_info ') > -1 - event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find('INSERT INTO tsk_event_descriptions ') > -1 - events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1 - ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1 - examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1 - ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find('INSERT INTO image_gallery_groups ') > -1 - ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find('INSERT INTO image_gallery_groups_seen ') > -1 - os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1 - os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find('INSERT INTO tsk_os_account_attributes') > -1 - os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find('INSERT INTO tsk_os_account_instances') > -1 - data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find('INSERT INTO tsk_data_artifacts') > -1 - - parens = line[line.find('(') + 1 : line.rfind(')')] - no_space_parens = parens.replace(" ", "") - fields_list = list(csv.reader([no_space_parens], quotechar="'"))[0] - #Add back in the quotechar for values that were originally wrapped (csv reader consumes this character) - fields_list_with_quotes = [] - ptr = 0 - for field in fields_list: - if(len(field) == 0): - field = "'" + field + "'" - else: - start = no_space_parens.find(field, ptr) - if((start - 1) >= 0 and 
no_space_parens[start - 1] == '\''): - if((start + len(field)) < len(no_space_parens) and no_space_parens[start + len(field)] == '\''): - field = "'" + field + "'" - fields_list_with_quotes.append(field) - if(ptr > 0): - #Add one for each comma that is used to separate values in the original string - ptr+=1 - ptr += len(field) - - fields_list = fields_list_with_quotes - - # remove object ID - if files_index: - - # Ignore TIFF size and hash if extracted from PDFs. - # See JIRA-6951 for more details. - # index -3 = 3rd from the end, which is extension - # index -5 = 5th from the end, which is the parent path. - if fields_list[-3] == "'tif'" and fields_list[-5].endswith(".pdf/'"): - fields_list[15] = "'SIZE_IGNORED'" - fields_list[23] = "'MD5_IGNORED'" - fields_list[24] = "'SHA256_IGNORED'" - newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:-1]) + ');') #leave off first (object id) and last (os_account_id) field - # Remove object ID from Unalloc file name - newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) - return newLine - # remove object ID - elif vs_parts_index: - newLine = ('INSERT INTO "tsk_vs_parts" VALUES(' + ', '.join(fields_list[1:]) + ');') - return newLine - # remove group ID - elif ig_groups_index: - newLine = ('INSERT INTO "image_gallery_groups" VALUES(' + ', '.join(fields_list[1:]) + ');') - return newLine - #remove id field - elif ig_groups_seen_index: - # Only removing the id and group_id fields for now. May need to care about examiner_id and seen fields in future. - newLine = ('INSERT INTO "image_gallery_groups_seen" VALUES(' + ', '.join(fields_list[2:]) + ');') - return newLine - # remove object ID - elif path_index: - obj_id = int(fields_list[0]) - objValue = files_table[obj_id] - # remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory - idx_pre = fields_list[1].find('EmbeddedFileExtractor') + len('EmbeddedFileExtractor') - if idx_pre > -1: - idx_pos = fields_list[1].find('\\', idx_pre + 2) - dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file seperator - dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')] - pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:] - else: - pathValue = fields_list[1] - # remove localhost from postgres par_obj_name - multiOutput_idx = pathValue.find('ModuleOutput') - if multiOutput_idx > -1: - pathValue = "'" + pathValue[pathValue.find('ModuleOutput'):] #postgres par_obj_name include losthost - - newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');') - return newLine - # remove object ID - elif layout_index: - obj_id = fields_list[0] - path= files_table[int(obj_id)] - newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', '.join(fields_list[1:]) + ');') - # Remove object ID from Unalloc file name - newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine) - return newLine - # remove object ID - elif object_index: - obj_id = fields_list[0] - parent_id = fields_list[1] - newLine = 'INSERT INTO "tsk_objects" VALUES(' - path = None - parent_path = None - - #if obj_id or parent_id is invalid literal, we simple return the values as it is - try: - obj_id = int(obj_id) - if parent_id != 'NULL': - parent_id = int(parent_id) - except Exception as e: - print(obj_id, parent_id) - return line - - if obj_id in files_table.keys(): - path = files_table[obj_id] - elif obj_id in vs_parts_table.keys(): - path = vs_parts_table[obj_id] - elif obj_id in vs_info_table.keys(): - 
path = vs_info_table[obj_id] - elif obj_id in fs_info_table.keys(): - path = fs_info_table[obj_id] - elif obj_id in reports_table.keys(): - path = reports_table[obj_id] - # remove host name (for multi-user) and dates/times from path for reports - if path is not None: - if 'ModuleOutput' in path: - # skip past the host name (if any) - path = path[path.find('ModuleOutput'):] - if 'BulkExtractor' in path or 'Smirk' in path: - # chop off the last folder (which contains a date/time) - path = path[:path.rfind('\\')] - if 'Reports\\AutopsyTestCase HTML Report' in path: - path = 'Reports\\AutopsyTestCase HTML Report' - - if parent_id in files_table.keys(): - parent_path = files_table[parent_id] - elif parent_id in vs_parts_table.keys(): - parent_path = vs_parts_table[parent_id] - elif parent_id in vs_info_table.keys(): - parent_path = vs_info_table[parent_id] - elif parent_id in fs_info_table.keys(): - parent_path = fs_info_table[parent_id] - elif parent_id in images_table.keys(): - parent_path = images_table[parent_id] - elif parent_id in accounts_table.keys(): - parent_path = accounts_table[parent_id] - elif parent_id == 'NULL': - parent_path = "NULL" - - # Remove host name (for multi-user) from parent_path - if parent_path is not None: - if 'ModuleOutput' in parent_path: - # skip past the host name (if any) - parent_path = parent_path[parent_path.find('ModuleOutput'):] - - if path and parent_path: - # Remove object ID from Unalloc file names and regripper output - path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', path) - path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', path) - parent_path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', parent_path) - parent_path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', parent_path) - return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');' - else: - return newLine + '"OBJECT IDS OMITTED", ' + ', '.join(fields_list[2:]) + ');' #omit parent object id and object id when we cant annonymize them - # remove time-based information, ie Test_6/11/14 -> Test - elif report_index: - fields_list[1] = "AutopsyTestCase" - fields_list[2] = "0" - newLine = ('INSERT INTO "reports" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id - return newLine - elif data_source_info_index: - fields_list[1] = "{device id}" - fields_list[4] = "{dateTime}" - newLine = ('INSERT INTO "data_source_info" VALUES(' + ','.join(fields_list) + ');') - return newLine - elif ingest_job_index: - fields_list[2] = "{host_name}" - start_time = int(fields_list[3]) - end_time = int(fields_list[4]) - if (start_time <= end_time): - fields_list[3] = "0" - fields_list[4] = "0" - newLine = ('INSERT INTO "ingest_jobs" VALUES(' + ','.join(fields_list) + ');') - return newLine - elif examiners_index: - fields_list[1] = "{examiner_name}" - newLine = ('INSERT INTO "tsk_examiners" VALUES(' + ','.join(fields_list) + ');') - return newLine - # remove all timing dependent columns from events table - elif events_index: - newLine = ('INSERT INTO "tsk_events" VALUES(' + ','.join(fields_list[1:2]) + ');') - return newLine - # remove object ids from event description table - elif event_description_index: - # replace object ids with information that is deterministic - file_obj_id = int(fields_list[5]) - object_id = int(fields_list[4]) - legacy_artifact_id = 'NULL' - if (fields_list[6] != 'NULL'): - legacy_artifact_id = int(fields_list[6]) - if file_obj_id != 'NULL' and file_obj_id in files_table.keys(): - fields_list[5] = files_table[file_obj_id] - if object_id != 'NULL' and 
object_id in files_table.keys(): - fields_list[4] = files_table[object_id] - if legacy_artifact_id != 'NULL' and legacy_artifact_id in artifact_table.keys(): - fields_list[6] = artifact_table[legacy_artifact_id] - if fields_list[1] == fields_list[2] and fields_list[1] == fields_list[3]: - fields_list[1] = cleanupEventDescription(fields_list[1]) - fields_list[2] = cleanupEventDescription(fields_list[2]) - fields_list[3] = cleanupEventDescription(fields_list[3]) - newLine = ('INSERT INTO "tsk_event_descriptions" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id - return newLine - elif os_account_index: - newLine = ('INSERT INTO "tsk_os_accounts" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id since value that would be substituted is in diff line already - return newLine - elif os_account_attr_index: - #substitue the account object id for a non changing value - os_account_id = int(fields_list[1]) - fields_list[1] = accounts_table[os_account_id] - #substitue the source object id for a non changing value - source_obj_id = int(fields_list[3]) - if source_obj_id in files_table.keys(): - fields_list[3] = files_table[source_obj_id] - elif source_obj_id in vs_parts_table.keys(): - fields_list[3] = vs_parts_table[source_obj_id] - elif source_obj_id in vs_info_table.keys(): - fields_list[3] = vs_info_table[source_obj_id] - elif source_obj_id in fs_info_table.keys(): - fields_list[3] = fs_info_table[source_obj_id] - elif source_obj_id in images_table.keys(): - fields_list[3] = images_table[source_obj_id] - elif source_obj_id in accounts_table.keys(): - fields_list[3] = accounts_table[source_obj_id] - elif source_obj_id == 'NULL': - fields_list[3] = "NULL" - newLine = ('INSERT INTO "tsk_os_account_attributes" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id - return newLine - elif os_account_instances_index: - os_account_id = int(fields_list[1]) - fields_list[1] = accounts_table[os_account_id] - newLine = ('INSERT INTO "tsk_os_account_instances" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id - return newLine - elif data_artifacts_index: - art_obj_id = int(fields_list[0]) - if art_obj_id in files_table.keys(): - fields_list[0] = files_table[art_obj_id] - else: - fields_list[0] = 'Artifact Object ID Omitted' - account_obj_id = int(fields_list[1]) - if account_obj_id in files_table.keys(): - fields_list[1] = files_table[account_obj_id] - else: - fields_list[1] = 'Account Object ID Omitted' - newLine = ('INSERT INTO "tsk_data_artifacts" VALUES(' + ','.join(fields_list[:]) + ');') # remove ids - return newLine - else: - return line - -def cleanupEventDescription(description): - test = re.search("^'\D+:\d+'$", description) - if test is not None: - return re.sub(":\d+", ":", description) - else: - return description - -def getAssociatedArtifactType(cur, artifact_id, isMultiUser): - if isMultiUser: - cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s",[artifact_id]) - else: - cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE 
artifact_id=?",[artifact_id]) - - info = cur.fetchone() - - return "File path: " + info[0] + " Artifact Type: " + info[1] - -def build_id_files_table(db_cursor, isPostgreSQL): - """Build the map of object ids to file paths. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, parent path, and name, then create a tuple in the dictionary - # with the object id as the key and the full file path (parent + name) as the value - mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, parent_path, name FROM tsk_files")]) - return mapping - -def build_id_vs_parts_table(db_cursor, isPostgreSQL): - """Build the map of object ids to vs_parts. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, addr, and start, then create a tuple in the dictionary - # with the object id as the key and (addr + start) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, addr, start FROM tsk_vs_parts")]) - return mapping - -def build_id_vs_info_table(db_cursor, isPostgreSQL): - """Build the map of object ids to vs_info. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, vs_type, and img_offset, then create a tuple in the dictionary - # with the object id as the key and (vs_type + img_offset) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")]) - return mapping - - -def build_id_fs_info_table(db_cursor, isPostgreSQL): - """Build the map of object ids to fs_info. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, img_offset, and fs_type, then create a tuple in the dictionary - # with the object id as the key and (img_offset + fs_type) as the value - mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")]) - return mapping - -def build_id_objects_table(db_cursor, isPostgreSQL): - """Build the map of object ids to par_id. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and par_obj_id, type as the value - mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")]) - return mapping - -def build_id_image_names_table(db_cursor, isPostgreSQL): - """Build the map of object ids to name. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id and name then create a tuple in the dictionary - # with the object id as the key and name, type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, name FROM tsk_image_names WHERE sequence=0")]) - #data_sources which are logical file sets will be found in the files table - return mapping - -def build_id_artifact_types_table(db_cursor, isPostgreSQL): - """Build the map of object ids to artifact ids. 
- - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id, par_obj_id, then create a tuple in the dictionary - # with the object id as the key and artifact type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) - return mapping - -def build_id_legacy_artifact_types_table(db_cursor, isPostgreSQL): - """Build the map of legacy artifact ids to artifact type. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the legacy artifact id then create a tuple in the dictionary - # with the artifact id as the key and artifact type as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")]) - return mapping - -def build_id_reports_table(db_cursor, isPostgreSQL): - """Build the map of report object ids to report path. - - Args: - db_cursor: the database cursor - """ - # for each row in the reports table in the db, create an obj_id -> path map - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, path FROM reports")]) - return mapping - -def build_id_accounts_table(db_cursor, isPostgreSQL): - """Build the map of object ids to OS account SIDs. - - Args: - db_cursor: the database cursor - """ - # for each row in the db, take the object id and account SID then creates a tuple in the dictionary - # with the object id as the key and the OS Account's SID as the value - mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT os_account_obj_id, addr FROM tsk_os_accounts")]) - return mapping - -def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table, images_table, accounts_table): - """Build the map of object ids to artifact ids. 
- - Args: - files_table: obj_id, path - objects_table: obj_id, par_obj_id, type - artifacts_table: obj_id, artifact_type_name - reports_table: obj_id, path - images_table: obj_id, name - accounts_table: obj_id, addr - """ - # make a copy of files_table and update it with new data from artifacts_table and reports_table - mapping = files_table.copy() - for k, v in objects_table.items(): - path = "" - if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id - if k in reports_table.keys(): # For a report we use the report path - par_obj_id = v[0] - if par_obj_id is not None: - mapping[k] = reports_table[k] - elif k in artifacts_table.keys(): # For an artifact we use it's par_obj_id's path+name plus it's artifact_type name - par_obj_id = v[0] # The parent of an artifact can be a file or a report - if par_obj_id in mapping.keys(): - path = mapping[par_obj_id] - elif par_obj_id in reports_table.keys(): - path = reports_table[par_obj_id] - elif par_obj_id in images_table.keys(): - path = images_table[par_obj_id] - mapping[k] = path + "/" + artifacts_table[k] - elif k in accounts_table.keys(): # For an OS Account object ID we use its addr field which is the account SID - mapping[k] = accounts_table[k] - elif v[0] not in mapping.keys(): - if v[0] in artifacts_table.keys(): - par_obj_id = objects_table[v[0]] - path = mapping[par_obj_id] - mapping[k] = path + "/" + artifacts_table[v[0]] - return mapping - -def db_connect(db_file, isMultiUser, pgSettings=None): - if isMultiUser: # use PostgreSQL - try: - return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + pgSettings.pgHost + " password=" + pgSettings.password), None - except: - print("Failed to connect to the database: " + db_file) - else: # Sqlite - # Make a copy that we can modify - backup_db_file = TskDbDiff._get_tmp_file("tsk_backup_db", ".db") - shutil.copy(db_file, backup_db_file) - # We sometimes get situations with messed up permissions - os.chmod (backup_db_file, 0o777) - return sqlite3.connect(backup_db_file), backup_db_file - -def sql_select_execute(cursor, isPostgreSQL, sql_stmt): - if isPostgreSQL: - cursor.execute(sql_stmt) - return cursor.fetchall() - else: - return cursor.execute(sql_stmt) - -def main(): - try: - sys.argv.pop(0) - output_db = sys.argv.pop(0) - gold_db = sys.argv.pop(0) - except: - print("usage: tskdbdiff [OUTPUT DB PATH] [GOLD DB PATH]") - sys.exit(1) - - db_diff = TskDbDiff(output_db, gold_db, output_dir=".") - dump_passed, bb_dump_passed = db_diff.run_diff() - - if dump_passed and bb_dump_passed: - print("Database comparison passed.") - if not dump_passed: - print("Non blackboard database comparison failed.") - if not bb_dump_passed: - print("Blackboard database comparison failed.") - - sys.exit(0) - - -if __name__ == "__main__": - if sys.hexversion < 0x03000000: - print("Python 3 required") - sys.exit(1) - - main() - From 6b86cb53b1ab42c78b0063fe9ed9e7d93e7d4646 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 20 Apr 2021 21:45:26 -0400 Subject: [PATCH 05/30] updates --- test/script/tskdbdiff.py | 42 ++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 3bd516801c..0b0371db2b 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -169,12 +169,29 @@ class TskDbDiff(object): # create file path for gold files inside output folder. In case of diff, both gold and current run files # are available in the report output folder. 
Prefix Gold- is added to the filename. - gold_file_in_output_dir = output_file[:output_file.rfind("/")] + "/Gold-" + output_file[output_file.rfind("/")+1:] + gold_file_in_output_dir = os.path.join(os.path.dirname(output_file), "Gold-" + os.path.basename(output_file)) shutil.copy(gold_file, gold_file_in_output_dir) return False + @staticmethod + def _get_associated_artifact_type(cur, artifact_id, isMultiUser): + if isMultiUser: + cur.execute( + "SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s", + [artifact_id]) + else: + cur.execute( + "SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=?", + [artifact_id]) + + info = cur.fetchone() + + return "File path: " + info[0] + " Artifact Type: " + info[1] + + + @staticmethod def _dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table): """Dumps sorted text results to the given output location. @@ -270,7 +287,7 @@ class TskDbDiff(object): elif attr["value_type"] == 5: attr_value_as_string = str(attr["value_int64"]) if attr["display_name"] == "Associated Artifact": - attr_value_as_string = getAssociatedArtifactType(attribute_cursor, attr_value_as_string, isMultiUser) + attr_value_as_string = TskDbDiff._get_associated_artifact_type(attribute_cursor, attr_value_as_string, isMultiUser) patrn = re.compile("[\n\0\a\b\r\f]") attr_value_as_string = re.sub(patrn, ' ', attr_value_as_string) if attr["source"] == "Keyword Search" and attr["display_name"] == "Keyword Preview": @@ -310,7 +327,7 @@ class TskDbDiff(object): srtcmdlst = ["sort", unsorted_dump, "-o", bb_dump_file] subprocess.call(srtcmdlst) - + @staticmethod def _dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings): """Dumps a database to a text file. 
@@ -321,7 +338,7 @@ class TskDbDiff(object): dump_file: a pathto_File, the location to dump the non-blackboard database items """ - conn, output_file = db_connect(db_file, isMultiUser, pgSettings) + conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings) guid_utils = TskGuidUtils.create(conn) if isMultiUser: @@ -331,14 +348,15 @@ class TskDbDiff(object): table_cols = get_sqlite_table_columns(conn) schema = get_sqlite_schema(conn) - output_file.write(schema + "\n") - for table, cols in sorted(table_cols.items(), key=lambda pr: pr[0]): - normalizer = TABLE_NORMALIZATIONS[table] if table in TABLE_NORMALIZATIONS else None - write_normalized(guid_utils, output_file, conn, table, cols, normalizer) + with codecs.open(dump_file, "wb", "utf_8") as output_file: + output_file.write(schema + "\n") + for table, cols in sorted(table_cols.items(), key=lambda pr: pr[0]): + normalizer = TABLE_NORMALIZATIONS[table] if table in TABLE_NORMALIZATIONS else None + write_normalized(guid_utils, output_file, conn, table, cols, normalizer) # Now sort the file - # srtcmdlst = ["sort", dump_file, "-o", dump_file] - # subprocess.call(srtcmdlst) + srtcmdlst = ["sort", dump_file, "-o", dump_file] + subprocess.call(srtcmdlst) conn.close() # cleanup the backup @@ -346,6 +364,10 @@ class TskDbDiff(object): # os.remove(backup_db_file) return guid_utils.obj_id_guids + @staticmethod + def _get_tmp_file(base, ext): + time = datetime.datetime.now().time().strftime("%H%M%f") + return os.path.join(os.environ['TMP'], base + time + ext) class TskDbDiffException(Exception): From 35ad98ca30bc6cbb823e28af621eec9ec4b53437 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 21 Apr 2021 09:40:03 -0400 Subject: [PATCH 06/30] bug fixes --- test/script/tskdbdiff.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 0b0371db2b..21807bf136 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -414,7 +414,7 @@ class TskGuidUtils: cursor.execute(select_statement) ret_dict = {} for row in cursor: - ret_dict[row[0]] = delim.join([str(col) for col in row[1:]]) + ret_dict[row[0]] = delim.join([str(col) if col else '' for col in row[1:]]) return ret_dict @@ -460,11 +460,11 @@ class TskGuidUtils: path = artifact_parent_dict[par_obj_id] break - guid_artifacts[par_obj_id] = "/".join([path, v]) + guid_artifacts[k] = "/".join([path, v]) return TskGuidUtils( obj_id_guids={**guid_files, **guid_reports, **guid_os_accounts, **guid_vs_parts, - **guid_fs_info, **guid_fs_info, **guid_image_names}, + **guid_fs_info, **guid_fs_info, **guid_image_names, **guid_artifacts}, artifact_types=objid_artifacts) artifact_types: Dict[int, str] @@ -777,9 +777,9 @@ def normalize_tsk_event_descriptions(guid_util: TskGuidUtils, row: Dict[str, any """ row_copy = row.copy() # replace object ids with information that is deterministic + row_copy['event_description_id'] = MASKED_ID row_copy['content_obj_id'] = guid_util.get_guid_for_file_objid(row['content_obj_id']) - row_copy['data_source_obj_id'] = guid_util.get_guid_for_file_objid(row['data_source_obj_id']) - row_copy['artifact_id'] = guid_util.get_guid_for_artifactid(row['artifact_id']) + row_copy['artifact_id'] = guid_util.get_guid_for_artifactid(row['artifact_id']) if row['artifact_id'] else None if row['full_description'] == row['med_description'] == row['short_description']: row_copy['full_description'] = _mask_event_desc(row['full_description']) @@ -961,11 +961,11 @@ TABLE_NORMALIZATIONS: Dict[str, 
TableNormalization] = { "obj_id": MASKED_OBJ_ID }), "image_gallery_groups": NormalizeColumns({ - "obj_id": MASKED_OBJ_ID + "group_id": MASKED_ID }), "tsk_files_path": NormalizeRow(normalize_tsk_files_path), "tsk_file_layout": NormalizeColumns({ - "obj_id": lambda guid_util, col: guid_util.get_guid_for_file_objid(col) + "obj_id": lambda guid_util, col: normalize_unalloc_files(guid_util.get_guid_for_file_objid(col)) }), "tsk_objects": NormalizeRow(normalize_tsk_objects), "reports": NormalizeColumns({ @@ -1042,6 +1042,7 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, row_dict = row_masker.normalize(guid_utils, row_dict) if row_dict is not None: + # NOTE: This is an alternate approach to representing values as json-like lines # entries = [] # for idx in range(0, len(column_names)): # column = column_names[idx] @@ -1051,7 +1052,7 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, # insert_statement = f"{table}: {{{insert_values}}}\n" values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) - insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n' + insert_statement = f'INSERT INTO "{table}" VALUES({values_statement});\n' output_file.write(insert_statement) From 991d1985c371ccf5965de305113736c7016fb9b8 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 21 Apr 2021 09:52:18 -0400 Subject: [PATCH 07/30] bug fix --- test/script/tskdbdiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 21807bf136..5c944efb88 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -727,7 +727,7 @@ def get_pg_schema(pg_username: str, pg_pword: str, pg_host: str, pg_port: str): pg_dump = ["pg_dump", "--inserts", "-U", pg_username, "-h", pg_host, "-p", pg_port, "-T", "blackboard_artifacts", "-T", "blackboard_attributes"] output = subprocess.check_output(pg_dump) - return sanitize_schema(output) + return sanitize_schema(str(output)) def get_sqlite_schema(db_conn): From 37e3087f5b0a6ac2288d72fb072d8a9cd47cc891 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 21 Apr 2021 11:40:04 -0400 Subject: [PATCH 08/30] bug fixes --- test/script/tskdbdiff.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 5c944efb88..a5fffe72e7 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -589,7 +589,7 @@ def get_path_segs(path: Union[str, None]) -> Union[List[str], None]: """ if path: - return list(filter(lambda x: len(x.strip()) > 0, [path for path in os.path.normpath(path).split(os.sep)])) + return list(filter(lambda x: len(x.strip()) > 0, [s for s in re.split(r"[\\/]", path)])) else: return None @@ -759,7 +759,7 @@ def _mask_event_desc(desc: str) -> str: Returns: The normalized description. 
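Roughly, the masking turns a description that ends in a numeric id into its textual prefix and leaves everything else alone. A stand-in sketch of that behavior (mask_desc and the sample descriptions are illustrative only):

import re

def mask_desc(desc):
    # Strip a trailing ": <number>" so ids that change from run to run do not cause diffs.
    match = re.search(r"^\s*(.+?)\s*:\s*\d+\s*$", desc.strip())
    return f"{match.group(1)}:" if match else desc

print(mask_desc("Shell Bags: 30840"))   # -> "Shell Bags:"
print(mask_desc("Recent Documents"))    # -> "Recent Documents" (unchanged)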
""" - match = re.search(r"^\s*(\D+):\d+\s*$", desc.strip()) + match = re.search(r"^\s*(.+?)\s*:\s*\d+\s*$", desc.strip()) if match: return f"{match.group(1)}:" @@ -878,10 +878,10 @@ def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Di if module_output_idx >= 0: # remove everything up to and including ModuleOutput if ModuleOutput present path_parts = path_parts[module_output_idx:] - if len(path_parts) > 1 and path_parts[0] == 'Embedded File Extractor': - match = re.match(r'^(.+?)_[0-9]*$', path_parts[1]) + if len(path_parts) > 1 and path_parts[1] == 'Embedded File Extractor': + match = re.match(r'^(.+?)_\d*$', path_parts[2]) if match: - path_parts[1] = match.group(1) + path_parts[2] = match.group(1) row_copy['path'] = os.path.join(*path_parts) if len(path_parts) > 0 else '/' From 6c3920cf69dbb4b988a4b2efc4ec6722ba3ba0ca Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 23 Apr 2021 13:00:14 -0400 Subject: [PATCH 09/30] comment update --- test/script/tskdbdiff.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index a5fffe72e7..d82a9d110b 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -1044,12 +1044,13 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, if row_dict is not None: # NOTE: This is an alternate approach to representing values as json-like lines # entries = [] - # for idx in range(0, len(column_names)): - # column = column_names[idx] - # value = get_sql_insert_value(row_dict[column] if column in row_dict else None) - # entries.append((column, value)) + # for column in column_names: + # value = get_sql_insert_value(row_dict[column] if column in row_dict and row_dict[column] else None) + # if value: + # entries.append((column, value)) # insert_values = ", ".join([f"{pr[0]}: {pr[1]}" for pr in entries]) # insert_statement = f"{table}: {{{insert_values}}}\n" + # output_file.write(insert_statement) values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) insert_statement = f'INSERT INTO "{table}" VALUES({values_statement});\n' From 9e4289c4535bde62baa9b9d1a38ef2b40e226795 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 4 May 2021 16:50:47 -0400 Subject: [PATCH 10/30] add missing method --- test/script/tskdbdiff.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index d82a9d110b..3e067476d8 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -364,6 +364,18 @@ class TskDbDiff(object): # os.remove(backup_db_file) return guid_utils.obj_id_guids + @staticmethod + def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings): + """Dumps the given database to text files for later comparison. 
+ + Args: + db_file: a pathto_File, the database file to dump + dump_file: a pathto_File, the location to dump the non-blackboard database items + bb_dump_file: a pathto_File, the location to dump the blackboard database items + """ + id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings) + TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table) + @staticmethod def _get_tmp_file(base, ext): time = datetime.datetime.now().time().strftime("%H%M%f") From c42314308e9d622e8d2e09ada58330499c2c0093 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 5 May 2021 11:41:33 -0400 Subject: [PATCH 11/30] commenting on regex --- test/script/tskdbdiff.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 3e067476d8..bb10177b06 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -601,6 +601,7 @@ def get_path_segs(path: Union[str, None]) -> Union[List[str], None]: """ if path: + # split on backslash or forward slash return list(filter(lambda x: len(x.strip()) > 0, [s for s in re.split(r"[\\/]", path)])) else: return None @@ -771,6 +772,8 @@ def _mask_event_desc(desc: str) -> str: Returns: The normalized description. """ + + # Takes a string like "Shell Bags: 30840" and replaces with "ShellBags:" match = re.search(r"^\s*(.+?)\s*:\s*\d+\s*$", desc.strip()) if match: return f"{match.group(1)}:" @@ -832,6 +835,9 @@ def normalize_unalloc_files(path_str: Union[str, None]) -> Union[str, None]: Returns: The path string where timestamps are removed from unalloc strings. """ + + # takes a file name like "Unalloc_30580_7466496_2980941312" and removes the object id to become + # "Unalloc_7466496_2980941312" return re.sub('Unalloc_[0-9]+_', 'Unalloc_', path_str) if path_str else None @@ -844,6 +850,7 @@ def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: Returns: The path string where timestamps are removed from regripper paths. 
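Both filename normalizations are single re.sub calls; using the sample names from the comments, they behave like this:

import re

# "Unalloc_30580_7466496_2980941312" -> "Unalloc_7466496_2980941312" (object id dropped)
print(re.sub(r'Unalloc_[0-9]+_', 'Unalloc_', 'Unalloc_30580_7466496_2980941312'))

# "regripper-12345-full" -> "regripper-full" (id dropped)
print(re.sub(r'regripper-[0-9]+-full', 'regripper-full', 'regripper-12345-full'))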
""" + # takes a file name like "regripper-12345-full" and removes the id to become "regripper-full" return re.sub(r'regripper\-[0-9]+\-full', 'regripper-full', path_str) if path_str else None @@ -891,6 +898,9 @@ def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Di # remove everything up to and including ModuleOutput if ModuleOutput present path_parts = path_parts[module_output_idx:] if len(path_parts) > 1 and path_parts[1] == 'Embedded File Extractor': + # Takes a folder like ModuleOutput\Embedded File Extractor/f_000168_4435\f_000168 + # and fixes the folder after 'Embedded File Extractor', 'f_000168_4435' to remove the last number + # to become 'f_000168' match = re.match(r'^(.+?)_\d*$', path_parts[2]) if match: path_parts[2] = match.group(1) From 6cdb168a050b85871a0730984ce4dc64627d7827 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 6 May 2021 08:23:54 -0400 Subject: [PATCH 12/30] pg_dump fix --- test/script/tskdbdiff.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index bb10177b06..073aeef88f 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -343,7 +343,8 @@ class TskDbDiff(object): if isMultiUser: table_cols = get_pg_table_columns(conn) - schema = get_pg_schema(pgSettings.username, pgSettings.password, pgSettings.pgHost, pgSettings.pgPort) + schema = get_pg_schema(db_file, pgSettings.username, pgSettings.password, + pgSettings.pgHost, pgSettings.pgPort) else: table_cols = get_sqlite_table_columns(conn) schema = get_sqlite_schema(conn) @@ -707,9 +708,14 @@ def sanitize_schema(original: str) -> str: dump_line = '' for line in original.splitlines(): line = line.strip('\r\n ') + lower_line = line.lower() # It's comment or alter statement or catalog entry or set idle entry or empty line - if (line.startswith('--') or line.lower().startswith( - 'alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): + if (not line or + line.startswith('--') or + lower_line.startswith('set') or + lower_line.startswith('alter') or + "pg_catalog" in line or + "idle_in_transaction_session_timeout" in line): continue elif line.endswith(';'): # Statement not finished dump_line += line @@ -724,10 +730,11 @@ def sanitize_schema(original: str) -> str: return "\n".join(sanitized_lines) -def get_pg_schema(pg_username: str, pg_pword: str, pg_host: str, pg_port: str): +def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg_port: str): """ Gets the schema to be added to the dump text from the postgres database. Args: + dbname: The name of the database. pg_username: The postgres user name. pg_pword: The postgres password. pg_host: The postgres host. 
@@ -737,10 +744,11 @@ def get_pg_schema(pg_username: str, pg_pword: str, pg_host: str, pg_port: str): """ os.environ['PGPASSWORD'] = pg_pword - pg_dump = ["pg_dump", "--inserts", "-U", pg_username, "-h", pg_host, "-p", pg_port, - "-T", "blackboard_artifacts", "-T", "blackboard_attributes"] + pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", pg_port, "--schema-only", "-d", dbname, "-t", + "public.*"] output = subprocess.check_output(pg_dump) - return sanitize_schema(str(output)) + output_str = output.decode('UTF-8') + return sanitize_schema(output_str) def get_sqlite_schema(db_conn): @@ -1052,7 +1060,8 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, for row in cursor: if len(row) != len(column_names): print( - f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}") + f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are" + f" {len(column_names)} with {str(column_names)}") continue row_dict = {} @@ -1082,7 +1091,8 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, def db_connect(db_file, isMultiUser, pgSettings=None): if isMultiUser: # use PostgreSQL try: - return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + pgSettings.pgHost + " password=" + pgSettings.password), None + return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + + pgSettings.pgHost + " password=" + pgSettings.password), None except: print("Failed to connect to the database: " + db_file) else: # Sqlite @@ -1122,4 +1132,3 @@ if __name__ == "__main__": sys.exit(1) main() - From 46c4017fe7ffbdbbcc52bb9b6b2d405094f374ff Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 6 May 2021 11:20:02 -0400 Subject: [PATCH 13/30] small fix --- test/script/tskdbdiff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 073aeef88f..108c168b88 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -744,8 +744,8 @@ def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg """ os.environ['PGPASSWORD'] = pg_pword - pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", pg_port, "--schema-only", "-d", dbname, "-t", - "public.*"] + pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", pg_port, + "--schema-only", "-d", dbname, "-t", "public.*"] output = subprocess.check_output(pg_dump) output_str = output.decode('UTF-8') return sanitize_schema(output_str) From e2c0a08ac4533bfe292e740ad01c33bc744e4a3d Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 7 May 2021 11:23:12 -0400 Subject: [PATCH 14/30] formatting, guid fixes --- test/script/tskdbdiff.py | 42 +++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 108c168b88..76b3cb5693 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -443,6 +443,7 @@ class TskGuidUtils: """ guid_files = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, parent_path, name FROM tsk_files") guid_vs_parts = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, addr, start FROM tsk_vs_parts", "_") + guid_vs_info = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info", "_") guid_fs_info = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info", 
"_") guid_image_names = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, name FROM tsk_image_names " "WHERE sequence=0") @@ -450,13 +451,21 @@ class TskGuidUtils: guid_reports = TskGuidUtils._get_guid_dict(db_conn, "SELECT obj_id, path FROM reports") objid_artifacts = TskGuidUtils._get_guid_dict(db_conn, - "SELECT " - "blackboard_artifacts.artifact_obj_id, " - "blackboard_artifact_types.type_name FROM " - "blackboard_artifacts INNER JOIN blackboard_artifact_types " + "SELECT blackboard_artifacts.artifact_obj_id, " + "blackboard_artifact_types.type_name " + "FROM blackboard_artifacts " + "INNER JOIN blackboard_artifact_types " "ON blackboard_artifact_types.artifact_type_id = " "blackboard_artifacts.artifact_type_id") + artifact_objid_artifacts = TskGuidUtils._get_guid_dict(db_conn, + "SELECT blackboard_artifacts.artifact_id, " + "blackboard_artifact_types.type_name " + "FROM blackboard_artifacts " + "INNER JOIN blackboard_artifact_types " + "ON blackboard_artifact_types.artifact_type_id = " + "blackboard_artifacts.artifact_type_id") + cursor = db_conn.cursor() cursor.execute("SELECT obj_id, par_obj_id FROM tsk_objects") par_obj_objects = dict([(row[0], row[1]) for row in cursor]) @@ -476,9 +485,10 @@ class TskGuidUtils: guid_artifacts[k] = "/".join([path, v]) return TskGuidUtils( - obj_id_guids={**guid_files, **guid_reports, **guid_os_accounts, **guid_vs_parts, + # aggregate all the object id dictionaries together + obj_id_guids={**guid_files, **guid_reports, **guid_os_accounts, **guid_vs_parts, **guid_vs_info, **guid_fs_info, **guid_fs_info, **guid_image_names, **guid_artifacts}, - artifact_types=objid_artifacts) + artifact_types=artifact_objid_artifacts) artifact_types: Dict[int, str] obj_id_guids: Dict[int, any] @@ -506,11 +516,11 @@ class TskGuidUtils: return self.obj_id_guids[obj_id] if obj_id in self.obj_id_guids else omitted_value def get_guid_for_file_objid(self, obj_id, omitted_value: Union[str, None] = 'Object ID Omitted'): - # TODO this is just an alias; could probably be removed + # this method is just an alias for get_guid_for_objid return self.get_guid_for_objid(obj_id, omitted_value) def get_guid_for_accountid(self, account_id, omitted_value: Union[str, None] = 'Account ID Omitted'): - # TODO this is just an alias; could probably be removed + # this method is just an alias for get_guid_for_objid return self.get_guid_for_objid(account_id, omitted_value) def get_guid_for_artifactid(self, artifact_id, omitted_value: Union[str, None] = 'Artifact ID Omitted'): @@ -859,7 +869,7 @@ def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: """ # takes a file name like "regripper-12345-full" and removes the id to become "regripper-full" - return re.sub(r'regripper\-[0-9]+\-full', 'regripper-full', path_str) if path_str else None + return re.sub(r'regripper-[0-9]+-full', 'regripper-full', path_str) if path_str else None def normalize_tsk_files(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: @@ -1088,19 +1098,19 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, output_file.write(insert_statement) -def db_connect(db_file, isMultiUser, pgSettings=None): - if isMultiUser: # use PostgreSQL +def db_connect(db_file, is_multi_user, pg_settings=None): + if is_multi_user: # use PostgreSQL try: - return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + - pgSettings.pgHost + " password=" + pgSettings.password), None + return psycopg2.connect("dbname=" + db_file + " user=" + 
pg_settings.username + " host=" + + pg_settings.pgHost + " password=" + pg_settings.password), None except: print("Failed to connect to the database: " + db_file) - else: # Sqlite + else: # Sqlite # Make a copy that we can modify backup_db_file = TskDbDiff._get_tmp_file("tsk_backup_db", ".db") shutil.copy(db_file, backup_db_file) # We sometimes get situations with messed up permissions - os.chmod (backup_db_file, 0o777) + os.chmod(backup_db_file, 0o777) return sqlite3.connect(backup_db_file), backup_db_file @@ -1113,7 +1123,7 @@ def main(): print("usage: tskdbdiff [OUTPUT DB PATH] [GOLD DB PATH]") sys.exit(1) - db_diff = TskDbDiff(output_db, gold_db, output_dir=".") + db_diff = TskDbDiff(output_db, gold_db, output_dir=".") dump_passed, bb_dump_passed = db_diff.run_diff() if dump_passed and bb_dump_passed: From 32f4492a7b042ff94d85c4f771411777be82de46 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Fri, 7 May 2021 14:39:57 -0400 Subject: [PATCH 15/30] tsk_objects fix --- test/script/tskdbdiff.py | 54 ++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 76b3cb5693..9ce406a5ce 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -929,21 +929,24 @@ def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Di return row_copy -def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: +def normalize_tsk_objects_path(guid_util: TskGuidUtils, objid: int, + no_path_placeholder: Union[str, None]) -> Union[str, None]: """ - Normalizes object table rows. + Returns a normalized path to be used in a tsk_objects table row. Args: - guid_util: Provides guids for ids that may change from run to run. - row: A dictionary mapping column names to values. + guid_util: The utility for fetching guids. + objid: The object id of the item. + no_path_placeholder: text to return if no path value found. + + Returns: The 'no_path_placeholder' text if no path. Otherwise, the normalized path. - Returns: The normalized object table row. 
""" - parent_id = row['par_obj_id'] - path = guid_util.get_guid_for_objid(row['obj_id'], omitted_value=None) - row_copy = row.copy() + path = guid_util.get_guid_for_objid(objid, omitted_value=None) - # remove host name (for multi-user) and dates/times from path for reports - if path is not None: + if not path: + return no_path_placeholder + else: + # remove host name (for multi-user) and dates/times from path for reports path_parts = get_path_segs(path) module_output_idx = index_of(path_parts, 'ModuleOutput') if module_output_idx >= 0: @@ -955,30 +958,27 @@ def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[ path_parts = path_parts[:-1] for idx in range(0, len(path_parts) - 1): - if path_parts[idx] == "Reports" and path_parts[idx + 1] == "AutopsyTestCase HTML Report": + if path_parts[idx].lower() == "reports" and \ + path_parts[idx + 1].lower().startswith("autopsytestcase html report"): path_parts = ["Reports", "AutopsyTestCase HTML Report"] path = os.path.join(*path_parts) if len(path_parts) > 0 else '/' - parent_path = guid_util.get_guid_for_objid(parent_id, omitted_value=None) + return normalize_regripper_files(normalize_unalloc_files(path)) - # Remove host name (for multi-user) from parent_path - if parent_path is not None: - parent_path_parts = get_path_segs(parent_path) - module_output_idx = index_of(parent_path_parts, 'ModuleOutput') - if module_output_idx >= 0: - parent_path_parts = parent_path_parts[module_output_idx:] - parent_path = os.path.join(*parent_path_parts) if len(parent_path_parts) > 0 else '/' - - # handle regripper and unalloc file replacements - if path and parent_path: - row_copy['obj_id'] = normalize_regripper_files(normalize_unalloc_files(path)) - row_copy['par_obj_id'] = normalize_regripper_files(normalize_unalloc_files(parent_path)) - else: - row_copy['obj_id'] = MASKED_OBJ_ID - row_copy['par_obj_id'] = "MASKED_PARENT_OBJ_ID" +def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: + """ + Normalizes object table rows. + Args: + guid_util: Provides guids for ids that may change from run to run. + row: A dictionary mapping column names to values. + Returns: The normalized object table row. 
+ """ + row_copy = row.copy() + row_copy['obj_id'] = normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) + row_copy['par_obj_id'] = normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') return row_copy From 971c1d54b3b788fcedffb3fd712f21d5c2722b34 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 10 May 2021 12:56:10 -0400 Subject: [PATCH 16/30] bug fixes --- test/script/tskdbdiff.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 9ce406a5ce..aa28181f8f 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -723,16 +723,24 @@ def sanitize_schema(original: str) -> str: if (not line or line.startswith('--') or lower_line.startswith('set') or - lower_line.startswith('alter') or - "pg_catalog" in line or - "idle_in_transaction_session_timeout" in line): + " set default nextval" in lower_line or + " owner to " in lower_line or + " owned by " in lower_line or + "pg_catalog" in lower_line or + "idle_in_transaction_session_timeout" in lower_line): continue - elif line.endswith(';'): # Statement not finished - dump_line += line + + # if there is no white space or parenthesis delimiter, add a space + if re.match(r'^.+?[^\s()]$', dump_line) and re.match(r'^[^\s()]', line): + dump_line += ' ' + + # append the line to the outputted line + dump_line += line + + # if line ends with ';' then this will be one statement in diff + if line.endswith(';'): sanitized_lines.append(dump_line) dump_line = '' - else: - dump_line += line if len(dump_line.strip()) > 0: sanitized_lines.append(dump_line) @@ -740,7 +748,7 @@ def sanitize_schema(original: str) -> str: return "\n".join(sanitized_lines) -def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg_port: str): +def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg_port: Union[str, int]): """ Gets the schema to be added to the dump text from the postgres database. 
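For reference, the pg_dump invocation built here can be reproduced on its own; a minimal sketch using the same flags as the patch (the database name, user, host, port, and password below are placeholders, and error handling is omitted):

    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    public class PgSchemaDumpSketch {
        public static void main(String[] args) throws IOException, InterruptedException {
            ProcessBuilder pb = new ProcessBuilder(
                    "pg_dump", "-U", "postgres", "-h", "localhost", "-p", "5432",
                    "--schema-only", "-d", "casedb", "-t", "public.*");
            // Same approach as the script: hand the password to pg_dump via PGPASSWORD.
            pb.environment().put("PGPASSWORD", "password");
            pb.redirectErrorStream(true);
            Process proc = pb.start();
            String schema = new String(proc.getInputStream().readAllBytes(), StandardCharsets.UTF_8);
            proc.waitFor();
            System.out.println(schema);
        }
    }
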
Args: @@ -754,7 +762,7 @@ def get_pg_schema(dbname: str, pg_username: str, pg_pword: str, pg_host: str, pg """ os.environ['PGPASSWORD'] = pg_pword - pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", pg_port, + pg_dump = ["pg_dump", "-U", pg_username, "-h", pg_host, "-p", str(pg_port), "--schema-only", "-d", dbname, "-t", "public.*"] output = subprocess.check_output(pg_dump) output_str = output.decode('UTF-8') @@ -957,10 +965,12 @@ def normalize_tsk_objects_path(guid_util: TskGuidUtils, objid: int, # chop off the last folder (which contains a date/time) path_parts = path_parts[:-1] - for idx in range(0, len(path_parts) - 1): - if path_parts[idx].lower() == "reports" and \ - path_parts[idx + 1].lower().startswith("autopsytestcase html report"): - path_parts = ["Reports", "AutopsyTestCase HTML Report"] + if path_parts and len(path_parts) >= 2: + for idx in range(0, len(path_parts) - 1): + if path_parts[idx].lower() == "reports" and \ + path_parts[idx + 1].lower().startswith("autopsytestcase html report"): + path_parts = ["Reports", "AutopsyTestCase HTML Report"] + break path = os.path.join(*path_parts) if len(path_parts) > 0 else '/' From 9ffe631a955b6ceb981228cc9462dcf29a9f53f7 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 10 May 2021 13:53:52 -0400 Subject: [PATCH 17/30] null fix --- test/script/tskdbdiff.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index aa28181f8f..9113059e1d 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -987,8 +987,9 @@ def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[ Returns: The normalized object table row. """ row_copy = row.copy() - row_copy['obj_id'] = normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) - row_copy['par_obj_id'] = normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') + row_copy['obj_id'] = normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) if row['obj_id'] else None + row_copy['par_obj_id'] = normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') \ + if row['par_obj_id'] else None return row_copy From a9fea75770de9149185e442bbb965c818cd92e0f Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 10 May 2021 14:12:27 -0400 Subject: [PATCH 18/30] hashsetCountFix --- Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java | 1 - 1 file changed, 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java b/Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java index 5318a99a00..5906b01a78 100644 --- a/Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java +++ b/Core/src/org/sleuthkit/autopsy/datamodel/HashsetHits.java @@ -179,7 +179,6 @@ public class HashsetHits implements AutopsyVisitableItem { TSK_HASHSET_HIT); super.setName(HASHSET_HITS); - super.setDisplayName(DISPLAY_NAME); this.setIconBaseWithExtension("org/sleuthkit/autopsy/images/hashset_hits.png"); //NON-NLS } From 4a1f3259b39bed16145d37d2bc55387752d61934 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Mon, 10 May 2021 14:53:43 -0400 Subject: [PATCH 19/30] artifact type constructor deprecation --- .../ArtifactSelectionDialog.java | 18 ++++-------------- .../infrastructure/ReportVisualPanel2.java | 18 ++++-------------- .../infrastructure/TableReportGenerator.java | 18 ++++-------------- .../datamodel/DataSourceInfoUtilitiesTest.java | 2 +- 4 files changed, 13 insertions(+), 43 deletions(-) diff --git 
a/Core/src/org/sleuthkit/autopsy/report/infrastructure/ArtifactSelectionDialog.java b/Core/src/org/sleuthkit/autopsy/report/infrastructure/ArtifactSelectionDialog.java index 78ae4479a5..715e18eb81 100644 --- a/Core/src/org/sleuthkit/autopsy/report/infrastructure/ArtifactSelectionDialog.java +++ b/Core/src/org/sleuthkit/autopsy/report/infrastructure/ArtifactSelectionDialog.java @@ -72,20 +72,10 @@ class ArtifactSelectionDialog extends javax.swing.JDialog { private void populateList() { try { ArrayList doNotReport = new ArrayList<>(); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getDisplayName())); // output is too unstructured for table review - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getDisplayName())); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT)); // output is too unstructured for table review + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT)); artifactTypes = Case.getCurrentCaseThrows().getSleuthkitCase().getArtifactTypesInUse(); artifactTypes.removeAll(doNotReport); diff --git a/Core/src/org/sleuthkit/autopsy/report/infrastructure/ReportVisualPanel2.java b/Core/src/org/sleuthkit/autopsy/report/infrastructure/ReportVisualPanel2.java index c7dbcfd5b1..a596eb6c38 100644 --- a/Core/src/org/sleuthkit/autopsy/report/infrastructure/ReportVisualPanel2.java +++ b/Core/src/org/sleuthkit/autopsy/report/infrastructure/ReportVisualPanel2.java @@ -200,20 +200,10 @@ final class ReportVisualPanel2 extends JPanel { try { Case openCase = Case.getCurrentCaseThrows(); ArrayList doNotReport = new ArrayList<>(); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getDisplayName())); // output is too unstructured for table review - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getTypeID(), - 
BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getDisplayName())); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT)); // output is too unstructured for table review + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT)); // get artifact types that exist in the current case artifacts = openCase.getSleuthkitCase().getArtifactTypesInUse(); diff --git a/Core/src/org/sleuthkit/autopsy/report/infrastructure/TableReportGenerator.java b/Core/src/org/sleuthkit/autopsy/report/infrastructure/TableReportGenerator.java index ca6722911e..34186c13b2 100644 --- a/Core/src/org/sleuthkit/autopsy/report/infrastructure/TableReportGenerator.java +++ b/Core/src/org/sleuthkit/autopsy/report/infrastructure/TableReportGenerator.java @@ -102,20 +102,10 @@ class TableReportGenerator { private void getAllExistingArtiactTypes() throws NoCurrentCaseException, TskCoreException { // get all possible artifact types ArrayList doNotReport = new ArrayList<>(); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT.getDisplayName())); // output is too unstructured for table review - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT.getDisplayName())); - doNotReport.add(new BlackboardArtifact.Type( - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getTypeID(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getLabel(), - BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT.getDisplayName())); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TOOL_OUTPUT)); // output is too unstructured for table review + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_ASSOCIATED_OBJECT)); + doNotReport.add(new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_TL_EVENT)); Case.getCurrentCaseThrows().getSleuthkitCase().getArtifactTypes().forEach(artifactTypes::add); artifactTypes.removeAll(doNotReport); diff --git a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java index 28b55155c1..7d2453c221 100644 --- a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java +++ b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java @@ -287,7 +287,7 @@ public class DataSourceInfoUtilitiesTest { @Test public void getArtifacts_failOnBytes() throws TskCoreException { testFailOnBadAttrType( - new BlackboardArtifact.Type(999, 
"BYTE_ARRAY_TYPE", "Byte Array Type"), + new BlackboardArtifact.Type(999, "BYTE_ARRAY_TYPE", "Byte Array Type", BlackboardArtifact.Category.DATA_ARTIFACT), new BlackboardAttribute.Type(999, "BYTE_ARR_ATTR_TYPE", "Byte Array Attribute Type", TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.BYTE), new byte[]{0x0, 0x1, 0x2}, BlackboardAttribute::new); From 234c6f34e3d80040154705006060a95d17d7115a Mon Sep 17 00:00:00 2001 From: Kelly Kelly Date: Tue, 11 May 2021 13:33:59 -0400 Subject: [PATCH 20/30] Moved the calls to isSupport and isPreferred to a Swingworker in DataContentPanel --- .../corecomponents/DataContentPanel.java | 212 +++++++++++++----- 1 file changed, 160 insertions(+), 52 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentPanel.java b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentPanel.java index 8ffabd27e6..eef00e31a3 100644 --- a/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentPanel.java +++ b/Core/src/org/sleuthkit/autopsy/corecomponents/DataContentPanel.java @@ -1,15 +1,15 @@ /* * Autopsy Forensic Browser - * + * * Copyright 2011-2018 Basis Technology Corp. * Contact: carrier sleuthkit org - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -23,8 +23,10 @@ import java.beans.PropertyChangeEvent; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.concurrent.ExecutionException; import java.util.logging.Level; import javax.swing.JTabbedPane; +import javax.swing.SwingWorker; import javax.swing.event.ChangeEvent; import javax.swing.event.ChangeListener; import org.openide.nodes.Node; @@ -49,6 +51,8 @@ public class DataContentPanel extends javax.swing.JPanel implements DataContent, private final boolean isMain; private boolean listeningToTabbedPane = false; + private DataContentPanelWorker workerThread; + /** * Creates new DataContentPanel panel The main data content panel can only * be created by the data content top component, thus this constructor is @@ -132,43 +136,54 @@ public class DataContentPanel extends javax.swing.JPanel implements DataContent, public void setNode(Node selectedNode) { // change the cursor to "waiting cursor" for this operation this.setCursor(Cursor.getPredefinedCursor(Cursor.WAIT_CURSOR)); - try { - String defaultName = NbBundle.getMessage(DataContentTopComponent.class, "CTL_DataContentTopComponent"); - // set the file path - if (selectedNode == null) { - setName(defaultName); - } else { - Content content = selectedNode.getLookup().lookup(Content.class); - if (content != null) { - //String path = DataConversion.getformattedPath(ContentUtils.getDisplayPath(selectedNode.getLookup().lookup(Content.class)), 0); - String path = defaultName; - try { - path = content.getUniquePath(); - } catch (TskCoreException ex) { - logger.log(Level.SEVERE, "Exception while calling Content.getUniquePath() for {0}", content); //NON-NLS - } - setName(path); - } else { - setName(defaultName); + // Reset everything + for (int index = 0; index < jTabbedPane1.getTabCount(); index++) { + jTabbedPane1.setEnabledAt(index, false); + viewers.get(index).resetComponent(); + } + + String defaultName = 
NbBundle.getMessage(DataContentTopComponent.class, "CTL_DataContentTopComponent"); + // set the file path + if (selectedNode == null) { + setName(defaultName); + } else { + Content content = selectedNode.getLookup().lookup(Content.class); + if (content != null) { + //String path = DataConversion.getformattedPath(ContentUtils.getDisplayPath(selectedNode.getLookup().lookup(Content.class)), 0); + String path = defaultName; + try { + path = content.getUniquePath(); + } catch (TskCoreException ex) { + logger.log(Level.SEVERE, "Exception while calling Content.getUniquePath() for {0}", content); //NON-NLS } + setName(path); + } else { + setName(defaultName); } + } - currentNode = selectedNode; + currentNode = selectedNode; - setupTabs(selectedNode); - } finally { - this.setCursor(null); + if (workerThread != null) { + workerThread.cancel(true); + } + + if (selectedNode != null) { + workerThread = new DataContentPanelWorker(currentNode); + workerThread.execute(); } } /** - * Resets the tabs based on the selected Node. If the selected node is null - * or not supported, disable that tab as well. + * Update the state of the tabs based on the given data. * - * @param selectedNode the selected content Node + * @param selectedNode The currently selected node. + * @param supportedIndices The indices of the tabs that are supported by + * this node type. + * @param preferredIndex The index of the tab which is preferred. */ - public void setupTabs(Node selectedNode) { + private void updateTabs(Node selectedNode, List supportedIndices, int preferredIndex) { // Deferring becoming a listener to the tabbed pane until this point // eliminates handling a superfluous stateChanged event during construction. if (listeningToTabbedPane == false) { @@ -176,31 +191,12 @@ public class DataContentPanel extends javax.swing.JPanel implements DataContent, listeningToTabbedPane = true; } - int currTabIndex = jTabbedPane1.getSelectedIndex(); - int totalTabs = jTabbedPane1.getTabCount(); - int maxPreferred = 0; - int preferredViewerIndex = 0; - for (int i = 0; i < totalTabs; ++i) { - UpdateWrapper dcv = viewers.get(i); - dcv.resetComponent(); - - // disable an unsupported tab (ex: picture viewer) - if ((selectedNode == null) || (dcv.isSupported(selectedNode) == false)) { - jTabbedPane1.setEnabledAt(i, false); - } else { - jTabbedPane1.setEnabledAt(i, true); - - // remember the viewer with the highest preference value - int currentPreferred = dcv.isPreferred(selectedNode); - if (currentPreferred > maxPreferred) { - preferredViewerIndex = i; - maxPreferred = currentPreferred; - } - } + for (Integer index : supportedIndices) { + jTabbedPane1.setEnabledAt(index, true); } // let the user decide if we should stay with the current viewer - int tabIndex = UserPreferences.keepPreferredContentViewer() ? currTabIndex : preferredViewerIndex; + int tabIndex = UserPreferences.keepPreferredContentViewer() ? jTabbedPane1.getSelectedIndex() : preferredIndex; UpdateWrapper dcv = viewers.get(tabIndex); // this is really only needed if no tabs were enabled @@ -272,4 +268,116 @@ public class DataContentPanel extends javax.swing.JPanel implements DataContent, } } + /** + * SwingWorker class to determine which tabs should be enabled for the given + * node. + */ + private class DataContentPanelWorker extends SwingWorker { + + private final Node node; + + /** + * Worker constructor. 
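The new DataContentPanelWorker follows the usual SwingWorker division of labor: the potentially slow isSupported/isPreferred checks run in doInBackground() off the event dispatch thread, and done() applies the result back on the EDT. A generic, self-contained sketch of that pattern (the class, field, and label names here are invented for illustration and are not Autopsy types):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.ExecutionException;
    import javax.swing.JLabel;
    import javax.swing.SwingWorker;

    class SupportedTabsWorker extends SwingWorker<List<Integer>, Void> {

        private final List<String> tabNames;   // stand-in for the list of content viewers
        private final JLabel statusLabel;      // stand-in for the UI that gets updated

        SupportedTabsWorker(List<String> tabNames, JLabel statusLabel) {
            this.tabNames = tabNames;
            this.statusLabel = statusLabel;
        }

        @Override
        protected List<Integer> doInBackground() {
            // Runs off the event dispatch thread, so slow checks do not freeze the UI.
            List<Integer> supported = new ArrayList<>();
            for (int i = 0; i < tabNames.size() && !isCancelled(); i++) {
                if (tabNames.get(i).startsWith("supported")) {
                    supported.add(i);
                }
            }
            return supported;
        }

        @Override
        protected void done() {
            if (isCancelled()) {
                return;
            }
            try {
                // Back on the EDT: safe to touch Swing components.
                statusLabel.setText("Enabled tabs: " + get());
            } catch (InterruptedException | ExecutionException ex) {
                statusLabel.setText("Could not determine supported tabs");
            }
        }
    }

A caller constructs the worker with the current selection and calls execute(), cancelling any still-running worker first, which is the same sequence setNode() uses in the patched panel.
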
+ * + * @param node + */ + DataContentPanelWorker(Node node) { + this.node = node; + } + + @Override + protected WorkerResults doInBackground() throws Exception { + if (node == null) { + return null; + } + + List supportedViewers = new ArrayList<>(); + int preferredViewerIndex = 0; + int maxPreferred = 0; + + for (int index = 0; index < viewers.size(); index++) { + UpdateWrapper dcv = viewers.get(index); + if (dcv.isSupported(node)) { + supportedViewers.add(index); + + int currentPreferred = dcv.isPreferred(node); + if (currentPreferred > maxPreferred) { + preferredViewerIndex = index; + maxPreferred = currentPreferred; + } + } + + if (this.isCancelled()) { + return null; + } + + } + + return new WorkerResults(node, supportedViewers, preferredViewerIndex); + } + + @Override + protected void done() { + // Do nothing if the thread was cancelled. + if (isCancelled()) { + return; + } + + try { + WorkerResults results = get(); + + if (results != null) { + updateTabs(results.getNode(), results.getSupportedIndices(), results.getPreferredViewerIndex()); + } + + } catch (InterruptedException | ExecutionException ex) { + logger.log(Level.SEVERE, "Failed to updated data content panel for node " + node.getName(), ex); + } finally { + setCursor(Cursor.getPredefinedCursor(Cursor.DEFAULT_CURSOR)); + } + } + } + + /** + * Utility class to store all of the data the SwingWorker collected. + */ + private class WorkerResults { + + private final Node node; + private final List supportedViewerIndices; + private final int preferredViewerIndex; + + WorkerResults(Node node, List supportedViewerIndices, int preferredViewerIndex) { + this.node = node; + this.supportedViewerIndices = supportedViewerIndices; + this.preferredViewerIndex = preferredViewerIndex; + } + + /** + * Returns the selected node. + * + * @return + */ + Node getNode() { + return node; + } + + /** + * A list of tab indices that are supported by this node type. + * + * @return A list of indices. + */ + List getSupportedIndices() { + return supportedViewerIndices; + } + + /** + * Returns the preferred tab index for the given node type. + * + * @return A valid tab index. + */ + int getPreferredViewerIndex() { + return preferredViewerIndex; + } + } } From 2e6fca968cf497a3080fbde09805700c9fc30b25 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 12 May 2021 16:36:14 -0400 Subject: [PATCH 21/30] is none changes --- test/script/tskdbdiff.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 9113059e1d..a6f24e1695 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -427,7 +427,8 @@ class TskGuidUtils: cursor.execute(select_statement) ret_dict = {} for row in cursor: - ret_dict[row[0]] = delim.join([str(col) if col else '' for col in row[1:]]) + # concatenate value rows with delimiter filtering out any null values. 
+ ret_dict[row[0]] = delim.join(filter(lambda col: col is not None, [str(col) for col in row[1:]])) return ret_dict @@ -864,7 +865,7 @@ def normalize_unalloc_files(path_str: Union[str, None]) -> Union[str, None]: # takes a file name like "Unalloc_30580_7466496_2980941312" and removes the object id to become # "Unalloc_7466496_2980941312" - return re.sub('Unalloc_[0-9]+_', 'Unalloc_', path_str) if path_str else None + return None if path_str is None else re.sub('Unalloc_[0-9]+_', 'Unalloc_', path_str) def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: @@ -877,7 +878,7 @@ def normalize_regripper_files(path_str: Union[str, None]) -> Union[str, None]: """ # takes a file name like "regripper-12345-full" and removes the id to become "regripper-full" - return re.sub(r'regripper-[0-9]+-full', 'regripper-full', path_str) if path_str else None + return None if path_str is None else re.sub(r'regripper-[0-9]+-full', 'regripper-full', path_str) def normalize_tsk_files(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[str, any]: @@ -893,8 +894,8 @@ def normalize_tsk_files(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[st # Ignore TIFF size and hash if extracted from PDFs. # See JIRA-6951 for more details. row_copy = row.copy() - if row['extension'] and row['extension'].strip().lower() == 'tif' and \ - row['parent_path'] and row['parent_path'].strip().lower().endswith('.pdf/'): + if row['extension'] is not None and row['extension'].strip().lower() == 'tif' and \ + row['parent_path'] is not None and row['parent_path'].strip().lower().endswith('.pdf/'): row_copy['size'] = "SIZE_IGNORED" row_copy['md5'] = "MD5_IGNORED" row_copy['sha256'] = "SHA256_IGNORED" @@ -917,7 +918,7 @@ def normalize_tsk_files_path(guid_util: TskGuidUtils, row: Dict[str, any]) -> Di """ row_copy = row.copy() path = row['path'] - if path: + if path is not None: path_parts = get_path_segs(path) module_output_idx = index_of(path_parts, 'ModuleOutput') if module_output_idx >= 0: @@ -951,7 +952,7 @@ def normalize_tsk_objects_path(guid_util: TskGuidUtils, objid: int, """ path = guid_util.get_guid_for_objid(objid, omitted_value=None) - if not path: + if path is None: return no_path_placeholder else: # remove host name (for multi-user) and dates/times from path for reports @@ -987,9 +988,12 @@ def normalize_tsk_objects(guid_util: TskGuidUtils, row: Dict[str, any]) -> Dict[ Returns: The normalized object table row. 
""" row_copy = row.copy() - row_copy['obj_id'] = normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) if row['obj_id'] else None - row_copy['par_obj_id'] = normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') \ - if row['par_obj_id'] else None + row_copy['obj_id'] = None if row['obj_id'] is None else \ + normalize_tsk_objects_path(guid_util, row['obj_id'], MASKED_OBJ_ID) + + row_copy['par_obj_id'] = None if row['par_obj_id'] is None else \ + normalize_tsk_objects_path(guid_util, row['par_obj_id'], 'MASKED_PARENT_OBJ_ID') + return row_copy From 4463592c7a1395701b780b9fbb707630d5ea35ed Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Wed, 12 May 2021 16:47:38 -0400 Subject: [PATCH 22/30] fix --- test/script/tskdbdiff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index a6f24e1695..7e22f5009a 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -428,7 +428,7 @@ class TskGuidUtils: ret_dict = {} for row in cursor: # concatenate value rows with delimiter filtering out any null values. - ret_dict[row[0]] = delim.join(filter(lambda col: col is not None, [str(col) for col in row[1:]])) + ret_dict[row[0]] = delim.join([str(col) for col in filter(lambda col: col is not None, row[1:])]) return ret_dict From 7479aeec9855ca17a6428bd9157c24eb1f6ab238 Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Thu, 13 May 2021 11:05:23 -0400 Subject: [PATCH 23/30] json-like output --- test/script/tskdbdiff.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/test/script/tskdbdiff.py b/test/script/tskdbdiff.py index 7e22f5009a..ac0f4cb044 100644 --- a/test/script/tskdbdiff.py +++ b/test/script/tskdbdiff.py @@ -1098,18 +1098,14 @@ def write_normalized(guid_utils: TskGuidUtils, output_file, db_conn, table: str, row_dict = row_masker.normalize(guid_utils, row_dict) if row_dict is not None: - # NOTE: This is an alternate approach to representing values as json-like lines - # entries = [] - # for column in column_names: - # value = get_sql_insert_value(row_dict[column] if column in row_dict and row_dict[column] else None) - # if value: - # entries.append((column, value)) - # insert_values = ", ".join([f"{pr[0]}: {pr[1]}" for pr in entries]) - # insert_statement = f"{table}: {{{insert_values}}}\n" - # output_file.write(insert_statement) - - values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names) - insert_statement = f'INSERT INTO "{table}" VALUES({values_statement});\n' + # show row as json-like value + entries = [] + for column in column_names: + value = get_sql_insert_value(row_dict[column] if column in row_dict and row_dict[column] else None) + if value is not None: + entries.append((column, value)) + insert_values = ", ".join([f"{pr[0]}: {pr[1]}" for pr in entries]) + insert_statement = f"{table}: {{{insert_values}}}\n" output_file.write(insert_statement) From 1042f2844ffc71b07ea1215af859d7b2196bd649 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 13 May 2021 17:09:04 -0400 Subject: [PATCH 24/30] 7608 fix other occurrences viewer --- .../contentviewer/OtherOccurrencesPanel.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index d6588c2313..12d676200e 100644 --- 
a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -380,13 +380,12 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { int totalCount = 0; Set dataSources = new HashSet<>(); if (CentralRepository.isEnabled()) { - try { - List instances; - instances = CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value); + correlationAttributes.addAll(CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value)); HashMap nodeDataMap = new HashMap<>(); String caseUUID = Case.getCurrentCase().getName(); - for (CorrelationAttributeInstance artifactInstance : instances) { + // get the attributes we can correlate on + for (CorrelationAttributeInstance artifactInstance : correlationAttributes) { // Only add the attribute if it isn't the object the user selected. // We consider it to be a different object if at least one of the following is true: @@ -395,10 +394,9 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { // - the data source device ID is different // - the file path is different if (artifactInstance.getCorrelationCase().getCaseUUID().equals(caseUUID) - || (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) - || (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) - || (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { - correlationAttributes.add(artifactInstance); + && (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) + && (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) + && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { continue; } OtherOccurrenceNodeInstanceData newNode = new OtherOccurrenceNodeInstanceData(artifactInstance, aType, value); @@ -510,7 +508,7 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { * artifact. If the central repo is not enabled, this will only return files * from the current case with matching MD5 hashes. 
* - * @param corAttr CorrelationAttribute to query for + * @param corAttr CorrelationAttribute to query for * * @return A collection of correlated artifact instances */ @@ -533,9 +531,9 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { // - the data source device ID is different // - the file path is different if (artifactInstance.getCorrelationCase().getCaseUUID().equals(caseUUID) - || (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) - || (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) - || (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { + && (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) + && (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) + && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { continue; } OtherOccurrenceNodeInstanceData newNode = new OtherOccurrenceNodeInstanceData(artifactInstance, corAttr.getCorrelationType(), corAttr.getCorrelationValue()); From abfbc3106e15cf86c5ef66ce625f903713bdcf88 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 13 May 2021 17:34:54 -0400 Subject: [PATCH 25/30] 7608 fix discovery use case and add comment --- .../contentviewer/OtherOccurrencesPanel.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index 12d676200e..b0d24cad06 100644 --- a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -381,11 +381,12 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { Set dataSources = new HashSet<>(); if (CentralRepository.isEnabled()) { try { - correlationAttributes.addAll(CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value)); + List instances; + instances = CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value); HashMap nodeDataMap = new HashMap<>(); String caseUUID = Case.getCurrentCase().getName(); // get the attributes we can correlate on - for (CorrelationAttributeInstance artifactInstance : correlationAttributes) { + for (CorrelationAttributeInstance artifactInstance : instances) { // Only add the attribute if it isn't the object the user selected. 
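The substance of this fix is the switch from ||-joined to &&-joined checks: an occurrence is treated as the item the user selected (and skipped) only when case, data source name, device id, and file path all match, rather than when any one of them matches. A small illustrative sketch of the difference (the method and parameter names are made up, not the Autopsy API):

    class SameObjectCheckSketch {

        // Old behaviour: skip when ANY attribute matches, so an instance in the same
        // case but from a different data source or path was dropped from the results.
        static boolean skipAnyMatch(boolean sameCase, boolean sameDataSource,
                                    boolean sameDevice, boolean samePath) {
            return sameCase || sameDataSource || sameDevice || samePath;
        }

        // Fixed behaviour: skip only when EVERY attribute matches, i.e. it really is
        // the selected item itself.
        static boolean skipAllMatch(boolean sameCase, boolean sameDataSource,
                                    boolean sameDevice, boolean samePath) {
            return sameCase && sameDataSource && sameDevice && samePath;
        }

        public static void main(String[] args) {
            // Same case, different data source: should still be listed as an other occurrence.
            System.out.println(skipAnyMatch(true, false, false, false)); // true  -> wrongly skipped
            System.out.println(skipAllMatch(true, false, false, false)); // false -> kept
        }
    }
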
// We consider it to be a different object if at least one of the following is true: @@ -396,7 +397,9 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { if (artifactInstance.getCorrelationCase().getCaseUUID().equals(caseUUID) && (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) && (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) - && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { + && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { + //because we are only correlating on one type we can add that only when everything is the same + correlationAttributes.add(artifactInstance); continue; } OtherOccurrenceNodeInstanceData newNode = new OtherOccurrenceNodeInstanceData(artifactInstance, aType, value); From f5b017362c43c3ec681d1497cb4a5e65b7d42498 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 13 May 2021 18:44:03 -0400 Subject: [PATCH 26/30] 7608 fix the discovery version --- .../contentviewer/OtherOccurrencesPanel.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index b0d24cad06..f1e724c092 100644 --- a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -397,11 +397,10 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { if (artifactInstance.getCorrelationCase().getCaseUUID().equals(caseUUID) && (!StringUtils.isBlank(dataSourceName) && artifactInstance.getCorrelationDataSource().getName().equals(dataSourceName)) && (!StringUtils.isBlank(deviceId) && artifactInstance.getCorrelationDataSource().getDeviceID().equals(deviceId)) - && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { - //because we are only correlating on one type we can add that only when everything is the same - correlationAttributes.add(artifactInstance); + && (file != null && artifactInstance.getFilePath().equalsIgnoreCase(file.getParentPath() + file.getName()))) { continue; } + correlationAttributes.add(artifactInstance); OtherOccurrenceNodeInstanceData newNode = new OtherOccurrenceNodeInstanceData(artifactInstance, aType, value); UniquePathKey uniquePathKey = new UniquePathKey(newNode); nodeDataMap.put(uniquePathKey, newNode); From 9ecf7c77ddb17eed8d200edeb4a50a07c9524aa8 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Thu, 13 May 2021 18:45:55 -0400 Subject: [PATCH 27/30] 7608 remove wrong comment --- .../centralrepository/contentviewer/OtherOccurrencesPanel.java | 1 - 1 file changed, 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index f1e724c092..1be181bff2 100644 --- a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -385,7 +385,6 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { instances = 
CentralRepository.getInstance().getArtifactInstancesByTypeValue(aType, value); HashMap nodeDataMap = new HashMap<>(); String caseUUID = Case.getCurrentCase().getName(); - // get the attributes we can correlate on for (CorrelationAttributeInstance artifactInstance : instances) { // Only add the attribute if it isn't the object the user selected. From a914ab6295f771809dcd02cb95f77071f4498c11 Mon Sep 17 00:00:00 2001 From: William Schaefer Date: Fri, 14 May 2021 10:50:40 -0400 Subject: [PATCH 28/30] 7613 add pop up menu back --- .../centralrepository/contentviewer/OtherOccurrencesPanel.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java index d6588c2313..45bbceed2b 100644 --- a/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java +++ b/Core/src/org/sleuthkit/autopsy/centralrepository/contentviewer/OtherOccurrencesPanel.java @@ -130,7 +130,7 @@ public final class OtherOccurrencesPanel extends javax.swing.JPanel { exportToCSVMenuItem.addActionListener(actList); showCaseDetailsMenuItem.addActionListener(actList); showCommonalityMenuItem.addActionListener(actList); - + filesTable.setComponentPopupMenu(rightClickPopupMenu); // Configure column sorting. TableRowSorter sorter = new TableRowSorter<>(filesTable.getModel()); filesTable.setRowSorter(sorter); From 2f5790c2fee67a749a374b9e723c68ebaa80dfa6 Mon Sep 17 00:00:00 2001 From: Mark McKinnon Date: Sat, 15 May 2021 22:19:41 -0400 Subject: [PATCH 29/30] Update ExtractRegistry.java Install date from regripper is UTC time, when it is parsed it does not recognize a timezone so it defaults to current timezone of pc, or that appears to happen. When UTC timezone added to parse it stores the epoch time correctly. 
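In other words, without an explicit offset SimpleDateFormat parses the RegRipper timestamp in the machine's local time zone, so the stored epoch value shifts from machine to machine; appending "+0000" and adding Z to the pattern pins the parse to UTC. A small sketch of the difference (the timestamp value below is an example, not taken from a real hive):

    import java.text.ParseException;
    import java.text.SimpleDateFormat;
    import java.util.Locale;

    public class InstallDateParseSketch {
        public static void main(String[] args) throws ParseException {
            String value = "Wed Mar 10 14:00:00 2021";  // RegRipper-style InstallDate, actually UTC

            // No zone in the pattern: the result depends on the local machine's time zone.
            long localInterpretation = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy", Locale.US)
                    .parse(value).getTime() / 1000;

            // Explicit +0000 offset, as in the patch: always the UTC epoch seconds.
            long utcInterpretation = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyyZ", Locale.US)
                    .parse(value + "+0000").getTime() / 1000;

            System.out.println(localInterpretation + " vs " + utcInterpretation);
        }
    }
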
--- .../org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java index b0d7fe08c3..7875dd794a 100644 --- a/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java +++ b/RecentActivity/src/org/sleuthkit/autopsy/recentactivity/ExtractRegistry.java @@ -631,7 +631,7 @@ class ExtractRegistry extends Extract { case "InstallDate": //NON-NLS if (value != null && !value.isEmpty()) { try { - installtime = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyy", US).parse(value).getTime(); + installtime = new SimpleDateFormat("EEE MMM d HH:mm:ss yyyyZ", US).parse(value+"+0000").getTime(); String Tempdate = installtime.toString(); installtime = Long.valueOf(Tempdate) / MS_IN_SEC; } catch (ParseException e) { From cc66187e22b7f915eaa36574a7dd79d1c919dbdc Mon Sep 17 00:00:00 2001 From: Greg DiCristofaro Date: Tue, 18 May 2021 08:06:39 -0400 Subject: [PATCH 30/30] unit test fixes for changes in blackboard artifact type constructors --- .../datamodel/DataSourceInfoUtilitiesTest.java | 2 +- .../datasourcesummary/datamodel/UserActivitySummaryTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java index 7d2453c221..1b5d809b1f 100644 --- a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java +++ b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/DataSourceInfoUtilitiesTest.java @@ -287,7 +287,7 @@ public class DataSourceInfoUtilitiesTest { @Test public void getArtifacts_failOnBytes() throws TskCoreException { testFailOnBadAttrType( - new BlackboardArtifact.Type(999, "BYTE_ARRAY_TYPE", "Byte Array Type", BlackboardArtifact.Category.DATA_ARTIFACT), + BlackboardArtifact.Type.TSK_YARA_HIT, new BlackboardAttribute.Type(999, "BYTE_ARR_ATTR_TYPE", "Byte Array Attribute Type", TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE.BYTE), new byte[]{0x0, 0x1, 0x2}, BlackboardAttribute::new); diff --git a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/UserActivitySummaryTest.java b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/UserActivitySummaryTest.java index 572d4f86b3..2b6a1c6600 100644 --- a/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/UserActivitySummaryTest.java +++ b/Core/test/unit/src/org/sleuthkit/autopsy/datasourcesummary/datamodel/UserActivitySummaryTest.java @@ -353,7 +353,7 @@ public class UserActivitySummaryTest { List results = summary.getRecentDevices(dataSource, 10); Assert.assertEquals(1, results.size()); - Assert.assertEquals((long) (DAY_SECONDS + 2), results.get(0).getLastAccessed().getTime() / 1000); + Assert.assertEquals((DAY_SECONDS + 2), results.get(0).getLastAccessed().getTime() / 1000); Assert.assertTrue("ID1".equalsIgnoreCase(results.get(0).getDeviceId())); Assert.assertTrue("MAKE1".equalsIgnoreCase(results.get(0).getDeviceMake())); Assert.assertTrue("MODEL1".equalsIgnoreCase(results.get(0).getDeviceModel()));
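Viewed together, the artifact-type changes in patches 19 and 30 follow one pattern: stop spelling out type id, label, and display name by hand and use the enum-based constructor or a predefined Type constant instead. A brief before/after sketch, limited to constructors and constants that already appear in the diffs above:

    import org.sleuthkit.datamodel.BlackboardArtifact;

    class ArtifactTypeMigrationSketch {
        static void examples() {
            // Older three-argument form that the patches replace (repeats the enum's metadata):
            BlackboardArtifact.Type oldStyle = new BlackboardArtifact.Type(
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getTypeID(),
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getLabel(),
                    BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO.getDisplayName());

            // Forms the patches switch to: construct from the enum value, or use a
            // predefined Type constant where one exists.
            BlackboardArtifact.Type fromEnum =
                    new BlackboardArtifact.Type(BlackboardArtifact.ARTIFACT_TYPE.TSK_GEN_INFO);
            BlackboardArtifact.Type yaraHit = BlackboardArtifact.Type.TSK_YARA_HIT;
        }
    }
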