mirror of https://github.com/overcuriousity/autopsy-flatpak.git
synced 2025-07-06 21:00:22 +00:00

commit 9d30b40846 (parent 2a4d3c0c8f): integrated into tskdbdiff
@@ -1,206 +0,0 @@

from typing import List, Dict, Callable, Union

import psycopg2
import sqlite3


def get_sqlite_table_columns(conn) -> Dict[str, List[str]]:
    cur = conn.cursor()
    cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'")
    tables = list([table[0] for table in cur.fetchall()])
    cur.close()

    to_ret = {}
    for table in tables:
        cur = conn.cursor()
        cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table])
        to_ret[table] = list([col[0] for col in cur.fetchall()])

    return to_ret
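
# Illustrative usage sketch (not part of the original file); assumes a local
# SQLite case database path:
#
#   with sqlite3.connect(r"C:\cases\autopsy.db") as conn:
#       for table, cols in get_sqlite_table_columns(conn).items():
#           print(f"{table}: {cols}")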


IGNORE_TABLE = "IGNORE_TABLE"


class TskDbEnvironment:
    pass


class MaskRow:
    row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Dict[str, any]]

    def __init__(self, row_masker: Callable[[TskDbEnvironment, Dict[str, any]], Union[Dict[str, any], None]]):
        self.row_masker = row_masker

    def mask(self, db_env: TskDbEnvironment, row: Dict[str, any]) -> Union[Dict[str, any], None]:
        return self.row_masker(db_env, row)


class MaskColumns(MaskRow):
    @classmethod
    def _mask_col_vals(cls,
                       col_mask: Dict[str, Union[any, Callable[[TskDbEnvironment, any], any]]],
                       db_env: TskDbEnvironment,
                       row: Dict[str, any]):

        row_copy = dict(row)
        for key, val in col_mask.items():
            # only replace values if present in row
            if key in row_copy:
                # if a column replacing function, call with original value
                if callable(val):
                    row_copy[key] = val(db_env, row[key])
                # otherwise, just replace with mask value
                else:
                    row_copy[key] = val

        return row_copy

    def __init__(self, col_mask: Dict[str, Union[any, Callable[[any], any]]]):
        super().__init__(lambda db_env, row: MaskColumns._mask_col_vals(col_mask, db_env, row))


# a table is either ignored outright (the IGNORE_TABLE string) or has its rows masked
TableNormalization = Union[str, MaskRow]
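
# Illustrative usage sketch (not part of the original file): a MaskColumns
# normalization replaces matching columns in a row dict and leaves the rest alone:
#
#   masker = MaskColumns({"obj_id": "{masked}", "crtime": 0})
#   masker.mask(TskDbEnvironment(), {"obj_id": 42, "crtime": 1588000000, "path": "/x"})
#   # -> {"obj_id": "{masked}", "crtime": 0, "path": "/x"}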


MASKED_OBJ_ID = "MASKED_OBJ_ID"
MASKED_ID = "MASKED_ID"

table_masking: Dict[str, TableNormalization] = {
    "tsk_files": MaskColumns({
        # TODO
    }),

    "tsk_vs_parts": MaskColumns({
        "obj_id": MASKED_OBJ_ID
    }),
    "image_gallery_groups": MaskColumns({
        "obj_id": MASKED_OBJ_ID
    }),
    "image_gallery_groups_seen": IGNORE_TABLE,
    # NOTE there was code in normalization for this, but the table is ignored?
    # "image_gallery_groups_seen": MaskColumns({
    #     "id": MASKED_ID,
    #     "group_id": MASKED_ID,
    # }),
    # TODO
    "tsk_files_path": None,
    # TODO
    "tsk_file_layout": None,
    "tsk_objects": None,
    "reports": MaskColumns({
        "obj_id": MASKED_OBJ_ID,
        "path": "AutopsyTestCase",
        "crtime": 0
    }),
    "data_source_info": MaskColumns({
        "device_id": "{device id}",
        "added_date_time": "{dateTime}"
    }),
    # TODO
    "ingest_jobs": None,
    "tsk_examiners": MaskColumns({
        "login_name": "{examiner_name}"
    }),
    "tsk_events": MaskColumns({
        "event_id": "MASKED_EVENT_ID",
        "time": 0,
    }),
    # TODO
    "event_description_index": None,
    "tsk_os_accounts": MaskColumns({
        "os_account_obj_id": MASKED_OBJ_ID
    }),
    # TODO
    "tsk_data_artifacts": None
}


# files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1
# path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1
# object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1
# vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1
# report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1
# layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1
# data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find('INSERT INTO data_source_info ') > -1
# event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find('INSERT INTO tsk_event_descriptions ') > -1
# events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1
# ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1
# examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1
# ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find('INSERT INTO image_gallery_groups ') > -1
# ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find('INSERT INTO image_gallery_groups_seen ') > -1
# os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1
# os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find('INSERT INTO tsk_os_account_attributes') > -1
# os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find('INSERT INTO tsk_os_account_instances') > -1
# data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find('INSERT INTO tsk_data_artifacts') > -1


def get_pg_table_columns(conn) -> Dict[str, List[str]]:
    cursor = conn.cursor()
    cursor.execute("""
        SELECT cols.table_name, cols.column_name
        FROM information_schema.columns cols
        WHERE cols.column_name IS NOT NULL
        AND cols.table_name IS NOT NULL
        AND cols.table_name IN (
            SELECT tables.tablename FROM pg_catalog.pg_tables tables
            WHERE LOWER(schemaname) = 'public'
        )
        ORDER by cols.table_name, cols.ordinal_position;
    """)
    mapping = {}
    for row in cursor:
        mapping.setdefault(row[0], []).append(row[1])

    cursor.close()
    return mapping


def get_sql_insert_value(val) -> str:
    # only None maps to NULL so that 0 and empty strings are preserved
    if val is None:
        return "NULL"

    if isinstance(val, str):
        escaped_val = val.replace('\n', '\\n').replace("'", "''")
        return f"'{escaped_val}'"

    return str(val)
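
# Example outputs (illustrative, not in the original file):
#   get_sql_insert_value(None)   -> NULL
#   get_sql_insert_value(0)      -> 0
#   get_sql_insert_value("it's") -> 'it''s'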


def write_normalized(output_file, db_conn, table: str, column_names: List[str], normalizer=None):
    cursor = db_conn.cursor()

    joined_columns = ",".join([col for col in column_names])
    cursor.execute(f"SELECT {joined_columns} FROM {table}")
    for row in cursor:
        if len(row) != len(column_names):
            print(f"ERROR: in {table}, number of columns retrieved: {len(row)} but columns are {len(column_names)} with {str(column_names)}")
            continue

        row_dict = {}
        for col_idx in range(0, len(column_names)):
            row_dict[column_names[col_idx]] = row[col_idx]

        if normalizer:
            row_dict = normalizer(table, row_dict)

        values_statement = ",".join(get_sql_insert_value(row_dict[col]) for col in column_names)
        insert_statement = f'INSERT INTO "{table}" VALUES({values_statement})\n'
        output_file.write(insert_statement)
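
# Illustrative output line (assuming a masked two-column "reports" table):
#   INSERT INTO "reports" VALUES('MASKED_OBJ_ID','AutopsyTestCase')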


#with sqlite3.connect(r"C:\Users\gregd\Desktop\autopsy_412.db") as conn, \
with psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345") as conn, \
        open(r"C:\Users\gregd\Desktop\dbdump.sql", mode="w", encoding='utf-8') as output_file:

    for table, cols in get_pg_table_columns(conn).items():
    # for table, cols in get_sqlite_table_columns(conn).items():
        write_normalized(output_file, conn, table, cols)
File diff suppressed because it is too large
@@ -1,969 +0,0 @@

# Requires python3

import re
import sqlite3
import subprocess
import shutil
import os
import codecs
import datetime
import sys
from typing import Dict, List

import psycopg2
import psycopg2.extras
import socket
import csv

class TskDbDiff(object):
    """Compares two TSK/Autopsy SQLite databases.

    Attributes:
        gold_artifacts:
        autopsy_artifacts:
        gold_attributes:
        autopsy_attributes:
        gold_objects:
        autopsy_objects:
        artifact_comparison:
        attribute_comparison:
        report_errors: a listof_listof_String, the error messages that will be
            printed to screen in the run_diff method
        passed: a boolean, did the diff pass?
        autopsy_db_file:
        gold_db_file:
    """
    def __init__(self, output_db, gold_db, output_dir=None, gold_bb_dump=None, gold_dump=None, verbose=False, isMultiUser=False, pgSettings=None):
        """Constructor for TskDbDiff.

        Args:
            output_db_path: path to output database (non-gold standard)
            gold_db_path: path to gold database
            output_dir: (optional) Path to folder where generated files will be put.
            gold_bb_dump: (optional) path to file where the gold blackboard dump is located
            gold_dump: (optional) path to file where the gold non-blackboard dump is located
            verbose: (optional) a boolean, if true, diff results are sent to stdout.
        """

        self.output_db_file = output_db
        self.gold_db_file = gold_db
        self.output_dir = output_dir
        self.gold_bb_dump = gold_bb_dump
        self.gold_dump = gold_dump
        self._generate_gold_dump = False
        self._generate_gold_bb_dump = False
        self._bb_dump_diff = ""
        self._dump_diff = ""
        self._bb_dump = ""
        self._dump = ""
        self.verbose = verbose
        self.isMultiUser = isMultiUser
        self.pgSettings = pgSettings

        if self.isMultiUser and not self.pgSettings:
            print("Missing PostgreSQL database connection settings data.")
            sys.exit(1)

        if self.gold_bb_dump is None:
            self._generate_gold_bb_dump = True
        if self.gold_dump is None:
            self._generate_gold_dump = True

    def run_diff(self):
        """Compare the databases.

        Raises:
            TskDbDiffException: if an error occurs while diffing or dumping the database
        """

        self._init_diff()
        id_obj_path_table = -1
        # generate the gold database dumps if necessary
        if self._generate_gold_dump:
            id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.gold_db_file, self.gold_dump, self.isMultiUser, self.pgSettings)
        if self._generate_gold_bb_dump:
            TskDbDiff._dump_output_db_bb(self.gold_db_file, self.gold_bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table)

        # generate the output database dumps (both DB and BB)
        id_obj_path_table = TskDbDiff._dump_output_db_nonbb(self.output_db_file, self._dump, self.isMultiUser, self.pgSettings)
        TskDbDiff._dump_output_db_bb(self.output_db_file, self._bb_dump, self.isMultiUser, self.pgSettings, id_obj_path_table)

        # Compare non-BB
        dump_diff_pass = self._diff(self._dump, self.gold_dump, self._dump_diff)

        # Compare BB
        bb_dump_diff_pass = self._diff(self._bb_dump, self.gold_bb_dump, self._bb_dump_diff)

        self._cleanup_diff()
        return dump_diff_pass, bb_dump_diff_pass

    def _init_diff(self):
        """Set up the necessary files based on the arguments given at construction"""
        if self.output_dir is None:
            # No stored files
            self._bb_dump = TskDbDiff._get_tmp_file("BlackboardDump", ".txt")
            self._bb_dump_diff = TskDbDiff._get_tmp_file("BlackboardDump-Diff", ".txt")
            self._dump = TskDbDiff._get_tmp_file("DBDump", ".txt")
            self._dump_diff = TskDbDiff._get_tmp_file("DBDump-Diff", ".txt")
        else:
            self._bb_dump = os.path.join(self.output_dir, "BlackboardDump.txt")
            self._bb_dump_diff = os.path.join(self.output_dir, "BlackboardDump-Diff.txt")
            self._dump = os.path.join(self.output_dir, "DBDump.txt")
            self._dump_diff = os.path.join(self.output_dir, "DBDump-Diff.txt")

        # Sorting gold before comparing (sort behaves differently in different environments)
        new_bb = TskDbDiff._get_tmp_file("GoldBlackboardDump", ".txt")
        new_db = TskDbDiff._get_tmp_file("GoldDBDump", ".txt")
        if self.gold_bb_dump is not None:
            srtcmdlst = ["sort", self.gold_bb_dump, "-o", new_bb]
            subprocess.call(srtcmdlst)
            srtcmdlst = ["sort", self.gold_dump, "-o", new_db]
            subprocess.call(srtcmdlst)
            self.gold_bb_dump = new_bb
            self.gold_dump = new_db

    def _cleanup_diff(self):
        if self.output_dir is None:
            # cleanup temp files
            os.remove(self._dump)
            os.remove(self._bb_dump)
            if os.path.isfile(self._dump_diff):
                os.remove(self._dump_diff)
            if os.path.isfile(self._bb_dump_diff):
                os.remove(self._bb_dump_diff)

        if self.gold_bb_dump is None:
            os.remove(self.gold_bb_dump)
            os.remove(self.gold_dump)

    def _diff(self, output_file, gold_file, diff_path):
        """Compare two text files.

        Args:
            output_file: a pathto_File, the latest text file
            gold_file: a pathto_File, the gold text file
            diff_path: The file to write the differences to
        Returns False if different
        """

        if (not os.path.isfile(output_file)):
            return False

        if (not os.path.isfile(gold_file)):
            return False

        # It is faster to read the contents in and directly compare
        output_data = codecs.open(output_file, "r", "utf_8").read()
        gold_data = codecs.open(gold_file, "r", "utf_8").read()
        if (gold_data == output_data):
            return True

        # If they are different, invoke 'diff'
        diff_file = codecs.open(diff_path, "wb", "utf_8")
        # Gold needs to be passed in as 1st arg and output as 2nd
        dffcmdlst = ["diff", gold_file, output_file]
        subprocess.call(dffcmdlst, stdout = diff_file)

        # create file path for gold files inside output folder. In case of diff, both gold and current run files
        # are available in the report output folder. Prefix Gold- is added to the filename.
        gold_file_in_output_dir = output_file[:output_file.rfind("/")] + "/Gold-" + output_file[output_file.rfind("/")+1:]
        shutil.copy(gold_file, gold_file_in_output_dir)

        return False

    def _dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table):
        """Dumps sorted text results to the given output location.

        Smart method that deals with a blackboard comparison to avoid issues
        with different IDs based on when artifacts were created.

        Args:
            db_file: a pathto_File, the output database.
            bb_dump_file: a pathto_File, the sorted dump file to write to
        """

        unsorted_dump = TskDbDiff._get_tmp_file("dump_data", ".txt")
        if isMultiUser:
            conn, unused_db = db_connect(db_file, isMultiUser, pgSettings)
            artifact_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        else: # Use Sqlite
            conn = sqlite3.connect(db_file)
            conn.text_factory = lambda x: x.decode("utf-8", "ignore")
            conn.row_factory = sqlite3.Row
            artifact_cursor = conn.cursor()
        # Get the list of all artifacts (along with type and associated file)
        # @@@ Could add a SORT by parent_path in here since that is how we are going to later sort it.
        artifact_cursor.execute("SELECT tsk_files.parent_path, tsk_files.name, blackboard_artifact_types.display_name, blackboard_artifacts.artifact_id FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id")
        database_log = codecs.open(unsorted_dump, "wb", "utf_8")
        row = artifact_cursor.fetchone()
        appnd = False
        counter = 0
        artifact_count = 0
        artifact_fail = 0

        # Cycle through artifacts
        try:
            while (row != None):

                # File Name and artifact type
                # Remove parent object ID from Unalloc file name
                normalizedName = re.sub('^Unalloc_[0-9]+_', 'Unalloc_', row["name"])
                if(row["parent_path"] != None):
                    database_log.write(row["parent_path"] + normalizedName + ' <artifact type="' + row["display_name"] + '" > ')
                else:
                    database_log.write(normalizedName + ' <artifact type="' + row["display_name"] + '" > ')

                if isMultiUser:
                    attribute_cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
                else:
                    attribute_cursor = conn.cursor()
                looptry = True
                artifact_count += 1
                try:
                    art_id = ""
                    art_id = str(row["artifact_id"])

                    # Get attributes for this artifact
                    if isMultiUser:
                        attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id = %s ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id])
                    else:
                        attribute_cursor.execute("SELECT blackboard_attributes.source, blackboard_attributes.attribute_type_id, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double FROM blackboard_attributes INNER JOIN blackboard_attribute_types ON blackboard_attributes.attribute_type_id = blackboard_attribute_types.attribute_type_id WHERE artifact_id =? ORDER BY blackboard_attributes.source, blackboard_attribute_types.display_name, blackboard_attributes.value_type, blackboard_attributes.value_text, blackboard_attributes.value_int32, blackboard_attributes.value_int64, blackboard_attributes.value_double", [art_id])

                    attributes = attribute_cursor.fetchall()

                    # Print attributes
                    if (len(attributes) == 0):
                        # @@@@ This should be </artifact>
                        database_log.write(' <artifact/>\n')
                        row = artifact_cursor.fetchone()
                        continue

                    src = attributes[0][0]
                    for attr in attributes:
                        numvals = 0
                        for x in range(3, 6):
                            if(attr[x] != None):
                                numvals += 1
                        if(numvals > 1):
                            msg = "There were too many values for attribute type: " + attr["display_name"] + " for artifact with id #" + str(row["artifact_id"]) + ".\n"

                        if(not attr["source"] == src):
                            msg = "There were inconsistent sources for artifact with id #" + str(row["artifact_id"]) + ".\n"

                        try:
                            if attr["value_type"] == 0:
                                attr_value_as_string = str(attr["value_text"])
                            elif attr["value_type"] == 1:
                                attr_value_as_string = str(attr["value_int32"])
                            elif attr["value_type"] == 2:
                                attr_value_as_string = str(attr["value_int64"])
                                if attr["attribute_type_id"] == 36 and id_obj_path_table != -1 and int(attr_value_as_string) > 0: # normalize positive TSK_PATH_IDs from an object id to a path if the id_obj_path_table was generated
                                    attr_value_as_string = id_obj_path_table[int(attr_value_as_string)]
                            elif attr["value_type"] == 3:
                                attr_value_as_string = "%20.10f" % float((attr["value_double"])) # use exact format from db schema to avoid python auto-formatting the double value into (0E-10) scientific style
                            elif attr["value_type"] == 4:
                                attr_value_as_string = "bytes"
                            elif attr["value_type"] == 5:
                                attr_value_as_string = str(attr["value_int64"])
                            if attr["display_name"] == "Associated Artifact":
                                attr_value_as_string = getAssociatedArtifactType(attribute_cursor, attr_value_as_string, isMultiUser)
                            patrn = re.compile("[\n\0\a\b\r\f]")
                            attr_value_as_string = re.sub(patrn, ' ', attr_value_as_string)
                            if attr["source"] == "Keyword Search" and attr["display_name"] == "Keyword Preview":
                                attr_value_as_string = "<Keyword Preview placeholder>"
                            database_log.write('<attribute source="' + attr["source"] + '" type="' + attr["display_name"] + '" value="' + attr_value_as_string + '" />')
                        except IOError as e:
                            print("IO error")
                            raise TskDbDiffException("Unexpected IO error while writing to database log." + str(e))

                except sqlite3.Error as e:
                    msg = "Attributes in artifact id (in output DB)# " + str(row["artifact_id"]) + " encountered an error: " + str(e) + " .\n"
                    print("Attributes in artifact id (in output DB)# ", str(row["artifact_id"]), " encountered an error: ", str(e))
                    print()
                    looptry = False
                    artifact_fail += 1
                    database_log.write('Error Extracting Attributes')
                    database_log.close()
                    raise TskDbDiffException(msg)
                finally:
                    attribute_cursor.close()

                # @@@@ This should be </artifact>
                database_log.write(' <artifact/>\n')
                row = artifact_cursor.fetchone()

            if(artifact_fail > 0):
                msg = "There were " + str(artifact_count) + " artifacts and " + str(artifact_fail) + " threw an exception while loading.\n"
        except Exception as e:
            raise TskDbDiffException("Unexpected error while dumping blackboard database: " + str(e))
        finally:
            database_log.close()
            artifact_cursor.close()
            conn.close()

        # Now sort the file
        srtcmdlst = ["sort", unsorted_dump, "-o", bb_dump_file]
        subprocess.call(srtcmdlst)

    # for key, val in get_pg_table_columns(psycopg2.connect(dbname="jythontest1_20200414_124128", user="postgres", password="password12345")).items():
    # for key, val in get_sqlite_table_columns(sqlite3.connect(r"C:\Users\gregd\Documents\cases\7500-take4\autopsy.db")).items():
    #     print(f"{key}: {val}")

    def _dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings):
        """Dumps a database to a text file.

        Does not dump the artifact and attributes.

        Args:
            db_file: a pathto_File, the database file to dump
            dump_file: a pathto_File, the location to dump the non-blackboard database items
        """

        conn, backup_db_file = db_connect(db_file, isMultiUser, pgSettings)
        id_files_table = build_id_files_table(conn.cursor(), isMultiUser)
        id_vs_parts_table = build_id_vs_parts_table(conn.cursor(), isMultiUser)
        id_vs_info_table = build_id_vs_info_table(conn.cursor(), isMultiUser)
        id_fs_info_table = build_id_fs_info_table(conn.cursor(), isMultiUser)
        id_objects_table = build_id_objects_table(conn.cursor(), isMultiUser)
        id_artifact_types_table = build_id_artifact_types_table(conn.cursor(), isMultiUser)
        id_legacy_artifact_types = build_id_legacy_artifact_types_table(conn.cursor(), isMultiUser)
        id_reports_table = build_id_reports_table(conn.cursor(), isMultiUser)
        id_images_table = build_id_image_names_table(conn.cursor(), isMultiUser)
        id_accounts_table = build_id_accounts_table(conn.cursor(), isMultiUser)
        id_obj_path_table = build_id_obj_path_table(id_files_table, id_objects_table, id_artifact_types_table, id_reports_table, id_images_table, id_accounts_table)

        if isMultiUser: # Use PostgreSQL
            os.environ['PGPASSWORD'] = pgSettings.password
            pgDump = ["pg_dump", "--inserts", "-U", pgSettings.username, "-h", pgSettings.pgHost, "-p", pgSettings.pgPort, "-d", db_file, "-E", "utf-8", "-T", "blackboard_artifacts", "-T", "blackboard_attributes", "-f", "postgreSQLDump.sql"]
            subprocess.call(pgDump)
            postgreSQL_db = codecs.open("postgreSQLDump.sql", "r", "utf-8")
            # Write to the database dump
            with codecs.open(dump_file, "wb", "utf_8") as db_log:
                dump_line = ''
                for line in postgreSQL_db:
                    line = line.strip('\r\n ')
                    # Deal with pg_dump result file
                    if (line.startswith('--') or line.lower().startswith('alter') or "pg_catalog" in line or "idle_in_transaction_session_timeout" in line or not line): # It's a comment, an alter statement, a catalog entry, a set idle entry, or an empty line
                        continue
                    elif not line.endswith(';'): # Statement not finished
                        dump_line += line
                        continue
                    else:
                        dump_line += line
                    if 'INSERT INTO image_gallery_groups_seen' in dump_line:
                        dump_line = ''
                        continue
                    dump_line = normalize_db_entry(dump_line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table)
                    db_log.write('%s\n' % dump_line)
                    dump_line = ''
            postgreSQL_db.close()
        else: # use Sqlite
            # Delete the blackboard tables
            conn.text_factory = lambda x: x.decode("utf-8", "ignore")
            conn.execute("DROP TABLE blackboard_artifacts")
            conn.execute("DROP TABLE blackboard_attributes")
            # Write to the database dump
            with codecs.open(dump_file, "wb", "utf_8") as db_log:
                for line in conn.iterdump():
                    if 'INSERT INTO "image_gallery_groups_seen"' in line:
                        continue
                    line = normalize_db_entry(line, id_obj_path_table, id_vs_parts_table, id_vs_info_table, id_fs_info_table, id_objects_table, id_reports_table, id_images_table, id_legacy_artifact_types, id_accounts_table)
                    db_log.write('%s\n' % line)
        # Now sort the file
        srtcmdlst = ["sort", dump_file, "-o", dump_file]
        subprocess.call(srtcmdlst)

        conn.close()
        # cleanup the backup
        if backup_db_file:
            os.remove(backup_db_file)
        return id_obj_path_table

    def dump_output_db(db_file, dump_file, bb_dump_file, isMultiUser, pgSettings):
        """Dumps the given database to text files for later comparison.

        Args:
            db_file: a pathto_File, the database file to dump
            dump_file: a pathto_File, the location to dump the non-blackboard database items
            bb_dump_file: a pathto_File, the location to dump the blackboard database items
        """
        id_obj_path_table = TskDbDiff._dump_output_db_nonbb(db_file, dump_file, isMultiUser, pgSettings)
        TskDbDiff._dump_output_db_bb(db_file, bb_dump_file, isMultiUser, pgSettings, id_obj_path_table)

    def _get_tmp_file(base, ext):
        time = datetime.datetime.now().time().strftime("%H%M%f")
        return os.path.join(os.environ['TMP'], base + time + ext)


class TskDbDiffException(Exception):
    pass

class PGSettings(object):
    def __init__(self, pgHost=None, pgPort=5432, user=None, password=None):
        self.pgHost = pgHost
        self.pgPort = pgPort
        self.username = user
        self.password = password

    def get_pgHost(self):
        return self.pgHost

    def get_pgPort(self):
        return self.pgPort

    def get_username(self):
        return self.username

    def get_password(self):
        return self.password


def get_sqlite_table_columns(conn) -> Dict[str, List[str]]:
    """
    Retrieves the sqlite public tables and columns from a sqlite connection.
    Args:
        conn: The sqlite connection.

    Returns: The mapping of table names to a list of column names in that table, in ordinal order.
    """
    cur = conn.cursor()
    cur.execute("SELECT name FROM sqlite_master tables WHERE tables.type='table'")
    tables = list([table[0] for table in cur.fetchall()])
    cur.close()

    to_ret = {}
    for table in tables:
        cur = conn.cursor()
        cur.execute('SELECT name FROM pragma_table_info(?) ORDER BY cid', [table])
        to_ret[table] = list([col[0] for col in cur.fetchall()])
        cur.close()

    return to_ret


def get_pg_table_columns(conn) -> Dict[str, List[str]]:
    """
    Retrieves the postgres public tables and columns from a pg connection.
    Args:
        conn: The pg connection.

    Returns: The mapping of table names to a list of column names in that table, in ordinal order.
    """
    cursor = conn.cursor()
    cursor.execute("""
        SELECT cols.table_name, cols.column_name
        FROM information_schema.columns cols
        WHERE cols.column_name IS NOT NULL
        AND cols.table_name IS NOT NULL
        AND cols.table_name IN (
            SELECT tables.tablename FROM pg_catalog.pg_tables tables
            WHERE LOWER(schemaname) = 'public'
        )
        ORDER by cols.table_name, cols.ordinal_position;
    """)
    mapping = {}
    for row in cursor:
        mapping.setdefault(row[0], []).append(row[1])

    cursor.close()
    return mapping


def normalize_db_entry(line, files_table, vs_parts_table, vs_info_table, fs_info_table, objects_table, reports_table, images_table, artifact_table, accounts_table):
    """ Make testing more consistent and reasonable by doctoring certain db entries.

    Args:
        line: a String, the line to remove the object id from.
        files_table: a map from object ids to file paths.
    """

    # Sqlite statements use double quotes for table names, PostgreSQL doesn't. We check both databases' results for normalization.
    files_index = line.find('INSERT INTO "tsk_files"') > -1 or line.find('INSERT INTO tsk_files ') > -1
    path_index = line.find('INSERT INTO "tsk_files_path"') > -1 or line.find('INSERT INTO tsk_files_path ') > -1
    object_index = line.find('INSERT INTO "tsk_objects"') > -1 or line.find('INSERT INTO tsk_objects ') > -1
    vs_parts_index = line.find('INSERT INTO "tsk_vs_parts"') > -1 or line.find('INSERT INTO tsk_vs_parts ') > -1
    report_index = line.find('INSERT INTO "reports"') > -1 or line.find('INSERT INTO reports ') > -1
    layout_index = line.find('INSERT INTO "tsk_file_layout"') > -1 or line.find('INSERT INTO tsk_file_layout ') > -1
    data_source_info_index = line.find('INSERT INTO "data_source_info"') > -1 or line.find('INSERT INTO data_source_info ') > -1
    event_description_index = line.find('INSERT INTO "tsk_event_descriptions"') > -1 or line.find('INSERT INTO tsk_event_descriptions ') > -1
    events_index = line.find('INSERT INTO "tsk_events"') > -1 or line.find('INSERT INTO tsk_events ') > -1
    ingest_job_index = line.find('INSERT INTO "ingest_jobs"') > -1 or line.find('INSERT INTO ingest_jobs ') > -1
    examiners_index = line.find('INSERT INTO "tsk_examiners"') > -1 or line.find('INSERT INTO tsk_examiners ') > -1
    ig_groups_index = line.find('INSERT INTO "image_gallery_groups"') > -1 or line.find('INSERT INTO image_gallery_groups ') > -1
    ig_groups_seen_index = line.find('INSERT INTO "image_gallery_groups_seen"') > -1 or line.find('INSERT INTO image_gallery_groups_seen ') > -1
    os_account_index = line.find('INSERT INTO "tsk_os_accounts"') > -1 or line.find('INSERT INTO tsk_os_accounts') > -1
    os_account_attr_index = line.find('INSERT INTO "tsk_os_account_attributes"') > -1 or line.find('INSERT INTO tsk_os_account_attributes') > -1
    os_account_instances_index = line.find('INSERT INTO "tsk_os_account_instances"') > -1 or line.find('INSERT INTO tsk_os_account_instances') > -1
    data_artifacts_index = line.find('INSERT INTO "tsk_data_artifacts"') > -1 or line.find('INSERT INTO tsk_data_artifacts') > -1

    parens = line[line.find('(') + 1 : line.rfind(')')]
    no_space_parens = parens.replace(" ", "")
    fields_list = list(csv.reader([no_space_parens], quotechar="'"))[0]
    # Add back in the quotechar for values that were originally wrapped (csv reader consumes this character)
    fields_list_with_quotes = []
    ptr = 0
    for field in fields_list:
        if(len(field) == 0):
            field = "'" + field + "'"
        else:
            start = no_space_parens.find(field, ptr)
            if((start - 1) >= 0 and no_space_parens[start - 1] == '\''):
                if((start + len(field)) < len(no_space_parens) and no_space_parens[start + len(field)] == '\''):
                    field = "'" + field + "'"
        fields_list_with_quotes.append(field)
        if(ptr > 0):
            # Add one for each comma that is used to separate values in the original string
            ptr += 1
        ptr += len(field)

    fields_list = fields_list_with_quotes

    # remove object ID
    if files_index:

        # Ignore TIFF size and hash if extracted from PDFs.
        # See JIRA-6951 for more details.
        # index -3 = 3rd from the end, which is extension
        # index -5 = 5th from the end, which is the parent path.
        if fields_list[-3] == "'tif'" and fields_list[-5].endswith(".pdf/'"):
            fields_list[15] = "'SIZE_IGNORED'"
            fields_list[23] = "'MD5_IGNORED'"
            fields_list[24] = "'SHA256_IGNORED'"
        newLine = ('INSERT INTO "tsk_files" VALUES(' + ', '.join(fields_list[1:-1]) + ');') # leave off first (object id) and last (os_account_id) field
        # Remove object ID from Unalloc file name
        newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine)
        return newLine
    # remove object ID
    elif vs_parts_index:
        newLine = ('INSERT INTO "tsk_vs_parts" VALUES(' + ', '.join(fields_list[1:]) + ');')
        return newLine
    # remove group ID
    elif ig_groups_index:
        newLine = ('INSERT INTO "image_gallery_groups" VALUES(' + ', '.join(fields_list[1:]) + ');')
        return newLine
    # remove id field
    elif ig_groups_seen_index:
        # Only removing the id and group_id fields for now. May need to care about examiner_id and seen fields in future.
        newLine = ('INSERT INTO "image_gallery_groups_seen" VALUES(' + ', '.join(fields_list[2:]) + ');')
        return newLine
    # remove object ID
    elif path_index:
        obj_id = int(fields_list[0])
        objValue = files_table[obj_id]
        # remove the obj_id from ModuleOutput/EmbeddedFileExtractor directory
        # (test the find() result before offsetting, so a miss isn't treated as a hit)
        idx_found = fields_list[1].find('EmbeddedFileExtractor')
        if idx_found > -1:
            idx_pre = idx_found + len('EmbeddedFileExtractor')
            idx_pos = fields_list[1].find('\\', idx_pre + 2)
            dir_to_replace = fields_list[1][idx_pre + 1 : idx_pos] # +1 to skip the file separator
            dir_to_replace = dir_to_replace[0:dir_to_replace.rfind('_')]
            pathValue = fields_list[1][:idx_pre+1] + dir_to_replace + fields_list[1][idx_pos:]
        else:
            pathValue = fields_list[1]
        # remove localhost from postgres par_obj_name
        multiOutput_idx = pathValue.find('ModuleOutput')
        if multiOutput_idx > -1:
            pathValue = "'" + pathValue[pathValue.find('ModuleOutput'):] # postgres par_obj_name includes localhost

        newLine = ('INSERT INTO "tsk_files_path" VALUES(' + objValue + ', ' + pathValue + ', ' + ', '.join(fields_list[2:]) + ');')
        return newLine
    # remove object ID
    elif layout_index:
        obj_id = fields_list[0]
        path = files_table[int(obj_id)]
        newLine = ('INSERT INTO "tsk_file_layout" VALUES(' + path + ', ' + ', '.join(fields_list[1:]) + ');')
        # Remove object ID from Unalloc file name
        newLine = re.sub('Unalloc_[0-9]+_', 'Unalloc_', newLine)
        return newLine
    # remove object ID
    elif object_index:
        obj_id = fields_list[0]
        parent_id = fields_list[1]
        newLine = 'INSERT INTO "tsk_objects" VALUES('
        path = None
        parent_path = None

        # if obj_id or parent_id is an invalid literal, we simply return the line as is
        try:
            obj_id = int(obj_id)
            if parent_id != 'NULL':
                parent_id = int(parent_id)
        except Exception as e:
            print(obj_id, parent_id)
            return line

        if obj_id in files_table.keys():
            path = files_table[obj_id]
        elif obj_id in vs_parts_table.keys():
            path = vs_parts_table[obj_id]
        elif obj_id in vs_info_table.keys():
            path = vs_info_table[obj_id]
        elif obj_id in fs_info_table.keys():
            path = fs_info_table[obj_id]
        elif obj_id in reports_table.keys():
            path = reports_table[obj_id]
        # remove host name (for multi-user) and dates/times from path for reports
        if path is not None:
            if 'ModuleOutput' in path:
                # skip past the host name (if any)
                path = path[path.find('ModuleOutput'):]
            if 'BulkExtractor' in path or 'Smirk' in path:
                # chop off the last folder (which contains a date/time)
                path = path[:path.rfind('\\')]
            if 'Reports\\AutopsyTestCase HTML Report' in path:
                path = 'Reports\\AutopsyTestCase HTML Report'

        if parent_id in files_table.keys():
            parent_path = files_table[parent_id]
        elif parent_id in vs_parts_table.keys():
            parent_path = vs_parts_table[parent_id]
        elif parent_id in vs_info_table.keys():
            parent_path = vs_info_table[parent_id]
        elif parent_id in fs_info_table.keys():
            parent_path = fs_info_table[parent_id]
        elif parent_id in images_table.keys():
            parent_path = images_table[parent_id]
        elif parent_id in accounts_table.keys():
            parent_path = accounts_table[parent_id]
        elif parent_id == 'NULL':
            parent_path = "NULL"

        # Remove host name (for multi-user) from parent_path
        if parent_path is not None:
            if 'ModuleOutput' in parent_path:
                # skip past the host name (if any)
                parent_path = parent_path[parent_path.find('ModuleOutput'):]

        if path and parent_path:
            # Remove object ID from Unalloc file names and regripper output
            path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', path)
            path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', path)
            parent_path = re.sub('Unalloc_[0-9]+_', 'Unalloc_', parent_path)
            parent_path = re.sub('regripper\-[0-9]+\-full', 'regripper-full', parent_path)
            return newLine + path + ', ' + parent_path + ', ' + ', '.join(fields_list[2:]) + ');'
        else:
            return newLine + '"OBJECT IDS OMITTED", ' + ', '.join(fields_list[2:]) + ');' # omit parent object id and object id when we can't anonymize them
    # remove time-based information, ie Test_6/11/14 -> Test
    elif report_index:
        fields_list[1] = "AutopsyTestCase"
        fields_list[2] = "0"
        newLine = ('INSERT INTO "reports" VALUES(' + ','.join(fields_list[1:]) + ');') # remove report_id
        return newLine
    elif data_source_info_index:
        fields_list[1] = "{device id}"
        fields_list[4] = "{dateTime}"
        newLine = ('INSERT INTO "data_source_info" VALUES(' + ','.join(fields_list) + ');')
        return newLine
    elif ingest_job_index:
        fields_list[2] = "{host_name}"
        start_time = int(fields_list[3])
        end_time = int(fields_list[4])
        if (start_time <= end_time):
            fields_list[3] = "0"
            fields_list[4] = "0"
        newLine = ('INSERT INTO "ingest_jobs" VALUES(' + ','.join(fields_list) + ');')
        return newLine
    elif examiners_index:
        fields_list[1] = "{examiner_name}"
        newLine = ('INSERT INTO "tsk_examiners" VALUES(' + ','.join(fields_list) + ');')
        return newLine
    # remove all timing dependent columns from events table
    elif events_index:
        newLine = ('INSERT INTO "tsk_events" VALUES(' + ','.join(fields_list[1:2]) + ');')
        return newLine
    # remove object ids from event description table
    elif event_description_index:
        # replace object ids with information that is deterministic
        file_obj_id = int(fields_list[5])
        object_id = int(fields_list[4])
        legacy_artifact_id = 'NULL'
        if (fields_list[6] != 'NULL'):
            legacy_artifact_id = int(fields_list[6])
        if file_obj_id != 'NULL' and file_obj_id in files_table.keys():
            fields_list[5] = files_table[file_obj_id]
        if object_id != 'NULL' and object_id in files_table.keys():
            fields_list[4] = files_table[object_id]
        if legacy_artifact_id != 'NULL' and legacy_artifact_id in artifact_table.keys():
            fields_list[6] = artifact_table[legacy_artifact_id]
        if fields_list[1] == fields_list[2] and fields_list[1] == fields_list[3]:
            fields_list[1] = cleanupEventDescription(fields_list[1])
            fields_list[2] = cleanupEventDescription(fields_list[2])
            fields_list[3] = cleanupEventDescription(fields_list[3])
        newLine = ('INSERT INTO "tsk_event_descriptions" VALUES(' + ','.join(fields_list[1:]) + ');') # remove event description id
        return newLine
    elif os_account_index:
        newLine = ('INSERT INTO "tsk_os_accounts" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id since value that would be substituted is in diff line already
        return newLine
    elif os_account_attr_index:
        # substitute the account object id for a non-changing value
        os_account_id = int(fields_list[1])
        fields_list[1] = accounts_table[os_account_id]
        # substitute the source object id for a non-changing value
        source_obj_id = int(fields_list[3])
        if source_obj_id in files_table.keys():
            fields_list[3] = files_table[source_obj_id]
        elif source_obj_id in vs_parts_table.keys():
            fields_list[3] = vs_parts_table[source_obj_id]
        elif source_obj_id in vs_info_table.keys():
            fields_list[3] = vs_info_table[source_obj_id]
        elif source_obj_id in fs_info_table.keys():
            fields_list[3] = fs_info_table[source_obj_id]
        elif source_obj_id in images_table.keys():
            fields_list[3] = images_table[source_obj_id]
        elif source_obj_id in accounts_table.keys():
            fields_list[3] = accounts_table[source_obj_id]
        elif source_obj_id == 'NULL':
            fields_list[3] = "NULL"
        newLine = ('INSERT INTO "tsk_os_account_attributes" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id
        return newLine
    elif os_account_instances_index:
        os_account_id = int(fields_list[1])
        fields_list[1] = accounts_table[os_account_id]
        newLine = ('INSERT INTO "tsk_os_account_instances" VALUES(' + ','.join(fields_list[1:]) + ');') # remove id
        return newLine
    elif data_artifacts_index:
        art_obj_id = int(fields_list[0])
        if art_obj_id in files_table.keys():
            fields_list[0] = files_table[art_obj_id]
        else:
            fields_list[0] = 'Artifact Object ID Omitted'
        account_obj_id = int(fields_list[1])
        if account_obj_id in files_table.keys():
            fields_list[1] = files_table[account_obj_id]
        else:
            fields_list[1] = 'Account Object ID Omitted'
        newLine = ('INSERT INTO "tsk_data_artifacts" VALUES(' + ','.join(fields_list[:]) + ');') # remove ids
        return newLine
    else:
        return line

def cleanupEventDescription(description):
    test = re.search("^'\D+:\d+'$", description)
    if test is not None:
        return re.sub(":\d+", ":<artifact_id>", description)
    else:
        return description
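
# Illustrative behavior (not in the original file):
#   cleanupEventDescription("'Shell Bags:12345'") -> "'Shell Bags:<artifact_id>'"
#   cleanupEventDescription("'C:/path'")          -> "'C:/path'" (no trailing id, unchanged)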

def getAssociatedArtifactType(cur, artifact_id, isMultiUser):
    if isMultiUser:
        cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=%s", [artifact_id])
    else:
        cur.execute("SELECT tsk_files.parent_path, blackboard_artifact_types.display_name FROM blackboard_artifact_types INNER JOIN blackboard_artifacts ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id INNER JOIN tsk_files ON tsk_files.obj_id = blackboard_artifacts.obj_id WHERE artifact_id=?", [artifact_id])

    info = cur.fetchone()

    return "File path: " + info[0] + " Artifact Type: " + info[1]

def build_id_files_table(db_cursor, isPostgreSQL):
    """Build the map of object ids to file paths.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the object id, parent path, and name, then create a tuple in the dictionary
    # with the object id as the key and the full file path (parent + name) as the value
    mapping = dict([(row[0], str(row[1]) + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, parent_path, name FROM tsk_files")])
    return mapping

def build_id_vs_parts_table(db_cursor, isPostgreSQL):
    """Build the map of object ids to vs_parts.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the object id, addr, and start, then create a tuple in the dictionary
    # with the object id as the key and (addr + start) as the value
    mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, addr, start FROM tsk_vs_parts")])
    return mapping

def build_id_vs_info_table(db_cursor, isPostgreSQL):
    """Build the map of object ids to vs_info.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the object id, vs_type, and img_offset, then create a tuple in the dictionary
    # with the object id as the key and (vs_type + img_offset) as the value
    mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, vs_type, img_offset FROM tsk_vs_info")])
    return mapping


def build_id_fs_info_table(db_cursor, isPostgreSQL):
    """Build the map of object ids to fs_info.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the object id, img_offset, and fs_type, then create a tuple in the dictionary
    # with the object id as the key and (img_offset + fs_type) as the value
    mapping = dict([(row[0], str(row[1]) + '_' + str(row[2])) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, img_offset, fs_type FROM tsk_fs_info")])
    return mapping

def build_id_objects_table(db_cursor, isPostgreSQL):
    """Build the map of object ids to par_id.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the object id and par_obj_id, then create a tuple in the dictionary
    # with the object id as the key and [par_obj_id, type] as the value
    mapping = dict([(row[0], [row[1], row[2]]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT * FROM tsk_objects")])
    return mapping

def build_id_image_names_table(db_cursor, isPostgreSQL):
    """Build the map of object ids to name.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the object id and name, then create a tuple in the dictionary
    # with the object id as the key and the name as the value
    mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, name FROM tsk_image_names WHERE sequence=0")])
    # data_sources which are logical file sets will be found in the files table
    return mapping

def build_id_artifact_types_table(db_cursor, isPostgreSQL):
    """Build the map of artifact object ids to artifact type names.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the artifact object id, then create a tuple in the dictionary
    # with the object id as the key and the artifact type as the value
    mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_obj_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")])
    return mapping

def build_id_legacy_artifact_types_table(db_cursor, isPostgreSQL):
    """Build the map of legacy artifact ids to artifact type.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the legacy artifact id, then create a tuple in the dictionary
    # with the artifact id as the key and the artifact type as the value
    mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT blackboard_artifacts.artifact_id, blackboard_artifact_types.type_name FROM blackboard_artifacts INNER JOIN blackboard_artifact_types ON blackboard_artifact_types.artifact_type_id = blackboard_artifacts.artifact_type_id ")])
    return mapping

def build_id_reports_table(db_cursor, isPostgreSQL):
    """Build the map of report object ids to report path.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the reports table in the db, create an obj_id -> path map
    mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT obj_id, path FROM reports")])
    return mapping

def build_id_accounts_table(db_cursor, isPostgreSQL):
    """Build the map of object ids to OS account SIDs.

    Args:
        db_cursor: the database cursor
    """
    # for each row in the db, take the object id and account SID, then create a tuple in the dictionary
    # with the object id as the key and the OS account's SID as the value
    mapping = dict([(row[0], row[1]) for row in sql_select_execute(db_cursor, isPostgreSQL, "SELECT os_account_obj_id, addr FROM tsk_os_accounts")])
    return mapping

def build_id_obj_path_table(files_table, objects_table, artifacts_table, reports_table, images_table, accounts_table):
    """Build the map of object ids to deterministic path-like values.

    Args:
        files_table: obj_id, path
        objects_table: obj_id, par_obj_id, type
        artifacts_table: obj_id, artifact_type_name
        reports_table: obj_id, path
        images_table: obj_id, name
        accounts_table: obj_id, addr
    """
    # make a copy of files_table and update it with new data from artifacts_table and reports_table
    mapping = files_table.copy()
    for k, v in objects_table.items():
        path = ""
        if k not in mapping.keys(): # If the mapping table doesn't have data for obj_id
            if k in reports_table.keys(): # For a report we use the report path
                par_obj_id = v[0]
                if par_obj_id is not None:
                    mapping[k] = reports_table[k]
            elif k in artifacts_table.keys(): # For an artifact we use its par_obj_id's path+name plus its artifact_type name
                par_obj_id = v[0] # The parent of an artifact can be a file or a report
                if par_obj_id in mapping.keys():
                    path = mapping[par_obj_id]
                elif par_obj_id in reports_table.keys():
                    path = reports_table[par_obj_id]
                elif par_obj_id in images_table.keys():
                    path = images_table[par_obj_id]
                mapping[k] = path + "/" + artifacts_table[k]
            elif k in accounts_table.keys(): # For an OS account object ID we use its addr field, which is the account SID
                mapping[k] = accounts_table[k]
        elif v[0] not in mapping.keys():
            if v[0] in artifacts_table.keys():
                par_obj_id = objects_table[v[0]][0] # take the par_obj_id element; the raw entry is a [par_obj_id, type] list and is not hashable
                path = mapping[par_obj_id]
                mapping[k] = path + "/" + artifacts_table[v[0]]
    return mapping
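
# Illustrative trace (not in the original file), assuming these inputs and empty
# reports_table, images_table, and accounts_table:
#   files_table     = {2: '/img/doc.txt'}
#   objects_table   = {2: [1, 4], 5: [2, 5]}
#   artifacts_table = {5: 'TSK_KEYWORD_HIT'}
# build_id_obj_path_table maps artifact object 5 to its parent file's path:
#   {2: '/img/doc.txt', 5: '/img/doc.txt/TSK_KEYWORD_HIT'}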

def db_connect(db_file, isMultiUser, pgSettings=None):
    if isMultiUser: # use PostgreSQL
        try:
            return psycopg2.connect("dbname=" + db_file + " user=" + pgSettings.username + " host=" + pgSettings.pgHost + " password=" + pgSettings.password), None
        except:
            print("Failed to connect to the database: " + db_file)
    else: # Sqlite
        # Make a copy that we can modify
        backup_db_file = TskDbDiff._get_tmp_file("tsk_backup_db", ".db")
        shutil.copy(db_file, backup_db_file)
        # We sometimes get situations with messed up permissions
        os.chmod(backup_db_file, 0o777)
        return sqlite3.connect(backup_db_file), backup_db_file

def sql_select_execute(cursor, isPostgreSQL, sql_stmt):
    if isPostgreSQL:
        cursor.execute(sql_stmt)
        return cursor.fetchall()
    else:
        return cursor.execute(sql_stmt)
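
# Note (illustrative, not in the original file): for PostgreSQL this returns a
# fully fetched list of rows, while for SQLite it returns the cursor itself and
# rows are fetched lazily during iteration, e.g.
#   for row in sql_select_execute(cur, False, "SELECT obj_id, path FROM reports"):
#       print(row)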

def main():
    try:
        sys.argv.pop(0)
        output_db = sys.argv.pop(0)
        gold_db = sys.argv.pop(0)
    except:
        print("usage: tskdbdiff [OUTPUT DB PATH] [GOLD DB PATH]")
        sys.exit(1)

    db_diff = TskDbDiff(output_db, gold_db, output_dir=".")
    dump_passed, bb_dump_passed = db_diff.run_diff()

    if dump_passed and bb_dump_passed:
        print("Database comparison passed.")
    if not dump_passed:
        print("Non blackboard database comparison failed.")
    if not bb_dump_passed:
        print("Blackboard database comparison failed.")

    sys.exit(0)


if __name__ == "__main__":
    if sys.hexversion < 0x03000000:
        print("Python 3 required")
        sys.exit(1)

    main()