#!/usr/bin/env python3
"""
Browser History to Timesketch CSV Converter - Comprehensive Edition

Extracts ALL timestamped browser events to Timesketch-compatible CSV format.
Browser-agnostic output with consistent event naming across all browsers.

Supports:
- Firefox/Gecko: visits, bookmarks, downloads, form history, annotations,
  page metadata, input history, keywords, origins
- Chrome/Chromium/Edge/Brave: visits, downloads, searches, autofill,
  favicons, media history, site engagement
- Safari/WebKit: visits, bookmarks, downloads, reading list, top sites

Output format: Timesketch-compatible CSV with browser-agnostic event types
"""

import sqlite3
import csv
import argparse
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Tuple, Optional, Dict, Any, List


class BrowserDetectionError(Exception):
    """Raised when browser type cannot be detected"""
    pass


class DatabaseValidationError(Exception):
    """Raised when database validation fails"""
    pass


class TimestampValidationError(Exception):
    """Raised when timestamp validation fails"""
    pass


def validate_sqlite_database(db_path: str) -> None:
    """
    Validate that the file is a SQLite database and is accessible.

    Args:
        db_path: Path to database file

    Raises:
        DatabaseValidationError: If validation fails
    """
    path = Path(db_path)

    if not path.exists():
        raise DatabaseValidationError(f"Database file not found: {db_path}")

    if not path.is_file():
        raise DatabaseValidationError(f"Path is not a file: {db_path}")

    # Try to open as SQLite database. OperationalError is a subclass of
    # DatabaseError, so it must be caught first or its handler is unreachable.
    try:
        conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
        cursor = conn.cursor()
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
        conn.close()
    except sqlite3.OperationalError as e:
        raise DatabaseValidationError(f"Cannot access database (may be locked or corrupted): {db_path}. Error: {e}")
    except sqlite3.DatabaseError as e:
        raise DatabaseValidationError(f"Not a valid SQLite database: {db_path}. Error: {e}")


def detect_browser_type(db_path: str) -> str:
    """
    Auto-detect browser type by examining database schema.

    Args:
        db_path: Path to database file

    Returns:
        Detected browser type: 'gecko', 'chromium', or 'webkit'

    Raises:
        BrowserDetectionError: If browser type cannot be determined
    """
    try:
        conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
        cursor = conn.cursor()

        cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
        tables = {row[0] for row in cursor.fetchall()}

        conn.close()

        if 'moz_historyvisits' in tables and 'moz_places' in tables:
            return 'gecko'

        if 'visits' in tables and 'urls' in tables:
            return 'chromium'

        if 'history_visits' in tables and 'history_items' in tables:
            return 'webkit'

        raise BrowserDetectionError(
            f"Cannot determine browser type. Found tables: {', '.join(sorted(tables))}"
        )

    except sqlite3.Error as e:
        raise BrowserDetectionError(f"Error reading database schema: {e}")


def validate_timestamp(unix_microseconds: int, browser_type: str) -> None:
    """
    Validate that a timestamp is within reasonable bounds.

    Args:
        unix_microseconds: Timestamp in Unix microseconds
        browser_type: Browser type for error messages

    Raises:
        TimestampValidationError: If timestamp is unreasonable
    """
    if unix_microseconds <= 0:
        return

    timestamp_seconds = unix_microseconds / 1000000

    # Use UTC-aware bounds so the check does not shift with the local timezone
    min_date = datetime(1990, 1, 1, tzinfo=timezone.utc)
    max_date = datetime(2040, 1, 1, tzinfo=timezone.utc)
    min_seconds = min_date.timestamp()
    max_seconds = max_date.timestamp()

    if timestamp_seconds < min_seconds:
        dt = datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
        raise TimestampValidationError(
            f"Timestamp appears too old: {dt.strftime('%Y-%m-%d %H:%M:%S')} (before 1990). "
            f"This may indicate a timestamp conversion error for {browser_type}."
        )

    if timestamp_seconds > max_seconds:
        dt = datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
        raise TimestampValidationError(
            f"Timestamp appears to be in the future: {dt.strftime('%Y-%m-%d %H:%M:%S')} (after 2040). "
            f"This may indicate a timestamp conversion error for {browser_type}."
        )


def convert_gecko_timestamp(gecko_timestamp: Optional[int]) -> Tuple[int, str]:
    """Convert Gecko/Firefox timestamp to Unix microseconds and ISO format."""
    if gecko_timestamp is None or gecko_timestamp == 0:
        return 0, ""

    validate_timestamp(gecko_timestamp, "Gecko/Firefox")

    # Gecko already stores microseconds since the Unix epoch
    timestamp_seconds = gecko_timestamp / 1000000
    dt = datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
    return gecko_timestamp, dt.strftime('%Y-%m-%dT%H:%M:%S+00:00')


def convert_chromium_timestamp(chromium_timestamp: Optional[int]) -> Tuple[int, str]:
    """Convert Chromium timestamp to Unix microseconds and ISO format."""
    if chromium_timestamp is None or chromium_timestamp == 0:
        return 0, ""

    # Chromium stores microseconds since 1601-01-01 UTC; shift to the Unix epoch
    chromium_epoch_offset = 11644473600
    timestamp_seconds = (chromium_timestamp / 1000000) - chromium_epoch_offset
    unix_microseconds = int(timestamp_seconds * 1000000)

    validate_timestamp(unix_microseconds, "Chromium")

    dt = datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
    return unix_microseconds, dt.strftime('%Y-%m-%dT%H:%M:%S+00:00')


def convert_webkit_timestamp(webkit_timestamp: Optional[float]) -> Tuple[int, str]:
    """Convert WebKit/Safari timestamp to Unix microseconds and ISO format."""
    if webkit_timestamp is None or webkit_timestamp == 0:
        return 0, ""

    # WebKit/Safari stores seconds since 2001-01-01 UTC; shift to the Unix epoch
    webkit_epoch_offset = 978307200
    timestamp_seconds = webkit_timestamp + webkit_epoch_offset
    unix_microseconds = int(timestamp_seconds * 1000000)

    validate_timestamp(unix_microseconds, "WebKit/Safari")

    dt = datetime.fromtimestamp(timestamp_seconds, tz=timezone.utc)
    return unix_microseconds, dt.strftime('%Y-%m-%dT%H:%M:%S+00:00')


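# Worked examples for the three epoch conversions above (a sketch for
# orientation only, not used by the converter). All three inputs represent
# 2021-01-01T00:00:00+00:00:
#
#   convert_gecko_timestamp(1609459200000000)       # Unix epoch, microseconds
#   convert_chromium_timestamp(13253932800000000)   # 1601 epoch, microseconds
#   convert_webkit_timestamp(631152000.0)           # 2001 epoch, seconds
#
# since 13253932800000000 = (1609459200 + 11644473600) * 1e6
# and   631152000         = 1609459200 - 978307200.

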
def write_timesketch_csv(output_csv: str, rows: List[Dict[str, Any]]) -> None:
    """
    Write history data to Timesketch-compatible CSV format with dynamic fields.

    Args:
        output_csv: Path to output CSV file
        rows: List of row dictionaries to write
    """
    if not rows:
        return

    # Collect all unique fields from all rows
    all_fields = set()
    for row in rows:
        all_fields.update(row.keys())

    # Define standard field order (these come first)
    standard_fields = ['timestamp', 'datetime', 'timestamp_desc', 'message', 'data_type']

    # Build fieldnames list with standard fields first, then alphabetically sorted remainder
    fieldnames = [f for f in standard_fields if f in all_fields]
    remaining_fields = sorted(all_fields - set(standard_fields))
    fieldnames.extend(remaining_fields)

    with open(output_csv, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, extrasaction='ignore')
        writer.writeheader()

        for row in rows:
            writer.writerow(row)


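# Illustrative example of the dynamic header (not part of the converter):
# given one row with keys {timestamp, datetime, message, url} and another
# with {timestamp, datetime, message, browser}, the emitted header is
#
#   timestamp,datetime,message,browser,url
#
# i.e. whichever standard fields are present, in their fixed order, followed
# by the remaining fields sorted alphabetically. Keys missing from a given
# row are written as empty cells by DictWriter.

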
def connect_database_readonly(db_path: str) -> sqlite3.Connection:
    """Connect to database in read-only mode to avoid lock issues."""
    try:
        conn = sqlite3.connect(f'file:{db_path}?mode=ro', uri=True)
        return conn
    except sqlite3.OperationalError as e:
        raise sqlite3.OperationalError(
            f"Cannot open database (it may be locked by the browser): {db_path}\n"
            f"Please close the browser and try again, or copy the database file.\n"
            f"Original error: {e}"
        )


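# Minimal end-to-end sketch of how these helpers compose (illustrative only;
# the file names are placeholders, and the script's own CLI handles this flow):
#
#     validate_sqlite_database("places.sqlite")
#     engine = detect_browser_type("places.sqlite")    # e.g. 'gecko'
#     conn = connect_database_readonly("places.sqlite")
#     rows = extract_gecko_visits(conn, "firefox")
#     write_timesketch_csv("timeline.csv", rows)

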
def table_exists(conn: sqlite3.Connection, table_name: str) -> bool:
    """Check if a table exists in the database."""
    cursor = conn.cursor()
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
        (table_name,)
    )
    return cursor.fetchone() is not None


def column_exists(conn: sqlite3.Connection, table_name: str, column_name: str) -> bool:
    """Check if a column exists in a table."""
    cursor = conn.cursor()
    try:
        cursor.execute(f"PRAGMA table_info({table_name})")
        columns = {row[1] for row in cursor.fetchall()}
        return column_name in columns
    except sqlite3.Error:
        return False


# ============================================================================
# CHROMIUM EXTRACTORS
# ============================================================================

def extract_chromium_visits(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract visit events from Chromium database with resolved foreign keys."""
    cursor = conn.cursor()

    query = """
    SELECT
        visits.visit_time,
        urls.url,
        urls.title,
        visits.transition,
        visits.visit_duration,
        urls.visit_count,
        urls.typed_count,
        visits.segment_id,
        visits.incremented_omnibox_typed_score,
        urls.hidden,
        from_urls.url as from_url,
        from_urls.title as from_title,
        opener_urls.url as opener_url,
        opener_urls.title as opener_title
    FROM visits
    JOIN urls ON visits.url = urls.id
    LEFT JOIN visits from_visits ON visits.from_visit = from_visits.id
    LEFT JOIN urls from_urls ON from_visits.url = from_urls.id
    LEFT JOIN visits opener_visits ON visits.opener_visit = opener_visits.id
    LEFT JOIN urls opener_urls ON opener_visits.url = opener_urls.id
    ORDER BY visits.visit_time
    """

    cursor.execute(query)
    results = cursor.fetchall()

    transition_types = {
        0: "Link", 1: "Typed", 2: "Auto_Bookmark", 3: "Auto_Subframe",
        4: "Manual_Subframe", 5: "Generated", 6: "Start_Page",
        7: "Form_Submit", 8: "Reload", 9: "Keyword", 10: "Keyword_Generated"
    }

    rows = []
    for row in results:
        (chromium_timestamp, url, title, transition, visit_duration,
         visit_count, typed_count, segment_id, incremented_typed, hidden,
         from_url, from_title, opener_url, opener_title) = row

        try:
            unix_microseconds, iso_datetime = convert_chromium_timestamp(chromium_timestamp)
        except TimestampValidationError:
            continue

        # The low byte holds the core transition type; higher bits are qualifier flags
        core_transition = transition & 0xFF
        transition_name = transition_types.get(core_transition, f"Unknown({core_transition})")

        # Build row with only useful fields
        row_data = {
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Visit Time',
            'message': f"Visited: {title or '(No title)'}",
            'data_type': 'browser:page:visit',
            'browser': browser_name,
            'url': url or "",
            'title': title or "(No title)",
            'visit_type': transition_name,
            'visit_duration_us': visit_duration or 0,
            'total_visits': visit_count or 0,
            'typed_count': typed_count or 0,
            'typed_in_omnibox': bool(incremented_typed),
            'hidden': bool(hidden)
        }

        # Add optional fields only if present
        if from_url:
            row_data['from_url'] = from_url
            if from_title:
                row_data['from_title'] = from_title

        if opener_url:
            row_data['opener_url'] = opener_url
            if opener_title:
                row_data['opener_title'] = opener_title

        # Add session ID only if non-zero
        if segment_id and segment_id != 0:
            row_data['session_id'] = segment_id

        rows.append(row_data)

    return rows


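# The extractor above keeps only the core transition type (transition & 0xFF).
# The high bits carry qualifier flags that can matter in an investigation
# (e.g. redirect chains). The sketch below shows how they could be decoded;
# the mask values follow Chromium's ui/base/page_transition_types.h as of
# recent sources (verify against the target build), and the helper is
# illustrative, not wired into the extraction.
CHROMIUM_TRANSITION_QUALIFIERS = {
    0x01000000: "Forward_Back",
    0x02000000: "From_Address_Bar",
    0x04000000: "Home_Page",
    0x08000000: "From_API",
    0x10000000: "Chain_Start",
    0x20000000: "Chain_End",
    0x40000000: "Client_Redirect",
    0x80000000: "Server_Redirect",
}


def decode_transition_qualifiers(transition: int) -> List[str]:
    """Return the qualifier flag names set on a raw Chromium transition value."""
    return [name for mask, name in CHROMIUM_TRANSITION_QUALIFIERS.items()
            if transition & mask]

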
def extract_chromium_downloads(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract download events from Chromium database."""
    if not table_exists(conn, 'downloads'):
        return []

    cursor = conn.cursor()

    query = """
    SELECT
        id,
        guid,
        current_path,
        target_path,
        start_time,
        received_bytes,
        total_bytes,
        state,
        danger_type,
        interrupt_reason,
        end_time,
        opened,
        last_access_time,
        referrer,
        tab_url,
        mime_type
    FROM downloads
    ORDER BY start_time
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    download_states = {
        0: "In Progress",
        1: "Complete",
        2: "Cancelled",
        3: "Interrupted",
        4: "Dangerous"
    }

    rows = []
    for row in results:
        (dl_id, guid, current_path, target_path, start_time, received_bytes,
         total_bytes, state, danger_type, interrupt_reason, end_time, opened,
         last_access_time, referrer, tab_url, mime_type) = row

        try:
            start_us, start_iso = convert_chromium_timestamp(start_time)
            end_us, end_iso = convert_chromium_timestamp(end_time) if end_time else (0, "")
            access_us, access_iso = convert_chromium_timestamp(last_access_time) if last_access_time else (0, "")
        except TimestampValidationError:
            continue

        state_name = download_states.get(state, f"Unknown({state})")
        filename = Path(target_path).name if target_path else "(unknown)"

        # Download start event
        rows.append({
            'timestamp': start_us,
            'datetime': start_iso,
            'timestamp_desc': 'Download Started',
            'message': f"Download started: {filename} ({mime_type or 'unknown type'})",
            'data_type': 'browser:download:start',
            'browser': browser_name,
            'download_id': dl_id,
            'filename': filename,
            'file_path': target_path or "",
            'file_size_bytes': total_bytes or 0,
            'mime_type': mime_type or "",
            'download_state': state_name,
            'referrer_url': referrer or "",
            'tab_url': tab_url or "",
            'dangerous': bool(danger_type),
            'interrupted': bool(interrupt_reason)
        })

        # Download complete event (if completed)
        if end_time and end_time != start_time:
            duration_seconds = (end_us - start_us) / 1000000
            rows.append({
                'timestamp': end_us,
                'datetime': end_iso,
                'timestamp_desc': 'Download Completed',
                'message': f"Download completed: {filename} ({received_bytes or 0} bytes in {duration_seconds:.1f}s)",
                'data_type': 'browser:download:complete',
                'browser': browser_name,
                'download_id': dl_id,
                'filename': filename,
                'file_path': target_path or "",
                'file_size_bytes': received_bytes or 0,
                'mime_type': mime_type or "",
                'download_state': state_name,
                'download_duration_seconds': duration_seconds
            })

        # Last access event (if different from completion)
        if last_access_time and last_access_time != end_time and last_access_time != start_time:
            rows.append({
                'timestamp': access_us,
                'datetime': access_iso,
                'timestamp_desc': 'File Accessed',
                'message': f"Downloaded file accessed: {filename}",
                'data_type': 'browser:download:accessed',
                'browser': browser_name,
                'download_id': dl_id,
                'filename': filename,
                'file_path': target_path or ""
            })

    return rows


def extract_chromium_search_terms(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract search terms from Chromium database."""
    if not table_exists(conn, 'keyword_search_terms'):
        return []

    cursor = conn.cursor()

    query = """
    SELECT
        kst.term,
        kst.normalized_term,
        u.url,
        u.title,
        u.last_visit_time
    FROM keyword_search_terms kst
    JOIN urls u ON kst.url_id = u.id
    ORDER BY u.last_visit_time
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        term, normalized_term, url, title, last_visit = row

        try:
            unix_microseconds, iso_datetime = convert_chromium_timestamp(last_visit)
        except TimestampValidationError:
            continue

        rows.append({
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Search Performed',
            'message': f"Search: {term}",
            'data_type': 'browser:search:query',
            'browser': browser_name,
            'search_term': term,
            'normalized_search_term': normalized_term,
            'search_url': url or "",
            'search_page_title': title or ""
        })

    return rows


def extract_chromium_autofill(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract autofill/form data from Chromium database."""
    if not table_exists(conn, 'autofill'):
        return []

    cursor = conn.cursor()

    # Check which timestamp columns exist (schema varies across Chromium versions)
    has_created = column_exists(conn, 'autofill', 'date_created')
    has_last_used = column_exists(conn, 'autofill', 'date_last_used')

    if not (has_created or has_last_used):
        return []

    # Build query based on available columns
    timestamp_cols = []
    if has_created:
        timestamp_cols.append('date_created')
    if has_last_used:
        timestamp_cols.append('date_last_used')

    query = f"""
    SELECT
        name,
        value,
        {', '.join(timestamp_cols)},
        count
    FROM autofill
    WHERE {' OR '.join([f'{col} > 0' for col in timestamp_cols])}
    ORDER BY {timestamp_cols[0]}
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        name, value, *timestamps, count = row

        # Map the variable timestamp columns back to their meanings
        date_created = timestamps[0] if has_created else None
        if has_last_used:
            date_last_used = timestamps[1] if has_created else timestamps[0]
        else:
            date_last_used = None

        # Truncate long values; guard against NULLs
        display_value = (value[:50] + '...') if value and len(value) > 50 else (value or "")

        # Form field created/first used
        if date_created:
            try:
                created_us, created_iso = convert_chromium_timestamp(date_created)
                rows.append({
                    'timestamp': created_us,
                    'datetime': created_iso,
                    'timestamp_desc': 'Form Field First Used',
                    'message': f"First use of form field: {name}",
                    'data_type': 'browser:form:first_use',
                    'browser': browser_name,
                    'form_field_name': name,
                    'form_field_value': display_value,
                    'total_uses': count or 0
                })
            except TimestampValidationError:
                pass

        # Form field last used (if different)
        if date_last_used and date_last_used != date_created:
            try:
                last_us, last_iso = convert_chromium_timestamp(date_last_used)
                rows.append({
                    'timestamp': last_us,
                    'datetime': last_iso,
                    'timestamp_desc': 'Form Field Last Used',
                    'message': f"Used form field: {name}",
                    'data_type': 'browser:form:use',
                    'browser': browser_name,
                    'form_field_name': name,
                    'form_field_value': display_value,
                    'total_uses': count or 0
                })
            except TimestampValidationError:
                pass

    return rows


def extract_chromium_favicons(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract favicon mapping timestamps from Chromium database."""
    if not table_exists(conn, 'icon_mapping'):
        return []

    # Check if last_updated column exists (not in all Chromium versions)
    if not column_exists(conn, 'icon_mapping', 'last_updated'):
        return []

    cursor = conn.cursor()

    query = """
    SELECT
        im.last_updated,
        im.page_url,
        f.url as favicon_url
    FROM icon_mapping im
    LEFT JOIN favicons f ON im.icon_id = f.id
    WHERE im.last_updated > 0
    ORDER BY im.last_updated
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        last_updated, page_url, favicon_url = row

        try:
            unix_microseconds, iso_datetime = convert_chromium_timestamp(last_updated)
        except TimestampValidationError:
            continue

        rows.append({
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Favicon Updated',
            'message': f"Updated favicon for: {page_url}",
            'data_type': 'browser:favicon:update',
            'browser': browser_name,
            'page_url': page_url or "",
            'favicon_url': favicon_url or ""
        })

    return rows


def extract_chromium_media_history(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract media playback history from Chromium database."""
    if not table_exists(conn, 'playback'):
        return []

    cursor = conn.cursor()

    # Check available columns
    has_last_updated = column_exists(conn, 'playback', 'last_updated_time_s')

    if not has_last_updated:
        return []

    query = """
    SELECT
        p.url,
        p.watch_time_s,
        p.has_audio,
        p.has_video,
        p.last_updated_time_s
    FROM playback p
    WHERE p.last_updated_time_s > 0
    ORDER BY p.last_updated_time_s
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        url, watch_time, has_audio, has_video, last_updated = row

        # Convert Unix seconds to microseconds for consistency
        unix_microseconds = int(last_updated * 1000000)

        try:
            validate_timestamp(unix_microseconds, "Chromium Media")
            dt = datetime.fromtimestamp(last_updated, tz=timezone.utc)
            iso_datetime = dt.strftime('%Y-%m-%dT%H:%M:%S+00:00')
        except TimestampValidationError:
            continue

        media_type = []
        if has_audio:
            media_type.append("audio")
        if has_video:
            media_type.append("video")
        media_type_str = "+".join(media_type) if media_type else "unknown"

        # Guard against NULL watch times before formatting
        watch_seconds = watch_time or 0

        rows.append({
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Media Playback',
            'message': f"Played {media_type_str}: {url} ({watch_seconds:.1f}s)",
            'data_type': 'browser:media:playback',
            'browser': browser_name,
            'media_url': url or "",
            'watch_time_seconds': watch_seconds,
            'has_audio': bool(has_audio),
            'has_video': bool(has_video)
        })

    return rows


def extract_chromium_site_engagement(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract site engagement scores from Chromium database."""
    if not table_exists(conn, 'site_engagement'):
        return []

    cursor = conn.cursor()

    # Check for timestamp column
    has_last_engagement = column_exists(conn, 'site_engagement', 'last_engagement_time')

    if not has_last_engagement:
        return []

    query = """
    SELECT
        origin_url,
        score,
        last_engagement_time
    FROM site_engagement
    WHERE last_engagement_time > 0 AND score > 0
    ORDER BY last_engagement_time
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        origin_url, score, last_engagement = row

        # Convert internal timestamp format (typically Unix seconds)
        unix_microseconds = int(last_engagement * 1000000)

        try:
            validate_timestamp(unix_microseconds, "Chromium Site Engagement")
            dt = datetime.fromtimestamp(last_engagement, tz=timezone.utc)
            iso_datetime = dt.strftime('%Y-%m-%dT%H:%M:%S+00:00')
        except TimestampValidationError:
            continue

        rows.append({
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Site Engagement Updated',
            'message': f"Site engagement: {origin_url} (score: {score:.1f})",
            'data_type': 'browser:engagement:update',
            'browser': browser_name,
            'site_url': origin_url or "",
            'engagement_score': score or 0
        })

    return rows


# ============================================================================
# GECKO/FIREFOX EXTRACTORS
# ============================================================================

def extract_gecko_visits(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract visit events from Gecko database with resolved foreign keys."""
    cursor = conn.cursor()

    query = """
    SELECT
        v.visit_date,
        p.url,
        p.title,
        p.description,
        v.visit_type,
        v.session,
        p.visit_count,
        p.typed,
        p.frecency,
        p.hidden,
        p.rev_host,
        prev_p.url as from_url,
        prev_p.title as from_title
    FROM moz_historyvisits v
    JOIN moz_places p ON v.place_id = p.id
    LEFT JOIN moz_historyvisits prev_v ON v.from_visit = prev_v.id
    LEFT JOIN moz_places prev_p ON prev_v.place_id = prev_p.id
    ORDER BY v.visit_date
    """

    cursor.execute(query)
    results = cursor.fetchall()

    visit_types = {
        1: "Link", 2: "Typed", 3: "Bookmark", 4: "Embed",
        5: "Redirect_Permanent", 6: "Redirect_Temporary",
        7: "Download", 8: "Framed_Link", 9: "Reload"
    }

    rows = []
    for row in results:
        (timestamp_us, url, title, description, visit_type_id,
         session, visit_count, typed, frecency, hidden, rev_host,
         from_url, from_title) = row

        try:
            unix_microseconds, iso_datetime = convert_gecko_timestamp(timestamp_us)
        except TimestampValidationError:
            continue

        visit_type_name = visit_types.get(visit_type_id, f"Unknown({visit_type_id})")

        message = f"Visited: {title or '(No title)'}"
        if description:
            message += f" - {description}"

        # Build row with only useful fields
        row_data = {
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Visit Time',
            'message': message,
            'data_type': 'browser:page:visit',
            'browser': browser_name,
            'url': url or "",
            'title': title or "(No title)",
            'visit_type': visit_type_name,
            'total_visit_count': visit_count or 0,
            'typed_count': typed or 0,
            'frecency_score': frecency,
            'hidden': bool(hidden),
            # rev_host is the host reversed with a trailing '.'; strip it after reversing
            'domain': rev_host[::-1].lstrip('.') if rev_host else ""
        }

        # Add optional fields only if present
        if description:
            row_data['description'] = description

        # Add navigation chain info if present
        if from_url:
            row_data['from_url'] = from_url
            if from_title:
                row_data['from_title'] = from_title

        # Add session ID only if non-zero
        if session and session != 0:
            row_data['session_id'] = session

        rows.append(row_data)

    return rows


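# Worked example for the rev_host handling above (illustrative): Firefox
# stores the host reversed with a trailing period, so for example.com
# moz_places.rev_host holds 'moc.elpmaxe.'; rev_host[::-1] then yields
# '.example.com', and lstrip('.') drops the leftover leading dot.

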
def extract_gecko_bookmarks(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract bookmark events from Gecko database."""
    if not table_exists(conn, 'moz_bookmarks'):
        return []

    cursor = conn.cursor()

    query = """
    SELECT
        b.id,
        b.type,
        b.title,
        b.dateAdded,
        b.lastModified,
        p.url,
        p.title as page_title,
        b.parent,
        b.position
    FROM moz_bookmarks b
    LEFT JOIN moz_places p ON b.fk = p.id
    WHERE b.dateAdded IS NOT NULL
    ORDER BY b.dateAdded
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    bookmark_types = {
        1: "Bookmark",
        2: "Folder",
        3: "Separator"
    }

    rows = []
    for row in results:
        (bm_id, bm_type, title, date_added, last_modified,
         url, page_title, parent, position) = row

        try:
            added_us, added_iso = convert_gecko_timestamp(date_added)
        except TimestampValidationError:
            continue

        type_name = bookmark_types.get(bm_type, f"Unknown({bm_type})")
        display_title = title or page_title or "(No title)"

        # Bookmark added event
        rows.append({
            'timestamp': added_us,
            'datetime': added_iso,
            'timestamp_desc': 'Bookmark Added',
            'message': f"Bookmarked: {display_title}",
            'data_type': 'browser:bookmark:added',
            'browser': browser_name,
            'bookmark_id': bm_id,
            'bookmark_type': type_name,
            'bookmark_title': display_title,
            'url': url or "",
            'parent_folder_id': parent,
            'position': position
        })

        # Bookmark modified event (if different from added)
        if last_modified and last_modified != date_added:
            try:
                modified_us, modified_iso = convert_gecko_timestamp(last_modified)
                rows.append({
                    'timestamp': modified_us,
                    'datetime': modified_iso,
                    'timestamp_desc': 'Bookmark Modified',
                    'message': f"Modified bookmark: {display_title}",
                    'data_type': 'browser:bookmark:modified',
                    'browser': browser_name,
                    'bookmark_id': bm_id,
                    'bookmark_title': display_title,
                    'url': url or ""
                })
            except TimestampValidationError:
                pass

    return rows


def extract_gecko_downloads(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract downloads from Gecko database (older Firefox versions)."""
    if not table_exists(conn, 'moz_downloads'):
        return []

    cursor = conn.cursor()

    query = """
    SELECT
        id,
        name,
        source,
        target,
        startTime,
        endTime,
        state,
        referrer,
        currBytes,
        maxBytes,
        mimeType
    FROM moz_downloads
    WHERE startTime IS NOT NULL
    ORDER BY startTime
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    download_states = {
        0: "Downloading",
        1: "Complete",
        2: "Failed",
        3: "Cancelled",
        4: "Paused"
    }

    rows = []
    for row in results:
        (dl_id, name, source, target, start_time, end_time, state,
         referrer, curr_bytes, max_bytes, mime_type) = row

        try:
            start_us, start_iso = convert_gecko_timestamp(start_time)
        except TimestampValidationError:
            continue

        state_name = download_states.get(state, f"Unknown({state})")

        # Download start event
        rows.append({
            'timestamp': start_us,
            'datetime': start_iso,
            'timestamp_desc': 'Download Started',
            'message': f"Download started: {name} ({mime_type or 'unknown type'})",
            'data_type': 'browser:download:start',
            'browser': browser_name,
            'download_id': dl_id,
            'filename': name or "",
            'source_url': source or "",
            'target_path': target or "",
            'file_size_bytes': max_bytes or 0,
            'mime_type': mime_type or "",
            'download_state': state_name,
            'referrer_url': referrer or ""
        })

        # Download complete event
        if end_time and end_time != start_time:
            try:
                end_us, end_iso = convert_gecko_timestamp(end_time)
                duration_seconds = (end_us - start_us) / 1000000
                rows.append({
                    'timestamp': end_us,
                    'datetime': end_iso,
                    'timestamp_desc': 'Download Completed',
                    'message': f"Download completed: {name} ({curr_bytes or 0} bytes in {duration_seconds:.1f}s)",
                    'data_type': 'browser:download:complete',
                    'browser': browser_name,
                    'download_id': dl_id,
                    'filename': name or "",
                    'file_size_bytes': curr_bytes or 0,
                    'mime_type': mime_type or "",
                    'download_state': state_name,
                    'download_duration_seconds': duration_seconds
                })
            except TimestampValidationError:
                pass

    return rows


def extract_gecko_form_history(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract form autofill history from Gecko database."""
    if not table_exists(conn, 'moz_formhistory'):
        return []

    cursor = conn.cursor()

    query = """
    SELECT
        id,
        fieldname,
        value,
        timesUsed,
        firstUsed,
        lastUsed
    FROM moz_formhistory
    WHERE firstUsed IS NOT NULL
    ORDER BY firstUsed
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        form_id, fieldname, value, times_used, first_used, last_used = row

        # Truncate long values; guard against NULLs
        display_value = (value[:50] + '...') if value and len(value) > 50 else (value or "")

        # First use event
        if first_used:
            try:
                first_us, first_iso = convert_gecko_timestamp(first_used)
                rows.append({
                    'timestamp': first_us,
                    'datetime': first_iso,
                    'timestamp_desc': 'Form Field First Used',
                    'message': f"First use of form field: {fieldname}",
                    'data_type': 'browser:form:first_use',
                    'browser': browser_name,
                    'form_id': form_id,
                    'form_field_name': fieldname,
                    'form_field_value': display_value,
                    'total_uses': times_used or 0
                })
            except TimestampValidationError:
                pass

        # Last use event (if different)
        if last_used and last_used != first_used:
            try:
                last_us, last_iso = convert_gecko_timestamp(last_used)
                rows.append({
                    'timestamp': last_us,
                    'datetime': last_iso,
                    'timestamp_desc': 'Form Field Last Used',
                    'message': f"Used form field: {fieldname}",
                    'data_type': 'browser:form:use',
                    'browser': browser_name,
                    'form_id': form_id,
                    'form_field_name': fieldname,
                    'form_field_value': display_value,
                    'total_uses': times_used or 0
                })
            except TimestampValidationError:
                pass

    return rows


def extract_gecko_annotations(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract page and bookmark annotations from Gecko database."""
    rows = []

    # Page annotations
    if table_exists(conn, 'moz_annos'):
        cursor = conn.cursor()
        query = """
        SELECT
            a.id,
            a.place_id,
            a.dateAdded,
            a.lastModified,
            n.name,
            a.content,
            p.url,
            p.title
        FROM moz_annos a
        JOIN moz_anno_attributes n ON a.anno_attribute_id = n.id
        JOIN moz_places p ON a.place_id = p.id
        WHERE a.dateAdded IS NOT NULL
        ORDER BY a.dateAdded
        """

        try:
            cursor.execute(query)
            results = cursor.fetchall()

            for row in results:
                anno_id, place_id, date_added, last_modified, name, content, url, title = row

                # Annotation added
                if date_added:
                    try:
                        added_us, added_iso = convert_gecko_timestamp(date_added)
                        rows.append({
                            'timestamp': added_us,
                            'datetime': added_iso,
                            'timestamp_desc': 'Page Annotation Added',
                            'message': f"Added annotation '{name}' to: {title or url}",
                            'data_type': 'browser:annotation:added',
                            'browser': browser_name,
                            'annotation_id': anno_id,
                            'annotation_name': name,
                            'annotation_content': content[:100] + '...' if content and len(content) > 100 else content,
                            'url': url or "",
                            'title': title or ""
                        })
                    except TimestampValidationError:
                        pass

                # Annotation modified (if different)
                if last_modified and last_modified != date_added:
                    try:
                        modified_us, modified_iso = convert_gecko_timestamp(last_modified)
                        rows.append({
                            'timestamp': modified_us,
                            'datetime': modified_iso,
                            'timestamp_desc': 'Page Annotation Modified',
                            'message': f"Modified annotation '{name}' on: {title or url}",
                            'data_type': 'browser:annotation:modified',
                            'browser': browser_name,
                            'annotation_id': anno_id,
                            'annotation_name': name,
                            'url': url or ""
                        })
                    except TimestampValidationError:
                        pass
        except sqlite3.Error:
            pass

    # Bookmark annotations
    if table_exists(conn, 'moz_items_annos'):
        cursor = conn.cursor()
        query = """
        SELECT
            ia.id,
            ia.item_id,
            ia.dateAdded,
            ia.lastModified,
            n.name,
            ia.content,
            b.title
        FROM moz_items_annos ia
        JOIN moz_anno_attributes n ON ia.anno_attribute_id = n.id
        JOIN moz_bookmarks b ON ia.item_id = b.id
        WHERE ia.dateAdded IS NOT NULL
        ORDER BY ia.dateAdded
        """

        try:
            cursor.execute(query)
            results = cursor.fetchall()

            for row in results:
                anno_id, item_id, date_added, last_modified, name, content, title = row

                # Annotation added
                if date_added:
                    try:
                        added_us, added_iso = convert_gecko_timestamp(date_added)
                        rows.append({
                            'timestamp': added_us,
                            'datetime': added_iso,
                            'timestamp_desc': 'Bookmark Annotation Added',
                            'message': f"Added annotation '{name}' to bookmark: {title}",
                            'data_type': 'browser:annotation:added',
                            'browser': browser_name,
                            'annotation_id': anno_id,
                            'annotation_name': name,
                            'annotation_content': content[:100] + '...' if content and len(content) > 100 else content,
                            'bookmark_title': title or ""
                        })
                    except TimestampValidationError:
                        pass

                # Annotation modified (if different)
                if last_modified and last_modified != date_added:
                    try:
                        modified_us, modified_iso = convert_gecko_timestamp(last_modified)
                        rows.append({
                            'timestamp': modified_us,
                            'datetime': modified_iso,
                            'timestamp_desc': 'Bookmark Annotation Modified',
                            'message': f"Modified annotation '{name}' on bookmark: {title}",
                            'data_type': 'browser:annotation:modified',
                            'browser': browser_name,
                            'annotation_id': anno_id,
                            'annotation_name': name
                        })
                    except TimestampValidationError:
                        pass
        except sqlite3.Error:
            pass

    return rows


def extract_gecko_metadata(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract page metadata/engagement events from Gecko database."""
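    # moz_places_metadata (present in recent Firefox releases) records per-page
    # interaction telemetry: view time, typing, and scrolling. NOTE: the duration
    # columns are assumed here to be microseconds; some Firefox builds store
    # milliseconds instead, so treat the derived *_seconds fields as approximate.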
    if not table_exists(conn, 'moz_places_metadata'):
        return []

    cursor = conn.cursor()

    query = """
    SELECT
        m.place_id,
        m.created_at,
        m.updated_at,
        m.total_view_time,
        m.typing_time,
        m.key_presses,
        m.scrolling_time,
        m.scrolling_distance,
        m.document_type,
        p.url,
        p.title
    FROM moz_places_metadata m
    JOIN moz_places p ON m.place_id = p.id
    WHERE m.created_at > 0
    ORDER BY m.created_at
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        (place_id, created_at, updated_at, total_view_time, typing_time,
         key_presses, scrolling_time, scrolling_distance, document_type,
         url, title) = row

        try:
            created_us, created_iso = convert_gecko_timestamp(created_at)
        except TimestampValidationError:
            continue

        # Convert microseconds to seconds for display
        view_seconds = (total_view_time or 0) / 1000000
        typing_seconds = (typing_time or 0) / 1000000
        scrolling_seconds = (scrolling_time or 0) / 1000000

        rows.append({
            'timestamp': created_us,
            'datetime': created_iso,
            'timestamp_desc': 'Page Engagement',
            'message': f"Engaged with: {title or '(No title)'} ({view_seconds:.1f}s)",
            'data_type': 'browser:page:engagement',
            'browser': browser_name,
            'url': url or "",
            'title': title or "(No title)",
            'total_view_time_seconds': view_seconds,
            'typing_time_seconds': typing_seconds,
            'key_presses': key_presses or 0,
            'scrolling_time_seconds': scrolling_seconds,
            'scrolling_distance': scrolling_distance or 0,
            'document_type': document_type
        })

    return rows


def extract_gecko_input_history(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract address bar input history from Gecko database."""
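    # moz_inputhistory stores no timestamp of its own, only (typed text ->
    # chosen place) pairs, so the matched page's last visit date is used below
    # as an approximate event time for the typing activity.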
    if not table_exists(conn, 'moz_inputhistory'):
        return []

    cursor = conn.cursor()

    query = """
    SELECT
        ih.place_id,
        ih.input,
        ih.use_count,
        p.url,
        p.title,
        p.last_visit_date
    FROM moz_inputhistory ih
    JOIN moz_places p ON ih.place_id = p.id
    WHERE p.last_visit_date IS NOT NULL
    ORDER BY p.last_visit_date
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        place_id, input_text, use_count, url, title, last_visit = row

        try:
            unix_microseconds, iso_datetime = convert_gecko_timestamp(last_visit)
        except TimestampValidationError:
            continue

        rows.append({
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Address Bar Input',
            'message': f"Typed in address bar: {input_text}",
            'data_type': 'browser:addressbar:input',
            'browser': browser_name,
            'input_text': input_text or "",
            'matched_url': url or "",
            'matched_title': title or "",
            'use_count': use_count or 0
        })

    return rows


def extract_gecko_keywords(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract custom search keywords from Gecko database."""
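    # Keywords are user-defined address bar shortcuts: typing "kw terms" expands
    # to the associated URL (with %s replaced by the search terms), so a
    # keyword's dateAdded marks when the user configured that custom search.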
    if not table_exists(conn, 'moz_keywords'):
        return []

    cursor = conn.cursor()

    # Check if dateAdded column exists (not in all Firefox versions)
    if not column_exists(conn, 'moz_keywords', 'dateAdded'):
        return []

    query = """
    SELECT
        k.id,
        k.keyword,
        k.dateAdded,
        p.url,
        p.title
    FROM moz_keywords k
    LEFT JOIN moz_places p ON k.place_id = p.id
    WHERE k.dateAdded IS NOT NULL
    ORDER BY k.dateAdded
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        keyword_id, keyword, date_added, url, title = row

        try:
            added_us, added_iso = convert_gecko_timestamp(date_added)
        except TimestampValidationError:
            continue

        rows.append({
            'timestamp': added_us,
            'datetime': added_iso,
            'timestamp_desc': 'Keyword Added',
            'message': f"Added search keyword: {keyword}",
            'data_type': 'browser:keyword:added',
            'browser': browser_name,
            'keyword_id': keyword_id,
            'keyword': keyword or "",
            'search_url': url or "",
            'title': title or ""
        })

    return rows


def extract_gecko_origins(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract origin (domain) tracking data from Gecko database."""
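    # frecency is Places' combined frequency + recency ranking score, used by
    # Firefox to order address bar suggestions; higher values indicate more
    # heavily and more recently used origins.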
    if not table_exists(conn, 'moz_origins'):
        return []

    cursor = conn.cursor()

    # Check for last_visit_date column
    if not column_exists(conn, 'moz_origins', 'last_visit_date'):
        return []

    query = """
    SELECT
        id,
        prefix,
        host,
        frecency,
        last_visit_date
    FROM moz_origins
    WHERE last_visit_date IS NOT NULL
    ORDER BY last_visit_date
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        origin_id, prefix, host, frecency, last_visit = row

        try:
            unix_microseconds, iso_datetime = convert_gecko_timestamp(last_visit)
        except TimestampValidationError:
            continue

        full_origin = f"{prefix}{host}" if prefix else host

        rows.append({
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Domain Visited',
            'message': f"Visited domain: {full_origin}",
            'data_type': 'browser:domain:visit',
            'browser': browser_name,
            'origin': full_origin or "",
            'host': host or "",
            'prefix': prefix or "",
            'frecency_score': frecency or 0
        })

    return rows


# ============================================================================
# WEBKIT/SAFARI EXTRACTORS
# ============================================================================

def extract_webkit_visits(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract visit events from WebKit database with resolved redirect chains."""
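    # Safari's History.db stores visit_time as Core Foundation absolute time
    # (seconds since 2001-01-01 UTC); convert_webkit_timestamp normalizes it to
    # Unix microseconds. The self-joins below resolve the redirect_source /
    # redirect_destination visit ids into the URLs they point at.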
    cursor = conn.cursor()

    query = """
    SELECT
        hv.visit_time,
        hi.url,
        hi.title,
        hv.title as visit_title,
        hv.load_successful,
        hv.http_non_get,
        hi.visit_count,
        redirect_src_items.url as redirect_source_url,
        redirect_dst_items.url as redirect_destination_url
    FROM history_visits hv
    JOIN history_items hi ON hv.history_item = hi.id
    LEFT JOIN history_visits redirect_src ON hv.redirect_source = redirect_src.id
    LEFT JOIN history_items redirect_src_items ON redirect_src.history_item = redirect_src_items.id
    LEFT JOIN history_visits redirect_dst ON hv.redirect_destination = redirect_dst.id
    LEFT JOIN history_items redirect_dst_items ON redirect_dst.history_item = redirect_dst_items.id
    ORDER BY hv.visit_time
    """

    cursor.execute(query)
    results = cursor.fetchall()

    rows = []
    for row in results:
        (webkit_timestamp, url, title, visit_title,
         load_successful, http_non_get, visit_count,
         redirect_source_url, redirect_destination_url) = row

        try:
            unix_microseconds, iso_datetime = convert_webkit_timestamp(webkit_timestamp)
        except TimestampValidationError:
            continue

        display_title = title or visit_title or "(No title)"

        message = f"Visited: {display_title}"
        if not load_successful:
            message += " [FAILED TO LOAD]"
        if http_non_get:
            message += " [POST/Form]"

        # Build row with only useful fields
        row_data = {
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Visit Time',
            'message': message,
            'data_type': 'browser:page:visit',
            'browser': browser_name,
            'url': url or "",
            'title': display_title,
            'load_successful': bool(load_successful),
            'http_post': bool(http_non_get),
            'total_visit_count': visit_count or 0
        }

        # Add redirect chain info if present
        if redirect_source_url:
            row_data['redirect_source_url'] = redirect_source_url
        if redirect_destination_url:
            row_data['redirect_destination_url'] = redirect_destination_url

        rows.append(row_data)

    return rows


def extract_webkit_bookmarks(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract bookmarks from WebKit database."""
    if not table_exists(conn, 'bookmarks'):
        return []

    cursor = conn.cursor()

    # Check for date_added column
    if not column_exists(conn, 'bookmarks', 'date_added'):
        return []

    query = """
    SELECT
        id,
        title,
        url,
        date_added,
        date_last_modified
    FROM bookmarks
    WHERE date_added > 0
    ORDER BY date_added
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        bm_id, title, url, date_added, date_modified = row

        # Bookmark added
        if date_added:
            try:
                added_us, added_iso = convert_webkit_timestamp(date_added)
                rows.append({
                    'timestamp': added_us,
                    'datetime': added_iso,
                    'timestamp_desc': 'Bookmark Added',
                    'message': f"Bookmarked: {title or url}",
                    'data_type': 'browser:bookmark:added',
                    'browser': browser_name,
                    'bookmark_id': bm_id,
                    'bookmark_title': title or "",
                    'url': url or ""
                })
            except TimestampValidationError:
                pass

        # Bookmark modified (if different)
        if date_modified and date_modified != date_added:
            try:
                modified_us, modified_iso = convert_webkit_timestamp(date_modified)
                rows.append({
                    'timestamp': modified_us,
                    'datetime': modified_iso,
                    'timestamp_desc': 'Bookmark Modified',
                    'message': f"Modified bookmark: {title or url}",
                    'data_type': 'browser:bookmark:modified',
                    'browser': browser_name,
                    'bookmark_id': bm_id,
                    'bookmark_title': title or "",
                    'url': url or ""
                })
            except TimestampValidationError:
                pass

    return rows


def extract_webkit_downloads(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract downloads from WebKit database."""
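    # date_started/date_finished use the same 2001-01-01 epoch handled by
    # convert_webkit_timestamp; the completion event derives its duration from
    # the two converted timestamps so both values share a common time base.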
    if not table_exists(conn, 'downloads'):
        return []

    cursor = conn.cursor()

    # Check for date_started column
    if not column_exists(conn, 'downloads', 'date_started'):
        return []

    query = """
    SELECT
        id,
        url,
        path,
        mime_type,
        bytes_received,
        total_bytes,
        date_started,
        date_finished
    FROM downloads
    WHERE date_started > 0
    ORDER BY date_started
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        dl_id, url, path, mime_type, bytes_received, total_bytes, date_started, date_finished = row

        filename = Path(path).name if path else "(unknown)"
        started_us = None  # set below only if the start timestamp converts cleanly

        # Download started
        if date_started:
            try:
                started_us, started_iso = convert_webkit_timestamp(date_started)
                rows.append({
                    'timestamp': started_us,
                    'datetime': started_iso,
                    'timestamp_desc': 'Download Started',
                    'message': f"Download started: {filename} ({mime_type or 'unknown type'})",
                    'data_type': 'browser:download:start',
                    'browser': browser_name,
                    'download_id': dl_id,
                    'filename': filename,
                    'source_url': url or "",
                    'file_path': path or "",
                    'file_size_bytes': total_bytes or 0,
                    'mime_type': mime_type or ""
                })
            except TimestampValidationError:
                pass

        # Download finished (if different)
        if date_finished and date_finished != date_started:
            try:
                finished_us, finished_iso = convert_webkit_timestamp(date_finished)
                # Guard against an unconverted start time (previously this read
                # started_us even when the start conversion had failed)
                duration_seconds = (finished_us - started_us) / 1000000 if started_us is not None else 0
                rows.append({
                    'timestamp': finished_us,
                    'datetime': finished_iso,
                    'timestamp_desc': 'Download Completed',
                    'message': f"Download completed: {filename} ({bytes_received or 0} bytes in {duration_seconds:.1f}s)",
                    'data_type': 'browser:download:complete',
                    'browser': browser_name,
                    'download_id': dl_id,
                    'filename': filename,
                    'file_path': path or "",
                    'file_size_bytes': bytes_received or 0,
                    'mime_type': mime_type or "",
                    'download_duration_seconds': duration_seconds
                })
            except TimestampValidationError:
                pass

    return rows


def extract_webkit_reading_list(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract Reading List items from WebKit database."""
    if not table_exists(conn, 'reading_list'):
        return []

    cursor = conn.cursor()

    # Check for date_added column
    if not column_exists(conn, 'reading_list', 'date_added'):
        return []

    query = """
    SELECT
        id,
        title,
        url,
        date_added,
        date_last_viewed
    FROM reading_list
    WHERE date_added > 0
    ORDER BY date_added
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        item_id, title, url, date_added, date_viewed = row

        # Reading list item added
        if date_added:
            try:
                added_us, added_iso = convert_webkit_timestamp(date_added)
                rows.append({
                    'timestamp': added_us,
                    'datetime': added_iso,
                    'timestamp_desc': 'Reading List Item Added',
                    'message': f"Added to Reading List: {title or url}",
                    'data_type': 'browser:readinglist:added',
                    'browser': browser_name,
                    'reading_list_id': item_id,
                    'title': title or "",
                    'url': url or ""
                })
            except TimestampValidationError:
                pass

        # Reading list item viewed (if present)
        if date_viewed and date_viewed > 0:
            try:
                viewed_us, viewed_iso = convert_webkit_timestamp(date_viewed)
                rows.append({
                    'timestamp': viewed_us,
                    'datetime': viewed_iso,
                    'timestamp_desc': 'Reading List Item Viewed',
                    'message': f"Viewed Reading List item: {title or url}",
                    'data_type': 'browser:readinglist:viewed',
                    'browser': browser_name,
                    'reading_list_id': item_id,
                    'title': title or "",
                    'url': url or ""
                })
            except TimestampValidationError:
                pass

    return rows


def extract_webkit_top_sites(conn: sqlite3.Connection, browser_name: str) -> List[Dict[str, Any]]:
    """Extract Top Sites data from WebKit database."""
    if not table_exists(conn, 'top_sites'):
        return []

    cursor = conn.cursor()

    # Check for last_visited column
    if not column_exists(conn, 'top_sites', 'last_visited'):
        return []

    query = """
    SELECT
        id,
        url,
        title,
        visit_count,
        last_visited
    FROM top_sites
    WHERE last_visited > 0
    ORDER BY last_visited
    """

    try:
        cursor.execute(query)
        results = cursor.fetchall()
    except sqlite3.Error:
        return []

    rows = []
    for row in results:
        site_id, url, title, visit_count, last_visited = row

        try:
            unix_microseconds, iso_datetime = convert_webkit_timestamp(last_visited)
        except TimestampValidationError:
            continue

        rows.append({
            'timestamp': unix_microseconds,
            'datetime': iso_datetime,
            'timestamp_desc': 'Top Site Last Visited',
            'message': f"Top site visited: {title or url}",
            'data_type': 'browser:topsite:visit',
            'browser': browser_name,
            'site_id': site_id,
            'url': url or "",
            'title': title or "",
            'visit_count': visit_count or 0
        })

    return rows


# ============================================================================
# MAIN EXTRACTION ORCHESTRATION
# ============================================================================

def extract_all_events(db_path: str, browser_type: str, browser_name: Optional[str] = None) -> Tuple[List[Dict[str, Any]], Dict[str, int]]:
    """
    Extract ALL timeline events from browser database.

    Returns:
        Tuple of (all_rows, event_counts dictionary)
    """
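    # Every extractor yields dicts sharing a common core schema, e.g.:
    #   {'timestamp': 1700000000000000,            # Unix epoch, microseconds
    #    'datetime': '2023-11-14T22:13:20+00:00',  # ISO 8601
    #    'timestamp_desc': 'Visit Time',
    #    'message': 'Visited: Example',
    #    'data_type': 'browser:page:visit',
    #    'browser': 'Firefox',
    #    ...}                                      # plus event-specific fields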
    if browser_name is None:
        # .get() avoids an opaque KeyError here; unsupported engine types are
        # rejected with a clear error below instead
        browser_name = {'gecko': 'Firefox', 'chromium': 'Chromium', 'webkit': 'Safari'}.get(browser_type, browser_type)

    conn = connect_database_readonly(db_path)
    all_rows = []
    event_counts = {}

    print(f"Extracting events from {browser_name} database...")
    print("=" * 60)

    try:
        if browser_type == 'gecko':
            # Firefox/Gecko - extract all event types
            extractors = [
                ('Page Visits', extract_gecko_visits),
                ('Bookmarks', extract_gecko_bookmarks),
                ('Downloads', extract_gecko_downloads),
                ('Form History', extract_gecko_form_history),
                ('Annotations', extract_gecko_annotations),
                ('Page Engagement', extract_gecko_metadata),
                ('Address Bar Input', extract_gecko_input_history),
                ('Search Keywords', extract_gecko_keywords),
                ('Domain Tracking', extract_gecko_origins),
            ]

        elif browser_type == 'chromium':
            # Chromium - extract all event types
            extractors = [
                ('Page Visits', extract_chromium_visits),
                ('Downloads', extract_chromium_downloads),
                ('Search Queries', extract_chromium_search_terms),
                ('Form Autofill', extract_chromium_autofill),
                ('Favicons', extract_chromium_favicons),
                ('Media Playback', extract_chromium_media_history),
                ('Site Engagement', extract_chromium_site_engagement),
            ]

        elif browser_type == 'webkit':
            # Safari/WebKit - extract all event types
            extractors = [
                ('Page Visits', extract_webkit_visits),
                ('Bookmarks', extract_webkit_bookmarks),
                ('Downloads', extract_webkit_downloads),
                ('Reading List', extract_webkit_reading_list),
                ('Top Sites', extract_webkit_top_sites),
            ]
        else:
            # Previously an unknown engine type left `extractors` unbound
            raise BrowserDetectionError(f"Unsupported browser type: {browser_type}")

        # Run all extractors
        for name, extractor_func in extractors:
            try:
                events = extractor_func(conn, browser_name)
                all_rows.extend(events)
                event_counts[name] = len(events)
                print(f"  ✓ {name:25} {len(events):>7,} events")
            except Exception as e:
                print(f"  ✗ {name:25} Error: {e}")
                event_counts[name] = 0

    finally:
        conn.close()

    # Sort all events by timestamp
    all_rows.sort(key=lambda x: x['timestamp'])

    print("=" * 60)

    return all_rows, event_counts


def generate_default_output_filename(browser_type: str, input_path: str) -> str:
    """Generate a sensible default output filename based on browser type and input."""
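    # The match is a simple substring test on the input path, e.g.
    # "/evidence/chrome/History" -> "chrome_timeline_timesketch.csv"; if no
    # known browser name appears in the path, the engine type is used instead.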
    path_lower = input_path.lower()

    browser_names = {
        'firefox': 'firefox', 'chrome': 'chrome', 'edge': 'edge',
        'brave': 'brave', 'opera': 'opera', 'vivaldi': 'vivaldi', 'safari': 'safari',
    }

    detected_name = None
    for name_key, name_value in browser_names.items():
        if name_key in path_lower:
            detected_name = name_value
            break

    if detected_name:
        return f"{detected_name}_timeline_timesketch.csv"
    else:
        return f"{browser_type}_timeline_timesketch.csv"


def main() -> int:
    parser = argparse.ArgumentParser(
        description='Convert ALL browser events to Timesketch CSV format',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Extracts ALL timestamped events from browser databases:

CHROMIUM (Chrome, Edge, Brave, Opera, Vivaldi):
  - Page visits (with metadata and navigation chains)
  - Downloads (start, completion, access times)
  - Search queries
  - Form autofill data (first use, last use)
  - Favicon updates
  - Media playback history
  - Site engagement scores

GECKO/FIREFOX:
  - Page visits (with metadata and navigation chains)
  - Bookmarks (added, modified)
  - Downloads
  - Form history (first use, last use)
  - Page and bookmark annotations
  - Page engagement metrics (view time, scrolling, typing)
  - Address bar input history
  - Custom search keywords
  - Domain-level tracking data

WEBKIT/SAFARI:
  - Page visits (with redirect chains)
  - Bookmarks (added, modified)
  - Downloads (start, completion)
  - Reading List items (added, viewed)
  - Top Sites tracking

Browser types:
  gecko, firefox      - Gecko-based browsers (Firefox)
  chromium            - Chromium-based browsers (Chrome, Edge, Brave, etc.)
  webkit, safari      - WebKit-based browsers (Safari)
  auto                - Auto-detect browser type (default)

Output Format:
  - Browser-agnostic Timesketch-compatible CSV
  - Consistent event naming across all browsers
  - All timestamps in Unix microseconds + ISO 8601

Example usage:
  # Auto-detect and extract everything
  python browser2timesketch_complete.py -i /path/to/History

  # Specify browser and output
  python browser2timesketch_complete.py -b firefox -i places.sqlite -o output.csv

  # Custom browser name (e.g., for Brave, Edge)
  python browser2timesketch_complete.py --browser-name "Brave" -i History
        """
    )

    parser.add_argument(
        '-b', '--browser',
        choices=['gecko', 'firefox', 'chromium', 'webkit', 'safari', 'auto'],
        default='auto',
        help='Browser engine type (default: auto-detect)'
    )

    parser.add_argument(
        '-i', '--input',
        required=True,
        help='Path to browser history database'
    )

    parser.add_argument(
        '-o', '--output',
        help='Output CSV file path (default: auto-generated)'
    )

    parser.add_argument(
        '--browser-name',
        help='Custom browser name for the browser field (e.g., "Brave", "Edge")'
    )

    args = parser.parse_args()

    try:
        # Validate database file
        print(f"Validating database: {args.input}")
        validate_sqlite_database(args.input)
        print("✓ Database is valid SQLite file\n")

        # Detect or validate browser type
        browser_type = args.browser.lower()

        if browser_type == 'auto':
            print("Auto-detecting browser type...")
            browser_type = detect_browser_type(args.input)
            print(f"✓ Detected browser type: {browser_type}\n")
        else:
            if browser_type == 'firefox':
                browser_type = 'gecko'
            elif browser_type == 'safari':
                browser_type = 'webkit'

            detected_type = detect_browser_type(args.input)
            if detected_type != browser_type:
                print(f"Warning: You specified '{args.browser}' but database appears to be '{detected_type}'",
                      file=sys.stderr)
                response = input("Continue anyway? [y/N]: ")
                if response.lower() != 'y':
                    return 1

        # Generate output filename if not provided
        if args.output:
            output_csv = args.output
        else:
            output_csv = generate_default_output_filename(browser_type, args.input)
            print(f"Using output filename: {output_csv}\n")

        # Extract all events
        all_rows, event_counts = extract_all_events(args.input, browser_type, args.browser_name)

        if not all_rows:
            print("\n❌ No events found in database!")
            return 1

        # Write to CSV
        print(f"\nWriting {len(all_rows):,} total events to CSV...")
        write_timesketch_csv(output_csv, all_rows)

        # Summary
        print("\n" + "=" * 60)
        print("EXTRACTION COMPLETE")
        print("=" * 60)
        print(f"Total events:  {len(all_rows):,}")
        print("\nEvent breakdown:")
        for event_type, count in sorted(event_counts.items()):
            if count > 0:
                print(f"  • {event_type:25} {count:>7,} events")
        print(f"\n✓ Output saved to: {output_csv}")
        print(f"✓ Format: Browser-agnostic Timesketch CSV")
 | 
						|
        print("=" * 60)
 | 
						|
        
 | 
						|
        return 0
 | 
						|
        
 | 
						|
    except DatabaseValidationError as e:
 | 
						|
        print(f"\n❌ Database Validation Error: {e}", file=sys.stderr)
 | 
						|
        return 1
 | 
						|
    except BrowserDetectionError as e:
 | 
						|
        print(f"\n❌ Browser Detection Error: {e}", file=sys.stderr)
 | 
						|
        return 1
 | 
						|
    except sqlite3.Error as e:
 | 
						|
        print(f"\n❌ Database Error: {e}", file=sys.stderr)
 | 
						|
        return 1
 | 
						|
    except KeyboardInterrupt:
 | 
						|
        print("\n\n⚠️  Operation cancelled by user", file=sys.stderr)
 | 
						|
        return 130
 | 
						|
    except Exception as e:
 | 
						|
        print(f"\n❌ Unexpected Error: {e}", file=sys.stderr)
 | 
						|
        import traceback
 | 
						|
        traceback.print_exc()
 | 
						|
        return 1
 | 
						|
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
    sys.exit(main()) |