15 Commits

Author SHA1 Message Date
overcuriousity 9f3b17e658 try_db 2025-09-14 22:54:37 +02:00
overcuriousity eb9eea127b it 2025-09-14 22:37:23 +02:00
overcuriousity ae07635ab6 update edge labels 2025-09-14 20:50:09 +02:00
overcuriousity d7adf9ad8b it 2025-09-14 20:22:09 +02:00
overcuriousity 39ce0e9d11 great progress 2025-09-14 19:12:12 +02:00
overcuriousity 926f9e1096 fixes 2025-09-14 19:06:20 +02:00
overcuriousity 9499e62ccc it 2025-09-14 18:45:02 +02:00
overcuriousity 89ae06482e it 2025-09-14 18:02:15 +02:00
overcuriousity 7fe7ca41ba it 2025-09-14 17:40:18 +02:00
overcuriousity 949fbdbb45 itteration 2025-09-14 17:18:56 +02:00
overcuriousity 689e8c00d4 unify config 2025-09-14 16:17:26 +02:00
overcuriousity 3511f18f9a it 2025-09-14 16:07:58 +02:00
overcuriousity 72f7056bc7 it 2025-09-14 15:31:18 +02:00
overcuriousity 2ae33bc5ba it 2025-09-14 15:00:00 +02:00
overcuriousity c91913fa13 it 2025-09-14 14:28:04 +02:00
18 changed files with 1086 additions and 1272 deletions

.env.example (new file, 34 lines)

@@ -0,0 +1,34 @@
# ===============================================
# DNSRecon Environment Variables
# ===============================================
# Copy this file to .env and fill in your values.
# --- API Keys ---
# Add your Shodan API key for the Shodan provider to be enabled.
SHODAN_API_KEY=
# --- Flask & Session Settings ---
# A strong, random secret key is crucial for session security.
FLASK_SECRET_KEY=your-very-secret-and-random-key-here
FLASK_HOST=127.0.0.1
FLASK_PORT=5000
FLASK_DEBUG=True
# How long a user's session in the browser lasts (in hours).
FLASK_PERMANENT_SESSION_LIFETIME_HOURS=2
# How long inactive scanner data is stored in Redis (in minutes).
SESSION_TIMEOUT_MINUTES=60
# --- Application Core Settings ---
# The default number of levels to recurse when scanning.
DEFAULT_RECURSION_DEPTH=2
# Default timeout for provider API requests in seconds.
DEFAULT_TIMEOUT=30
# The number of concurrent provider requests to make.
MAX_CONCURRENT_REQUESTS=5
# The number of results from a provider that triggers the "large entity" grouping.
LARGE_ENTITY_THRESHOLD=100
# The number of times to retry a target if a provider fails.
MAX_RETRIES_PER_TARGET=3
# How long cached provider responses are stored (in hours).
CACHE_EXPIRY_HOURS=12
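These values are consumed at startup via python-dotenv, as the config.py diff below shows; a minimal sketch of the pattern (variable names from this file, everything else illustrative):

# Sketch: reading the .env values above (mirrors config.py below).
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the working directory
shodan_key = os.getenv('SHODAN_API_KEY')  # None when no key is configured
recursion_depth = int(os.getenv('DEFAULT_RECURSION_DEPTH', '2'))  # env vars are strings, cast with a default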

app.py (232 lines changed)

@@ -1,3 +1,5 @@
# dnsrecon-reduced/app.py
"""
Flask application entry point for DNSRecon web interface.
Provides REST API endpoints and serves the web interface with user session support.
@@ -14,48 +16,36 @@ from config import config
app = Flask(__name__)
app.config['SECRET_KEY'] = 'dnsrecon-dev-key-change-in-production'
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=2) # 2 hour session lifetime
# Use centralized configuration for Flask settings
app.config['SECRET_KEY'] = config.flask_secret_key
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=config.flask_permanent_session_lifetime_hours)
def get_user_scanner():
"""
User scanner retrieval with better error handling and debugging.
Retrieves the scanner for the current session, or creates a new
session and scanner if one doesn't exist.
"""
# Get current Flask session info for debugging
current_flask_session_id = session.get('dnsrecon_session_id')
client_ip = request.remote_addr
user_agent = request.headers.get('User-Agent', '')[:100] # Truncate for logging
# Try to get existing session
if current_flask_session_id:
existing_scanner = session_manager.get_session(current_flask_session_id)
if existing_scanner:
# Ensure session ID is set
existing_scanner.session_id = current_flask_session_id
return current_flask_session_id, existing_scanner
else:
print(f"Session {current_flask_session_id} not found in session manager")
# Create new session
print("Creating new session...")
# Create new session if none exists
print("Creating new session as none was found...")
new_session_id = session_manager.create_session()
new_scanner = session_manager.get_session(new_session_id)
if not new_scanner:
print(f"ERROR: Failed to retrieve newly created session {new_session_id}")
raise Exception("Failed to create new scanner session")
# Store in Flask session
session['dnsrecon_session_id'] = new_session_id
session.permanent = True
# Ensure session ID is set on scanner
new_scanner.session_id = new_session_id
print(f"Created new session: {new_session_id}")
print(f"New scanner status: {new_scanner.status}")
print("=== END SESSION DEBUG ===")
return new_session_id, new_scanner
@app.route('/')
@@ -67,101 +57,68 @@ def index():
@app.route('/api/scan/start', methods=['POST'])
def start_scan():
"""
Start a new reconnaissance scan with immediate GUI feedback.
Start a new reconnaissance scan. Creates a new isolated scanner if
clear_graph is true, otherwise adds to the existing one.
"""
print("=== API: /api/scan/start called ===")
try:
print("Getting JSON data from request...")
data = request.get_json()
print(f"Request data: {data}")
if not data or 'target_domain' not in data:
print("ERROR: Missing target_domain in request")
return jsonify({
'success': False,
'error': 'Missing target_domain in request'
}), 400
return jsonify({'success': False, 'error': 'Missing target_domain in request'}), 400
target_domain = data['target_domain'].strip()
max_depth = data.get('max_depth', config.default_recursion_depth)
clear_graph = data.get('clear_graph', True)
print(f"Parsed - target_domain: '{target_domain}', max_depth: {max_depth}")
print(f"Parsed - target_domain: '{target_domain}', max_depth: {max_depth}, clear_graph: {clear_graph}")
# Validation
if not target_domain:
print("ERROR: Target domain cannot be empty")
return jsonify({
'success': False,
'error': 'Target domain cannot be empty'
}), 400
return jsonify({'success': False, 'error': 'Target domain cannot be empty'}), 400
if not isinstance(max_depth, int) or not 1 <= max_depth <= 5:
return jsonify({'success': False, 'error': 'Max depth must be an integer between 1 and 5'}), 400
if not isinstance(max_depth, int) or max_depth < 1 or max_depth > 5:
print(f"ERROR: Invalid max_depth: {max_depth}")
return jsonify({
'success': False,
'error': 'Max depth must be an integer between 1 and 5'
}), 400
user_session_id, scanner = None, None
if clear_graph:
print("Clear graph requested: Creating a new, isolated scanner session.")
old_session_id = session.get('dnsrecon_session_id')
if old_session_id:
session_manager.terminate_session(old_session_id)
user_session_id = session_manager.create_session()
session['dnsrecon_session_id'] = user_session_id
session.permanent = True
scanner = session_manager.get_session(user_session_id)
else:
print("Adding to existing graph: Reusing the current scanner session.")
user_session_id, scanner = get_user_scanner()
if not scanner:
return jsonify({'success': False, 'error': 'Failed to get or create a scanner instance.'}), 500
print("Validation passed, getting user scanner...")
print(f"Using scanner {id(scanner)} in session {user_session_id}")
# Get user-specific scanner
user_session_id, scanner = get_user_scanner()
# Ensure session ID is properly set
if not scanner.session_id:
scanner.session_id = user_session_id
print(f"Using session: {user_session_id}")
print(f"Scanner object ID: {id(scanner)}")
# Start scan
print(f"Calling start_scan on scanner {id(scanner)}...")
success = scanner.start_scan(target_domain, max_depth, clear_graph=clear_graph)
# Immediately update session state regardless of success
session_manager.update_session_scanner(user_session_id, scanner)
if success:
scan_session_id = scanner.logger.session_id
print(f"Scan started successfully with scan session ID: {scan_session_id}")
return jsonify({
'success': True,
'message': 'Scan started successfully',
'scan_id': scan_session_id,
'scan_id': scanner.logger.session_id,
'user_session_id': user_session_id,
'scanner_status': scanner.status,
'debug_info': {
'scanner_object_id': id(scanner),
'scanner_status': scanner.status
}
})
else:
print("ERROR: Scanner returned False")
# Provide more detailed error information
error_details = {
'scanner_status': scanner.status,
'scanner_object_id': id(scanner),
'session_id': user_session_id,
'providers_count': len(scanner.providers) if hasattr(scanner, 'providers') else 0
}
return jsonify({
'success': False,
'error': f'Failed to start scan (scanner status: {scanner.status})',
'debug_info': error_details
}), 409
except Exception as e:
print(f"ERROR: Exception in start_scan endpoint: {e}")
traceback.print_exc()
return jsonify({
'success': False,
'error': f'Internal server error: {str(e)}'
}), 500
return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500
@app.route('/api/scan/stop', methods=['POST'])
def stop_scan():
@@ -367,12 +324,18 @@ def export_results():
@app.route('/api/providers', methods=['GET'])
def get_providers():
"""Get information about available providers for the user session."""
print("=== API: /api/providers called ===")
try:
# Get user-specific scanner
user_session_id, scanner = get_user_scanner()
if scanner:
completed_tasks = scanner.indicators_completed
enqueued_tasks = len(scanner.task_queue)
print(f"DEBUG: Tasks - Completed: {completed_tasks}, Enqueued: {enqueued_tasks}")
else:
print("DEBUG: No active scanner session found.")
provider_info = scanner.get_provider_info()
return jsonify({
@@ -447,113 +410,6 @@ def set_api_keys():
'error': f'Internal server error: {str(e)}'
}), 500
@app.route('/api/session/info', methods=['GET'])
def get_session_info():
"""Get information about the current user session."""
try:
user_session_id, scanner = get_user_scanner()
session_info = session_manager.get_session_info(user_session_id)
return jsonify({
'success': True,
'session_info': session_info
})
except Exception as e:
print(f"ERROR: Exception in get_session_info endpoint: {e}")
traceback.print_exc()
return jsonify({
'success': False,
'error': f'Internal server error: {str(e)}'
}), 500
@app.route('/api/session/terminate', methods=['POST'])
def terminate_session():
"""Terminate the current user session."""
try:
user_session_id = session.get('dnsrecon_session_id')
if user_session_id:
success = session_manager.terminate_session(user_session_id)
# Clear Flask session
session.pop('dnsrecon_session_id', None)
return jsonify({
'success': success,
'message': 'Session terminated' if success else 'Session not found'
})
else:
return jsonify({
'success': False,
'error': 'No active session to terminate'
}), 400
except Exception as e:
print(f"ERROR: Exception in terminate_session endpoint: {e}")
traceback.print_exc()
return jsonify({
'success': False,
'error': f'Internal server error: {str(e)}'
}), 500
@app.route('/api/admin/sessions', methods=['GET'])
def list_sessions():
"""Admin endpoint to list all active sessions."""
try:
sessions = session_manager.list_active_sessions()
stats = session_manager.get_statistics()
return jsonify({
'success': True,
'sessions': sessions,
'statistics': stats
})
except Exception as e:
print(f"ERROR: Exception in list_sessions endpoint: {e}")
traceback.print_exc()
return jsonify({
'success': False,
'error': f'Internal server error: {str(e)}'
}), 500
@app.route('/api/health', methods=['GET'])
def health_check():
"""Health check endpoint."""
try:
# Get session stats
session_stats = session_manager.get_statistics()
return jsonify({
'success': True,
'status': 'healthy',
'timestamp': datetime.now(timezone.utc).isoformat(),
'version': '1.0.0-phase2',
'phase': 2,
'features': {
'multi_provider': True,
'concurrent_processing': True,
'real_time_updates': True,
'api_key_management': True,
'visualization': True,
'retry_logic': True,
'user_sessions': True,
'session_isolation': True
},
'session_statistics': session_stats
})
except Exception as e:
print(f"ERROR: Exception in health_check endpoint: {e}")
return jsonify({
'success': False,
'error': f'Health check failed: {str(e)}'
}), 500
@app.errorhandler(404)
def not_found(error):
"""Handle 404 errors."""

config.py (125 lines changed)

@@ -5,110 +5,97 @@ Handles API key storage, rate limiting, and default settings.
import os
from typing import Dict, Optional
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class Config:
"""Configuration manager for DNSRecon application."""
def __init__(self):
"""Initialize configuration with default values."""
self.api_keys: Dict[str, Optional[str]] = {
'shodan': None
}
self.api_keys: Dict[str, Optional[str]] = {}
# Default settings
# --- General Settings ---
self.default_recursion_depth = 2
self.default_timeout = 10
self.default_timeout = 15
self.max_concurrent_requests = 5
self.large_entity_threshold = 100
self.max_retries_per_target = 3
self.cache_expiry_hours = 12
# Rate limiting settings (requests per minute)
# --- Provider Caching Settings ---
self.cache_timeout_hours = 6 # Provider-specific cache timeout
# --- Rate Limiting (requests per minute) ---
self.rate_limits = {
'crtsh': 60, # Free service, be respectful
'shodan': 60, # API dependent
'dns': 100 # Local DNS queries
'crtsh': 30,
'shodan': 60,
'dns': 100
}
# Provider settings
# --- Provider Settings ---
self.enabled_providers = {
'crtsh': True, # Always enabled (free)
'dns': True, # Always enabled (free)
'shodan': False # Requires API key
'crtsh': True,
'dns': True,
'shodan': False
}
# Logging configuration
# --- Logging ---
self.log_level = 'INFO'
self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# Flask configuration
# --- Flask & Session Settings ---
self.flask_host = '127.0.0.1'
self.flask_port = 5000
self.flask_debug = True
self.flask_secret_key = 'default-secret-key-change-me'
self.flask_permanent_session_lifetime_hours = 2
self.session_timeout_minutes = 60
def set_api_key(self, provider: str, api_key: str) -> bool:
"""
Set API key for a provider.
# Load environment variables to override defaults
self.load_from_env()
Args:
provider: Provider name (shodan, etc)
api_key: API key string
Returns:
bool: True if key was set successfully
"""
if provider in self.api_keys:
self.api_keys[provider] = api_key
self.enabled_providers[provider] = True if api_key else False
return True
return False
def load_from_env(self):
"""Load configuration from environment variables."""
self.set_api_key('shodan', os.getenv('SHODAN_API_KEY'))
# Override settings from environment
self.default_recursion_depth = int(os.getenv('DEFAULT_RECURSION_DEPTH', self.default_recursion_depth))
self.default_timeout = int(os.getenv('DEFAULT_TIMEOUT', self.default_timeout))
self.max_concurrent_requests = int(os.getenv('MAX_CONCURRENT_REQUESTS', self.max_concurrent_requests))
self.large_entity_threshold = int(os.getenv('LARGE_ENTITY_THRESHOLD', self.large_entity_threshold))
self.max_retries_per_target = int(os.getenv('MAX_RETRIES_PER_TARGET', self.max_retries_per_target))
self.cache_expiry_hours = int(os.getenv('CACHE_EXPIRY_HOURS', self.cache_expiry_hours))
self.cache_timeout_hours = int(os.getenv('CACHE_TIMEOUT_HOURS', self.cache_timeout_hours))
# Override Flask and session settings
self.flask_host = os.getenv('FLASK_HOST', self.flask_host)
self.flask_port = int(os.getenv('FLASK_PORT', self.flask_port))
self.flask_debug = os.getenv('FLASK_DEBUG', str(self.flask_debug)).lower() == 'true'
self.flask_secret_key = os.getenv('FLASK_SECRET_KEY', self.flask_secret_key)
self.flask_permanent_session_lifetime_hours = int(os.getenv('FLASK_PERMANENT_SESSION_LIFETIME_HOURS', self.flask_permanent_session_lifetime_hours))
self.session_timeout_minutes = int(os.getenv('SESSION_TIMEOUT_MINUTES', self.session_timeout_minutes))
def set_api_key(self, provider: str, api_key: Optional[str]) -> bool:
"""Set API key for a provider."""
self.api_keys[provider] = api_key
if api_key:
self.enabled_providers[provider] = True
return True
def get_api_key(self, provider: str) -> Optional[str]:
"""
Get API key for a provider.
Args:
provider: Provider name
Returns:
API key or None if not set
"""
"""Get API key for a provider."""
return self.api_keys.get(provider)
def is_provider_enabled(self, provider: str) -> bool:
"""
Check if a provider is enabled.
Args:
provider: Provider name
Returns:
bool: True if provider is enabled
"""
"""Check if a provider is enabled."""
return self.enabled_providers.get(provider, False)
def get_rate_limit(self, provider: str) -> int:
"""
Get rate limit for a provider.
Args:
provider: Provider name
Returns:
Rate limit in requests per minute
"""
"""Get rate limit for a provider."""
return self.rate_limits.get(provider, 60)
def load_from_env(self):
"""Load configuration from environment variables."""
if os.getenv('SHODAN_API_KEY'):
self.set_api_key('shodan', os.getenv('SHODAN_API_KEY'))
# Override default settings from environment
self.default_recursion_depth = int(os.getenv('DEFAULT_RECURSION_DEPTH', '2'))
self.flask_debug = os.getenv('FLASK_DEBUG', 'True').lower() == 'true'
self.default_timeout = 30
self.max_concurrent_requests = 5
# Global configuration instance
config = Config()

core/graph_manager.py

@@ -1,3 +1,5 @@
# core/graph_manager.py
"""
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
@@ -50,21 +52,23 @@ class GraphManager:
self.__dict__.update(state)
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = None):
"""Recursively traverse metadata and add hashable values to the index."""
def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = None, parent_attr: str = ""):
"""Recursively traverse metadata and add hashable values to the index with better path tracking."""
if path is None:
path = []
if isinstance(data, dict):
for key, value in data.items():
self._update_correlation_index(node_id, value, path + [key])
self._update_correlation_index(node_id, value, path + [key], key)
elif isinstance(data, list):
for i, item in enumerate(data):
self._update_correlation_index(node_id, item, path + [f"[{i}]"])
# Instead of just using [i], include the parent attribute context
list_path_component = f"[{i}]" if not parent_attr else f"{parent_attr}[{i}]"
self._update_correlation_index(node_id, item, path + [list_path_component], parent_attr)
else:
self._add_to_correlation_index(node_id, data, ".".join(path))
self._add_to_correlation_index(node_id, data, ".".join(path), parent_attr)
def _add_to_correlation_index(self, node_id: str, value: Any, path_str: str):
def _add_to_correlation_index(self, node_id: str, value: Any, path_str: str, parent_attr: str = ""):
"""Add a hashable value to the correlation index, filtering out noise."""
if not isinstance(value, (str, int, float, bool)) or value is None:
return
@@ -80,8 +84,8 @@ class GraphManager:
return
if len(value) < 4 or value.lower() in ['true', 'false', 'unknown', 'none', 'crt.sh']:
return
elif isinstance(value, int) and abs(value) < 9999:
return # Ignore small integers
elif isinstance(value, int) and (abs(value) < 1024 or abs(value) > 65535):
return # Only correlate integers in the 1024-65535 range; smaller/larger values are mostly noise
elif isinstance(value, bool):
return # Ignore boolean values
@@ -90,10 +94,47 @@ class GraphManager:
self.correlation_index[value] = {}
if node_id not in self.correlation_index[value]:
self.correlation_index[value][node_id] = []
if path_str not in self.correlation_index[value][node_id]:
self.correlation_index[value][node_id].append(path_str)
# Store both the full path and the parent attribute for better edge labeling
correlation_entry = {
'path': path_str,
'parent_attr': parent_attr,
'meaningful_attr': self._extract_meaningful_attribute(path_str, parent_attr)
}
if correlation_entry not in self.correlation_index[value][node_id]:
self.correlation_index[value][node_id].append(correlation_entry)
def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = None) -> List[Dict]:
def _extract_meaningful_attribute(self, path_str: str, parent_attr: str = "") -> str:
"""Extract the most meaningful attribute name from a path string."""
if not path_str:
return "unknown"
path_parts = path_str.split('.')
# Look for the last non-array-index part
for part in reversed(path_parts):
# Skip array indices like [0], [1], etc.
if not (part.startswith('[') and part.endswith(']') and part[1:-1].isdigit()):
# Clean up compound names like "hostnames[0]" to just "hostnames"
clean_part = re.sub(r'\[\d+\]$', '', part)
if clean_part:
return clean_part
# Fallback to parent attribute if available
if parent_attr:
return parent_attr
# Last resort - use the first meaningful part
for part in path_parts:
if not (part.startswith('[') and part.endswith(']') and part[1:-1].isdigit()):
clean_part = re.sub(r'\[\d+\]$', '', part)
if clean_part:
return clean_part
return "correlation"
def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = None, parent_attr: str = "") -> List[Dict]:
"""Recursively traverse metadata to find correlations with existing data."""
if path is None:
path = []
@@ -103,10 +144,11 @@ class GraphManager:
for key, value in data.items():
if key == 'source': # Avoid correlating on the provider name
continue
all_correlations.extend(self._check_for_correlations(new_node_id, value, path + [key]))
all_correlations.extend(self._check_for_correlations(new_node_id, value, path + [key], key))
elif isinstance(data, list):
for i, item in enumerate(data):
all_correlations.extend(self._check_for_correlations(new_node_id, item, path + [f"[{i}]"]))
list_path_component = f"[{i}]" if not parent_attr else f"{parent_attr}[{i}]"
all_correlations.extend(self._check_for_correlations(new_node_id, item, path + [list_path_component], parent_attr))
else:
value = data
if value in self.correlation_index:
@@ -117,11 +159,31 @@ class GraphManager:
if len(unique_nodes) < 2:
return all_correlations # Correlation must involve at least two distinct nodes
new_source = {'node_id': new_node_id, 'path': ".".join(path)}
new_source = {
'node_id': new_node_id,
'path': ".".join(path),
'parent_attr': parent_attr,
'meaningful_attr': self._extract_meaningful_attribute(".".join(path), parent_attr)
}
all_sources = [new_source]
for node_id, paths in existing_nodes_with_paths.items():
for p_str in paths:
all_sources.append({'node_id': node_id, 'path': p_str})
for node_id, path_entries in existing_nodes_with_paths.items():
for entry in path_entries:
if isinstance(entry, dict):
all_sources.append({
'node_id': node_id,
'path': entry['path'],
'parent_attr': entry.get('parent_attr', ''),
'meaningful_attr': entry.get('meaningful_attr', self._extract_meaningful_attribute(entry['path'], entry.get('parent_attr', '')))
})
else:
# Handle legacy string-only entries
all_sources.append({
'node_id': node_id,
'path': str(entry),
'parent_attr': '',
'meaningful_attr': self._extract_meaningful_attribute(str(entry))
})
all_correlations.append({
'value': value,
@@ -163,8 +225,7 @@ class GraphManager:
# Skip creating correlation node - would be redundant
continue
# STEP 2: Use all correlated nodes (direct-edge filtering was removed)
eligible_nodes = self._filter_nodes_without_direct_edges(set(corr['nodes']))
eligible_nodes = set(corr['nodes'])
if len(eligible_nodes) < 2:
# Need at least 2 nodes to create a correlation
@@ -184,11 +245,12 @@ class GraphManager:
metadata={'values': [value], 'sources': corr['sources'],
'correlated_nodes': list(eligible_nodes)})
# Create edges from eligible nodes to this correlation node
# Create edges from eligible nodes to this correlation node with better labeling
for c_node_id in eligible_nodes:
if self.graph.has_node(c_node_id):
attribute = corr['sources'][0]['path'].split('.')[-1]
relationship_type = f"c_{attribute}"
# Find the best attribute name for this node
meaningful_attr = self._find_best_attribute_name_for_node(c_node_id, corr['sources'])
relationship_type = f"c_{meaningful_attr}"
self.add_edge(c_node_id, correlation_node_id, relationship_type, confidence_score=0.9)
self._update_correlation_index(node_id, attributes)
@@ -196,27 +258,34 @@ class GraphManager:
self.last_modified = datetime.now(timezone.utc).isoformat()
return is_new_node
def _filter_nodes_without_direct_edges(self, node_set: set) -> set:
"""
Filter out nodes that already have direct edges between them.
Returns set of nodes that should be included in correlation.
"""
nodes_list = list(node_set)
eligible_nodes = set(node_set) # Start with all nodes
def _find_best_attribute_name_for_node(self, node_id: str, sources: List[Dict]) -> str:
"""Find the best attribute name for a correlation edge by looking at the sources."""
node_sources = [s for s in sources if s['node_id'] == node_id]
# Check all pairs of nodes
for i in range(len(nodes_list)):
for j in range(i + 1, len(nodes_list)):
node_a = nodes_list[i]
node_b = nodes_list[j]
# Check if direct edge exists in either direction
if self._has_direct_edge_bidirectional(node_a, node_b):
# Remove both nodes from eligible set since they're already connected
eligible_nodes.discard(node_a)
eligible_nodes.discard(node_b)
if not node_sources:
return "correlation"
return eligible_nodes
# Use the meaningful_attr if available
for source in node_sources:
meaningful_attr = source.get('meaningful_attr')
if meaningful_attr and meaningful_attr != "unknown":
return meaningful_attr
# Fallback to parent_attr
for source in node_sources:
parent_attr = source.get('parent_attr')
if parent_attr:
return parent_attr
# Last resort - extract from path
for source in node_sources:
path = source.get('path', '')
if path:
extracted = self._extract_meaningful_attribute(path)
if extracted != "unknown":
return extracted
return "correlation"
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
"""
@@ -290,7 +359,7 @@ class GraphManager:
# Create set of unique sources based on (node_id, path) tuples
source_set = set()
for source in existing_sources + new_sources:
source_tuple = (source['node_id'], source['path'])
source_tuple = (source['node_id'], source.get('path', ''))
source_set.add(source_tuple)
# Convert back to list of dictionaries
@@ -421,10 +490,14 @@ class GraphManager:
def _get_confidence_distribution(self) -> Dict[str, int]:
"""Get distribution of edge confidence scores."""
distribution = {'high': 0, 'medium': 0, 'low': 0}
for _, _, confidence in self.graph.edges(data='confidence_score', default=0):
if confidence >= 0.8: distribution['high'] += 1
elif confidence >= 0.6: distribution['medium'] += 1
else: distribution['low'] += 1
for _, _, data in self.graph.edges(data=True):
confidence = data.get('confidence_score', 0)
if confidence >= 0.8:
distribution['high'] += 1
elif confidence >= 0.6:
distribution['medium'] += 1
else:
distribution['low'] += 1
return distribution
def get_statistics(self) -> Dict[str, Any]:
@@ -439,9 +512,10 @@ class GraphManager:
# Calculate distributions
for node_type in NodeType:
stats['node_type_distribution'][node_type.value] = len(self.get_nodes_by_type(node_type))
for _, _, rel_type in self.graph.edges(data='relationship_type', default='unknown'):
for _, _, data in self.graph.edges(data=True):
rel_type = data.get('relationship_type', 'unknown')
stats['relationship_type_distribution'][rel_type] = stats['relationship_type_distribution'].get(rel_type, 0) + 1
for _, _, provider in self.graph.edges(data='source_provider', default='unknown'):
provider = data.get('source_provider', 'unknown')
stats['provider_distribution'][provider] = stats['provider_distribution'].get(provider, 0) + 1
return stats


@@ -42,7 +42,7 @@ class ForensicLogger:
Maintains detailed audit trail of all reconnaissance activities.
"""
def __init__(self, session_id: str = None):
def __init__(self, session_id: str = ""):
"""
Initialize forensic logger.
@@ -203,8 +203,6 @@ class ForensicLogger:
self.session_metadata['target_domains'] = list(self.session_metadata['target_domains'])
self.logger.info(f"Scan Complete - Session: {self.session_id}")
self.logger.info(f"Total API Requests: {self.session_metadata['total_requests']}")
self.logger.info(f"Total Relationships: {self.session_metadata['total_relationships']}")
def export_audit_trail(self) -> Dict[str, Any]:
"""

core/scanner.py

@@ -5,7 +5,7 @@ import traceback
import time
import os
import importlib
from typing import List, Set, Dict, Any, Tuple
from typing import List, Set, Dict, Any, Tuple, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future
from collections import defaultdict, deque
from datetime import datetime, timezone
@@ -49,11 +49,20 @@ class Scanner:
self.max_depth = 2
self.stop_event = threading.Event()
self.scan_thread = None
self.session_id = None # Will be set by session manager
self.session_id: Optional[str] = None # Will be set by session manager
self.task_queue = deque([])
self.target_retries = defaultdict(int)
self.scan_failed_due_to_retries = False
# **NEW**: Track currently processing tasks to prevent processing after stop
self.currently_processing = set()
self.processing_lock = threading.Lock()
# Scanning progress tracking
self.total_indicators_found = 0
self.indicators_processed = 0
self.indicators_completed = 0
self.tasks_re_enqueued = 0
self.current_indicator = ""
# Concurrent processing configuration
@@ -119,7 +128,8 @@ class Scanner:
unpicklable_attrs = [
'stop_event',
'scan_thread',
'executor'
'executor',
'processing_lock' # **NEW**: Exclude the processing lock
]
for attr in unpicklable_attrs:
@@ -143,6 +153,11 @@ class Scanner:
self.stop_event = threading.Event()
self.scan_thread = None
self.executor = None
self.processing_lock = threading.Lock() # **NEW**: Recreate processing lock
# **NEW**: Reset processing tracking
if not hasattr(self, 'currently_processing'):
self.currently_processing = set()
# Re-set stop events for providers
if hasattr(self, 'providers'):
@@ -165,9 +180,10 @@ class Scanner:
attribute = getattr(module, attribute_name)
if isinstance(attribute, type) and issubclass(attribute, BaseProvider) and attribute is not BaseProvider:
provider_class = attribute
provider_name = provider_class(session_config=self.config).get_name()
provider = provider_class(name=attribute_name, session_config=self.config)
provider_name = provider.get_name()
if self.config.is_provider_enabled(provider_name):
provider = provider_class(session_config=self.config)
if provider.is_available():
provider.set_stop_event(self.stop_event)
self.providers.append(provider)
@@ -189,28 +205,59 @@ class Scanner:
print("Session configuration updated")
def start_scan(self, target_domain: str, max_depth: int = 2, clear_graph: bool = True) -> bool:
"""Start a new reconnaissance scan with immediate GUI feedback."""
"""Start a new reconnaissance scan with proper cleanup of previous scans."""
print(f"=== STARTING SCAN IN SCANNER {id(self)} ===")
print(f"Session ID: {self.session_id}")
print(f"Initial scanner status: {self.status}")
# Clean up previous scan thread if needed
# **IMPROVED**: More aggressive cleanup of previous scan
if self.scan_thread and self.scan_thread.is_alive():
print("A previous scan thread is still alive. Sending termination signal and waiting...")
self.stop_scan()
self.scan_thread.join(10.0)
print("A previous scan thread is still alive. Forcing termination...")
# Set stop signals immediately
self._set_stop_signal()
self.status = ScanStatus.STOPPED
# Clear all processing state
with self.processing_lock:
self.currently_processing.clear()
self.task_queue.clear()
# Shutdown executor aggressively
if self.executor:
print("Shutting down executor forcefully...")
self.executor.shutdown(wait=False, cancel_futures=True)
self.executor = None
# Wait for thread termination with shorter timeout
print("Waiting for previous scan thread to terminate...")
self.scan_thread.join(5.0) # Reduced from 10 seconds
if self.scan_thread.is_alive():
print("ERROR: The previous scan thread is unresponsive and could not be stopped.")
self.status = ScanStatus.FAILED
self._update_session_state()
return False
print("Previous scan thread terminated successfully.")
print("WARNING: Previous scan thread is still alive after 5 seconds")
# Continue anyway, but log the issue
self.logger.logger.warning("Previous scan thread failed to terminate cleanly")
# Reset state for new scan
# Reset state for new scan with proper forensic logging
print("Resetting scanner state for new scan...")
self.status = ScanStatus.IDLE
self._update_session_state() # Update GUI immediately
print("Scanner state is now clean for a new scan.")
self.stop_event.clear()
# **NEW**: Clear Redis stop signal explicitly
if self.session_id:
from core.session_manager import session_manager
session_manager.clear_stop_signal(self.session_id)
with self.processing_lock:
self.currently_processing.clear()
self.task_queue.clear()
self.target_retries.clear()
self.scan_failed_due_to_retries = False
# Update session state immediately for GUI feedback
self._update_session_state()
print("Scanner state reset complete.")
try:
if not hasattr(self, 'providers') or not self.providers:
@@ -225,24 +272,20 @@ class Scanner:
self.max_depth = max_depth
self.current_depth = 0
# Clear both local and Redis stop signals
self.stop_event.clear()
if self.session_id:
from core.session_manager import session_manager
session_manager.clear_stop_signal(self.session_id)
self.total_indicators_found = 0
self.indicators_processed = 0
self.indicators_completed = 0
self.tasks_re_enqueued = 0
self.current_indicator = self.current_target
# Update GUI with scan preparation
# Update GUI with scan preparation state
self._update_session_state()
# Start new forensic session
print(f"Starting new forensic session for scanner {id(self)}...")
self.logger = new_session()
# Start scan in separate thread
# Start scan in a separate thread
print(f"Starting scan thread for scanner {id(self)}...")
self.scan_thread = threading.Thread(
target=self._execute_scan,
@@ -258,16 +301,16 @@ class Scanner:
print(f"ERROR: Exception in start_scan for scanner {id(self)}: {e}")
traceback.print_exc()
self.status = ScanStatus.FAILED
self._update_session_state() # Update failed status immediately
self._update_session_state()
return False
def _execute_scan(self, target_domain: str, max_depth: int) -> None:
"""Execute the reconnaissance scan using a task queue-based approach."""
"""Execute the reconnaissance scan with proper termination handling."""
print(f"_execute_scan started for {target_domain} with depth {max_depth}")
self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
processed_targets = set()
task_queue = deque([(target_domain, 0, False)]) # target, depth, is_large_entity_member
self.task_queue.append((target_domain, 0, False))
try:
self.status = ScanStatus.RUNNING
@@ -278,34 +321,80 @@ class Scanner:
self.graph.add_node(target_domain, NodeType.DOMAIN)
self._initialize_provider_states(target_domain)
while task_queue:
if self._is_stop_requested():
print("Stop requested, terminating scan.")
# **IMPROVED**: Better termination checking in main loop
while self.task_queue and not self._is_stop_requested():
try:
target, depth, is_large_entity_member = self.task_queue.popleft()
except IndexError:
# Queue became empty during processing
break
target, depth, is_large_entity_member = task_queue.popleft()
if target in processed_targets:
continue
if depth > max_depth:
continue
self.current_depth = depth
self.current_indicator = target
self._update_session_state()
new_targets, large_entity_members = self._query_providers_for_target(target, depth, is_large_entity_member)
processed_targets.add(target)
for new_target in new_targets:
if new_target not in processed_targets:
task_queue.append((new_target, depth + 1, False))
for member in large_entity_members:
if member not in processed_targets:
task_queue.append((member, depth, True))
# **NEW**: Track this target as currently processing
with self.processing_lock:
if self._is_stop_requested():
print(f"Stop requested before processing {target}")
break
self.currently_processing.add(target)
try:
self.current_depth = depth
self.current_indicator = target
self._update_session_state()
# **IMPROVED**: More frequent stop checking during processing
if self._is_stop_requested():
print(f"Stop requested during processing setup for {target}")
break
new_targets, large_entity_members, success = self._query_providers_for_target(target, depth, is_large_entity_member)
# **NEW**: Check stop signal after provider queries
if self._is_stop_requested():
print(f"Stop requested after querying providers for {target}")
break
if not success:
self.target_retries[target] += 1
if self.target_retries[target] <= self.config.max_retries_per_target:
print(f"Re-queueing target {target} (attempt {self.target_retries[target]})")
self.task_queue.append((target, depth, is_large_entity_member))
self.tasks_re_enqueued += 1
else:
print(f"ERROR: Max retries exceeded for target {target}")
self.scan_failed_due_to_retries = True
self._log_target_processing_error(target, "Max retries exceeded")
else:
processed_targets.add(target)
self.indicators_completed += 1
# **NEW**: Only add new targets if not stopped
if not self._is_stop_requested():
for new_target in new_targets:
if new_target not in processed_targets:
self.task_queue.append((new_target, depth + 1, False))
for member in large_entity_members:
if member not in processed_targets:
self.task_queue.append((member, depth, True))
finally:
# **NEW**: Always remove from processing set
with self.processing_lock:
self.currently_processing.discard(target)
# **NEW**: Log termination reason
if self._is_stop_requested():
print("Scan terminated due to stop request")
self.logger.logger.info("Scan terminated by user request")
elif not self.task_queue:
print("Scan completed - no more targets to process")
self.logger.logger.info("Scan completed - all targets processed")
except Exception as e:
print(f"ERROR: Scan execution failed with error: {e}")
@@ -313,8 +402,14 @@ class Scanner:
self.status = ScanStatus.FAILED
self.logger.logger.error(f"Scan failed: {e}")
finally:
# **NEW**: Clear processing state on exit
with self.processing_lock:
self.currently_processing.clear()
if self._is_stop_requested():
self.status = ScanStatus.STOPPED
elif self.scan_failed_due_to_retries:
self.status = ScanStatus.FAILED
else:
self.status = ScanStatus.COMPLETED
@@ -322,43 +417,50 @@ class Scanner:
self.logger.log_scan_complete()
if self.executor:
self.executor.shutdown(wait=False, cancel_futures=True)
self.executor = None
stats = self.graph.get_statistics()
print("Final scan statistics:")
print(f" - Total nodes: {stats['basic_metrics']['total_nodes']}")
print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
print(f" - Targets processed: {len(processed_targets)}")
def _query_providers_for_target(self, target: str, depth: int, dns_only: bool = False) -> Tuple[Set[str], Set[str]]:
"""Helper method to query providers for a single target."""
def _query_providers_for_target(self, target: str, depth: int, dns_only: bool = False) -> Tuple[Set[str], Set[str], bool]:
"""Query providers for a single target with enhanced stop checking."""
# **NEW**: Early termination check
if self._is_stop_requested():
print(f"Stop requested before querying providers for {target}")
return set(), set(), False
is_ip = _is_valid_ip(target)
target_type = NodeType.IP if is_ip else NodeType.DOMAIN
print(f"Querying providers for {target_type.value}: {target} at depth {depth}")
if self._is_stop_requested():
print(f"Stop requested before querying providers for {target}")
return set(), set()
self.graph.add_node(target, target_type)
self._initialize_provider_states(target)
new_targets = set()
large_entity_members = set()
node_attributes = defaultdict(lambda: defaultdict(list))
all_providers_successful = True
eligible_providers = self._get_eligible_providers(target, is_ip, dns_only)
if not eligible_providers:
self._log_no_eligible_providers(target, is_ip)
return new_targets, large_entity_members
return new_targets, large_entity_members, True
for provider in eligible_providers:
# **IMPROVED**: Check stop signal before each provider
for i, provider in enumerate(eligible_providers):
if self._is_stop_requested():
print(f"Stop requested while querying providers for {target}")
print(f"Stop requested while querying provider {i+1}/{len(eligible_providers)} for {target}")
all_providers_successful = False
break
try:
provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
if provider_results and not self._is_stop_requested():
if provider_results is None:
all_providers_successful = False
elif not self._is_stop_requested():
discovered, is_large_entity = self._process_provider_results_forensic(
target, provider, provider_results, node_attributes, depth
)
@@ -366,16 +468,65 @@ class Scanner:
large_entity_members.update(discovered)
else:
new_targets.update(discovered)
else:
print(f"Stop requested after processing results from {provider.get_name()}")
break
except Exception as e:
all_providers_successful = False
self._log_provider_error(target, provider.get_name(), str(e))
for node_id, attributes in node_attributes.items():
if self.graph.graph.has_node(node_id):
node_is_ip = _is_valid_ip(node_id)
node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
self.graph.add_node(node_id, node_type_to_add, attributes=attributes)
# **NEW**: Only update node attributes if not stopped
if not self._is_stop_requested():
for node_id, attributes in node_attributes.items():
if self.graph.graph.has_node(node_id):
node_is_ip = _is_valid_ip(node_id)
node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
self.graph.add_node(node_id, node_type_to_add, attributes=attributes)
return new_targets, large_entity_members
return new_targets, large_entity_members, all_providers_successful
def stop_scan(self) -> bool:
"""Request immediate scan termination with proper cleanup."""
try:
print("=== INITIATING IMMEDIATE SCAN TERMINATION ===")
self.logger.logger.info("Scan termination requested by user")
# **IMPROVED**: More aggressive stop signal setting
self._set_stop_signal()
self.status = ScanStatus.STOPPED
# **NEW**: Clear processing state immediately
with self.processing_lock:
currently_processing_copy = self.currently_processing.copy()
self.currently_processing.clear()
print(f"Cleared {len(currently_processing_copy)} currently processing targets: {currently_processing_copy}")
# **IMPROVED**: Clear task queue and log what was discarded
discarded_tasks = list(self.task_queue)
self.task_queue.clear()
print(f"Discarded {len(discarded_tasks)} pending tasks")
# **IMPROVED**: Aggressively shut down executor
if self.executor:
print("Shutting down executor with immediate cancellation...")
try:
# Cancel all pending futures
self.executor.shutdown(wait=False, cancel_futures=True)
print("Executor shutdown completed")
except Exception as e:
print(f"Error during executor shutdown: {e}")
# Immediately update GUI with stopped status
self._update_session_state()
print("Termination signals sent. The scan will stop as soon as possible.")
return True
except Exception as e:
print(f"ERROR: Exception in stop_scan: {e}")
self.logger.logger.error(f"Error during scan termination: {e}")
traceback.print_exc()
return False
def _update_session_state(self) -> None:
"""
@@ -391,6 +542,49 @@ class Scanner:
except Exception as e:
print(f"ERROR: Failed to update session state: {e}")
def get_scan_status(self) -> Dict[str, Any]:
"""Get current scan status with processing information."""
try:
with self.processing_lock:
currently_processing_count = len(self.currently_processing)
currently_processing_list = list(self.currently_processing)
return {
'status': self.status,
'target_domain': self.current_target,
'current_depth': self.current_depth,
'max_depth': self.max_depth,
'current_indicator': self.current_indicator,
'indicators_processed': self.indicators_processed,
'indicators_completed': self.indicators_completed,
'tasks_re_enqueued': self.tasks_re_enqueued,
'progress_percentage': self._calculate_progress(),
'enabled_providers': [provider.get_name() for provider in self.providers],
'graph_statistics': self.graph.get_statistics(),
'task_queue_size': len(self.task_queue),
'currently_processing_count': currently_processing_count, # **NEW**
'currently_processing': currently_processing_list[:5] # **NEW**: Show first 5 for debugging
}
except Exception as e:
print(f"ERROR: Exception in get_scan_status: {e}")
traceback.print_exc()
return {
'status': 'error',
'target_domain': None,
'current_depth': 0,
'max_depth': 0,
'current_indicator': '',
'indicators_processed': 0,
'indicators_completed': 0,
'tasks_re_enqueued': 0,
'progress_percentage': 0.0,
'enabled_providers': [],
'graph_statistics': {},
'task_queue_size': 0,
'currently_processing_count': 0,
'currently_processing': []
}
def _initialize_provider_states(self, target: str) -> None:
"""Initialize provider states for forensic tracking."""
if not self.graph.graph.has_node(target):
@@ -420,22 +614,25 @@ class Scanner:
return eligible
def _already_queried_provider(self, target: str, provider_name: str) -> bool:
"""Check if we already queried a provider for a target."""
"""Check if we already successfully queried a provider for a target."""
if not self.graph.graph.has_node(target):
return False
node_data = self.graph.graph.nodes[target]
provider_states = node_data.get('metadata', {}).get('provider_states', {})
return provider_name in provider_states
# A provider has been successfully queried if a state exists and its status is 'success'
provider_state = provider_states.get(provider_name)
return provider_state is not None and provider_state.get('status') == 'success'
def _query_single_provider_forensic(self, provider, target: str, is_ip: bool, current_depth: int) -> List:
def _query_single_provider_forensic(self, provider, target: str, is_ip: bool, current_depth: int) -> Optional[List]:
"""Query a single provider with stop signal checking."""
provider_name = provider.get_name()
start_time = datetime.now(timezone.utc)
if self._is_stop_requested():
print(f"Stop requested before querying {provider_name} for {target}")
return []
return None
print(f"Querying {provider_name} for {target}")
@@ -449,7 +646,7 @@ class Scanner:
if self._is_stop_requested():
print(f"Stop requested after querying {provider_name} for {target}")
return []
return None
self._update_provider_state(target, provider_name, 'success', len(results), None, start_time)
@@ -459,10 +656,10 @@ class Scanner:
except Exception as e:
self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
print(f"{provider_name} failed for {target}: {e}")
return []
return None
def _update_provider_state(self, target: str, provider_name: str, status: str,
results_count: int, error: str, start_time: datetime) -> None:
results_count: int, error: Optional[str], start_time: datetime) -> None:
"""Update provider state in node metadata for forensic tracking."""
if not self.graph.graph.has_node(target):
return
@@ -499,7 +696,7 @@ class Scanner:
return members, True
for i, (source, rel_target, rel_type, confidence, raw_data) in enumerate(results):
if i % 10 == 0 and self._is_stop_requested():
if i % 5 == 0 and self._is_stop_requested(): # Check more frequently
print(f"Stop requested while processing results from {provider_name} for {target}")
break
@@ -515,7 +712,22 @@ class Scanner:
self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])
if _is_valid_ip(rel_target):
if isinstance(rel_target, list):
# If the target is a list, iterate and process each item
for single_target in rel_target:
if _is_valid_ip(single_target):
self.graph.add_node(single_target, NodeType.IP)
if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data):
print(f"Added IP relationship: {source} -> {single_target} ({rel_type})")
discovered_targets.add(single_target)
elif _is_valid_domain(single_target):
self.graph.add_node(single_target, NodeType.DOMAIN)
if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data):
print(f"Added domain relationship: {source} -> {single_target} ({rel_type})")
discovered_targets.add(single_target)
self._collect_node_attributes(single_target, provider_name, rel_type, source, raw_data, node_attributes[single_target])
elif _is_valid_ip(rel_target):
self.graph.add_node(rel_target, NodeType.IP)
if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
print(f"Added IP relationship: {source} -> {rel_target} ({rel_type})")
@@ -621,7 +833,6 @@ class Scanner:
if target not in attributes[record_type_name]:
attributes[record_type_name].append(target)
def _log_target_processing_error(self, target: str, error: str) -> None:
"""Log target processing errors for forensic trail."""
self.logger.logger.error(f"Target processing failed for {target}: {error}")
@@ -635,69 +846,12 @@ class Scanner:
target_type = 'IP' if is_ip else 'domain'
self.logger.logger.warning(f"No eligible providers for {target_type}: {target}")
def stop_scan(self) -> bool:
"""Request immediate scan termination with immediate GUI feedback."""
try:
print("=== INITIATING IMMEDIATE SCAN TERMINATION ===")
self.logger.logger.info("Scan termination requested by user")
# Set both local and Redis stop signals
self._set_stop_signal()
self.status = ScanStatus.STOPPED
# Immediately update GUI with stopped status
self._update_session_state()
# Cancel executor futures if running
if self.executor:
print("Shutting down executor with immediate cancellation...")
self.executor.shutdown(wait=False, cancel_futures=True)
print("Termination signals sent. The scan will stop as soon as possible.")
return True
except Exception as e:
print(f"ERROR: Exception in stop_scan: {e}")
self.logger.logger.error(f"Error during scan termination: {e}")
traceback.print_exc()
return False
def get_scan_status(self) -> Dict[str, Any]:
"""Get current scan status with forensic information."""
try:
return {
'status': self.status,
'target_domain': self.current_target,
'current_depth': self.current_depth,
'max_depth': self.max_depth,
'current_indicator': self.current_indicator,
'total_indicators_found': self.total_indicators_found,
'indicators_processed': self.indicators_processed,
'progress_percentage': self._calculate_progress(),
'enabled_providers': [provider.get_name() for provider in self.providers],
'graph_statistics': self.graph.get_statistics()
}
except Exception as e:
print(f"ERROR: Exception in get_scan_status: {e}")
traceback.print_exc()
return {
'status': 'error',
'target_domain': None,
'current_depth': 0,
'max_depth': 0,
'current_indicator': '',
'total_indicators_found': 0,
'indicators_processed': 0,
'progress_percentage': 0.0,
'enabled_providers': [],
'graph_statistics': {}
}
def _calculate_progress(self) -> float:
"""Calculate scan progress percentage."""
if self.total_indicators_found == 0:
"""Calculate scan progress percentage based on task completion."""
total_tasks = self.indicators_completed + len(self.task_queue)
if total_tasks == 0:
return 0.0
return min(100.0, (self.indicators_processed / self.total_indicators_found) * 100)
return min(100.0, (self.indicators_completed / total_tasks) * 100)
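# Worked example of the new formula (numbers hypothetical):
#   indicators_completed = 30, len(task_queue) = 10
#   progress = min(100.0, 30 / (30 + 10) * 100) = 75.0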
def get_graph_data(self) -> Dict[str, Any]:
"""Get current graph data for visualization."""
@@ -748,7 +902,7 @@ class Scanner:
if isinstance(attribute, type) and issubclass(attribute, BaseProvider) and attribute is not BaseProvider:
provider_class = attribute
# Instantiate to get metadata, even if not fully configured
temp_provider = provider_class(session_config=self.config)
temp_provider = provider_class(name=attribute_name, session_config=self.config)
provider_name = temp_provider.get_name()
# Find the actual provider instance if it exists, to get live stats


@@ -3,11 +3,9 @@ Per-session configuration management for DNSRecon.
Provides isolated configuration instances for each user session.
"""
import os
from typing import Dict, Optional
from config import Config
class SessionConfig:
class SessionConfig(Config):
"""
Session-specific configuration that inherits from global config
but maintains isolated API keys and provider settings.
@@ -15,106 +13,8 @@ class SessionConfig:
def __init__(self):
"""Initialize session config with global defaults."""
# Copy all attributes from global config
self.api_keys: Dict[str, Optional[str]] = {
'shodan': None
}
# Default settings (copied from global config)
self.default_recursion_depth = 2
self.default_timeout = 30
self.max_concurrent_requests = 5
self.large_entity_threshold = 100
# Rate limiting settings (per session)
self.rate_limits = {
'crtsh': 60,
'shodan': 60,
'dns': 100
}
# Provider settings (per session)
self.enabled_providers = {
'crtsh': True,
'dns': True,
'shodan': False
}
# Logging configuration
self.log_level = 'INFO'
self.log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
# Flask configuration (shared)
self.flask_host = '127.0.0.1'
self.flask_port = 5000
self.flask_debug = True
def set_api_key(self, provider: str, api_key: str) -> bool:
"""
Set API key for a provider in this session.
Args:
provider: Provider name (shodan, etc)
api_key: API key string
Returns:
bool: True if key was set successfully
"""
if provider in self.api_keys:
self.api_keys[provider] = api_key
self.enabled_providers[provider] = True if api_key else False
return True
return False
def get_api_key(self, provider: str) -> Optional[str]:
"""
Get API key for a provider in this session.
Args:
provider: Provider name
Returns:
API key or None if not set
"""
return self.api_keys.get(provider)
def is_provider_enabled(self, provider: str) -> bool:
"""
Check if a provider is enabled in this session.
Args:
provider: Provider name
Returns:
bool: True if provider is enabled
"""
return self.enabled_providers.get(provider, False)
def get_rate_limit(self, provider: str) -> int:
"""
Get rate limit for a provider in this session.
Args:
provider: Provider name
Returns:
Rate limit in requests per minute
"""
return self.rate_limits.get(provider, 60)
def load_from_env(self):
"""Load configuration from environment variables (only if not already set)."""
if os.getenv('SHODAN_API_KEY') and not self.api_keys['shodan']:
self.set_api_key('shodan', os.getenv('SHODAN_API_KEY'))
# Override default settings from environment
self.default_recursion_depth = int(os.getenv('DEFAULT_RECURSION_DEPTH', '2'))
self.default_timeout = 30
self.max_concurrent_requests = 5
super().__init__()
def create_session_config() -> SessionConfig:
def create_session_config() -> 'SessionConfig':
"""Create a new session configuration instance."""
session_config = SessionConfig()
session_config.load_from_env()
return session_config
return SessionConfig()
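With SessionConfig now inheriting from Config, every session starts from the global defaults but mutates only its own copy; a minimal sketch of the intended isolation (key values hypothetical):

cfg_a = create_session_config()
cfg_b = create_session_config()
cfg_a.set_api_key('shodan', 'session-a-key')  # mutates cfg_a only
print(cfg_b.get_api_key('shodan'))  # unaffected: still the env-derived value, or None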

core/session_manager.py

@@ -8,6 +8,7 @@ import pickle
from typing import Dict, Optional, Any, List
from core.scanner import Scanner
from config import config
# WARNING: Using pickle can be a security risk if the data source is not trusted.
# In this case, we are only serializing/deserializing our own trusted Scanner objects,
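# A sketch of the trusted-only round-trip the warning refers to (names hypothetical):
#   blob = pickle.dumps(scanner)   # Scanner's __getstate__ drops locks, threads and executors first
#   restored = pickle.loads(blob)  # acceptable only because this process produced the blob itself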
@@ -18,10 +19,13 @@ class SessionManager:
Manages multiple scanner instances for concurrent user sessions using Redis.
"""
def __init__(self, session_timeout_minutes: int = 60):
def __init__(self, session_timeout_minutes: Optional[int] = None):
"""
Initialize session manager with a Redis backend.
"""
if session_timeout_minutes is None:
session_timeout_minutes = config.session_timeout_minutes
self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
self.session_timeout = session_timeout_minutes * 60 # Convert to seconds
self.lock = threading.Lock() # Lock for local operations, Redis handles atomic ops
@@ -355,31 +359,6 @@ class SessionManager:
time.sleep(300) # Sleep for 5 minutes
def list_active_sessions(self) -> List[Dict[str, Any]]:
"""List all active sessions for admin purposes."""
try:
session_keys = self.redis_client.keys("dnsrecon:session:*")
sessions = []
for session_key in session_keys:
session_id = session_key.decode('utf-8').split(':')[-1]
session_data = self._get_session_data(session_id)
if session_data:
scanner = session_data.get('scanner')
sessions.append({
'session_id': session_id,
'created_at': session_data.get('created_at'),
'last_activity': session_data.get('last_activity'),
'scanner_status': scanner.status if scanner else 'unknown',
'current_target': scanner.current_target if scanner else None
})
return sessions
except Exception as e:
print(f"ERROR: Failed to list active sessions: {e}")
return []
def get_statistics(self) -> Dict[str, Any]:
"""Get session manager statistics."""
try:

dump.rdb (binary file not shown)


@@ -3,8 +3,6 @@
import time
import requests
import threading
import os
import json
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Tuple
@@ -80,20 +78,12 @@ class BaseProvider(ABC):
self.logger = get_forensic_logger()
self._stop_event = None
# Caching configuration (per session)
self.cache_dir = f'.cache/{id(self.config)}' # Unique cache per session config
self.cache_expiry = 12 * 3600 # 12 hours in seconds
if not os.path.exists(self.cache_dir):
os.makedirs(self.cache_dir)
# Statistics (per provider instance)
self.total_requests = 0
self.successful_requests = 0
self.failed_requests = 0
self.total_relationships_found = 0
print(f"Initialized {name} provider with session-specific config (rate: {actual_rate_limit}/min)")
def __getstate__(self):
"""Prepare BaseProvider for pickling by excluding unpicklable objects."""
state = self.__dict__.copy()
@@ -174,171 +164,79 @@ class BaseProvider(ABC):
def make_request(self, url: str, method: str = "GET",
params: Optional[Dict[str, Any]] = None,
headers: Optional[Dict[str, str]] = None,
target_indicator: str = "",
max_retries: int = 3) -> Optional[requests.Response]:
target_indicator: str = "") -> Optional[requests.Response]:
"""
Make a rate-limited HTTP request with aggressive stop signal handling.
Terminates immediately when stop is requested, including during retries.
Make a rate-limited HTTP request.
"""
# Check for cancellation before starting
if self._is_stop_requested():
print(f"Request cancelled before start: {url}")
return None
# Create a unique cache key
cache_key = f"{self.name}_{hash(f'{method}:{url}:{json.dumps(params, sort_keys=True)}')}.json"
cache_path = os.path.join(self.cache_dir, cache_key)
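# NOTE: Python salts str hashes per process (PYTHONHASHSEED), so hash() here
# produces different cache file names on every restart; entries written by a
# previous run can never be found again.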
self.rate_limiter.wait_if_needed()
# Check cache
if os.path.exists(cache_path):
cache_age = time.time() - os.path.getmtime(cache_path)
if cache_age < self.cache_expiry:
print(f"Returning cached response for: {url}")
with open(cache_path, 'r') as f:
cached_data = json.load(f)
response = requests.Response()
response.status_code = cached_data['status_code']
response._content = cached_data['content'].encode('utf-8')
response.headers = cached_data['headers']
return response
start_time = time.time()
response = None
error = None
# Determine effective max_retries based on stop signal
effective_max_retries = 0 if self._is_stop_requested() else max_retries
last_exception = None
try:
self.total_requests += 1
for attempt in range(effective_max_retries + 1):
# AGGRESSIVE: Check for cancellation before each attempt
if self._is_stop_requested():
print(f"Request cancelled during attempt {attempt + 1}: {url}")
return None
request_headers = dict(self.session.headers).copy()
if headers:
request_headers.update(headers)
# Apply rate limiting with cancellation awareness
if not self._wait_with_cancellation_check():
print(f"Request cancelled during rate limiting: {url}")
return None
print(f"Making {method} request to: {url}")
# AGGRESSIVE: Final check before making HTTP request
if self._is_stop_requested():
print(f"Request cancelled before HTTP call: {url}")
return None
start_time = time.time()
response = None
error = None
try:
self.total_requests += 1
# Prepare request
request_headers = self.session.headers.copy()
if headers:
request_headers.update(headers)
print(f"Making {method} request to: {url} (attempt {attempt + 1})")
# AGGRESSIVE: Use much shorter timeout if termination is requested
request_timeout = self.timeout
if self._is_stop_requested():
request_timeout = 2 # Max 2 seconds if termination requested
print(f"Stop requested - using short timeout: {request_timeout}s")
# Make request
if method.upper() == "GET":
response = self.session.get(
url,
params=params,
headers=request_headers,
timeout=request_timeout
)
elif method.upper() == "POST":
response = self.session.post(
url,
json=params,
headers=request_headers,
timeout=request_timeout
)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
print(f"Response status: {response.status_code}")
response.raise_for_status()
self.successful_requests += 1
# Success - log, cache, and return
duration_ms = (time.time() - start_time) * 1000
self.logger.log_api_request(
provider=self.name,
url=url,
method=method.upper(),
status_code=response.status_code,
response_size=len(response.content),
duration_ms=duration_ms,
error=None,
target_indicator=target_indicator
if method.upper() == "GET":
response = self.session.get(
url,
params=params,
headers=request_headers,
timeout=self.timeout
)
# Cache the successful response to disk
with open(cache_path, 'w') as f:
json.dump({
'status_code': response.status_code,
'content': response.text,
'headers': dict(response.headers)
}, f)
return response
elif method.upper() == "POST":
response = self.session.post(
url,
json=params,
headers=request_headers,
timeout=self.timeout
)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
except requests.exceptions.RequestException as e:
error = str(e)
self.failed_requests += 1
print(f"Request failed (attempt {attempt + 1}): {error}")
last_exception = e
# AGGRESSIVE: Immediately abort retries if stop requested
if self._is_stop_requested():
print(f"Stop requested - aborting retries for: {url}")
break
# Check if we should retry (but only if stop not requested)
if attempt < effective_max_retries and self._should_retry(e):
# Use a longer, more respectful backoff for 429 errors
if isinstance(e, requests.exceptions.HTTPError) and e.response and e.response.status_code == 429:
# Start with a 10-second backoff and increase exponentially
backoff_time = 10 * (2 ** attempt)
print(f"Rate limit hit. Retrying in {backoff_time} seconds...")
else:
backoff_time = min(1.0, (2 ** attempt) * 0.5) # Shorter backoff for other errors
print(f"Retrying in {backoff_time} seconds...")
# AGGRESSIVE: Much shorter backoff and more frequent checking
if not self._sleep_with_cancellation_check(backoff_time):
print(f"Stop requested during backoff - aborting: {url}")
return None
continue
else:
break
print(f"Response status: {response.status_code}")
response.raise_for_status()
self.successful_requests += 1
duration_ms = (time.time() - start_time) * 1000
self.logger.log_api_request(
provider=self.name,
url=url,
method=method.upper(),
status_code=response.status_code,
response_size=len(response.content),
duration_ms=duration_ms,
error=None,
target_indicator=target_indicator
)
return response
except Exception as e:
error = f"Unexpected error: {str(e)}"
self.failed_requests += 1
print(f"Unexpected error: {error}")
last_exception = e
break
# All attempts failed - log and return None
duration_ms = (time.time() - start_time) * 1000
self.logger.log_api_request(
provider=self.name,
url=url,
method=method.upper(),
status_code=response.status_code if response else None,
response_size=len(response.content) if response else None,
duration_ms=duration_ms,
error=error,
target_indicator=target_indicator
)
if error and last_exception:
raise last_exception
return None
except requests.exceptions.RequestException as e:
error = str(e)
self.failed_requests += 1
duration_ms = (time.time() - start_time) * 1000
self.logger.log_api_request(
provider=self.name,
url=url,
method=method.upper(),
status_code=response.status_code if response else None,
response_size=len(response.content) if response else None,
duration_ms=duration_ms,
error=error,
target_indicator=target_indicator
)
raise e
def _is_stop_requested(self) -> bool:
"""
@@ -348,44 +246,6 @@ class BaseProvider(ABC):
return True
return False
def _wait_with_cancellation_check(self) -> bool:
"""
Wait for rate limiting while aggressively checking for cancellation.
Returns False if cancelled during wait.
"""
current_time = time.time()
time_since_last = current_time - self.rate_limiter.last_request_time
if time_since_last < self.rate_limiter.min_interval:
sleep_time = self.rate_limiter.min_interval - time_since_last
if not self._sleep_with_cancellation_check(sleep_time):
return False
self.rate_limiter.last_request_time = time.time()
return True
def _sleep_with_cancellation_check(self, sleep_time: float) -> bool:
"""
Sleep for the specified time while aggressively checking for cancellation.
Args:
sleep_time: Time to sleep in seconds
Returns:
bool: True if sleep completed, False if cancelled
"""
sleep_start = time.time()
check_interval = 0.05 # Check every 50ms for aggressive responsiveness
while time.time() - sleep_start < sleep_time:
if self._is_stop_requested():
return False
remaining_time = sleep_time - (time.time() - sleep_start)
time.sleep(min(check_interval, remaining_time))
return True
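A poll-free equivalent of this removed helper, assuming _stop_event is a standard threading.Event (sketch only):
def _sleep_with_cancellation_check(self, sleep_time: float) -> bool:
    # Event.wait() returns True as soon as the event is set and False on
    # timeout, so "sleep completed" is the negation of "cancelled".
    if self._stop_event is not None:
        return not self._stop_event.wait(timeout=sleep_time)
    time.sleep(sleep_time)
    return True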
def set_stop_event(self, stop_event: threading.Event) -> None:
"""
Set the stop event for this provider to enable cancellation.
@@ -395,28 +255,6 @@ class BaseProvider(ABC):
"""
self._stop_event = stop_event
def _should_retry(self, exception: requests.exceptions.RequestException) -> bool:
"""
Determine if a request should be retried based on the exception.
Args:
exception: The request exception that occurred
Returns:
True if the request should be retried
"""
# Retry on connection errors and timeouts
if isinstance(exception, (requests.exceptions.ConnectionError,
requests.exceptions.Timeout)):
return True
if isinstance(exception, requests.exceptions.HTTPError):
if hasattr(exception, 'response') and exception.response:
# Retry on server errors (5xx) AND on rate-limiting errors (429)
return exception.response.status_code >= 500 or exception.response.status_code == 429
return False
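Since make_request now re-raises on failure, retrying moves to the caller. A minimal sketch of such a caller-side loop (wrapper name and backoff values are illustrative, mirroring the _should_retry policy above):
import time
import requests

def call_with_retries(provider, url: str, target: str, max_retries: int = 3):
    for attempt in range(max_retries + 1):
        try:
            return provider.make_request(url, target_indicator=target)
        except requests.exceptions.RequestException as e:
            transient = isinstance(e, (requests.exceptions.ConnectionError,
                                       requests.exceptions.Timeout))
            if isinstance(e, requests.exceptions.HTTPError) and e.response is not None:
                transient = e.response.status_code >= 500 or e.response.status_code == 429
            if attempt == max_retries or not transient:
                raise
            time.sleep(min(10 * (2 ** attempt), 60))  # exponential backoff, capped at 60s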
def log_relationship_discovery(self, source_node: str, target_node: str,
relationship_type: str,
confidence_score: float,

View File

@@ -1,44 +1,60 @@
"""
Certificate Transparency provider using crt.sh.
Discovers domain relationships through certificate SAN analysis with comprehensive certificate tracking.
Stores certificates as metadata on domain nodes rather than creating certificate nodes.
"""
# dnsrecon/providers/crtsh_provider.py
import json
import re
import os
from pathlib import Path
from typing import List, Dict, Any, Tuple, Set
from urllib.parse import quote
from datetime import datetime, timezone
import requests
# New dependency required for this provider
try:
import psycopg2
import psycopg2.extras
PSYCOPG2_AVAILABLE = True
except ImportError:
PSYCOPG2_AVAILABLE = False
from .base_provider import BaseProvider
from utils.helpers import _is_valid_domain
# We use requests only to raise the same exception type for compatibility with core retry logic
import requests
class CrtShProvider(BaseProvider):
"""
Provider for querying crt.sh certificate transparency database.
Now uses session-specific configuration and caching.
Provider for querying crt.sh certificate transparency database via its public PostgreSQL endpoint.
This version is designed to be a drop-in, high-performance replacement for the API-based provider.
It preserves the same caching and data processing logic.
"""
def __init__(self, session_config=None):
"""Initialize CrtSh provider with session-specific configuration."""
def __init__(self, name=None, session_config=None):
"""Initialize CrtShDB provider with session-specific configuration."""
super().__init__(
name="crtsh",
rate_limit=60,
timeout=15,
rate_limit=0, # No rate limit for direct DB access
timeout=60, # Increased timeout for potentially long DB queries
session_config=session_config
)
self.base_url = "https://crt.sh/"
# Database connection details
self.db_host = "crt.sh"
self.db_port = 5432
self.db_name = "certwatch"
self.db_user = "guest"
self._stop_event = None
# Initialize cache directory (same as original provider)
self.cache_dir = Path('cache') / 'crtsh'
self.cache_dir.mkdir(parents=True, exist_ok=True)
def get_name(self) -> str:
"""Return the provider name."""
return "crtsh"
def get_display_name(self) -> str:
"""Return the provider display name for the UI."""
return "crt.sh"
return "crt.sh (DB)"
def requires_api_key(self) -> bool:
"""Return True if the provider requires an API key."""
@@ -50,499 +66,448 @@ class CrtShProvider(BaseProvider):
def is_available(self) -> bool:
"""
Check if the provider is configured to be used.
This method is intentionally simple and does not perform a network request
to avoid blocking application startup.
Check if the provider can be used. Requires the psycopg2 library.
"""
if not PSYCOPG2_AVAILABLE:
self.logger.logger.warning("psycopg2 library not found. CrtShDBProvider is unavailable. "
"Please run 'pip install psycopg2-binary'.")
return False
return True
def _query_crtsh(self, domain: str) -> List[Dict[str, Any]]:
"""
Query the crt.sh PostgreSQL database for raw certificate data.
Raises exceptions for DB/network errors to allow core logic to retry.
"""
conn = None
certificates = []
# SQL Query to find all certificate IDs related to the domain (including subdomains),
# then retrieve comprehensive details for each certificate, mimicking the JSON API structure.
sql_query = """
WITH certificates_of_interest AS (
SELECT DISTINCT ci.certificate_id
FROM certificate_identity ci
WHERE ci.name_value ILIKE %(domain_wildcard)s OR ci.name_value = %(domain)s
)
SELECT
c.id,
c.serial_number,
c.not_before,
c.not_after,
(SELECT min(entry_timestamp) FROM ct_log_entry cle WHERE cle.certificate_id = c.id) as entry_timestamp,
ca.id as issuer_ca_id,
ca.name as issuer_name,
(SELECT array_to_string(array_agg(DISTINCT ci.name_value), E'\n') FROM certificate_identity ci WHERE ci.certificate_id = c.id) as name_value,
(SELECT name_value FROM certificate_identity ci WHERE ci.certificate_id = c.id AND ci.name_type = 'commonName' LIMIT 1) as common_name
FROM
certificate c
JOIN ca ON c.issuer_ca_id = ca.id
WHERE c.id IN (SELECT certificate_id FROM certificates_of_interest);
"""
try:
conn = psycopg2.connect(
dbname=self.db_name,
user=self.db_user,
host=self.db_host,
port=self.db_port,
connect_timeout=self.timeout
)
with conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cursor:
cursor.execute(sql_query, {'domain': domain, 'domain_wildcard': f'%.{domain}'})
results = cursor.fetchall()
certificates = [dict(row) for row in results]
self.logger.logger.info(f"crt.sh DB query for '{domain}' returned {len(certificates)} certificates.")
except psycopg2.Error as e:
self.logger.logger.error(f"PostgreSQL query failed for {domain}: {e}")
# Raise a RequestException to be compatible with the existing retry logic in the core application
raise requests.exceptions.RequestException(f"PostgreSQL query failed: {e}") from e
finally:
if conn:
conn.close()
return certificates
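A standalone connectivity check against the same public endpoint (connection parameters from __init__ above; the queried domain is illustrative):
import psycopg2

conn = psycopg2.connect(dbname="certwatch", user="guest", host="crt.sh", port=5432, connect_timeout=15)
try:
    with conn.cursor() as cur:
        cur.execute("SELECT ci.name_value FROM certificate_identity ci "
                    "WHERE ci.name_value ILIKE %s LIMIT 5", ("%.example.com",))
        for (name_value,) in cur.fetchall():
            print(name_value)
finally:
    conn.close()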
def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Query crt.sh for certificates containing the domain with caching support.
Properly raises exceptions for network errors to allow core logic retries.
"""
if not _is_valid_domain(domain):
return []
if self._stop_event and self._stop_event.is_set():
return []
cache_file = self._get_cache_file_path(domain)
cache_status = self._get_cache_status(cache_file)
certificates = []
try:
if cache_status == "fresh":
certificates = self._load_cached_certificates(cache_file)
self.logger.logger.info(f"Using cached data for {domain} ({len(certificates)} certificates)")
elif cache_status == "not_found":
# Fresh query from DB, create new cache
certificates = self._query_crtsh(domain)
if certificates:
self._create_cache_file(cache_file, domain, self._serialize_certs_for_cache(certificates))
else:
self.logger.logger.info(f"No certificates found for {domain}, not caching")
elif cache_status == "stale":
try:
new_certificates = self._query_crtsh(domain)
if new_certificates:
certificates = self._append_to_cache(cache_file, self._serialize_certs_for_cache(new_certificates))
else:
certificates = self._load_cached_certificates(cache_file)
except requests.exceptions.RequestException:
certificates = self._load_cached_certificates(cache_file)
if certificates:
self.logger.logger.warning(f"DB query failed for {domain}, using stale cache data.")
else:
raise
except requests.exceptions.RequestException as e:
# Re-raise so core logic can retry
self.logger.logger.error(f"DB query failed for {domain}: {e}")
raise e
except json.JSONDecodeError as e:
# JSON parsing errors from cache should also be handled
self.logger.logger.error(f"Failed to parse JSON from cache for {domain}: {e}")
raise e
if self._stop_event and self._stop_event.is_set():
return []
if not certificates:
return []
return self._process_certificates_to_relationships(domain, certificates)
def _serialize_certs_for_cache(self, certificates: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Serialize certificate data for JSON caching, converting datetime objects to ISO strings.
"""
serialized_certs = []
for cert in certificates:
serialized_cert = cert.copy()
for key in ['not_before', 'not_after', 'entry_timestamp']:
if isinstance(serialized_cert.get(key), datetime):
# Ensure datetime is timezone-aware before converting
dt_obj = serialized_cert[key]
if dt_obj.tzinfo is None:
dt_obj = dt_obj.replace(tzinfo=timezone.utc)
serialized_cert[key] = dt_obj.isoformat()
serialized_certs.append(serialized_cert)
return serialized_certs
# --- All methods below are copied directly from the original CrtShProvider ---
# They are compatible because _query_crtsh returns data in the same format
# as the original _query_crtsh_api method. A small adjustment is made to
# _parse_certificate_date to handle datetime objects directly from the DB.
def _get_cache_file_path(self, domain: str) -> Path:
"""Generate cache file path for a domain."""
safe_domain = domain.replace('.', '_').replace('/', '_').replace('\\', '_')
return self.cache_dir / f"{safe_domain}.json"
def _parse_certificate_date(self, date_string: str) -> datetime:
def _get_cache_status(self, cache_file_path: Path) -> str:
"""Check cache status for a domain."""
if not cache_file_path.exists():
return "not_found"
try:
with open(cache_file_path, 'r') as f:
cache_data = json.load(f)
last_query_str = cache_data.get("last_upstream_query")
if not last_query_str:
return "stale"
last_query = datetime.fromisoformat(last_query_str.replace('Z', '+00:00'))
hours_since_query = (datetime.now(timezone.utc) - last_query).total_seconds() / 3600
cache_timeout = self.config.cache_timeout_hours
if hours_since_query < cache_timeout:
return "fresh"
else:
return "stale"
except (json.JSONDecodeError, ValueError, KeyError) as e:
self.logger.logger.warning(f"Invalid cache file format for {cache_file_path}: {e}")
return "stale"
def _load_cached_certificates(self, cache_file_path: Path) -> List[Dict[str, Any]]:
"""Load certificates from cache file."""
try:
with open(cache_file_path, 'r') as f:
cache_data = json.load(f)
return cache_data.get('certificates', [])
except (json.JSONDecodeError, FileNotFoundError, KeyError) as e:
self.logger.logger.error(f"Failed to load cached certificates from {cache_file_path}: {e}")
return []
def _create_cache_file(self, cache_file_path: Path, domain: str, certificates: List[Dict[str, Any]]) -> None:
"""Create new cache file with certificates."""
try:
cache_data = {
"domain": domain,
"first_cached": datetime.now(timezone.utc).isoformat(),
"last_upstream_query": datetime.now(timezone.utc).isoformat(),
"upstream_query_count": 1,
"certificates": certificates
}
cache_file_path.parent.mkdir(parents=True, exist_ok=True)
with open(cache_file_path, 'w') as f:
json.dump(cache_data, f, separators=(',', ':'))
self.logger.logger.info(f"Created cache file for {domain} with {len(certificates)} certificates")
except Exception as e:
self.logger.logger.warning(f"Failed to create cache file for {domain}: {e}")
def _append_to_cache(self, cache_file_path: Path, new_certificates: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Append new certificates to existing cache and return all certificates."""
try:
with open(cache_file_path, 'r') as f:
cache_data = json.load(f)
existing_ids = {cert.get('id') for cert in cache_data.get('certificates', [])}
added_count = 0
for cert in new_certificates:
cert_id = cert.get('id')
if cert_id and cert_id not in existing_ids:
cache_data['certificates'].append(cert)
existing_ids.add(cert_id)
added_count += 1
cache_data['last_upstream_query'] = datetime.now(timezone.utc).isoformat()
cache_data['upstream_query_count'] = cache_data.get('upstream_query_count', 0) + 1
with open(cache_file_path, 'w') as f:
json.dump(cache_data, f, separators=(',', ':'))
total_certs = len(cache_data['certificates'])
self.logger.logger.info(f"Appended {added_count} new certificates to cache. Total: {total_certs}")
return cache_data['certificates']
except Exception as e:
self.logger.logger.warning(f"Failed to append to cache: {e}")
return new_certificates
def _parse_issuer_organization(self, issuer_dn: str) -> str:
"""Parse the issuer Distinguished Name to extract just the organization name."""
if not issuer_dn: return issuer_dn
try:
components = [comp.strip() for comp in issuer_dn.split(',')]
for component in components:
if component.startswith('O='):
org_name = component[2:].strip()
if org_name.startswith('"') and org_name.endswith('"'):
org_name = org_name[1:-1]
return org_name
return issuer_dn
except Exception as e:
self.logger.logger.debug(f"Failed to parse issuer DN '{issuer_dn}': {e}")
return issuer_dn
def _parse_certificate_date(self, date_input: Any) -> datetime:
"""
Parse certificate date from crt.sh format.
Args:
date_string: Date string from crt.sh API
Returns:
Parsed datetime object in UTC
Parse certificate date from various formats (string from cache, datetime from DB).
"""
if isinstance(date_input, datetime):
# If it's already a datetime object from the DB, just ensure it's UTC
if date_input.tzinfo is None:
return date_input.replace(tzinfo=timezone.utc)
return date_input
date_string = str(date_input)
if not date_string:
raise ValueError("Empty date string")
try:
# Handle various possible formats from crt.sh
if date_string.endswith('Z'):
return datetime.fromisoformat(date_string[:-1]).replace(tzinfo=timezone.utc)
elif '+' in date_string or date_string.endswith('UTC'):
# Handle timezone-aware strings
date_string = date_string.replace('UTC', '').strip()
if '+' in date_string:
date_string = date_string.split('+')[0]
return datetime.fromisoformat(date_string).replace(tzinfo=timezone.utc)
else:
# Assume UTC if no timezone specified
return datetime.fromisoformat(date_string).replace(tzinfo=timezone.utc)
except Exception as e:
# Fallback: try parsing without timezone info and assume UTC
if 'Z' in date_string:
return datetime.fromisoformat(date_string.replace('Z', '+00:00'))
# Handle standard ISO format with or without timezone
dt = datetime.fromisoformat(date_string)
if dt.tzinfo is None:
return dt.replace(tzinfo=timezone.utc)
return dt
except ValueError as e:
try:
# Fallback for other formats
return datetime.strptime(date_string[:19], "%Y-%m-%dT%H:%M:%S").replace(tzinfo=timezone.utc)
except Exception:
raise ValueError(f"Unable to parse date: {date_string}") from e
def _is_cert_valid(self, cert_data: Dict[str, Any]) -> bool:
"""
Check if a certificate is currently valid based on its expiry date.
Args:
cert_data: Certificate data from crt.sh
Returns:
True if certificate is currently valid (not expired)
"""
"""Check if a certificate is currently valid based on its expiry date."""
try:
not_after_str = cert_data.get('not_after')
if not not_after_str:
return False
if not not_after_str: return False
not_after_date = self._parse_certificate_date(not_after_str)
not_before_str = cert_data.get('not_before')
now = datetime.now(timezone.utc)
# Check if certificate is within valid date range
is_not_expired = not_after_date > now
if not_before_str:
not_before_date = self._parse_certificate_date(not_before_str)
is_not_before_valid = not_before_date <= now
return is_not_expired and is_not_before_valid
return is_not_expired
except Exception as e:
self.logger.logger.debug(f"Certificate validity check failed: {e}")
return False
def _extract_certificate_metadata(self, cert_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Extract comprehensive metadata from certificate data.
Args:
cert_data: Raw certificate data from crt.sh
Returns:
Comprehensive certificate metadata dictionary
"""
# This method works as-is.
raw_issuer_name = cert_data.get('issuer_name', '')
parsed_issuer_name = self._parse_issuer_organization(raw_issuer_name)
metadata = {
'certificate_id': cert_data.get('id'),
'serial_number': cert_data.get('serial_number'),
'issuer_name': cert_data.get('issuer_name'),
'issuer_name': parsed_issuer_name,
'issuer_ca_id': cert_data.get('issuer_ca_id'),
'common_name': cert_data.get('common_name'),
'not_before': cert_data.get('not_before'),
'not_after': cert_data.get('not_after'),
'entry_timestamp': cert_data.get('entry_timestamp'),
'source': 'crt.sh'
'source': 'crt.sh (DB)'
}
try:
if metadata['not_before'] and metadata['not_after']:
not_before = self._parse_certificate_date(metadata['not_before'])
not_after = self._parse_certificate_date(metadata['not_after'])
metadata['validity_period_days'] = (not_after - not_before).days
metadata['is_currently_valid'] = self._is_cert_valid(cert_data)
metadata['expires_soon'] = (not_after - datetime.now(timezone.utc)).days <= 30
# Add human-readable dates
metadata['not_before'] = not_before.strftime('%Y-%m-%d %H:%M:%S UTC')
metadata['not_after'] = not_after.strftime('%Y-%m-%d %H:%M:%S UTC')
except Exception as e:
self.logger.logger.debug(f"Error computing certificate metadata: {e}")
metadata['is_currently_valid'] = False
metadata['expires_soon'] = False
return metadata
def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Query crt.sh for certificates containing the domain.
"""
if not _is_valid_domain(domain):
return []
# Check for cancellation before starting
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled before start for domain: {domain}")
return []
def _process_certificates_to_relationships(self, domain: str, certificates: List[Dict[str, Any]]) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
# This method works as-is.
relationships = []
try:
# Query crt.sh for certificates
url = f"{self.base_url}?q={quote(domain)}&output=json"
response = self.make_request(url, target_indicator=domain, max_retries=3)
if not response or response.status_code != 200:
return []
# Check for cancellation after request
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled after request for domain: {domain}")
return []
certificates = response.json()
if not certificates:
return []
# Check for cancellation before processing
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled before processing for domain: {domain}")
return []
# Aggregate certificate data by domain
domain_certificates = {}
all_discovered_domains = set()
# Process certificates with cancellation checking
for i, cert_data in enumerate(certificates):
# Check for cancellation every 5 certificates instead of 10 for faster response
if i % 5 == 0 and self._stop_event and self._stop_event.is_set():
print(f"CrtSh processing cancelled at certificate {i} for domain: {domain}")
break
cert_metadata = self._extract_certificate_metadata(cert_data)
cert_domains = self._extract_domains_from_certificate(cert_data)
# Add all domains from this certificate to our tracking
for cert_domain in cert_domains:
# Additional stop check during domain processing
if i % 20 == 0 and self._stop_event and self._stop_event.is_set():
print(f"CrtSh domain processing cancelled for domain: {domain}")
break
if not _is_valid_domain(cert_domain):
continue
all_discovered_domains.add(cert_domain)
# Initialize domain certificate list if needed
if cert_domain not in domain_certificates:
domain_certificates[cert_domain] = []
# Add this certificate to the domain's certificate list
domain_certificates[cert_domain].append(cert_metadata)
# Final cancellation check before creating relationships
if self._stop_event and self._stop_event.is_set():
print(f"CrtSh query cancelled before relationship creation for domain: {domain}")
return []
# Create relationships from query domain to ALL discovered domains with stop checking
for i, discovered_domain in enumerate(all_discovered_domains):
if discovered_domain == domain:
continue # Skip self-relationships
# Check for cancellation every 10 relationships
if i % 10 == 0 and self._stop_event and self._stop_event.is_set():
print(f"CrtSh relationship creation cancelled for domain: {domain}")
break
if not _is_valid_domain(discovered_domain):
continue
# Get certificates for both domains
query_domain_certs = domain_certificates.get(domain, [])
discovered_domain_certs = domain_certificates.get(discovered_domain, [])
# Find shared certificates (for metadata purposes)
shared_certificates = self._find_shared_certificates(query_domain_certs, discovered_domain_certs)
# Calculate confidence based on relationship type and shared certificates
confidence = self._calculate_domain_relationship_confidence(
domain, discovered_domain, shared_certificates, all_discovered_domains
)
# Create comprehensive raw data for the relationship
relationship_raw_data = {
'relationship_type': 'certificate_discovery',
'shared_certificates': shared_certificates,
'total_shared_certs': len(shared_certificates),
'discovery_context': self._determine_relationship_context(discovered_domain, domain),
'domain_certificates': {
domain: self._summarize_certificates(query_domain_certs),
discovered_domain: self._summarize_certificates(discovered_domain_certs)
}
if self._stop_event and self._stop_event.is_set(): return []
domain_certificates = {}
all_discovered_domains = set()
for i, cert_data in enumerate(certificates):
if i % 5 == 0 and self._stop_event and self._stop_event.is_set(): break
cert_metadata = self._extract_certificate_metadata(cert_data)
cert_domains = self._extract_domains_from_certificate(cert_data)
all_discovered_domains.update(cert_domains)
for cert_domain in cert_domains:
if not _is_valid_domain(cert_domain): continue
if cert_domain not in domain_certificates:
domain_certificates[cert_domain] = []
domain_certificates[cert_domain].append(cert_metadata)
if self._stop_event and self._stop_event.is_set(): return []
for i, discovered_domain in enumerate(all_discovered_domains):
if discovered_domain == domain: continue
if i % 10 == 0 and self._stop_event and self._stop_event.is_set(): break
if not _is_valid_domain(discovered_domain): continue
query_domain_certs = domain_certificates.get(domain, [])
discovered_domain_certs = domain_certificates.get(discovered_domain, [])
shared_certificates = self._find_shared_certificates(query_domain_certs, discovered_domain_certs)
confidence = self._calculate_domain_relationship_confidence(
domain, discovered_domain, shared_certificates, all_discovered_domains
)
relationship_raw_data = {
'relationship_type': 'certificate_discovery',
'shared_certificates': shared_certificates,
'total_shared_certs': len(shared_certificates),
'discovery_context': self._determine_relationship_context(discovered_domain, domain),
'domain_certificates': {
domain: self._summarize_certificates(query_domain_certs),
discovered_domain: self._summarize_certificates(discovered_domain_certs)
}
# Create domain -> domain relationship
relationships.append((
domain,
discovered_domain,
'san_certificate',
confidence,
relationship_raw_data
))
# Log the relationship discovery
self.log_relationship_discovery(
source_node=domain,
target_node=discovered_domain,
relationship_type='san_certificate',
confidence_score=confidence,
raw_data=relationship_raw_data,
discovery_method="certificate_transparency_analysis"
)
except json.JSONDecodeError as e:
self.logger.logger.error(f"Failed to parse JSON response from crt.sh: {e}")
except requests.exceptions.RequestException as e:
self.logger.logger.error(f"HTTP request to crt.sh failed: {e}")
}
relationships.append((
domain, discovered_domain, 'san_certificate', confidence, relationship_raw_data
))
self.log_relationship_discovery(
source_node=domain, target_node=discovered_domain, relationship_type='san_certificate',
confidence_score=confidence, raw_data=relationship_raw_data,
discovery_method="certificate_transparency_analysis"
)
return relationships
# --- All remaining helper methods are identical to the original and fully compatible ---
# They are included here for completeness.
def _find_shared_certificates(self, certs1: List[Dict[str, Any]], certs2: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Find certificates that are shared between two domain certificate lists.
Args:
certs1: First domain's certificates
certs2: Second domain's certificates
Returns:
List of shared certificate metadata
"""
shared = []
# Create a set of certificate IDs from the first list for quick lookup
cert1_ids = {cert.get('certificate_id') for cert in certs1 if cert.get('certificate_id')}
# Find certificates in the second list that match
for cert in certs2:
if cert.get('certificate_id') in cert1_ids:
shared.append(cert)
return shared
return [cert for cert in certs2 if cert.get('certificate_id') in cert1_ids]
def _summarize_certificates(self, certificates: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Create a summary of certificates for a domain.
Args:
certificates: List of certificate metadata
Returns:
Summary dictionary with aggregate statistics
"""
if not certificates:
return {
'total_certificates': 0,
'valid_certificates': 0,
'expired_certificates': 0,
'expires_soon_count': 0,
'unique_issuers': [],
'latest_certificate': None,
'has_valid_cert': False
}
if not certificates: return {'total_certificates': 0, 'valid_certificates': 0, 'expired_certificates': 0, 'expires_soon_count': 0, 'unique_issuers': [], 'latest_certificate': None, 'has_valid_cert': False}
valid_count = sum(1 for cert in certificates if cert.get('is_currently_valid'))
expired_count = len(certificates) - valid_count
expires_soon_count = sum(1 for cert in certificates if cert.get('expires_soon'))
# Get unique issuers
unique_issuers = list(set(cert.get('issuer_name') for cert in certificates if cert.get('issuer_name')))
# Find the most recent certificate
latest_cert = None
latest_date = None
latest_cert, latest_date = None, None
for cert in certificates:
try:
if cert.get('not_before'):
cert_date = self._parse_certificate_date(cert['not_before'])
if latest_date is None or cert_date > latest_date:
latest_date = cert_date
latest_cert = cert
except Exception:
continue
return {
'total_certificates': len(certificates),
'valid_certificates': valid_count,
'expired_certificates': expired_count,
'expires_soon_count': expires_soon_count,
'unique_issuers': unique_issuers,
'latest_certificate': latest_cert,
'has_valid_cert': valid_count > 0,
'certificate_details': certificates # Full details for forensic analysis
}
latest_date, latest_cert = cert_date, cert
except Exception: continue
return {'total_certificates': len(certificates), 'valid_certificates': valid_count, 'expired_certificates': len(certificates) - valid_count, 'expires_soon_count': expires_soon_count, 'unique_issuers': unique_issuers, 'latest_certificate': latest_cert, 'has_valid_cert': valid_count > 0, 'certificate_details': certificates}
def _calculate_domain_relationship_confidence(self, domain1: str, domain2: str,
shared_certificates: List[Dict[str, Any]],
all_discovered_domains: Set[str]) -> float:
"""
Calculate confidence score for domain relationship based on various factors.
Args:
domain1: Source domain (query domain)
domain2: Target domain (discovered domain)
shared_certificates: List of shared certificate metadata
all_discovered_domains: All domains discovered in this query
Returns:
Confidence score between 0.0 and 1.0
"""
base_confidence = 0.9
# Adjust confidence based on domain relationship context
def _calculate_domain_relationship_confidence(self, domain1: str, domain2: str, shared_certificates: List[Dict[str, Any]], all_discovered_domains: Set[str]) -> float:
base_confidence, context_bonus, shared_bonus, validity_bonus, issuer_bonus = 0.9, 0.0, 0.0, 0.0, 0.0
relationship_context = self._determine_relationship_context(domain2, domain1)
if relationship_context == 'exact_match':
context_bonus = 0.0 # This shouldn't happen, but just in case
elif relationship_context == 'subdomain':
context_bonus = 0.1 # High confidence for subdomains
elif relationship_context == 'parent_domain':
context_bonus = 0.05 # Medium confidence for parent domains
else:
context_bonus = 0.0 # Related domains get base confidence
# Adjust confidence based on shared certificates
if shared_certificates:
shared_count = len(shared_certificates)
if shared_count >= 3:
shared_bonus = 0.1
elif shared_count >= 2:
shared_bonus = 0.05
else:
shared_bonus = 0.02
# Additional bonus for valid shared certificates
valid_shared = sum(1 for cert in shared_certificates if cert.get('is_currently_valid'))
if valid_shared > 0:
validity_bonus = 0.05
else:
validity_bonus = 0.0
else:
# Even without shared certificates, domains found in the same query have some relationship
shared_bonus = 0.0
validity_bonus = 0.0
# Adjust confidence based on certificate issuer reputation (if shared certificates exist)
issuer_bonus = 0.0
if relationship_context == 'subdomain': context_bonus = 0.1
elif relationship_context == 'parent_domain': context_bonus = 0.05
if shared_certificates:
if len(shared_certificates) >= 3: shared_bonus = 0.1
elif len(shared_certificates) >= 2: shared_bonus = 0.05
else: shared_bonus = 0.02
if any(cert.get('is_currently_valid') for cert in shared_certificates): validity_bonus = 0.05
for cert in shared_certificates:
issuer = cert.get('issuer_name', '').lower()
if any(trusted_ca in issuer for trusted_ca in ['let\'s encrypt', 'digicert', 'sectigo', 'globalsign']):
if any(ca in cert.get('issuer_name', '').lower() for ca in ['let\'s encrypt', 'digicert', 'sectigo', 'globalsign']):
issuer_bonus = max(issuer_bonus, 0.03)
break
# Calculate final confidence
final_confidence = base_confidence + context_bonus + shared_bonus + validity_bonus + issuer_bonus
return max(0.1, min(1.0, final_confidence)) # Clamp between 0.1 and 1.0
return max(0.1, min(1.0, base_confidence + context_bonus + shared_bonus + validity_bonus + issuer_bonus))
def _determine_relationship_context(self, cert_domain: str, query_domain: str) -> str:
"""
Determine the context of the relationship between certificate domain and query domain.
Args:
cert_domain: Domain found in certificate
query_domain: Original query domain
Returns:
String describing the relationship context
"""
if cert_domain == query_domain:
return 'exact_match'
elif cert_domain.endswith(f'.{query_domain}'):
return 'subdomain'
elif query_domain.endswith(f'.{cert_domain}'):
return 'parent_domain'
else:
return 'related_domain'
if cert_domain == query_domain: return 'exact_match'
if cert_domain.endswith(f'.{query_domain}'): return 'subdomain'
if query_domain.endswith(f'.{cert_domain}'): return 'parent_domain'
return 'related_domain'
def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Query crt.sh for certificates containing the IP address.
Note: crt.sh doesn't typically index by IP, so this returns empty results.
Args:
ip: IP address to investigate
Returns:
Empty list (crt.sh doesn't support IP-based certificate queries effectively)
"""
# crt.sh doesn't effectively support IP-based certificate queries
return []
def _extract_domains_from_certificate(self, cert_data: Dict[str, Any]) -> Set[str]:
"""
Extract all domains from certificate data.
Args:
cert_data: Certificate data from crt.sh API
Returns:
Set of unique domain names found in the certificate
"""
domains = set()
# Extract from common name
common_name = cert_data.get('common_name', '')
if common_name:
cleaned_cn = self._clean_domain_name(common_name)
if cleaned_cn:
domains.update(cleaned_cn)
# Extract from name_value field (contains SANs)
name_value = cert_data.get('name_value', '')
if name_value:
# Split by newlines and clean each domain
for line in name_value.split('\n'):
cleaned_domains = self._clean_domain_name(line.strip())
if cleaned_domains:
domains.update(cleaned_domains)
if cn := cert_data.get('common_name'):
if cleaned := self._clean_domain_name(cn):
domains.update(cleaned)
if nv := cert_data.get('name_value'):
for line in nv.split('\n'):
if cleaned := self._clean_domain_name(line.strip()):
domains.update(cleaned)
return domains
def _clean_domain_name(self, domain_name: str) -> List[str]:
"""
Clean and normalize domain name from certificate data.
Now returns a list to handle wildcards correctly.
"""
if not domain_name:
return []
domain = domain_name.strip().lower()
# Remove protocol if present
if domain.startswith(('http://', 'https://')):
domain = domain.split('://', 1)[1]
# Remove path if present
if '/' in domain:
domain = domain.split('/', 1)[0]
# Remove port if present
if ':' in domain and not domain.count(':') > 1: # Avoid breaking IPv6
domain = domain.split(':', 1)[0]
# Handle wildcard domains
cleaned_domains = []
if domain.startswith('*.'):
# Add both the wildcard and the base domain
cleaned_domains.append(domain)
cleaned_domains.append(domain[2:])
else:
cleaned_domains.append(domain)
# Remove any remaining invalid characters and validate
if not domain_name: return []
domain = domain_name.strip().lower().split('://', 1)[-1].split('/', 1)[0]
if ':' in domain and not domain.count(':') > 1: domain = domain.split(':', 1)[0]
cleaned_domains = [domain, domain[2:]] if domain.startswith('*.') else [domain]
final_domains = []
for d in cleaned_domains:
d = re.sub(r'[^\w\-\.]', '', d)
if d and not d.startswith(('.', '-')) and not d.endswith(('.', '-')):
final_domains.append(d)
return [d for d in final_domains if _is_valid_domain(d)]
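# Hand-traced examples of the cleaning logic above (illustrative, not executed):
#   _clean_domain_name('https://Sub.Example.com:8443/path') -> ['sub.example.com']
#       (lowercased; protocol, port and path stripped)
#   _clean_domain_name('*.example.com') -> ['example.com']
#       (the wildcard variant is generated, but re.sub() strips the '*' to leave
#        '.example.com', which the leading-dot check then filters out)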

View File

@@ -1,7 +1,6 @@
# dnsrecon/providers/dns_provider.py
import dns.resolver
import dns.reversename
from dns import resolver, reversename
from typing import List, Dict, Any, Tuple
from .base_provider import BaseProvider
from utils.helpers import _is_valid_ip, _is_valid_domain
@@ -13,7 +12,7 @@ class DNSProvider(BaseProvider):
Now uses session-specific configuration.
"""
def __init__(self, session_config=None):
def __init__(self, name=None, session_config=None):
"""Initialize DNS provider with session-specific configuration."""
super().__init__(
name="dns",
@@ -23,7 +22,7 @@ class DNSProvider(BaseProvider):
)
# Configure DNS resolver
self.resolver = dns.resolver.Resolver()
self.resolver = resolver.Resolver()
self.resolver.timeout = 5
self.resolver.lifetime = 10
#self.resolver.nameservers = ['127.0.0.1']
@@ -51,12 +50,7 @@ class DNSProvider(BaseProvider):
def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Query DNS records for the domain to discover relationships.
Args:
domain: Domain to investigate
Returns:
List of relationships discovered from DNS analysis
...
"""
if not _is_valid_domain(domain):
return []
@@ -65,7 +59,15 @@ class DNSProvider(BaseProvider):
# Query all record types
for record_type in ['A', 'AAAA', 'CNAME', 'MX', 'NS', 'SOA', 'TXT', 'SRV', 'CAA']:
relationships.extend(self._query_record(domain, record_type))
try:
relationships.extend(self._query_record(domain, record_type))
except resolver.NoAnswer:
# This is not an error, just a confirmation that the record doesn't exist.
self.logger.logger.debug(f"No {record_type} record found for {domain}")
except Exception as e:
self.failed_requests += 1
self.logger.logger.debug(f"{record_type} record query failed for {domain}: {e}")
# Optionally, you might want to re-raise other, more serious exceptions.
return relationships
@@ -87,7 +89,7 @@ class DNSProvider(BaseProvider):
try:
# Perform reverse DNS lookup
self.total_requests += 1
reverse_name = dns.reversename.from_address(ip)
reverse_name = reversename.from_address(ip)
response = self.resolver.resolve(reverse_name, 'PTR')
self.successful_requests += 1
@@ -119,9 +121,14 @@ class DNSProvider(BaseProvider):
discovery_method="reverse_dns_lookup"
)
except resolver.NXDOMAIN:
self.failed_requests += 1
self.logger.logger.debug(f"Reverse DNS lookup failed for {ip}: NXDOMAIN")
except Exception as e:
self.failed_requests += 1
self.logger.logger.debug(f"Reverse DNS lookup failed for {ip}: {e}")
# Re-raise the exception so the scanner can handle the failure
raise e
return relationships
@@ -185,5 +192,7 @@ class DNSProvider(BaseProvider):
except Exception as e:
self.failed_requests += 1
self.logger.logger.debug(f"{record_type} record query failed for {domain}: {e}")
# Re-raise the exception so the scanner can handle it
raise e
return relationships
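A sketch of the exception distinction this provider relies on, using the same dnspython imports (domain and record type illustrative):
from dns import resolver

r = resolver.Resolver()
try:
    answers = r.resolve('example.com', 'MX')
except resolver.NoAnswer:
    pass   # domain exists but has no MX record: not an error, per the comment above
except resolver.NXDOMAIN:
    raise  # the domain itself does not exist: a real failure worth surfacing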

View File

@@ -1,7 +1,4 @@
"""
Shodan provider for DNSRecon.
Discovers IP relationships and infrastructure context through Shodan API.
"""
# dnsrecon/providers/shodan_provider.py
import json
from typing import List, Dict, Any, Tuple
@@ -15,7 +12,7 @@ class ShodanProvider(BaseProvider):
Now uses session-specific API keys.
"""
def __init__(self, session_config=None):
def __init__(self, name=None, session_config=None):
"""Initialize Shodan provider with session-specific configuration."""
super().__init__(
name="shodan",

View File

@@ -6,4 +6,6 @@ Werkzeug>=2.3.7
urllib3>=2.0.0
dnspython>=2.4.2
gunicorn
redis
python-dotenv
psycopg2-binary

View File

@@ -272,8 +272,24 @@ input[type="text"]:focus, select:focus {
text-shadow: 0 0 3px rgba(0, 255, 65, 0.3);
}
.progress-container {
padding: 0 1.5rem 1.5rem;
}
.progress-info {
display: flex;
justify-content: space-between;
font-size: 0.8rem;
color: #999;
margin-bottom: 0.5rem;
}
#progress-compact {
color: #00ff41;
font-weight: 500;
}
.progress-bar {
margin: 1rem 1.5rem;
height: 8px;
background-color: #1a1a1a;
border: 1px solid #444;
@@ -517,7 +533,7 @@ input[type="text"]:focus, select:focus {
color: #e0e0e0;
}
.provider-stats {
.provider-stats, .provider-task-stats {
font-size: 0.8rem;
color: #999;
display: grid;
@@ -526,6 +542,13 @@ input[type="text"]:focus, select:focus {
margin-top: 0.5rem;
}
.provider-task-stats {
border-top: 1px solid #333;
padding-top: 0.5rem;
margin-top: 0.5rem;
}
.provider-stat {
display: flex;
justify-content: space-between;

View File

@@ -216,12 +216,8 @@ class GraphManager {
}
});
// FIX: Comment out the problematic context menu handler
this.network.on('oncontext', (params) => {
params.event.preventDefault();
// if (params.nodes.length > 0) {
// this.showNodeContextMenu(params.pointer.DOM, params.nodes[0]);
// }
});
// Stabilization events with progress
@@ -380,7 +376,7 @@ class GraphManager {
// Single correlation value
const value = Array.isArray(values) && values.length > 0 ? values[0] : (metadata.value || 'Unknown');
const displayValue = typeof value === 'string' && value.length > 20 ? value.substring(0, 17) + '...' : value;
processedNode.label = `Corr: ${displayValue}`;
processedNode.label = `${displayValue}`;
processedNode.title = `Correlation: ${value}`;
}
}

View File

@@ -1,7 +1,6 @@
/**
* Main application logic for DNSRecon web interface
* Handles UI interactions, API communication, and data flow
* DEBUG VERSION WITH EXTRA LOGGING
*/
class DNSReconApp {
@@ -61,9 +60,8 @@ class DNSReconApp {
scanStatus: document.getElementById('scan-status'),
targetDisplay: document.getElementById('target-display'),
depthDisplay: document.getElementById('depth-display'),
progressDisplay: document.getElementById('progress-display'),
indicatorsDisplay: document.getElementById('indicators-display'),
relationshipsDisplay: document.getElementById('relationships-display'),
progressCompact: document.getElementById('progress-compact'),
progressFill: document.getElementById('progress-fill'),
// Provider elements
@@ -447,7 +445,7 @@ class DNSReconApp {
// Handle status changes
if (status.status !== this.scanStatus) {
console.log(`*** STATUS CHANGED: ${this.scanStatus} -> ${status.status} ***`);
this.handleStatusChange(status.status);
this.handleStatusChange(status.status, status.task_queue_size);
}
this.scanStatus = status.status;
@@ -542,17 +540,19 @@ class DNSReconApp {
if (this.elements.depthDisplay) {
this.elements.depthDisplay.textContent = `${status.current_depth}/${status.max_depth}`;
}
if (this.elements.progressDisplay) {
this.elements.progressDisplay.textContent = `${status.progress_percentage.toFixed(1)}%`;
}
if (this.elements.indicatorsDisplay) {
this.elements.indicatorsDisplay.textContent = status.indicators_processed || 0;
}
// Update progress bar with smooth animation
// Update progress bar and compact display
if (this.elements.progressFill) {
this.elements.progressFill.style.width = `${status.progress_percentage}%`;
const completed = status.indicators_completed || 0;
const enqueued = status.task_queue_size || 0;
const totalTasks = completed + enqueued;
const progressPercentage = totalTasks > 0 ? (completed / totalTasks) * 100 : 0;
this.elements.progressFill.style.width = `${progressPercentage}%`;
if (this.elements.progressCompact) {
this.elements.progressCompact.textContent = `${completed}/${totalTasks} - ${Math.round(progressPercentage)}%`;
}
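// Worked example: completed = 30, task_queue_size = 10 -> totalTasks = 40, bar width 75%, label "30/40 - 75%"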
// Add pulsing animation for active scans
if (status.status === 'running') {
this.elements.progressFill.parentElement.classList.add('scanning');
@@ -574,6 +574,8 @@ class DNSReconApp {
this.elements.sessionId.textContent = 'Session: Loading...';
}
}
this.setUIState(status.status, status.task_queue_size);
console.log('Status display updated successfully');
} catch (error) {
@@ -585,12 +587,12 @@ class DNSReconApp {
* Handle status changes with improved state synchronization
* @param {string} newStatus - New scan status
*/
handleStatusChange(newStatus) {
handleStatusChange(newStatus, task_queue_size) {
console.log(`=== STATUS CHANGE: ${this.scanStatus} -> ${newStatus} ===`);
switch (newStatus) {
case 'running':
this.setUIState('scanning');
this.setUIState('scanning', task_queue_size);
this.showSuccess('Scan is running');
// Increase polling frequency for active scans
this.startPolling(1000); // Poll every 1 second for running scans
@@ -598,7 +600,7 @@ class DNSReconApp {
break;
case 'completed':
this.setUIState('completed');
this.setUIState('completed', task_queue_size);
this.stopPolling();
this.showSuccess('Scan completed successfully');
this.updateConnectionStatus('completed');
@@ -609,7 +611,7 @@ class DNSReconApp {
break;
case 'failed':
this.setUIState('failed');
this.setUIState('failed', task_queue_size);
this.stopPolling();
this.showError('Scan failed');
this.updateConnectionStatus('error');
@@ -617,7 +619,7 @@ class DNSReconApp {
break;
case 'stopped':
this.setUIState('stopped');
this.setUIState('stopped', task_queue_size);
this.stopPolling();
this.showSuccess('Scan stopped');
this.updateConnectionStatus('stopped');
@@ -625,7 +627,7 @@ class DNSReconApp {
break;
case 'idle':
this.setUIState('idle');
this.setUIState('idle', task_queue_size);
this.stopPolling();
this.updateConnectionStatus('idle');
break;
@@ -670,9 +672,11 @@ class DNSReconApp {
/**
* UI state management with immediate button updates
*/
setUIState(state) {
setUIState(state, task_queue_size) {
console.log(`Setting UI state to: ${state}`);
const isQueueEmpty = task_queue_size === 0;
switch (state) {
case 'scanning':
this.isScanning = true;
@@ -701,12 +705,12 @@ class DNSReconApp {
case 'stopped':
this.isScanning = false;
if (this.elements.startScan) {
this.elements.startScan.disabled = false;
this.elements.startScan.disabled = !isQueueEmpty;
this.elements.startScan.classList.remove('loading');
this.elements.startScan.innerHTML = '<span class="btn-icon">[RUN]</span><span>Start Reconnaissance</span>';
}
if (this.elements.addToGraph) {
this.elements.addToGraph.disabled = false;
this.elements.addToGraph.disabled = !isQueueEmpty;
this.elements.addToGraph.classList.remove('loading');
}
if (this.elements.stopScan) {

View File

@@ -90,22 +90,20 @@
<span class="status-label">Depth:</span>
<span id="depth-display" class="status-value">0/0</span>
</div>
<div class="status-row">
<span class="status-label">Progress:</span>
<span id="progress-display" class="status-value">0%</span>
</div>
<div class="status-row">
<span class="status-label">Indicators:</span>
<span id="indicators-display" class="status-value">0</span>
</div>
<div class="status-row">
<span class="status-label">Relationships:</span>
<span id="relationships-display" class="status-value">0</span>
</div>
</div>
<div class="progress-bar">
<div id="progress-fill" class="progress-fill"></div>
<div class="progress-container">
<div class="progress-info">
<span id="progress-label">Progress:</span>
<span id="progress-compact">0/0</span>
</div>
<div class="progress-bar">
<div id="progress-fill" class="progress-fill"></div>
</div>
</div>
</section>