10 Commits

Author SHA1 Message Date
overcuriousity c4e6a8998a iteration on ws implementation 2025-09-20 16:52:05 +02:00
overcuriousity 75a595c9cb try to implement websockets 2025-09-20 14:17:17 +02:00
3ee23c9d05 Merge pull request 'remove-large-entity-temporarily' (#3) from remove-large-entity-temporarily into main
Reviewed-on: mstoeck3/dnsrecon#3
2025-09-19 12:29:26 +00:00
overcuriousity 8d402ab4b1 postgres 2025-09-19 14:28:37 +02:00
overcuriousity 7472e6f416 fixes to hint for incomplete data 2025-09-19 12:35:28 +02:00
overcuriousity eabb532557 almost fixed 2025-09-19 01:10:07 +02:00
overcuriousity 0a6d12de9a large entity recreation 2025-09-19 00:38:26 +02:00
overcuriousity 332805709d remove 2025-09-18 23:44:24 +02:00
overcuriousity 1558731c1c attempt fix large entity 2025-09-18 23:22:49 +02:00
overcuriousity 95cebbf935 bug fixes, improvements 2025-09-18 22:39:12 +02:00
14 changed files with 1834 additions and 775 deletions

169 app.py View File

@@ -3,9 +3,9 @@
"""
Flask application entry point for DNSRecon web interface.
Provides REST API endpoints and serves the web interface with user session support.
FIXED: Enhanced WebSocket integration with proper connection management.
"""
import json
import traceback
from flask import Flask, render_template, request, jsonify, send_file, session
from datetime import datetime, timezone, timedelta
@@ -13,6 +13,7 @@ import io
import os
from core.session_manager import session_manager
from flask_socketio import SocketIO
from config import config
from core.graph_manager import NodeType
from utils.helpers import is_valid_target
@@ -21,29 +22,38 @@ from decimal import Decimal
app = Flask(__name__)
socketio = SocketIO(app, cors_allowed_origins="*")
app.config['SECRET_KEY'] = config.flask_secret_key
app.config['PERMANENT_SESSION_LIFETIME'] = timedelta(hours=config.flask_permanent_session_lifetime_hours)
def get_user_scanner():
"""
FIXED: Retrieves the scanner for the current session with proper socketio management.
"""
current_flask_session_id = session.get('dnsrecon_session_id')
if current_flask_session_id:
existing_scanner = session_manager.get_session(current_flask_session_id)
if existing_scanner:
# FIXED: Ensure socketio is properly maintained
existing_scanner.socketio = socketio
print(f"✓ Retrieved existing scanner for session {current_flask_session_id[:8]}... with socketio restored")
return current_flask_session_id, existing_scanner
# FIXED: Register socketio connection when creating new session
new_session_id = session_manager.create_session(socketio)
new_scanner = session_manager.get_session(new_session_id)
if not new_scanner:
raise Exception("Failed to create new scanner session")
# FIXED: Ensure new scanner has socketio reference and register the connection
new_scanner.socketio = socketio
session_manager.register_socketio_connection(new_session_id, socketio)
session['dnsrecon_session_id'] = new_session_id
session.permanent = True
print(f"✓ Created new scanner for session {new_session_id[:8]}... with socketio registered")
return new_session_id, new_scanner
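For illustration, this is how a hypothetical endpoint would consume the helper (the route name and response shape below are invented for the example, not part of the codebase):

@app.route('/api/example', methods=['GET'])
def example_endpoint():
    # Hypothetical endpoint: get_user_scanner() always returns a (session_id, scanner)
    # pair bound to the Flask session, with the module-level socketio re-attached.
    user_session_id, scanner = get_user_scanner()
    return jsonify({'success': True, 'session': user_session_id[:8]})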
@@ -56,7 +66,7 @@ def index():
@app.route('/api/scan/start', methods=['POST'])
def start_scan():
"""
FIXED: Starts a new reconnaissance scan with proper socketio management.
"""
try:
data = request.get_json()
@@ -80,9 +90,17 @@ def start_scan():
if not scanner:
return jsonify({'success': False, 'error': 'Failed to get scanner instance.'}), 500
# FIXED: Ensure scanner has socketio reference and is registered
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
print(f"🚀 Starting scan for {target} with socketio enabled and registered")
success = scanner.start_scan(target, max_depth, clear_graph=clear_graph, force_rescan_target=force_rescan_target)
if success:
# Update session with socketio-enabled scanner
session_manager.update_session_scanner(user_session_id, scanner)
return jsonify({
'success': True,
'message': 'Reconnaissance scan started successfully',
@@ -111,6 +129,10 @@ def stop_scan():
if not scanner.session_id:
scanner.session_id = user_session_id
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
scanner.stop_scan()
session_manager.set_stop_signal(user_session_id)
session_manager.update_scanner_status(user_session_id, 'stopped')
@@ -127,37 +149,83 @@ def stop_scan():
return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500
@socketio.on('connect')
def handle_connect():
"""
FIXED: Handle WebSocket connection with proper session management.
"""
print(f'✓ WebSocket client connected: {request.sid}')
# Try to restore existing session connection
current_flask_session_id = session.get('dnsrecon_session_id')
if current_flask_session_id:
# Register this socketio connection for the existing session
session_manager.register_socketio_connection(current_flask_session_id, socketio)
print(f'✓ Registered WebSocket for existing session: {current_flask_session_id[:8]}...')
# Immediately send current status to new connection
get_scan_status()
@socketio.on('disconnect')
def handle_disconnect():
"""
FIXED: Handle WebSocket disconnection gracefully.
"""
print(f'✗ WebSocket client disconnected: {request.sid}')
# Note: We don't immediately remove the socketio connection from session_manager
# because the user might reconnect. The cleanup will happen during session cleanup.
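For illustration, a minimal client-side sketch of these handlers, assuming the python-socketio client package (a separate install from the server-side Flask-SocketIO added to requirements.txt in this commit):

import socketio  # pip install "python-socketio[client]" (assumption, not in requirements.txt)

sio = socketio.Client()

@sio.on('scan_update')
def on_scan_update(status):
    # The server pushes full status payloads on this event (see get_scan_status below).
    graph = status.get('graph', {})
    print(f"status={status.get('status')} nodes={len(graph.get('nodes', []))}")

sio.connect('http://localhost:5000')  # host/port assumed from the Flask defaults
sio.emit('get_status')                # triggers the get_scan_status handler
sio.wait()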
@socketio.on('get_status')
def get_scan_status():
"""Get current scan status."""
"""
FIXED: Get current scan status and emit real-time update with proper error handling.
"""
try:
user_session_id, scanner = get_user_scanner()
if not scanner:
status = {
'status': 'idle',
'target_domain': None,
'current_depth': 0,
'max_depth': 0,
'progress_percentage': 0.0,
'user_session_id': user_session_id,
'graph': {'nodes': [], 'edges': [], 'statistics': {'node_count': 0, 'edge_count': 0}}
}
print(f"📡 Emitting idle status for session {user_session_id[:8] if user_session_id else 'none'}...")
else:
if not scanner.session_id:
scanner.session_id = user_session_id
# FIXED: Ensure scanner has socketio reference for future updates
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
status = scanner.get_scan_status()
status['user_session_id'] = user_session_id
print(f"📡 Emitting status update: {status['status']} - "
f"Nodes: {len(status.get('graph', {}).get('nodes', []))}, "
f"Edges: {len(status.get('graph', {}).get('edges', []))}")
# Update session with socketio-enabled scanner
session_manager.update_session_scanner(user_session_id, scanner)
socketio.emit('scan_update', status)
except Exception as e:
traceback.print_exc()
error_status = {
'status': 'error',
'message': 'Failed to get status',
'graph': {'nodes': [], 'edges': [], 'statistics': {'node_count': 0, 'edge_count': 0}}
}
print(f"⚠️ Error getting status, emitting error status")
socketio.emit('scan_update', error_status)
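Note that socketio.emit('scan_update', status) without a room argument broadcasts to every connected client. If per-session isolation were wanted, a room-scoped variant could look like this sketch (the join_session event and room naming are assumptions, not part of this commit):

from flask_socketio import join_room

@socketio.on('join_session')
def handle_join_session():
    # Hypothetical handler: place each client in a room named after its session
    # so updates can be targeted instead of broadcast.
    session_id = session.get('dnsrecon_session_id')
    if session_id:
        join_room(session_id)
        # Later: socketio.emit('scan_update', status, room=session_id)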
@app.route('/api/graph', methods=['GET'])
@@ -174,6 +242,10 @@ def get_graph_data():
if not scanner:
return jsonify({'success': True, 'graph': empty_graph, 'user_session_id': user_session_id})
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
graph_data = scanner.get_graph_data() or empty_graph
return jsonify({'success': True, 'graph': graph_data, 'user_session_id': user_session_id})
@@ -200,6 +272,10 @@ def extract_from_large_entity():
if not scanner:
return jsonify({'success': False, 'error': 'No active session found'}), 404
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
success = scanner.extract_node_from_large_entity(large_entity_id, node_id)
if success:
@@ -220,6 +296,10 @@ def delete_graph_node(node_id):
if not scanner:
return jsonify({'success': False, 'error': 'No active session found'}), 404
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
success = scanner.graph.remove_node(node_id)
if success:
@@ -245,6 +325,10 @@ def revert_graph_action():
if not scanner:
return jsonify({'success': False, 'error': 'No active session found'}), 404
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
action_type = data['type']
action_data = data['data']
@@ -289,6 +373,10 @@ def export_results():
if not scanner:
return jsonify({'success': False, 'error': 'No active scanner session found'}), 404
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
# Get export data using the new export manager
try:
results = export_manager.export_scan_results(scanner)
@@ -340,6 +428,10 @@ def export_targets():
if not scanner:
return jsonify({'success': False, 'error': 'No active scanner session found'}), 404
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
# Use export manager for targets export
targets_txt = export_manager.export_targets_list(scanner)
@@ -370,6 +462,10 @@ def export_summary():
if not scanner:
return jsonify({'success': False, 'error': 'No active scanner session found'}), 404
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
# Use export manager for summary generation
summary_txt = export_manager.generate_executive_summary(scanner)
@@ -402,6 +498,10 @@ def set_api_keys():
user_session_id, scanner = get_user_scanner()
session_config = scanner.config
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
updated_providers = []
for provider_name, api_key in data.items():
@@ -434,6 +534,10 @@ def get_providers():
user_session_id, scanner = get_user_scanner()
base_provider_info = scanner.get_provider_info()
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
# Enhance provider info with API key source information
enhanced_provider_info = {}
@@ -498,6 +602,10 @@ def configure_providers():
user_session_id, scanner = get_user_scanner()
session_config = scanner.config
# FIXED: Ensure scanner has socketio reference
scanner.socketio = socketio
session_manager.register_socketio_connection(user_session_id, socketio)
updated_providers = []
for provider_name, settings in data.items():
@@ -526,7 +634,6 @@ def configure_providers():
return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500
@app.errorhandler(404)
def not_found(error):
"""Handle 404 errors."""
@@ -542,9 +649,9 @@ def internal_error(error):
if __name__ == '__main__':
config.load_from_env()
print("🚀 Starting DNSRecon with enhanced WebSocket support...")
print(f" Host: {config.flask_host}")
print(f" Port: {config.flask_port}")
print(f" Debug: {config.flask_debug}")
print(" WebSocket: Enhanced connection management enabled")
socketio.run(app, host=config.flask_host, port=config.flask_port, debug=config.flask_debug)

core/graph_manager.py View File

@@ -4,8 +4,7 @@
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
Now fully compatible with the unified ProviderResult data model.
FIXED: Added proper pickle support to prevent weakref serialization errors.
"""
import re
from datetime import datetime, timezone
@@ -33,6 +32,7 @@ class GraphManager:
Thread-safe graph manager for DNSRecon infrastructure mapping.
Uses NetworkX for in-memory graph storage with confidence scoring.
Compatible with unified ProviderResult data model.
FIXED: Added proper pickle support to handle NetworkX graph serialization.
"""
def __init__(self):
@@ -41,6 +41,57 @@ class GraphManager:
self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time
def __getstate__(self):
"""Prepare GraphManager for pickling by converting NetworkX graph to serializable format."""
state = self.__dict__.copy()
# Convert NetworkX graph to a serializable format
if hasattr(self, 'graph') and self.graph:
# Extract all nodes with their data
nodes_data = {}
for node_id, attrs in self.graph.nodes(data=True):
nodes_data[node_id] = dict(attrs)
# Extract all edges with their data
edges_data = []
for source, target, attrs in self.graph.edges(data=True):
edges_data.append({
'source': source,
'target': target,
'attributes': dict(attrs)
})
# Replace the NetworkX graph with serializable data
state['_graph_nodes'] = nodes_data
state['_graph_edges'] = edges_data
del state['graph']
return state
def __setstate__(self, state):
"""Restore GraphManager after unpickling by reconstructing NetworkX graph."""
# Restore basic attributes
self.__dict__.update(state)
# Reconstruct NetworkX graph from serializable data
self.graph = nx.DiGraph()
# Restore nodes
if hasattr(self, '_graph_nodes'):
for node_id, attrs in self._graph_nodes.items():
self.graph.add_node(node_id, **attrs)
del self._graph_nodes
# Restore edges
if hasattr(self, '_graph_edges'):
for edge_data in self._graph_edges:
self.graph.add_edge(
edge_data['source'],
edge_data['target'],
**edge_data['attributes']
)
del self._graph_edges
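A quick round-trip check of this pickle support (a sketch; NodeType.DOMAIN is assumed to exist as one of the enum members):

import pickle

gm = GraphManager()
gm.add_node('example.com', NodeType.DOMAIN, description='seed')  # DOMAIN is an assumed member

restored = pickle.loads(pickle.dumps(gm))  # exercises __getstate__/__setstate__
assert restored.graph.has_node('example.com')
assert restored.graph.number_of_nodes() == gm.graph.number_of_nodes()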
def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[List[Dict[str, Any]]] = None,
description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool:
"""
@@ -114,36 +165,6 @@ class GraphManager:
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
"""
Removes a node from a large entity's internal lists and updates its count.
This prepares the large entity for the node's promotion to a regular node.
"""
if not self.graph.has_node(large_entity_id):
return False
node_data = self.graph.nodes[large_entity_id]
attributes = node_data.get('attributes', [])
# Find the 'nodes' attribute dictionary in the list
nodes_attr = next((attr for attr in attributes if attr.get('name') == 'nodes'), None)
# Remove from the list of member nodes
if nodes_attr and 'value' in nodes_attr and isinstance(nodes_attr['value'], list) and node_id_to_extract in nodes_attr['value']:
nodes_attr['value'].remove(node_id_to_extract)
# Find the 'count' attribute and update it
count_attr = next((attr for attr in attributes if attr.get('name') == 'count'), None)
if count_attr:
count_attr['value'] = len(nodes_attr['value'])
else:
# This can happen if the node was already extracted, which is not an error.
print(f"Warning: Node {node_id_to_extract} not found in the 'nodes' list of {large_entity_id}.")
return True # Proceed as if successful
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def remove_node(self, node_id: str) -> bool:
"""Remove a node and its connected edges from the graph."""
if not self.graph.has_node(node_id):

core/logger.py View File

@@ -40,6 +40,7 @@ class ForensicLogger:
"""
Thread-safe forensic logging system for DNSRecon.
Maintains detailed audit trail of all reconnaissance activities.
FIXED: Enhanced pickle support to prevent weakref issues in logging handlers.
"""
def __init__(self, session_id: str = ""):
@@ -65,45 +66,74 @@ class ForensicLogger:
'target_domains': set()
}
# Configure standard logger with simple setup to avoid weakrefs
self.logger = logging.getLogger(f'dnsrecon.{self.session_id}')
self.logger.setLevel(logging.INFO)
# Create minimal formatter
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Add console handler only if not already present (avoid duplicate handlers)
if not self.logger.handlers:
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)
def __getstate__(self):
"""Prepare ForensicLogger for pickling by excluding unpicklable objects."""
"""
FIXED: Prepare ForensicLogger for pickling by excluding problematic objects.
"""
state = self.__dict__.copy()
# Remove potentially unpickleable attributes that may contain weakrefs
unpicklable_attrs = ['logger', 'lock']
for attr in unpicklable_attrs:
if attr in state:
del state[attr]
# Convert sets to lists for JSON serialization compatibility
if 'session_metadata' in state:
metadata = state['session_metadata'].copy()
if 'providers_used' in metadata and isinstance(metadata['providers_used'], set):
metadata['providers_used'] = list(metadata['providers_used'])
if 'target_domains' in metadata and isinstance(metadata['target_domains'], set):
metadata['target_domains'] = list(metadata['target_domains'])
state['session_metadata'] = metadata
return state
def __setstate__(self, state):
"""Restore ForensicLogger after unpickling by reconstructing logger."""
"""
FIXED: Restore ForensicLogger after unpickling by reconstructing components.
"""
self.__dict__.update(state)
# Re-initialize threading lock
self.lock = threading.Lock()
# Re-initialize logger with minimal setup
self.logger = logging.getLogger(f'dnsrecon.{self.session_id}')
self.logger.setLevel(logging.INFO)
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Only add handler if not already present
if not self.logger.handlers:
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)
# Convert lists back to sets if needed
if 'session_metadata' in self.__dict__:
metadata = self.session_metadata
if 'providers_used' in metadata and isinstance(metadata['providers_used'], list):
metadata['providers_used'] = set(metadata['providers_used'])
if 'target_domains' in metadata and isinstance(metadata['target_domains'], list):
metadata['target_domains'] = set(metadata['target_domains'])
def _generate_session_id(self) -> str:
"""Generate unique session identifier."""
@@ -143,18 +173,23 @@ class ForensicLogger:
discovery_context=discovery_context
)
with self.lock:
self.api_requests.append(api_request)
self.session_metadata['total_requests'] += 1
self.session_metadata['providers_used'].add(provider)
if target_indicator:
self.session_metadata['target_domains'].add(target_indicator)
# Log to standard logger with error handling
try:
if error:
self.logger.error(f"API Request Failed - {provider}: {url}")
else:
self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}")
except Exception:
# If logging fails, continue without breaking the application
pass
def log_relationship_discovery(self, source_node: str, target_node: str,
relationship_type: str, confidence_score: float,
@@ -183,29 +218,44 @@ class ForensicLogger:
discovery_method=discovery_method
)
with self.lock:
self.relationships.append(relationship)
self.session_metadata['total_relationships'] += 1
# Log to standard logger with error handling
try:
self.logger.info(
f"Relationship Discovered - {source_node} -> {target_node} "
f"({relationship_type}) - Confidence: {confidence_score:.2f} - Provider: {provider}"
)
except Exception:
# If logging fails, continue without breaking the application
pass
def log_scan_start(self, target_domain: str, recursion_depth: int,
enabled_providers: List[str]) -> None:
"""Log the start of a reconnaissance scan."""
self.logger.info(f"Scan Started - Target: {target_domain}, Depth: {recursion_depth}")
self.logger.info(f"Enabled Providers: {', '.join(enabled_providers)}")
try:
self.logger.info(f"Scan Started - Target: {target_domain}, Depth: {recursion_depth}")
self.logger.info(f"Enabled Providers: {', '.join(enabled_providers)}")
with self.lock:
self.session_metadata['target_domains'].add(target_domain)
except Exception:
pass
def log_scan_complete(self) -> None:
"""Log the completion of a reconnaissance scan."""
with self.lock:
self.session_metadata['end_time'] = datetime.now(timezone.utc).isoformat()
# Convert sets to lists for serialization
self.session_metadata['providers_used'] = list(self.session_metadata['providers_used'])
self.session_metadata['target_domains'] = list(self.session_metadata['target_domains'])
self.logger.info(f"Scan Complete - Session: {self.session_id}")
try:
self.logger.info(f"Scan Complete - Session: {self.session_id}")
except Exception:
pass
def export_audit_trail(self) -> Dict[str, Any]:
"""
@@ -214,12 +264,13 @@ class ForensicLogger:
Returns:
Dictionary containing complete session audit trail
"""
with self.lock:
return {
'session_metadata': self.session_metadata.copy(),
'api_requests': [asdict(req) for req in self.api_requests],
'relationships': [asdict(rel) for rel in self.relationships],
'export_timestamp': datetime.now(timezone.utc).isoformat()
}
def get_forensic_summary(self) -> Dict[str, Any]:
"""
@@ -229,7 +280,13 @@ class ForensicLogger:
Dictionary containing summary statistics
"""
provider_stats = {}
# Ensure providers_used is a set for iteration
providers_used = self.session_metadata['providers_used']
if isinstance(providers_used, list):
providers_used = set(providers_used)
for provider in providers_used:
provider_requests = [req for req in self.api_requests if req.provider == provider]
provider_relationships = [rel for rel in self.relationships if rel.provider == provider]

File diff suppressed because it is too large

core/session_manager.py View File

@@ -6,6 +6,7 @@ import uuid
import redis
import pickle
from typing import Dict, Optional, Any
import copy
from core.scanner import Scanner
from config import config
@@ -13,7 +14,7 @@ from config import config
class SessionManager:
"""
FIXED: Manages multiple scanner instances for concurrent user sessions using Redis.
Enhanced to properly maintain WebSocket connections throughout scan lifecycle.
"""
def __init__(self, session_timeout_minutes: int = 0):
@@ -30,6 +31,9 @@ class SessionManager:
# FIXED: Add a creation lock to prevent race conditions
self.creation_lock = threading.Lock()
# Track active socketio connections per session
self.active_socketio_connections = {}
# Start cleanup thread
self.cleanup_thread = threading.Thread(target=self._cleanup_loop, daemon=True)
self.cleanup_thread.start()
@@ -40,7 +44,7 @@ class SessionManager:
"""Prepare SessionManager for pickling."""
state = self.__dict__.copy()
# Exclude unpickleable attributes - Redis client and threading objects
unpicklable_attrs = ['lock', 'cleanup_thread', 'redis_client', 'creation_lock']
unpicklable_attrs = ['lock', 'cleanup_thread', 'redis_client', 'creation_lock', 'active_socketio_connections']
for attr in unpicklable_attrs:
if attr in state:
del state[attr]
@@ -53,6 +57,7 @@ class SessionManager:
self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
self.lock = threading.Lock()
self.creation_lock = threading.Lock()
self.active_socketio_connections = {}
self.cleanup_thread = threading.Thread(target=self._cleanup_loop, daemon=True)
self.cleanup_thread.start()
@@ -64,22 +69,70 @@ class SessionManager:
"""Generates the Redis key for a session's stop signal."""
return f"dnsrecon:stop:{session_id}"
def register_socketio_connection(self, session_id: str, socketio) -> None:
"""
FIXED: Register a socketio connection for a session.
This ensures the connection is maintained throughout the session lifecycle.
"""
with self.lock:
self.active_socketio_connections[session_id] = socketio
print(f"Registered socketio connection for session {session_id}")
def get_socketio_connection(self, session_id: str):
"""
FIXED: Get the active socketio connection for a session.
"""
with self.lock:
return self.active_socketio_connections.get(session_id)
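The registry is a plain dict guarded by the manager's lock; the intended lifecycle, sketched against these two methods:

# Sketch of the intended lifecycle (session_manager and socketio come from the app):
session_id = session_manager.create_session(socketio)       # registers the connection up front
conn = session_manager.get_socketio_connection(session_id)  # returns the same instance
assert conn is socketio
# terminate_session() and the cleanup loop drop the entry again.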
def _prepare_scanner_for_storage(self, scanner: Scanner, session_id: str) -> Scanner:
"""
FIXED: Prepare scanner for storage by ensuring proper cleanup of unpicklable objects.
Now preserves socketio connection info for restoration.
"""
# Set the session ID on the scanner for cross-process stop signal management
scanner.session_id = session_id
# Strip the live socketio reference before pickling; it is restored from the
# connection registry when the scanner is loaded again.
scanner.socketio = None
# Force cleanup of any threading objects that might cause issues
if hasattr(scanner, 'stop_event'):
scanner.stop_event = None
if hasattr(scanner, 'scan_thread'):
scanner.scan_thread = None
if hasattr(scanner, 'executor'):
scanner.executor = None
if hasattr(scanner, 'status_logger_thread'):
scanner.status_logger_thread = None
if hasattr(scanner, 'status_logger_stop_event'):
scanner.status_logger_stop_event = None
return scanner
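The invariant this buys, sketched with the private method called directly (given an existing scanner and session_id):

import pickle

prepared = session_manager._prepare_scanner_for_storage(scanner, session_id)
assert prepared.socketio is None            # stripped; restored from the registry on load
assert prepared.session_id == session_id    # kept for cross-process stop signals
pickle.dumps(prepared)                      # should no longer trip over threads/weakrefs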
def create_session(self, socketio=None) -> str:
"""
FIXED: Create a new user session with enhanced WebSocket management.
"""
# FIXED: Use creation lock to prevent race conditions
with self.creation_lock:
session_id = str(uuid.uuid4())
print(f"=== CREATING SESSION {session_id} IN REDIS ===")
# FIXED: Register socketio connection first
if socketio:
self.register_socketio_connection(session_id, socketio)
try:
from core.session_config import create_session_config
session_config = create_session_config()
scanner_instance = Scanner(session_config=session_config)
# Create scanner WITHOUT socketio to avoid weakref issues
scanner_instance = Scanner(session_config=session_config, socketio=None)
# Prepare scanner for storage (removes problematic objects)
scanner_instance = self._prepare_scanner_for_storage(scanner_instance, session_id)
session_data = {
'scanner': scanner_instance,
@@ -89,12 +142,24 @@ class SessionManager:
'status': 'active'
}
# Test serialization before storing to catch issues early
try:
test_serialization = pickle.dumps(session_data)
print(f"Session serialization test successful ({len(test_serialization)} bytes)")
except Exception as pickle_error:
print(f"PICKLE TEST FAILED: {pickle_error}")
# Try to identify the problematic object
for key, value in session_data.items():
try:
pickle.dumps(value)
print(f" {key}: OK")
except Exception as item_error:
print(f" {key}: FAILED - {item_error}")
raise pickle_error
# Store in Redis
session_key = self._get_session_key(session_id)
self.redis_client.setex(session_key, self.session_timeout, test_serialization)
# Initialize stop signal as False
stop_key = self._get_stop_signal_key(session_id)
@@ -106,6 +171,8 @@ class SessionManager:
except Exception as e:
print(f"ERROR: Failed to create session {session_id}: {e}")
import traceback
traceback.print_exc()
raise
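The per-key pickle probe above is a generally useful debugging pattern; extracted as a standalone helper (illustrative, not part of the commit):

import pickle

def find_unpicklable(mapping):
    """Return the dict keys whose values fail to pickle, with the error text."""
    failures = {}
    for key, value in mapping.items():
        try:
            pickle.dumps(value)
        except Exception as exc:  # pickle can raise several unrelated types
            failures[key] = repr(exc)
    return failures

# e.g. find_unpicklable(session_data) -> {'scanner': "TypeError('cannot pickle ...')"}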
def set_stop_signal(self, session_id: str) -> bool:
@@ -175,31 +242,63 @@ class SessionManager:
# Ensure the scanner has the correct session ID for stop signal checking
if 'scanner' in session_data and session_data['scanner']:
session_data['scanner'].session_id = session_id
# FIXED: Restore socketio connection from our registry
socketio_conn = self.get_socketio_connection(session_id)
if socketio_conn:
session_data['scanner'].socketio = socketio_conn
print(f"Restored socketio connection for session {session_id}")
else:
print(f"No socketio connection found for session {session_id}")
session_data['scanner'].socketio = None
return session_data
return None
except Exception as e:
print(f"ERROR: Failed to get session data for {session_id}: {e}")
import traceback
traceback.print_exc()
return None
def _save_session_data(self, session_id: str, session_data: Dict[str, Any]) -> bool:
"""
Serializes and saves session data back to Redis with updated TTL.
FIXED: Now preserves socketio connection during storage.
Returns:
bool: True if save was successful
"""
try:
session_key = self._get_session_key(session_id)
# Create a deep copy to avoid modifying the original scanner object
session_data_to_save = copy.deepcopy(session_data)
# Prepare scanner for storage if it exists
if 'scanner' in session_data_to_save and session_data_to_save['scanner']:
# FIXED: Preserve the original socketio connection before preparing for storage
original_socketio = session_data_to_save['scanner'].socketio
session_data_to_save['scanner'] = self._prepare_scanner_for_storage(
session_data_to_save['scanner'],
session_id
)
# FIXED: If we had a socketio connection, make sure it's registered
if original_socketio and session_id not in self.active_socketio_connections:
self.register_socketio_connection(session_id, original_socketio)
serialized_data = pickle.dumps(session_data_to_save)
result = self.redis_client.setex(session_key, self.session_timeout, serialized_data)
return result
except Exception as e:
print(f"ERROR: Failed to save session data for {session_id}: {e}")
import traceback
traceback.print_exc()
return False
def update_session_scanner(self, session_id: str, scanner: 'Scanner') -> bool:
"""
FIXED: Updates just the scanner object in a session with immediate persistence.
Now maintains socketio connection throughout the update process.
Returns:
bool: True if update was successful
@@ -207,21 +306,27 @@ class SessionManager:
try:
session_data = self._get_session_data(session_id)
if session_data:
# FIXED: Preserve socketio connection before preparing for storage
original_socketio = scanner.socketio
# Prepare scanner for storage
scanner = self._prepare_scanner_for_storage(scanner, session_id)
session_data['scanner'] = scanner
session_data['last_activity'] = time.time()
# FIXED: Restore socketio connection after preparation
if original_socketio:
self.register_socketio_connection(session_id, original_socketio)
session_data['scanner'].socketio = original_socketio
# Immediately save to Redis for GUI updates
success = self._save_session_data(session_id, session_data)
if success:
# Only log occasionally to reduce noise
if hasattr(self, '_last_update_log'):
if time.time() - self._last_update_log > 5: # Log every 5 seconds max
#print(f"Scanner state updated for session {session_id} (status: {scanner.status})")
self._last_update_log = time.time()
else:
#print(f"Scanner state updated for session {session_id} (status: {scanner.status})")
self._last_update_log = time.time()
else:
print(f"WARNING: Failed to save scanner state for session {session_id}")
@@ -231,6 +336,8 @@ class SessionManager:
return False
except Exception as e:
print(f"ERROR: Failed to update scanner for session {session_id}: {e}")
import traceback
traceback.print_exc()
return False
def update_scanner_status(self, session_id: str, status: str) -> bool:
@@ -263,7 +370,7 @@ class SessionManager:
def get_session(self, session_id: str) -> Optional[Scanner]:
"""
Get scanner instance for a session from Redis with session ID management.
FIXED: Get scanner instance for a session from Redis with proper socketio restoration.
"""
if not session_id:
return None
@@ -282,6 +389,15 @@ class SessionManager:
# Ensure the scanner can check the Redis-based stop signal
scanner.session_id = session_id
# FIXED: Restore socketio connection from our registry
socketio_conn = self.get_socketio_connection(session_id)
if socketio_conn:
scanner.socketio = socketio_conn
print(f"✓ Restored socketio connection for session {session_id}")
else:
scanner.socketio = None
print(f"⚠️ No socketio connection found for session {session_id}")
return scanner
def get_session_status_only(self, session_id: str) -> Optional[str]:
@@ -333,6 +449,12 @@ class SessionManager:
# Wait a moment for graceful shutdown
time.sleep(0.5)
# FIXED: Clean up socketio connection
with self.lock:
if session_id in self.active_socketio_connections:
del self.active_socketio_connections[session_id]
print(f"Cleaned up socketio connection for session {session_id}")
# Delete session data and stop signal from Redis
session_key = self._get_session_key(session_id)
stop_key = self._get_stop_signal_key(session_id)
@@ -344,6 +466,8 @@ class SessionManager:
except Exception as e:
print(f"ERROR: Failed to terminate session {session_id}: {e}")
import traceback
traceback.print_exc()
return False
def _cleanup_loop(self) -> None:
@@ -364,6 +488,12 @@ class SessionManager:
self.redis_client.delete(stop_key)
print(f"Cleaned up orphaned stop signal for session {session_id}")
# Also clean up socketio connection
with self.lock:
if session_id in self.active_socketio_connections:
del self.active_socketio_connections[session_id]
print(f"Cleaned up orphaned socketio for session {session_id}")
except Exception as e:
print(f"Error in cleanup loop: {e}")
@@ -387,14 +517,16 @@ class SessionManager:
return {
'total_active_sessions': active_sessions,
'running_scans': running_scans,
'total_stop_signals': len(stop_keys)
'total_stop_signals': len(stop_keys),
'active_socketio_connections': len(self.active_socketio_connections)
}
except Exception as e:
print(f"ERROR: Failed to get statistics: {e}")
return {
'total_active_sessions': 0,
'running_scans': 0,
'total_stop_signals': 0
'total_stop_signals': 0,
'active_socketio_connections': 0
}
# Global session manager instance

View File

@@ -15,6 +15,7 @@ class BaseProvider(ABC):
"""
Abstract base class for all DNSRecon data providers.
Now supports session-specific configuration and returns standardized ProviderResult objects.
FIXED: Enhanced pickle support to prevent weakref serialization errors.
"""
def __init__(self, name: str, rate_limit: int = 60, timeout: int = 30, session_config=None):
@@ -53,22 +54,57 @@ class BaseProvider(ABC):
def __getstate__(self):
"""Prepare BaseProvider for pickling by excluding unpicklable objects."""
state = self.__dict__.copy()
# Exclude unpickleable attributes that may contain weakrefs
unpicklable_attrs = [
'_local', # Thread-local storage (contains requests.Session)
'_stop_event', # Threading event
'logger', # Logger may contain weakrefs in handlers
]
for attr in unpicklable_attrs:
if attr in state:
del state[attr]
# Also handle any potential weakrefs in the config object
if 'config' in state and hasattr(state['config'], '__getstate__'):
# If config has its own pickle support, let it handle itself
pass
elif 'config' in state:
# Otherwise, ensure config doesn't contain unpicklable objects
try:
# Test if config can be pickled
import pickle
pickle.dumps(state['config'])
except (TypeError, AttributeError):
# If config can't be pickled, we'll recreate it during unpickling
state['_config_class'] = type(state['config']).__name__
del state['config']
return state
def __setstate__(self, state):
"""Restore BaseProvider after unpickling by reconstructing threading objects."""
self.__dict__.update(state)
# Re-initialize unpickleable attributes
self._local = threading.local()
self._stop_event = None
self.logger = get_forensic_logger()
# Recreate config if it was removed during pickling
if not hasattr(self, 'config') and hasattr(self, '_config_class'):
if self._config_class == 'Config':
from config import config as global_config
self.config = global_config
elif self._config_class == 'SessionConfig':
from core.session_config import create_session_config
self.config = create_session_config()
del self._config_class
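A restored provider therefore carries rebuilt runtime objects and, if the original config was unpicklable, a freshly created one; a round-trip sketch (DNSProvider is the concrete subclass shown later in this diff):

import pickle

provider = DNSProvider(session_config=None)
clone = pickle.loads(pickle.dumps(provider))
assert clone._stop_event is None   # rebuilt empty by __setstate__
# clone.config is the original if it pickled, else recreated from _config_class.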
@property
def session(self):
"""Get or create thread-local requests session."""
if not hasattr(self._local, 'session'):
self._local.session = requests.Session()
self._local.session.headers.update({

View File

@@ -10,6 +10,7 @@ from core.graph_manager import NodeType, GraphManager
class CorrelationProvider(BaseProvider):
"""
A provider that finds correlations between nodes in the graph.
FIXED: Enhanced pickle support to prevent weakref issues with graph references.
"""
def __init__(self, name: str = "correlation", session_config=None):
@@ -26,6 +27,7 @@ class CorrelationProvider(BaseProvider):
'cert_common_name',
'cert_validity_period_days',
'cert_issuer_name',
'cert_serial_number',
'cert_entry_timestamp',
'cert_not_before',
'cert_not_after',
@@ -37,6 +39,38 @@ class CorrelationProvider(BaseProvider):
'query_timestamp',
]
def __getstate__(self):
"""
FIXED: Prepare CorrelationProvider for pickling by excluding graph reference.
"""
state = super().__getstate__()
# Remove graph reference to prevent circular dependencies and weakrefs
if 'graph' in state:
del state['graph']
# Also handle correlation_index which might contain complex objects
if 'correlation_index' in state:
# Clear correlation index as it will be rebuilt when needed
state['correlation_index'] = {}
return state
def __setstate__(self, state):
"""
FIXED: Restore CorrelationProvider after unpickling.
"""
super().__setstate__(state)
# Re-initialize graph reference (will be set by scanner)
self.graph = None
# Re-initialize correlation index
self.correlation_index = {}
# Re-compile regex pattern
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
def get_name(self) -> str:
"""Return the provider name."""
return "correlation"
@@ -78,13 +112,20 @@ class CorrelationProvider(BaseProvider):
def _find_correlations(self, node_id: str) -> ProviderResult:
"""
Find correlations for a given node.
FIXED: Added safety checks to prevent issues when graph is None.
"""
result = ProviderResult()
# FIXED: Ensure self.graph is not None before proceeding
if not self.graph or not self.graph.graph.has_node(node_id):
return result
try:
node_attributes = self.graph.graph.nodes[node_id].get('attributes', [])
except Exception as e:
# If there's any issue accessing the graph, return empty result
print(f"Warning: Could not access graph for correlation analysis: {e}")
return result
for attr in node_attributes:
attr_name = attr.get('name')
@@ -133,6 +174,7 @@ class CorrelationProvider(BaseProvider):
if len(self.correlation_index[attr_value]['nodes']) > 1:
self._create_correlation_relationships(attr_value, self.correlation_index[attr_value], result)
return result
def _create_correlation_relationships(self, value: Any, correlation_data: Dict[str, Any], result: ProviderResult):

View File

@@ -3,7 +3,7 @@
import json
import re
from pathlib import Path
from typing import List, Dict, Any, Set, Optional
from urllib.parse import quote
from datetime import datetime, timezone
import requests
@@ -11,6 +11,7 @@ import requests
from .base_provider import BaseProvider
from core.provider_result import ProviderResult
from utils.helpers import _is_valid_domain
from core.logger import get_forensic_logger
class CrtShProvider(BaseProvider):
@@ -114,51 +115,42 @@ class CrtShProvider(BaseProvider):
result = ProviderResult()

try:
    if cache_status == "fresh":
        result = self._load_from_cache(cache_file)
        self.logger.logger.info(f"Using fresh cached crt.sh data for {domain}")

    else:  # "stale" or "not_found"
        # Query the API for the latest certificates
        new_raw_certs = self._query_crtsh_api(domain)

        if self._stop_event and self._stop_event.is_set():
            return ProviderResult()

        # Combine with old data if cache is stale
        if cache_status == "stale":
            old_raw_certs = self._load_raw_data_from_cache(cache_file)
            combined_certs = old_raw_certs + new_raw_certs

            # Deduplicate the combined list
            seen_ids = set()
            unique_certs = []
            for cert in combined_certs:
                cert_id = cert.get('id')
                if cert_id not in seen_ids:
                    unique_certs.append(cert)
                    seen_ids.add(cert_id)

            raw_certificates_to_process = unique_certs
            self.logger.logger.info(f"Refreshed and merged cache for {domain}. Total unique certs: {len(raw_certificates_to_process)}")
        else:  # "not_found"
            raw_certificates_to_process = new_raw_certs

        # FIXED: Process certificates to create proper domain and CA nodes
        result = self._process_certificates_to_result_fixed(domain, raw_certificates_to_process)
        self.logger.logger.info(f"Created fresh result for {domain} ({result.get_relationship_count()} relationships)")

        # Save the new result and the raw data to the cache
        self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain)

except requests.exceptions.RequestException as e:
    self.logger.logger.error(f"API query failed for {domain}: {e}")
    if cache_status != "not_found":
        result = self._load_from_cache(cache_file)
        self.logger.logger.warning(f"Using stale cache for {domain} due to API failure.")
    else:
        raise e  # Re-raise if there's no cache to fall back on

return result
@@ -286,6 +278,17 @@ class CrtShProvider(BaseProvider):
self.logger.logger.info(f"CrtSh processing cancelled before processing for domain: {query_domain}")
return result
incompleteness_warning = self._check_for_incomplete_data(query_domain, certificates)
if incompleteness_warning:
result.add_attribute(
target_node=query_domain,
name="crtsh_data_warning",
value=incompleteness_warning,
attr_type='metadata',
provider=self.name,
confidence=1.0
)
all_discovered_domains = set()
processed_issuers = set()
@@ -457,6 +460,8 @@ class CrtShProvider(BaseProvider):
raise ValueError("Empty date string")
try:
if isinstance(date_string, datetime):
return date_string.replace(tzinfo=timezone.utc)
if date_string.endswith('Z'):
return datetime.fromisoformat(date_string[:-1]).replace(tzinfo=timezone.utc)
elif '+' in date_string or date_string.endswith('UTC'):
@@ -578,3 +583,29 @@ class CrtShProvider(BaseProvider):
return 'parent_domain'
else:
return 'related_domain'
def _check_for_incomplete_data(self, domain: str, certificates: List[Dict[str, Any]]) -> Optional[str]:
"""
Analyzes the certificate list to heuristically detect if the data from crt.sh is incomplete.
"""
cert_count = len(certificates)
# Heuristic 1: Check if the number of certs hits a known hard limit.
if cert_count >= 10000:
return f"Result likely truncated; received {cert_count} certificates, which may be the maximum limit."
# Heuristic 2: Check if all returned certificates are old.
if cert_count > 1000: # Only apply this for a reasonable number of certs
latest_expiry = None
for cert in certificates:
try:
not_after = self._parse_certificate_date(cert.get('not_after'))
if latest_expiry is None or not_after > latest_expiry:
latest_expiry = not_after
except (ValueError, TypeError):
continue
if latest_expiry and (datetime.now(timezone.utc) - latest_expiry).days > 365:
return f"Incomplete data suspected: The latest certificate expired more than a year ago ({latest_expiry.strftime('%Y-%m-%d')})."
return None
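A worked example of the second heuristic, given a CrtShProvider instance named provider (sketch; certificate dicts abbreviated to the not_after field the check reads):

# >1000 certs whose newest expiry is over a year old should trigger the warning.
stale_certs = [{'not_after': '2020-01-01T00:00:00Z'} for _ in range(1001)]
warning = provider._check_for_incomplete_data('example.com', stale_certs)
assert warning is not None and 'Incomplete data suspected' in warning

fresh_certs = [{'not_after': '2030-01-01T00:00:00Z'} for _ in range(1001)]
assert provider._check_for_incomplete_data('example.com', fresh_certs) is None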

View File

@@ -11,6 +11,7 @@ class DNSProvider(BaseProvider):
"""
Provider for standard DNS resolution and reverse DNS lookups.
Now returns standardized ProviderResult objects with IPv4 and IPv6 support.
FIXED: Enhanced pickle support to prevent resolver serialization issues.
"""
def __init__(self, name=None, session_config=None):
@@ -27,6 +28,22 @@ class DNSProvider(BaseProvider):
self.resolver.timeout = 5
self.resolver.lifetime = 10
def __getstate__(self):
"""Prepare the object for pickling by excluding resolver."""
state = super().__getstate__()
# Remove the unpickleable 'resolver' attribute
if 'resolver' in state:
del state['resolver']
return state
def __setstate__(self, state):
"""Restore the object after unpickling by reconstructing resolver."""
super().__setstate__(state)
# Re-initialize the 'resolver' attribute
self.resolver = resolver.Resolver()
self.resolver.timeout = 5
self.resolver.lifetime = 10
def get_name(self) -> str:
"""Return the provider name."""
return "dns"
@@ -106,10 +123,10 @@ class DNSProvider(BaseProvider):
if _is_valid_domain(hostname):
# Determine appropriate forward relationship type based on IP version
if ip_version == 6:
    relationship_type = 'shodan_aaaa_record'
    record_prefix = 'AAAA'
else:
    relationship_type = 'shodan_a_record'
    record_prefix = 'A'
# Add the relationship

View File

@@ -27,26 +27,62 @@ class ShodanProvider(BaseProvider):
)
self.base_url = "https://api.shodan.io"
self.api_key = self.config.get_api_key('shodan')
# FIXED: Don't fail initialization on connection issues - defer to actual usage
self._connection_tested = False
self._connection_works = False
# Initialize cache directory
self.cache_dir = Path('cache') / 'shodan'
self.cache_dir.mkdir(parents=True, exist_ok=True)
def __getstate__(self):
"""Prepare the object for pickling."""
state = super().__getstate__()
return state
def __setstate__(self, state):
"""Restore the object after unpickling."""
super().__setstate__(state)
def _check_api_connection(self) -> bool:
"""Checks if the Shodan API is reachable."""
"""
FIXED: Lazy connection checking - only test when actually needed.
Don't block provider initialization on network issues.
"""
if self._connection_tested:
return self._connection_works
if not self.api_key:
return False
try:
response = self.session.get(f"{self.base_url}/api-info?key={self.api_key}", timeout=5)
self.logger.logger.debug("Shodan is reacheable")
return response.status_code == 200
except requests.exceptions.RequestException:
self._connection_tested = True
self._connection_works = False
return False
try:
print(f"Testing Shodan API connection with key: {self.api_key[:8]}...")
response = self.session.get(f"{self.base_url}/api-info?key={self.api_key}", timeout=5)
self._connection_works = response.status_code == 200
print(f"Shodan API test result: {response.status_code} - {'Success' if self._connection_works else 'Failed'}")
except requests.exceptions.RequestException as e:
print(f"Shodan API connection test failed: {e}")
self._connection_works = False
finally:
self._connection_tested = True
return self._connection_works
def is_available(self) -> bool:
"""Check if Shodan provider is available (has valid API key in this session)."""
"""
FIXED: Check if Shodan provider is available based on API key presence.
Don't require successful connection test during initialization.
"""
has_api_key = self.api_key is not None and len(self.api_key.strip()) > 0
if not has_api_key:
return False
# FIXED: Only test connection on first actual usage, not during initialization
return True
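The net effect of this split, sketched (the ShodanProvider constructor arguments are assumed):

provider = ShodanProvider(session_config=None)  # assumed signature
if provider.is_available():                     # cheap: only checks the API key
    ok = provider._check_api_connection()       # first call hits /api-info once
    assert provider._check_api_connection() == ok  # later calls are memoized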
def get_name(self) -> str:
"""Return the provider name."""
@@ -117,6 +153,7 @@ class ShodanProvider(BaseProvider):
def query_ip(self, ip: str) -> ProviderResult:
"""
Query Shodan for information about an IP address (IPv4 or IPv6), with caching of processed data.
FIXED: Proper 404 handling to prevent unnecessary retries.
Args:
ip: IP address to investigate (IPv4 or IPv6)
@@ -127,7 +164,12 @@ class ShodanProvider(BaseProvider):
Raises:
Exception: For temporary failures that should be retried (timeouts, 502/503 errors, connection issues)
"""
if not _is_valid_ip(ip):
return ProviderResult()
# Test connection only when actually making requests
if not self._check_api_connection():
print(f"Shodan API not available for {ip} - API key: {'present' if self.api_key else 'missing'}")
return ProviderResult()
# Normalize IP address for consistent processing
@@ -151,26 +193,40 @@ class ShodanProvider(BaseProvider):
response = self.make_request(url, method="GET", params=params, target_indicator=normalized_ip)
if not response:
self.logger.logger.warning(f"Shodan API unreachable for {normalized_ip} - network failure")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to connection failure")
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to network failure")
return self._load_from_cache(cache_file)
# FIXED: Treat network failures as "no information" rather than retryable errors
self.logger.logger.info(f"No Shodan data available for {normalized_ip} due to network failure")
result = ProviderResult() # Empty result
network_failure_data = {'shodan_status': 'network_unreachable', 'error': 'API unreachable'}
self._save_to_cache(cache_file, result, network_failure_data)
return result
# FIXED: Handle different status codes more precisely
if response.status_code == 200:
self.logger.logger.debug(f"Shodan returned data for {normalized_ip}")
try:
data = response.json()
result = self._process_shodan_data(normalized_ip, data)
self._save_to_cache(cache_file, result, data)
return result
except json.JSONDecodeError as e:
self.logger.logger.error(f"Invalid JSON response from Shodan for {normalized_ip}: {e}")
if cache_status == "stale":
return self._load_from_cache(cache_file)
else:
raise requests.exceptions.RequestException("Invalid JSON response from Shodan - should retry")
elif response.status_code == 404:
# 404 = "no information available" - successful but empty result, don't retry
# FIXED: 404 = "no information available" - successful but empty result, don't retry
self.logger.logger.debug(f"Shodan has no information for {normalized_ip} (404)")
result = ProviderResult() # Empty but successful result
empty_data = {'shodan_status': 'no_information', 'status_code': 404}
self._save_to_cache(cache_file, result, empty_data)
return result
elif response.status_code in [401, 403]:
@@ -178,7 +234,7 @@ class ShodanProvider(BaseProvider):
self.logger.logger.error(f"Shodan API authentication failed for {normalized_ip} (HTTP {response.status_code})")
return ProviderResult() # Empty result, don't retry
elif response.status_code == 429:
# Rate limiting - should be handled by rate limiter, but if we get here, retry
self.logger.logger.warning(f"Shodan API rate limited for {normalized_ip} (HTTP {response.status_code})")
if cache_status == "stale":
@@ -197,13 +253,12 @@ class ShodanProvider(BaseProvider):
raise requests.exceptions.RequestException(f"Shodan API server error (HTTP {response.status_code}) - should retry")
else:
# FIXED: Other HTTP status codes - treat as no information available, don't retry
self.logger.logger.info(f"Shodan returned status {response.status_code} for {normalized_ip} - treating as no information")
result = ProviderResult() # Empty result
no_info_data = {'shodan_status': 'no_information', 'status_code': response.status_code}
self._save_to_cache(cache_file, result, no_info_data)
return result
except requests.exceptions.Timeout:
# Timeout errors - should be retried
@@ -223,17 +278,8 @@ class ShodanProvider(BaseProvider):
else:
raise # Re-raise connection error for retry
except json.JSONDecodeError:
# JSON parsing error - treat as temporary failure
self.logger.logger.error(f"Invalid JSON response from Shodan for {normalized_ip}")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to JSON parsing error")
@@ -241,14 +287,16 @@ class ShodanProvider(BaseProvider):
else:
raise requests.exceptions.RequestException("Invalid JSON response from Shodan - should retry")
# FIXED: Remove the generic RequestException handler that was causing 404s to retry
# Now only specific exceptions that should be retried are re-raised
except Exception as e:
# FIXED: Unexpected exceptions - log but treat as no information available, don't retry
self.logger.logger.warning(f"Unexpected exception in Shodan query for {normalized_ip}: {e}")
result = ProviderResult() # Empty result
error_data = {'shodan_status': 'error', 'error': str(e)}
self._save_to_cache(cache_file, result, error_data)
return result
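# --- editor's sketch (illustrative, not part of the diff) ---
# The branches above implement a single policy: "no information" outcomes
# (404, unexpected status codes, unexpected exceptions) are cached as empty
# results and never retried, while transient failures (429, 5xx, timeouts,
# connection errors) prefer a stale cache entry and otherwise re-raise so the
# provider framework retries. A minimal standalone model with a hypothetical
# helper name; cache_status is assumed to be "fresh", "stale" or "not_found".
def _shodan_retry_policy(status_code: int, cache_status: str) -> str:
    if status_code == 200:
        return "parse_and_cache"
    if status_code in (401, 403):
        return "fail_no_retry"  # auth errors never heal on retry
    if status_code == 429 or 500 <= status_code < 600:
        return "use_stale_cache" if cache_status == "stale" else "retry"
    return "cache_empty_no_retry"  # 404 and anything unexpected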
def _load_from_cache(self, cache_file_path: Path) -> ProviderResult:
"""Load processed Shodan data from a cache file."""

requirements.txt
View File

@@ -8,3 +8,6 @@ dnspython
gunicorn
redis
python-dotenv
psycopg2-binary
Flask-SocketIO
eventlet
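The three new entries back the WebSocket work: psycopg2-binary for PostgreSQL access, Flask-SocketIO for the event layer, and eventlet as the async server it auto-detects. A minimal launch sketch, assuming app.py exposes the `app` and `socketio` objects; the bind address is illustrative:
# run.py - illustrative sketch; with eventlet installed, Flask-SocketIO
# selects it automatically, and socketio.run() replaces plain app.run()
# so WebSocket upgrades are actually served.
from app import app, socketio

if __name__ == "__main__":
    socketio.run(app, host="0.0.0.0", port=5000)  # assumed bind address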

static/js/graph.js
View File

@@ -1,3 +1,4 @@
// dnsrecon-reduced/static/js/graph.js
/**
* Graph visualization module for DNSRecon
* Handles network graph rendering using vis.js with proper large entity node hiding
@@ -362,100 +363,60 @@ class GraphManager {
}
try {
// Initialize if not already done
if (!this.isInitialized) {
this.initialize();
}
this.initialTargetIds = new Set(graphData.initial_targets || []);
// Check if we have actual data to display
const hasData = graphData.nodes.length > 0 || graphData.edges.length > 0;
// Handle placeholder visibility
const placeholder = this.container.querySelector('.graph-placeholder');
if (placeholder) {
if (hasData) {
placeholder.style.display = 'none';
} else {
placeholder.style.display = 'flex';
// Early return if no data to process
return;
}
placeholder.style.display = hasData ? 'none' : 'flex';
}
if (!hasData) {
this.nodes.clear();
this.edges.clear();
return;
}
this.largeEntityMembers.clear();
const largeEntityMap = new Map();
const nodeMap = new Map(graphData.nodes.map(node => [node.id, node]));
graphData.nodes.forEach(node => {
if (node.type === 'large_entity' && node.attributes) {
const nodesAttribute = this.findAttributeByName(node.attributes, 'nodes');
if (nodesAttribute && Array.isArray(nodesAttribute.value)) {
nodesAttribute.value.forEach(nodeId => {
largeEntityMap.set(nodeId, node.id);
this.largeEntityMembers.add(nodeId);
});
}
}
});
// Filter out hidden nodes before processing for rendering
const filteredNodes = graphData.nodes.filter(node =>
!(node.metadata && node.metadata.large_entity_id)
);
const filteredNodes = graphData.nodes.filter(node => {
return !this.largeEntityMembers.has(node.id) || node.type === 'large_entity';
});
console.log(`Filtered ${graphData.nodes.length - filteredNodes.length} large entity member nodes from visualization`);
// Process nodes with proper certificate coloring
const processedNodes = filteredNodes.map(node => {
const processedNodes = graphData.nodes.map(node => {
const processed = this.processNode(node);
// Apply certificate-based coloring here in frontend
if (node.type === 'domain' && Array.isArray(node.attributes)) {
const certInfo = this.analyzeCertificateInfo(node.attributes);
if (certInfo.hasExpiredOnly) {
// Red for domains with only expired/invalid certificates
processed.color = { background: '#ff6b6b', border: '#cc5555' };
} else if (!certInfo.hasCertificates) {
// Grey for domains with no certificates
processed.color = { background: '#c7c7c7', border: '#999999' };
}
// Valid certificates use default green (handled by processNode)
}
return processed;
});
const mergedEdges = {};
graphData.edges.forEach(edge => {
const fromNode = largeEntityMap.has(edge.from) ? largeEntityMap.get(edge.from) : edge.from;
const toNode = largeEntityMap.has(edge.to) ? largeEntityMap.get(edge.to) : edge.to;
const mergeKey = `${fromNode}-${toNode}-${edge.label}`;
if (!mergedEdges[mergeKey]) {
mergedEdges[mergeKey] = {
...edge,
from: fromNode,
to: toNode,
count: 0,
confidence_score: 0
};
}
mergedEdges[mergeKey].count++;
if (edge.confidence_score > mergedEdges[mergeKey].confidence_score) {
mergedEdges[mergeKey].confidence_score = edge.confidence_score;
}
});
const processedEdges = Object.values(mergedEdges).map(edge => {
const processed = this.processEdge(edge);
if (edge.count > 1) {
processed.label = `${edge.label} (${edge.count})`;
if (node.metadata && node.metadata.large_entity_id) {
processed.hidden = true;
}
return processed;
});
// Update datasets with animation
const processedEdges = graphData.edges.map(edge => {
let fromNode = nodeMap.get(edge.from);
let toNode = nodeMap.get(edge.to);
let fromId = edge.from;
let toId = edge.to;
if (fromNode && fromNode.metadata && fromNode.metadata.large_entity_id) {
fromId = fromNode.metadata.large_entity_id;
}
if (toNode && toNode.metadata && toNode.metadata.large_entity_id) {
toId = toNode.metadata.large_entity_id;
}
// Drop self-referencing edges created by the re-routing
if (fromId === toId) {
return null;
}
const reRoutedEdge = { ...edge, from: fromId, to: toId };
return this.processEdge(reRoutedEdge);
}).filter(Boolean); // Remove nulls from self-referencing edges
const existingNodeIds = this.nodes.getIds();
const existingEdgeIds = this.edges.getIds();
@@ -472,13 +433,10 @@ class GraphManager {
setTimeout(() => this.highlightNewElements(newNodes, newEdges), 100);
}
if (processedNodes.length <= 10 || existingNodeIds.length === 0) {
if (this.nodes.length <= 10 || existingNodeIds.length === 0) {
setTimeout(() => this.fitView(), 800);
}
console.log(`Graph updated: ${processedNodes.length} nodes, ${processedEdges.length} edges (${newNodes.length} new nodes, ${newEdges.length} new edges)`);
console.log(`Large entity members hidden: ${this.largeEntityMembers.size}`);
} catch (error) {
console.error('Failed to update graph:', error);
this.showError('Failed to update visualization');
@@ -606,7 +564,7 @@ class GraphManager {
processEdge(edge) {
const confidence = edge.confidence_score || 0;
const processedEdge = {
id: `${edge.from}-${edge.to}`,
id: `${edge.from}-${edge.to}-${edge.label}`,
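// (editor's note) folding the label into the id gives parallel relationships
// between the same two nodes distinct vis.js DataSet ids instead of colliding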
from: edge.from,
to: edge.to,
label: this.formatEdgeLabel(edge.label, confidence),
@@ -1053,7 +1011,7 @@ class GraphManager {
this.nodes.clear();
this.edges.clear();
this.history = [];
this.largeEntityMembers.clear(); // Clear large entity tracking
this.largeEntityMembers.clear();
this.initialTargetIds.clear();
// Show placeholder
@@ -1211,7 +1169,6 @@ class GraphManager {
const basicStats = {
nodeCount: this.nodes.length,
edgeCount: this.edges.length,
largeEntityMembersHidden: this.largeEntityMembers.size
};
// Add forensic statistics
@@ -1608,14 +1565,43 @@ class GraphManager {
}
/**
* Unhide all hidden nodes
* FIXED: Unhide all hidden nodes, excluding large entity members and disconnected nodes.
* This prevents orphaned large entity members from appearing as free-floating nodes.
*/
unhideAll() {
const allNodes = this.nodes.get({
filter: (node) => node.hidden === true
const allHiddenNodes = this.nodes.get({
filter: (node) => {
// Skip nodes that are part of a large entity
if (node.metadata && node.metadata.large_entity_id) {
return false;
}
// Skip nodes that are not hidden
if (node.hidden !== true) {
return false;
}
// Skip nodes that have no edges (would appear disconnected)
const nodeId = node.id;
const hasIncomingEdges = this.edges.get().some(edge => edge.to === nodeId && !edge.hidden);
const hasOutgoingEdges = this.edges.get().some(edge => edge.from === nodeId && !edge.hidden);
if (!hasIncomingEdges && !hasOutgoingEdges) {
console.log(`Skipping disconnected node ${nodeId} from unhide`);
return false;
}
return true;
}
});
const updates = allNodes.map(node => ({ id: node.id, hidden: false }));
this.nodes.update(updates);
if (allHiddenNodes.length > 0) {
console.log(`Unhiding ${allHiddenNodes.length} nodes with valid connections`);
const updates = allHiddenNodes.map(node => ({ id: node.id, hidden: false }));
this.nodes.update(updates);
} else {
console.log('No eligible nodes to unhide');
}
}
}
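For reference, a minimal sketch of the graph payload this rework expects; the field names come from the code above, while all values are invented:
// Members of a large entity carry metadata.large_entity_id: the node pass
// marks them hidden, and the edge pass re-routes their edges to the container.
const exampleGraphData = {
  initial_targets: ['example.com'],
  nodes: [
    { id: 'example.com', type: 'domain', attributes: [] },
    { id: 'le_1', type: 'large_entity', attributes: [] },
    {
      id: 'a.example.com', type: 'domain', attributes: [],
      metadata: { large_entity_id: 'le_1' },  // rendered hidden
    },
  ],
  edges: [
    // becomes le_1 -> example.com after re-routing
    { from: 'a.example.com', to: 'example.com', label: 'dns_a', confidence_score: 0.8 },
  ],
};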

static/js/main.js
View File

@@ -1,15 +1,15 @@
/**
* Main application logic for DNSRecon web interface
* Handles UI interactions, API communication, and data flow
* UPDATED: Now compatible with a strictly flat, unified data model for attributes.
* FIXED: Enhanced real-time WebSocket graph updates
*/
class DNSReconApp {
constructor() {
console.log('DNSReconApp constructor called');
this.graphManager = null;
this.socket = null;
this.scanStatus = 'idle';
this.pollInterval = null;
this.currentSessionId = null;
this.elements = {};
@@ -17,6 +17,14 @@ class DNSReconApp {
this.isScanning = false;
this.lastGraphUpdate = null;
// FIXED: Add connection state tracking
this.isConnected = false;
this.reconnectAttempts = 0;
this.maxReconnectAttempts = 5;
// FIXED: Track last graph data for debugging
this.lastGraphData = null;
this.init();
}
@@ -31,13 +39,11 @@ class DNSReconApp {
this.initializeElements();
this.setupEventHandlers();
this.initializeGraph();
this.updateStatus();
this.initializeSocket();
this.loadProviders();
this.initializeEnhancedModals();
this.addCheckboxStyling();
this.updateGraph();
console.log('DNSRecon application initialized successfully');
} catch (error) {
console.error('Failed to initialize DNSRecon application:', error);
@@ -46,6 +52,162 @@ class DNSReconApp {
});
}
initializeSocket() {
console.log('🔌 Initializing WebSocket connection...');
try {
this.socket = io({
transports: ['websocket', 'polling'],
timeout: 10000,
reconnection: true,
reconnectionAttempts: 5,
reconnectionDelay: 2000
});
this.socket.on('connect', () => {
console.log('✅ WebSocket connected successfully');
this.isConnected = true;
this.reconnectAttempts = 0;
this.updateConnectionStatus('idle');
console.log('📡 Requesting initial status...');
this.socket.emit('get_status');
});
this.socket.on('disconnect', (reason) => {
console.log('❌ WebSocket disconnected:', reason);
this.isConnected = false;
this.updateConnectionStatus('error');
});
this.socket.on('connect_error', (error) => {
console.error('❌ WebSocket connection error:', error);
this.reconnectAttempts++;
this.updateConnectionStatus('error');
if (this.reconnectAttempts >= this.maxReconnectAttempts) {
this.showError('WebSocket connection failed. Please refresh the page.');
}
});
this.socket.on('reconnect', (attemptNumber) => {
console.log('✅ WebSocket reconnected after', attemptNumber, 'attempts');
this.isConnected = true;
this.reconnectAttempts = 0;
this.updateConnectionStatus('idle');
this.socket.emit('get_status');
});
// FIXED: Enhanced scan_update handler with detailed graph processing and debugging
this.socket.on('scan_update', (data) => {
console.log('📨 WebSocket update received:', {
status: data.status,
target: data.target_domain,
progress: data.progress_percentage,
graphNodes: data.graph?.nodes?.length || 0,
graphEdges: data.graph?.edges?.length || 0,
timestamp: new Date().toISOString()
});
try {
// Handle status change
if (data.status !== this.scanStatus) {
console.log(`📄 Status change: ${this.scanStatus} → ${data.status}`);
this.handleStatusChange(data.status, data.task_queue_size);
}
this.scanStatus = data.status;
// Update status display
this.updateStatusDisplay(data);
// FIXED: Always update graph if data is present and graph manager exists
if (data.graph && this.graphManager) {
console.log('📊 Processing graph update:', {
nodes: data.graph.nodes?.length || 0,
edges: data.graph.edges?.length || 0,
hasNodes: Array.isArray(data.graph.nodes),
hasEdges: Array.isArray(data.graph.edges),
isInitialized: this.graphManager.isInitialized
});
// FIXED: Initialize graph manager if not already done
if (!this.graphManager.isInitialized) {
console.log('🎯 Initializing graph manager...');
this.graphManager.initialize();
}
// FIXED: Force graph update and verify it worked
const previousNodeCount = this.graphManager.nodes ? this.graphManager.nodes.length : 0;
const previousEdgeCount = this.graphManager.edges ? this.graphManager.edges.length : 0;
console.log('🔄 Before update - Nodes:', previousNodeCount, 'Edges:', previousEdgeCount);
// Store the data for debugging
this.lastGraphData = data.graph;
// Update the graph
this.graphManager.updateGraph(data.graph);
this.lastGraphUpdate = Date.now();
// Verify the update worked
const newNodeCount = this.graphManager.nodes ? this.graphManager.nodes.length : 0;
const newEdgeCount = this.graphManager.edges ? this.graphManager.edges.length : 0;
console.log('🔄 After update - Nodes:', newNodeCount, 'Edges:', newEdgeCount);
if (newNodeCount !== data.graph.nodes.length || newEdgeCount !== data.graph.edges.length) {
console.warn('⚠️ Graph update mismatch!', {
expectedNodes: data.graph.nodes.length,
actualNodes: newNodeCount,
expectedEdges: data.graph.edges.length,
actualEdges: newEdgeCount
});
// Force a complete rebuild if there's a mismatch
console.log('🔧 Force rebuilding graph...');
this.graphManager.clear();
this.graphManager.updateGraph(data.graph);
}
console.log('✅ Graph updated successfully');
// FIXED: Force network redraw if we're using vis.js
if (this.graphManager.network) {
try {
this.graphManager.network.redraw();
console.log('🎨 Network redrawn');
} catch (redrawError) {
console.warn('⚠️ Network redraw failed:', redrawError);
}
}
} else {
if (!data.graph) {
console.log('⚠️ No graph data in WebSocket update');
}
if (!this.graphManager) {
console.log('⚠️ Graph manager not available');
}
}
} catch (error) {
console.error('❌ Error processing WebSocket update:', error);
console.error('Update data:', data);
console.error('Stack trace:', error.stack);
}
});
this.socket.on('error', (error) => {
console.error('❌ WebSocket error:', error);
this.showError('WebSocket communication error');
});
} catch (error) {
console.error('❌ Failed to initialize WebSocket:', error);
this.showError('Failed to establish real-time connection');
}
}
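// (editor's note) shape of the 'scan_update' payload consumed above; field
// names are taken from the handler, values are illustrative. The server side
// emits it with Flask-SocketIO's socketio.emit('scan_update', payload):
// {
//   status: 'running',            // idle | running | completed | failed | stopped
//   target_domain: 'example.com',
//   progress_percentage: 42,
//   task_queue_size: 7,
//   graph: { nodes: [...], edges: [...], initial_targets: [...] }
// }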
/**
* Initialize DOM element references
*/
@@ -263,12 +425,36 @@ class DNSReconApp {
}
/**
* Initialize graph visualization
* FIXED: Initialize graph visualization with enhanced debugging
*/
initializeGraph() {
try {
console.log('Initializing graph manager...');
this.graphManager = new GraphManager('network-graph');
// FIXED: Add debugging hooks to graph manager
if (this.graphManager) {
// Override updateGraph to add debugging
const originalUpdateGraph = this.graphManager.updateGraph.bind(this.graphManager);
this.graphManager.updateGraph = (graphData) => {
console.log('🔧 GraphManager.updateGraph called with:', {
nodes: graphData?.nodes?.length || 0,
edges: graphData?.edges?.length || 0,
timestamp: new Date().toISOString()
});
const result = originalUpdateGraph(graphData);
console.log('🔧 GraphManager.updateGraph completed, network state:', {
networkExists: !!this.graphManager.network,
nodeDataSetLength: this.graphManager.nodes?.length || 0,
edgeDataSetLength: this.graphManager.edges?.length || 0
});
return result;
};
}
console.log('Graph manager initialized successfully');
} catch (error) {
console.error('Failed to initialize graph manager:', error);
@@ -288,7 +474,6 @@ class DNSReconApp {
console.log(`Target: "${target}", Max depth: ${maxDepth}`);
// Validation
if (!target) {
console.log('Validation failed: empty target');
this.showError('Please enter a target domain or IP');
@@ -303,6 +488,19 @@ class DNSReconApp {
return;
}
// FIXED: Ensure WebSocket connection before starting scan
if (!this.isConnected) {
console.log('WebSocket not connected, attempting to connect...');
this.socket.connect();
// Wait a moment for connection
await new Promise(resolve => setTimeout(resolve, 1000));
if (!this.isConnected) {
this.showWarning('WebSocket connection not established. Updates may be delayed.');
}
}
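// (editor's sketch, not in the diff) the fixed one-second sleep races the
// handshake; an event-based wait is more robust. waitForConnect is a
// hypothetical helper built only on the standard socket.io client API:
const waitForConnect = (socket, timeoutMs = 3000) => new Promise((resolve) => {
  if (socket.connected) return resolve(true);          // already connected
  const timer = setTimeout(() => resolve(false), timeoutMs);
  socket.once('connect', () => { clearTimeout(timer); resolve(true); });
});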
console.log('Validation passed, setting UI state to scanning...');
this.setUIState('scanning');
this.showInfo('Starting reconnaissance scan...');
@@ -320,23 +518,28 @@ class DNSReconApp {
if (response.success) {
this.currentSessionId = response.scan_id;
this.showSuccess('Reconnaissance scan started successfully');
this.showSuccess('Reconnaissance scan started - watching for real-time updates');
if (clearGraph) {
if (clearGraph && this.graphManager) {
console.log('🧹 Clearing graph for new scan');
this.graphManager.clear();
}
console.log(`Scan started for ${target} with depth ${maxDepth}`);
// Start polling immediately with faster interval for responsiveness
this.startPolling(1000);
// FIXED: Immediately start listening for updates
if (this.socket && this.isConnected) {
console.log('📡 Requesting initial status update...');
this.socket.emit('get_status');
// Force an immediate status update
console.log('Forcing immediate status update...');
setTimeout(() => {
this.updateStatus();
this.updateGraph();
}, 100);
// Set up periodic status requests as backup (every 5 seconds during scan)
/*this.statusRequestInterval = setInterval(() => {
if (this.isScanning && this.socket && this.isConnected) {
console.log('📡 Periodic status request...');
this.socket.emit('get_status');
}
}, 5000);*/
}
} else {
throw new Error(response.error || 'Failed to start scan');
@@ -348,20 +551,23 @@ class DNSReconApp {
this.setUIState('idle');
}
}
/**
* Scan stop with immediate UI feedback
*/
// FIXED: Enhanced stop scan with interval cleanup
async stopScan() {
try {
console.log('Stopping scan...');
// Immediately disable stop button and show stopping state
// Clear status request interval
/*if (this.statusRequestInterval) {
clearInterval(this.statusRequestInterval);
this.statusRequestInterval = null;
}*/
if (this.elements.stopScan) {
this.elements.stopScan.disabled = true;
this.elements.stopScan.innerHTML = '<span class="btn-icon">[STOPPING]</span><span>Stopping...</span>';
}
// Show immediate feedback
this.showInfo('Stopping scan...');
const response = await this.apiCall('/api/scan/stop', 'POST');
@@ -369,21 +575,10 @@ class DNSReconApp {
if (response.success) {
this.showSuccess('Scan stop requested');
// Force immediate status update
setTimeout(() => {
this.updateStatus();
}, 100);
// Continue polling for a bit to catch the status change
this.startPolling(500); // Fast polling to catch status change
// Stop fast polling after 10 seconds
setTimeout(() => {
if (this.scanStatus === 'stopped' || this.scanStatus === 'idle') {
this.stopPolling();
}
}, 10000);
// Request final status update
if (this.socket && this.isConnected) {
setTimeout(() => this.socket.emit('get_status'), 500);
}
} else {
throw new Error(response.error || 'Failed to stop scan');
}
@@ -392,7 +587,6 @@ class DNSReconApp {
console.error('Failed to stop scan:', error);
this.showError(`Failed to stop scan: ${error.message}`);
// Re-enable stop button on error
if (this.elements.stopScan) {
this.elements.stopScan.disabled = false;
this.elements.stopScan.innerHTML = '<span class="btn-icon">[STOP]</span><span>Terminate Scan</span>';
@@ -549,85 +743,24 @@ class DNSReconApp {
}
/**
* Start polling for scan updates with configurable interval
*/
startPolling(interval = 2000) {
console.log('=== STARTING POLLING ===');
if (this.pollInterval) {
console.log('Clearing existing poll interval');
clearInterval(this.pollInterval);
}
this.pollInterval = setInterval(() => {
this.updateStatus();
this.updateGraph();
this.loadProviders();
}, interval);
console.log(`Polling started with ${interval}ms interval`);
}
/**
* Stop polling for updates
*/
stopPolling() {
console.log('=== STOPPING POLLING ===');
if (this.pollInterval) {
clearInterval(this.pollInterval);
this.pollInterval = null;
}
}
/**
* Status update with better error handling
*/
async updateStatus() {
try {
const response = await this.apiCall('/api/scan/status');
if (response.success && response.status) {
const status = response.status;
this.updateStatusDisplay(status);
// Handle status changes
if (status.status !== this.scanStatus) {
console.log(`*** STATUS CHANGED: ${this.scanStatus} -> ${status.status} ***`);
this.handleStatusChange(status.status, status.task_queue_size);
}
this.scanStatus = status.status;
} else {
console.error('Status update failed:', response);
// Don't show error for status updates to avoid spam
}
} catch (error) {
console.error('Failed to update status:', error);
this.showConnectionError();
}
}
/**
* Update graph from server
* FIXED: Update graph from server with enhanced debugging
*/
async updateGraph() {
try {
console.log('Updating graph...');
console.log('Updating graph via API call...');
const response = await this.apiCall('/api/graph');
if (response.success) {
const graphData = response.graph;
console.log('Graph data received:');
console.log('Graph data received from API:');
console.log('- Nodes:', graphData.nodes ? graphData.nodes.length : 0);
console.log('- Edges:', graphData.edges ? graphData.edges.length : 0);
// FIXED: Always update graph, even if empty - let GraphManager handle placeholder
if (this.graphManager) {
console.log('🔧 Calling GraphManager.updateGraph from API response...');
this.graphManager.updateGraph(graphData);
this.lastGraphUpdate = Date.now();
@@ -636,6 +769,8 @@ class DNSReconApp {
if (this.elements.relationshipsDisplay) {
this.elements.relationshipsDisplay.textContent = edgeCount;
}
console.log('✅ Manual graph update completed');
}
} else {
console.error('Graph update failed:', response);
@@ -731,48 +866,70 @@ class DNSReconApp {
* @param {string} newStatus - New scan status
*/
handleStatusChange(newStatus, task_queue_size) {
console.log(`=== STATUS CHANGE: ${this.scanStatus} -> ${newStatus} ===`);
console.log(`📄 Status change handler: ${this.scanStatus} → ${newStatus}`);
switch (newStatus) {
case 'running':
this.setUIState('scanning', task_queue_size);
this.showSuccess('Scan is running');
// Increase polling frequency for active scans
this.startPolling(1000); // Poll every 1 second for running scans
this.showSuccess('Scan is running - updates in real-time');
this.updateConnectionStatus('active');
break;
case 'completed':
this.setUIState('completed', task_queue_size);
this.stopPolling();
this.showSuccess('Scan completed successfully');
this.updateConnectionStatus('completed');
this.loadProviders();
// Force a final graph update
console.log('Scan completed - forcing final graph update');
setTimeout(() => this.updateGraph(), 100);
console.log('✅ Scan completed - requesting final graph update');
// Request final status to ensure we have the complete graph
setTimeout(() => {
if (this.socket && this.isConnected) {
this.socket.emit('get_status');
}
}, 1000);
// Clear status request interval
/*if (this.statusRequestInterval) {
clearInterval(this.statusRequestInterval);
this.statusRequestInterval = null;
}*/
break;
case 'failed':
this.setUIState('failed', task_queue_size);
this.stopPolling();
this.showError('Scan failed');
this.updateConnectionStatus('error');
this.loadProviders();
// Clear status request interval
/*if (this.statusRequestInterval) {
clearInterval(this.statusRequestInterval);
this.statusRequestInterval = null;
}*/
break;
case 'stopped':
this.setUIState('stopped', task_queue_size);
this.stopPolling();
this.showSuccess('Scan stopped');
this.updateConnectionStatus('stopped');
this.loadProviders();
// Clear status request interval
/*if (this.statusRequestInterval) {
clearInterval(this.statusRequestInterval);
this.statusRequestInterval = null;
}*/
break;
case 'idle':
this.setUIState('idle', task_queue_size);
this.stopPolling();
this.updateConnectionStatus('idle');
// Clear status request interval
/*if (this.statusRequestInterval) {
clearInterval(this.statusRequestInterval);
this.statusRequestInterval = null;
}*/
break;
default:
@@ -824,6 +981,7 @@ class DNSReconApp {
if (this.graphManager) {
this.graphManager.isScanning = true;
}
if (this.elements.startScan) {
this.elements.startScan.disabled = true;
this.elements.startScan.classList.add('loading');
@@ -851,6 +1009,7 @@ class DNSReconApp {
if (this.graphManager) {
this.graphManager.isScanning = false;
}
if (this.elements.startScan) {
this.elements.startScan.disabled = !isQueueEmpty;
this.elements.startScan.classList.remove('loading');
@@ -1093,7 +1252,7 @@ class DNSReconApp {
} else {
// API key not configured - ALWAYS show input field
const statusClass = info.enabled ? 'enabled' : 'api-key-required';
const statusText = info.enabled ? ' Ready for API Key' : '⚠️ API Key Required';
inputGroup.innerHTML = `
<div class="provider-header">
@@ -1397,11 +1556,32 @@ class DNSReconApp {
}
/**
* UPDATED: Generate details for standard nodes with organized attribute grouping
* UPDATED: Generate details for standard nodes with organized attribute grouping and data warnings
*/
generateStandardNodeDetails(node) {
let html = '';
// Check for and display a crt.sh data warning if it exists
const crtshWarningAttr = this.findAttributeByName(node.attributes, 'crtsh_data_warning');
if (crtshWarningAttr) {
html += `
<div class="modal-section" style="border-left: 3px solid #ff9900; background: rgba(255, 153, 0, 0.05);">
<details open>
<summary style="color: #ff9900;">
<span>⚠️ Data Integrity Warning</span>
</summary>
<div class="modal-section-content">
<p class="placeholder-subtext" style="color: #e0e0e0; font-size: 0.8rem; line-height: 1.5;">
${this.escapeHtml(crtshWarningAttr.value)}
<br><br>
This can occur for very large domains (e.g., google.com) where crt.sh may return a limited subset of all available certificates. As a result, the certificate status may not be fully representative.
</p>
</div>
</details>
</div>
`;
}
// Relationships sections
html += this.generateRelationshipsSection(node);
@@ -1419,6 +1599,19 @@ class DNSReconApp {
return html;
}
/**
* Helper method to find an attribute by name in the standardized attributes list
* @param {Array} attributes - List of StandardAttribute objects
* @param {string} name - Attribute name to find
* @returns {Object|null} The attribute object if found, null otherwise
*/
findAttributeByName(attributes, name) {
if (!Array.isArray(attributes)) {
return null;
}
return attributes.find(attr => attr.name === name) || null;
}
generateOrganizedAttributesSection(attributes, nodeType) {
if (!Array.isArray(attributes) || attributes.length === 0) {
return '';
@@ -1997,14 +2190,12 @@ class DNSReconApp {
if (response.success) {
this.showSuccess(response.message);
this.hideModal();
// If the scanner was idle, it's now running. Start polling to see the new node appear.
if (this.scanStatus === 'idle') {
this.startPolling(1000);
this.socket.emit('get_status');
} else {
// If already scanning, force a quick graph update to see the change sooner.
setTimeout(() => this.updateGraph(), 500);
setTimeout(() => this.socket.emit('get_status'), 500);
}
} else {
@@ -2043,8 +2234,8 @@ class DNSReconApp {
*/
getNodeTypeIcon(nodeType) {
const icons = {
'domain': '🌍',
'ip': '📍',
'domain': '🌐',
'ip': '🔢',
'asn': '🏢',
'large_entity': '📦',
'correlation_object': '🔗'

templates/index.html
View File

@@ -7,6 +7,7 @@
<title>DNSRecon - Infrastructure Reconnaissance</title>
<link rel="stylesheet" href="{{ url_for('static', filename='css/main.css') }}">
<script src="https://cdnjs.cloudflare.com/ajax/libs/vis/4.21.0/vis.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/socket.io/4.7.2/socket.io.js"></script>
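<!-- editorial note: the 4.x browser client speaks Engine.IO protocol v4 and
     pairs with Flask-SocketIO 5.x on the server; mixing major versions
     fails the handshake -->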
<link href="https://cdnjs.cloudflare.com/ajax/libs/vis/4.21.0/vis.min.css" rel="stylesheet" type="text/css">
<link
href="https://fonts.googleapis.com/css2?family=Roboto+Mono:wght@300;400;500;700&family=Special+Elite&display=swap"