full re-implementation

overcuriousity
2025-09-10 13:53:32 +02:00
parent 29e36e34be
commit 696cec0723
32 changed files with 4731 additions and 7955 deletions

core/__init__.py (new file, 22 lines)

@@ -0,0 +1,22 @@
"""
Core modules for DNSRecon passive reconnaissance tool.
Contains graph management, scanning orchestration, and forensic logging.
"""
from .graph_manager import GraphManager, NodeType, RelationshipType
from .scanner import Scanner, ScanStatus, scanner
from .logger import ForensicLogger, get_forensic_logger, new_session
__all__ = [
'GraphManager',
'NodeType',
'RelationshipType',
'Scanner',
'ScanStatus',
'scanner',
'ForensicLogger',
'get_forensic_logger',
'new_session'
]
__version__ = "1.0.0-phase1"
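
A minimal usage sketch of the surface exported above (hypothetical session; assumes the repository root is on sys.path so `core` is importable, and the `"manual"` provider label is illustrative):

from core import GraphManager, NodeType, RelationshipType, new_session

logger = new_session()  # fresh forensic session for the run
graph = GraphManager()
graph.add_node("example.com", NodeType.DOMAIN)
graph.add_node("www.example.com", NodeType.DOMAIN)
graph.add_edge("example.com", "www.example.com",
               RelationshipType.CNAME_RECORD, source_provider="manual")
print(graph.get_node_count())  # -> 2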

core/graph_manager.py (new file, 355 lines)

@@ -0,0 +1,355 @@
"""
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
"""
import threading
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional, Tuple
from enum import Enum
import networkx as nx
class NodeType(Enum):
"""Enumeration of supported node types."""
DOMAIN = "domain"
IP = "ip"
CERTIFICATE = "certificate"
ASN = "asn"
class RelationshipType(Enum):
"""Enumeration of supported relationship types with confidence scores."""
SAN_CERTIFICATE = ("san", 0.9) # Certificate SAN relationships
A_RECORD = ("a_record", 0.8) # A/AAAA record relationships
CNAME_RECORD = ("cname", 0.8) # CNAME relationships
PASSIVE_DNS = ("passive_dns", 0.6) # Passive DNS relationships
ASN_MEMBERSHIP = ("asn", 0.7) # ASN relationships
MX_RECORD = ("mx_record", 0.7) # MX record relationships
NS_RECORD = ("ns_record", 0.7) # NS record relationships
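# Each member's tuple value is unpacked into __init__ below, giving every
# relationship type a short wire name plus a default confidence in [0.0, 1.0].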
def __init__(self, relationship_name: str, default_confidence: float):
self.relationship_name = relationship_name
self.default_confidence = default_confidence
class GraphManager:
"""
Thread-safe graph manager for DNSRecon infrastructure mapping.
Uses NetworkX for in-memory graph storage with confidence scoring.
"""
def __init__(self):
"""Initialize empty directed graph."""
self.graph = nx.DiGraph()
#self.lock = threading.Lock()
self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time
def add_node(self, node_id: str, node_type: NodeType,
metadata: Optional[Dict[str, Any]] = None) -> bool:
"""
Add a node to the graph.
Args:
node_id: Unique identifier for the node
node_type: Type of the node (Domain, IP, Certificate, ASN)
metadata: Additional metadata for the node
Returns:
bool: True if node was added, False if it already exists
"""
if self.graph.has_node(node_id):
# Update metadata if node exists
existing_metadata = self.graph.nodes[node_id].get('metadata', {})
if metadata:
existing_metadata.update(metadata)
self.graph.nodes[node_id]['metadata'] = existing_metadata
return False
node_attributes = {
'type': node_type.value,
'added_timestamp': datetime.now(timezone.utc).isoformat(),
'metadata': metadata or {}
}
self.graph.add_node(node_id, **node_attributes)
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def add_edge(self, source_id: str, target_id: str,
relationship_type: RelationshipType,
confidence_score: Optional[float] = None,
source_provider: str = "unknown",
raw_data: Optional[Dict[str, Any]] = None) -> bool:
"""
Add an edge between two nodes.
Args:
source_id: Source node identifier
target_id: Target node identifier
relationship_type: Type of relationship
confidence_score: Custom confidence score (overrides default)
source_provider: Provider that discovered this relationship
raw_data: Raw data from provider response
Returns:
bool: True if edge was added, False if it already exists
"""
#with self.lock:
# Ensure both nodes exist
if not self.graph.has_node(source_id) or not self.graph.has_node(target_id):
return False
# Check if edge already exists
if self.graph.has_edge(source_id, target_id):
# Update confidence score if new score is higher
existing_confidence = self.graph.edges[source_id, target_id]['confidence_score']
new_confidence = confidence_score if confidence_score is not None else relationship_type.default_confidence
if new_confidence > existing_confidence:
self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
self.graph.edges[source_id, target_id]['updated_by'] = source_provider
return False
edge_attributes = {
'relationship_type': relationship_type.relationship_name,
'confidence_score': confidence_score if confidence_score is not None else relationship_type.default_confidence,
'source_provider': source_provider,
'discovery_timestamp': datetime.now(timezone.utc).isoformat(),
'raw_data': raw_data or {}
}
self.graph.add_edge(source_id, target_id, **edge_attributes)
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def get_node_count(self) -> int:
"""Get total number of nodes in the graph."""
#with self.lock:
return self.graph.number_of_nodes()
def get_edge_count(self) -> int:
"""Get total number of edges in the graph."""
#with self.lock:
return self.graph.number_of_edges()
def get_nodes_by_type(self, node_type: NodeType) -> List[str]:
"""
Get all nodes of a specific type.
Args:
node_type: Type of nodes to retrieve
Returns:
List of node identifiers
"""
#with self.lock:
return [
node_id for node_id, attributes in self.graph.nodes(data=True)
if attributes.get('type') == node_type.value
]
def get_neighbors(self, node_id: str) -> List[str]:
"""
Get all neighboring nodes (both incoming and outgoing).
Args:
node_id: Node identifier
Returns:
List of neighboring node identifiers
"""
#with self.lock:
if not self.graph.has_node(node_id):
return []
predecessors = list(self.graph.predecessors(node_id))
successors = list(self.graph.successors(node_id))
return list(set(predecessors + successors))
def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
"""
Get edges with confidence score above threshold.
Args:
min_confidence: Minimum confidence threshold
Returns:
List of tuples (source, target, attributes)
"""
#with self.lock:
return [
(source, target, attributes)
for source, target, attributes in self.graph.edges(data=True)
if attributes.get('confidence_score', 0) >= min_confidence
]
def get_graph_data(self) -> Dict[str, Any]:
"""
Export graph data for visualization.
Returns:
Dictionary containing nodes and edges for frontend visualization
"""
#with self.lock:
nodes = []
edges = []
# Format nodes for visualization
for node_id, attributes in self.graph.nodes(data=True):
node_data = {
'id': node_id,
'label': node_id,
'type': attributes.get('type', 'unknown'),
'metadata': attributes.get('metadata', {}),
'added_timestamp': attributes.get('added_timestamp')
}
# Color coding by type
type_colors = {
'domain': '#00ff41', # Green for domains
'ip': '#ff9900', # Amber for IPs
'certificate': '#c7c7c7', # Gray for certificates
'asn': '#00aaff' # Blue for ASNs
}
node_data['color'] = type_colors.get(attributes.get('type'), '#ffffff')
nodes.append(node_data)
# Format edges for visualization
for source, target, attributes in self.graph.edges(data=True):
edge_data = {
'from': source,
'to': target,
'label': attributes.get('relationship_type', ''),
'confidence_score': attributes.get('confidence_score', 0),
'source_provider': attributes.get('source_provider', ''),
'discovery_timestamp': attributes.get('discovery_timestamp')
}
# Edge styling based on confidence
confidence = attributes.get('confidence_score', 0)
if confidence >= 0.8:
edge_data['color'] = '#00ff41' # Green for high confidence
edge_data['width'] = 3
elif confidence >= 0.6:
edge_data['color'] = '#ff9900' # Amber for medium confidence
edge_data['width'] = 2
else:
edge_data['color'] = '#444444' # Dark gray for low confidence
edge_data['width'] = 1
edges.append(edge_data)
return {
'nodes': nodes,
'edges': edges,
'statistics': {
'node_count': len(nodes),
'edge_count': len(edges),
'creation_time': self.creation_time,
'last_modified': self.last_modified
}
}
def export_json(self) -> Dict[str, Any]:
"""
Export complete graph data as JSON for download.
Returns:
Dictionary containing complete graph data with metadata
"""
#with self.lock:
# Get basic graph data
graph_data = self.get_graph_data()
# Add comprehensive metadata
export_data = {
'export_metadata': {
'export_timestamp': datetime.now(timezone.utc).isoformat(),
'graph_creation_time': self.creation_time,
'last_modified': self.last_modified,
'total_nodes': self.graph.number_of_nodes(),
'total_edges': self.graph.number_of_edges(),
'graph_format': 'dnsrecon_v1'
},
'nodes': graph_data['nodes'],
'edges': graph_data['edges'],
'node_types': [node_type.value for node_type in NodeType],
'relationship_types': [
{
'name': rel_type.relationship_name,
'default_confidence': rel_type.default_confidence
}
for rel_type in RelationshipType
],
'confidence_distribution': self._get_confidence_distribution()
}
return export_data
def _get_confidence_distribution(self) -> Dict[str, int]:
"""Get distribution of confidence scores."""
distribution = {'high': 0, 'medium': 0, 'low': 0}
for _, _, attributes in self.graph.edges(data=True):
confidence = attributes.get('confidence_score', 0)
if confidence >= 0.8:
distribution['high'] += 1
elif confidence >= 0.6:
distribution['medium'] += 1
else:
distribution['low'] += 1
return distribution
def get_statistics(self) -> Dict[str, Any]:
"""
Get comprehensive graph statistics.
Returns:
Dictionary containing various graph metrics
"""
#with self.lock:
stats = {
'basic_metrics': {
'total_nodes': self.graph.number_of_nodes(),
'total_edges': self.graph.number_of_edges(),
'creation_time': self.creation_time,
'last_modified': self.last_modified
},
'node_type_distribution': {},
'relationship_type_distribution': {},
'confidence_distribution': self._get_confidence_distribution(),
'provider_distribution': {}
}
# Node type distribution
for node_type in NodeType:
count = len(self.get_nodes_by_type(node_type))
stats['node_type_distribution'][node_type.value] = count
# Relationship type distribution
for _, _, attributes in self.graph.edges(data=True):
rel_type = attributes.get('relationship_type', 'unknown')
stats['relationship_type_distribution'][rel_type] = \
stats['relationship_type_distribution'].get(rel_type, 0) + 1
# Provider distribution
for _, _, attributes in self.graph.edges(data=True):
provider = attributes.get('source_provider', 'unknown')
stats['provider_distribution'][provider] = \
stats['provider_distribution'].get(provider, 0) + 1
return stats
def clear(self) -> None:
"""Clear all nodes and edges from the graph."""
#with self.lock:
self.graph.clear()
self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time
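
A short sketch of the edge-confidence semantics implemented above, assuming `core.graph_manager` is importable (the provider labels are illustrative): re-adding an existing edge returns False, but a higher score overwrites the stored confidence.

from core.graph_manager import GraphManager, NodeType, RelationshipType

g = GraphManager()
g.add_node("example.com", NodeType.DOMAIN)
g.add_node("203.0.113.10", NodeType.IP)
# First insertion takes the A-record default confidence (0.8).
g.add_edge("example.com", "203.0.113.10", RelationshipType.A_RECORD,
           source_provider="dns")
# Second insertion is rejected as a duplicate but bumps the score to 0.95.
added = g.add_edge("example.com", "203.0.113.10", RelationshipType.A_RECORD,
                   confidence_score=0.95, source_provider="crtsh")
assert added is False
assert g.get_high_confidence_edges(0.9)[0][2]['confidence_score'] == 0.95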

core/logger.py (new file, 270 lines)

@@ -0,0 +1,270 @@
"""
Forensic logging system for DNSRecon tool.
Provides structured audit trail for all reconnaissance activities.
"""
import logging
import threading
from datetime import datetime, timezone
from typing import Dict, Any, Optional, List
from dataclasses import dataclass, asdict
@dataclass
class APIRequest:
"""Structured representation of an API request for forensic logging."""
timestamp: str
provider: str
url: str
method: str
status_code: Optional[int]
response_size: Optional[int]
duration_ms: Optional[float]
error: Optional[str]
target_indicator: str
discovery_context: Optional[str]
@dataclass
class RelationshipDiscovery:
"""Structured representation of a discovered relationship."""
timestamp: str
source_node: str
target_node: str
relationship_type: str
confidence_score: float
provider: str
raw_data: Dict[str, Any]
discovery_method: str
class ForensicLogger:
"""
Thread-safe forensic logging system for DNSRecon.
Maintains detailed audit trail of all reconnaissance activities.
"""
def __init__(self, session_id: str = None):
"""
Initialize forensic logger.
Args:
session_id: Unique identifier for this reconnaissance session
"""
self.session_id = session_id or self._generate_session_id()
#self.lock = threading.Lock()
# Initialize audit trail storage
self.api_requests: List[APIRequest] = []
self.relationships: List[RelationshipDiscovery] = []
self.session_metadata = {
'session_id': self.session_id,
'start_time': datetime.now(timezone.utc).isoformat(),
'end_time': None,
'total_requests': 0,
'total_relationships': 0,
'providers_used': set(),
'target_domains': set()
}
# Configure standard logger
self.logger = logging.getLogger(f'dnsrecon.{self.session_id}')
self.logger.setLevel(logging.INFO)
# Create formatter for structured logging
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Add console handler if not already present
if not self.logger.handlers:
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)
def _generate_session_id(self) -> str:
"""Generate unique session identifier."""
return f"dnsrecon_{datetime.now(datetime.UTC).strftime('%Y%m%d_%H%M%S')}"
def log_api_request(self, provider: str, url: str, method: str = "GET",
status_code: Optional[int] = None,
response_size: Optional[int] = None,
duration_ms: Optional[float] = None,
error: Optional[str] = None,
target_indicator: str = "",
discovery_context: Optional[str] = None) -> None:
"""
Log an API request for forensic audit trail.
Args:
provider: Name of the data provider
url: Request URL
method: HTTP method
status_code: HTTP response status code
response_size: Size of response in bytes
duration_ms: Request duration in milliseconds
error: Error message if request failed
target_indicator: The indicator being investigated
discovery_context: Context of how this indicator was discovered
"""
#with self.lock:
api_request = APIRequest(
timestamp=datetime.now(timezone.utc).isoformat(),
provider=provider,
url=url,
method=method,
status_code=status_code,
response_size=response_size,
duration_ms=duration_ms,
error=error,
target_indicator=target_indicator,
discovery_context=discovery_context
)
self.api_requests.append(api_request)
self.session_metadata['total_requests'] += 1
self.session_metadata['providers_used'].add(provider)
if target_indicator:
self.session_metadata['target_domains'].add(target_indicator)
# Log to standard logger
if error:
self.logger.error(f"API Request Failed - {provider}: {url} - {error}")
else:
self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}")
def log_relationship_discovery(self, source_node: str, target_node: str,
relationship_type: str, confidence_score: float,
provider: str, raw_data: Dict[str, Any],
discovery_method: str) -> None:
"""
Log discovery of a new relationship between indicators.
Args:
source_node: Source node identifier
target_node: Target node identifier
relationship_type: Type of relationship (e.g., 'SAN', 'A_Record')
confidence_score: Confidence score (0.0 to 1.0)
provider: Provider that discovered this relationship
raw_data: Raw data from provider response
discovery_method: Method used to discover relationship
"""
#with self.lock:
relationship = RelationshipDiscovery(
timestamp=datetime.now(timezone.utc).isoformat(),
source_node=source_node,
target_node=target_node,
relationship_type=relationship_type,
confidence_score=confidence_score,
provider=provider,
raw_data=raw_data,
discovery_method=discovery_method
)
self.relationships.append(relationship)
self.session_metadata['total_relationships'] += 1
self.logger.info(
f"Relationship Discovered - {source_node} -> {target_node} "
f"({relationship_type}) - Confidence: {confidence_score:.2f} - Provider: {provider}"
)
def log_scan_start(self, target_domain: str, recursion_depth: int,
enabled_providers: List[str]) -> None:
"""Log the start of a reconnaissance scan."""
self.logger.info(f"Scan Started - Target: {target_domain}, Depth: {recursion_depth}")
self.logger.info(f"Enabled Providers: {', '.join(enabled_providers)}")
#with self.lock:
self.session_metadata['target_domains'].add(target_domain)
def log_scan_complete(self) -> None:
"""Log the completion of a reconnaissance scan."""
#with self.lock:
self.session_metadata['end_time'] = datetime.now(timezone.utc).isoformat()
self.session_metadata['providers_used'] = list(self.session_metadata['providers_used'])
self.session_metadata['target_domains'] = list(self.session_metadata['target_domains'])
self.logger.info(f"Scan Complete - Session: {self.session_id}")
self.logger.info(f"Total API Requests: {self.session_metadata['total_requests']}")
self.logger.info(f"Total Relationships: {self.session_metadata['total_relationships']}")
def export_audit_trail(self) -> Dict[str, Any]:
"""
Export complete audit trail for forensic analysis.
Returns:
Dictionary containing complete session audit trail
"""
#with self.lock:
return {
'session_metadata': self.session_metadata.copy(),
'api_requests': [asdict(req) for req in self.api_requests],
'relationships': [asdict(rel) for rel in self.relationships],
'export_timestamp': datetime.now(timezone.utc).isoformat()
}
def get_forensic_summary(self) -> Dict[str, Any]:
"""
Get summary statistics for forensic reporting.
Returns:
Dictionary containing summary statistics
"""
#with self.lock:
provider_stats = {}
for provider in self.session_metadata['providers_used']:
provider_requests = [req for req in self.api_requests if req.provider == provider]
provider_relationships = [rel for rel in self.relationships if rel.provider == provider]
provider_stats[provider] = {
'total_requests': len(provider_requests),
'successful_requests': len([req for req in provider_requests if req.error is None]),
'failed_requests': len([req for req in provider_requests if req.error is not None]),
'relationships_discovered': len(provider_relationships),
'avg_confidence': sum(rel.confidence_score for rel in provider_relationships) / len(provider_relationships) if provider_relationships else 0
}
return {
'session_id': self.session_id,
'duration_minutes': self._calculate_session_duration(),
'total_requests': self.session_metadata['total_requests'],
'total_relationships': self.session_metadata['total_relationships'],
'unique_indicators': len(set([rel.source_node for rel in self.relationships] + [rel.target_node for rel in self.relationships])),
'provider_statistics': provider_stats
}
def _calculate_session_duration(self) -> float:
"""Calculate session duration in minutes."""
if not self.session_metadata['end_time']:
end_time = datetime.now(timezone.utc)
else:
end_time = datetime.fromisoformat(self.session_metadata['end_time'])
start_time = datetime.fromisoformat(self.session_metadata['start_time'])
duration = (end_time - start_time).total_seconds() / 60
return round(duration, 2)
# Global logger instance for the current session
_current_logger: Optional[ForensicLogger] = None
_logger_lock = threading.Lock()
def get_forensic_logger() -> ForensicLogger:
"""Get or create the current forensic logger instance."""
global _current_logger
with _logger_lock:
if _current_logger is None:
_current_logger = ForensicLogger()
return _current_logger
def new_session() -> ForensicLogger:
"""Start a new forensic logging session."""
global _current_logger
with _logger_lock:
_current_logger = ForensicLogger()
return _current_logger
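
A hedged sketch of the logging flow above (the provider name and URL are illustrative, not a guaranteed crt.sh endpoint):

from core.logger import new_session

log = new_session()
log.log_scan_start("example.com", recursion_depth=2, enabled_providers=["crtsh"])
log.log_api_request(provider="crtsh",
                    url="https://crt.sh/?q=example.com&output=json",
                    status_code=200, duration_ms=412.0,
                    target_indicator="example.com")
log.log_scan_complete()
print(log.get_forensic_summary()['total_requests'])  # -> 1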

core/scanner.py (new file, 461 lines)

@@ -0,0 +1,461 @@
"""
Main scanning orchestrator for DNSRecon.
Coordinates data gathering from multiple providers and builds the infrastructure graph.
"""
import threading
import traceback
from typing import List, Set, Dict, Any
from core.graph_manager import GraphManager, NodeType, RelationshipType
from core.logger import get_forensic_logger, new_session
from providers.crtsh_provider import CrtShProvider
from config import config
class ScanStatus:
"""Enumeration of scan statuses."""
IDLE = "idle"
RUNNING = "running"
COMPLETED = "completed"
FAILED = "failed"
STOPPED = "stopped"
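# Plain string constants rather than enum.Enum, so the status value drops
# straight into the JSON status payload without a .value conversion.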
class Scanner:
"""
Main scanning orchestrator for DNSRecon passive reconnaissance.
Manages multi-provider data gathering and graph construction.
"""
def __init__(self):
"""Initialize scanner with default providers and empty graph."""
print("Initializing Scanner instance...")
try:
from providers.base_provider import BaseProvider
self.graph = GraphManager()
self.providers: List[BaseProvider] = []
self.status = ScanStatus.IDLE
self.current_target = None
self.current_depth = 0
self.max_depth = 2
self.stop_requested = False
self.scan_thread = None
# Scanning progress tracking
self.total_indicators_found = 0
self.indicators_processed = 0
self.current_indicator = ""
# Initialize providers
print("Calling _initialize_providers...")
self._initialize_providers()
# Initialize logger
print("Initializing forensic logger...")
self.logger = get_forensic_logger()
print("Scanner initialization complete")
except Exception as e:
print(f"ERROR: Scanner initialization failed: {e}")
traceback.print_exc()
raise
def _initialize_providers(self) -> None:
"""Initialize available providers based on configuration."""
self.providers = []
print("Initializing providers...")
# Always add free providers
if config.is_provider_enabled('crtsh'):
try:
crtsh_provider = CrtShProvider()
if crtsh_provider.is_available():
self.providers.append(crtsh_provider)
print("✓ CrtSh provider initialized successfully")
else:
print("✗ CrtSh provider is not available")
except Exception as e:
print(f"✗ Failed to initialize CrtSh provider: {e}")
traceback.print_exc()
print(f"Initialized {len(self.providers)} providers")
def _debug_threads(self):
"""Debug function to show current threads."""
print("=== THREAD DEBUG INFO ===")
for t in threading.enumerate():
print(f"Thread: {t.name} | Alive: {t.is_alive()} | Daemon: {t.daemon}")
print("=== END THREAD DEBUG ===")
def start_scan(self, target_domain: str, max_depth: int = 2) -> bool:
"""
Start a new reconnaissance scan.
Args:
target_domain: Initial domain to investigate
max_depth: Maximum recursion depth
Returns:
bool: True if scan started successfully
"""
print(f"Scanner.start_scan called with target='{target_domain}', depth={max_depth}")
try:
print("Checking current status...")
self._debug_threads()
if self.status == ScanStatus.RUNNING:
print("Scan already running, rejecting new scan")
return False
# Check if we have any providers
if not self.providers:
print("No providers available, cannot start scan")
return False
print(f"Current status: {self.status}, Providers: {len(self.providers)}")
# Stop any existing scan thread
if self.scan_thread and self.scan_thread.is_alive():
print("Stopping existing scan thread...")
self.stop_requested = True
self.scan_thread.join(timeout=2.0)
if self.scan_thread.is_alive():
print("WARNING: Could not stop existing thread")
return False
# Reset state
print("Resetting scanner state...")
#print("Running graph.clear()")
#self.graph.clear()
print("running self.current_target = target_domain.lower().strip()")
self.current_target = target_domain.lower().strip()
self.max_depth = max_depth
self.current_depth = 0
self.stop_requested = False
self.total_indicators_found = 0
self.indicators_processed = 0
self.current_indicator = self.current_target
# Start new forensic session
print("Starting new forensic session...")
self.logger = new_session()
# FOR DEBUGGING: Run scan synchronously instead of in thread
print("Running scan synchronously for debugging...")
self._execute_scan_sync(self.current_target, max_depth)
return True
except Exception as e:
print(f"ERROR: Exception in start_scan: {e}")
traceback.print_exc()
return False
def stop_scan(self) -> bool:
"""
Request scan termination.
Returns:
bool: True if stop request was accepted
"""
try:
if self.status == ScanStatus.RUNNING:
self.stop_requested = True
print("Scan stop requested")
return True
print("No active scan to stop")
return False
except Exception as e:
print(f"ERROR: Exception in stop_scan: {e}")
traceback.print_exc()
return False
def get_scan_status(self) -> Dict[str, Any]:
"""
Get current scan status and progress.
Returns:
Dictionary containing scan status information
"""
try:
return {
'status': self.status,
'target_domain': self.current_target,
'current_depth': self.current_depth,
'max_depth': self.max_depth,
'current_indicator': self.current_indicator,
'total_indicators_found': self.total_indicators_found,
'indicators_processed': self.indicators_processed,
'progress_percentage': self._calculate_progress(),
'enabled_providers': [provider.get_name() for provider in self.providers],
'graph_statistics': self.graph.get_statistics()
}
except Exception as e:
print(f"ERROR: Exception in get_scan_status: {e}")
traceback.print_exc()
return {
'status': 'error',
'target_domain': None,
'current_depth': 0,
'max_depth': 0,
'current_indicator': '',
'total_indicators_found': 0,
'indicators_processed': 0,
'progress_percentage': 0.0,
'enabled_providers': [],
'graph_statistics': {}
}
def _calculate_progress(self) -> float:
"""Calculate scan progress percentage."""
if self.total_indicators_found == 0:
return 0.0
return min(100.0, (self.indicators_processed / self.total_indicators_found) * 100)
def _execute_scan_sync(self, target_domain: str, max_depth: int) -> None:
"""
Execute the reconnaissance scan synchronously (for debugging).
Args:
target_domain: Target domain to investigate
max_depth: Maximum recursion depth
"""
print(f"_execute_scan_sync started for {target_domain} with depth {max_depth}")
try:
print("Setting status to RUNNING")
self.status = ScanStatus.RUNNING
# Log scan start
enabled_providers = [provider.get_name() for provider in self.providers]
self.logger.log_scan_start(target_domain, max_depth, enabled_providers)
print(f"Logged scan start with providers: {enabled_providers}")
# Initialize with target domain
print(f"Adding target domain '{target_domain}' as initial node")
self.graph.add_node(target_domain, NodeType.DOMAIN)
# BFS-style exploration with depth limiting
current_level_domains = {target_domain}
processed_domains = set()
print(f"Starting BFS exploration...")
for depth in range(max_depth + 1):
if self.stop_requested:
print(f"Stop requested at depth {depth}")
break
self.current_depth = depth
print(f"Processing depth level {depth} with {len(current_level_domains)} domains")
if not current_level_domains:
print("No domains to process at this level")
break
# Update progress tracking
self.total_indicators_found += len(current_level_domains)
next_level_domains = set()
# Process domains at current depth level
for domain in current_level_domains:
if self.stop_requested:
print(f"Stop requested while processing domain {domain}")
break
if domain in processed_domains:
print(f"Domain {domain} already processed, skipping")
continue
print(f"Processing domain: {domain}")
self.current_indicator = domain
self.indicators_processed += 1
# Query all providers for this domain
discovered_domains = self._query_providers_for_domain(domain)
print(f"Discovered {len(discovered_domains)} new domains from {domain}")
# Add discovered domains to next level if not at max depth
if depth < max_depth:
for discovered_domain in discovered_domains:
if discovered_domain not in processed_domains:
next_level_domains.add(discovered_domain)
print(f"Adding {discovered_domain} to next level")
processed_domains.add(domain)
current_level_domains = next_level_domains
print(f"Completed depth {depth}, {len(next_level_domains)} domains for next level")
# Finalize scan
if self.stop_requested:
self.status = ScanStatus.STOPPED
print("Scan completed with STOPPED status")
else:
self.status = ScanStatus.COMPLETED
print("Scan completed with COMPLETED status")
self.logger.log_scan_complete()
# Print final statistics
stats = self.graph.get_statistics()
print(f"Final scan statistics:")
print(f" - Total nodes: {stats['basic_metrics']['total_nodes']}")
print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
print(f" - Domains processed: {len(processed_domains)}")
except Exception as e:
print(f"ERROR: Scan execution failed with error: {e}")
traceback.print_exc()
self.status = ScanStatus.FAILED
self.logger.logger.error(f"Scan failed: {e}")
def _query_providers_for_domain(self, domain: str) -> Set[str]:
"""
Query all enabled providers for information about a domain.
Args:
domain: Domain to investigate
Returns:
Set of newly discovered domains
"""
print(f"Querying {len(self.providers)} providers for domain: {domain}")
discovered_domains = set()
if not self.providers:
print("No providers available")
return discovered_domains
# Query providers sequentially for debugging
for provider in self.providers:
if self.stop_requested:
print("Stop requested, cancelling provider queries")
break
try:
print(f"Querying provider: {provider.get_name()}")
relationships = provider.query_domain(domain)
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships")
for source, target, rel_type, confidence, raw_data in relationships:
print(f"Processing relationship: {source} -> {target} ({rel_type.relationship_name})")
# Add target node to graph if it doesn't exist
self.graph.add_node(target, NodeType.DOMAIN)
# Add relationship
success = self.graph.add_edge(
source, target, rel_type, confidence,
provider.get_name(), raw_data
)
if success:
print(f"Added new relationship: {source} -> {target}")
else:
print(f"Relationship already exists or failed to add: {source} -> {target}")
discovered_domains.add(target)
except Exception as e:
print(f"Provider {provider.get_name()} failed for {domain}: {e}")
traceback.print_exc()
self.logger.logger.error(f"Provider {provider.get_name()} failed for {domain}: {e}")
print(f"Total unique domains discovered: {len(discovered_domains)}")
return discovered_domains
def get_graph_data(self) -> Dict[str, Any]:
"""
Get current graph data for visualization.
Returns:
Graph data formatted for frontend
"""
return self.graph.get_graph_data()
def export_results(self) -> Dict[str, Any]:
"""
Export complete scan results including graph and audit trail.
Returns:
Dictionary containing complete scan results
"""
# Get graph data
graph_data = self.graph.export_json()
# Get forensic audit trail
audit_trail = self.logger.export_audit_trail()
# Get provider statistics
provider_stats = {}
for provider in self.providers:
provider_stats[provider.get_name()] = provider.get_statistics()
# Combine all results
export_data = {
'scan_metadata': {
'target_domain': self.current_target,
'max_depth': self.max_depth,
'final_status': self.status,
'total_indicators_processed': self.indicators_processed,
'enabled_providers': list(provider_stats.keys())
},
'graph_data': graph_data,
'forensic_audit': audit_trail,
'provider_statistics': provider_stats,
'scan_summary': self.logger.get_forensic_summary()
}
return export_data
def remove_provider(self, provider_name: str) -> bool:
"""
Remove a provider from the scanner.
Args:
provider_name: Name of provider to remove
Returns:
bool: True if provider was removed
"""
for i, provider in enumerate(self.providers):
if provider.get_name() == provider_name:
self.providers.pop(i)
return True
return False
def get_provider_statistics(self) -> Dict[str, Dict[str, Any]]:
"""
Get statistics for all providers.
Returns:
Dictionary mapping provider names to their statistics
"""
stats = {}
for provider in self.providers:
stats[provider.get_name()] = provider.get_statistics()
return stats
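# Note: ScannerProxy below defers constructing the real Scanner until the
# first attribute access. Importing this module therefore stays cheap, and
# provider initialization (which probes provider availability) only runs
# once a caller actually touches the global `scanner` object.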
class ScannerProxy:
def __init__(self):
self._scanner = None
print("ScannerProxy initialized")
def __getattr__(self, name):
if self._scanner is None:
print("Creating new Scanner instance...")
self._scanner = Scanner()
print("Scanner instance created")
return getattr(self._scanner, name)
# Global scanner instance
scanner = ScannerProxy()
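
An end-to-end sketch using the module-level proxy, assuming the crtsh provider is configured and reachable (note that start_scan currently runs synchronously, per the debugging note above):

from core.scanner import scanner

if scanner.start_scan("example.com", max_depth=1):
    status = scanner.get_scan_status()
    print(status['status'], status['progress_percentage'])
    results = scanner.export_results()
    print(results['scan_metadata']['final_status'])
else:
    print("scan rejected (already running, or no providers available)")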