full re-implementation

overcuriousity
2025-09-10 13:53:32 +02:00
parent 29e36e34be
commit 696cec0723
32 changed files with 4731 additions and 7955 deletions

core/__init__.py (new file, 22 lines)

@@ -0,0 +1,22 @@
"""
Core modules for DNSRecon passive reconnaissance tool.
Contains graph management, scanning orchestration, and forensic logging.
"""
from .graph_manager import GraphManager, NodeType, RelationshipType
from .scanner import Scanner, ScanStatus, scanner
from .logger import ForensicLogger, get_forensic_logger, new_session
__all__ = [
'GraphManager',
'NodeType',
'RelationshipType',
'Scanner',
'ScanStatus',
'scanner',
'ForensicLogger',
'get_forensic_logger',
'new_session'
]
__version__ = "1.0.0-phase1"
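
A minimal usage sketch of the surface exported above (hypothetical session; assumes the repository root is on sys.path so `core` is importable, and the `"manual"` provider label is illustrative):

from core import GraphManager, NodeType, RelationshipType, new_session

logger = new_session()  # fresh forensic session for the run
graph = GraphManager()
graph.add_node("example.com", NodeType.DOMAIN)
graph.add_node("www.example.com", NodeType.DOMAIN)
graph.add_edge("example.com", "www.example.com",
               RelationshipType.CNAME_RECORD, source_provider="manual")
print(graph.get_node_count())  # -> 2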

core/graph_manager.py (new file, 355 lines)

@@ -0,0 +1,355 @@
"""
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
"""
import threading
from datetime import datetime, timezone
from typing import Dict, List, Any, Optional, Tuple
from enum import Enum
import networkx as nx
class NodeType(Enum):
"""Enumeration of supported node types."""
DOMAIN = "domain"
IP = "ip"
CERTIFICATE = "certificate"
ASN = "asn"
class RelationshipType(Enum):
"""Enumeration of supported relationship types with confidence scores."""
SAN_CERTIFICATE = ("san", 0.9) # Certificate SAN relationships
A_RECORD = ("a_record", 0.8) # A/AAAA record relationships
CNAME_RECORD = ("cname", 0.8) # CNAME relationships
PASSIVE_DNS = ("passive_dns", 0.6) # Passive DNS relationships
ASN_MEMBERSHIP = ("asn", 0.7) # ASN relationships
MX_RECORD = ("mx_record", 0.7) # MX record relationships
NS_RECORD = ("ns_record", 0.7) # NS record relationships
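# Each member's tuple value is unpacked into __init__ below, giving every
# relationship type a short wire name plus a default confidence in [0.0, 1.0].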
def __init__(self, relationship_name: str, default_confidence: float):
self.relationship_name = relationship_name
self.default_confidence = default_confidence
class GraphManager:
"""
Thread-safe graph manager for DNSRecon infrastructure mapping.
Uses NetworkX for in-memory graph storage with confidence scoring.
"""
def __init__(self):
"""Initialize empty directed graph."""
self.graph = nx.DiGraph()
#self.lock = threading.Lock()
self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time
def add_node(self, node_id: str, node_type: NodeType,
metadata: Optional[Dict[str, Any]] = None) -> bool:
"""
Add a node to the graph.
Args:
node_id: Unique identifier for the node
node_type: Type of the node (Domain, IP, Certificate, ASN)
metadata: Additional metadata for the node
Returns:
bool: True if node was added, False if it already exists
"""
if self.graph.has_node(node_id):
# Update metadata if node exists
existing_metadata = self.graph.nodes[node_id].get('metadata', {})
if metadata:
existing_metadata.update(metadata)
self.graph.nodes[node_id]['metadata'] = existing_metadata
return False
node_attributes = {
'type': node_type.value,
'added_timestamp': datetime.now(timezone.utc).isoformat(),
'metadata': metadata or {}
}
self.graph.add_node(node_id, **node_attributes)
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def add_edge(self, source_id: str, target_id: str,
relationship_type: RelationshipType,
confidence_score: Optional[float] = None,
source_provider: str = "unknown",
raw_data: Optional[Dict[str, Any]] = None) -> bool:
"""
Add an edge between two nodes.
Args:
source_id: Source node identifier
target_id: Target node identifier
relationship_type: Type of relationship
confidence_score: Custom confidence score (overrides default)
source_provider: Provider that discovered this relationship
raw_data: Raw data from provider response
Returns:
bool: True if edge was added, False if it already exists
"""
#with self.lock:
# Ensure both nodes exist
if not self.graph.has_node(source_id) or not self.graph.has_node(target_id):
return False
# Check if edge already exists
if self.graph.has_edge(source_id, target_id):
# Update confidence score if new score is higher
existing_confidence = self.graph.edges[source_id, target_id]['confidence_score']
new_confidence = confidence_score if confidence_score is not None else relationship_type.default_confidence
if new_confidence > existing_confidence:
self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
self.graph.edges[source_id, target_id]['updated_by'] = source_provider
return False
edge_attributes = {
'relationship_type': relationship_type.relationship_name,
'confidence_score': confidence_score if confidence_score is not None else relationship_type.default_confidence,
'source_provider': source_provider,
'discovery_timestamp': datetime.now(timezone.utc).isoformat(),
'raw_data': raw_data or {}
}
self.graph.add_edge(source_id, target_id, **edge_attributes)
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def get_node_count(self) -> int:
"""Get total number of nodes in the graph."""
#with self.lock:
return self.graph.number_of_nodes()
def get_edge_count(self) -> int:
"""Get total number of edges in the graph."""
#with self.lock:
return self.graph.number_of_edges()
def get_nodes_by_type(self, node_type: NodeType) -> List[str]:
"""
Get all nodes of a specific type.
Args:
node_type: Type of nodes to retrieve
Returns:
List of node identifiers
"""
#with self.lock:
return [
node_id for node_id, attributes in self.graph.nodes(data=True)
if attributes.get('type') == node_type.value
]
def get_neighbors(self, node_id: str) -> List[str]:
"""
Get all neighboring nodes (both incoming and outgoing).
Args:
node_id: Node identifier
Returns:
List of neighboring node identifiers
"""
#with self.lock:
if not self.graph.has_node(node_id):
return []
predecessors = list(self.graph.predecessors(node_id))
successors = list(self.graph.successors(node_id))
return list(set(predecessors + successors))
def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
"""
Get edges with confidence score above threshold.
Args:
min_confidence: Minimum confidence threshold
Returns:
List of tuples (source, target, attributes)
"""
#with self.lock:
return [
(source, target, attributes)
for source, target, attributes in self.graph.edges(data=True)
if attributes.get('confidence_score', 0) >= min_confidence
]
def get_graph_data(self) -> Dict[str, Any]:
"""
Export graph data for visualization.
Returns:
Dictionary containing nodes and edges for frontend visualization
"""
#with self.lock:
nodes = []
edges = []
# Format nodes for visualization
for node_id, attributes in self.graph.nodes(data=True):
node_data = {
'id': node_id,
'label': node_id,
'type': attributes.get('type', 'unknown'),
'metadata': attributes.get('metadata', {}),
'added_timestamp': attributes.get('added_timestamp')
}
# Color coding by type
type_colors = {
'domain': '#00ff41', # Green for domains
'ip': '#ff9900', # Amber for IPs
'certificate': '#c7c7c7', # Gray for certificates
'asn': '#00aaff' # Blue for ASNs
}
node_data['color'] = type_colors.get(attributes.get('type'), '#ffffff')
nodes.append(node_data)
# Format edges for visualization
for source, target, attributes in self.graph.edges(data=True):
edge_data = {
'from': source,
'to': target,
'label': attributes.get('relationship_type', ''),
'confidence_score': attributes.get('confidence_score', 0),
'source_provider': attributes.get('source_provider', ''),
'discovery_timestamp': attributes.get('discovery_timestamp')
}
# Edge styling based on confidence
confidence = attributes.get('confidence_score', 0)
if confidence >= 0.8:
edge_data['color'] = '#00ff41' # Green for high confidence
edge_data['width'] = 3
elif confidence >= 0.6:
edge_data['color'] = '#ff9900' # Amber for medium confidence
edge_data['width'] = 2
else:
edge_data['color'] = '#444444' # Dark gray for low confidence
edge_data['width'] = 1
edges.append(edge_data)
return {
'nodes': nodes,
'edges': edges,
'statistics': {
'node_count': len(nodes),
'edge_count': len(edges),
'creation_time': self.creation_time,
'last_modified': self.last_modified
}
}
def export_json(self) -> Dict[str, Any]:
"""
Export complete graph data as JSON for download.
Returns:
Dictionary containing complete graph data with metadata
"""
#with self.lock:
# Get basic graph data
graph_data = self.get_graph_data()
# Add comprehensive metadata
export_data = {
'export_metadata': {
'export_timestamp': datetime.now(timezone.utc).isoformat(),
'graph_creation_time': self.creation_time,
'last_modified': self.last_modified,
'total_nodes': self.graph.number_of_nodes(),
'total_edges': self.graph.number_of_edges(),
'graph_format': 'dnsrecon_v1'
},
'nodes': graph_data['nodes'],
'edges': graph_data['edges'],
'node_types': [node_type.value for node_type in NodeType],
'relationship_types': [
{
'name': rel_type.relationship_name,
'default_confidence': rel_type.default_confidence
}
for rel_type in RelationshipType
],
'confidence_distribution': self._get_confidence_distribution()
}
return export_data
def _get_confidence_distribution(self) -> Dict[str, int]:
"""Get distribution of confidence scores."""
distribution = {'high': 0, 'medium': 0, 'low': 0}
for _, _, attributes in self.graph.edges(data=True):
confidence = attributes.get('confidence_score', 0)
if confidence >= 0.8:
distribution['high'] += 1
elif confidence >= 0.6:
distribution['medium'] += 1
else:
distribution['low'] += 1
return distribution
def get_statistics(self) -> Dict[str, Any]:
"""
Get comprehensive graph statistics.
Returns:
Dictionary containing various graph metrics
"""
#with self.lock:
stats = {
'basic_metrics': {
'total_nodes': self.graph.number_of_nodes(),
'total_edges': self.graph.number_of_edges(),
'creation_time': self.creation_time,
'last_modified': self.last_modified
},
'node_type_distribution': {},
'relationship_type_distribution': {},
'confidence_distribution': self._get_confidence_distribution(),
'provider_distribution': {}
}
# Node type distribution
for node_type in NodeType:
count = len(self.get_nodes_by_type(node_type))
stats['node_type_distribution'][node_type.value] = count
# Relationship type distribution
for _, _, attributes in self.graph.edges(data=True):
rel_type = attributes.get('relationship_type', 'unknown')
stats['relationship_type_distribution'][rel_type] = \
stats['relationship_type_distribution'].get(rel_type, 0) + 1
# Provider distribution
for _, _, attributes in self.graph.edges(data=True):
provider = attributes.get('source_provider', 'unknown')
stats['provider_distribution'][provider] = \
stats['provider_distribution'].get(provider, 0) + 1
return stats
def clear(self) -> None:
"""Clear all nodes and edges from the graph."""
#with self.lock:
self.graph.clear()
self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time
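
A short sketch of the edge-confidence semantics implemented above, assuming `core.graph_manager` is importable (the provider labels are illustrative): re-adding an existing edge returns False, but a higher score overwrites the stored confidence.

from core.graph_manager import GraphManager, NodeType, RelationshipType

g = GraphManager()
g.add_node("example.com", NodeType.DOMAIN)
g.add_node("203.0.113.10", NodeType.IP)
# First insertion takes the A-record default confidence (0.8).
g.add_edge("example.com", "203.0.113.10", RelationshipType.A_RECORD,
           source_provider="dns")
# Second insertion is rejected as a duplicate but bumps the score to 0.95.
added = g.add_edge("example.com", "203.0.113.10", RelationshipType.A_RECORD,
                   confidence_score=0.95, source_provider="crtsh")
assert added is False
assert g.get_high_confidence_edges(0.9)[0][2]['confidence_score'] == 0.95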

core/logger.py (new file, 270 lines)

@@ -0,0 +1,270 @@
"""
Forensic logging system for DNSRecon tool.
Provides structured audit trail for all reconnaissance activities.
"""
import logging
import threading
from datetime import datetime, timezone
from typing import Dict, Any, Optional, List
from dataclasses import dataclass, asdict
@dataclass
class APIRequest:
"""Structured representation of an API request for forensic logging."""
timestamp: str
provider: str
url: str
method: str
status_code: Optional[int]
response_size: Optional[int]
duration_ms: Optional[float]
error: Optional[str]
target_indicator: str
discovery_context: Optional[str]
@dataclass
class RelationshipDiscovery:
"""Structured representation of a discovered relationship."""
timestamp: str
source_node: str
target_node: str
relationship_type: str
confidence_score: float
provider: str
raw_data: Dict[str, Any]
discovery_method: str
class ForensicLogger:
"""
Thread-safe forensic logging system for DNSRecon.
Maintains detailed audit trail of all reconnaissance activities.
"""
def __init__(self, session_id: str = None):
"""
Initialize forensic logger.
Args:
session_id: Unique identifier for this reconnaissance session
"""
self.session_id = session_id or self._generate_session_id()
#self.lock = threading.Lock()
# Initialize audit trail storage
self.api_requests: List[APIRequest] = []
self.relationships: List[RelationshipDiscovery] = []
self.session_metadata = {
'session_id': self.session_id,
'start_time': datetime.now(timezone.utc).isoformat(),
'end_time': None,
'total_requests': 0,
'total_relationships': 0,
'providers_used': set(),
'target_domains': set()
}
# Configure standard logger
self.logger = logging.getLogger(f'dnsrecon.{self.session_id}')
self.logger.setLevel(logging.INFO)
# Create formatter for structured logging
formatter = logging.Formatter(
'%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Add console handler if not already present
if not self.logger.handlers:
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
self.logger.addHandler(console_handler)
def _generate_session_id(self) -> str:
"""Generate unique session identifier."""
return f"dnsrecon_{datetime.now(datetime.UTC).strftime('%Y%m%d_%H%M%S')}"
def log_api_request(self, provider: str, url: str, method: str = "GET",
status_code: Optional[int] = None,
response_size: Optional[int] = None,
duration_ms: Optional[float] = None,
error: Optional[str] = None,
target_indicator: str = "",
discovery_context: Optional[str] = None) -> None:
"""
Log an API request for forensic audit trail.
Args:
provider: Name of the data provider
url: Request URL
method: HTTP method
status_code: HTTP response status code
response_size: Size of response in bytes
duration_ms: Request duration in milliseconds
error: Error message if request failed
target_indicator: The indicator being investigated
discovery_context: Context of how this indicator was discovered
"""
#with self.lock:
api_request = APIRequest(
timestamp=datetime.now(timezone.utc).isoformat(),
provider=provider,
url=url,
method=method,
status_code=status_code,
response_size=response_size,
duration_ms=duration_ms,
error=error,
target_indicator=target_indicator,
discovery_context=discovery_context
)
self.api_requests.append(api_request)
self.session_metadata['total_requests'] += 1
self.session_metadata['providers_used'].add(provider)
if target_indicator:
self.session_metadata['target_domains'].add(target_indicator)
# Log to standard logger
if error:
self.logger.error(f"API Request Failed - {provider}: {url} - {error}")
else:
self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}")
def log_relationship_discovery(self, source_node: str, target_node: str,
relationship_type: str, confidence_score: float,
provider: str, raw_data: Dict[str, Any],
discovery_method: str) -> None:
"""
Log discovery of a new relationship between indicators.
Args:
source_node: Source node identifier
target_node: Target node identifier
relationship_type: Type of relationship (e.g., 'SAN', 'A_Record')
confidence_score: Confidence score (0.0 to 1.0)
provider: Provider that discovered this relationship
raw_data: Raw data from provider response
discovery_method: Method used to discover relationship
"""
#with self.lock:
relationship = RelationshipDiscovery(
timestamp=datetime.now(timezone.utc).isoformat(),
source_node=source_node,
target_node=target_node,
relationship_type=relationship_type,
confidence_score=confidence_score,
provider=provider,
raw_data=raw_data,
discovery_method=discovery_method
)
self.relationships.append(relationship)
self.session_metadata['total_relationships'] += 1
self.logger.info(
f"Relationship Discovered - {source_node} -> {target_node} "
f"({relationship_type}) - Confidence: {confidence_score:.2f} - Provider: {provider}"
)
def log_scan_start(self, target_domain: str, recursion_depth: int,
enabled_providers: List[str]) -> None:
"""Log the start of a reconnaissance scan."""
self.logger.info(f"Scan Started - Target: {target_domain}, Depth: {recursion_depth}")
self.logger.info(f"Enabled Providers: {', '.join(enabled_providers)}")
#with self.lock:
self.session_metadata['target_domains'].add(target_domain)
def log_scan_complete(self) -> None:
"""Log the completion of a reconnaissance scan."""
#with self.lock:
self.session_metadata['end_time'] = datetime.now(timezone.utc).isoformat()
self.session_metadata['providers_used'] = list(self.session_metadata['providers_used'])
self.session_metadata['target_domains'] = list(self.session_metadata['target_domains'])
self.logger.info(f"Scan Complete - Session: {self.session_id}")
self.logger.info(f"Total API Requests: {self.session_metadata['total_requests']}")
self.logger.info(f"Total Relationships: {self.session_metadata['total_relationships']}")
def export_audit_trail(self) -> Dict[str, Any]:
"""
Export complete audit trail for forensic analysis.
Returns:
Dictionary containing complete session audit trail
"""
#with self.lock:
return {
'session_metadata': self.session_metadata.copy(),
'api_requests': [asdict(req) for req in self.api_requests],
'relationships': [asdict(rel) for rel in self.relationships],
'export_timestamp': datetime.now(timezone.utc).isoformat()
}
def get_forensic_summary(self) -> Dict[str, Any]:
"""
Get summary statistics for forensic reporting.
Returns:
Dictionary containing summary statistics
"""
#with self.lock:
provider_stats = {}
for provider in self.session_metadata['providers_used']:
provider_requests = [req for req in self.api_requests if req.provider == provider]
provider_relationships = [rel for rel in self.relationships if rel.provider == provider]
provider_stats[provider] = {
'total_requests': len(provider_requests),
'successful_requests': len([req for req in provider_requests if req.error is None]),
'failed_requests': len([req for req in provider_requests if req.error is not None]),
'relationships_discovered': len(provider_relationships),
'avg_confidence': sum(rel.confidence_score for rel in provider_relationships) / len(provider_relationships) if provider_relationships else 0
}
return {
'session_id': self.session_id,
'duration_minutes': self._calculate_session_duration(),
'total_requests': self.session_metadata['total_requests'],
'total_relationships': self.session_metadata['total_relationships'],
'unique_indicators': len(set([rel.source_node for rel in self.relationships] + [rel.target_node for rel in self.relationships])),
'provider_statistics': provider_stats
}
def _calculate_session_duration(self) -> float:
"""Calculate session duration in minutes."""
if not self.session_metadata['end_time']:
end_time = datetime.now(timezone.utc)
else:
end_time = datetime.fromisoformat(self.session_metadata['end_time'])
start_time = datetime.fromisoformat(self.session_metadata['start_time'])
duration = (end_time - start_time).total_seconds() / 60
return round(duration, 2)
# Global logger instance for the current session
_current_logger: Optional[ForensicLogger] = None
_logger_lock = threading.Lock()
def get_forensic_logger() -> ForensicLogger:
"""Get or create the current forensic logger instance."""
global _current_logger
with _logger_lock:
if _current_logger is None:
_current_logger = ForensicLogger()
return _current_logger
def new_session() -> ForensicLogger:
"""Start a new forensic logging session."""
global _current_logger
with _logger_lock:
_current_logger = ForensicLogger()
return _current_logger
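
A hedged sketch of the logging flow above (the provider name and URL are illustrative, not a guaranteed crt.sh endpoint):

from core.logger import new_session

log = new_session()
log.log_scan_start("example.com", recursion_depth=2, enabled_providers=["crtsh"])
log.log_api_request(provider="crtsh",
                    url="https://crt.sh/?q=example.com&output=json",
                    status_code=200, duration_ms=412.0,
                    target_indicator="example.com")
log.log_scan_complete()
print(log.get_forensic_summary()['total_requests'])  # -> 1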

core/scanner.py (new file, 461 lines)

@@ -0,0 +1,461 @@
"""
Main scanning orchestrator for DNSRecon.
Coordinates data gathering from multiple providers and builds the infrastructure graph.
"""
import threading
import traceback
from typing import List, Set, Dict, Any
from core.graph_manager import GraphManager, NodeType, RelationshipType
from core.logger import get_forensic_logger, new_session
from providers.crtsh_provider import CrtShProvider
from config import config
class ScanStatus:
"""Enumeration of scan statuses."""
IDLE = "idle"
RUNNING = "running"
COMPLETED = "completed"
FAILED = "failed"
STOPPED = "stopped"
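# Plain string constants rather than enum.Enum, so the status value drops
# straight into the JSON status payload without a .value conversion.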
class Scanner:
"""
Main scanning orchestrator for DNSRecon passive reconnaissance.
Manages multi-provider data gathering and graph construction.
"""
def __init__(self):
"""Initialize scanner with default providers and empty graph."""
print("Initializing Scanner instance...")
try:
from providers.base_provider import BaseProvider
self.graph = GraphManager()
self.providers: List[BaseProvider] = []
self.status = ScanStatus.IDLE
self.current_target = None
self.current_depth = 0
self.max_depth = 2
self.stop_requested = False
self.scan_thread = None
# Scanning progress tracking
self.total_indicators_found = 0
self.indicators_processed = 0
self.current_indicator = ""
# Initialize providers
print("Calling _initialize_providers...")
self._initialize_providers()
# Initialize logger
print("Initializing forensic logger...")
self.logger = get_forensic_logger()
print("Scanner initialization complete")
except Exception as e:
print(f"ERROR: Scanner initialization failed: {e}")
traceback.print_exc()
raise
def _initialize_providers(self) -> None:
"""Initialize available providers based on configuration."""
self.providers = []
print("Initializing providers...")
# Always add free providers
if config.is_provider_enabled('crtsh'):
try:
crtsh_provider = CrtShProvider()
if crtsh_provider.is_available():
self.providers.append(crtsh_provider)
print("✓ CrtSh provider initialized successfully")
else:
print("✗ CrtSh provider is not available")
except Exception as e:
print(f"✗ Failed to initialize CrtSh provider: {e}")
traceback.print_exc()
print(f"Initialized {len(self.providers)} providers")
def _debug_threads(self):
"""Debug function to show current threads."""
print("=== THREAD DEBUG INFO ===")
for t in threading.enumerate():
print(f"Thread: {t.name} | Alive: {t.is_alive()} | Daemon: {t.daemon}")
print("=== END THREAD DEBUG ===")
def start_scan(self, target_domain: str, max_depth: int = 2) -> bool:
"""
Start a new reconnaissance scan.
Args:
target_domain: Initial domain to investigate
max_depth: Maximum recursion depth
Returns:
bool: True if scan started successfully
"""
print(f"Scanner.start_scan called with target='{target_domain}', depth={max_depth}")
try:
print("Checking current status...")
self._debug_threads()
if self.status == ScanStatus.RUNNING:
print("Scan already running, rejecting new scan")
return False
# Check if we have any providers
if not self.providers:
print("No providers available, cannot start scan")
return False
print(f"Current status: {self.status}, Providers: {len(self.providers)}")
# Stop any existing scan thread
if self.scan_thread and self.scan_thread.is_alive():
print("Stopping existing scan thread...")
self.stop_requested = True
self.scan_thread.join(timeout=2.0)
if self.scan_thread.is_alive():
print("WARNING: Could not stop existing thread")
return False
# Reset state
print("Resetting scanner state...")
#print("Running graph.clear()")
#self.graph.clear()
print("running self.current_target = target_domain.lower().strip()")
self.current_target = target_domain.lower().strip()
self.max_depth = max_depth
self.current_depth = 0
self.stop_requested = False
self.total_indicators_found = 0
self.indicators_processed = 0
self.current_indicator = self.current_target
# Start new forensic session
print("Starting new forensic session...")
self.logger = new_session()
# FOR DEBUGGING: Run scan synchronously instead of in thread
print("Running scan synchronously for debugging...")
self._execute_scan_sync(self.current_target, max_depth)
return True
except Exception as e:
print(f"ERROR: Exception in start_scan: {e}")
traceback.print_exc()
return False
def stop_scan(self) -> bool:
"""
Request scan termination.
Returns:
bool: True if stop request was accepted
"""
try:
if self.status == ScanStatus.RUNNING:
self.stop_requested = True
print("Scan stop requested")
return True
print("No active scan to stop")
return False
except Exception as e:
print(f"ERROR: Exception in stop_scan: {e}")
traceback.print_exc()
return False
def get_scan_status(self) -> Dict[str, Any]:
"""
Get current scan status and progress.
Returns:
Dictionary containing scan status information
"""
try:
return {
'status': self.status,
'target_domain': self.current_target,
'current_depth': self.current_depth,
'max_depth': self.max_depth,
'current_indicator': self.current_indicator,
'total_indicators_found': self.total_indicators_found,
'indicators_processed': self.indicators_processed,
'progress_percentage': self._calculate_progress(),
'enabled_providers': [provider.get_name() for provider in self.providers],
'graph_statistics': self.graph.get_statistics()
}
except Exception as e:
print(f"ERROR: Exception in get_scan_status: {e}")
traceback.print_exc()
return {
'status': 'error',
'target_domain': None,
'current_depth': 0,
'max_depth': 0,
'current_indicator': '',
'total_indicators_found': 0,
'indicators_processed': 0,
'progress_percentage': 0.0,
'enabled_providers': [],
'graph_statistics': {}
}
def _calculate_progress(self) -> float:
"""Calculate scan progress percentage."""
if self.total_indicators_found == 0:
return 0.0
return min(100.0, (self.indicators_processed / self.total_indicators_found) * 100)
def _execute_scan_sync(self, target_domain: str, max_depth: int) -> None:
"""
Execute the reconnaissance scan synchronously (for debugging).
Args:
target_domain: Target domain to investigate
max_depth: Maximum recursion depth
"""
print(f"_execute_scan_sync started for {target_domain} with depth {max_depth}")
try:
print("Setting status to RUNNING")
self.status = ScanStatus.RUNNING
# Log scan start
enabled_providers = [provider.get_name() for provider in self.providers]
self.logger.log_scan_start(target_domain, max_depth, enabled_providers)
print(f"Logged scan start with providers: {enabled_providers}")
# Initialize with target domain
print(f"Adding target domain '{target_domain}' as initial node")
self.graph.add_node(target_domain, NodeType.DOMAIN)
# BFS-style exploration with depth limiting
current_level_domains = {target_domain}
processed_domains = set()
print(f"Starting BFS exploration...")
for depth in range(max_depth + 1):
if self.stop_requested:
print(f"Stop requested at depth {depth}")
break
self.current_depth = depth
print(f"Processing depth level {depth} with {len(current_level_domains)} domains")
if not current_level_domains:
print("No domains to process at this level")
break
# Update progress tracking
self.total_indicators_found += len(current_level_domains)
next_level_domains = set()
# Process domains at current depth level
for domain in current_level_domains:
if self.stop_requested:
print(f"Stop requested while processing domain {domain}")
break
if domain in processed_domains:
print(f"Domain {domain} already processed, skipping")
continue
print(f"Processing domain: {domain}")
self.current_indicator = domain
self.indicators_processed += 1
# Query all providers for this domain
discovered_domains = self._query_providers_for_domain(domain)
print(f"Discovered {len(discovered_domains)} new domains from {domain}")
# Add discovered domains to next level if not at max depth
if depth < max_depth:
for discovered_domain in discovered_domains:
if discovered_domain not in processed_domains:
next_level_domains.add(discovered_domain)
print(f"Adding {discovered_domain} to next level")
processed_domains.add(domain)
current_level_domains = next_level_domains
print(f"Completed depth {depth}, {len(next_level_domains)} domains for next level")
# Finalize scan
if self.stop_requested:
self.status = ScanStatus.STOPPED
print("Scan completed with STOPPED status")
else:
self.status = ScanStatus.COMPLETED
print("Scan completed with COMPLETED status")
self.logger.log_scan_complete()
# Print final statistics
stats = self.graph.get_statistics()
print(f"Final scan statistics:")
print(f" - Total nodes: {stats['basic_metrics']['total_nodes']}")
print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
print(f" - Domains processed: {len(processed_domains)}")
except Exception as e:
print(f"ERROR: Scan execution failed with error: {e}")
traceback.print_exc()
self.status = ScanStatus.FAILED
self.logger.logger.error(f"Scan failed: {e}")
def _query_providers_for_domain(self, domain: str) -> Set[str]:
"""
Query all enabled providers for information about a domain.
Args:
domain: Domain to investigate
Returns:
Set of newly discovered domains
"""
print(f"Querying {len(self.providers)} providers for domain: {domain}")
discovered_domains = set()
if not self.providers:
print("No providers available")
return discovered_domains
# Query providers sequentially for debugging
for provider in self.providers:
if self.stop_requested:
print("Stop requested, cancelling provider queries")
break
try:
print(f"Querying provider: {provider.get_name()}")
relationships = provider.query_domain(domain)
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships")
for source, target, rel_type, confidence, raw_data in relationships:
print(f"Processing relationship: {source} -> {target} ({rel_type.relationship_name})")
# Add target node to graph if it doesn't exist
self.graph.add_node(target, NodeType.DOMAIN)
# Add relationship
success = self.graph.add_edge(
source, target, rel_type, confidence,
provider.get_name(), raw_data
)
if success:
print(f"Added new relationship: {source} -> {target}")
else:
print(f"Relationship already exists or failed to add: {source} -> {target}")
discovered_domains.add(target)
except Exception as e:
print(f"Provider {provider.get_name()} failed for {domain}: {e}")
traceback.print_exc()
self.logger.logger.error(f"Provider {provider.get_name()} failed for {domain}: {e}")
print(f"Total unique domains discovered: {len(discovered_domains)}")
return discovered_domains
def get_graph_data(self) -> Dict[str, Any]:
"""
Get current graph data for visualization.
Returns:
Graph data formatted for frontend
"""
return self.graph.get_graph_data()
def export_results(self) -> Dict[str, Any]:
"""
Export complete scan results including graph and audit trail.
Returns:
Dictionary containing complete scan results
"""
# Get graph data
graph_data = self.graph.export_json()
# Get forensic audit trail
audit_trail = self.logger.export_audit_trail()
# Get provider statistics
provider_stats = {}
for provider in self.providers:
provider_stats[provider.get_name()] = provider.get_statistics()
# Combine all results
export_data = {
'scan_metadata': {
'target_domain': self.current_target,
'max_depth': self.max_depth,
'final_status': self.status,
'total_indicators_processed': self.indicators_processed,
'enabled_providers': list(provider_stats.keys())
},
'graph_data': graph_data,
'forensic_audit': audit_trail,
'provider_statistics': provider_stats,
'scan_summary': self.logger.get_forensic_summary()
}
return export_data
def remove_provider(self, provider_name: str) -> bool:
"""
Remove a provider from the scanner.
Args:
provider_name: Name of provider to remove
Returns:
bool: True if provider was removed
"""
for i, provider in enumerate(self.providers):
if provider.get_name() == provider_name:
self.providers.pop(i)
return True
return False
def get_provider_statistics(self) -> Dict[str, Dict[str, Any]]:
"""
Get statistics for all providers.
Returns:
Dictionary mapping provider names to their statistics
"""
stats = {}
for provider in self.providers:
stats[provider.get_name()] = provider.get_statistics()
return stats
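# Note: ScannerProxy below defers constructing the real Scanner until the
# first attribute access. Importing this module therefore stays cheap, and
# provider initialization (which probes provider availability) only runs
# once a caller actually touches the global `scanner` object.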
class ScannerProxy:
def __init__(self):
self._scanner = None
print("ScannerProxy initialized")
def __getattr__(self, name):
if self._scanner is None:
print("Creating new Scanner instance...")
self._scanner = Scanner()
print("Scanner instance created")
return getattr(self._scanner, name)
# Global scanner instance
scanner = ScannerProxy()
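
An end-to-end sketch using the module-level proxy, assuming the crtsh provider is configured and reachable (note that start_scan currently runs synchronously, per the debugging note above):

from core.scanner import scanner

if scanner.start_scan("example.com", max_depth=1):
    status = scanner.get_scan_status()
    print(status['status'], status['progress_percentage'])
    results = scanner.export_results()
    print(results['scan_metadata']['final_status'])
else:
    print("scan rejected (already running, or no providers available)")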