overcuriousity
2025-09-10 15:17:17 +02:00
parent 696cec0723
commit ce0e11cf0b
16 changed files with 2577 additions and 484 deletions

View File

@@ -1,6 +1,7 @@
"""
Core modules for DNSRecon passive reconnaissance tool.
Contains graph management, scanning orchestration, and forensic logging.
Phase 2: Enhanced with concurrent processing and real-time capabilities.
"""
from .graph_manager import GraphManager, NodeType, RelationshipType
@@ -19,4 +20,4 @@ __all__ = [
'new_session'
]
__version__ = "1.0.0-phase1"
__version__ = "1.0.0-phase2"

View File

@@ -8,6 +8,7 @@ import threading
from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple, Set
from enum import Enum
from datetime import timezone
import networkx as nx
@@ -44,8 +45,8 @@ class GraphManager:
def __init__(self):
"""Initialize empty directed graph."""
self.graph = nx.DiGraph()
#self.lock = threading.Lock()
self.creation_time = datetime.now(datetime.UTC).isoformat()
# self.lock = threading.Lock()
self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time
def add_node(self, node_id: str, node_type: NodeType,
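
A note on the datetime change repeated throughout this file: datetime.UTC is an alias added in Python 3.11, so datetime.now(datetime.UTC) raises AttributeError on older interpreters, while timezone.utc has been available since Python 3.2. A minimal sketch of the portable pattern:

from datetime import datetime, timezone

# Timezone-aware UTC timestamp; works on Python < 3.11,
# unlike datetime.now(datetime.UTC).
creation_time = datetime.now(timezone.utc).isoformat()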
@@ -71,12 +72,12 @@ class GraphManager:
node_attributes = {
'type': node_type.value,
'added_timestamp': datetime.now(datetime.UTC).isoformat(),
'added_timestamp': datetime.now(timezone.utc).isoformat(),
'metadata': metadata or {}
}
self.graph.add_node(node_id, **node_attributes)
self.last_modified = datetime.now(datetime.UTC).isoformat()
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def add_edge(self, source_id: str, target_id: str,
@@ -111,7 +112,7 @@ class GraphManager:
if new_confidence > existing_confidence:
self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(datetime.UTC).isoformat()
self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
self.graph.edges[source_id, target_id]['updated_by'] = source_provider
return False
@@ -120,12 +121,12 @@ class GraphManager:
'relationship_type': relationship_type.relationship_name,
'confidence_score': confidence_score or relationship_type.default_confidence,
'source_provider': source_provider,
'discovery_timestamp': datetime.now(datetime.UTC).isoformat(),
'discovery_timestamp': datetime.now(timezone.utc).isoformat(),
'raw_data': raw_data or {}
}
self.graph.add_edge(source_id, target_id, **edge_attributes)
self.last_modified = datetime.now(datetime.UTC).isoformat()
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def get_node_count(self) -> int:
@@ -210,14 +211,36 @@ class GraphManager:
'added_timestamp': attributes.get('added_timestamp')
}
# Color coding by type
# Color coding by type - now returns color objects for enhanced visualization
type_colors = {
'domain': '#00ff41', # Green for domains
'ip': '#ff9900', # Amber for IPs
'certificate': '#c7c7c7', # Gray for certificates
'asn': '#00aaff' # Blue for ASNs
'domain': {
'background': '#00ff41',
'border': '#00aa2e',
'highlight': {'background': '#44ff75', 'border': '#00ff41'},
'hover': {'background': '#22ff63', 'border': '#00cc35'}
},
'ip': {
'background': '#ff9900',
'border': '#cc7700',
'highlight': {'background': '#ffbb44', 'border': '#ff9900'},
'hover': {'background': '#ffaa22', 'border': '#dd8800'}
},
'certificate': {
'background': '#c7c7c7',
'border': '#999999',
'highlight': {'background': '#e0e0e0', 'border': '#c7c7c7'},
'hover': {'background': '#d4d4d4', 'border': '#aaaaaa'}
},
'asn': {
'background': '#00aaff',
'border': '#0088cc',
'highlight': {'background': '#44ccff', 'border': '#00aaff'},
'hover': {'background': '#22bbff', 'border': '#0099dd'}
}
}
node_data['color'] = type_colors.get(attributes.get('type'), '#ffffff')
node_color_config = type_colors.get(attributes.get('type', 'unknown'), type_colors['domain'])
node_data['color'] = node_color_config
nodes.append(node_data)
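
The bare hex string is replaced with the nested color object form that vis-network accepts for per-node styling. An illustrative node payload built from the 'domain' palette above (id and values assumed):

domain_color = {
    'background': '#00ff41',
    'border': '#00aa2e',
    'highlight': {'background': '#44ff75', 'border': '#00ff41'},
    'hover': {'background': '#22ff63', 'border': '#00cc35'},
}
node = {'id': 'example.com', 'type': 'domain', 'color': domain_color}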
# Format edges for visualization
@@ -231,17 +254,36 @@ class GraphManager:
'discovery_timestamp': attributes.get('discovery_timestamp')
}
# Edge styling based on confidence
# Enhanced edge styling based on confidence
confidence = attributes.get('confidence_score', 0)
if confidence >= 0.8:
edge_data['color'] = '#00ff41' # Green for high confidence
edge_data['width'] = 3
edge_data['color'] = {
'color': '#00ff41',
'highlight': '#44ff75',
'hover': '#22ff63',
'inherit': False
}
edge_data['width'] = 4
elif confidence >= 0.6:
edge_data['color'] = '#ff9900' # Amber for medium confidence
edge_data['width'] = 2
edge_data['color'] = {
'color': '#ff9900',
'highlight': '#ffbb44',
'hover': '#ffaa22',
'inherit': False
}
edge_data['width'] = 3
else:
edge_data['color'] = '#444444' # Dark gray for low confidence
edge_data['width'] = 1
edge_data['color'] = {
'color': '#666666',
'highlight': '#888888',
'hover': '#777777',
'inherit': False
}
edge_data['width'] = 2
# Add dashed line for low confidence
if confidence < 0.6:
edge_data['dashes'] = [5, 5]
edges.append(edge_data)
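
Taken together, the three branches map confidence to color, width, and dashing. A condensed sketch of the same thresholds (helper name hypothetical, not part of this commit):

def style_for_confidence(confidence: float) -> dict:
    """Mirror of the edge-styling thresholds above (illustrative only)."""
    if confidence >= 0.8:
        return {'width': 4, 'color': {'color': '#00ff41', 'highlight': '#44ff75',
                                      'hover': '#22ff63', 'inherit': False}}
    if confidence >= 0.6:
        return {'width': 3, 'color': {'color': '#ff9900', 'highlight': '#ffbb44',
                                      'hover': '#ffaa22', 'inherit': False}}
    return {'width': 2, 'dashes': [5, 5],
            'color': {'color': '#666666', 'highlight': '#888888',
                      'hover': '#777777', 'inherit': False}}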
@@ -270,7 +312,7 @@ class GraphManager:
# Add comprehensive metadata
export_data = {
'export_metadata': {
'export_timestamp': datetime.now(datetime.UTC).isoformat(),
'export_timestamp': datetime.now(timezone.utc).isoformat(),
'graph_creation_time': self.creation_time,
'last_modified': self.last_modified,
'total_nodes': self.graph.number_of_nodes(),
@@ -351,5 +393,5 @@ class GraphManager:
"""Clear all nodes and edges from the graph."""
#with self.lock:
self.graph.clear()
self.creation_time = datetime.now(datetime.UTC).isoformat()
self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time

View File

@@ -9,6 +9,7 @@ import threading
from datetime import datetime
from typing import Dict, Any, Optional, List
from dataclasses import dataclass, asdict
from datetime import timezone
@dataclass
@@ -60,7 +61,7 @@ class ForensicLogger:
self.relationships: List[RelationshipDiscovery] = []
self.session_metadata = {
'session_id': self.session_id,
'start_time': datetime.now(datetime.UTC).isoformat(),
'start_time': datetime.now(timezone.utc).isoformat(),
'end_time': None,
'total_requests': 0,
'total_relationships': 0,
@@ -85,7 +86,7 @@ class ForensicLogger:
def _generate_session_id(self) -> str:
"""Generate unique session identifier."""
return f"dnsrecon_{datetime.now(datetime.UTC).strftime('%Y%m%d_%H%M%S')}"
return f"dnsrecon_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}"
def log_api_request(self, provider: str, url: str, method: str = "GET",
status_code: Optional[int] = None,
@@ -110,7 +111,7 @@ class ForensicLogger:
"""
#with self.lock:
api_request = APIRequest(
timestamp=datetime.now(datetime.UTC).isoformat(),
timestamp=datetime.now(timezone.utc).isoformat(),
provider=provider,
url=url,
method=method,
@@ -153,7 +154,7 @@ class ForensicLogger:
"""
#with self.lock:
relationship = RelationshipDiscovery(
timestamp=datetime.now(datetime.UTC).isoformat(),
timestamp=datetime.now(timezone.utc).isoformat(),
source_node=source_node,
target_node=target_node,
relationship_type=relationship_type,
@@ -183,7 +184,7 @@ class ForensicLogger:
def log_scan_complete(self) -> None:
"""Log the completion of a reconnaissance scan."""
#with self.lock:
self.session_metadata['end_time'] = datetime.now(datetime.UTC).isoformat()
self.session_metadata['end_time'] = datetime.now(timezone.utc).isoformat()
self.session_metadata['providers_used'] = list(self.session_metadata['providers_used'])
self.session_metadata['target_domains'] = list(self.session_metadata['target_domains'])
@@ -203,7 +204,7 @@ class ForensicLogger:
'session_metadata': self.session_metadata.copy(),
'api_requests': [asdict(req) for req in self.api_requests],
'relationships': [asdict(rel) for rel in self.relationships],
'export_timestamp': datetime.now(datetime.UTC).isoformat()
'export_timestamp': datetime.now(timezone.utc).isoformat()
}
def get_forensic_summary(self) -> Dict[str, Any]:
@@ -239,7 +240,7 @@ class ForensicLogger:
def _calculate_session_duration(self) -> float:
"""Calculate session duration in minutes."""
if not self.session_metadata['end_time']:
end_time = datetime.now(datetime.UTC)
end_time = datetime.now(timezone.utc)
else:
end_time = datetime.fromisoformat(self.session_metadata['end_time'])
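
Both branches now yield timezone-aware datetimes: isoformat() on an aware datetime writes a +00:00 offset, and fromisoformat() preserves it, so the subtraction is well-defined. A quick sanity check (variable names assumed):

from datetime import datetime, timezone

start = datetime.now(timezone.utc)
parsed = datetime.fromisoformat(start.isoformat())
assert parsed.tzinfo is not None  # the offset survives the round-trip
duration_minutes = (datetime.now(timezone.utc) - parsed).total_seconds() / 60.0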

View File

@@ -6,12 +6,15 @@ Coordinates data gathering from multiple providers and builds the infrastructure
import threading
import time
import traceback
from typing import List, Set, Dict, Any, Optional
from typing import List, Set, Dict, Any, Optional, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed
from core.graph_manager import GraphManager, NodeType, RelationshipType
from core.logger import get_forensic_logger, new_session
from providers.crtsh_provider import CrtShProvider
from providers.dns_provider import DNSProvider
from providers.shodan_provider import ShodanProvider
from providers.virustotal_provider import VirusTotalProvider
from config import config
@@ -27,17 +30,16 @@ class ScanStatus:
class Scanner:
"""
Main scanning orchestrator for DNSRecon passive reconnaissance.
Manages multi-provider data gathering and graph construction.
Manages multi-provider data gathering and graph construction with concurrent processing.
"""
def __init__(self):
"""Initialize scanner with default providers and empty graph."""
"""Initialize scanner with all available providers and empty graph."""
print("Initializing Scanner instance...")
try:
from providers.base_provider import BaseProvider
self.graph = GraphManager()
self.providers: List[BaseProvider] = []
self.providers = []
self.status = ScanStatus.IDLE
self.current_target = None
self.current_depth = 0
@@ -49,6 +51,9 @@ class Scanner:
self.total_indicators_found = 0
self.indicators_processed = 0
self.current_indicator = ""
# Concurrent processing configuration
self.max_workers = config.max_concurrent_requests
# Initialize providers
print("Calling _initialize_providers...")
@@ -66,36 +71,54 @@ class Scanner:
raise
def _initialize_providers(self) -> None:
"""Initialize available providers based on configuration."""
"""Initialize all available providers based on configuration."""
self.providers = []
print("Initializing providers...")
# Always add free providers
if config.is_provider_enabled('crtsh'):
try:
crtsh_provider = CrtShProvider()
if crtsh_provider.is_available():
self.providers.append(crtsh_provider)
print("✓ CrtSh provider initialized successfully")
else:
print("✗ CrtSh provider is not available")
except Exception as e:
print(f"✗ Failed to initialize CrtSh provider: {e}")
traceback.print_exc()
free_providers = [
('crtsh', CrtShProvider),
('dns', DNSProvider)
]
for provider_name, provider_class in free_providers:
if config.is_provider_enabled(provider_name):
try:
provider = provider_class()
if provider.is_available():
self.providers.append(provider)
print(f"{provider_name.title()} provider initialized successfully")
else:
print(f"{provider_name.title()} provider is not available")
except Exception as e:
print(f"✗ Failed to initialize {provider_name.title()} provider: {e}")
traceback.print_exc()
# Add API key-dependent providers
api_providers = [
('shodan', ShodanProvider),
('virustotal', VirusTotalProvider)
]
for provider_name, provider_class in api_providers:
if config.is_provider_enabled(provider_name):
try:
provider = provider_class()
if provider.is_available():
self.providers.append(provider)
print(f"{provider_name.title()} provider initialized successfully")
else:
print(f"{provider_name.title()} provider is not available (API key required)")
except Exception as e:
print(f"✗ Failed to initialize {provider_name.title()} provider: {e}")
traceback.print_exc()
print(f"Initialized {len(self.providers)} providers")
def _debug_threads(self):
"""Debug function to show current threads."""
print("=== THREAD DEBUG INFO ===")
for t in threading.enumerate():
print(f"Thread: {t.name} | Alive: {t.is_alive()} | Daemon: {t.daemon}")
print("=== END THREAD DEBUG ===")
def start_scan(self, target_domain: str, max_depth: int = 2) -> bool:
"""
Start a new reconnaissance scan.
Start a new reconnaissance scan with concurrent processing.
Args:
target_domain: Initial domain to investigate
@@ -107,9 +130,6 @@ class Scanner:
print(f"Scanner.start_scan called with target='{target_domain}', depth={max_depth}")
try:
print("Checking current status...")
self._debug_threads()
if self.status == ScanStatus.RUNNING:
print("Scan already running, rejecting new scan")
return False
@@ -119,8 +139,6 @@ class Scanner:
print("No providers available, cannot start scan")
return False
print(f"Current status: {self.status}, Providers: {len(self.providers)}")
# Stop any existing scan thread
if self.scan_thread and self.scan_thread.is_alive():
print("Stopping existing scan thread...")
@@ -132,9 +150,7 @@ class Scanner:
# Reset state
print("Resetting scanner state...")
#print("Running graph.clear()")
#self.graph.clear()
print("running self.current_target = target_domain.lower().strip()")
self.graph.clear()
self.current_target = target_domain.lower().strip()
self.max_depth = max_depth
self.current_depth = 0
@@ -147,9 +163,15 @@ class Scanner:
print("Starting new forensic session...")
self.logger = new_session()
# FOR DEBUGGING: Run scan synchronously instead of in thread
print("Running scan synchronously for debugging...")
self._execute_scan_sync(self.current_target, max_depth)
# Start scan in separate thread for Phase 2
print("Starting scan thread...")
self.scan_thread = threading.Thread(
target=self._execute_scan_async,
args=(self.current_target, max_depth),
daemon=True
)
self.scan_thread.start()
return True
except Exception as e:
@@ -157,6 +179,321 @@ class Scanner:
traceback.print_exc()
return False
def _execute_scan_async(self, target_domain: str, max_depth: int) -> None:
"""
Execute the reconnaissance scan asynchronously with concurrent provider queries.
Args:
target_domain: Target domain to investigate
max_depth: Maximum recursion depth
"""
print(f"_execute_scan_async started for {target_domain} with depth {max_depth}")
try:
print("Setting status to RUNNING")
self.status = ScanStatus.RUNNING
# Log scan start
enabled_providers = [provider.get_name() for provider in self.providers]
self.logger.log_scan_start(target_domain, max_depth, enabled_providers)
print(f"Logged scan start with providers: {enabled_providers}")
# Initialize with target domain
print(f"Adding target domain '{target_domain}' as initial node")
self.graph.add_node(target_domain, NodeType.DOMAIN)
# BFS-style exploration with depth limiting and concurrent processing
current_level_domains = {target_domain}
processed_domains = set()
all_discovered_ips = set()
print(f"Starting BFS exploration...")
for depth in range(max_depth + 1):
if self.stop_requested:
print(f"Stop requested at depth {depth}")
break
self.current_depth = depth
print(f"Processing depth level {depth} with {len(current_level_domains)} domains")
if not current_level_domains:
print("No domains to process at this level")
break
# Update progress tracking
self.total_indicators_found += len(current_level_domains)
next_level_domains = set()
# Process domains at current depth level with concurrent queries
domain_results = self._process_domains_concurrent(current_level_domains, processed_domains)
for domain, discovered_domains, discovered_ips in domain_results:
if self.stop_requested:
break
processed_domains.add(domain)
all_discovered_ips.update(discovered_ips)
# Add discovered domains to next level if not at max depth
if depth < max_depth:
for discovered_domain in discovered_domains:
if discovered_domain not in processed_domains:
next_level_domains.add(discovered_domain)
print(f"Adding {discovered_domain} to next level")
# Process discovered IPs concurrently
if all_discovered_ips:
print(f"Processing {len(all_discovered_ips)} discovered IP addresses")
self._process_ips_concurrent(all_discovered_ips)
current_level_domains = next_level_domains
print(f"Completed depth {depth}, {len(next_level_domains)} domains for next level")
# Finalize scan
if self.stop_requested:
self.status = ScanStatus.STOPPED
print("Scan completed with STOPPED status")
else:
self.status = ScanStatus.COMPLETED
print("Scan completed with COMPLETED status")
self.logger.log_scan_complete()
# Print final statistics
stats = self.graph.get_statistics()
print(f"Final scan statistics:")
print(f" - Total nodes: {stats['basic_metrics']['total_nodes']}")
print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
print(f" - Domains processed: {len(processed_domains)}")
print(f" - IPs discovered: {len(all_discovered_ips)}")
except Exception as e:
print(f"ERROR: Scan execution failed with error: {e}")
traceback.print_exc()
self.status = ScanStatus.FAILED
self.logger.logger.error(f"Scan failed: {e}")
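
One behavioral detail worth noting: all_discovered_ips accumulates across depth levels while _process_ips_concurrent runs inside the loop, so IPs found at earlier depths are queried again at every later level. Tracking a per-level delta avoids the repeat work; a sketch of the loop body under that change (not part of this commit):

new_ips = set()
for domain, discovered_domains, discovered_ips in domain_results:
    processed_domains.add(domain)
    new_ips.update(discovered_ips - all_discovered_ips)
    all_discovered_ips.update(discovered_ips)

if new_ips:
    self._process_ips_concurrent(new_ips)  # only IPs first seen at this depth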
def _process_domains_concurrent(self, domains: Set[str], processed_domains: Set[str]) -> List[Tuple[str, Set[str], Set[str]]]:
"""
Process multiple domains concurrently using a thread pool.
Args:
domains: Set of domains to process
processed_domains: Set of already processed domains
Returns:
List of tuples (domain, discovered_domains, discovered_ips)
"""
results = []
# Filter out already processed domains
domains_to_process = domains - processed_domains
if not domains_to_process:
return results
print(f"Processing {len(domains_to_process)} domains concurrently with {self.max_workers} workers")
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
# Submit all domain processing tasks
future_to_domain = {
executor.submit(self._query_providers_for_domain, domain): domain
for domain in domains_to_process
}
# Collect results as they complete
for future in as_completed(future_to_domain):
if self.stop_requested:
break
domain = future_to_domain[future]
try:
discovered_domains, discovered_ips = future.result()
results.append((domain, discovered_domains, discovered_ips))
self.indicators_processed += 1
print(f"Completed processing domain: {domain} ({len(discovered_domains)} domains, {len(discovered_ips)} IPs)")
except Exception as e:
print(f"Error processing domain {domain}: {e}")
traceback.print_exc()
return results
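
A caveat with this pattern: breaking out of as_completed() when stop_requested is set does not cancel work already submitted, and leaving the with-block still waits for every queued task to finish. On Python 3.9+ the queued, not-yet-running tasks can be dropped explicitly; a sketch:

executor = ThreadPoolExecutor(max_workers=self.max_workers)
try:
    future_to_domain = {
        executor.submit(self._query_providers_for_domain, domain): domain
        for domain in domains_to_process
    }
    for future in as_completed(future_to_domain):
        if self.stop_requested:
            break
        # ... collect results as above ...
finally:
    # Discard queued tasks on stop instead of draining them (Python 3.9+).
    executor.shutdown(wait=False, cancel_futures=True)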
def _process_ips_concurrent(self, ips: Set[str]) -> None:
"""
Process multiple IP addresses concurrently.
Args:
ips: Set of IP addresses to process
"""
if not ips:
return
print(f"Processing {len(ips)} IP addresses concurrently")
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
# Submit all IP processing tasks
future_to_ip = {
executor.submit(self._query_providers_for_ip, ip): ip
for ip in ips
}
# Collect results as they complete
for future in as_completed(future_to_ip):
if self.stop_requested:
break
ip = future_to_ip[future]
try:
future.result() # Just wait for completion
print(f"Completed processing IP: {ip}")
except Exception as e:
print(f"Error processing IP {ip}: {e}")
traceback.print_exc()
def _query_providers_for_domain(self, domain: str) -> Tuple[Set[str], Set[str]]:
"""
Query all enabled providers for information about a domain.
Args:
domain: Domain to investigate
Returns:
Tuple of (discovered_domains, discovered_ips)
"""
print(f"Querying {len(self.providers)} providers for domain: {domain}")
discovered_domains = set()
discovered_ips = set()
if not self.providers:
print("No providers available")
return discovered_domains, discovered_ips
# Query providers concurrently for better performance
with ThreadPoolExecutor(max_workers=len(self.providers)) as executor:
# Submit queries for all providers
future_to_provider = {
executor.submit(self._safe_provider_query_domain, provider, domain): provider
for provider in self.providers
}
# Collect results as they complete
for future in as_completed(future_to_provider):
if self.stop_requested:
break
provider = future_to_provider[future]
try:
relationships = future.result()
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships")
for source, target, rel_type, confidence, raw_data in relationships:
# Determine node type based on target
if self._is_valid_ip(target):
target_node_type = NodeType.IP
discovered_ips.add(target)
elif self._is_valid_domain(target):
target_node_type = NodeType.DOMAIN
discovered_domains.add(target)
else:
# Could be ASN or certificate
target_node_type = NodeType.ASN if target.startswith('AS') else NodeType.CERTIFICATE
# Add nodes and relationship to graph
self.graph.add_node(source, NodeType.DOMAIN)
self.graph.add_node(target, target_node_type)
success = self.graph.add_edge(
source, target, rel_type, confidence,
provider.get_name(), raw_data
)
if success:
print(f"Added relationship: {source} -> {target} ({rel_type.relationship_name})")
except Exception as e:
print(f"Provider {provider.get_name()} failed for {domain}: {e}")
print(f"Domain {domain}: discovered {len(discovered_domains)} domains, {len(discovered_ips)} IPs")
return discovered_domains, discovered_ips
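
_is_valid_ip() and _is_valid_domain() are referenced here but defined outside this diff; a typical stdlib-based shape for such helpers (a sketch, not the project's actual code):

import ipaddress
import re

_DOMAIN_RE = re.compile(
    r'^(?=.{1,253}$)([a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z]{2,}$',
    re.IGNORECASE
)

def _is_valid_ip(self, value: str) -> bool:
    """True for any syntactically valid IPv4/IPv6 address."""
    try:
        ipaddress.ip_address(value)
        return True
    except ValueError:
        return False

def _is_valid_domain(self, value: str) -> bool:
    """Loose RFC-1035-style check; intentionally conservative."""
    return bool(_DOMAIN_RE.match(value))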
def _query_providers_for_ip(self, ip: str) -> None:
"""
Query all enabled providers for information about an IP address.
Args:
ip: IP address to investigate
"""
print(f"Querying {len(self.providers)} providers for IP: {ip}")
if not self.providers:
print("No providers available")
return
# Query providers concurrently
with ThreadPoolExecutor(max_workers=len(self.providers)) as executor:
# Submit queries for all providers
future_to_provider = {
executor.submit(self._safe_provider_query_ip, provider, ip): provider
for provider in self.providers
}
# Collect results as they complete
for future in as_completed(future_to_provider):
if self.stop_requested:
break
provider = future_to_provider[future]
try:
relationships = future.result()
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships for IP {ip}")
for source, target, rel_type, confidence, raw_data in relationships:
# Determine node type based on target
if self._is_valid_domain(target):
target_node_type = NodeType.DOMAIN
elif target.startswith('AS'):
target_node_type = NodeType.ASN
else:
target_node_type = NodeType.IP
# Add nodes and relationship to graph
self.graph.add_node(source, NodeType.IP)
self.graph.add_node(target, target_node_type)
success = self.graph.add_edge(
source, target, rel_type, confidence,
provider.get_name(), raw_data
)
if success:
print(f"Added IP relationship: {source} -> {target} ({rel_type.relationship_name})")
except Exception as e:
print(f"Provider {provider.get_name()} failed for IP {ip}: {e}")
def _safe_provider_query_domain(self, provider, domain: str):
"""Safely query provider for domain with error handling."""
try:
return provider.query_domain(domain)
except Exception as e:
print(f"Provider {provider.get_name()} query_domain failed: {e}")
return []
def _safe_provider_query_ip(self, provider, ip: str):
"""Safely query provider for IP with error handling."""
try:
return provider.query_ip(ip)
except Exception as e:
print(f"Provider {provider.get_name()} query_ip failed: {e}")
return []
def stop_scan(self) -> bool:
"""
Request scan termination.
@@ -218,159 +555,6 @@ class Scanner:
return 0.0
return min(100.0, (self.indicators_processed / self.total_indicators_found) * 100)
def _execute_scan_sync(self, target_domain: str, max_depth: int) -> None:
"""
Execute the reconnaissance scan synchronously (for debugging).
Args:
target_domain: Target domain to investigate
max_depth: Maximum recursion depth
"""
print(f"_execute_scan_sync started for {target_domain} with depth {max_depth}")
try:
print("Setting status to RUNNING")
self.status = ScanStatus.RUNNING
# Log scan start
enabled_providers = [provider.get_name() for provider in self.providers]
self.logger.log_scan_start(target_domain, max_depth, enabled_providers)
print(f"Logged scan start with providers: {enabled_providers}")
# Initialize with target domain
print(f"Adding target domain '{target_domain}' as initial node")
self.graph.add_node(target_domain, NodeType.DOMAIN)
# BFS-style exploration with depth limiting
current_level_domains = {target_domain}
processed_domains = set()
print(f"Starting BFS exploration...")
for depth in range(max_depth + 1):
if self.stop_requested:
print(f"Stop requested at depth {depth}")
break
self.current_depth = depth
print(f"Processing depth level {depth} with {len(current_level_domains)} domains")
if not current_level_domains:
print("No domains to process at this level")
break
# Update progress tracking
self.total_indicators_found += len(current_level_domains)
next_level_domains = set()
# Process domains at current depth level
for domain in current_level_domains:
if self.stop_requested:
print(f"Stop requested while processing domain {domain}")
break
if domain in processed_domains:
print(f"Domain {domain} already processed, skipping")
continue
print(f"Processing domain: {domain}")
self.current_indicator = domain
self.indicators_processed += 1
# Query all providers for this domain
discovered_domains = self._query_providers_for_domain(domain)
print(f"Discovered {len(discovered_domains)} new domains from {domain}")
# Add discovered domains to next level if not at max depth
if depth < max_depth:
for discovered_domain in discovered_domains:
if discovered_domain not in processed_domains:
next_level_domains.add(discovered_domain)
print(f"Adding {discovered_domain} to next level")
processed_domains.add(domain)
current_level_domains = next_level_domains
print(f"Completed depth {depth}, {len(next_level_domains)} domains for next level")
# Finalize scan
if self.stop_requested:
self.status = ScanStatus.STOPPED
print("Scan completed with STOPPED status")
else:
self.status = ScanStatus.COMPLETED
print("Scan completed with COMPLETED status")
self.logger.log_scan_complete()
# Print final statistics
stats = self.graph.get_statistics()
print(f"Final scan statistics:")
print(f" - Total nodes: {stats['basic_metrics']['total_nodes']}")
print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
print(f" - Domains processed: {len(processed_domains)}")
except Exception as e:
print(f"ERROR: Scan execution failed with error: {e}")
traceback.print_exc()
self.status = ScanStatus.FAILED
self.logger.logger.error(f"Scan failed: {e}")
def _query_providers_for_domain(self, domain: str) -> Set[str]:
"""
Query all enabled providers for information about a domain.
Args:
domain: Domain to investigate
Returns:
Set of newly discovered domains
"""
print(f"Querying {len(self.providers)} providers for domain: {domain}")
discovered_domains = set()
if not self.providers:
print("No providers available")
return discovered_domains
# Query providers sequentially for debugging
for provider in self.providers:
if self.stop_requested:
print("Stop requested, cancelling provider queries")
break
try:
print(f"Querying provider: {provider.get_name()}")
relationships = provider.query_domain(domain)
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships")
for source, target, rel_type, confidence, raw_data in relationships:
print(f"Processing relationship: {source} -> {target} ({rel_type.relationship_name})")
# Add target node to graph if it doesn't exist
self.graph.add_node(target, NodeType.DOMAIN)
# Add relationship
success = self.graph.add_edge(
source, target, rel_type, confidence,
provider.get_name(), raw_data
)
if success:
print(f"Added new relationship: {source} -> {target}")
else:
print(f"Relationship already exists or failed to add: {source} -> {target}")
discovered_domains.add(target)
except Exception as e:
print(f"Provider {provider.get_name()} failed for {domain}: {e}")
traceback.print_exc()
self.logger.logger.error(f"Provider {provider.get_name()} failed for {domain}: {e}")
print(f"Total unique domains discovered: {len(discovered_domains)}")
return discovered_domains
def get_graph_data(self) -> Dict[str, Any]:
"""
Get current graph data for visualization.