This commit is contained in:
overcuriousity
2025-09-11 00:00:00 +02:00
parent db2101d814
commit 2d485c5703
7 changed files with 373 additions and 365 deletions

View File

@@ -9,6 +9,7 @@ from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple, Set
from enum import Enum
from datetime import timezone
from collections import defaultdict
import networkx as nx
@@ -24,14 +25,28 @@ class NodeType(Enum):
class RelationshipType(Enum):
"""Enumeration of supported relationship types with confidence scores."""
SAN_CERTIFICATE = ("san", 0.9) # Certificate SAN relationships
A_RECORD = ("a_record", 0.8) # A/AAAA record relationships
CNAME_RECORD = ("cname", 0.8) # CNAME relationships
PASSIVE_DNS = ("passive_dns", 0.6) # Passive DNS relationships
ASN_MEMBERSHIP = ("asn", 0.7) # ASN relationships
MX_RECORD = ("mx_record", 0.7) # MX record relationships
NS_RECORD = ("ns_record", 0.7) # NS record relationships
SAN_CERTIFICATE = ("san", 0.9)
A_RECORD = ("a_record", 0.8)
AAAA_RECORD = ("aaaa_record", 0.8)
CNAME_RECORD = ("cname", 0.8)
MX_RECORD = ("mx_record", 0.7)
NS_RECORD = ("ns_record", 0.7)
PTR_RECORD = ("ptr_record", 0.8)
SOA_RECORD = ("soa_record", 0.7)
TXT_RECORD = ("txt_record", 0.7)
SRV_RECORD = ("srv_record", 0.7)
CAA_RECORD = ("caa_record", 0.7)
DNSKEY_RECORD = ("dnskey_record", 0.7)
DS_RECORD = ("ds_record", 0.7)
RRSIG_RECORD = ("rrsig_record", 0.7)
SSHFP_RECORD = ("sshfp_record", 0.7)
TLSA_RECORD = ("tlsa_record", 0.7)
NAPTR_RECORD = ("naptr_record", 0.7)
SPF_RECORD = ("spf_record", 0.7)
PASSIVE_DNS = ("passive_dns", 0.6)
ASN_MEMBERSHIP = ("asn", 0.7)
def __init__(self, relationship_name: str, default_confidence: float):
self.relationship_name = relationship_name
self.default_confidence = default_confidence
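# Illustrative sketch (not committed code): each member's value is a
# (name, confidence) tuple, which Enum unpacks into __init__, so callers can
# read both fields directly. Import path as used by scanner.py below.
from core.graph_manager import RelationshipType

rel = RelationshipType.MX_RECORD
print(rel.relationship_name)    # "mx_record"
print(rel.default_confidence)   # 0.7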
@@ -42,24 +57,24 @@ class GraphManager:
Thread-safe graph manager for DNSRecon infrastructure mapping.
Uses NetworkX for in-memory graph storage with confidence scoring.
"""
def __init__(self):
"""Initialize empty directed graph."""
self.graph = nx.DiGraph()
# self.lock = threading.Lock()
self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time
def add_node(self, node_id: str, node_type: NodeType,
metadata: Optional[Dict[str, Any]] = None) -> bool:
"""
Add a node to the graph.
Args:
node_id: Unique identifier for the node
node_type: Type of the node (Domain, IP, Certificate, ASN)
metadata: Additional metadata for the node
Returns:
bool: True if node was added, False if it already exists
"""
@@ -70,33 +85,33 @@ class GraphManager:
existing_metadata.update(metadata)
self.graph.nodes[node_id]['metadata'] = existing_metadata
return False
node_attributes = {
'type': node_type.value,
'added_timestamp': datetime.now(timezone.utc).isoformat(),
'metadata': metadata or {}
}
self.graph.add_node(node_id, **node_attributes)
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
def add_edge(self, source_id: str, target_id: str,
relationship_type: RelationshipType,
confidence_score: Optional[float] = None,
source_provider: str = "unknown",
raw_data: Optional[Dict[str, Any]] = None) -> bool:
"""
Add an edge between two nodes.
Args:
source_id: Source node identifier
target_id: Target node identifier
relationship_type: Type of relationship
confidence_score: Custom confidence score (overrides default)
source_provider: Provider that discovered this relationship
raw_data: Raw data from provider response
Returns:
bool: True if edge was added, False if it already exists
"""
@@ -112,14 +127,14 @@ class GraphManager:
# Update confidence score if new score is higher
existing_confidence = self.graph.edges[source_id, target_id]['confidence_score']
new_confidence = confidence_score or relationship_type.default_confidence
if new_confidence > existing_confidence:
self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
self.graph.edges[source_id, target_id]['updated_by'] = source_provider
return False
edge_attributes = {
'relationship_type': relationship_type.relationship_name,
'confidence_score': confidence_score or relationship_type.default_confidence,
@@ -127,7 +142,7 @@ class GraphManager:
'discovery_timestamp': datetime.now(timezone.utc).isoformat(),
'raw_data': raw_data or {}
}
self.graph.add_edge(source_id, target_id, **edge_attributes)
self.last_modified = datetime.now(timezone.utc).isoformat()
return True
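# Illustrative sketch of the add/update semantics above (hostname and IP are
# placeholders): a repeat call for the same pair returns False, but still
# raises the stored confidence when the new score is higher.
from core.graph_manager import GraphManager, NodeType, RelationshipType

g = GraphManager()
g.add_node("example.com", NodeType.DOMAIN)
g.add_node("198.51.100.7", NodeType.IP)

g.add_edge("example.com", "198.51.100.7", RelationshipType.A_RECORD,
           confidence_score=0.6, source_provider="passive_dns")
added_again = g.add_edge("example.com", "198.51.100.7", RelationshipType.A_RECORD,
                         source_provider="dns")  # falls back to default 0.8
print(added_again)  # False, but the stored confidence_score is now 0.8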
@@ -136,19 +151,19 @@ class GraphManager:
"""Get total number of nodes in the graph."""
#with self.lock:
return self.graph.number_of_nodes()
def get_edge_count(self) -> int:
"""Get total number of edges in the graph."""
#with self.lock:
return self.graph.number_of_edges()
def get_nodes_by_type(self, node_type: NodeType) -> List[str]:
"""
Get all nodes of a specific type.
Args:
node_type: Type of nodes to retrieve
Returns:
List of node identifiers
"""
@@ -157,32 +172,32 @@ class GraphManager:
node_id for node_id, attributes in self.graph.nodes(data=True)
if attributes.get('type') == node_type.value
]
def get_neighbors(self, node_id: str) -> List[str]:
"""
Get all neighboring nodes (both incoming and outgoing).
Args:
node_id: Node identifier
Returns:
List of neighboring node identifiers
"""
#with self.lock:
if not self.graph.has_node(node_id):
return []
predecessors = list(self.graph.predecessors(node_id))
successors = list(self.graph.successors(node_id))
return list(set(predecessors + successors))
def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
"""
Get edges with confidence score above threshold.
Args:
min_confidence: Minimum confidence threshold
Returns:
List of tuples (source, target, attributes)
"""
@@ -192,18 +207,49 @@ class GraphManager:
for source, target, attributes in self.graph.edges(data=True)
if attributes.get('confidence_score', 0) >= min_confidence
]
def get_graph_data(self) -> Dict[str, Any]:
"""
Export graph data for visualization.
Returns:
Dictionary containing nodes and edges for frontend visualization
"""
#with self.lock:
nodes = []
edges = []
# Create a dictionary to hold aggregated data for each node
node_details = defaultdict(lambda: defaultdict(list))
for source, target, attributes in self.graph.edges(data=True):
provider = attributes.get('source_provider', 'unknown')
raw_data = attributes.get('raw_data', {})
if provider == 'dns':
record_type = raw_data.get('query_type', 'UNKNOWN')
value = raw_data.get('value', target)
# DNS data is always about the source node of the query
node_details[source]['dns_records'].append(f"{record_type}: {value}")
elif provider == 'crtsh':
# crt.sh data consists of domain names found in certificate SANs
node_details[source]['related_domains_san'].append(target)
elif provider == 'shodan':
# Shodan data is about the IP, which can be either the source or target
source_node_type = self.graph.nodes[source].get('type')
target_node_type = self.graph.nodes[target].get('type')
if source_node_type == 'ip':
node_details[source]['shodan'] = raw_data
elif target_node_type == 'ip':
node_details[target]['shodan'] = raw_data
elif provider == 'virustotal':
# VirusTotal data is about the source node of the query
node_details[source]['virustotal'] = raw_data
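# Minimal standalone sketch (not committed code) of the structure built by the
# loop above; hostnames and the Shodan blob are made up. DNS answers and SAN
# domains accumulate in lists, while Shodan/VirusTotal blobs replace the list
# default with a plain dict.
from collections import defaultdict

details = defaultdict(lambda: defaultdict(list))
details["example.com"]["dns_records"].append("MX: mail.example.com")
details["example.com"]["related_domains_san"].append("www.example.com")
details["198.51.100.7"]["shodan"] = {"ports": [443]}
# details["example.com"] -> {'dns_records': ['MX: mail.example.com'],
#                            'related_domains_san': ['www.example.com']}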
# Format nodes for visualization
for node_id, attributes in self.graph.nodes(data=True):
node_data = {
@@ -213,7 +259,18 @@ class GraphManager:
'metadata': attributes.get('metadata', {}),
'added_timestamp': attributes.get('added_timestamp')
}
# Add the aggregated details to the metadata
if node_id in node_details:
for key, value in node_details[node_id].items():
# Use a set to avoid adding duplicate entries to lists
if key in node_data['metadata'] and isinstance(node_data['metadata'][key], list):
existing_values = set(node_data['metadata'][key])
new_values = [v for v in value if v not in existing_values]
node_data['metadata'][key].extend(new_values)
else:
node_data['metadata'][key] = value
# Color coding by type - now returns color objects for enhanced visualization
type_colors = {
'domain': {
@@ -239,18 +296,24 @@ class GraphManager:
'border': '#0088cc',
'highlight': {'background': '#44ccff', 'border': '#00aaff'},
'hover': {'background': '#22bbff', 'border': '#0099dd'}
},
'large_entity': {
'background': '#ff6b6b',
'border': '#cc3a3a',
'highlight': {'background': '#ff8c8c', 'border': '#ff6b6b'},
'hover': {'background': '#ff7a7a', 'border': '#dd4a4a'}
}
}
node_color_config = type_colors.get(attributes.get('type', 'unknown'), type_colors['domain'])
node_data['color'] = node_color_config
# Pass the has_valid_cert metadata for styling
if 'metadata' in attributes and 'has_valid_cert' in attributes['metadata']:
node_data['has_valid_cert'] = attributes['metadata']['has_valid_cert']
nodes.append(node_data)
# Format edges for visualization
for source, target, attributes in self.graph.edges(data=True):
edge_data = {
@@ -261,7 +324,7 @@ class GraphManager:
'source_provider': attributes.get('source_provider', ''),
'discovery_timestamp': attributes.get('discovery_timestamp')
}
# Enhanced edge styling based on confidence
confidence = attributes.get('confidence_score', 0)
if confidence >= 0.8:
@@ -275,7 +338,7 @@ class GraphManager:
elif confidence >= 0.6:
edge_data['color'] = {
'color': '#ff9900',
'highlight': '#ffbb44',
'hover': '#ffaa22',
'inherit': False
}
@@ -288,13 +351,13 @@ class GraphManager:
'inherit': False
}
edge_data['width'] = 2
# Add dashed line for low confidence
if confidence < 0.6:
edge_data['dashes'] = [5, 5]
edges.append(edge_data)
return {
'nodes': nodes,
'edges': edges,
@@ -305,18 +368,18 @@ class GraphManager:
'last_modified': self.last_modified
}
}
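# Illustrative consumer of the payload above (placeholder names; only keys
# visible in this diff are read). A confidence below 0.6 picks up the dashed
# style, everything else stays solid.
from core.graph_manager import GraphManager, NodeType, RelationshipType

g = GraphManager()
g.add_node("example.com", NodeType.DOMAIN)
g.add_node("198.51.100.7", NodeType.IP)
g.add_edge("example.com", "198.51.100.7", RelationshipType.PASSIVE_DNS,
           confidence_score=0.5, source_provider="passive_dns")

payload = g.get_graph_data()
for edge in payload["edges"]:
    print(edge["source_provider"],
          "dashed" if edge.get("dashes") else "solid")  # -> passive_dns dashed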
def export_json(self) -> Dict[str, Any]:
"""
Export complete graph data as JSON for download.
Returns:
Dictionary containing complete graph data with metadata
"""
#with self.lock:
# Get basic graph data
graph_data = self.get_graph_data()
# Add comprehensive metadata
export_data = {
'export_metadata': {
@@ -339,13 +402,13 @@ class GraphManager:
],
'confidence_distribution': self._get_confidence_distribution()
}
return export_data
def _get_confidence_distribution(self) -> Dict[str, int]:
"""Get distribution of confidence scores."""
distribution = {'high': 0, 'medium': 0, 'low': 0}
for _, _, attributes in self.graph.edges(data=True):
confidence = attributes.get('confidence_score', 0)
if confidence >= 0.8:
@@ -354,13 +417,13 @@ class GraphManager:
distribution['medium'] += 1
else:
distribution['low'] += 1
return distribution
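# Sketch of the bucketing above; the 0.6 medium cut-off is assumed to mirror
# the edge-styling threshold in get_graph_data(), since the exact elif falls
# outside this hunk.
def _bucket(confidence: float) -> str:
    if confidence >= 0.8:
        return 'high'
    elif confidence >= 0.6:  # assumption, see note above
        return 'medium'
    return 'low'
# _bucket(0.9) -> 'high', _bucket(0.7) -> 'medium', _bucket(0.5) -> 'low'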
def get_statistics(self) -> Dict[str, Any]:
"""
Get comprehensive graph statistics.
Returns:
Dictionary containing various graph metrics
"""
@@ -377,26 +440,26 @@ class GraphManager:
'confidence_distribution': self._get_confidence_distribution(),
'provider_distribution': {}
}
# Node type distribution
for node_type in NodeType:
count = len(self.get_nodes_by_type(node_type))
stats['node_type_distribution'][node_type.value] = count
# Relationship type distribution
for _, _, attributes in self.graph.edges(data=True):
rel_type = attributes.get('relationship_type', 'unknown')
stats['relationship_type_distribution'][rel_type] = \
stats['relationship_type_distribution'].get(rel_type, 0) + 1
# Provider distribution
for _, _, attributes in self.graph.edges(data=True):
provider = attributes.get('source_provider', 'unknown')
stats['provider_distribution'][provider] = \
stats['provider_distribution'].get(provider, 0) + 1
return stats
def clear(self) -> None:
"""Clear all nodes and edges from the graph."""
#with self.lock:

View File

@@ -8,6 +8,7 @@ import time
import traceback
from typing import List, Set, Dict, Any, Optional, Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError
from collections import defaultdict
from core.graph_manager import GraphManager, NodeType, RelationshipType
from core.logger import get_forensic_logger, new_session
@@ -334,9 +335,7 @@ class Scanner:
print(f"Querying {len(self.providers)} providers for domain: {domain}")
discovered_domains = set()
discovered_ips = set()
# Define a threshold for creating a "large entity" node
LARGE_ENTITY_THRESHOLD = 50
relationships_by_type = defaultdict(list)
if not self.providers or self.stop_event.is_set():
return discovered_domains, discovered_ips
@@ -355,35 +354,72 @@ class Scanner:
relationships = future.result()
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships")
# Check if the number of relationships exceeds the threshold
if len(relationships) > LARGE_ENTITY_THRESHOLD:
# Create a single "large entity" node
large_entity_id = f"large_entity_{provider.get_name()}_{domain}"
self.graph.add_node(large_entity_id, NodeType.LARGE_ENTITY, metadata={'count': len(relationships), 'provider': provider.get_name()})
self.graph.add_edge(domain, large_entity_id, RelationshipType.PASSIVE_DNS, 1.0, provider.get_name(), {})
print(f"Created large entity node for {domain} from {provider.get_name()} with {len(relationships)} relationships")
continue # Skip adding individual nodes
for rel in relationships:
relationships_by_type[rel[2]].append(rel)
for source, target, rel_type, confidence, raw_data in relationships:
if self._is_valid_ip(target):
target_node_type = NodeType.IP
discovered_ips.add(target)
elif self._is_valid_domain(target):
target_node_type = NodeType.DOMAIN
discovered_domains.add(target)
else:
target_node_type = NodeType.ASN if target.startswith('AS') else NodeType.CERTIFICATE
self.graph.add_node(source, NodeType.DOMAIN)
self.graph.add_node(target, target_node_type)
if self.graph.add_edge(source, target, rel_type, confidence, provider.get_name(), raw_data):
print(f"Added relationship: {source} -> {target} ({rel_type.relationship_name})")
except (Exception, CancelledError) as e:
print(f"Provider {provider.get_name()} failed for {domain}: {e}")
for rel_type, relationships in relationships_by_type.items():
if len(relationships) > config.large_entity_threshold and rel_type == RelationshipType.SAN_CERTIFICATE:
self._handle_large_entity(domain, relationships, rel_type, provider.get_name())
else:
for source, target, rel_type, confidence, raw_data in relationships:
# Decide whether this relationship type should create a node for the target
create_node = rel_type in [
RelationshipType.A_RECORD,
RelationshipType.AAAA_RECORD,
RelationshipType.CNAME_RECORD,
RelationshipType.MX_RECORD,
RelationshipType.NS_RECORD,
RelationshipType.PTR_RECORD,
RelationshipType.SAN_CERTIFICATE
]
# Decide whether the target should be queued for recursive scanning
recurse = rel_type in [
RelationshipType.A_RECORD,
RelationshipType.AAAA_RECORD,
RelationshipType.CNAME_RECORD,
RelationshipType.MX_RECORD,
RelationshipType.SAN_CERTIFICATE
]
if create_node:
target_node_type = NodeType.IP if self._is_valid_ip(target) else NodeType.DOMAIN
self.graph.add_node(target, target_node_type)
if self.graph.add_edge(source, target, rel_type, confidence, provider.get_name(), raw_data):
print(f"Added relationship: {source} -> {target} ({rel_type.relationship_name})")
else:
# For records that don't create nodes, we still want to log the relationship
self.logger.log_relationship_discovery(
source_node=source,
target_node=target,
relationship_type=rel_type.relationship_name,
confidence_score=confidence,
provider=provider.get_name(),
raw_data=raw_data,
discovery_method=f"dns_{rel_type.name.lower()}_record"
)
if recurse:
if self._is_valid_ip(target):
discovered_ips.add(target)
elif self._is_valid_domain(target):
discovered_domains.add(target)
print(f"Domain {domain}: discovered {len(discovered_domains)} domains, {len(discovered_ips)} IPs")
return discovered_domains, discovered_ips
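# Condensed restatement of the create_node / recurse policy above, for
# illustration only: NS and PTR targets get nodes but are not recursed into,
# TXT-style records are only logged, and A/AAAA/CNAME/MX/SAN targets get both.
CREATES_NODE = {RelationshipType.A_RECORD, RelationshipType.AAAA_RECORD,
                RelationshipType.CNAME_RECORD, RelationshipType.MX_RECORD,
                RelationshipType.NS_RECORD, RelationshipType.PTR_RECORD,
                RelationshipType.SAN_CERTIFICATE}
RECURSES = CREATES_NODE - {RelationshipType.NS_RECORD, RelationshipType.PTR_RECORD}

for rel in (RelationshipType.NS_RECORD, RelationshipType.TXT_RECORD,
            RelationshipType.CNAME_RECORD):
    print(rel.relationship_name, rel in CREATES_NODE, rel in RECURSES)
# ns_record True False | txt_record False False | cname True True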
def _handle_large_entity(self, source_domain: str, relationships: list, rel_type: RelationshipType, provider_name: str):
"""
Handles the creation of a large entity node when a threshold is exceeded.
"""
print(f"Large number of {rel_type.name} relationships for {source_domain}. Creating a large entity node.")
entity_name = f"Large collection of {rel_type.name} for {source_domain}"
self.graph.add_node(entity_name, NodeType.LARGE_ENTITY, metadata={"count": len(relationships)})
self.graph.add_edge(source_domain, entity_name, rel_type, 0.9, provider_name, {"info": "Aggregated node"})
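# Illustrative effect of the aggregation above, mirrored directly against
# GraphManager (values made up; "crtsh" is the provider key used in
# graph_manager.py): 120 SAN hits collapse into one LARGE_ENTITY node and a
# single edge instead of 120 individual domain nodes. GraphManager, NodeType
# and RelationshipType are already imported at the top of this file.
g = GraphManager()
g.add_node("example.com", NodeType.DOMAIN)
entity = "Large collection of SAN_CERTIFICATE for example.com"
g.add_node(entity, NodeType.LARGE_ENTITY, metadata={"count": 120})
g.add_edge("example.com", entity, RelationshipType.SAN_CERTIFICATE,
           0.9, "crtsh", {"info": "Aggregated node"})
print(g.get_edge_count())                          # 1
print(g.get_nodes_by_type(NodeType.LARGE_ENTITY))  # [entity name]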
def _query_providers_for_ip(self, ip: str) -> None:
"""
Query all enabled providers for information about an IP address.