it
This commit is contained in:
@@ -9,6 +9,7 @@ from datetime import datetime
|
||||
from typing import Dict, List, Any, Optional, Tuple, Set
|
||||
from enum import Enum
|
||||
from datetime import timezone
|
||||
from collections import defaultdict
|
||||
|
||||
import networkx as nx
|
||||
|
||||
@@ -24,14 +25,28 @@ class NodeType(Enum):
|
||||
|
||||
class RelationshipType(Enum):
    """Enumeration of supported relationship types with confidence scores.

    Each member's value is a ``(relationship_name, default_confidence)``
    tuple, which ``__init__`` unpacks into the two attributes of the same
    names.

    Every member is declared exactly once: ``Enum`` raises ``TypeError``
    when a member name is reused inside the class body.
    """

    SAN_CERTIFICATE = ("san", 0.9)  # Certificate SAN relationships
    A_RECORD = ("a_record", 0.8)
    AAAA_RECORD = ("aaaa_record", 0.8)
    CNAME_RECORD = ("cname", 0.8)  # CNAME relationships
    MX_RECORD = ("mx_record", 0.7)  # MX record relationships
    NS_RECORD = ("ns_record", 0.7)  # NS record relationships
    PTR_RECORD = ("ptr_record", 0.8)
    SOA_RECORD = ("soa_record", 0.7)
    TXT_RECORD = ("txt_record", 0.7)
    SRV_RECORD = ("srv_record", 0.7)
    CAA_RECORD = ("caa_record", 0.7)
    DNSKEY_RECORD = ("dnskey_record", 0.7)
    DS_RECORD = ("ds_record", 0.7)
    RRSIG_RECORD = ("rrsig_record", 0.7)
    SSHFP_RECORD = ("sshfp_record", 0.7)
    TLSA_RECORD = ("tlsa_record", 0.7)
    NAPTR_RECORD = ("naptr_record", 0.7)
    SPF_RECORD = ("spf_record", 0.7)
    PASSIVE_DNS = ("passive_dns", 0.6)  # Passive DNS relationships
    ASN_MEMBERSHIP = ("asn", 0.7)  # ASN relationships

    def __init__(self, relationship_name: str, default_confidence: float):
        """Unpack the member's value tuple into named attributes.

        Args:
            relationship_name: Short string label for the relationship.
            default_confidence: Confidence score stored as the member's default.
        """
        self.relationship_name = relationship_name
        self.default_confidence = default_confidence
|
||||
@@ -42,24 +57,24 @@ class GraphManager:
|
||||
Thread-safe graph manager for DNSRecon infrastructure mapping.
|
||||
Uses NetworkX for in-memory graph storage with confidence scoring.
|
||||
"""
|
||||
|
||||
|
||||
def __init__(self):
    """Set up an empty directed graph and record when it was created.

    ``creation_time`` and ``last_modified`` both start out holding an
    ISO-8601 timestamp taken in UTC.
    """
    self.graph = nx.DiGraph()
    # NOTE: no lock is created here; the lock assignment is disabled.
    # self.lock = threading.Lock()
    created_at = datetime.now(timezone.utc).isoformat()
    self.creation_time = created_at
    self.last_modified = created_at
|
||||
|
||||
def add_node(self, node_id: str, node_type: NodeType,
|
||||
|
||||
def add_node(self, node_id: str, node_type: NodeType,
|
||||
metadata: Optional[Dict[str, Any]] = None) -> bool:
|
||||
"""
|
||||
Add a node to the graph.
|
||||
|
||||
|
||||
Args:
|
||||
node_id: Unique identifier for the node
|
||||
node_type: Type of the node (Domain, IP, Certificate, ASN)
|
||||
metadata: Additional metadata for the node
|
||||
|
||||
|
||||
Returns:
|
||||
bool: True if node was added, False if it already exists
|
||||
"""
|
||||
@@ -70,33 +85,33 @@ class GraphManager:
|
||||
existing_metadata.update(metadata)
|
||||
self.graph.nodes[node_id]['metadata'] = existing_metadata
|
||||
return False
|
||||
|
||||
|
||||
node_attributes = {
|
||||
'type': node_type.value,
|
||||
'added_timestamp': datetime.now(timezone.utc).isoformat(),
|
||||
'metadata': metadata or {}
|
||||
}
|
||||
|
||||
|
||||
self.graph.add_node(node_id, **node_attributes)
|
||||
self.last_modified = datetime.now(timezone.utc).isoformat()
|
||||
return True
|
||||
|
||||
def add_edge(self, source_id: str, target_id: str,
|
||||
|
||||
def add_edge(self, source_id: str, target_id: str,
|
||||
relationship_type: RelationshipType,
|
||||
confidence_score: Optional[float] = None,
|
||||
source_provider: str = "unknown",
|
||||
raw_data: Optional[Dict[str, Any]] = None) -> bool:
|
||||
"""
|
||||
Add an edge between two nodes.
|
||||
|
||||
|
||||
Args:
|
||||
source_id: Source node identifier
|
||||
target_id: Target node identifier
|
||||
target_id: Target node identifier
|
||||
relationship_type: Type of relationship
|
||||
confidence_score: Custom confidence score (overrides default)
|
||||
source_provider: Provider that discovered this relationship
|
||||
raw_data: Raw data from provider response
|
||||
|
||||
|
||||
Returns:
|
||||
bool: True if edge was added, False if it already exists
|
||||
"""
|
||||
@@ -112,14 +127,14 @@ class GraphManager:
|
||||
# Update confidence score if new score is higher
|
||||
existing_confidence = self.graph.edges[source_id, target_id]['confidence_score']
|
||||
new_confidence = confidence_score or relationship_type.default_confidence
|
||||
|
||||
|
||||
if new_confidence > existing_confidence:
|
||||
self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
|
||||
self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
|
||||
self.graph.edges[source_id, target_id]['updated_by'] = source_provider
|
||||
|
||||
|
||||
return False
|
||||
|
||||
|
||||
edge_attributes = {
|
||||
'relationship_type': relationship_type.relationship_name,
|
||||
'confidence_score': confidence_score or relationship_type.default_confidence,
|
||||
@@ -127,7 +142,7 @@ class GraphManager:
|
||||
'discovery_timestamp': datetime.now(timezone.utc).isoformat(),
|
||||
'raw_data': raw_data or {}
|
||||
}
|
||||
|
||||
|
||||
self.graph.add_edge(source_id, target_id, **edge_attributes)
|
||||
self.last_modified = datetime.now(timezone.utc).isoformat()
|
||||
return True
|
||||
@@ -136,19 +151,19 @@ class GraphManager:
|
||||
"""Get total number of nodes in the graph."""
|
||||
#with self.lock:
|
||||
return self.graph.number_of_nodes()
|
||||
|
||||
|
||||
def get_edge_count(self) -> int:
    """Return how many edges the graph currently holds."""
    # Runs without holding a lock; the lock usage is disabled here.
    edge_total = self.graph.number_of_edges()
    return edge_total
|
||||
|
||||
|
||||
def get_nodes_by_type(self, node_type: NodeType) -> List[str]:
|
||||
"""
|
||||
Get all nodes of a specific type.
|
||||
|
||||
|
||||
Args:
|
||||
node_type: Type of nodes to retrieve
|
||||
|
||||
|
||||
Returns:
|
||||
List of node identifiers
|
||||
"""
|
||||
@@ -157,32 +172,32 @@ class GraphManager:
|
||||
node_id for node_id, attributes in self.graph.nodes(data=True)
|
||||
if attributes.get('type') == node_type.value
|
||||
]
|
||||
|
||||
|
||||
def get_neighbors(self, node_id: str) -> List[str]:
    """
    Collect every node directly connected to ``node_id``.

    Edges are followed in both directions, so predecessors and successors
    are both included; a node that is both appears only once.

    Args:
        node_id: Node identifier

    Returns:
        List of distinct neighboring node identifiers (in no particular
        order), or an empty list when the node is not in the graph
    """
    # Runs without holding a lock; the lock usage is disabled here.
    if self.graph.has_node(node_id):
        adjacent = set()
        # Incoming edges first, then outgoing ones.
        adjacent.update(self.graph.predecessors(node_id))
        adjacent.update(self.graph.successors(node_id))
        return list(adjacent)

    return []
|
||||
|
||||
|
||||
def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
|
||||
"""
|
||||
Get edges with confidence score above threshold.
|
||||
|
||||
|
||||
Args:
|
||||
min_confidence: Minimum confidence threshold
|
||||
|
||||
|
||||
Returns:
|
||||
List of tuples (source, target, attributes)
|
||||
"""
|
||||
@@ -192,18 +207,49 @@ class GraphManager:
|
||||
for source, target, attributes in self.graph.edges(data=True)
|
||||
if attributes.get('confidence_score', 0) >= min_confidence
|
||||
]
|
||||
|
||||
|
||||
def get_graph_data(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Export graph data for visualization.
|
||||
|
||||
|
||||
Returns:
|
||||
Dictionary containing nodes and edges for frontend visualization
|
||||
"""
|
||||
#with self.lock:
|
||||
nodes = []
|
||||
edges = []
|
||||
|
||||
|
||||
# Create a dictionary to hold aggregated data for each node
|
||||
node_details = defaultdict(lambda: defaultdict(list))
|
||||
|
||||
for source, target, attributes in self.graph.edges(data=True):
|
||||
provider = attributes.get('source_provider', 'unknown')
|
||||
raw_data = attributes.get('raw_data', {})
|
||||
|
||||
if provider == 'dns':
|
||||
record_type = raw_data.get('query_type', 'UNKNOWN')
|
||||
value = raw_data.get('value', target)
|
||||
# DNS data is always about the source node of the query
|
||||
node_details[source]['dns_records'].append(f"{record_type}: {value}")
|
||||
|
||||
elif provider == 'crtsh':
|
||||
# Data from crt.sh are domain names found in certificates (SANs)
|
||||
node_details[source]['related_domains_san'].append(target)
|
||||
|
||||
elif provider == 'shodan':
|
||||
# Shodan data is about the IP, which can be either the source or target
|
||||
source_node_type = self.graph.nodes[source].get('type')
|
||||
target_node_type = self.graph.nodes[target].get('type')
|
||||
|
||||
if source_node_type == 'ip':
|
||||
node_details[source]['shodan'] = raw_data
|
||||
elif target_node_type == 'ip':
|
||||
node_details[target]['shodan'] = raw_data
|
||||
|
||||
elif provider == 'virustotal':
|
||||
# VirusTotal data is about the source node of the query
|
||||
node_details[source]['virustotal'] = raw_data
|
||||
|
||||
# Format nodes for visualization
|
||||
for node_id, attributes in self.graph.nodes(data=True):
|
||||
node_data = {
|
||||
@@ -213,7 +259,18 @@ class GraphManager:
|
||||
'metadata': attributes.get('metadata', {}),
|
||||
'added_timestamp': attributes.get('added_timestamp')
|
||||
}
|
||||
|
||||
|
||||
# Add the aggregated details to the metadata
|
||||
if node_id in node_details:
|
||||
for key, value in node_details[node_id].items():
|
||||
# Use a set to avoid adding duplicate entries to lists
|
||||
if key in node_data['metadata'] and isinstance(node_data['metadata'][key], list):
|
||||
existing_values = set(node_data['metadata'][key])
|
||||
new_values = [v for v in value if v not in existing_values]
|
||||
node_data['metadata'][key].extend(new_values)
|
||||
else:
|
||||
node_data['metadata'][key] = value
|
||||
|
||||
# Color coding by type - now returns color objects for enhanced visualization
|
||||
type_colors = {
|
||||
'domain': {
|
||||
@@ -239,18 +296,24 @@ class GraphManager:
|
||||
'border': '#0088cc',
|
||||
'highlight': {'background': '#44ccff', 'border': '#00aaff'},
|
||||
'hover': {'background': '#22bbff', 'border': '#0099dd'}
|
||||
},
|
||||
'large_entity': {
|
||||
'background': '#ff6b6b',
|
||||
'border': '#cc3a3a',
|
||||
'highlight': {'background': '#ff8c8c', 'border': '#ff6b6b'},
|
||||
'hover': {'background': '#ff7a7a', 'border': '#dd4a4a'}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
node_color_config = type_colors.get(attributes.get('type', 'unknown'), type_colors['domain'])
|
||||
node_data['color'] = node_color_config
|
||||
|
||||
|
||||
# Pass the has_valid_cert metadata for styling
|
||||
if 'metadata' in attributes and 'has_valid_cert' in attributes['metadata']:
|
||||
node_data['has_valid_cert'] = attributes['metadata']['has_valid_cert']
|
||||
|
||||
nodes.append(node_data)
|
||||
|
||||
|
||||
# Format edges for visualization
|
||||
for source, target, attributes in self.graph.edges(data=True):
|
||||
edge_data = {
|
||||
@@ -261,7 +324,7 @@ class GraphManager:
|
||||
'source_provider': attributes.get('source_provider', ''),
|
||||
'discovery_timestamp': attributes.get('discovery_timestamp')
|
||||
}
|
||||
|
||||
|
||||
# Enhanced edge styling based on confidence
|
||||
confidence = attributes.get('confidence_score', 0)
|
||||
if confidence >= 0.8:
|
||||
@@ -275,7 +338,7 @@ class GraphManager:
|
||||
elif confidence >= 0.6:
|
||||
edge_data['color'] = {
|
||||
'color': '#ff9900',
|
||||
'highlight': '#ffbb44',
|
||||
'highlight': '#ffbb44',
|
||||
'hover': '#ffaa22',
|
||||
'inherit': False
|
||||
}
|
||||
@@ -288,13 +351,13 @@ class GraphManager:
|
||||
'inherit': False
|
||||
}
|
||||
edge_data['width'] = 2
|
||||
|
||||
|
||||
# Add dashed line for low confidence
|
||||
if confidence < 0.6:
|
||||
edge_data['dashes'] = [5, 5]
|
||||
|
||||
|
||||
edges.append(edge_data)
|
||||
|
||||
|
||||
return {
|
||||
'nodes': nodes,
|
||||
'edges': edges,
|
||||
@@ -305,18 +368,18 @@ class GraphManager:
|
||||
'last_modified': self.last_modified
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
def export_json(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Export complete graph data as JSON for download.
|
||||
|
||||
|
||||
Returns:
|
||||
Dictionary containing complete graph data with metadata
|
||||
"""
|
||||
#with self.lock:
|
||||
# Get basic graph data
|
||||
graph_data = self.get_graph_data()
|
||||
|
||||
|
||||
# Add comprehensive metadata
|
||||
export_data = {
|
||||
'export_metadata': {
|
||||
@@ -339,13 +402,13 @@ class GraphManager:
|
||||
],
|
||||
'confidence_distribution': self._get_confidence_distribution()
|
||||
}
|
||||
|
||||
|
||||
return export_data
|
||||
|
||||
|
||||
def _get_confidence_distribution(self) -> Dict[str, int]:
|
||||
"""Get distribution of confidence scores."""
|
||||
distribution = {'high': 0, 'medium': 0, 'low': 0}
|
||||
|
||||
|
||||
for _, _, attributes in self.graph.edges(data=True):
|
||||
confidence = attributes.get('confidence_score', 0)
|
||||
if confidence >= 0.8:
|
||||
@@ -354,13 +417,13 @@ class GraphManager:
|
||||
distribution['medium'] += 1
|
||||
else:
|
||||
distribution['low'] += 1
|
||||
|
||||
|
||||
return distribution
|
||||
|
||||
|
||||
def get_statistics(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get comprehensive graph statistics.
|
||||
|
||||
|
||||
Returns:
|
||||
Dictionary containing various graph metrics
|
||||
"""
|
||||
@@ -377,26 +440,26 @@ class GraphManager:
|
||||
'confidence_distribution': self._get_confidence_distribution(),
|
||||
'provider_distribution': {}
|
||||
}
|
||||
|
||||
|
||||
# Node type distribution
|
||||
for node_type in NodeType:
|
||||
count = len(self.get_nodes_by_type(node_type))
|
||||
stats['node_type_distribution'][node_type.value] = count
|
||||
|
||||
|
||||
# Relationship type distribution
|
||||
for _, _, attributes in self.graph.edges(data=True):
|
||||
rel_type = attributes.get('relationship_type', 'unknown')
|
||||
stats['relationship_type_distribution'][rel_type] = \
|
||||
stats['relationship_type_distribution'].get(rel_type, 0) + 1
|
||||
|
||||
|
||||
# Provider distribution
|
||||
for _, _, attributes in self.graph.edges(data=True):
|
||||
provider = attributes.get('source_provider', 'unknown')
|
||||
stats['provider_distribution'][provider] = \
|
||||
stats['provider_distribution'].get(provider, 0) + 1
|
||||
|
||||
|
||||
return stats
|
||||
|
||||
|
||||
def clear(self) -> None:
|
||||
"""Clear all nodes and edges from the graph."""
|
||||
#with self.lock:
|
||||
|
||||
@@ -8,6 +8,7 @@ import time
|
||||
import traceback
|
||||
from typing import List, Set, Dict, Any, Optional, Tuple
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError
|
||||
from collections import defaultdict
|
||||
|
||||
from core.graph_manager import GraphManager, NodeType, RelationshipType
|
||||
from core.logger import get_forensic_logger, new_session
|
||||
@@ -334,9 +335,7 @@ class Scanner:
|
||||
print(f"Querying {len(self.providers)} providers for domain: {domain}")
|
||||
discovered_domains = set()
|
||||
discovered_ips = set()
|
||||
|
||||
# Define a threshold for creating a "large entity" node
|
||||
LARGE_ENTITY_THRESHOLD = 50
|
||||
relationships_by_type = defaultdict(list)
|
||||
|
||||
if not self.providers or self.stop_event.is_set():
|
||||
return discovered_domains, discovered_ips
|
||||
@@ -355,35 +354,72 @@ class Scanner:
|
||||
relationships = future.result()
|
||||
print(f"Provider {provider.get_name()} returned {len(relationships)} relationships")
|
||||
|
||||
# Check if the number of relationships exceeds the threshold
|
||||
if len(relationships) > LARGE_ENTITY_THRESHOLD:
|
||||
# Create a single "large entity" node
|
||||
large_entity_id = f"large_entity_{provider.get_name()}_{domain}"
|
||||
self.graph.add_node(large_entity_id, NodeType.LARGE_ENTITY, metadata={'count': len(relationships), 'provider': provider.get_name()})
|
||||
self.graph.add_edge(domain, large_entity_id, RelationshipType.PASSIVE_DNS, 1.0, provider.get_name(), {})
|
||||
print(f"Created large entity node for {domain} from {provider.get_name()} with {len(relationships)} relationships")
|
||||
continue # Skip adding individual nodes
|
||||
for rel in relationships:
|
||||
relationships_by_type[rel[2]].append(rel)
|
||||
|
||||
for source, target, rel_type, confidence, raw_data in relationships:
|
||||
if self._is_valid_ip(target):
|
||||
target_node_type = NodeType.IP
|
||||
discovered_ips.add(target)
|
||||
elif self._is_valid_domain(target):
|
||||
target_node_type = NodeType.DOMAIN
|
||||
discovered_domains.add(target)
|
||||
else:
|
||||
target_node_type = NodeType.ASN if target.startswith('AS') else NodeType.CERTIFICATE
|
||||
|
||||
self.graph.add_node(source, NodeType.DOMAIN)
|
||||
self.graph.add_node(target, target_node_type)
|
||||
if self.graph.add_edge(source, target, rel_type, confidence, provider.get_name(), raw_data):
|
||||
print(f"Added relationship: {source} -> {target} ({rel_type.relationship_name})")
|
||||
except (Exception, CancelledError) as e:
|
||||
print(f"Provider {provider.get_name()} failed for {domain}: {e}")
|
||||
|
||||
for rel_type, relationships in relationships_by_type.items():
|
||||
if len(relationships) > config.large_entity_threshold and rel_type == RelationshipType.SAN_CERTIFICATE:
|
||||
self._handle_large_entity(domain, relationships, rel_type, provider.get_name())
|
||||
else:
|
||||
for source, target, rel_type, confidence, raw_data in relationships:
|
||||
# Determine if the target should create a new node
|
||||
create_node = rel_type in [
|
||||
RelationshipType.A_RECORD,
|
||||
RelationshipType.AAAA_RECORD,
|
||||
RelationshipType.CNAME_RECORD,
|
||||
RelationshipType.MX_RECORD,
|
||||
RelationshipType.NS_RECORD,
|
||||
RelationshipType.PTR_RECORD,
|
||||
RelationshipType.SAN_CERTIFICATE
|
||||
]
|
||||
|
||||
# Determine if the target should be subject to recursion
|
||||
recurse = rel_type in [
|
||||
RelationshipType.A_RECORD,
|
||||
RelationshipType.AAAA_RECORD,
|
||||
RelationshipType.CNAME_RECORD,
|
||||
RelationshipType.MX_RECORD,
|
||||
RelationshipType.SAN_CERTIFICATE
|
||||
]
|
||||
|
||||
if create_node:
|
||||
target_node_type = NodeType.IP if self._is_valid_ip(target) else NodeType.DOMAIN
|
||||
self.graph.add_node(target, target_node_type)
|
||||
if self.graph.add_edge(source, target, rel_type, confidence, provider.get_name(), raw_data):
|
||||
print(f"Added relationship: {source} -> {target} ({rel_type.relationship_name})")
|
||||
else:
|
||||
# For records that don't create nodes, we still want to log the relationship
|
||||
self.logger.log_relationship_discovery(
|
||||
source_node=source,
|
||||
target_node=target,
|
||||
relationship_type=rel_type.relationship_name,
|
||||
confidence_score=confidence,
|
||||
provider=provider.name,
|
||||
raw_data=raw_data,
|
||||
discovery_method=f"dns_{rel_type.name.lower()}_record"
|
||||
)
|
||||
|
||||
if recurse:
|
||||
if self._is_valid_ip(target):
|
||||
discovered_ips.add(target)
|
||||
elif self._is_valid_domain(target):
|
||||
discovered_domains.add(target)
|
||||
|
||||
print(f"Domain {domain}: discovered {len(discovered_domains)} domains, {len(discovered_ips)} IPs")
|
||||
return discovered_domains, discovered_ips
|
||||
|
||||
def _handle_large_entity(self, source_domain: str, relationships: list, rel_type: RelationshipType, provider_name: str):
    """
    Collapse an oversized batch of relationships into one aggregate node.

    A single LARGE_ENTITY node is added whose metadata records how many
    relationships were supplied, and one edge links ``source_domain`` to it
    with a fixed confidence of 0.9. The individual relationships themselves
    are not added to the graph by this method.
    """
    type_label = rel_type.name
    print(f"Large number of {type_label} relationships for {source_domain}. Creating a large entity node.")

    # The aggregate node is identified by a human-readable label.
    entity_name = f"Large collection of {type_label} for {source_domain}"
    self.graph.add_node(
        entity_name,
        NodeType.LARGE_ENTITY,
        metadata={"count": len(relationships)},
    )
    self.graph.add_edge(
        source_domain,
        entity_name,
        rel_type,
        0.9,
        provider_name,
        {"info": "Aggregated node"},
    )
|
||||
|
||||
def _query_providers_for_ip(self, ip: str) -> None:
|
||||
"""
|
||||
Query all enabled providers for information about an IP address.
|
||||
|
||||
Reference in New Issue
Block a user