404 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			404 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
"""
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
"""
 | 
						|
 | 
						|
import json
 | 
						|
import threading
 | 
						|
from datetime import datetime
 | 
						|
from typing import Dict, List, Any, Optional, Tuple, Set
 | 
						|
from enum import Enum
 | 
						|
from datetime import timezone
 | 
						|
 | 
						|
import networkx as nx
 | 
						|
 | 
						|
 | 
						|
class NodeType(Enum):
    """Kinds of node that can be stored in the graph.

    Each member's value is the string written to a node's ``type``
    attribute.
    """

    # A DNS name.
    DOMAIN = "domain"

    # An IP address.
    IP = "ip"

    # A certificate.
    CERTIFICATE = "certificate"

    # An autonomous system number.
    ASN = "asn"
 | 
						|
 | 
						|
 | 
						|
class RelationshipType(Enum):
    """Kinds of edge that can be stored in the graph.

    Every member's value is a ``(relationship_name, default_confidence)``
    tuple; ``__init__`` unpacks it into the two attributes of the same
    names so callers can read them directly from the member.
    """

    # Certificate SAN relationships
    SAN_CERTIFICATE = ("san", 0.9)
    # A/AAAA record relationships
    A_RECORD = ("a_record", 0.8)
    # CNAME relationships
    CNAME_RECORD = ("cname", 0.8)
    # Passive DNS relationships
    PASSIVE_DNS = ("passive_dns", 0.6)
    # ASN relationships
    ASN_MEMBERSHIP = ("asn", 0.7)
    # MX record relationships
    MX_RECORD = ("mx_record", 0.7)
    # NS record relationships
    NS_RECORD = ("ns_record", 0.7)

    def __init__(self, relationship_name: str, default_confidence: float):
        # Enum passes the elements of the member's tuple value positionally.
        self.relationship_name, self.default_confidence = (
            relationship_name,
            default_confidence,
        )
 | 
						|
 | 
						|
 | 
						|
class GraphManager:
    """
    In-memory graph manager for DNSRecon infrastructure mapping.

    Wraps a NetworkX ``DiGraph``. Nodes carry a ``type``, an
    ``added_timestamp`` and a free-form ``metadata`` dict; edges carry a
    ``relationship_type``, a ``confidence_score`` and provenance fields.

    NOTE: this class performs no locking of its own (no lock attribute is
    created), so callers that share one instance between threads must
    synchronise access themselves.
    """

    # Confidence tier boundaries shared by edge styling and statistics.
    _HIGH_CONFIDENCE = 0.8
    _MEDIUM_CONFIDENCE = 0.6

    # Node colours per node type, stored as
    # (background, border, highlight background, highlight border,
    #  hover background, hover border).
    _NODE_PALETTE = {
        'domain': ('#00ff41', '#00aa2e', '#44ff75', '#00ff41', '#22ff63', '#00cc35'),
        'ip': ('#ff9900', '#cc7700', '#ffbb44', '#ff9900', '#ffaa22', '#dd8800'),
        'certificate': ('#c7c7c7', '#999999', '#e0e0e0', '#c7c7c7', '#d4d4d4', '#aaaaaa'),
        'asn': ('#00aaff', '#0088cc', '#44ccff', '#00aaff', '#22bbff', '#0099dd'),
    }

    # Edge styling per confidence tier: (color, highlight, hover, width).
    _EDGE_STYLES = {
        'high': ('#00ff41', '#44ff75', '#22ff63', 4),
        'medium': ('#ff9900', '#ffbb44', '#ffaa22', 3),
        'low': ('#666666', '#888888', '#777777', 2),
    }

    def __init__(self):
        """Initialize empty directed graph."""
        self.graph = nx.DiGraph()
        self.creation_time = self._utc_now_iso()
        self.last_modified = self.creation_time

    @staticmethod
    def _utc_now_iso() -> str:
        """Return the current UTC time as an ISO-8601 string."""
        return datetime.now(timezone.utc).isoformat()

    @classmethod
    def _confidence_tier(cls, confidence: float) -> str:
        """Map a confidence score to ``'high'``, ``'medium'`` or ``'low'``."""
        if confidence >= cls._HIGH_CONFIDENCE:
            return 'high'
        if confidence >= cls._MEDIUM_CONFIDENCE:
            return 'medium'
        return 'low'

    def add_node(self, node_id: str, node_type: NodeType,
                 metadata: Optional[Dict[str, Any]] = None) -> bool:
        """
        Add a node to the graph, or merge metadata into an existing one.

        Args:
            node_id: Unique identifier for the node
            node_type: Type of the node (Domain, IP, Certificate, ASN)
            metadata: Additional metadata for the node

        Returns:
            bool: True if node was added, False if it already exists
        """
        if self.graph.has_node(node_id):
            attributes = self.graph.nodes[node_id]
            changed = False

            # A node created implicitly by add_edge() has no attributes yet;
            # backfill them so it is not exported as type 'unknown'.
            if 'type' not in attributes:
                attributes['type'] = node_type.value
                changed = True
            if 'added_timestamp' not in attributes:
                attributes['added_timestamp'] = self._utc_now_iso()
                changed = True

            if metadata:
                attributes.setdefault('metadata', {}).update(metadata)
                changed = True

            if changed:
                self.last_modified = self._utc_now_iso()
            return False

        now = self._utc_now_iso()
        self.graph.add_node(
            node_id,
            type=node_type.value,
            added_timestamp=now,
            # Copy so later merges never mutate the caller's dict.
            metadata=dict(metadata) if metadata else {},
        )
        self.last_modified = now
        return True

    def add_edge(self, source_id: str, target_id: str,
                 relationship_type: RelationshipType,
                 confidence_score: Optional[float] = None,
                 source_provider: str = "unknown",
                 raw_data: Optional[Dict[str, Any]] = None) -> bool:
        """
        Add an edge between two nodes.

        If the edge already exists it is left in place; only its confidence
        score is raised (and ``updated_timestamp`` / ``updated_by`` set) when
        the new score is strictly higher than the stored one.

        Endpoints that are not yet in the graph are created implicitly by
        NetworkX without any attributes; a later ``add_node`` call fills in
        their type and timestamp.

        Args:
            source_id: Source node identifier
            target_id: Target node identifier
            relationship_type: Type of relationship
            confidence_score: Custom confidence score (overrides default)
            source_provider: Provider that discovered this relationship
            raw_data: Raw data from provider response

        Returns:
            bool: True if edge was added, False if it already exists
        """
        # Compare against None explicitly: 0.0 is a legitimate custom score
        # and must not fall back to the relationship default.
        if confidence_score is None:
            effective_confidence = relationship_type.default_confidence
        else:
            effective_confidence = confidence_score

        now = self._utc_now_iso()

        if self.graph.has_edge(source_id, target_id):
            edge = self.graph.edges[source_id, target_id]
            if effective_confidence > edge.get('confidence_score', 0):
                edge['confidence_score'] = effective_confidence
                edge['updated_timestamp'] = now
                edge['updated_by'] = source_provider
                self.last_modified = now
            return False

        self.graph.add_edge(
            source_id,
            target_id,
            relationship_type=relationship_type.relationship_name,
            confidence_score=effective_confidence,
            source_provider=source_provider,
            discovery_timestamp=now,
            raw_data=raw_data or {},
        )
        self.last_modified = now
        return True

    def get_node_count(self) -> int:
        """Get total number of nodes in the graph."""
        return self.graph.number_of_nodes()

    def get_edge_count(self) -> int:
        """Get total number of edges in the graph."""
        return self.graph.number_of_edges()

    def get_nodes_by_type(self, node_type: NodeType) -> List[str]:
        """
        Get all nodes of a specific type.

        Args:
            node_type: Type of nodes to retrieve

        Returns:
            List of node identifiers
        """
        wanted = node_type.value
        return [
            node_id
            for node_id, attributes in self.graph.nodes(data=True)
            if attributes.get('type') == wanted
        ]

    def get_neighbors(self, node_id: str) -> List[str]:
        """
        Get all neighboring nodes (both incoming and outgoing).

        Args:
            node_id: Node identifier

        Returns:
            List of unique neighboring node identifiers, predecessors first,
            each in graph iteration order. Empty if the node is unknown.
        """
        if not self.graph.has_node(node_id):
            return []

        # dict.fromkeys de-duplicates while keeping a deterministic order.
        unique = dict.fromkeys(self.graph.predecessors(node_id))
        unique.update(dict.fromkeys(self.graph.successors(node_id)))
        return list(unique)

    def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
        """
        Get edges whose confidence score is at or above a threshold.

        Args:
            min_confidence: Minimum confidence threshold (inclusive)

        Returns:
            List of tuples (source, target, attributes)
        """
        return [
            (source, target, attributes)
            for source, target, attributes in self.graph.edges(data=True)
            if attributes.get('confidence_score', 0) >= min_confidence
        ]

    def _format_node(self, node_id: str, attributes: Dict[str, Any]) -> Dict[str, Any]:
        """Build the visualization record for a single node."""
        node_type = attributes.get('type', 'unknown')

        # Unknown types fall back to the domain colours.
        palette = self._NODE_PALETTE.get(node_type, self._NODE_PALETTE['domain'])
        background, border, hl_background, hl_border, hover_background, hover_border = palette

        node_data = {
            'id': node_id,
            'label': node_id,
            'type': node_type,
            'metadata': attributes.get('metadata', {}),
            'added_timestamp': attributes.get('added_timestamp'),
            # A fresh dict per node, so consumers may mutate it safely.
            'color': {
                'background': background,
                'border': border,
                'highlight': {'background': hl_background, 'border': hl_border},
                'hover': {'background': hover_background, 'border': hover_border},
            },
        }

        # Pass the has_valid_cert metadata for styling
        if 'metadata' in attributes and 'has_valid_cert' in attributes['metadata']:
            node_data['has_valid_cert'] = attributes['metadata']['has_valid_cert']

        return node_data

    def _format_edge(self, source: str, target: str,
                     attributes: Dict[str, Any]) -> Dict[str, Any]:
        """Build the visualization record for a single edge."""
        confidence = attributes.get('confidence_score', 0)
        color, highlight, hover, width = self._EDGE_STYLES[self._confidence_tier(confidence)]

        edge_data = {
            'from': source,
            'to': target,
            'label': attributes.get('relationship_type', ''),
            'confidence_score': confidence,
            'source_provider': attributes.get('source_provider', ''),
            'discovery_timestamp': attributes.get('discovery_timestamp'),
            'color': {
                'color': color,
                'highlight': highlight,
                'hover': hover,
                'inherit': False,
            },
            'width': width,
        }

        # Add dashed line for low confidence
        if confidence < self._MEDIUM_CONFIDENCE:
            edge_data['dashes'] = [5, 5]

        return edge_data

    def get_graph_data(self) -> Dict[str, Any]:
        """
        Export graph data for visualization.

        Returns:
            Dictionary with ``nodes`` and ``edges`` lists formatted for the
            frontend, plus a ``statistics`` summary.
        """
        nodes = [
            self._format_node(node_id, attributes)
            for node_id, attributes in self.graph.nodes(data=True)
        ]
        edges = [
            self._format_edge(source, target, attributes)
            for source, target, attributes in self.graph.edges(data=True)
        ]

        return {
            'nodes': nodes,
            'edges': edges,
            'statistics': {
                'node_count': len(nodes),
                'edge_count': len(edges),
                'creation_time': self.creation_time,
                'last_modified': self.last_modified,
            },
        }

    def export_json(self) -> Dict[str, Any]:
        """
        Export complete graph data as a JSON-ready dictionary for download.

        Returns:
            Dictionary containing complete graph data with metadata
        """
        graph_data = self.get_graph_data()

        return {
            'export_metadata': {
                'export_timestamp': self._utc_now_iso(),
                'graph_creation_time': self.creation_time,
                'last_modified': self.last_modified,
                'total_nodes': self.graph.number_of_nodes(),
                'total_edges': self.graph.number_of_edges(),
                'graph_format': 'dnsrecon_v1',
            },
            'nodes': graph_data['nodes'],
            'edges': graph_data['edges'],
            'node_types': [node_type.value for node_type in NodeType],
            'relationship_types': [
                {
                    'name': rel_type.relationship_name,
                    'default_confidence': rel_type.default_confidence,
                }
                for rel_type in RelationshipType
            ],
            'confidence_distribution': self._get_confidence_distribution(),
        }

    def _get_confidence_distribution(self) -> Dict[str, int]:
        """Count edges per confidence tier (high / medium / low)."""
        distribution = {'high': 0, 'medium': 0, 'low': 0}

        for _, _, attributes in self.graph.edges(data=True):
            tier = self._confidence_tier(attributes.get('confidence_score', 0))
            distribution[tier] += 1

        return distribution

    def get_statistics(self) -> Dict[str, Any]:
        """
        Get comprehensive graph statistics.

        Returns:
            Dictionary containing basic metrics and the distribution of node
            types, relationship types, confidence tiers and providers.
        """
        # Every known node type is reported, even with a count of zero.
        node_types: Dict[str, int] = {node_type.value: 0 for node_type in NodeType}
        for _, attributes in self.graph.nodes(data=True):
            type_name = attributes.get('type')
            if type_name in node_types:
                node_types[type_name] += 1

        # Single pass over the edges for both per-edge distributions.
        relationships: Dict[str, int] = {}
        providers: Dict[str, int] = {}
        for _, _, attributes in self.graph.edges(data=True):
            rel_type = attributes.get('relationship_type', 'unknown')
            relationships[rel_type] = relationships.get(rel_type, 0) + 1

            provider = attributes.get('source_provider', 'unknown')
            providers[provider] = providers.get(provider, 0) + 1

        return {
            'basic_metrics': {
                'total_nodes': self.graph.number_of_nodes(),
                'total_edges': self.graph.number_of_edges(),
                'creation_time': self.creation_time,
                'last_modified': self.last_modified,
            },
            'node_type_distribution': node_types,
            'relationship_type_distribution': relationships,
            'confidence_distribution': self._get_confidence_distribution(),
            'provider_distribution': providers,
        }

    def clear(self) -> None:
        """Clear all nodes and edges from the graph and reset both timestamps."""
        self.graph.clear()
        self.creation_time = self._utc_now_iso()
        self.last_modified = self.creation_time