# dnsrecon/core/graph_manager.py

"""
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
"""

from datetime import datetime
from typing import Dict, List, Any, Optional, Tuple
from enum import Enum
from datetime import timezone

import networkx as nx


class NodeType(Enum):
    """Kinds of node that can be stored in the graph.

    Each member's value is the string written to a node's ``type`` attribute.
    """

    DOMAIN = "domain"
    IP = "ip"
    ASN = "asn"
    DNS_RECORD = "dns_record"
    LARGE_ENTITY = "large_entity"


class RelationshipType(Enum):
    """Edge relationship kinds, each paired with a default confidence.

    Every member's value is a ``(relationship_name, default_confidence)``
    tuple; Enum unpacks that tuple into ``__init__``, so both parts are also
    available as attributes on the member.
    """

    # Certificate-derived relationship
    SAN_CERTIFICATE = ("san", 0.9)

    # Per-record-type DNS relationships
    A_RECORD = ("a_record", 0.8)
    AAAA_RECORD = ("aaaa_record", 0.8)
    CNAME_RECORD = ("cname", 0.8)
    MX_RECORD = ("mx_record", 0.7)
    NS_RECORD = ("ns_record", 0.7)
    PTR_RECORD = ("ptr_record", 0.8)
    SOA_RECORD = ("soa_record", 0.7)
    TXT_RECORD = ("txt_record", 0.7)
    SRV_RECORD = ("srv_record", 0.7)
    CAA_RECORD = ("caa_record", 0.7)
    DNSKEY_RECORD = ("dnskey_record", 0.7)
    DS_RECORD = ("ds_record", 0.7)
    RRSIG_RECORD = ("rrsig_record", 0.7)
    SSHFP_RECORD = ("sshfp_record", 0.7)
    TLSA_RECORD = ("tlsa_record", 0.7)
    NAPTR_RECORD = ("naptr_record", 0.7)
    SPF_RECORD = ("spf_record", 0.7)

    # Generic / other sources
    DNS_RECORD = ("dns_record", 0.8)
    PASSIVE_DNS = ("passive_dns", 0.6)
    ASN_MEMBERSHIP = ("asn", 0.7)

    def __init__(self, relationship_name: str, default_confidence: float):
        # Expose the two halves of the member's tuple value as attributes.
        self.relationship_name, self.default_confidence = (
            relationship_name,
            default_confidence,
        )


class GraphManager:
    """
    Graph manager for DNSRecon infrastructure mapping.
    Uses NetworkX for in-memory graph storage with confidence scoring.
    Note: no locking is currently performed, so concurrent use must be
    synchronized by the caller.
    """

    def __init__(self):
        """Initialize empty directed graph."""
        self.graph = nx.DiGraph()
        # self.lock = threading.Lock()
        self.creation_time = datetime.now(timezone.utc).isoformat()
        self.last_modified = self.creation_time

    def add_node(self, node_id: str, node_type: NodeType,
                 metadata: Optional[Dict[str, Any]] = None) -> bool:
        """
        Add a node to the graph.

        Args:
            node_id: Unique identifier for the node
            node_type: Type of the node (Domain, IP, Certificate, ASN)
            metadata: Additional metadata for the node

        Returns:
            bool: True if node was added, False if it already exists
        """
        if self.graph.has_node(node_id):
            # Update metadata if node exists
            existing_metadata = self.graph.nodes[node_id].get('metadata', {})
            if metadata:
                existing_metadata.update(metadata)
                self.graph.nodes[node_id]['metadata'] = existing_metadata
            return False

        node_attributes = {
            'type': node_type.value,
            'added_timestamp': datetime.now(timezone.utc).isoformat(),
            'metadata': metadata or {}
        }

        self.graph.add_node(node_id, **node_attributes)
        self.last_modified = datetime.now(timezone.utc).isoformat()
        return True

    def add_edge(self, source_id: str, target_id: str,
                 relationship_type: RelationshipType,
                 confidence_score: Optional[float] = None,
                 source_provider: str = "unknown",
                 raw_data: Optional[Dict[str, Any]] = None) -> bool:
        """
        Add an edge between two nodes.

        Args:
            source_id: Source node identifier
            target_id: Target node identifier
            relationship_type: Type of relationship
            confidence_score: Custom confidence score (overrides default)
            source_provider: Provider that discovered this relationship
            raw_data: Raw data from provider response

        Returns:
            bool: True if edge was added, False if it already exists
        """

        if not self.graph.has_node(source_id) or not self.graph.has_node(target_id):
            # If the target node is a subdomain, it should be added.
            # The scanner will handle this logic.
            pass

        # Check if edge already exists
        if self.graph.has_edge(source_id, target_id):
            # Update confidence score if new score is higher
            existing_confidence = self.graph.edges[source_id, target_id]['confidence_score']
            new_confidence = confidence_score or relationship_type.default_confidence

            if new_confidence > existing_confidence:
                self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
                self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
                self.graph.edges[source_id, target_id]['updated_by'] = source_provider

            return False

        edge_attributes = {
            'relationship_type': relationship_type.relationship_name,
            'confidence_score': confidence_score or relationship_type.default_confidence,
            'source_provider': source_provider,
            'discovery_timestamp': datetime.now(timezone.utc).isoformat(),
            'raw_data': raw_data or {}
        }

        self.graph.add_edge(source_id, target_id, **edge_attributes)
        self.last_modified = datetime.now(timezone.utc).isoformat()
        return True

    def get_node_count(self) -> int:
        """Get total number of nodes in the graph."""
        return self.graph.number_of_nodes()

    def get_edge_count(self) -> int:
        """Get total number of edges in the graph."""
        return self.graph.number_of_edges()

    def get_nodes_by_type(self, node_type: NodeType) -> List[str]:
        """
        Get all nodes of a specific type.

        Args:
            node_type: Type of nodes to retrieve

        Returns:
            List of node identifiers
        """
        return [
            node_id for node_id, attributes in self.graph.nodes(data=True)
            if attributes.get('type') == node_type.value
        ]

    def get_neighbors(self, node_id: str) -> List[str]:
        """
        Get all neighboring nodes (both incoming and outgoing).

        Args:
            node_id: Node identifier

        Returns:
            List of neighboring node identifiers
        """
        if not self.graph.has_node(node_id):
            return []

        predecessors = list(self.graph.predecessors(node_id))
        successors = list(self.graph.successors(node_id))
        return list(set(predecessors + successors))

    def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
        """
        Get edges with confidence score above threshold.

        Args:
            min_confidence: Minimum confidence threshold

        Returns:
            List of tuples (source, target, attributes)
        """
        return [
            (source, target, attributes)
            for source, target, attributes in self.graph.edges(data=True)
            if attributes.get('confidence_score', 0) >= min_confidence
        ]

    def get_graph_data(self) -> Dict[str, Any]:
        """
        Export graph data for visualization.
        Uses comprehensive metadata collected during scanning.
        """
        nodes = []
        edges = []

        # Create nodes with the comprehensive metadata already collected
        for node_id, attributes in self.graph.nodes(data=True):
            node_data = {
                'id': node_id,
                'label': node_id,
                'type': attributes.get('type', 'unknown'),
                'metadata': attributes.get('metadata', {}),
                'added_timestamp': attributes.get('added_timestamp')
            }

            # Handle certificate node labeling
            if node_id.startswith('cert_'):
                # For certificate nodes, create a more informative label
                cert_metadata = node_data['metadata']
                issuer = cert_metadata.get('issuer_name', 'Unknown')
                valid_status = "✓" if cert_metadata.get('is_currently_valid') else "✗"
                node_data['label'] = f"Certificate {valid_status}\n{issuer[:30]}..."

            # Color coding by type
            type_colors = {
                'domain': {
                    'background': '#00ff41',
                    'border': '#00aa2e',
                    'highlight': {'background': '#44ff75', 'border': '#00ff41'},
                    'hover': {'background': '#22ff63', 'border': '#00cc35'}
                },
                'ip': {
                    'background': '#ff9900',
                    'border': '#cc7700',
                    'highlight': {'background': '#ffbb44', 'border': '#ff9900'},
                    'hover': {'background': '#ffaa22', 'border': '#dd8800'}
                },
                'asn': {
                    'background': '#00aaff',
                    'border': '#0088cc',
                    'highlight': {'background': '#44ccff', 'border': '#00aaff'},
                    'hover': {'background': '#22bbff', 'border': '#0099dd'}
                },
                'dns_record': {
                    'background': '#9d4edd',
                    'border': '#7b2cbf',
                    'highlight': {'background': '#c77dff', 'border': '#9d4edd'},
                    'hover': {'background': '#b392f0', 'border': '#8b5cf6'}
                },
                'large_entity': {
                    'background': '#ff6b6b',
                    'border': '#cc3a3a',
                    'highlight': {'background': '#ff8c8c', 'border': '#ff6b6b'},
                    'hover': {'background': '#ff7a7a', 'border': '#dd4a4a'}
                }
            }

            node_color_config = type_colors.get(attributes.get('type', 'unknown'), type_colors['domain'])

            node_data['color'] = node_color_config

            # Add certificate validity indicator if available
            metadata = node_data['metadata']
            if 'certificate_data' in metadata and 'has_valid_cert' in metadata['certificate_data']:
                node_data['has_valid_cert'] = metadata['certificate_data']['has_valid_cert']

            nodes.append(node_data)

        # Create edges (unchanged from original)
        for source, target, attributes in self.graph.edges(data=True):
            edge_data = {
                'from': source,
                'to': target,
                'label': attributes.get('relationship_type', ''),
                'confidence_score': attributes.get('confidence_score', 0),
                'source_provider': attributes.get('source_provider', ''),
                'discovery_timestamp': attributes.get('discovery_timestamp')
            }

            # Enhanced edge styling based on confidence
            confidence = attributes.get('confidence_score', 0)
            if confidence >= 0.8:
                edge_data['color'] = {
                    'color': '#00ff41',
                    'highlight': '#44ff75',
                    'hover': '#22ff63',
                    'inherit': False
                }
                edge_data['width'] = 4
            elif confidence >= 0.6:
                edge_data['color'] = {
                    'color': '#ff9900',
                    'highlight': '#ffbb44',
                    'hover': '#ffaa22',
                    'inherit': False
                }
                edge_data['width'] = 3
            else:
                edge_data['color'] = {
                    'color': '#666666',
                    'highlight': '#888888',
                    'hover': '#777777',
                    'inherit': False
                }
                edge_data['width'] = 2

            # Add dashed line for low confidence
            if confidence < 0.6:
                edge_data['dashes'] = [5, 5]

            edges.append(edge_data)

        return {
            'nodes': nodes,
            'edges': edges,
            'statistics': {
                'node_count': len(nodes),
                'edge_count': len(edges),
                'creation_time': self.creation_time,
                'last_modified': self.last_modified
            }
        }

    def export_json(self) -> Dict[str, Any]:
        """
        Export complete graph data as JSON for download.

        Returns:
            Dictionary containing complete graph data with metadata
        """
        # Get basic graph data
        graph_data = self.get_graph_data()

        # Add comprehensive metadata
        export_data = {
            'export_metadata': {
                'export_timestamp': datetime.now(timezone.utc).isoformat(),
                'graph_creation_time': self.creation_time,
                'last_modified': self.last_modified,
                'total_nodes': self.graph.number_of_nodes(),
                'total_edges': self.graph.number_of_edges(),
                'graph_format': 'dnsrecon_v1'
            },
            'nodes': graph_data['nodes'],
            'edges': graph_data['edges'],
            'node_types': [node_type.value for node_type in NodeType],
            'relationship_types': [
                {
                    'name': rel_type.relationship_name,
                    'default_confidence': rel_type.default_confidence
                }
                for rel_type in RelationshipType
            ],
            'confidence_distribution': self._get_confidence_distribution()
        }

        return export_data

    def _get_confidence_distribution(self) -> Dict[str, int]:
        """Get distribution of confidence scores."""
        distribution = {'high': 0, 'medium': 0, 'low': 0}

        for _, _, attributes in self.graph.edges(data=True):
            confidence = attributes.get('confidence_score', 0)
            if confidence >= 0.8:
                distribution['high'] += 1
            elif confidence >= 0.6:
                distribution['medium'] += 1
            else:
                distribution['low'] += 1

        return distribution

    def get_statistics(self) -> Dict[str, Any]:
        """
        Get comprehensive graph statistics.

        Returns:
            Dictionary containing various graph metrics
        """
        stats = {
            'basic_metrics': {
                'total_nodes': self.graph.number_of_nodes(),
                'total_edges': self.graph.number_of_edges(),
                'creation_time': self.creation_time,
                'last_modified': self.last_modified
            },
            'node_type_distribution': {},
            'relationship_type_distribution': {},
            'confidence_distribution': self._get_confidence_distribution(),
            'provider_distribution': {}
        }

        # Node type distribution
        for node_type in NodeType:
            count = len(self.get_nodes_by_type(node_type))
            stats['node_type_distribution'][node_type.value] = count

        # Relationship type distribution
        for _, _, attributes in self.graph.edges(data=True):
            rel_type = attributes.get('relationship_type', 'unknown')
            stats['relationship_type_distribution'][rel_type] = \
                stats['relationship_type_distribution'].get(rel_type, 0) + 1

        # Provider distribution
        for _, _, attributes in self.graph.edges(data=True):
            provider = attributes.get('source_provider', 'unknown')
            stats['provider_distribution'][provider] = \
                stats['provider_distribution'].get(provider, 0) + 1

        return stats

    def clear(self) -> None:
        """Clear all nodes and edges from the graph."""
        self.graph.clear()
        self.creation_time = datetime.now(timezone.utc).isoformat()
        self.last_modified = self.creation_time