Implement new data API
@@ -1,8 +1,9 @@
# core/graph_manager.py
# dnsrecon-reduced/core/graph_manager.py

"""
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
Now fully compatible with the unified ProviderResult data model.
"""
import re
from datetime import datetime, timezone

@@ -28,6 +29,7 @@ class GraphManager:
    """
    Thread-safe graph manager for DNSRecon infrastructure mapping.
    Uses NetworkX for in-memory graph storage with confidence scoring.
    Compatible with unified ProviderResult data model.
    """

    def __init__(self):

@@ -192,21 +194,36 @@ class GraphManager:
            })
        return all_correlations

    def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[Dict[str, Any]] = None,
    def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[List[Dict[str, Any]]] = None,
                 description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool:
        """Add a node to the graph, update attributes, and process correlations."""
        """
        Add a node to the graph, update attributes, and process correlations.
        Now compatible with unified data model - attributes are dictionaries from converted StandardAttribute objects.
        """
        is_new_node = not self.graph.has_node(node_id)
        if is_new_node:
            self.graph.add_node(node_id, type=node_type.value,
                                added_timestamp=datetime.now(timezone.utc).isoformat(),
                                attributes=attributes or {},
                                attributes=attributes or [],  # Store as a list from the start
                                description=description,
                                metadata=metadata or {})
        else:
            # Safely merge new attributes into existing attributes
            # Safely merge new attributes into the existing list of attributes
            if attributes:
                existing_attributes = self.graph.nodes[node_id].get('attributes', {})
                existing_attributes.update(attributes)
                existing_attributes = self.graph.nodes[node_id].get('attributes', [])

                # Handle cases where old data might still be in dictionary format
                if not isinstance(existing_attributes, list):
                    existing_attributes = []

                # Create a set of existing attribute names for efficient duplicate checking
                existing_attr_names = {attr['name'] for attr in existing_attributes}

                for new_attr in attributes:
                    if new_attr['name'] not in existing_attr_names:
                        existing_attributes.append(new_attr)
                        existing_attr_names.add(new_attr['name'])

                self.graph.nodes[node_id]['attributes'] = existing_attributes
        if description:
            self.graph.nodes[node_id]['description'] = description
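The rewritten merge branch above keeps each node's attributes as a list of dictionaries and de-duplicates by the 'name' key instead of dict-updating. The following standalone sketch (not part of the diff) mirrors that behaviour with plain lists and dicts; the sample attribute values are made up for illustration:

def merge_attributes(existing, incoming):
    """Append only attributes whose 'name' is not already present (same rule as add_node)."""
    if not isinstance(existing, list):  # tolerate legacy dict-format data
        existing = []
    seen = {attr['name'] for attr in existing}
    for attr in incoming:
        if attr['name'] not in seen:
            existing.append(attr)
            seen.add(attr['name'])
    return existing

node_attrs = [{'name': 'dns_records', 'value': ['A: 198.51.100.7'], 'provider': 'dns', 'confidence': 0.8}]
new_attrs = [
    {'name': 'dns_records', 'value': ['A: 198.51.100.8'], 'provider': 'dns', 'confidence': 0.8},  # dropped (duplicate name)
    {'name': 'asn', 'value': 'AS64500', 'provider': 'shodan', 'confidence': 0.7},                 # appended
]
print(merge_attributes(node_attrs, new_attrs))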
@@ -485,19 +502,28 @@ class GraphManager:
                if d.get('confidence_score', 0) >= min_confidence]

    def get_graph_data(self) -> Dict[str, Any]:
        """Export graph data formatted for frontend visualization."""
        """
        Export graph data formatted for frontend visualization.
        Compatible with unified data model - preserves all attribute information for frontend display.
        """
        nodes = []
        for node_id, attrs in self.graph.nodes(data=True):
            node_data = {'id': node_id, 'label': node_id, 'type': attrs.get('type', 'unknown'),
                         'attributes': attrs.get('attributes', {}),
                         'attributes': attrs.get('attributes', []),  # Ensure attributes is a list
                         'description': attrs.get('description', ''),
                         'metadata': attrs.get('metadata', {}),
                         'added_timestamp': attrs.get('added_timestamp')}

            # Customize node appearance based on type and attributes
            node_type = node_data['type']
            attributes = node_data['attributes']
            if node_type == 'domain' and attributes.get('certificates', {}).get('has_valid_cert') is False:
                node_data['color'] = {'background': '#c7c7c7', 'border': '#999'}  # Gray for invalid cert
            attributes_list = node_data['attributes']

            # CORRECTED LOGIC: Handle certificate validity styling
            if node_type == 'domain' and isinstance(attributes_list, list):
                # Find the certificates attribute in the list
                cert_attr = next((attr for attr in attributes_list if attr.get('name') == 'certificates'), None)
                if cert_attr and cert_attr.get('value', {}).get('has_valid_cert') is False:
                    node_data['color'] = {'background': '#c7c7c7', 'border': '#999'}  # Gray for invalid cert

            # Add incoming and outgoing edges to node data
            if self.graph.has_node(node_id):

@@ -528,7 +554,7 @@ class GraphManager:
                'last_modified': self.last_modified,
                'total_nodes': self.get_node_count(),
                'total_edges': self.get_edge_count(),
                'graph_format': 'dnsrecon_v1_nodeling'
                'graph_format': 'dnsrecon_v1_unified_model'
            },
            'graph': graph_data,
            'statistics': self.get_statistics()
core/provider_result.py (new file, 106 lines)
@@ -0,0 +1,106 @@
# dnsrecon-reduced/core/provider_result.py

"""
Unified data model for DNSRecon passive reconnaissance.
Standardizes the data structure across all providers to ensure consistent processing.
"""

from typing import Any, Optional, List, Dict
from dataclasses import dataclass, field
from datetime import datetime, timezone


@dataclass
class StandardAttribute:
    """A unified data structure for a single piece of information about a node."""
    target_node: str
    name: str
    value: Any
    type: str
    provider: str
    confidence: float
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    metadata: Optional[Dict[str, Any]] = field(default_factory=dict)

    def __post_init__(self):
        """Validate the attribute after initialization."""
        if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")


@dataclass
class Relationship:
    """A unified data structure for a directional link between two nodes."""
    source_node: str
    target_node: str
    relationship_type: str
    confidence: float
    provider: str
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    raw_data: Optional[Dict[str, Any]] = field(default_factory=dict)

    def __post_init__(self):
        """Validate the relationship after initialization."""
        if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")


@dataclass
class ProviderResult:
    """A container for all data returned by a provider from a single query."""
    attributes: List[StandardAttribute] = field(default_factory=list)
    relationships: List[Relationship] = field(default_factory=list)

    def add_attribute(self, target_node: str, name: str, value: Any, attr_type: str,
                      provider: str, confidence: float = 0.8,
                      metadata: Optional[Dict[str, Any]] = None) -> None:
        """Helper method to add an attribute to the result."""
        self.attributes.append(StandardAttribute(
            target_node=target_node,
            name=name,
            value=value,
            type=attr_type,
            provider=provider,
            confidence=confidence,
            metadata=metadata or {}
        ))

    def add_relationship(self, source_node: str, target_node: str, relationship_type: str,
                         provider: str, confidence: float = 0.8,
                         raw_data: Optional[Dict[str, Any]] = None) -> None:
        """Helper method to add a relationship to the result."""
        self.relationships.append(Relationship(
            source_node=source_node,
            target_node=target_node,
            relationship_type=relationship_type,
            confidence=confidence,
            provider=provider,
            raw_data=raw_data or {}
        ))

    def get_discovered_nodes(self) -> set:
        """Get all unique node identifiers discovered in this result."""
        nodes = set()

        # Add nodes from relationships
        for rel in self.relationships:
            nodes.add(rel.source_node)
            nodes.add(rel.target_node)

        # Add nodes from attributes
        for attr in self.attributes:
            nodes.add(attr.target_node)

        return nodes

    def get_relationship_count(self) -> int:
        """Get the total number of relationships in this result."""
        return len(self.relationships)

    def get_attribute_count(self) -> int:
        """Get the total number of attributes in this result."""
        return len(self.attributes)

    def is_large_entity(self, threshold: int) -> bool:
        """Check if this result qualifies as a large entity based on relationship count."""
        return self.get_relationship_count() > threshold
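As a usage illustration of the container defined above (not part of the commit), a provider would populate a ProviderResult roughly like this; the domain names, IP, and relationship-type strings are examples only:

from core.provider_result import ProviderResult

result = ProviderResult()
result.add_relationship(source_node="example.com", target_node="www.example.com",
                        relationship_type="san_certificate", provider="crtsh", confidence=0.9)
result.add_relationship(source_node="example.com", target_node="198.51.100.7",
                        relationship_type="a_record", provider="dns")
result.add_attribute(target_node="example.com", name="dns_records",
                     value=["A: 198.51.100.7"], attr_type="dns_record", provider="dns")

print(result.get_relationship_count())        # 2
print(result.get_attribute_count())           # 1
print(sorted(result.get_discovered_nodes()))  # ['198.51.100.7', 'example.com', 'www.example.com']
print(result.is_large_entity(threshold=100))  # False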
@@ -1,7 +1,6 @@
# dnsrecon-reduced/core/rate_limiter.py

import time
import redis

class GlobalRateLimiter:
    def __init__(self, redis_client):
core/scanner.py (529 lines changed)
@@ -2,18 +2,18 @@

import threading
import traceback
import time
import os
import importlib
import redis
from typing import List, Set, Dict, Any, Tuple, Optional
from concurrent.futures import ThreadPoolExecutor, as_completed, CancelledError, Future
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
from queue import PriorityQueue
from datetime import datetime, timezone

from core.graph_manager import GraphManager, NodeType
from core.logger import get_forensic_logger, new_session
from core.provider_result import ProviderResult
from utils.helpers import _is_valid_ip, _is_valid_domain
from providers.base_provider import BaseProvider
from core.rate_limiter import GlobalRateLimiter
@@ -30,6 +30,7 @@ class ScanStatus:

class Scanner:
    """
    Main scanning orchestrator for DNSRecon passive reconnaissance.
    Now provider-agnostic, consuming standardized ProviderResult objects.
    """

    def __init__(self, session_config=None):

@@ -470,6 +471,10 @@ class Scanner:
        print(f" - Tasks processed: {len(processed_tasks)}")

    def _query_single_provider_for_target(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]:
        """
        Query a single provider and process the unified ProviderResult.
        Now provider-agnostic - handles any provider that returns ProviderResult.
        """
        if self._is_stop_requested():
            print(f"Stop requested before querying {provider.get_name()} for {target}")
            return set(), set(), False
@@ -478,21 +483,24 @@ class Scanner:
        target_type = NodeType.IP if is_ip else NodeType.DOMAIN
        print(f"Querying {provider.get_name()} for {target_type.value}: {target} at depth {depth}")

        # Ensure target node exists in graph
        self.graph.add_node(target, target_type)
        self._initialize_provider_states(target)

        new_targets = set()
        large_entity_members = set()
        node_attributes = defaultdict(lambda: defaultdict(list))
        provider_successful = True

        try:
            provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
            if provider_results is None:
            # Query provider - now returns unified ProviderResult
            provider_result = self._query_single_provider_unified(provider, target, is_ip, depth)

            if provider_result is None:
                provider_successful = False
            elif not self._is_stop_requested():
                discovered, is_large_entity = self._process_provider_results(
                    target, provider, provider_results, node_attributes, depth
                # Process the unified result
                discovered, is_large_entity = self._process_provider_result_unified(
                    target, provider, provider_result, depth
                )
                if is_large_entity:
                    large_entity_members.update(discovered)
@@ -504,15 +512,177 @@ class Scanner:
            provider_successful = False
            self._log_provider_error(target, provider.get_name(), str(e))

        if not self._is_stop_requested():
            for node_id, attributes in node_attributes.items():
                if self.graph.graph.has_node(node_id):
                    node_is_ip = _is_valid_ip(node_id)
                    node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
                    self.graph.add_node(node_id, node_type_to_add, attributes=attributes)

        return new_targets, large_entity_members, provider_successful

    def _query_single_provider_unified(self, provider: BaseProvider, target: str, is_ip: bool, current_depth: int) -> Optional[ProviderResult]:
        """
        Query a single provider with stop signal checking, now returns ProviderResult.
        """
        provider_name = provider.get_name()
        start_time = datetime.now(timezone.utc)

        if self._is_stop_requested():
            print(f"Stop requested before querying {provider_name} for {target}")
            return None

        print(f"Querying {provider_name} for {target}")

        self.logger.logger.info(f"Attempting {provider_name} query for {target} at depth {current_depth}")

        try:
            # Query the provider - returns unified ProviderResult
            if is_ip:
                result = provider.query_ip(target)
            else:
                result = provider.query_domain(target)

            if self._is_stop_requested():
                print(f"Stop requested after querying {provider_name} for {target}")
                return None

            # Update provider state with relationship count (more meaningful than raw result count)
            relationship_count = result.get_relationship_count() if result else 0
            self._update_provider_state(target, provider_name, 'success', relationship_count, None, start_time)

            print(f"✓ {provider_name} returned {relationship_count} relationships for {target}")
            return result

        except Exception as e:
            self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
            print(f"✗ {provider_name} failed for {target}: {e}")
            return None

    def _process_provider_result_unified(self, target: str, provider: BaseProvider,
                                         provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
        """
        Process a unified ProviderResult object to update the graph.
        Returns (discovered_targets, is_large_entity).
        """
        provider_name = provider.get_name()
        discovered_targets = set()

        if self._is_stop_requested():
            print(f"Stop requested before processing results from {provider_name} for {target}")
            return discovered_targets, False

        # Check for large entity based on relationship count
        if provider_result.get_relationship_count() > self.config.large_entity_threshold:
            print(f"Large entity detected: {provider_name} returned {provider_result.get_relationship_count()} relationships for {target}")
            members = self._create_large_entity_from_provider_result(target, provider_name, provider_result, current_depth)
            return members, True

        # Process relationships
        for i, relationship in enumerate(provider_result.relationships):
            if i % 5 == 0 and self._is_stop_requested():  # Check periodically for stop
                print(f"Stop requested while processing relationships from {provider_name} for {target}")
                break

            # Add nodes for relationship endpoints
            source_node = relationship.source_node
            target_node = relationship.target_node

            # Determine node types
            source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
            if target_node.startswith('AS') and target_node[2:].isdigit():
                target_type = NodeType.ASN
            elif _is_valid_ip(target_node):
                target_type = NodeType.IP
            else:
                target_type = NodeType.DOMAIN

            # Add nodes to graph
            self.graph.add_node(source_node, source_type)
            self.graph.add_node(target_node, target_type)

            # Add edge to graph
            if self.graph.add_edge(
                source_node, target_node,
                relationship.relationship_type,
                relationship.confidence,
                provider_name,
                relationship.raw_data
            ):
                print(f"Added relationship: {source_node} -> {target_node} ({relationship.relationship_type})")

            # Track discovered targets for further processing
            if _is_valid_domain(target_node) or _is_valid_ip(target_node):
                discovered_targets.add(target_node)

        # Process attributes, preserving them as a list of objects
        attributes_by_node = defaultdict(list)
        for attribute in provider_result.attributes:
            # Convert the StandardAttribute object to a dictionary that the frontend can use
            attr_dict = {
                "name": attribute.name,
                "value": attribute.value,
                "type": attribute.type,
                "provider": attribute.provider,
                "confidence": attribute.confidence,
                "metadata": attribute.metadata
            }
            attributes_by_node[attribute.target_node].append(attr_dict)

        # Add attributes to nodes
        for node_id, node_attributes_list in attributes_by_node.items():
            if self.graph.graph.has_node(node_id):
                # Determine node type
                if _is_valid_ip(node_id):
                    node_type = NodeType.IP
                elif node_id.startswith('AS') and node_id[2:].isdigit():
                    node_type = NodeType.ASN
                else:
                    node_type = NodeType.DOMAIN

                # Add node with the list of attributes
                self.graph.add_node(node_id, node_type, attributes=node_attributes_list)

        return discovered_targets, False
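The endpoint typing above (ASN if the identifier is 'AS' followed by digits, otherwise IP, otherwise domain) is repeated in the attribute loop as well. A standalone sketch of the same rule, outside the diff; it substitutes the standard-library ipaddress check for utils.helpers._is_valid_ip, so it is an approximation rather than the project's helper:

import ipaddress

def classify_node(identifier: str) -> str:
    # Same precedence as the scanner: ASN, then IP, then fall back to domain.
    if identifier.startswith('AS') and identifier[2:].isdigit():
        return 'asn'
    try:
        ipaddress.ip_address(identifier)
        return 'ip'
    except ValueError:
        return 'domain'

print(classify_node('AS64500'))       # asn
print(classify_node('198.51.100.7'))  # ip
print(classify_node('example.com'))   # domain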
    def _create_large_entity_from_provider_result(self, source: str, provider_name: str,
                                                  provider_result: ProviderResult, current_depth: int) -> Set[str]:
        """
        Create a large entity node from a ProviderResult and return the members for DNS processing.
        """
        entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}"

        # Extract target nodes from relationships
        targets = [rel.target_node for rel in provider_result.relationships]
        node_type = 'unknown'

        if targets:
            if _is_valid_domain(targets[0]):
                node_type = 'domain'
            elif _is_valid_ip(targets[0]):
                node_type = 'ip'

        # Create nodes in graph (they exist but are grouped)
        for target in targets:
            target_node_type = NodeType.DOMAIN if node_type == 'domain' else NodeType.IP
            self.graph.add_node(target, target_node_type)

        attributes = {
            'count': len(targets),
            'nodes': targets,
            'node_type': node_type,
            'source_provider': provider_name,
            'discovery_depth': current_depth,
            'threshold_exceeded': self.config.large_entity_threshold,
        }
        description = f'Large entity created due to {len(targets)} relationships from {provider_name}'

        self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes, description=description)

        # Create edge from source to large entity
        if provider_result.relationships:
            rel_type = provider_result.relationships[0].relationship_type
            self.graph.add_edge(source, entity_id, rel_type, 0.9, provider_name,
                                {'large_entity_info': f'Contains {len(targets)} {node_type}s'})

        self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}")
        print(f"Created large entity {entity_id} for {len(targets)} {node_type}s from {provider_name}")

        return set(targets)
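For orientation, the attribute payload stored on a large-entity node has roughly the shape below, and extract_node_from_large_entity later reads discovery_depth back out of it when re-queueing an extracted member. The concrete values and the priority number are illustrative only (the real priority comes from _get_priority):

large_entity_attributes = {
    'count': 3,
    'nodes': ['a.example.com', 'b.example.com', 'c.example.com'],
    'node_type': 'domain',
    'source_provider': 'crtsh',
    'discovery_depth': 1,
    'threshold_exceeded': 2,  # the configured large_entity_threshold at creation time
}

# Task tuple shape used when re-queueing a member: (priority, (provider_name, node_id, depth))
current_depth = large_entity_attributes.get('discovery_depth', 0)
task = (1, ('crtsh', 'a.example.com', current_depth))
print(task)  # (1, ('crtsh', 'a.example.com', 1))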
    def stop_scan(self) -> bool:
        """Request immediate scan termination with proper cleanup."""
        try:

@@ -558,6 +728,73 @@ class Scanner:
            traceback.print_exc()
            return False

    def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
        """
        Extracts a node from a large entity, re-creates its original edge, and
        re-queues it for full scanning.
        """
        if not self.graph.graph.has_node(large_entity_id):
            print(f"ERROR: Large entity {large_entity_id} not found.")
            return False

        # 1. Get the original source node that discovered the large entity
        predecessors = list(self.graph.graph.predecessors(large_entity_id))
        if not predecessors:
            print(f"ERROR: No source node found for large entity {large_entity_id}.")
            return False
        source_node_id = predecessors[0]

        # Get the original edge data to replicate it for the extracted node
        original_edge_data = self.graph.graph.get_edge_data(source_node_id, large_entity_id)
        if not original_edge_data:
            print(f"ERROR: Could not find original edge data from {source_node_id} to {large_entity_id}.")
            return False

        # 2. Modify the graph data structure first
        success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract)
        if not success:
            print(f"ERROR: Node {node_id_to_extract} could not be removed from {large_entity_id}'s attributes.")
            return False

        # 3. Create the direct edge from the original source to the newly extracted node
        print(f"Re-creating direct edge from {source_node_id} to extracted node {node_id_to_extract}")
        self.graph.add_edge(
            source_id=source_node_id,
            target_id=node_id_to_extract,
            relationship_type=original_edge_data.get('relationship_type', 'extracted_from_large_entity'),
            confidence_score=original_edge_data.get('confidence_score', 0.85),  # Slightly lower confidence
            source_provider=original_edge_data.get('source_provider', 'unknown'),
            raw_data={'context': f'Extracted from large entity {large_entity_id}'}
        )

        # 4. Re-queue the extracted node for full processing by all eligible providers
        print(f"Re-queueing extracted node {node_id_to_extract} for full reconnaissance...")
        is_ip = _is_valid_ip(node_id_to_extract)
        current_depth = self.graph.graph.nodes[large_entity_id].get('attributes', {}).get('discovery_depth', 0)

        eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False)
        for provider in eligible_providers:
            provider_name = provider.get_name()
            self.task_queue.put((self._get_priority(provider_name), (provider_name, node_id_to_extract, current_depth)))
            self.total_tasks_ever_enqueued += 1

        # 5. If the scanner is not running, we need to kickstart it to process this one item.
        if self.status != ScanStatus.RUNNING:
            print("Scanner is idle. Starting a mini-scan to process the extracted node.")
            self.status = ScanStatus.RUNNING
            self._update_session_state()

            if not self.scan_thread or not self.scan_thread.is_alive():
                self.scan_thread = threading.Thread(
                    target=self._execute_scan,
                    args=(self.current_target, self.max_depth),
                    daemon=True
                )
                self.scan_thread.start()

        print(f"Successfully extracted and re-queued {node_id_to_extract} from {large_entity_id}.")
        return True

    def _update_session_state(self) -> None:
        """
        Update the scanner state in Redis for GUI updates.
@@ -656,39 +893,6 @@ class Scanner:
        provider_state = provider_states.get(provider_name)
        return provider_state is not None and provider_state.get('status') == 'success'

    def _query_single_provider_forensic(self, provider, target: str, is_ip: bool, current_depth: int) -> Optional[List]:
        """Query a single provider with stop signal checking."""
        provider_name = provider.get_name()
        start_time = datetime.now(timezone.utc)

        if self._is_stop_requested():
            print(f"Stop requested before querying {provider_name} for {target}")
            return None

        print(f"Querying {provider_name} for {target}")

        self.logger.logger.info(f"Attempting {provider_name} query for {target} at depth {current_depth}")

        try:
            if is_ip:
                results = provider.query_ip(target)
            else:
                results = provider.query_domain(target)

            if self._is_stop_requested():
                print(f"Stop requested after querying {provider_name} for {target}")
                return None

            self._update_provider_state(target, provider_name, 'success', len(results), None, start_time)

            print(f"✓ {provider_name} returned {len(results)} results for {target}")
            return results

        except Exception as e:
            self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
            print(f"✗ {provider_name} failed for {target}: {e}")
            return None

    def _update_provider_state(self, target: str, provider_name: str, status: str,
                               results_count: int, error: Optional[str], start_time: datetime) -> None:
        """Update provider state in node metadata for forensic tracking."""
@@ -711,237 +915,6 @@ class Scanner:

        self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)")

    def _process_provider_results(self, target: str, provider, results: List,
                                  node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]:
        """Process provider results, returns (discovered_targets, is_large_entity)."""
        provider_name = provider.get_name()
        discovered_targets = set()

        if self._is_stop_requested():
            print(f"Stop requested before processing results from {provider_name} for {target}")
            return discovered_targets, False

        if len(results) > self.config.large_entity_threshold:
            print(f"Large entity detected: {provider_name} returned {len(results)} results for {target}")
            members = self._create_large_entity(target, provider_name, results, current_depth)
            return members, True

        for i, (source, rel_target, rel_type, confidence, raw_data) in enumerate(results):
            if i % 5 == 0 and self._is_stop_requested():  # Check more frequently
                print(f"Stop requested while processing results from {provider_name} for {target}")
                break

            self.logger.log_relationship_discovery(
                source_node=source,
                target_node=rel_target,
                relationship_type=rel_type,
                confidence_score=confidence,
                provider=provider_name,
                raw_data=raw_data,
                discovery_method=f"{provider_name}_query_depth_{current_depth}"
            )

            # Collect attributes for the source node
            self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])

            # If the relationship is asn_membership, collect attributes for the target ASN node
            if rel_type == 'asn_membership':
                self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target])

            if isinstance(rel_target, list):
                # If the target is a list, iterate and process each item
                for single_target in rel_target:
                    if _is_valid_ip(single_target):
                        self.graph.add_node(single_target, NodeType.IP)
                        if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data):
                            print(f"Added IP relationship: {source} -> {single_target} ({rel_type})")
                        discovered_targets.add(single_target)
                    elif _is_valid_domain(single_target):
                        self.graph.add_node(single_target, NodeType.DOMAIN)
                        if self.graph.add_edge(source, single_target, rel_type, confidence, provider_name, raw_data):
                            print(f"Added domain relationship: {source} -> {single_target} ({rel_type})")
                        discovered_targets.add(single_target)
                        self._collect_node_attributes(single_target, provider_name, rel_type, source, raw_data, node_attributes[single_target])

            elif _is_valid_ip(rel_target):
                self.graph.add_node(rel_target, NodeType.IP)
                if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
                    print(f"Added IP relationship: {source} -> {rel_target} ({rel_type})")
                discovered_targets.add(rel_target)

            elif rel_target.startswith('AS') and rel_target[2:].isdigit():
                self.graph.add_node(rel_target, NodeType.ASN)
                if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
                    print(f"Added ASN relationship: {source} -> {rel_target} ({rel_type})")

            elif _is_valid_domain(rel_target):
                self.graph.add_node(rel_target, NodeType.DOMAIN)
                if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
                    print(f"Added domain relationship: {source} -> {rel_target} ({rel_type})")
                discovered_targets.add(rel_target)
                self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target])

            else:
                self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])

        return discovered_targets, False

    def _create_large_entity(self, source: str, provider_name: str, results: List, current_depth: int) -> Set[str]:
        """Create a large entity node and returns the members for DNS processing."""
        entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}"

        targets = [rel[1] for rel in results if len(rel) > 1]
        node_type = 'unknown'

        if targets:
            if _is_valid_domain(targets[0]):
                node_type = 'domain'
            elif _is_valid_ip(targets[0]):
                node_type = 'ip'

        # We still create the nodes so they exist in the graph, they are just not processed for edges yet.
        for target in targets:
            self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP)

        attributes = {
            'count': len(targets),
            'nodes': targets,
            'node_type': node_type,
            'source_provider': provider_name,
            'discovery_depth': current_depth,
            'threshold_exceeded': self.config.large_entity_threshold,
        }
        description = f'Large entity created due to {len(targets)} results from {provider_name}'

        self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes, description=description)

        if results:
            rel_type = results[0][2]
            self.graph.add_edge(source, entity_id, rel_type, 0.9, provider_name,
                                {'large_entity_info': f'Contains {len(targets)} {node_type}s'})

        self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}")
        print(f"Created large entity {entity_id} for {len(targets)} {node_type}s from {provider_name}")

        return set(targets)

    def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
        """
        Extracts a node from a large entity, re-creates its original edge, and
        re-queues it for full scanning.
        """
        if not self.graph.graph.has_node(large_entity_id):
            print(f"ERROR: Large entity {large_entity_id} not found.")
            return False

        # 1. Get the original source node that discovered the large entity
        predecessors = list(self.graph.graph.predecessors(large_entity_id))
        if not predecessors:
            print(f"ERROR: No source node found for large entity {large_entity_id}.")
            return False
        source_node_id = predecessors[0]

        # Get the original edge data to replicate it for the extracted node
        original_edge_data = self.graph.graph.get_edge_data(source_node_id, large_entity_id)
        if not original_edge_data:
            print(f"ERROR: Could not find original edge data from {source_node_id} to {large_entity_id}.")
            return False

        # 2. Modify the graph data structure first
        success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract)
        if not success:
            print(f"ERROR: Node {node_id_to_extract} could not be removed from {large_entity_id}'s attributes.")
            return False

        # 3. Create the direct edge from the original source to the newly extracted node
        print(f"Re-creating direct edge from {source_node_id} to extracted node {node_id_to_extract}")
        self.graph.add_edge(
            source_id=source_node_id,
            target_id=node_id_to_extract,
            relationship_type=original_edge_data.get('relationship_type', 'extracted_from_large_entity'),
            confidence_score=original_edge_data.get('confidence_score', 0.85),  # Slightly lower confidence
            source_provider=original_edge_data.get('source_provider', 'unknown'),
            raw_data={'context': f'Extracted from large entity {large_entity_id}'}
        )

        # 4. Re-queue the extracted node for full processing by all eligible providers
        print(f"Re-queueing extracted node {node_id_to_extract} for full reconnaissance...")
        is_ip = _is_valid_ip(node_id_to_extract)
        current_depth = self.graph.graph.nodes[large_entity_id].get('attributes', {}).get('discovery_depth', 0)

        eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False)
        for provider in eligible_providers:
            provider_name = provider.get_name()
            self.task_queue.put((self._get_priority(provider_name), (provider_name, node_id_to_extract, current_depth)))
            self.total_tasks_ever_enqueued += 1

        # 5. If the scanner is not running, we need to kickstart it to process this one item.
        if self.status != ScanStatus.RUNNING:
            print("Scanner is idle. Starting a mini-scan to process the extracted node.")
            self.status = ScanStatus.RUNNING
            self._update_session_state()

            if not self.scan_thread or not self.scan_thread.is_alive():
                self.scan_thread = threading.Thread(
                    target=self._execute_scan,
                    args=(self.current_target, self.max_depth),
                    daemon=True
                )
                self.scan_thread.start()

        print(f"Successfully extracted and re-queued {node_id_to_extract} from {large_entity_id}.")
        return True

    def _collect_node_attributes(self, node_id: str, provider_name: str, rel_type: str,
                                 target: str, raw_data: Dict[str, Any], attributes: Dict[str, Any]) -> None:
        """Collect and organize attributes for a node."""
        self.logger.logger.debug(f"Collecting attributes for {node_id} from {provider_name}: {rel_type}")

        if provider_name == 'dns':
            record_type = raw_data.get('query_type', 'UNKNOWN')
            value = raw_data.get('value', target)
            dns_entry = f"{record_type}: {value}"
            if dns_entry not in attributes.get('dns_records', []):
                attributes.setdefault('dns_records', []).append(dns_entry)

        elif provider_name == 'crtsh':
            if rel_type == "san_certificate":
                domain_certs = raw_data.get('domain_certificates', {})
                if node_id in domain_certs:
                    cert_summary = domain_certs[node_id]
                    attributes['certificates'] = cert_summary
                if target not in attributes.get('related_domains_san', []):
                    attributes.setdefault('related_domains_san', []).append(target)

        elif provider_name == 'shodan':
            # This logic will now apply to the correct node (ASN or IP)
            shodan_attributes = attributes.setdefault('shodan', {})
            for key, value in raw_data.items():
                if key not in shodan_attributes or not shodan_attributes.get(key):
                    shodan_attributes[key] = value

            if _is_valid_ip(node_id):
                if 'ports' in raw_data:
                    attributes['ports'] = raw_data['ports']
                if 'os' in raw_data and raw_data['os']:
                    attributes['os'] = raw_data['os']

        if rel_type == "asn_membership":
            # This is the key change: these attributes are for the target (the ASN),
            # not the source (the IP). We will add them to the ASN node later.
            pass

        record_type_name = rel_type
        if record_type_name not in attributes:
            attributes[record_type_name] = []

        if isinstance(target, list):
            attributes[record_type_name].extend(target)
        else:
            if target not in attributes[record_type_name]:
                attributes[record_type_name].append(target)

    def _log_target_processing_error(self, target: str, error: str) -> None:
        """Log target processing errors for forensic trail."""
        self.logger.logger.error(f"Target processing failed for {target}: {error}")
@@ -5,15 +5,11 @@ import time
import uuid
import redis
import pickle
from typing import Dict, Optional, Any, List
from typing import Dict, Optional, Any

from core.scanner import Scanner
from config import config

# WARNING: Using pickle can be a security risk if the data source is not trusted.
# In this case, we are only serializing/deserializing our own trusted Scanner objects,
# which is generally safe. Do not unpickle data from untrusted sources.
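A hypothetical sketch of the pickle round-trip the warning describes; the Redis key naming and the helper names below are assumptions for illustration and are not part of this diff:

import pickle
import redis

r = redis.Redis()

def save_scanner(session_id: str, scanner) -> None:
    # Serialize one of our own trusted Scanner objects into Redis.
    r.set(f"dnsrecon:session:{session_id}", pickle.dumps(scanner))

def load_scanner(session_id: str):
    # Only unpickle data this application wrote itself.
    blob = r.get(f"dnsrecon:session:{session_id}")
    return pickle.loads(blob) if blob is not None else None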
class SessionManager:
    """
    Manages multiple scanner instances for concurrent user sessions using Redis.