gradient

2025-09-24 09:30:42 +02:00
parent 571912218e
commit 897bb80183
15 changed files with 541 additions and 335 deletions
--- a/core/graph_manager.py
+++ b/core/graph_manager.py
@@ -2,7 +2,7 @@

 """
 Graph data model for DNScope using NetworkX.
-Manages in-memory graph storage with confidence scoring and forensic metadata.
+Manages in-memory graph storage with forensic metadata.
 Now fully compatible with the unified ProviderResult data model.
 UPDATED: Fixed correlation exclusion keys to match actual attribute names.
 UPDATED: Removed export_json() method - now handled by ExportManager.
@@ -31,7 +31,7 @@ class NodeType(Enum):
 class GraphManager:
    """
    Thread-safe graph manager for DNScope infrastructure mapping.
-    Uses NetworkX for in-memory graph storage with confidence scoring.
+    Uses NetworkX for in-memory graph storage.
    Compatible with unified ProviderResult data model.
    """

@@ -83,7 +83,7 @@ class GraphManager:
        return is_new_node

    def add_edge(self, source_id: str, target_id: str, relationship_type: str,
-                confidence_score: float = 0.5, source_provider: str = "unknown",
+                source_provider: str = "unknown",
                raw_data: Optional[Dict[str, Any]] = None) -> bool:
        """
        UPDATED: Add or update an edge between two nodes with raw relationship labels.
@@ -91,23 +91,13 @@ class GraphManager:
        if not self.graph.has_node(source_id) or not self.graph.has_node(target_id):
            return False

-        new_confidence = confidence_score
        
        # UPDATED: Use raw relationship type - no formatting
        edge_label = relationship_type
-        
-        if self.graph.has_edge(source_id, target_id):
-            # If edge exists, update confidence if the new score is higher.
-            if new_confidence > self.graph.edges[source_id, target_id].get('confidence_score', 0):
-                self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
-                self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
-                self.graph.edges[source_id, target_id]['updated_by'] = source_provider
-            return False

        # Add a new edge with raw attributes
        self.graph.add_edge(source_id, target_id,
                            relationship_type=edge_label,
-                            confidence_score=new_confidence,
                            source_provider=source_provider,
                            discovery_timestamp=datetime.now(timezone.utc).isoformat(),
                            raw_data=raw_data or {})
@@ -137,11 +127,6 @@ class GraphManager:
        """Get all nodes of a specific type."""
        return [n for n, d in self.graph.nodes(data=True) if d.get('type') == node_type.value]

-    def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
-        """Get edges with confidence score above a given threshold."""
-        return [(u, v, d) for u, v, d in self.graph.edges(data=True)
-                if d.get('confidence_score', 0) >= min_confidence]
-
    def get_graph_data(self) -> Dict[str, Any]:
        """
        Export graph data formatted for frontend visualization.
@@ -177,7 +162,6 @@ class GraphManager:
                'from': source, 
                'to': target,
                'label': attrs.get('relationship_type', ''),
-                'confidence_score': attrs.get('confidence_score', 0),
                'source_provider': attrs.get('source_provider', ''),
                'discovery_timestamp': attrs.get('discovery_timestamp')
            })
@@ -188,24 +172,6 @@ class GraphManager:
            'statistics': self.get_statistics()['basic_metrics']
        }

-    def _get_confidence_distribution(self) -> Dict[str, int]:
-        """Get distribution of edge confidence scores with empty graph handling."""
-        distribution = {'high': 0, 'medium': 0, 'low': 0}
-        
-        # FIXED: Handle empty graph case
-        if self.get_edge_count() == 0:
-            return distribution
-            
-        for _, _, data in self.graph.edges(data=True):
-            confidence = data.get('confidence_score', 0)
-            if confidence >= 0.8:
-                distribution['high'] += 1
-            elif confidence >= 0.6:
-                distribution['medium'] += 1
-            else:
-                distribution['low'] += 1
-        return distribution
-
    def get_statistics(self) -> Dict[str, Any]:
        """Get comprehensive statistics about the graph with proper empty graph handling."""
        
@@ -222,7 +188,6 @@ class GraphManager:
            },
            'node_type_distribution': {}, 
            'relationship_type_distribution': {},
-            'confidence_distribution': self._get_confidence_distribution(),
            'provider_distribution': {}
        }
        
--- a/core/logger.py
+++ b/core/logger.py
@@ -30,7 +30,6 @@ class RelationshipDiscovery:
    source_node: str
    target_node: str
    relationship_type: str
-    confidence_score: float
    provider: str
    raw_data: Dict[str, Any]
    discovery_method: str
@@ -157,7 +156,7 @@ class ForensicLogger:
            self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}")
    
    def log_relationship_discovery(self, source_node: str, target_node: str,
-                                 relationship_type: str, confidence_score: float,
+                                 relationship_type: str,
                                 provider: str, raw_data: Dict[str, Any],
                                 discovery_method: str) -> None:
        """
@@ -167,7 +166,6 @@ class ForensicLogger:
            source_node: Source node identifier
            target_node: Target node identifier
            relationship_type: Type of relationship (e.g., 'SAN', 'A_Record')
-            confidence_score: Confidence score (0.0 to 1.0)
            provider: Provider that discovered this relationship
            raw_data: Raw data from provider response
            discovery_method: Method used to discover relationship
@@ -177,7 +175,6 @@ class ForensicLogger:
            source_node=source_node,
            target_node=target_node,
            relationship_type=relationship_type,
-            confidence_score=confidence_score,
            provider=provider,
            raw_data=raw_data,
            discovery_method=discovery_method
@@ -188,7 +185,7 @@ class ForensicLogger:
        
        self.logger.info(
            f"Relationship Discovered - {source_node} -> {target_node} "
-            f"({relationship_type}) - Confidence: {confidence_score:.2f} - Provider: {provider}"
+            f"({relationship_type}) - Provider: {provider}"
        )
    
    def log_scan_start(self, target_domain: str, recursion_depth: int, 
@@ -238,7 +235,6 @@ class ForensicLogger:
                'successful_requests': len([req for req in provider_requests if req.error is None]),
                'failed_requests': len([req for req in provider_requests if req.error is not None]),
                'relationships_discovered': len(provider_relationships),
-                'avg_confidence': sum(rel.confidence_score for rel in provider_relationships) / len(provider_relationships) if provider_relationships else 0
            }
        
        return {
--- a/core/provider_result.py
+++ b/core/provider_result.py
@@ -18,33 +18,19 @@ class StandardAttribute:
    value: Any
    type: str
    provider: str
-    confidence: float
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    metadata: Optional[Dict[str, Any]] = field(default_factory=dict)

-    def __post_init__(self):
-        """Validate the attribute after initialization."""
-        if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
-            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")
-
-
@dataclass
 class Relationship:
    """A unified data structure for a directional link between two nodes."""
    source_node: str
    target_node: str
    relationship_type: str
-    confidence: float
    provider: str
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    raw_data: Optional[Dict[str, Any]] = field(default_factory=dict)

-    def __post_init__(self):
-        """Validate the relationship after initialization."""
-        if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
-            raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")
-
-
@dataclass
 class ProviderResult:
    """A container for all data returned by a provider from a single query."""
@@ -52,8 +38,7 @@ class ProviderResult:
    relationships: List[Relationship] = field(default_factory=list)

    def add_attribute(self, target_node: str, name: str, value: Any, attr_type: str, 
-                     provider: str, confidence: float = 0.8, 
-                     metadata: Optional[Dict[str, Any]] = None) -> None:
+                     provider: str, metadata: Optional[Dict[str, Any]] = None) -> None:
        """Helper method to add an attribute to the result."""
        self.attributes.append(StandardAttribute(
            target_node=target_node,
@@ -61,19 +46,16 @@ class ProviderResult:
            value=value,
            type=attr_type,
            provider=provider,
-            confidence=confidence,
            metadata=metadata or {}
        ))

    def add_relationship(self, source_node: str, target_node: str, relationship_type: str,
-                        provider: str, confidence: float = 0.8, 
-                        raw_data: Optional[Dict[str, Any]] = None) -> None:
+                        provider: str, raw_data: Optional[Dict[str, Any]] = None) -> None:
        """Helper method to add a relationship to the result."""
        self.relationships.append(Relationship(
            source_node=source_node,
            target_node=target_node,
            relationship_type=relationship_type,
-            confidence=confidence,
            provider=provider,
            raw_data=raw_data or {}
        ))
--- a/core/scanner.py
+++ b/core/scanner.py
@@ -847,7 +847,6 @@ class Scanner:
                    'source_node': rel.source_node,
                    'target_node': rel.target_node,
                    'relationship_type': rel.relationship_type,
-                    'confidence': rel.confidence,
                    'provider': rel.provider,
                    'raw_data': rel.raw_data
                })
@@ -905,7 +904,6 @@ class Scanner:
                        source_id=rel_data['source_node'],
                        target_id=rel_data['target_node'],
                        relationship_type=rel_data['relationship_type'],
-                        confidence_score=rel_data['confidence'],
                        source_provider=rel_data['provider'],
                        raw_data=rel_data['raw_data']
                    )
@@ -1012,7 +1010,6 @@ class Scanner:
            self.graph.add_edge(
                visual_source, visual_target,
                relationship.relationship_type,
-                relationship.confidence,
                provider_name,
                relationship.raw_data
            )
@@ -1035,7 +1032,7 @@ class Scanner:
        for attribute in provider_result.attributes:
            attr_dict = {
                "name": attribute.name, "value": attribute.value, "type": attribute.type,
-                "provider": attribute.provider, "confidence": attribute.confidence, "metadata": attribute.metadata
+                "provider": attribute.provider, "metadata": attribute.metadata
            }
            attributes_by_node[attribute.target_node].append(attr_dict)