This commit is contained in:
overcuriousity
2025-09-24 09:30:42 +02:00
parent 571912218e
commit 897bb80183
15 changed files with 541 additions and 335 deletions

View File

@@ -2,7 +2,7 @@
"""
Graph data model for DNScope using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
Manages in-memory graph storage with forensic metadata.
Now fully compatible with the unified ProviderResult data model.
UPDATED: Fixed correlation exclusion keys to match actual attribute names.
UPDATED: Removed export_json() method - now handled by ExportManager.
@@ -31,7 +31,7 @@ class NodeType(Enum):
class GraphManager:
"""
Thread-safe graph manager for DNScope infrastructure mapping.
Uses NetworkX for in-memory graph storage with confidence scoring.
Uses NetworkX for in-memory graph storage.
Compatible with unified ProviderResult data model.
"""
@@ -83,7 +83,7 @@ class GraphManager:
return is_new_node
def add_edge(self, source_id: str, target_id: str, relationship_type: str,
confidence_score: float = 0.5, source_provider: str = "unknown",
source_provider: str = "unknown",
raw_data: Optional[Dict[str, Any]] = None) -> bool:
"""
UPDATED: Add or update an edge between two nodes with raw relationship labels.
@@ -91,23 +91,13 @@ class GraphManager:
if not self.graph.has_node(source_id) or not self.graph.has_node(target_id):
return False
new_confidence = confidence_score
# UPDATED: Use raw relationship type - no formatting
edge_label = relationship_type
if self.graph.has_edge(source_id, target_id):
# If edge exists, update confidence if the new score is higher.
if new_confidence > self.graph.edges[source_id, target_id].get('confidence_score', 0):
self.graph.edges[source_id, target_id]['confidence_score'] = new_confidence
self.graph.edges[source_id, target_id]['updated_timestamp'] = datetime.now(timezone.utc).isoformat()
self.graph.edges[source_id, target_id]['updated_by'] = source_provider
return False
# Add a new edge with raw attributes
self.graph.add_edge(source_id, target_id,
relationship_type=edge_label,
confidence_score=new_confidence,
source_provider=source_provider,
discovery_timestamp=datetime.now(timezone.utc).isoformat(),
raw_data=raw_data or {})
@@ -137,11 +127,6 @@ class GraphManager:
"""Get all nodes of a specific type."""
return [n for n, d in self.graph.nodes(data=True) if d.get('type') == node_type.value]
def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
"""Get edges with confidence score above a given threshold."""
return [(u, v, d) for u, v, d in self.graph.edges(data=True)
if d.get('confidence_score', 0) >= min_confidence]
def get_graph_data(self) -> Dict[str, Any]:
"""
Export graph data formatted for frontend visualization.
@@ -177,7 +162,6 @@ class GraphManager:
'from': source,
'to': target,
'label': attrs.get('relationship_type', ''),
'confidence_score': attrs.get('confidence_score', 0),
'source_provider': attrs.get('source_provider', ''),
'discovery_timestamp': attrs.get('discovery_timestamp')
})
@@ -188,24 +172,6 @@ class GraphManager:
'statistics': self.get_statistics()['basic_metrics']
}
def _get_confidence_distribution(self) -> Dict[str, int]:
"""Get distribution of edge confidence scores with empty graph handling."""
distribution = {'high': 0, 'medium': 0, 'low': 0}
# FIXED: Handle empty graph case
if self.get_edge_count() == 0:
return distribution
for _, _, data in self.graph.edges(data=True):
confidence = data.get('confidence_score', 0)
if confidence >= 0.8:
distribution['high'] += 1
elif confidence >= 0.6:
distribution['medium'] += 1
else:
distribution['low'] += 1
return distribution
def get_statistics(self) -> Dict[str, Any]:
"""Get comprehensive statistics about the graph with proper empty graph handling."""
@@ -222,7 +188,6 @@ class GraphManager:
},
'node_type_distribution': {},
'relationship_type_distribution': {},
'confidence_distribution': self._get_confidence_distribution(),
'provider_distribution': {}
}

View File

@@ -30,7 +30,6 @@ class RelationshipDiscovery:
source_node: str
target_node: str
relationship_type: str
confidence_score: float
provider: str
raw_data: Dict[str, Any]
discovery_method: str
@@ -157,7 +156,7 @@ class ForensicLogger:
self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}")
def log_relationship_discovery(self, source_node: str, target_node: str,
relationship_type: str, confidence_score: float,
relationship_type: str,
provider: str, raw_data: Dict[str, Any],
discovery_method: str) -> None:
"""
@@ -167,7 +166,6 @@ class ForensicLogger:
source_node: Source node identifier
target_node: Target node identifier
relationship_type: Type of relationship (e.g., 'SAN', 'A_Record')
confidence_score: Confidence score (0.0 to 1.0)
provider: Provider that discovered this relationship
raw_data: Raw data from provider response
discovery_method: Method used to discover relationship
@@ -177,7 +175,6 @@ class ForensicLogger:
source_node=source_node,
target_node=target_node,
relationship_type=relationship_type,
confidence_score=confidence_score,
provider=provider,
raw_data=raw_data,
discovery_method=discovery_method
@@ -188,7 +185,7 @@ class ForensicLogger:
self.logger.info(
f"Relationship Discovered - {source_node} -> {target_node} "
f"({relationship_type}) - Confidence: {confidence_score:.2f} - Provider: {provider}"
f"({relationship_type}) - Provider: {provider}"
)
def log_scan_start(self, target_domain: str, recursion_depth: int,
@@ -238,7 +235,6 @@ class ForensicLogger:
'successful_requests': len([req for req in provider_requests if req.error is None]),
'failed_requests': len([req for req in provider_requests if req.error is not None]),
'relationships_discovered': len(provider_relationships),
'avg_confidence': sum(rel.confidence_score for rel in provider_relationships) / len(provider_relationships) if provider_relationships else 0
}
return {

View File

@@ -18,33 +18,19 @@ class StandardAttribute:
value: Any
type: str
provider: str
confidence: float
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
metadata: Optional[Dict[str, Any]] = field(default_factory=dict)
def __post_init__(self):
"""Validate the attribute after initialization."""
if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")
@dataclass
class Relationship:
"""A unified data structure for a directional link between two nodes."""
source_node: str
target_node: str
relationship_type: str
confidence: float
provider: str
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
raw_data: Optional[Dict[str, Any]] = field(default_factory=dict)
def __post_init__(self):
"""Validate the relationship after initialization."""
if not isinstance(self.confidence, (int, float)) or not 0.0 <= self.confidence <= 1.0:
raise ValueError(f"Confidence must be between 0.0 and 1.0, got {self.confidence}")
@dataclass
class ProviderResult:
"""A container for all data returned by a provider from a single query."""
@@ -52,8 +38,7 @@ class ProviderResult:
relationships: List[Relationship] = field(default_factory=list)
def add_attribute(self, target_node: str, name: str, value: Any, attr_type: str,
provider: str, confidence: float = 0.8,
metadata: Optional[Dict[str, Any]] = None) -> None:
provider: str, metadata: Optional[Dict[str, Any]] = None) -> None:
"""Helper method to add an attribute to the result."""
self.attributes.append(StandardAttribute(
target_node=target_node,
@@ -61,19 +46,16 @@ class ProviderResult:
value=value,
type=attr_type,
provider=provider,
confidence=confidence,
metadata=metadata or {}
))
def add_relationship(self, source_node: str, target_node: str, relationship_type: str,
provider: str, confidence: float = 0.8,
raw_data: Optional[Dict[str, Any]] = None) -> None:
provider: str, raw_data: Optional[Dict[str, Any]] = None) -> None:
"""Helper method to add a relationship to the result."""
self.relationships.append(Relationship(
source_node=source_node,
target_node=target_node,
relationship_type=relationship_type,
confidence=confidence,
provider=provider,
raw_data=raw_data or {}
))

View File

@@ -847,7 +847,6 @@ class Scanner:
'source_node': rel.source_node,
'target_node': rel.target_node,
'relationship_type': rel.relationship_type,
'confidence': rel.confidence,
'provider': rel.provider,
'raw_data': rel.raw_data
})
@@ -905,7 +904,6 @@ class Scanner:
source_id=rel_data['source_node'],
target_id=rel_data['target_node'],
relationship_type=rel_data['relationship_type'],
confidence_score=rel_data['confidence'],
source_provider=rel_data['provider'],
raw_data=rel_data['raw_data']
)
@@ -1012,7 +1010,6 @@ class Scanner:
self.graph.add_edge(
visual_source, visual_target,
relationship.relationship_type,
relationship.confidence,
provider_name,
relationship.raw_data
)
@@ -1035,7 +1032,7 @@ class Scanner:
for attribute in provider_result.attributes:
attr_dict = {
"name": attribute.name, "value": attribute.value, "type": attribute.type,
"provider": attribute.provider, "confidence": attribute.confidence, "metadata": attribute.metadata
"provider": attribute.provider, "metadata": attribute.metadata
}
attributes_by_node[attribute.target_node].append(attr_dict)