format keys reduction

This commit is contained in:
overcuriousity
2025-09-16 23:17:23 +02:00
parent 229746e1ec
commit 47ce7ff883
2 changed files with 249 additions and 498 deletions

View File

@@ -4,6 +4,7 @@
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
Now fully compatible with the unified ProviderResult data model.
UPDATED: Fixed certificate styling and correlation edge labeling.
"""
import re
from datetime import datetime, timezone
@@ -40,7 +41,7 @@ class GraphManager:
self.correlation_index = {}
# Compile regex for date filtering for efficiency
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
self.EXCLUDED_KEYS = ['confidence', 'provider', 'timestamp', 'type']
self.EXCLUDED_KEYS = ['confidence', 'provider', 'timestamp', 'type','crtsh_cert_validity_period_days']
def __getstate__(self):
"""Prepare GraphManager for pickling, excluding compiled regex."""
@@ -101,7 +102,7 @@ class GraphManager:
# Add source if not already present (avoid duplicates)
existing_sources = [s for s in self.correlation_index[attr_value]['sources']
if s['node_id'] == node_id and s['path'] == source_info['path']]
if s['node_id'] == node_id and s['path'] == source_info['path']]
if not existing_sources:
self.correlation_index[attr_value]['sources'].append(source_info)
@@ -146,11 +147,8 @@ class GraphManager:
attribute = source['attribute']
if self.graph.has_node(node_id) and not self.graph.has_edge(node_id, correlation_node_id):
# Format relationship label as "provider: attribute"
display_provider = provider
display_attribute = attribute.replace('_', ' ').replace('cert ', '').strip()
relationship_label = f"{display_provider}: {display_attribute}"
# Format relationship label as "corr_provider_attribute"
relationship_label = f"corr_{provider}_{attribute}"
self.add_edge(
source_id=node_id,
@@ -167,46 +165,6 @@ class GraphManager:
print(f"Added correlation edge: {node_id} -> {correlation_node_id} ({relationship_label})")
def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[List[Dict[str, Any]]] = None,
description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool:
"""
Add a node to the graph, update attributes, and process correlations.
Now compatible with unified data model - attributes are dictionaries from converted StandardAttribute objects.
"""
is_new_node = not self.graph.has_node(node_id)
if is_new_node:
self.graph.add_node(node_id, type=node_type.value,
added_timestamp=datetime.now(timezone.utc).isoformat(),
attributes=attributes or [], # Store as a list from the start
description=description,
metadata=metadata or {})
else:
# Safely merge new attributes into the existing list of attributes
if attributes:
existing_attributes = self.graph.nodes[node_id].get('attributes', [])
# Handle cases where old data might still be in dictionary format
if not isinstance(existing_attributes, list):
existing_attributes = []
# Create a set of existing attribute names for efficient duplicate checking
existing_attr_names = {attr['name'] for attr in existing_attributes}
for new_attr in attributes:
if new_attr['name'] not in existing_attr_names:
existing_attributes.append(new_attr)
existing_attr_names.add(new_attr['name'])
self.graph.nodes[node_id]['attributes'] = existing_attributes
if description:
self.graph.nodes[node_id]['description'] = description
if metadata:
existing_metadata = self.graph.nodes[node_id].get('metadata', {})
existing_metadata.update(metadata)
self.graph.nodes[node_id]['metadata'] = existing_metadata
self.last_modified = datetime.now(timezone.utc).isoformat()
return is_new_node
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
"""
@@ -303,6 +261,47 @@ class GraphManager:
f"across {node_count} nodes"
)
def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[List[Dict[str, Any]]] = None,
description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool:
"""
Add a node to the graph, update attributes, and process correlations.
Now compatible with unified data model - attributes are dictionaries from converted StandardAttribute objects.
"""
is_new_node = not self.graph.has_node(node_id)
if is_new_node:
self.graph.add_node(node_id, type=node_type.value,
added_timestamp=datetime.now(timezone.utc).isoformat(),
attributes=attributes or [], # Store as a list from the start
description=description,
metadata=metadata or {})
else:
# Safely merge new attributes into the existing list of attributes
if attributes:
existing_attributes = self.graph.nodes[node_id].get('attributes', [])
# Handle cases where old data might still be in dictionary format
if not isinstance(existing_attributes, list):
existing_attributes = []
# Create a set of existing attribute names for efficient duplicate checking
existing_attr_names = {attr['name'] for attr in existing_attributes}
for new_attr in attributes:
if new_attr['name'] not in existing_attr_names:
existing_attributes.append(new_attr)
existing_attr_names.add(new_attr['name'])
self.graph.nodes[node_id]['attributes'] = existing_attributes
if description:
self.graph.nodes[node_id]['description'] = description
if metadata:
existing_metadata = self.graph.nodes[node_id].get('metadata', {})
existing_metadata.update(metadata)
self.graph.nodes[node_id]['metadata'] = existing_metadata
self.last_modified = datetime.now(timezone.utc).isoformat()
return is_new_node
def add_edge(self, source_id: str, target_id: str, relationship_type: str,
confidence_score: float = 0.5, source_provider: str = "unknown",
raw_data: Optional[Dict[str, Any]] = None) -> bool:
@@ -369,11 +368,21 @@ class GraphManager:
# Clean up the correlation index
keys_to_delete = []
for value, nodes in self.correlation_index.items():
if node_id in nodes:
del nodes[node_id]
if not nodes: # If no other nodes are associated with this value, remove it
keys_to_delete.append(value)
for value, data in self.correlation_index.items():
if isinstance(data, dict) and 'nodes' in data:
# Updated correlation structure
if node_id in data['nodes']:
data['nodes'].discard(node_id)
# Remove sources for this node
data['sources'] = [s for s in data['sources'] if s['node_id'] != node_id]
if not data['nodes']: # If no other nodes are associated, remove it
keys_to_delete.append(value)
else:
# Legacy correlation structure (fallback)
if isinstance(data, set) and node_id in data:
data.discard(node_id)
if not data:
keys_to_delete.append(value)
for key in keys_to_delete:
if key in self.correlation_index:
@@ -413,10 +422,10 @@ class GraphManager:
nodes = []
for node_id, attrs in self.graph.nodes(data=True):
node_data = {'id': node_id, 'label': node_id, 'type': attrs.get('type', 'unknown'),
'attributes': attrs.get('attributes', []), # Ensure attributes is a list
'description': attrs.get('description', ''),
'metadata': attrs.get('metadata', {}),
'added_timestamp': attrs.get('added_timestamp')}
'attributes': attrs.get('attributes', []), # Ensure attributes is a list
'description': attrs.get('description', ''),
'metadata': attrs.get('metadata', {}),
'added_timestamp': attrs.get('added_timestamp')}
# UPDATED: Fixed certificate validity styling logic
node_type = node_data['type']
@@ -469,10 +478,10 @@ class GraphManager:
edges = []
for source, target, attrs in self.graph.edges(data=True):
edges.append({'from': source, 'to': target,
'label': attrs.get('relationship_type', ''),
'confidence_score': attrs.get('confidence_score', 0),
'source_provider': attrs.get('source_provider', ''),
'discovery_timestamp': attrs.get('discovery_timestamp')})
'label': attrs.get('relationship_type', ''),
'confidence_score': attrs.get('confidence_score', 0),
'source_provider': attrs.get('source_provider', ''),
'discovery_timestamp': attrs.get('discovery_timestamp')})
return {
'nodes': nodes, 'edges': edges,
'statistics': self.get_statistics()['basic_metrics']