UX improvements

overcuriousity
2025-09-17 21:12:11 +02:00
parent d0ee415f0d
commit 8ae4fdbf80
11 changed files with 905 additions and 360 deletions

View File

@@ -4,7 +4,7 @@
Graph data model for DNSRecon using NetworkX.
Manages in-memory graph storage with confidence scoring and forensic metadata.
Now fully compatible with the unified ProviderResult data model.
UPDATED: Fixed certificate styling and correlation edge labeling.
UPDATED: Fixed correlation exclusion keys to match actual attribute names.
"""
import re
from datetime import datetime, timezone
@@ -41,7 +41,30 @@ class GraphManager:
self.correlation_index = {}
# Compile regex for date filtering for efficiency
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
self.EXCLUDED_KEYS = ['crtsh_cert_validity_period_days','crtsh_cert_source','crtsh_cert_common_name']
# These are the actual attribute names created in providers, WITHOUT provider prefix
self.EXCLUDED_KEYS = [
# Certificate metadata that creates noise
'cert_source', # Always 'crtsh' for crtsh provider
'cert_common_name',
'cert_validity_period_days', # Numerical, not useful for correlation
#'cert_certificate_id', # Unique per certificate
#'cert_serial_number', # Unique per certificate
'cert_entry_timestamp', # Timestamp, filtered by date regex anyway
'cert_not_before', # Date, filtered by date regex anyway
'cert_not_after', # Date, filtered by date regex anyway
# DNS metadata that creates noise
'dns_ttl', # TTL values are not meaningful for correlation
# Shodan metadata that might create noise
'timestamp', # Generic timestamp fields
'last_update', # Generic timestamp fields
#'org', # Too generic, causes false correlations
#'isp', # Too generic, causes false correlations
# Generic noisy attributes
'updated_timestamp', # Any timestamp field
'discovery_timestamp', # Any timestamp field
'query_timestamp', # Any timestamp field
]
def __getstate__(self):
"""Prepare GraphManager for pickling, excluding compiled regex."""
@@ -72,14 +95,31 @@ class GraphManager:
attr_value = attr.get('value')
attr_provider = attr.get('provider', 'unknown')
# Skip excluded attributes and invalid values
if any(excluded_key in attr_name for excluded_key in self.EXCLUDED_KEYS) or not isinstance(attr_value, (str, int, float, bool)) or attr_value is None:
continue
# IMPROVED: More comprehensive exclusion logic
should_exclude = (
# Check against excluded keys (exact match or substring)
any(excluded_key in attr_name or attr_name == excluded_key for excluded_key in self.EXCLUDED_KEYS) or
# Invalid value types
not isinstance(attr_value, (str, int, float, bool)) or
attr_value is None or
# Boolean values are not useful for correlation
isinstance(attr_value, bool) or
# String values that are too short or are dates
(isinstance(attr_value, str) and (
len(attr_value) < 4 or
self.date_pattern.match(attr_value) or
# Exclude common generic values that create noise
attr_value.lower() in ['unknown', 'none', 'null', 'n/a', 'true', 'false', '0', '1']
)) or
# Numerical values that are likely to be unique identifiers
(isinstance(attr_value, (int, float)) and (
attr_value == 0 or # Zero values are not meaningful
attr_value == 1 or # One values are too common
abs(attr_value) > 1000000 # Very large numbers are likely IDs
))
)
if isinstance(attr_value, bool):
continue
if isinstance(attr_value, str) and (len(attr_value) < 4 or self.date_pattern.match(attr_value)):
if should_exclude:
continue
# Initialize correlation tracking for this value
@@ -149,7 +189,7 @@ class GraphManager:
if self.graph.has_node(node_id) and not self.graph.has_edge(node_id, correlation_node_id):
# Format relationship label as "corr_provider_attribute"
relationship_label = f"{provider}_{attribute}"
relationship_label = f"corr_{provider}_{attribute}"
self.add_edge(
source_id=node_id,
@@ -170,7 +210,7 @@ class GraphManager:
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
"""
Check if there's a direct edge between two nodes in either direction.
Returns True if node_a→node_b OR node_b→node_a exists.
Returns True if node_a→node_b OR node_b→node_a exists.
"""
return (self.graph.has_edge(node_a, node_b) or
self.graph.has_edge(node_b, node_a))
@@ -410,12 +450,6 @@ class GraphManager:
"""Get all nodes of a specific type."""
return [n for n, d in self.graph.nodes(data=True) if d.get('type') == node_type.value]
def get_neighbors(self, node_id: str) -> List[str]:
"""Get all unique neighbors (predecessors and successors) for a node."""
if not self.graph.has_node(node_id):
return []
return list(set(self.graph.predecessors(node_id)) | set(self.graph.successors(node_id)))
def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
"""Get edges with confidence score above a given threshold."""
return [(u, v, d) for u, v, d in self.graph.edges(data=True)
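
For context on what the tightened filter now skips, here is a minimal standalone sketch of the same exclusion logic. It is a reproduction for illustration only, not the committed GraphManager method; the abbreviated EXCLUDED_KEYS list and the sample attribute names and values below are made up.

```python
# Minimal sketch of the new correlation-exclusion logic, for illustration only.
import re

EXCLUDED_KEYS = ['cert_source', 'cert_validity_period_days', 'dns_ttl', 'timestamp']
DATE_PATTERN = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
GENERIC_VALUES = {'unknown', 'none', 'null', 'n/a', 'true', 'false', '0', '1'}

def should_exclude(attr_name, attr_value):
    """Return True when an attribute must not participate in correlation."""
    if any(key in attr_name for key in EXCLUDED_KEYS):
        return True                      # excluded key (substring match, as in the diff)
    if not isinstance(attr_value, (str, int, float, bool)) or attr_value is None:
        return True                      # unusable value type
    if isinstance(attr_value, bool):
        return True                      # booleans are too coarse to correlate on
    if isinstance(attr_value, str):
        return (len(attr_value) < 4
                or bool(DATE_PATTERN.match(attr_value))
                or attr_value.lower() in GENERIC_VALUES)
    # int/float: zeros, ones, and very large ID-like numbers create noise
    return attr_value in (0, 1) or abs(attr_value) > 1000000

print(should_exclude('dns_ttl', 300))                            # True  (excluded key)
print(should_exclude('cert_not_before', '2025-09-17 21:12:11'))  # True  (date value)
print(should_exclude('shodan_org', 'none'))                      # True  (generic value)
print(should_exclude('cert_issuer_name', 'Example CA GmbH'))     # False (correlatable)
```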

View File

@@ -101,6 +101,7 @@ class ProviderResult:
"""Get the total number of attributes in this result."""
return len(self.attributes)
def is_large_entity(self, threshold: int) -> bool:
"""Check if this result qualifies as a large entity based on relationship count."""
return self.get_relationship_count() > threshold
##TODO
#def is_large_entity(self, threshold: int) -> bool:
# """Check if this result qualifies as a large entity based on relationship count."""
# return self.get_relationship_count() > threshold
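
A standalone illustration of the threshold check that is_large_entity() performs, using a stand-in class: this only mirrors the logic shown above, it is not the committed ProviderResult, and the threshold of 100 is an example rather than a project default.

```python
# Stand-in that mirrors the is_large_entity() check shown in this diff;
# the relationship payloads and the threshold are illustrative.
from dataclasses import dataclass, field
from typing import List, Tuple

@dataclass
class ResultStub:
    relationships: List[Tuple[str, str]] = field(default_factory=list)

    def get_relationship_count(self) -> int:
        return len(self.relationships)

    def is_large_entity(self, threshold: int) -> bool:
        """Check if this result qualifies as a large entity based on relationship count."""
        return self.get_relationship_count() > threshold

stub = ResultStub([('example.com', f'sub{i}.example.com') for i in range(150)])
print(stub.is_large_entity(threshold=100))  # True -> caller can collapse into one node
```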

View File

@@ -370,6 +370,7 @@ class Scanner:
task_tuple = (provider_name, target_item)
if task_tuple in processed_tasks:
self.tasks_skipped += 1
self.indicators_completed +=1
continue
if depth > max_depth:
@@ -405,7 +406,7 @@ class Scanner:
if self.target_retries[task_tuple] <= self.config.max_retries_per_target:
self.task_queue.put((priority, (provider_name, target_item, depth)))
self.tasks_re_enqueued += 1
self.total_tasks_ever_enqueued += 1
#self.total_tasks_ever_enqueued += 1
else:
self.scan_failed_due_to_retries = True
self._log_target_processing_error(str(task_tuple), "Max retries exceeded")
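
The two accounting tweaks above keep progress reporting honest: a skipped duplicate now counts toward completion, and a retried task is no longer added to the running total a second time. A toy illustration, assuming progress is reported as indicators_completed over total_tasks_ever_enqueued (the actual reporting code lives elsewhere in the scanner):

```python
# Toy progress accounting; task names are made up, only the counting pattern
# reflects this commit (skipped duplicates increment the completed counter).
tasks = ['dns:a.example.com', 'dns:b.example.com', 'dns:a.example.com']  # one duplicate
total_tasks_ever_enqueued = len(tasks)

processed, indicators_completed = set(), 0
for task in tasks:
    if task in processed:
        indicators_completed += 1   # the fix: a skipped duplicate still counts as done
        continue
    processed.add(task)
    indicators_completed += 1       # normal completion path

print(f'{indicators_completed}/{total_tasks_ever_enqueued}')  # 3/3 -> progress reaches 100%
# Without the skipped-task increment the scan would appear stuck at 2/3.
```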

View File

@@ -108,64 +108,6 @@ class SessionManager:
print(f"ERROR: Failed to create session {session_id}: {e}")
raise
def clone_session_preserving_config(self, source_session_id: str) -> str:
"""
FIXED: Create a new session that preserves the configuration (including API keys) from an existing session.
This is used when we need a fresh scanner but want to keep user configuration.
"""
with self.creation_lock:
print(f"=== CLONING SESSION {source_session_id} (PRESERVING CONFIG) ===")
try:
# Get the source session data
source_session_data = self._get_session_data(source_session_id)
if not source_session_data:
print(f"ERROR: Source session {source_session_id} not found for cloning")
return self.create_session() # Fallback to new session
# Create new session ID
new_session_id = str(uuid.uuid4())
# Get the preserved configuration
preserved_config = source_session_data.get('config')
if not preserved_config:
print(f"WARNING: No config found in source session, creating new")
from core.session_config import create_session_config
preserved_config = create_session_config()
print(f"Preserving config with API keys: {list(preserved_config.api_keys.keys())}")
# Create new scanner with preserved config
new_scanner = Scanner(session_config=preserved_config)
new_scanner.session_id = new_session_id
new_session_data = {
'scanner': new_scanner,
'config': preserved_config,
'created_at': time.time(),
'last_activity': time.time(),
'status': 'active',
'cloned_from': source_session_id
}
# Store in Redis
serialized_data = pickle.dumps(new_session_data)
session_key = self._get_session_key(new_session_id)
self.redis_client.setex(session_key, self.session_timeout, serialized_data)
# Initialize stop signal
stop_key = self._get_stop_signal_key(new_session_id)
self.redis_client.setex(stop_key, self.session_timeout, b'0')
print(f"Cloned session {new_session_id} with preserved configuration")
return new_session_id
except Exception as e:
print(f"ERROR: Failed to clone session {source_session_id}: {e}")
# Fallback to creating a new session
return self.create_session()
def set_stop_signal(self, session_id: str) -> bool:
"""
Set the stop signal for a session (cross-process safe).