UX improvements
@@ -4,7 +4,7 @@
 Graph data model for DNSRecon using NetworkX.
 Manages in-memory graph storage with confidence scoring and forensic metadata.
 Now fully compatible with the unified ProviderResult data model.
-UPDATED: Fixed certificate styling and correlation edge labeling.
+UPDATED: Fixed correlation exclusion keys to match actual attribute names.
 """
 import re
 from datetime import datetime, timezone
@@ -41,7 +41,30 @@ class GraphManager:
         self.correlation_index = {}
         # Compile regex for date filtering for efficiency
         self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
-        self.EXCLUDED_KEYS = ['crtsh_cert_validity_period_days','crtsh_cert_source','crtsh_cert_common_name']
+
+        # These are the actual attribute names created in providers, WITHOUT provider prefix
+        self.EXCLUDED_KEYS = [
+            # Certificate metadata that creates noise
+            'cert_source',  # Always 'crtsh' for crtsh provider
+            'cert_common_name',
+            'cert_validity_period_days',  # Numerical, not useful for correlation
+            #'cert_certificate_id',  # Unique per certificate
+            #'cert_serial_number',  # Unique per certificate
+            'cert_entry_timestamp',  # Timestamp, filtered by date regex anyway
+            'cert_not_before',  # Date, filtered by date regex anyway
+            'cert_not_after',  # Date, filtered by date regex anyway
+            # DNS metadata that creates noise
+            'dns_ttl',  # TTL values are not meaningful for correlation
+            # Shodan metadata that might create noise
+            'timestamp',  # Generic timestamp fields
+            'last_update',  # Generic timestamp fields
+            #'org',  # Too generic, causes false correlations
+            #'isp',  # Too generic, causes false correlations
+            # Generic noisy attributes
+            'updated_timestamp',  # Any timestamp field
+            'discovery_timestamp',  # Any timestamp field
+            'query_timestamp',  # Any timestamp field
+        ]
 
     def __getstate__(self):
         """Prepare GraphManager for pickling, excluding compiled regex."""
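Reviewer note: the EXCLUDED_KEYS check in the next hunk matches by substring as well as by exact name, so one key can suppress a whole family of attributes. A minimal standalone sketch of that semantics (attribute names here are illustrative, not taken from the providers):

    # Hypothetical inputs demonstrating the substring semantics of EXCLUDED_KEYS.
    EXCLUDED_KEYS = ['cert_source', 'dns_ttl', 'timestamp']

    def is_excluded(attr_name: str) -> bool:
        return any(key in attr_name for key in EXCLUDED_KEYS)

    print(is_excluded('shodan_timestamp'))  # True: contains 'timestamp'
    print(is_excluded('dns_ttl'))           # True: exact match
    print(is_excluded('dns_a_record'))      # False: reaches correlation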
@@ -72,14 +95,31 @@ class GraphManager:
             attr_value = attr.get('value')
             attr_provider = attr.get('provider', 'unknown')
 
-            # Skip excluded attributes and invalid values
-            if any(excluded_key in attr_name for excluded_key in self.EXCLUDED_KEYS) or not isinstance(attr_value, (str, int, float, bool)) or attr_value is None:
-                continue
-
-            if isinstance(attr_value, bool):
-                continue
-
-            if isinstance(attr_value, str) and (len(attr_value) < 4 or self.date_pattern.match(attr_value)):
+            # IMPROVED: More comprehensive exclusion logic
+            should_exclude = (
+                # Check against excluded keys (exact match or substring)
+                any(excluded_key in attr_name or attr_name == excluded_key for excluded_key in self.EXCLUDED_KEYS) or
+                # Invalid value types
+                not isinstance(attr_value, (str, int, float, bool)) or
+                attr_value is None or
+                # Boolean values are not useful for correlation
+                isinstance(attr_value, bool) or
+                # String values that are too short or are dates
+                (isinstance(attr_value, str) and (
+                    len(attr_value) < 4 or
+                    self.date_pattern.match(attr_value) or
+                    # Exclude common generic values that create noise
+                    attr_value.lower() in ['unknown', 'none', 'null', 'n/a', 'true', 'false', '0', '1']
+                )) or
+                # Numerical values that are likely to be unique identifiers
+                (isinstance(attr_value, (int, float)) and (
+                    attr_value == 0 or  # Zero values are not meaningful
+                    attr_value == 1 or  # One values are too common
+                    abs(attr_value) > 1000000  # Very large numbers are likely IDs
+                ))
+            )
+
+            if should_exclude:
                 continue
 
             # Initialize correlation tracking for this value
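To sanity-check the combined predicate, it helps to replay it on representative values. A self-contained sketch under assumed inputs (none of these fixtures come from the repo):

    import re

    date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
    EXCLUDED_KEYS = ['dns_ttl', 'cert_source', 'timestamp']

    def should_exclude(attr_name, attr_value):
        return (
            any(key in attr_name for key in EXCLUDED_KEYS) or
            not isinstance(attr_value, (str, int, float)) or
            isinstance(attr_value, bool) or
            (isinstance(attr_value, str) and (
                len(attr_value) < 4 or
                bool(date_pattern.match(attr_value)) or
                attr_value.lower() in ['unknown', 'none', 'null', 'n/a', 'true', 'false', '0', '1']
            )) or
            (isinstance(attr_value, (int, float)) and
             (attr_value in (0, 1) or abs(attr_value) > 1000000))
        )

    assert should_exclude('dns_ttl', 300)                    # excluded key
    assert should_exclude('created', '2024-01-01 00:00:00')  # date string
    assert should_exclude('record_id', 9876543210)           # likely a unique ID
    assert not should_exclude('asn', 'AS15169')              # kept for correlation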
@@ -149,7 +189,7 @@ class GraphManager:
 
             if self.graph.has_node(node_id) and not self.graph.has_edge(node_id, correlation_node_id):
                 # Format relationship label as "corr_provider_attribute"
-                relationship_label = f"{provider}_{attribute}"
+                relationship_label = f"corr_{provider}_{attribute}"
 
                 self.add_edge(
                     source_id=node_id,
@@ -170,7 +210,7 @@ class GraphManager:
     def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
         """
         Check if there's a direct edge between two nodes in either direction.
-        Returns True if node_aâ†'node_b OR node_bâ†'node_a exists.
+        Returns True if node_a→node_b OR node_b→node_a exists.
         """
         return (self.graph.has_edge(node_a, node_b) or
                 self.graph.has_edge(node_b, node_a))
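Context for the helper above: NetworkX's DiGraph stores edges directionally, so has_edge(a, b) says nothing about has_edge(b, a); hence the OR over both calls. A minimal illustration (standalone, node names invented):

    import networkx as nx

    g = nx.DiGraph()
    g.add_edge('a.example.com', '93.184.216.34')
    print(g.has_edge('a.example.com', '93.184.216.34'))  # True
    print(g.has_edge('93.184.216.34', 'a.example.com'))  # False: both directions must be checked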
@@ -410,12 +450,6 @@ class GraphManager:
         """Get all nodes of a specific type."""
         return [n for n, d in self.graph.nodes(data=True) if d.get('type') == node_type.value]
 
-    def get_neighbors(self, node_id: str) -> List[str]:
-        """Get all unique neighbors (predecessors and successors) for a node."""
-        if not self.graph.has_node(node_id):
-            return []
-        return list(set(self.graph.predecessors(node_id)) | set(self.graph.successors(node_id)))
-
     def get_high_confidence_edges(self, min_confidence: float = 0.8) -> List[Tuple[str, str, Dict]]:
         """Get edges with confidence score above a given threshold."""
         return [(u, v, d) for u, v, d in self.graph.edges(data=True)
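The deleted get_neighbors computed the same set that NetworkX already exposes as nx.all_neighbors, which makes it safe to drop provided callers switch over (an assumption; the replacement call site is not shown in this diff):

    import networkx as nx

    g = nx.DiGraph([('a', 'b'), ('c', 'a')])
    manual = set(g.predecessors('a')) | set(g.successors('a'))
    builtin = set(nx.all_neighbors(g, 'a'))
    print(manual == builtin)  # True: {'b', 'c'} either way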
@@ -101,6 +101,7 @@ class ProviderResult:
         """Get the total number of attributes in this result."""
         return len(self.attributes)
 
-    def is_large_entity(self, threshold: int) -> bool:
-        """Check if this result qualifies as a large entity based on relationship count."""
-        return self.get_relationship_count() > threshold
+    ##TODO
+    #def is_large_entity(self, threshold: int) -> bool:
+    #    """Check if this result qualifies as a large entity based on relationship count."""
+    #    return self.get_relationship_count() > threshold
@@ -370,6 +370,7 @@ class Scanner:
                 task_tuple = (provider_name, target_item)
                 if task_tuple in processed_tasks:
                     self.tasks_skipped += 1
+                    self.indicators_completed += 1
                     continue
 
                 if depth > max_depth:
@@ -405,7 +406,7 @@ class Scanner:
                     if self.target_retries[task_tuple] <= self.config.max_retries_per_target:
                         self.task_queue.put((priority, (provider_name, target_item, depth)))
                         self.tasks_re_enqueued += 1
-                        self.total_tasks_ever_enqueued += 1
+                        #self.total_tasks_ever_enqueued += 1
                     else:
                         self.scan_failed_due_to_retries = True
                         self._log_target_processing_error(str(task_tuple), "Max retries exceeded")
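A plausible reading of the two Scanner tweaks (an assumption; the diff itself does not say): both counters feed a progress readout, so a skipped task must still count as completed, and a re-enqueued task must not be added to the total a second time, or the bar can never reach 100%. In sketch form (names and totals hypothetical):

    indicators_completed = 100
    total_tasks_ever_enqueued = 100  # counted once, at first enqueue
    total_if_double_counted = 101    # a retry bumps the total again

    print(f"{indicators_completed / total_tasks_ever_enqueued:.0%}")  # 100%
    print(f"{indicators_completed / total_if_double_counted:.0%}")    # 99%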
@@ -108,64 +108,6 @@ class SessionManager:
             print(f"ERROR: Failed to create session {session_id}: {e}")
             raise
 
-    def clone_session_preserving_config(self, source_session_id: str) -> str:
-        """
-        FIXED: Create a new session that preserves the configuration (including API keys) from an existing session.
-        This is used when we need a fresh scanner but want to keep user configuration.
-        """
-        with self.creation_lock:
-            print(f"=== CLONING SESSION {source_session_id} (PRESERVING CONFIG) ===")
-
-            try:
-                # Get the source session data
-                source_session_data = self._get_session_data(source_session_id)
-                if not source_session_data:
-                    print(f"ERROR: Source session {source_session_id} not found for cloning")
-                    return self.create_session()  # Fallback to new session
-
-                # Create new session ID
-                new_session_id = str(uuid.uuid4())
-
-                # Get the preserved configuration
-                preserved_config = source_session_data.get('config')
-                if not preserved_config:
-                    print(f"WARNING: No config found in source session, creating new")
-                    from core.session_config import create_session_config
-                    preserved_config = create_session_config()
-
-                print(f"Preserving config with API keys: {list(preserved_config.api_keys.keys())}")
-
-                # Create new scanner with preserved config
-                new_scanner = Scanner(session_config=preserved_config)
-                new_scanner.session_id = new_session_id
-
-
-                new_session_data = {
-                    'scanner': new_scanner,
-                    'config': preserved_config,
-                    'created_at': time.time(),
-                    'last_activity': time.time(),
-                    'status': 'active',
-                    'cloned_from': source_session_id
-                }
-
-                # Store in Redis
-                serialized_data = pickle.dumps(new_session_data)
-                session_key = self._get_session_key(new_session_id)
-                self.redis_client.setex(session_key, self.session_timeout, serialized_data)
-
-                # Initialize stop signal
-                stop_key = self._get_stop_signal_key(new_session_id)
-                self.redis_client.setex(stop_key, self.session_timeout, b'0')
-
-                print(f"Cloned session {new_session_id} with preserved configuration")
-                return new_session_id
-
-            except Exception as e:
-                print(f"ERROR: Failed to clone session {source_session_id}: {e}")
-                # Fallback to creating a new session
-                return self.create_session()
-
     def set_stop_signal(self, session_id: str) -> bool:
         """
         Set the stop signal for a session (cross-process safe).