commit 2ae33bc5ba
parent c91913fa13
Author: overcuriousity
Date:   2025-09-14 15:00:00 +02:00

5 changed files with 59 additions and 31 deletions

View File

@@ -50,7 +50,7 @@ class GraphManager:
         self.__dict__.update(state)
         self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
 
-    def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = None):
+    def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = []):
         """Recursively traverse metadata and add hashable values to the index."""
         if path is None:
             path = []
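
The pair of signature lines above swaps the `None` sentinel for a mutable default (`path: List[str] = []`). Worth flagging: Python evaluates a default value once, at definition time, so every call that omits `path` shares the same list object, and the `if path is None` guard kept below can no longer fire. A minimal sketch of the difference (illustrative names, not from this repository):

    # A mutable default is created once and shared by every call that
    # omits the argument; a None sentinel builds a fresh list per call.
    def collect(value, path=[]):
        path.append(value)          # mutates the shared default
        return path

    def collect_safe(value, path=None):
        if path is None:
            path = []               # fresh list on each call
        path.append(value)
        return path

    print(collect(1), collect(2))            # [1, 2] [1, 2]
    print(collect_safe(1), collect_safe(2))  # [1] [2]

Whether the shared default bites in `_update_correlation_index` depends on whether the traversal appends to `path` in place or builds new lists such as `path + [key]`; the diff does not show the recursion step.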
@@ -93,7 +93,7 @@ class GraphManager:
                 if path_str not in self.correlation_index[value][node_id]:
                     self.correlation_index[value][node_id].append(path_str)
 
-    def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = None) -> List[Dict]:
+    def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = []) -> List[Dict]:
         """Recursively traverse metadata to find correlations with existing data."""
         if path is None:
             path = []
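
For orientation, the `correlation_index[value][node_id].append(path_str)` access pattern above implies a value -> node_id -> paths mapping. A hypothetical reconstruction of that shape (only the names in the diff are real; the rest is illustrative):

    from collections import defaultdict

    # value -> node_id -> list of metadata paths where the value was seen
    correlation_index = defaultdict(lambda: defaultdict(list))

    def index_value(value, node_id, path_str):
        # Mirror the de-duplication guard from the hunk above.
        if path_str not in correlation_index[value][node_id]:
            correlation_index[value][node_id].append(path_str)

    index_value("198.51.100.7", "example.com", "dns.a_records.0")
    index_value("198.51.100.7", "mail.example.com", "dns.a_records.0")
    # "198.51.100.7" now maps to two nodes: a correlation candidate.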

View File

@@ -50,6 +50,9 @@ class Scanner:
         self.stop_event = threading.Event()
         self.scan_thread = None
         self.session_id = None # Will be set by session manager
+        self.task_queue = deque([])
+        self.target_retries = defaultdict(int)
+        self.scan_failed_due_to_retries = False
 
         # Scanning progress tracking
         self.total_indicators_found = 0
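
These three attributes carry the new retry machinery: a FIFO work queue, a per-target failure counter, and a flag that latches when any target exhausts its retries. Two stdlib details worth noting: `deque([])` is equivalent to plain `deque()`, and `defaultdict(int)` lets the first failure be counted without initialisation:

    from collections import defaultdict, deque

    task_queue = deque()                  # O(1) append and popleft
    target_retries = defaultdict(int)     # missing keys read as 0

    target_retries["example.com"] += 1    # no KeyError on first failure
    print(target_retries["example.com"])  # 1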
@@ -236,6 +239,8 @@ class Scanner:
         self.total_indicators_found = 0
         self.indicators_processed = 0
         self.current_indicator = self.current_target
+        self.target_retries = defaultdict(int)
+        self.scan_failed_due_to_retries = False
 
         # Update GUI with scan preparation state
         self._update_session_state()
@@ -270,7 +275,7 @@ class Scanner:
         self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
         processed_targets = set()
 
-        task_queue = deque([(target_domain, 0, False)]) # target, depth, is_large_entity_member
+        self.task_queue.append((target_domain, 0, False)) # target, depth, is_large_entity_member
 
         try:
             self.status = ScanStatus.RUNNING
@@ -281,12 +286,12 @@ class Scanner:
             self.graph.add_node(target_domain, NodeType.DOMAIN)
             self._initialize_provider_states(target_domain)
 
-            while task_queue:
+            while self.task_queue:
                 if self._is_stop_requested():
                     print("Stop requested, terminating scan.")
                     break
 
-                target, depth, is_large_entity_member = task_queue.popleft()
+                target, depth, is_large_entity_member = self.task_queue.popleft()
 
                 if target in processed_targets:
                     continue
@@ -298,16 +303,26 @@ class Scanner:
                 self.current_indicator = target
                 self._update_session_state()
 
-                new_targets, large_entity_members = self._query_providers_for_target(target, depth, is_large_entity_member)
-                processed_targets.add(target)
+                new_targets, large_entity_members, success = self._query_providers_for_target(target, depth, is_large_entity_member)
+
+                if not success:
+                    self.target_retries[target] += 1
+                    if self.target_retries[target] <= self.config.max_retries_per_target:
+                        print(f"Re-queueing target {target} (attempt {self.target_retries[target]})")
+                        self.task_queue.append((target, depth, is_large_entity_member))
+                    else:
+                        print(f"ERROR: Max retries exceeded for target {target}")
+                        self.scan_failed_due_to_retries = True
+                        self._log_target_processing_error(target, "Max retries exceeded")
+                else:
+                    processed_targets.add(target)
 
                 for new_target in new_targets:
                     if new_target not in processed_targets:
-                        task_queue.append((new_target, depth + 1, False))
+                        self.task_queue.append((new_target, depth + 1, False))
 
                 for member in large_entity_members:
                     if member not in processed_targets:
-                        task_queue.append((member, depth, True))
+                        self.task_queue.append((member, depth, True))
 
         except Exception as e:
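
Condensed, the rewritten loop body is a retry-with-requeue pattern: a target is marked processed only on success; a failed target goes back to the tail of the queue until `max_retries_per_target` is exhausted, at which point the failure flag latches for the final status. A self-contained sketch under those assumptions (`query` and `MAX_RETRIES` are stand-ins for `_query_providers_for_target` and `config.max_retries_per_target`):

    from collections import defaultdict, deque

    MAX_RETRIES = 3
    task_queue = deque([("example.com", 0, False)])  # target, depth, is_member
    target_retries = defaultdict(int)
    processed = set()
    scan_failed_due_to_retries = False

    def query(target):
        """Stand-in: returns (new_targets, members, success)."""
        return set(), set(), True

    while task_queue:
        target, depth, is_member = task_queue.popleft()
        if target in processed:
            continue
        new_targets, members, success = query(target)
        if not success:
            target_retries[target] += 1
            if target_retries[target] <= MAX_RETRIES:
                task_queue.append((target, depth, is_member))  # retry later
            else:
                scan_failed_due_to_retries = True              # give up
        else:
            processed.add(target)
            for t in new_targets:
                if t not in processed:
                    task_queue.append((t, depth + 1, False))

Appending the retry to the tail rather than retrying immediately lets other queued targets run first, which naturally spaces out repeated hits against a flaky provider.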
@@ -318,6 +333,8 @@ class Scanner:
         finally:
             if self._is_stop_requested():
                 self.status = ScanStatus.STOPPED
+            elif self.scan_failed_due_to_retries:
+                self.status = ScanStatus.FAILED
             else:
                 self.status = ScanStatus.COMPLETED
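
The `finally` block now resolves the terminal state with an explicit precedence: a user stop wins over a retry failure, which wins over normal completion. Restated as a runnable sketch:

    def resolve_status(stop_requested, failed_due_to_retries):
        # Order matters: STOPPED beats FAILED beats COMPLETED.
        if stop_requested:
            return "STOPPED"
        if failed_due_to_retries:
            return "FAILED"
        return "COMPLETED"

    assert resolve_status(True, True) == "STOPPED"
    assert resolve_status(False, True) == "FAILED"
    assert resolve_status(False, False) == "COMPLETED"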
@@ -331,7 +348,7 @@ class Scanner:
             print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
             print(f" - Targets processed: {len(processed_targets)}")
 
-    def _query_providers_for_target(self, target: str, depth: int, dns_only: bool = False) -> Tuple[Set[str], Set[str]]:
+    def _query_providers_for_target(self, target: str, depth: int, dns_only: bool = False) -> Tuple[Set[str], Set[str], bool]:
         """Helper method to query providers for a single target."""
         is_ip = _is_valid_ip(target)
         target_type = NodeType.IP if is_ip else NodeType.DOMAIN
@@ -339,7 +356,7 @@ class Scanner:
         if self._is_stop_requested():
             print(f"Stop requested before querying providers for {target}")
-            return set(), set()
+            return set(), set(), False
 
         self.graph.add_node(target, target_type)
         self._initialize_provider_states(target)
@@ -347,21 +364,25 @@ class Scanner:
         new_targets = set()
         large_entity_members = set()
         node_attributes = defaultdict(lambda: defaultdict(list))
+        all_providers_successful = True
 
         eligible_providers = self._get_eligible_providers(target, is_ip, dns_only)
         if not eligible_providers:
             self._log_no_eligible_providers(target, is_ip)
-            return new_targets, large_entity_members
+            return new_targets, large_entity_members, True
 
         for provider in eligible_providers:
             if self._is_stop_requested():
                 print(f"Stop requested while querying providers for {target}")
+                all_providers_successful = False
                 break
 
             try:
                 provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
-                if provider_results and not self._is_stop_requested():
+                if provider_results is None:
+                    all_providers_successful = False
+                elif not self._is_stop_requested():
                     discovered, is_large_entity = self._process_provider_results_forensic(
                         target, provider, provider_results, node_attributes, depth
                     )
@@ -370,6 +391,7 @@ class Scanner:
                     else:
                         new_targets.update(discovered)
             except Exception as e:
+                all_providers_successful = False
                 self._log_provider_error(target, provider.get_name(), str(e))
 
         for node_id, attributes in node_attributes.items():
@@ -378,7 +400,7 @@ class Scanner:
                 node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
                 self.graph.add_node(node_id, node_type_to_add, attributes=attributes)
 
-        return new_targets, large_entity_members
+        return new_targets, large_entity_members, all_providers_successful
 
     def _update_session_state(self) -> None:
         """
@@ -438,7 +460,7 @@ class Scanner:
         if self._is_stop_requested():
             print(f"Stop requested before querying {provider_name} for {target}")
-            return []
+            return None
 
         print(f"Querying {provider_name} for {target}")
@@ -452,7 +474,7 @@ class Scanner:
         if self._is_stop_requested():
             print(f"Stop requested after querying {provider_name} for {target}")
-            return []
+            return None
 
         self._update_provider_state(target, provider_name, 'success', len(results), None, start_time)
@@ -462,7 +484,7 @@ class Scanner:
except Exception as e:
self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
print(f"{provider_name} failed for {target}: {e}")
return []
return None
def _update_provider_state(self, target: str, provider_name: str, status: str,
results_count: int, error: str, start_time: datetime) -> None:
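
The three `return None` changes above give `_query_single_provider_forensic` a deliberate convention: `None` means the query failed or was interrupted (and should count toward a retry), while an empty list still means a successful query that found nothing. Callers must therefore test identity rather than truthiness, which is why the earlier hunk switched from `if provider_results` to `if provider_results is None`. A sketch of a caller honouring the convention (`query_provider` is a stand-in):

    from typing import List, Optional

    def query_provider(target: str) -> Optional[List[str]]:
        """Stand-in: None on failure/interrupt, [] on empty success."""
        return []

    results = query_provider("example.com")
    if results is None:
        print("provider failed, count a retry")
    elif not results:
        print("provider succeeded with no findings, move on")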
@@ -647,6 +669,7 @@ class Scanner:
         # Set both local and Redis stop signals
         self._set_stop_signal()
         self.status = ScanStatus.STOPPED
+        self.task_queue.clear()
 
         # Immediately update GUI with stopped status
         self._update_session_state()
@@ -678,7 +701,8 @@ class Scanner:
                 'indicators_processed': self.indicators_processed,
                 'progress_percentage': self._calculate_progress(),
                 'enabled_providers': [provider.get_name() for provider in self.providers],
-                'graph_statistics': self.graph.get_statistics()
+                'graph_statistics': self.graph.get_statistics(),
+                'task_queue_size': len(self.task_queue)
             }
         except Exception as e:
             print(f"ERROR: Exception in get_scan_status: {e}")
@@ -693,7 +717,8 @@ class Scanner:
             'indicators_processed': 0,
             'progress_percentage': 0.0,
             'enabled_providers': [],
-            'graph_statistics': {}
+            'graph_statistics': {},
+            'task_queue_size': 0
         }
 
     def _calculate_progress(self) -> float:
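
Finally, both the live status payload and its error fallback now expose `task_queue_size`, so a consumer can tell an idle scanner from one still draining a backlog. A hypothetical polling snippet against the keys shown in the diff:

    status = scanner.get_scan_status()  # scanner: a running Scanner
    print(f"{status['progress_percentage']:.1f}% done, "
          f"{status['task_queue_size']} target(s) still queued")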