This commit is contained in:
overcuriousity 2025-09-13 16:27:31 +02:00
parent 717f103596
commit 2925512a4d
3 changed files with 44 additions and 279 deletions

1
app.py
View File

@ -31,7 +31,6 @@ def get_user_scanner():
if current_flask_session_id: if current_flask_session_id:
existing_scanner = session_manager.get_session(current_flask_session_id) existing_scanner = session_manager.get_session(current_flask_session_id)
if existing_scanner: if existing_scanner:
print(f"Scanner status: {existing_scanner.status}")
# Ensure session ID is set # Ensure session ID is set
existing_scanner.session_id = current_flask_session_id existing_scanner.session_id = current_flask_session_id
return current_flask_session_id, existing_scanner return current_flask_session_id, existing_scanner

View File

@ -273,8 +273,7 @@ class Scanner:
self.executor = ThreadPoolExecutor(max_workers=self.max_workers) self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
processed_targets = set() processed_targets = set()
# Initialize the task queue with the starting target and its depth task_queue = deque([(target_domain, 0, False)]) # target, depth, is_large_entity_member
task_queue = deque([(target_domain, 0)])
try: try:
self.status = ScanStatus.RUNNING self.status = ScanStatus.RUNNING
@ -290,7 +289,7 @@ class Scanner:
print("Stop requested, terminating scan.") print("Stop requested, terminating scan.")
break break
target, depth = task_queue.popleft() target, depth, is_large_entity_member = task_queue.popleft()
if target in processed_targets: if target in processed_targets:
continue continue
@ -302,14 +301,17 @@ class Scanner:
self.current_indicator = target self.current_indicator = target
self._update_session_state() self._update_session_state()
# Process the current target new_targets, large_entity_members = self._query_providers_for_target(target, depth, is_large_entity_member)
new_targets = self._query_providers_for_target(target, depth)
processed_targets.add(target) processed_targets.add(target)
# Add new, unprocessed targets to the queue
for new_target in new_targets: for new_target in new_targets:
if new_target not in processed_targets: if new_target not in processed_targets:
task_queue.append((new_target, depth + 1)) task_queue.append((new_target, depth + 1, False))
for member in large_entity_members:
if member not in processed_targets:
task_queue.append((member, depth, True))
except Exception as e: except Exception as e:
print(f"ERROR: Scan execution failed with error: {e}") print(f"ERROR: Scan execution failed with error: {e}")
@ -332,32 +334,29 @@ class Scanner:
print(f" - Total edges: {stats['basic_metrics']['total_edges']}") print(f" - Total edges: {stats['basic_metrics']['total_edges']}")
print(f" - Targets processed: {len(processed_targets)}") print(f" - Targets processed: {len(processed_targets)}")
def _query_providers_for_target(self, target: str, depth: int) -> Set[str]: def _query_providers_for_target(self, target: str, depth: int, dns_only: bool = False) -> Tuple[Set[str], Set[str]]:
"""Helper method to query providers for a single target.""" """Helper method to query providers for a single target."""
is_ip = _is_valid_ip(target) is_ip = _is_valid_ip(target)
target_type = NodeType.IP if is_ip else NodeType.DOMAIN target_type = NodeType.IP if is_ip else NodeType.DOMAIN
print(f"Querying providers for {target_type.value}: {target} at depth {depth}") print(f"Querying providers for {target_type.value}: {target} at depth {depth}")
# Early stop check
if self._is_stop_requested(): if self._is_stop_requested():
print(f"Stop requested before querying providers for {target}") print(f"Stop requested before querying providers for {target}")
return set() return set(), set()
# Initialize node and provider states
self.graph.add_node(target, target_type) self.graph.add_node(target, target_type)
self._initialize_provider_states(target) self._initialize_provider_states(target)
new_targets = set() new_targets = set()
large_entity_members = set()
target_metadata = defaultdict(lambda: defaultdict(list)) target_metadata = defaultdict(lambda: defaultdict(list))
# Determine eligible providers for this target eligible_providers = self._get_eligible_providers(target, is_ip, dns_only)
eligible_providers = self._get_eligible_providers(target, is_ip)
if not eligible_providers: if not eligible_providers:
self._log_no_eligible_providers(target, is_ip) self._log_no_eligible_providers(target, is_ip)
return new_targets return new_targets, large_entity_members
# Query each eligible provider sequentially with stop checks
for provider in eligible_providers: for provider in eligible_providers:
if self._is_stop_requested(): if self._is_stop_requested():
print(f"Stop requested while querying providers for {target}") print(f"Stop requested while querying providers for {target}")
@ -366,21 +365,23 @@ class Scanner:
try: try:
provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth) provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
if provider_results and not self._is_stop_requested(): if provider_results and not self._is_stop_requested():
discovered_targets = self._process_provider_results_forensic( discovered, is_large_entity = self._process_provider_results_forensic(
target, provider, provider_results, target_metadata, depth target, provider, provider_results, target_metadata, depth
) )
new_targets.update(discovered_targets) if is_large_entity:
large_entity_members.update(discovered)
else:
new_targets.update(discovered)
except Exception as e: except Exception as e:
self._log_provider_error(target, provider.get_name(), str(e)) self._log_provider_error(target, provider.get_name(), str(e))
# Update node metadata
for node_id, metadata_dict in target_metadata.items(): for node_id, metadata_dict in target_metadata.items():
if self.graph.graph.has_node(node_id): if self.graph.graph.has_node(node_id):
node_is_ip = _is_valid_ip(node_id) node_is_ip = _is_valid_ip(node_id)
node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
self.graph.add_node(node_id, node_type_to_add, metadata=metadata_dict) self.graph.add_node(node_id, node_type_to_add, metadata=metadata_dict)
return new_targets return new_targets, large_entity_members
def _update_session_state(self) -> None: def _update_session_state(self) -> None:
""" """
@ -407,198 +408,11 @@ class Scanner:
if 'provider_states' not in node_data['metadata']: if 'provider_states' not in node_data['metadata']:
node_data['metadata']['provider_states'] = {} node_data['metadata']['provider_states'] = {}
def _should_recurse_on_target(self, target: str, processed_targets: Set[str], all_discovered: Set[str]) -> bool: def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool) -> List:
"""
Simplified recursion logic: only recurse on valid IPs and domains that haven't been processed.
"""
# Don't recurse on already processed targets
if target in processed_targets:
return False
# Only recurse on valid IPs and domains
if not (_is_valid_ip(target) or _is_valid_domain(target)):
return False
# Don't recurse on targets contained in large entities
if self._is_in_large_entity(target):
return False
return True
def _is_in_large_entity(self, target: str) -> bool:
"""Check if a target is contained within a large entity node."""
for node_id, node_data in self.graph.graph.nodes(data=True):
if node_data.get('type') == NodeType.LARGE_ENTITY.value:
metadata = node_data.get('metadata', {})
contained_nodes = metadata.get('nodes', [])
if target in contained_nodes:
return True
return False
def _process_targets_sequential_with_stop_checks(self, targets: Set[str], processed_targets: Set[str],
all_discovered: Set[str], current_depth: int) -> List[Tuple[str, Set[str]]]:
"""
Process targets with controlled concurrency for both responsiveness and proper completion.
Balances termination responsiveness with avoiding race conditions.
"""
results = []
targets_to_process = targets - processed_targets
if not targets_to_process:
return results
print(f"Processing {len(targets_to_process)} targets with controlled concurrency")
target_list = list(targets_to_process)
active_futures: Dict[Future, str] = {}
target_index = 0
last_gui_update = time.time()
while target_index < len(target_list) or active_futures:
# Check stop signal before any new work
if self._is_stop_requested():
print("Stop requested - canceling active futures and exiting")
for future in list(active_futures.keys()):
future.cancel()
break
# Submit new futures up to max_workers limit (controlled concurrency)
while len(active_futures) < self.max_workers and target_index < len(target_list):
if self._is_stop_requested():
break
target = target_list[target_index]
self.current_indicator = target
print(f"Submitting target {target_index + 1}/{len(target_list)}: {target}")
future = self.executor.submit(self._query_providers_forensic, target, current_depth)
active_futures[future] = target
target_index += 1
# Update GUI periodically
current_time = time.time()
if target_index % 2 == 0 or (current_time - last_gui_update) > 2.0:
self._update_session_state()
last_gui_update = current_time
# Wait for at least one future to complete (but don't wait forever)
if active_futures:
try:
# Wait for the first completion with reasonable timeout
completed_future = next(as_completed(active_futures.keys(), timeout=15.0))
target = active_futures[completed_future]
try:
new_targets = completed_future.result()
results.append((target, new_targets))
self.indicators_processed += 1
print(f"Completed processing target: {target} (found {len(new_targets)} new targets)")
# Update GUI after each completion
current_time = time.time()
if (current_time - last_gui_update) > 1.0:
self._update_session_state()
last_gui_update = current_time
except Exception as e:
print(f"Error processing target {target}: {e}")
self._log_target_processing_error(target, str(e))
# Remove the completed future
del active_futures[completed_future]
except StopIteration:
# No futures completed within timeout - check stop signal and continue
if self._is_stop_requested():
print("Stop requested during timeout - canceling futures")
for future in list(active_futures.keys()):
future.cancel()
break
# Continue loop to wait for completions
except Exception as e:
# as_completed timeout or other error
if self._is_stop_requested():
print("Stop requested during future waiting")
for future in list(active_futures.keys()):
future.cancel()
break
# Check if any futures are actually done (in case of timeout exception)
completed_futures = [f for f in active_futures.keys() if f.done()]
for completed_future in completed_futures:
target = active_futures[completed_future]
try:
new_targets = completed_future.result()
results.append((target, new_targets))
self.indicators_processed += 1
print(f"Completed processing target: {target} (found {len(new_targets)} new targets)")
except Exception as ex:
print(f"Error processing target {target}: {ex}")
self._log_target_processing_error(target, str(ex))
del active_futures[completed_future]
print(f"Completed processing all targets at depth {current_depth}")
# Final state update
self._update_session_state()
return results
def _query_providers_forensic(self, target: str, current_depth: int) -> Set[str]:
"""
Query providers for a target with enhanced stop signal checking.
"""
is_ip = _is_valid_ip(target)
target_type = NodeType.IP if is_ip else NodeType.DOMAIN
print(f"Querying providers for {target_type.value}: {target} at depth {current_depth}")
# Early stop check
if self._is_stop_requested():
print(f"Stop requested before querying providers for {target}")
return set()
# Initialize node and provider states
self.graph.add_node(target, target_type)
self._initialize_provider_states(target)
new_targets = set()
target_metadata = defaultdict(lambda: defaultdict(list))
# Determine eligible providers for this target
eligible_providers = self._get_eligible_providers(target, is_ip)
if not eligible_providers:
self._log_no_eligible_providers(target, is_ip)
return new_targets
# Query each eligible provider sequentially with stop checks
for provider in eligible_providers:
if self._is_stop_requested():
print(f"Stop requested while querying providers for {target}")
break
try:
provider_results = self._query_single_provider_forensic(provider, target, is_ip, current_depth)
if provider_results and not self._is_stop_requested():
discovered_targets = self._process_provider_results_forensic(
target, provider, provider_results, target_metadata, current_depth
)
new_targets.update(discovered_targets)
except Exception as e:
self._log_provider_error(target, provider.get_name(), str(e))
# Update node metadata
for node_id, metadata_dict in target_metadata.items():
if self.graph.graph.has_node(node_id):
node_is_ip = _is_valid_ip(node_id)
node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
self.graph.add_node(node_id, node_type_to_add, metadata=metadata_dict)
return new_targets
def _get_eligible_providers(self, target: str, is_ip: bool) -> List:
"""Get providers eligible for querying this target.""" """Get providers eligible for querying this target."""
if dns_only:
return [p for p in self.providers if p.get_name() == 'dns']
eligible = [] eligible = []
target_key = 'ips' if is_ip else 'domains' target_key = 'ips' if is_ip else 'domains'
@ -606,7 +420,6 @@ class Scanner:
provider_name = provider.get_name() provider_name = provider.get_name()
if provider_name in self.provider_eligibility: if provider_name in self.provider_eligibility:
if self.provider_eligibility[provider_name][target_key]: if self.provider_eligibility[provider_name][target_key]:
# Check if we already queried this provider for this target
if not self._already_queried_provider(target, provider_name): if not self._already_queried_provider(target, provider_name):
eligible.append(provider) eligible.append(provider)
else: else:
@ -628,36 +441,30 @@ class Scanner:
provider_name = provider.get_name() provider_name = provider.get_name()
start_time = datetime.now(timezone.utc) start_time = datetime.now(timezone.utc)
# Check stop signal before querying
if self._is_stop_requested(): if self._is_stop_requested():
print(f"Stop requested before querying {provider_name} for {target}") print(f"Stop requested before querying {provider_name} for {target}")
return [] return []
print(f"Querying {provider_name} for {target}") print(f"Querying {provider_name} for {target}")
# Log attempt
self.logger.logger.info(f"Attempting {provider_name} query for {target} at depth {current_depth}") self.logger.logger.info(f"Attempting {provider_name} query for {target} at depth {current_depth}")
try: try:
# Perform the query
if is_ip: if is_ip:
results = provider.query_ip(target) results = provider.query_ip(target)
else: else:
results = provider.query_domain(target) results = provider.query_domain(target)
# Check stop signal after querying
if self._is_stop_requested(): if self._is_stop_requested():
print(f"Stop requested after querying {provider_name} for {target}") print(f"Stop requested after querying {provider_name} for {target}")
return [] return []
# Track successful state
self._update_provider_state(target, provider_name, 'success', len(results), None, start_time) self._update_provider_state(target, provider_name, 'success', len(results), None, start_time)
print(f"{provider_name} returned {len(results)} results for {target}") print(f"{provider_name} returned {len(results)} results for {target}")
return results return results
except Exception as e: except Exception as e:
# Track failed state
self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time) self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
print(f"{provider_name} failed for {target}: {e}") print(f"{provider_name} failed for {target}: {e}")
return [] return []
@ -682,35 +489,28 @@ class Scanner:
'duration_ms': (datetime.now(timezone.utc) - start_time).total_seconds() * 1000 'duration_ms': (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
} }
# Log to forensic trail
self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)") self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)")
def _process_provider_results_forensic(self, target: str, provider, results: List, def _process_provider_results_forensic(self, target: str, provider, results: List,
target_metadata: Dict, current_depth: int) -> Set[str]: target_metadata: Dict, current_depth: int) -> Tuple[Set[str], bool]:
"""Process provider results with large entity detection and stop signal checking.""" """Process provider results, returns (discovered_targets, is_large_entity)."""
provider_name = provider.get_name() provider_name = provider.get_name()
discovered_targets = set() discovered_targets = set()
# Check for stop signal before processing results
if self._is_stop_requested(): if self._is_stop_requested():
print(f"Stop requested before processing results from {provider_name} for {target}") print(f"Stop requested before processing results from {provider_name} for {target}")
return discovered_targets return discovered_targets, False
# Check for large entity threshold per provider
if len(results) > self.config.large_entity_threshold: if len(results) > self.config.large_entity_threshold:
print(f"Large entity detected: {provider_name} returned {len(results)} results for {target}") print(f"Large entity detected: {provider_name} returned {len(results)} results for {target}")
self._create_large_entity(target, provider_name, results, current_depth) members = self._create_large_entity(target, provider_name, results, current_depth)
# Large entities block recursion - return empty set return members, True
return discovered_targets
# Process each relationship
for i, (source, rel_target, rel_type, confidence, raw_data) in enumerate(results): for i, (source, rel_target, rel_type, confidence, raw_data) in enumerate(results):
# Check stop signal periodically during result processing
if i % 10 == 0 and self._is_stop_requested(): if i % 10 == 0 and self._is_stop_requested():
print(f"Stop requested while processing results from {provider_name} for {target}") print(f"Stop requested while processing results from {provider_name} for {target}")
break break
# Enhanced forensic logging for each relationship
self.logger.log_relationship_discovery( self.logger.log_relationship_discovery(
source_node=source, source_node=source,
target_node=rel_target, target_node=rel_target,
@ -721,10 +521,8 @@ class Scanner:
discovery_method=f"{provider_name}_query_depth_{current_depth}" discovery_method=f"{provider_name}_query_depth_{current_depth}"
) )
# Collect metadata for source node
self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source]) self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source])
# Add nodes and edges based on target type
if _is_valid_ip(rel_target): if _is_valid_ip(rel_target):
self.graph.add_node(rel_target, NodeType.IP) self.graph.add_node(rel_target, NodeType.IP)
if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data): if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
@ -741,44 +539,28 @@ class Scanner:
if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data): if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
print(f"Added domain relationship: {source} -> {rel_target} ({rel_type.relationship_name})") print(f"Added domain relationship: {source} -> {rel_target} ({rel_type.relationship_name})")
discovered_targets.add(rel_target) discovered_targets.add(rel_target)
# Enrich the newly discovered domain
self._collect_node_metadata_forensic(rel_target, provider_name, rel_type, source, raw_data, target_metadata[rel_target]) self._collect_node_metadata_forensic(rel_target, provider_name, rel_type, source, raw_data, target_metadata[rel_target])
else: else:
# Store the record content in the domain's metadata
self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source]) self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target, raw_data, target_metadata[source])
return discovered_targets, False
return discovered_targets def _create_large_entity(self, source: str, provider_name: str, results: List, current_depth: int) -> Set[str]:
"""Create a large entity node and returns the members for DNS processing."""
entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}"
def _create_large_entity(self, source: str, provider_name: str, results: List, current_depth: int) -> None: targets = [rel[1] for rel in results if len(rel) > 1]
"""Create a large entity node and process its contents with the DNS provider."""
entity_id = f"Large Collection from {provider_name}"
targets = []
node_type = 'unknown' node_type = 'unknown'
dns_provider = next((p for p in self.providers if p.get_name() == 'dns'), None) if targets:
if _is_valid_domain(targets[0]):
for rel in results:
if len(rel) > 1:
target = rel[1]
targets.append(target)
# Determine node type and add node to graph
if _is_valid_domain(target):
node_type = 'domain' node_type = 'domain'
self.graph.add_node(target, NodeType.DOMAIN) elif _is_valid_ip(targets[0]):
if dns_provider:
dns_results = dns_provider.query_domain(target)
self._process_provider_results_forensic(target, dns_provider, dns_results, defaultdict(lambda: defaultdict(list)), current_depth)
elif _is_valid_ip(target):
node_type = 'ip' node_type = 'ip'
self.graph.add_node(target, NodeType.IP)
if dns_provider: for target in targets:
dns_results = dns_provider.query_ip(target) self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP)
self._process_provider_results_forensic(target, dns_provider, dns_results, defaultdict(lambda: defaultdict(list)), current_depth)
metadata = { metadata = {
'count': len(targets), 'count': len(targets),
@ -800,6 +582,8 @@ class Scanner:
self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}") self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}")
print(f"Created large entity {entity_id} for {len(targets)} {node_type}s from {provider_name}") print(f"Created large entity {entity_id} for {len(targets)} {node_type}s from {provider_name}")
return set(targets)
def _collect_node_metadata_forensic(self, node_id: str, provider_name: str, rel_type: RelationshipType, def _collect_node_metadata_forensic(self, node_id: str, provider_name: str, rel_type: RelationshipType,
target: str, raw_data: Dict[str, Any], metadata: Dict[str, Any]) -> None: target: str, raw_data: Dict[str, Any], metadata: Dict[str, Any]) -> None:
"""Collect and organize metadata for forensic tracking with enhanced logging.""" """Collect and organize metadata for forensic tracking with enhanced logging."""

View File

@ -172,7 +172,6 @@ class GraphManager {
controlsContainer.className = 'graph-controls'; controlsContainer.className = 'graph-controls';
controlsContainer.innerHTML = ` controlsContainer.innerHTML = `
<button class="graph-control-btn" id="graph-fit" title="Fit to Screen">[FIT]</button> <button class="graph-control-btn" id="graph-fit" title="Fit to Screen">[FIT]</button>
<button class="graph-control-btn" id="graph-reset" title="Reset View">[RESET]</button>
<button class="graph-control-btn" id="graph-physics" title="Toggle Physics">[PHYSICS]</button> <button class="graph-control-btn" id="graph-physics" title="Toggle Physics">[PHYSICS]</button>
<button class="graph-control-btn" id="graph-cluster" title="Cluster Nodes">[CLUSTER]</button> <button class="graph-control-btn" id="graph-cluster" title="Cluster Nodes">[CLUSTER]</button>
`; `;
@ -181,7 +180,6 @@ class GraphManager {
// Add control event listeners // Add control event listeners
document.getElementById('graph-fit').addEventListener('click', () => this.fitView()); document.getElementById('graph-fit').addEventListener('click', () => this.fitView());
document.getElementById('graph-reset').addEventListener('click', () => this.resetView());
document.getElementById('graph-physics').addEventListener('click', () => this.togglePhysics()); document.getElementById('graph-physics').addEventListener('click', () => this.togglePhysics());
document.getElementById('graph-cluster').addEventListener('click', () => this.toggleClustering()); document.getElementById('graph-cluster').addEventListener('click', () => this.toggleClustering());
} }
@ -843,22 +841,6 @@ class GraphManager {
} }
} }
/**
* Reset the view to initial state
*/
resetView() {
if (this.network) {
this.network.moveTo({
position: { x: 0, y: 0 },
scale: 1,
animation: {
duration: 1000,
easingFunction: 'easeInOutQuad'
}
});
}
}
/** /**
* Clear the graph * Clear the graph
*/ */