diff --git a/app.py b/app.py
index 8bc0162..894304e 100644
--- a/app.py
+++ b/app.py
@@ -31,7 +31,6 @@ def get_user_scanner():
     if current_flask_session_id:
         existing_scanner = session_manager.get_session(current_flask_session_id)
         if existing_scanner:
-            print(f"Scanner status: {existing_scanner.status}")
             # Ensure session ID is set
             existing_scanner.session_id = current_flask_session_id
             return current_flask_session_id, existing_scanner
diff --git a/core/scanner.py b/core/scanner.py
index dcec77c..54304c1 100644
--- a/core/scanner.py
+++ b/core/scanner.py
@@ -273,8 +273,7 @@ class Scanner:
         self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
         processed_targets = set()
 
-        # Initialize the task queue with the starting target and its depth
-        task_queue = deque([(target_domain, 0)])
+        task_queue = deque([(target_domain, 0, False)])  # target, depth, is_large_entity_member
 
         try:
             self.status = ScanStatus.RUNNING
@@ -290,7 +289,7 @@ class Scanner:
                     print("Stop requested, terminating scan.")
                     break
 
-                target, depth = task_queue.popleft()
+                target, depth, is_large_entity_member = task_queue.popleft()
 
                 if target in processed_targets:
                     continue
@@ -302,14 +301,17 @@ class Scanner:
                 self.current_indicator = target
                 self._update_session_state()
 
-                # Process the current target
-                new_targets = self._query_providers_for_target(target, depth)
+                new_targets, large_entity_members = self._query_providers_for_target(target, depth, is_large_entity_member)
                 processed_targets.add(target)
 
-                # Add new, unprocessed targets to the queue
                 for new_target in new_targets:
                     if new_target not in processed_targets:
-                        task_queue.append((new_target, depth + 1))
+                        task_queue.append((new_target, depth + 1, False))
+
+                for member in large_entity_members:
+                    if member not in processed_targets:
+                        task_queue.append((member, depth, True))
+
         except Exception as e:
             print(f"ERROR: Scan execution failed with error: {e}")
@@ -332,32 +334,29 @@ class Scanner:
         print(f"  - Total edges: {stats['basic_metrics']['total_edges']}")
         print(f"  - Targets processed: {len(processed_targets)}")
 
-    def _query_providers_for_target(self, target: str, depth: int) -> Set[str]:
+    def _query_providers_for_target(self, target: str, depth: int, dns_only: bool = False) -> Tuple[Set[str], Set[str]]:
         """Helper method to query providers for a single target."""
         is_ip = _is_valid_ip(target)
         target_type = NodeType.IP if is_ip else NodeType.DOMAIN
         print(f"Querying providers for {target_type.value}: {target} at depth {depth}")
 
-        # Early stop check
         if self._is_stop_requested():
             print(f"Stop requested before querying providers for {target}")
-            return set()
+            return set(), set()
 
-        # Initialize node and provider states
         self.graph.add_node(target, target_type)
         self._initialize_provider_states(target)
 
         new_targets = set()
+        large_entity_members = set()
         target_metadata = defaultdict(lambda: defaultdict(list))
 
-        # Determine eligible providers for this target
-        eligible_providers = self._get_eligible_providers(target, is_ip)
+        eligible_providers = self._get_eligible_providers(target, is_ip, dns_only)
 
         if not eligible_providers:
             self._log_no_eligible_providers(target, is_ip)
-            return new_targets
+            return new_targets, large_entity_members
 
-        # Query each eligible provider sequentially with stop checks
         for provider in eligible_providers:
             if self._is_stop_requested():
                 print(f"Stop requested while querying providers for {target}")
                 break
 
             try:
                 provider_results = self._query_single_provider_forensic(provider, target, is_ip, depth)
                 if provider_results and not self._is_stop_requested():
-                    discovered_targets = self._process_provider_results_forensic(
+                    discovered, is_large_entity = self._process_provider_results_forensic(
                         target, provider, provider_results, target_metadata, depth
                     )
-                    new_targets.update(discovered_targets)
+                    if is_large_entity:
+                        large_entity_members.update(discovered)
+                    else:
+                        new_targets.update(discovered)
             except Exception as e:
                 self._log_provider_error(target, provider.get_name(), str(e))
 
-        # Update node metadata
         for node_id, metadata_dict in target_metadata.items():
             if self.graph.graph.has_node(node_id):
                 node_is_ip = _is_valid_ip(node_id)
                 node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
                 self.graph.add_node(node_id, node_type_to_add, metadata=metadata_dict)
 
-        return new_targets
+        return new_targets, large_entity_members
 
     def _update_session_state(self) -> None:
         """
@@ -407,198 +408,11 @@ class Scanner:
             if 'provider_states' not in node_data['metadata']:
                 node_data['metadata']['provider_states'] = {}
 
-    def _should_recurse_on_target(self, target: str, processed_targets: Set[str], all_discovered: Set[str]) -> bool:
-        """
-        Simplified recursion logic: only recurse on valid IPs and domains that haven't been processed.
-        """
-        # Don't recurse on already processed targets
-        if target in processed_targets:
-            return False
-
-        # Only recurse on valid IPs and domains
-        if not (_is_valid_ip(target) or _is_valid_domain(target)):
-            return False
-
-        # Don't recurse on targets contained in large entities
-        if self._is_in_large_entity(target):
-            return False
-
-        return True
-
-    def _is_in_large_entity(self, target: str) -> bool:
-        """Check if a target is contained within a large entity node."""
-        for node_id, node_data in self.graph.graph.nodes(data=True):
-            if node_data.get('type') == NodeType.LARGE_ENTITY.value:
-                metadata = node_data.get('metadata', {})
-                contained_nodes = metadata.get('nodes', [])
-                if target in contained_nodes:
-                    return True
-        return False
-
-    def _process_targets_sequential_with_stop_checks(self, targets: Set[str], processed_targets: Set[str],
-                                                     all_discovered: Set[str], current_depth: int) -> List[Tuple[str, Set[str]]]:
-        """
-        Process targets with controlled concurrency for both responsiveness and proper completion.
-        Balances termination responsiveness with avoiding race conditions.
- """ - results = [] - targets_to_process = targets - processed_targets - if not targets_to_process: - return results - - print(f"Processing {len(targets_to_process)} targets with controlled concurrency") - - target_list = list(targets_to_process) - active_futures: Dict[Future, str] = {} - target_index = 0 - last_gui_update = time.time() - - while target_index < len(target_list) or active_futures: - # Check stop signal before any new work - if self._is_stop_requested(): - print("Stop requested - canceling active futures and exiting") - for future in list(active_futures.keys()): - future.cancel() - break - - # Submit new futures up to max_workers limit (controlled concurrency) - while len(active_futures) < self.max_workers and target_index < len(target_list): - if self._is_stop_requested(): - break - - target = target_list[target_index] - self.current_indicator = target - print(f"Submitting target {target_index + 1}/{len(target_list)}: {target}") - - future = self.executor.submit(self._query_providers_forensic, target, current_depth) - active_futures[future] = target - target_index += 1 - - # Update GUI periodically - current_time = time.time() - if target_index % 2 == 0 or (current_time - last_gui_update) > 2.0: - self._update_session_state() - last_gui_update = current_time - - # Wait for at least one future to complete (but don't wait forever) - if active_futures: - try: - # Wait for the first completion with reasonable timeout - completed_future = next(as_completed(active_futures.keys(), timeout=15.0)) - - target = active_futures[completed_future] - try: - new_targets = completed_future.result() - results.append((target, new_targets)) - self.indicators_processed += 1 - print(f"Completed processing target: {target} (found {len(new_targets)} new targets)") - - # Update GUI after each completion - current_time = time.time() - if (current_time - last_gui_update) > 1.0: - self._update_session_state() - last_gui_update = current_time - - except Exception as e: - print(f"Error processing target {target}: {e}") - self._log_target_processing_error(target, str(e)) - - # Remove the completed future - del active_futures[completed_future] - - except StopIteration: - # No futures completed within timeout - check stop signal and continue - if self._is_stop_requested(): - print("Stop requested during timeout - canceling futures") - for future in list(active_futures.keys()): - future.cancel() - break - # Continue loop to wait for completions - - except Exception as e: - # as_completed timeout or other error - if self._is_stop_requested(): - print("Stop requested during future waiting") - for future in list(active_futures.keys()): - future.cancel() - break - - # Check if any futures are actually done (in case of timeout exception) - completed_futures = [f for f in active_futures.keys() if f.done()] - for completed_future in completed_futures: - target = active_futures[completed_future] - try: - new_targets = completed_future.result() - results.append((target, new_targets)) - self.indicators_processed += 1 - print(f"Completed processing target: {target} (found {len(new_targets)} new targets)") - except Exception as ex: - print(f"Error processing target {target}: {ex}") - self._log_target_processing_error(target, str(ex)) - - del active_futures[completed_future] - - print(f"Completed processing all targets at depth {current_depth}") - - # Final state update - self._update_session_state() - - return results - - def _query_providers_forensic(self, target: str, current_depth: int) -> Set[str]: - """ - Query 
-        Query providers for a target with enhanced stop signal checking.
-        """
-        is_ip = _is_valid_ip(target)
-        target_type = NodeType.IP if is_ip else NodeType.DOMAIN
-        print(f"Querying providers for {target_type.value}: {target} at depth {current_depth}")
-
-        # Early stop check
-        if self._is_stop_requested():
-            print(f"Stop requested before querying providers for {target}")
-            return set()
-
-        # Initialize node and provider states
-        self.graph.add_node(target, target_type)
-        self._initialize_provider_states(target)
-
-        new_targets = set()
-        target_metadata = defaultdict(lambda: defaultdict(list))
-
-        # Determine eligible providers for this target
-        eligible_providers = self._get_eligible_providers(target, is_ip)
-
-        if not eligible_providers:
-            self._log_no_eligible_providers(target, is_ip)
-            return new_targets
-
-        # Query each eligible provider sequentially with stop checks
-        for provider in eligible_providers:
-            if self._is_stop_requested():
-                print(f"Stop requested while querying providers for {target}")
-                break
-
-            try:
-                provider_results = self._query_single_provider_forensic(provider, target, is_ip, current_depth)
-                if provider_results and not self._is_stop_requested():
-                    discovered_targets = self._process_provider_results_forensic(
-                        target, provider, provider_results, target_metadata, current_depth
-                    )
-                    new_targets.update(discovered_targets)
-            except Exception as e:
-                self._log_provider_error(target, provider.get_name(), str(e))
-
-        # Update node metadata
-        for node_id, metadata_dict in target_metadata.items():
-            if self.graph.graph.has_node(node_id):
-                node_is_ip = _is_valid_ip(node_id)
-                node_type_to_add = NodeType.IP if node_is_ip else NodeType.DOMAIN
-                self.graph.add_node(node_id, node_type_to_add, metadata=metadata_dict)
-
-        return new_targets
-
-    def _get_eligible_providers(self, target: str, is_ip: bool) -> List:
+    def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool) -> List:
         """Get providers eligible for querying this target."""
+        if dns_only:
+            return [p for p in self.providers if p.get_name() == 'dns']
+
         eligible = []
         target_key = 'ips' if is_ip else 'domains'
 
@@ -606,7 +420,6 @@ class Scanner:
             provider_name = provider.get_name()
             if provider_name in self.provider_eligibility:
                 if self.provider_eligibility[provider_name][target_key]:
-                    # Check if we already queried this provider for this target
                     if not self._already_queried_provider(target, provider_name):
                         eligible.append(provider)
                     else:
@@ -628,36 +441,30 @@ class Scanner:
         provider_name = provider.get_name()
         start_time = datetime.now(timezone.utc)
 
-        # Check stop signal before querying
         if self._is_stop_requested():
             print(f"Stop requested before querying {provider_name} for {target}")
             return []
 
         print(f"Querying {provider_name} for {target}")
 
-        # Log attempt
         self.logger.logger.info(f"Attempting {provider_name} query for {target} at depth {current_depth}")
 
         try:
-            # Perform the query
             if is_ip:
                 results = provider.query_ip(target)
             else:
                 results = provider.query_domain(target)
 
-            # Check stop signal after querying
             if self._is_stop_requested():
                 print(f"Stop requested after querying {provider_name} for {target}")
                 return []
 
-            # Track successful state
             self._update_provider_state(target, provider_name, 'success', len(results), None, start_time)
 
             print(f"✓ {provider_name} returned {len(results)} results for {target}")
             return results
 
         except Exception as e:
-            # Track failed state
             self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
             print(f"✗ {provider_name} failed for {target}: {e}")
             return []
@@
 -682,35 +489,28 @@ class Scanner:
             'duration_ms': (datetime.now(timezone.utc) - start_time).total_seconds() * 1000
         }
 
-        # Log to forensic trail
         self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)")
 
     def _process_provider_results_forensic(self, target: str, provider, results: List,
-                                           target_metadata: Dict, current_depth: int) -> Set[str]:
-        """Process provider results with large entity detection and stop signal checking."""
+                                           target_metadata: Dict, current_depth: int) -> Tuple[Set[str], bool]:
+        """Process provider results; return (discovered_targets, is_large_entity)."""
         provider_name = provider.get_name()
         discovered_targets = set()
 
-        # Check for stop signal before processing results
         if self._is_stop_requested():
            print(f"Stop requested before processing results from {provider_name} for {target}")
-            return discovered_targets
+            return discovered_targets, False
 
-        # Check for large entity threshold per provider
         if len(results) > self.config.large_entity_threshold:
             print(f"Large entity detected: {provider_name} returned {len(results)} results for {target}")
-            self._create_large_entity(target, provider_name, results, current_depth)
-            # Large entities block recursion - return empty set
-            return discovered_targets
+            members = self._create_large_entity(target, provider_name, results, current_depth)
+            return members, True
 
-        # Process each relationship
         for i, (source, rel_target, rel_type, confidence, raw_data) in enumerate(results):
-            # Check stop signal periodically during result processing
             if i % 10 == 0 and self._is_stop_requested():
                 print(f"Stop requested while processing results from {provider_name} for {target}")
                 break
 
-            # Enhanced forensic logging for each relationship
             self.logger.log_relationship_discovery(
                 source_node=source,
                 target_node=rel_target,
@@ -721,10 +521,8 @@ class Scanner:
                 discovery_method=f"{provider_name}_query_depth_{current_depth}"
             )
 
-            # Collect metadata for source node
             self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target,
                                                  raw_data, target_metadata[source])
 
-            # Add nodes and edges based on target type
             if _is_valid_ip(rel_target):
                 self.graph.add_node(rel_target, NodeType.IP)
                 if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
@@ -741,44 +539,28 @@ class Scanner:
                 if self.graph.add_edge(source, rel_target, rel_type, confidence, provider_name, raw_data):
                     print(f"Added domain relationship: {source} -> {rel_target} ({rel_type.relationship_name})")
                 discovered_targets.add(rel_target)
-
-                # Enrich the newly discovered domain
                 self._collect_node_metadata_forensic(rel_target, provider_name, rel_type, source,
                                                      raw_data, target_metadata[rel_target])
             else:
-                # Store the record content in the domain's metadata
                 self._collect_node_metadata_forensic(source, provider_name, rel_type, rel_target,
                                                      raw_data, target_metadata[source])
+        return discovered_targets, False
 
-        return discovered_targets
-
-    def _create_large_entity(self, source: str, provider_name: str, results: List, current_depth: int) -> None:
-        """Create a large entity node and process its contents with the DNS provider."""
-        entity_id = f"Large Collection from {provider_name}"
+    def _create_large_entity(self, source: str, provider_name: str, results: List, current_depth: int) -> Set[str]:
+        """Create a large entity node and return the members for DNS processing."""
+        entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}"
 
-        targets = []
+        targets = [rel[1] for rel in results if len(rel) > 1]
         node_type = 'unknown'
 
-        dns_provider = next((p for p in self.providers if p.get_name() == 'dns'), None)
-
-        for rel in results:
-            if len(rel) > 1:
-                target = rel[1]
-                targets.append(target)
-
-                # Determine node type and add node to graph
-                if _is_valid_domain(target):
-                    node_type = 'domain'
-                    self.graph.add_node(target, NodeType.DOMAIN)
-                    if dns_provider:
-                        dns_results = dns_provider.query_domain(target)
-                        self._process_provider_results_forensic(target, dns_provider, dns_results, defaultdict(lambda: defaultdict(list)), current_depth)
-                elif _is_valid_ip(target):
-                    node_type = 'ip'
-                    self.graph.add_node(target, NodeType.IP)
-                    if dns_provider:
-                        dns_results = dns_provider.query_ip(target)
-                        self._process_provider_results_forensic(target, dns_provider, dns_results, defaultdict(lambda: defaultdict(list)), current_depth)
+        if targets:
+            if _is_valid_domain(targets[0]):
+                node_type = 'domain'
+            elif _is_valid_ip(targets[0]):
+                node_type = 'ip'
+
+        for target in targets:
+            self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP)
 
         metadata = {
             'count': len(targets),
@@ -799,6 +581,8 @@ class Scanner:
 
         self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(targets)} targets from {provider_name}")
         print(f"Created large entity {entity_id} for {len(targets)} {node_type}s from {provider_name}")
+
+        return set(targets)
 
     def _collect_node_metadata_forensic(self, node_id: str, provider_name: str, rel_type: RelationshipType,
                                         target: str, raw_data: Dict[str, Any], metadata: Dict[str, Any]) -> None:
diff --git a/static/js/graph.js b/static/js/graph.js
index 75a45c0..2afa598 100644
--- a/static/js/graph.js
+++ b/static/js/graph.js
@@ -172,7 +172,6 @@ class GraphManager {
         controlsContainer.className = 'graph-controls';
         controlsContainer.innerHTML = `
-
         `;
@@ -181,7 +180,6 @@ class GraphManager {
 
         // Add control event listeners
         document.getElementById('graph-fit').addEventListener('click', () => this.fitView());
-        document.getElementById('graph-reset').addEventListener('click', () => this.resetView());
         document.getElementById('graph-physics').addEventListener('click', () => this.togglePhysics());
         document.getElementById('graph-cluster').addEventListener('click', () => this.toggleClustering());
     }
@@ -843,22 +841,6 @@ class GraphManager {
         }
     }
 
-    /**
-     * Reset the view to initial state
-     */
-    resetView() {
-        if (this.network) {
-            this.network.moveTo({
-                position: { x: 0, y: 0 },
-                scale: 1,
-                animation: {
-                    duration: 1000,
-                    easingFunction: 'easeInOutQuad'
-                }
-            });
-        }
-    }
-
     /**
      * Clear the graph
      */
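
Note: the reworked scan loop in core/scanner.py is easiest to read as a plain breadth-first traversal whose queue entries carry a third flag. Below is a minimal, self-contained sketch of that pattern, not the project's code: `query_providers` stands in for `Scanner._query_providers_for_target`, and the `max_depth` cutoff is an assumption, since the real loop's termination conditions sit outside the visible hunks.

```python
from collections import deque
from typing import Callable, Set, Tuple

QueryFn = Callable[[str, int, bool], Tuple[Set[str], Set[str]]]

def crawl(seed: str, query_providers: QueryFn, max_depth: int = 2) -> Set[str]:
    processed: Set[str] = set()
    # Each task is (target, depth, is_large_entity_member).
    queue = deque([(seed, 0, False)])
    while queue:
        target, depth, dns_only = queue.popleft()
        if target in processed or depth > max_depth:  # depth cap is an assumption
            continue
        new_targets, large_entity_members = query_providers(target, depth, dns_only)
        processed.add(target)
        # Ordinary discoveries recurse one level deeper with all providers.
        for t in new_targets:
            if t not in processed:
                queue.append((t, depth + 1, False))
        # Large-entity members re-enter at the same depth, flagged DNS-only.
        for m in large_entity_members:
            if m not in processed:
                queue.append((m, depth, True))
    return processed

# Example with a stub query function: the seed yields one normal target
# and two large-entity members; nothing resolves further.
def stub_query(target: str, depth: int, dns_only: bool) -> Tuple[Set[str], Set[str]]:
    if target == "seed.example.com":
        return {"a.example.com"}, {"m1.example.com", "m2.example.com"}
    return set(), set()

print(sorted(crawl("seed.example.com", stub_query)))
```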
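The other half of the change is the `(discovered_targets, is_large_entity)` contract of `_process_provider_results_forensic`, sketched here in isolation. The row layout `(source, target, rel_type, confidence, raw_data)` matches the hunks above; the threshold constant is an assumed stand-in for `self.config.large_entity_threshold`, and collapsing oversized result sets into a single node (instead of one edge per row) is what keeps the members out of normal recursion.

```python
from typing import Any, List, Set, Tuple

LARGE_ENTITY_THRESHOLD = 100  # assumed; the scanner reads self.config.large_entity_threshold

def split_results(results: List[Tuple[Any, ...]]) -> Tuple[Set[str], bool]:
    """Return (discovered_targets, is_large_entity) as in the patched method."""
    members = {rel[1] for rel in results if len(rel) > 1}
    if len(results) > LARGE_ENTITY_THRESHOLD:
        # Oversized result set: hand the members back so the caller can
        # re-queue them as same-depth, DNS-only tasks.
        return members, True
    return members, False

# Example: 150 synthetic rows trip the threshold.
rows = [("example.com", f"sub{i}.example.com", "dns_a", 0.9, {}) for i in range(150)]
targets, is_large = split_results(rows)
assert is_large and len(targets) == 150
```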