diff --git a/core/scanner.py b/core/scanner.py index 34e00d8..728f602 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -761,37 +761,37 @@ class Scanner: def _process_provider_task(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]: """ Manages the entire process for a given target and provider. - FIXED: Don't enqueue correlation tasks during normal processing. + This version is generalized to handle all relationships dynamically. """ if self._is_stop_requested(): return set(), set(), False - + is_ip = _is_valid_ip(target) target_type = NodeType.IP if is_ip else NodeType.DOMAIN - + self.graph.add_node(target, target_type) self._initialize_provider_states(target) - + new_targets = set() - large_entity_members = set() provider_successful = True - + try: provider_result = self._execute_provider_query(provider, target, is_ip) - + if provider_result is None: provider_successful = False elif not self._is_stop_requested(): + # Pass all relationships to be processed discovered, is_large_entity = self._process_provider_result_unified( target, provider, provider_result, depth ) new_targets.update(discovered) - + except Exception as e: provider_successful = False self._log_provider_error(target, provider.get_name(), str(e)) - - return new_targets, large_entity_members, provider_successful + + return new_targets, set(), provider_successful def _execute_provider_query(self, provider: BaseProvider, target: str, is_ip: bool) -> Optional[ProviderResult]: """ @@ -822,19 +822,18 @@ class Scanner: return None def _create_large_entity_from_result(self, source_node: str, provider_name: str, - provider_result: ProviderResult, depth: int) -> Set[str]: + provider_result: ProviderResult, depth: int) -> Tuple[str, Set[str]]: """ - Creates a large entity node and tags all member nodes. + Creates a large entity node, tags all member nodes, and returns its ID and members. """ members = {rel.target_node for rel in provider_result.relationships if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)} if not members: - return set() + return "", set() large_entity_id = f"le_{provider_name}_{source_node}" - # Add the large entity node to the graph self.graph.add_node( node_id=large_entity_id, node_type=NodeType.LARGE_ENTITY, @@ -847,16 +846,6 @@ class Scanner: description=f"A collection of {len(members)} nodes discovered from {source_node} via {provider_name}." ) - # Create a single edge from the source to the large entity - self.graph.add_edge( - source_node, large_entity_id, - relationship_type=f"{provider_name}_collection", - confidence_score=0.95, - source_provider=provider_name, - raw_data={'description': 'Represents a large collection of nodes.'} - ) - - # Tag each member node with the large entity ID for member_id in members: node_type = NodeType.IP if _is_valid_ip(member_id) else NodeType.DOMAIN self.graph.add_node( @@ -865,7 +854,7 @@ class Scanner: metadata={'large_entity_id': large_entity_id} ) - return members + return large_entity_id, members def extract_node_from_large_entity(self, large_entity_id: str, node_id: str) -> bool: """ @@ -907,70 +896,83 @@ class Scanner: provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]: """ Process a unified ProviderResult object to update the graph. - Handles large entity creation while ensuring all underlying nodes and edges are - added to the graph data model for a complete dataset. + This version dynamically re-routes edges to a large entity container. """ provider_name = provider.get_name() discovered_targets = set() + large_entity_id = "" large_entity_members = set() if self._is_stop_requested(): return discovered_targets, False - # Check if a large entity should be created based on the count of domain/IP relationships - eligible_relationship_count = sum( + eligible_rel_count = sum( 1 for rel in provider_result.relationships if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node) ) - - is_large_entity = eligible_relationship_count > self.config.large_entity_threshold + is_large_entity = eligible_rel_count > self.config.large_entity_threshold if is_large_entity: - # Create the large entity node and get the set of its members - large_entity_members = self._create_large_entity_from_result( + large_entity_id, large_entity_members = self._create_large_entity_from_result( target, provider_name, provider_result, current_depth ) - # Process ALL relationships to build the complete underlying data model for i, relationship in enumerate(provider_result.relationships): if i % 5 == 0 and self._is_stop_requested(): break - source_node = relationship.source_node - target_node = relationship.target_node + source_node_id = relationship.source_node + target_node_id = relationship.target_node - # Determine node types - source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN + # Determine visual source and target, substituting with large entity ID if necessary + visual_source = large_entity_id if source_node_id in large_entity_members else source_node_id + visual_target = large_entity_id if target_node_id in large_entity_members else target_node_id + + # Prevent self-loops on the large entity node + if visual_source == visual_target: + continue + + # Determine node types for the actual nodes + source_type = NodeType.IP if _is_valid_ip(source_node_id) else NodeType.DOMAIN if provider_name == 'shodan' and relationship.relationship_type == 'shodan_isp': target_type = NodeType.ISP elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer': target_type = NodeType.CA elif provider_name == 'correlation': target_type = NodeType.CORRELATION_OBJECT - elif _is_valid_ip(target_node): + elif _is_valid_ip(target_node_id): target_type = NodeType.IP else: target_type = NodeType.DOMAIN max_depth_reached = current_depth >= self.max_depth - # Add all nodes and edges to the graph's data model. - # The frontend will handle the visual re-routing for large entity members. - self.graph.add_node(source_node, source_type) - self.graph.add_node(target_node, target_type, metadata={'max_depth_reached': max_depth_reached}) + # Add actual nodes to the graph (they might be hidden by the UI) + self.graph.add_node(source_node_id, source_type) + self.graph.add_node(target_node_id, target_type, metadata={'max_depth_reached': max_depth_reached}) + + # Add the visual edge to the graph self.graph.add_edge( - source_node, target_node, + visual_source, visual_target, relationship.relationship_type, relationship.confidence, provider_name, relationship.raw_data ) + + if (_is_valid_domain(target_node_id) or _is_valid_ip(target_node_id)) and not max_depth_reached: + if target_node_id not in large_entity_members: + discovered_targets.add(target_node_id) - # Add all discovered domains/IPs to be considered for further processing - if (_is_valid_domain(target_node) or _is_valid_ip(target_node)) and not max_depth_reached: - if target_node not in large_entity_members: - discovered_targets.add(target_node) - - # Process all attributes and add them to the corresponding nodes + if large_entity_members: + self.logger.logger.info(f"Enqueuing DNS and Correlation for {len(large_entity_members)} members of {large_entity_id}") + for member in large_entity_members: + for provider_name_to_run in ['dns', 'correlation']: + p_instance = next((p for p in self.providers if p.get_name() == provider_name_to_run), None) + if p_instance and p_instance.get_eligibility().get('domains' if _is_valid_domain(member) else 'ips'): + priority = self._get_priority(provider_name_to_run) + self.task_queue.put((time.time(), priority, (provider_name_to_run, member, current_depth))) + self.total_tasks_ever_enqueued += 1 + attributes_by_node = defaultdict(list) for attribute in provider_result.attributes: attr_dict = { @@ -989,7 +991,6 @@ class Scanner: return discovered_targets, is_large_entity - def stop_scan(self) -> bool: """Request immediate scan termination with proper cleanup.""" try: diff --git a/static/js/graph.js b/static/js/graph.js index 9fb9188..3fb5216 100644 --- a/static/js/graph.js +++ b/static/js/graph.js @@ -1565,14 +1565,20 @@ class GraphManager { } /** - * Unhide all hidden nodes + * Unhide all hidden nodes, excluding those within a large entity. */ unhideAll() { - const allNodes = this.nodes.get({ - filter: (node) => node.hidden === true + const allHiddenNodes = this.nodes.get({ + filter: (node) => { + // Condition: Node is hidden AND it is NOT part of a large entity. + return node.hidden === true && !(node.metadata && node.metadata.large_entity_id); + } }); - const updates = allNodes.map(node => ({ id: node.id, hidden: false })); - this.nodes.update(updates); + + if (allHiddenNodes.length > 0) { + const updates = allHiddenNodes.map(node => ({ id: node.id, hidden: false })); + this.nodes.update(updates); + } } } diff --git a/static/js/main.js b/static/js/main.js index c318aae..c073f7b 100644 --- a/static/js/main.js +++ b/static/js/main.js @@ -1997,8 +1997,6 @@ class DNSReconApp { if (response.success) { this.showSuccess(response.message); - this.hideModal(); - // If the scanner was idle, it's now running. Start polling to see the new node appear. if (this.scanStatus === 'idle') { this.startPolling(1000);