large entity real fix
This commit is contained in:
		
							parent
							
								
									b2c5d2331c
								
							
						
					
					
						commit
						b2629de055
					
				
							
								
								
									
										175
									
								
								core/scanner.py
									
									
									
									
									
								
							
							
						
						
									
										175
									
								
								core/scanner.py
									
									
									
									
									
								
							@ -824,18 +824,34 @@ class Scanner:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
    def _create_large_entity_from_result(self, source_node: str, provider_name: str,
 | 
			
		||||
                                         provider_result: ProviderResult, depth: int) -> Tuple[str, Set[str]]:
 | 
			
		||||
                                        provider_result: ProviderResult, depth: int) -> Tuple[str, Set[str]]:
 | 
			
		||||
        """
 | 
			
		||||
        Creates a large entity node, tags all member nodes, and returns its ID and members.
 | 
			
		||||
        Creates a large entity node, tags all member nodes, and stores original relationships.
 | 
			
		||||
        FIXED: Now stores original relationships for later restoration during extraction.
 | 
			
		||||
        """
 | 
			
		||||
        members = {rel.target_node for rel in provider_result.relationships
 | 
			
		||||
                   if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)}
 | 
			
		||||
                if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)}
 | 
			
		||||
        
 | 
			
		||||
        if not members:
 | 
			
		||||
            return "", set()
 | 
			
		||||
 | 
			
		||||
        large_entity_id = f"le_{provider_name}_{source_node}"
 | 
			
		||||
        
 | 
			
		||||
        # FIXED: Store original relationships for each member
 | 
			
		||||
        member_relationships = {}
 | 
			
		||||
        for rel in provider_result.relationships:
 | 
			
		||||
            if rel.target_node in members:
 | 
			
		||||
                if rel.target_node not in member_relationships:
 | 
			
		||||
                    member_relationships[rel.target_node] = []
 | 
			
		||||
                member_relationships[rel.target_node].append({
 | 
			
		||||
                    'source_node': rel.source_node,
 | 
			
		||||
                    'target_node': rel.target_node,
 | 
			
		||||
                    'relationship_type': rel.relationship_type,
 | 
			
		||||
                    'confidence': rel.confidence,
 | 
			
		||||
                    'provider': rel.provider,
 | 
			
		||||
                    'raw_data': rel.raw_data
 | 
			
		||||
                })
 | 
			
		||||
        
 | 
			
		||||
        self.graph.add_node(
 | 
			
		||||
            node_id=large_entity_id,
 | 
			
		||||
            node_type=NodeType.LARGE_ENTITY,
 | 
			
		||||
@ -843,7 +859,8 @@ class Scanner:
 | 
			
		||||
                {"name": "count", "value": len(members), "type": "statistic"},
 | 
			
		||||
                {"name": "source_provider", "value": provider_name, "type": "metadata"},
 | 
			
		||||
                {"name": "discovery_depth", "value": depth, "type": "metadata"},
 | 
			
		||||
                {"name": "nodes", "value": list(members), "type": "metadata"}
 | 
			
		||||
                {"name": "nodes", "value": list(members), "type": "metadata"},
 | 
			
		||||
                {"name": "original_relationships", "value": member_relationships, "type": "metadata"}  # FIXED: Store original relationships
 | 
			
		||||
            ],
 | 
			
		||||
            description=f"A collection of {len(members)} nodes discovered from {source_node} via {provider_name}."
 | 
			
		||||
        )
 | 
			
		||||
@ -860,7 +877,8 @@ class Scanner:
 | 
			
		||||
 | 
			
		||||
    def extract_node_from_large_entity(self, large_entity_id: str, node_id: str) -> bool:
 | 
			
		||||
        """
 | 
			
		||||
        FIXED: Extract a node from a large entity with proper backend updates and edge re-routing.
 | 
			
		||||
        Removes a node from a large entity and restores its original relationships.
 | 
			
		||||
        FIXED: Now restores original relationships to make the node reachable.
 | 
			
		||||
        """
 | 
			
		||||
        if not self.graph.graph.has_node(node_id):
 | 
			
		||||
            return False
 | 
			
		||||
@ -871,51 +889,45 @@ class Scanner:
 | 
			
		||||
        if metadata.get('large_entity_id') != large_entity_id:
 | 
			
		||||
            return False
 | 
			
		||||
        
 | 
			
		||||
        # FIXED: Update the large entity's attributes to remove the extracted node
 | 
			
		||||
        if self.graph.graph.has_node(large_entity_id):
 | 
			
		||||
            le_node_data = self.graph.graph.nodes[large_entity_id]
 | 
			
		||||
            le_attributes = le_node_data.get('attributes', [])
 | 
			
		||||
        # Remove the large entity tag
 | 
			
		||||
        del metadata['large_entity_id']
 | 
			
		||||
        self.graph.add_node(node_id, NodeType(node_data['type']), metadata=metadata)
 | 
			
		||||
        
 | 
			
		||||
            # Update the 'nodes' attribute to remove extracted node
 | 
			
		||||
            nodes_attr = next((attr for attr in le_attributes if attr['name'] == 'nodes'), None)
 | 
			
		||||
            if nodes_attr and isinstance(nodes_attr['value'], list):
 | 
			
		||||
                if node_id in nodes_attr['value']:
 | 
			
		||||
        # FIXED: Restore original relationships if they exist
 | 
			
		||||
        if self.graph.graph.has_node(large_entity_id):
 | 
			
		||||
            le_attrs = self.graph.graph.nodes[large_entity_id].get('attributes', [])
 | 
			
		||||
            original_relationships_attr = next((a for a in le_attrs if a['name'] == 'original_relationships'), None)
 | 
			
		||||
            
 | 
			
		||||
            if original_relationships_attr and node_id in original_relationships_attr['value']:
 | 
			
		||||
                # Restore all original relationships for this node
 | 
			
		||||
                for rel_data in original_relationships_attr['value'][node_id]:
 | 
			
		||||
                    self.graph.add_edge(
 | 
			
		||||
                        source_id=rel_data['source_node'],
 | 
			
		||||
                        target_id=rel_data['target_node'],
 | 
			
		||||
                        relationship_type=rel_data['relationship_type'],
 | 
			
		||||
                        confidence_score=rel_data['confidence'],
 | 
			
		||||
                        source_provider=rel_data['provider'],
 | 
			
		||||
                        raw_data=rel_data['raw_data']
 | 
			
		||||
                    )
 | 
			
		||||
                    
 | 
			
		||||
                    # Ensure both nodes exist in the graph
 | 
			
		||||
                    source_type = NodeType.IP if _is_valid_ip(rel_data['source_node']) else NodeType.DOMAIN
 | 
			
		||||
                    target_type = NodeType.IP if _is_valid_ip(rel_data['target_node']) else NodeType.DOMAIN
 | 
			
		||||
                    self.graph.add_node(rel_data['source_node'], source_type)
 | 
			
		||||
                    self.graph.add_node(rel_data['target_node'], target_type)
 | 
			
		||||
                
 | 
			
		||||
                # Update the large entity to remove this node from its list
 | 
			
		||||
                nodes_attr = next((a for a in le_attrs if a['name'] == 'nodes'), None)
 | 
			
		||||
                if nodes_attr and node_id in nodes_attr['value']:
 | 
			
		||||
                    nodes_attr['value'].remove(node_id)
 | 
			
		||||
                    
 | 
			
		||||
            # Update the 'count' attribute
 | 
			
		||||
            count_attr = next((attr for attr in le_attributes if attr['name'] == 'count'), None)
 | 
			
		||||
            if count_attr and isinstance(count_attr['value'], (int, float)):
 | 
			
		||||
                count_attr['value'] = max(0, count_attr['value'] - 1)
 | 
			
		||||
                count_attr = next((a for a in le_attrs if a['name'] == 'count'), None)
 | 
			
		||||
                if count_attr:
 | 
			
		||||
                    count_attr['value'] = max(0, count_attr['value'] - 1)
 | 
			
		||||
                
 | 
			
		||||
            # Update the large entity node
 | 
			
		||||
            self.graph.add_node(
 | 
			
		||||
                large_entity_id, 
 | 
			
		||||
                NodeType.LARGE_ENTITY, 
 | 
			
		||||
                attributes=le_attributes,
 | 
			
		||||
                description=le_node_data.get('description', ''),
 | 
			
		||||
                metadata=le_node_data.get('metadata', {})
 | 
			
		||||
            )
 | 
			
		||||
        
 | 
			
		||||
        # Remove the large entity tag from extracted node
 | 
			
		||||
        updated_metadata = metadata.copy()
 | 
			
		||||
        del updated_metadata['large_entity_id']
 | 
			
		||||
        
 | 
			
		||||
        # Add extraction history for forensic integrity
 | 
			
		||||
        extraction_record = {
 | 
			
		||||
            'extracted_at': datetime.now(timezone.utc).isoformat(),
 | 
			
		||||
            'extracted_from': large_entity_id,
 | 
			
		||||
            'extraction_method': 'manual'
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
        if 'extraction_history' not in updated_metadata:
 | 
			
		||||
            updated_metadata['extraction_history'] = []
 | 
			
		||||
        updated_metadata['extraction_history'].append(extraction_record)
 | 
			
		||||
        
 | 
			
		||||
        # Update the extracted node
 | 
			
		||||
        self.graph.add_node(node_id, NodeType(node_data['type']), metadata=updated_metadata)
 | 
			
		||||
        
 | 
			
		||||
        # FIXED: Re-route edges that were pointing to the large entity
 | 
			
		||||
        self._reroute_large_entity_edges(large_entity_id, node_id)
 | 
			
		||||
                # Remove from original relationships tracking
 | 
			
		||||
                if node_id in original_relationships_attr['value']:
 | 
			
		||||
                    del original_relationships_attr['value'][node_id]
 | 
			
		||||
        
 | 
			
		||||
        # Re-enqueue the node for full processing
 | 
			
		||||
        is_ip = _is_valid_ip(node_id)
 | 
			
		||||
@ -923,7 +935,6 @@ class Scanner:
 | 
			
		||||
        for provider in eligible_providers:
 | 
			
		||||
            provider_name = provider.get_name()
 | 
			
		||||
            priority = self._get_priority(provider_name)
 | 
			
		||||
            
 | 
			
		||||
            # Use current depth of the large entity if available, else 0
 | 
			
		||||
            depth = 0
 | 
			
		||||
            if self.graph.graph.has_node(large_entity_id):
 | 
			
		||||
@ -935,78 +946,8 @@ class Scanner:
 | 
			
		||||
            self.task_queue.put((time.time(), priority, (provider_name, node_id, depth)))
 | 
			
		||||
            self.total_tasks_ever_enqueued += 1
 | 
			
		||||
        
 | 
			
		||||
        # Force session state update for immediate frontend sync
 | 
			
		||||
        self._update_session_state()
 | 
			
		||||
        
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    def _reroute_large_entity_edges(self, large_entity_id: str, extracted_node_id: str) -> None:
 | 
			
		||||
        """
 | 
			
		||||
        FIXED: Re-route edges from large entity to extracted node where appropriate.
 | 
			
		||||
        """
 | 
			
		||||
        if not self.graph.graph.has_node(large_entity_id) or not self.graph.graph.has_node(extracted_node_id):
 | 
			
		||||
            return
 | 
			
		||||
        
 | 
			
		||||
        edges_to_reroute = []
 | 
			
		||||
        
 | 
			
		||||
        # Find edges pointing TO the large entity that should point to the extracted node
 | 
			
		||||
        for source, target, edge_data in self.graph.graph.in_edges(large_entity_id, data=True):
 | 
			
		||||
            # Check if this edge was originally meant for the extracted node
 | 
			
		||||
            raw_data = edge_data.get('raw_data', {})
 | 
			
		||||
            
 | 
			
		||||
            # If the raw data suggests this edge was for the extracted node, re-route it
 | 
			
		||||
            if (raw_data.get('original_target') == extracted_node_id or
 | 
			
		||||
                self._should_reroute_edge(edge_data, extracted_node_id)):
 | 
			
		||||
                edges_to_reroute.append(('in', source, target, edge_data))
 | 
			
		||||
        
 | 
			
		||||
        # Find edges pointing FROM the large entity that should point from the extracted node  
 | 
			
		||||
        for source, target, edge_data in self.graph.graph.out_edges(large_entity_id, data=True):
 | 
			
		||||
            raw_data = edge_data.get('raw_data', {})
 | 
			
		||||
            
 | 
			
		||||
            if (raw_data.get('original_source') == extracted_node_id or
 | 
			
		||||
                self._should_reroute_edge(edge_data, extracted_node_id)):
 | 
			
		||||
                edges_to_reroute.append(('out', source, target, edge_data))
 | 
			
		||||
        
 | 
			
		||||
        # Re-route the edges
 | 
			
		||||
        for direction, source, target, edge_data in edges_to_reroute:
 | 
			
		||||
            # Remove old edge
 | 
			
		||||
            self.graph.graph.remove_edge(source, target)
 | 
			
		||||
            
 | 
			
		||||
            # Add new edge with extracted node
 | 
			
		||||
            if direction == 'in':
 | 
			
		||||
                new_target = extracted_node_id
 | 
			
		||||
                new_source = source
 | 
			
		||||
            else:  # direction == 'out'
 | 
			
		||||
                new_source = extracted_node_id  
 | 
			
		||||
                new_target = target
 | 
			
		||||
            
 | 
			
		||||
            # Add the re-routed edge
 | 
			
		||||
            self.graph.add_edge(
 | 
			
		||||
                source_id=new_source,
 | 
			
		||||
                target_id=new_target,
 | 
			
		||||
                relationship_type=edge_data.get('relationship_type', 'unknown'),
 | 
			
		||||
                confidence_score=edge_data.get('confidence_score', 0.5),
 | 
			
		||||
                source_provider=edge_data.get('source_provider', 'rerouted'),
 | 
			
		||||
                raw_data=dict(edge_data.get('raw_data', {}), **{'rerouted_from_large_entity': large_entity_id})
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    def _should_reroute_edge(self, edge_data: dict, extracted_node_id: str) -> bool:
 | 
			
		||||
        """
 | 
			
		||||
        Determine if an edge should be re-routed to an extracted node.
 | 
			
		||||
        This is a heuristic-based approach since we don't store original targets.
 | 
			
		||||
        """
 | 
			
		||||
        relationship_type = edge_data.get('relationship_type', '')
 | 
			
		||||
        
 | 
			
		||||
        # For now, re-route DNS and certificate-based relationships
 | 
			
		||||
        # These are likely to be node-specific rather than entity-wide
 | 
			
		||||
        reroutable_types = [
 | 
			
		||||
            'dns_a_record', 'dns_aaaa_record', 'dns_cname_record', 
 | 
			
		||||
            'dns_mx_record', 'dns_ptr_record',
 | 
			
		||||
            'crtsh_san_certificate', 'crtsh_cert_issuer'
 | 
			
		||||
        ]
 | 
			
		||||
        
 | 
			
		||||
        return any(rtype in relationship_type for rtype in reroutable_types)
 | 
			
		||||
 | 
			
		||||
    def _process_provider_result_unified(self, target: str, provider: BaseProvider,
 | 
			
		||||
                                        provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user