remove-large-entity-temporarily #3
@ -114,36 +114,6 @@ class GraphManager:
 | 
			
		||||
        self.last_modified = datetime.now(timezone.utc).isoformat()
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
        """
        Drop a member from a large entity's bookkeeping attributes.

        The large entity node stores its members in an attribute dict named
        'nodes' (a list under 'value') and their number in an attribute dict
        named 'count'. This method removes ``node_id_to_extract`` from that
        list and refreshes the count.

        Args:
            large_entity_id: ID of the large entity node.
            node_id_to_extract: ID of the member to remove from the list.

        Returns:
            False when the large entity node does not exist; True otherwise,
            including the case where the member was not listed (treated as
            "already extracted").
        """
        if not self.graph.has_node(large_entity_id):
            return False

        attributes = self.graph.nodes[large_entity_id].get('attributes', [])

        # Locate the attribute dict that carries the member list.
        nodes_attr = next((attr for attr in attributes if attr.get('name') == 'nodes'), None)
        members = nodes_attr.get('value') if nodes_attr else None

        if not isinstance(members, list) or node_id_to_extract not in members:
            # This can happen if the node was already extracted, which is not an error.
            print(f"Warning: Node {node_id_to_extract} not found in the 'nodes' list of {large_entity_id}.")
            return True  # Proceed as if successful; last_modified is left untouched.

        members.remove(node_id_to_extract)

        # Keep the 'count' attribute in sync with the shortened member list.
        count_attr = next((attr for attr in attributes if attr.get('name') == 'count'), None)
        if count_attr:
            count_attr['value'] = len(members)

        self.last_modified = datetime.now(timezone.utc).isoformat()
        return True
 | 
			
		||||
 | 
			
		||||
    def remove_node(self, node_id: str) -> bool:
 | 
			
		||||
        """Remove a node and its connected edges from the graph."""
 | 
			
		||||
        if not self.graph.has_node(node_id):
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										349
									
								
								core/scanner.py
									
									
									
									
									
								
							
							
						
						
									
										349
									
								
								core/scanner.py
									
									
									
									
									
								
							@ -761,40 +761,37 @@ class Scanner:
 | 
			
		||||
    def _process_provider_task(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]:
        """
        Run one provider against one target and fold its result into the graph.

        The target is added to the graph as an IP or DOMAIN node, its provider
        states are initialised, the provider is queried, and the result is
        handed to ``_process_provider_result_unified``.

        Args:
            provider: Provider instance to query.
            target: Domain or IP to investigate.
            depth: Discovery depth passed through to result processing.

        Returns:
            A tuple ``(new_targets, large_entity_members, provider_successful)``.
            ``provider_successful`` is False when the query returned ``None``
            or any exception was raised; a stop request before the query
            yields ``(set(), set(), False)``.
        """
        if self._is_stop_requested():
            return set(), set(), False

        is_ip = _is_valid_ip(target)
        target_type = NodeType.IP if is_ip else NodeType.DOMAIN

        self.graph.add_node(target, target_type)
        self._initialize_provider_states(target)

        new_targets = set()
        large_entity_members = set()
        provider_successful = True

        try:
            provider_result = self._execute_provider_query(provider, target, is_ip)

            if provider_result is None:
                provider_successful = False
            elif not self._is_stop_requested():
                # Pass all relationships to be processed
                discovered, is_large_entity = self._process_provider_result_unified(
                    target, provider, provider_result, depth
                )
                if is_large_entity:
                    large_entity_members.update(discovered)

                # NOTE(review): the discovered targets are always added to
                # new_targets, whether or not a large entity was created.
                # Confirm whether large-entity members should be excluded here.
                new_targets.update(discovered)

        except Exception as e:
            # Any failure while querying or processing marks the task failed;
            # the error is logged and partial results are still returned.
            provider_successful = False
            self._log_provider_error(target, provider.get_name(), str(e))

        return new_targets, large_entity_members, provider_successful
 | 
			
		||||
 | 
			
		||||
    def _execute_provider_query(self, provider: BaseProvider, target: str, is_ip: bool) -> Optional[ProviderResult]:
 | 
			
		||||
        """
 | 
			
		||||
@ -824,73 +821,158 @@ class Scanner:
 | 
			
		||||
            self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
    def _create_large_entity_from_result(self, source_node: str, provider_name: str,
                                         provider_result: ProviderResult, depth: int) -> Tuple[str, Set[str]]:
        """
        Create a large entity node and tag every member node with its ID.

        Members are the relationship targets of ``provider_result`` that pass
        ``_is_valid_domain`` or ``_is_valid_ip``.

        Args:
            source_node: Node the relationships were discovered from.
            provider_name: Name of the provider that produced the result.
            provider_result: Result whose relationships supply the members.
            depth: Discovery depth recorded on the large entity node.

        Returns:
            ``(large_entity_id, members)``, or ``("", set())`` when the result
            contains no domain/IP targets (no node is created in that case).
        """
        members = {
            rel.target_node
            for rel in provider_result.relationships
            if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)
        }

        if not members:
            return "", set()

        large_entity_id = f"le_{provider_name}_{source_node}"
        member_count = len(members)

        self.graph.add_node(
            node_id=large_entity_id,
            node_type=NodeType.LARGE_ENTITY,
            attributes=[
                {"name": "count", "value": member_count, "type": "statistic"},
                {"name": "source_provider", "value": provider_name, "type": "metadata"},
                {"name": "discovery_depth", "value": depth, "type": "metadata"},
                # Sorted so the stored list does not depend on set iteration order.
                {"name": "nodes", "value": sorted(members), "type": "metadata"},
            ],
            description=f"A collection of {member_count} nodes discovered from {source_node} via {provider_name}."
        )

        # Tag each member so it can be recognised as belonging to this entity.
        for member_id in members:
            member_type = NodeType.IP if _is_valid_ip(member_id) else NodeType.DOMAIN
            self.graph.add_node(
                node_id=member_id,
                node_type=member_type,
                metadata={'large_entity_id': large_entity_id}
            )

        return large_entity_id, members
 | 
			
		||||
 | 
			
		||||
    def extract_node_from_large_entity(self, large_entity_id: str, node_id: str) -> bool:
        """
        Detach a node from a large entity and queue it for regular processing.

        The node's 'large_entity_id' metadata tag is removed, the node is
        re-added with the updated metadata, and one task per eligible provider
        is put on the task queue.

        Args:
            large_entity_id: ID the node is expected to be tagged with.
            node_id: ID of the node to extract.

        Returns:
            True when the node existed and carried the matching tag; False
            when the node is missing or tagged with a different (or no) entity.
        """
        if not self.graph.graph.has_node(node_id):
            return False

        node_data = self.graph.graph.nodes[node_id]
        metadata = node_data.get('metadata', {})

        if metadata.get('large_entity_id') != large_entity_id:
            return False

        # Remove the large entity tag
        del metadata['large_entity_id']
        self.graph.add_node(node_id, NodeType(node_data['type']), metadata=metadata)

        # Use the discovery depth of the large entity if available, else 0.
        # Looked up once here; it does not depend on the provider.
        depth = 0
        if self.graph.graph.has_node(large_entity_id):
            le_attrs = self.graph.graph.nodes[large_entity_id].get('attributes', [])
            depth_attr = next((a for a in le_attrs if a.get('name') == 'discovery_depth'), None)
            if depth_attr:
                depth = depth_attr['value']

        # Re-enqueue the node for full processing. Eligibility is evaluated
        # after the tag was removed above.
        is_ip = _is_valid_ip(node_id)
        for provider in self._get_eligible_providers(node_id, is_ip, False):
            provider_name = provider.get_name()
            priority = self._get_priority(provider_name)
            self.task_queue.put((time.time(), priority, (provider_name, node_id, depth)))
            self.total_tasks_ever_enqueued += 1

        return True
 | 
			
		||||
 | 
			
		||||
    def _process_provider_result_unified(self, target: str, provider: BaseProvider,
 | 
			
		||||
                                        provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
 | 
			
		||||
        """
 | 
			
		||||
        Process a unified ProviderResult object to update the graph.
 | 
			
		||||
        Handles large entity creation while ensuring all underlying nodes and edges are
 | 
			
		||||
        added to the graph data model for a complete dataset.
 | 
			
		||||
        This version dynamically re-routes edges to a large entity container.
 | 
			
		||||
        """
 | 
			
		||||
        provider_name = provider.get_name()
 | 
			
		||||
        discovered_targets = set()
 | 
			
		||||
        large_entity_id = ""
 | 
			
		||||
        large_entity_members = set()
 | 
			
		||||
 | 
			
		||||
        if self._is_stop_requested():
 | 
			
		||||
            return discovered_targets, False
 | 
			
		||||
 | 
			
		||||
        # Check if a large entity should be created based on the count of domain/IP relationships
 | 
			
		||||
        eligible_relationship_count = sum(
 | 
			
		||||
        eligible_rel_count = sum(
 | 
			
		||||
            1 for rel in provider_result.relationships if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        is_large_entity = eligible_relationship_count > self.config.large_entity_threshold
 | 
			
		||||
        is_large_entity = eligible_rel_count > self.config.large_entity_threshold
 | 
			
		||||
 | 
			
		||||
        if is_large_entity:
 | 
			
		||||
            # Create the large entity node and get the set of its members
 | 
			
		||||
            large_entity_members = self._create_large_entity_from_provider_result(
 | 
			
		||||
            large_entity_id, large_entity_members = self._create_large_entity_from_result(
 | 
			
		||||
                target, provider_name, provider_result, current_depth
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        # Process ALL relationships to build the complete underlying data model
 | 
			
		||||
        for i, relationship in enumerate(provider_result.relationships):
 | 
			
		||||
            if i % 5 == 0 and self._is_stop_requested():
 | 
			
		||||
                break
 | 
			
		||||
 | 
			
		||||
            source_node = relationship.source_node
 | 
			
		||||
            target_node = relationship.target_node
 | 
			
		||||
            source_node_id = relationship.source_node
 | 
			
		||||
            target_node_id = relationship.target_node
 | 
			
		||||
 | 
			
		||||
            # Determine node types
 | 
			
		||||
            source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
 | 
			
		||||
            # Determine visual source and target, substituting with large entity ID if necessary
 | 
			
		||||
            visual_source = large_entity_id if source_node_id in large_entity_members else source_node_id
 | 
			
		||||
            visual_target = large_entity_id if target_node_id in large_entity_members else target_node_id
 | 
			
		||||
 | 
			
		||||
            # Prevent self-loops on the large entity node
 | 
			
		||||
            if visual_source == visual_target:
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            # Determine node types for the actual nodes
 | 
			
		||||
            source_type = NodeType.IP if _is_valid_ip(source_node_id) else NodeType.DOMAIN
 | 
			
		||||
            if provider_name == 'shodan' and relationship.relationship_type == 'shodan_isp':
 | 
			
		||||
                target_type = NodeType.ISP
 | 
			
		||||
            elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer':
 | 
			
		||||
                target_type = NodeType.CA
 | 
			
		||||
            elif provider_name == 'correlation':
 | 
			
		||||
                target_type = NodeType.CORRELATION_OBJECT
 | 
			
		||||
            elif _is_valid_ip(target_node):
 | 
			
		||||
            elif _is_valid_ip(target_node_id):
 | 
			
		||||
                target_type = NodeType.IP
 | 
			
		||||
            else:
 | 
			
		||||
                target_type = NodeType.DOMAIN
 | 
			
		||||
 | 
			
		||||
            max_depth_reached = current_depth >= self.max_depth
 | 
			
		||||
 | 
			
		||||
            # Add all nodes and edges to the graph's data model.
 | 
			
		||||
            # The frontend will handle the visual re-routing for large entity members.
 | 
			
		||||
            self.graph.add_node(source_node, source_type)
 | 
			
		||||
            self.graph.add_node(target_node, target_type, metadata={'max_depth_reached': max_depth_reached})
 | 
			
		||||
            # Add actual nodes to the graph (they might be hidden by the UI)
 | 
			
		||||
            self.graph.add_node(source_node_id, source_type)
 | 
			
		||||
            self.graph.add_node(target_node_id, target_type, metadata={'max_depth_reached': max_depth_reached})
 | 
			
		||||
            
 | 
			
		||||
            # Add the visual edge to the graph
 | 
			
		||||
            self.graph.add_edge(
 | 
			
		||||
                source_node, target_node,
 | 
			
		||||
                visual_source, visual_target,
 | 
			
		||||
                relationship.relationship_type,
 | 
			
		||||
                relationship.confidence,
 | 
			
		||||
                provider_name,
 | 
			
		||||
                relationship.raw_data
 | 
			
		||||
            )
 | 
			
		||||
            
 | 
			
		||||
            if (_is_valid_domain(target_node_id) or _is_valid_ip(target_node_id)) and not max_depth_reached:
 | 
			
		||||
                if target_node_id not in large_entity_members:
 | 
			
		||||
                    discovered_targets.add(target_node_id)
 | 
			
		||||
 | 
			
		||||
            # Add all discovered domains/IPs to be considered for further processing
 | 
			
		||||
            if (_is_valid_domain(target_node) or _is_valid_ip(target_node)) and not max_depth_reached:
 | 
			
		||||
                discovered_targets.add(target_node)
 | 
			
		||||
 | 
			
		||||
        # Process all attributes and add them to the corresponding nodes
 | 
			
		||||
        if large_entity_members:
 | 
			
		||||
            self.logger.logger.info(f"Enqueuing DNS and Correlation for {len(large_entity_members)} members of {large_entity_id}")
 | 
			
		||||
            for member in large_entity_members:
 | 
			
		||||
                for provider_name_to_run in ['dns', 'correlation']:
 | 
			
		||||
                    p_instance = next((p for p in self.providers if p.get_name() == provider_name_to_run), None)
 | 
			
		||||
                    if p_instance and p_instance.get_eligibility().get('domains' if _is_valid_domain(member) else 'ips'):
 | 
			
		||||
                        priority = self._get_priority(provider_name_to_run)
 | 
			
		||||
                        self.task_queue.put((time.time(), priority, (provider_name_to_run, member, current_depth)))
 | 
			
		||||
                        self.total_tasks_ever_enqueued += 1
 | 
			
		||||
        
 | 
			
		||||
        attributes_by_node = defaultdict(list)
 | 
			
		||||
        for attribute in provider_result.attributes:
 | 
			
		||||
            attr_dict = {
 | 
			
		||||
@ -909,65 +991,6 @@ class Scanner:
 | 
			
		||||
 | 
			
		||||
        return discovered_targets, is_large_entity
 | 
			
		||||
 | 
			
		||||
    def _create_large_entity_from_provider_result(self, source: str, provider_name: str,
                                                provider_result: ProviderResult, current_depth: int) -> Set[str]:
        """
        Create a large entity node and connect it to the source and to shared
        non-member nodes.

        Members are the relationship targets that pass ``_is_valid_domain`` or
        ``_is_valid_ip``. The entity receives one edge from ``source`` (using
        the first relationship's type) and one edge to every distinct
        non-member target reached from a member.

        Args:
            source: Node the relationships were discovered from.
            provider_name: Name of the provider that produced the result.
            provider_result: Result whose relationships supply the members.
            current_depth: Discovery depth recorded on the entity.

        Returns:
            The set of member IDs, or an empty set when there are none (no
            node is created in that case).
        """
        # Function-scope import keeps this block self-contained.
        import zlib

        members = {
            rel.target_node for rel in provider_result.relationships
            if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)
        }

        if not members:
            return set()

        # Built-in hash() of a str is salted per interpreter process, so it
        # cannot produce a reproducible ID. CRC32 gives a stable value for the
        # same source string in every process.
        source_digest = zlib.crc32(source.encode('utf-8')) & 0x7FFFFFFF
        entity_id = f"large_entity_{provider_name}_{source_digest}"

        # NOTE(review): the entity's node_type is taken from one arbitrary
        # member; a mixed domain/IP membership is labelled by whichever member
        # the set yields first. Confirm this is acceptable.
        first_member = next(iter(members))
        node_type = 'ip' if _is_valid_ip(first_member) else 'domain'

        attributes_dict = {
            'count': len(members),
            'nodes': list(members),
            'node_type': node_type,
            'source_provider': provider_name,
            'discovery_depth': current_depth,
            'threshold_exceeded': self.config.large_entity_threshold,
        }
        attributes_list = [
            {
                "name": key, "value": value, "type": "large_entity_info",
                "provider": provider_name, "confidence": 0.9, "metadata": {}
            } for key, value in attributes_dict.items()
        ]

        description = f'Large entity created due to {len(members)} relationships from {provider_name}'

        self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes_list, description=description)

        # Add a representative edge from the source to the large entity
        if provider_result.relationships:
            rep_rel = provider_result.relationships[0]
            self.graph.add_edge(source, entity_id, rep_rel.relationship_type, 0.9, provider_name,
                                {'large_entity_info': f'Contains {len(members)} {node_type}s'})

        # Create edges from the large entity to shared non-member nodes (e.g., CAs, ISPs);
        # each distinct target gets exactly one edge.
        processed_targets = set()
        for rel in provider_result.relationships:
            if rel.source_node not in members or rel.target_node in members:
                continue
            if rel.target_node in processed_targets:
                continue
            self.graph.add_edge(
                entity_id, rel.target_node, rel.relationship_type, rel.confidence,
                rel.provider, rel.raw_data
            )
            processed_targets.add(rel.target_node)

        self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(members)} targets from {provider_name}")

        return members
 | 
			
		||||
 | 
			
		||||
    def stop_scan(self) -> bool:
 | 
			
		||||
        """Request immediate scan termination with proper cleanup."""
 | 
			
		||||
        try:
 | 
			
		||||
@ -995,127 +1018,6 @@ class Scanner:
 | 
			
		||||
            traceback.print_exc()
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
    def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
        """
        Extract a node from a large entity, re-add its edges, and re-queue it.

        Steps:
          1. Ask the graph manager to drop the node from the entity's member list.
          2. Re-add every incoming and outgoing edge of the node through
             ``self.graph.add_edge``.
          3. If the node is a domain or IP, enqueue it for every eligible
             provider except 'dns' and 'correlation', and restart the scan
             thread when the scanner is not currently running.

        Args:
            large_entity_id: ID of the large entity node.
            node_id_to_extract: ID of the member to extract.

        Returns:
            False when the large entity does not exist or the graph manager
            reports failure; True otherwise.
        """
        if not self.graph.graph.has_node(large_entity_id):
            return False

        # Extract the node from the large entity's internal list
        if not self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract):
            return False

        # Restore all incoming and outgoing edges for the extracted node.
        # The edge views are copied into lists first so that add_edge never
        # runs while a live view of the same graph is being iterated.
        for u, v, data in list(self.graph.graph.in_edges(node_id_to_extract, data=True)):
            self.graph.add_edge(u, v, data.get('relationship_type'), data.get('confidence_score'),
                                data.get('source_provider'), data.get('raw_data'))

        for u, v, data in list(self.graph.graph.out_edges(node_id_to_extract, data=True)):
            self.graph.add_edge(u, v, data.get('relationship_type'), data.get('confidence_score'),
                                data.get('source_provider'), data.get('raw_data'))

        # Re-queue the extracted node for further scanning if it is a domain or IP
        is_ip = _is_valid_ip(node_id_to_extract)
        is_domain = _is_valid_domain(node_id_to_extract)

        if not (is_domain or is_ip):
            self.logger.logger.info(f"Extracted non-scannable node {node_id_to_extract} of type {self.graph.graph.nodes[node_id_to_extract].get('type', 'unknown')}")
            return True

        # The node resumes at the depth recorded on the large entity (0 if absent).
        large_entity_attributes = self.graph.graph.nodes[large_entity_id].get('attributes', [])
        discovery_depth_attr = next((attr for attr in large_entity_attributes if attr.get('name') == 'discovery_depth'), None)
        current_depth = discovery_depth_attr['value'] if discovery_depth_attr else 0

        for provider in self._get_eligible_providers(node_id_to_extract, is_ip, False):
            provider_name = provider.get_name()
            # Exclude DNS and correlation providers from re-processing
            if provider_name in ['dns', 'correlation']:
                continue
            priority = self._get_priority(provider_name)
            self.task_queue.put((time.time(), priority, (provider_name, node_id_to_extract, current_depth)))
            self.total_tasks_ever_enqueued += 1

        if self.status != ScanStatus.RUNNING:
            self.status = ScanStatus.RUNNING
            self._update_session_state()

            # Start a worker thread only when none is alive to drain the queue.
            if not self.scan_thread or not self.scan_thread.is_alive():
                self.scan_thread = threading.Thread(
                    target=self._execute_scan,
                    args=(self.current_target, self.max_depth),
                    daemon=True
                )
                self.scan_thread.start()

        return True
 | 
			
		||||
    
 | 
			
		||||
    def _determine_extracted_node_type(self, node_id: str, large_entity_id: str) -> NodeType:
        """
        Work out the node type for a node being extracted from a large entity.

        Strategies are tried in order and the first hit wins:
          1. The 'type' already stored on the node, if it is a valid NodeType.
          2. Edges touching the node: a crtsh 'crtsh_cert_issuer' edge gives CA,
             a shodan 'shodan_isp' edge into the node gives ISP, and an
             incoming relationship type starting with 'corr_' gives
             CORRELATION_OBJECT.
          3. The ID's format (valid IP, then valid domain).
          4. The 'node_type' attribute of the large entity ('ip' gives IP,
             anything else gives DOMAIN).
          5. DOMAIN as the final fallback.

        Args:
            node_id: ID of the node being extracted.
            large_entity_id: ID of the large entity it is extracted from.

        Returns:
            The NodeType chosen by the first strategy that matches.
        """
        from utils.helpers import _is_valid_ip, _is_valid_domain

        # The other scanner methods reach the underlying graph through
        # self.graph.graph; this method does the same.
        nx_graph = self.graph.graph

        # Strategy 1: Check if node already exists in graph with a type
        if nx_graph.has_node(node_id):
            existing_type = nx_graph.nodes[node_id].get('type')
            if existing_type:
                try:
                    return NodeType(existing_type)
                except ValueError:
                    # Stored value is not a NodeType member; try the next strategy.
                    pass

        # Strategy 2: Look for existing relationships to this node to infer type
        for source, target, edge_data in nx_graph.edges(data=True):
            if target == node_id:
                rel_type = edge_data.get('relationship_type', '')
                provider = edge_data.get('source_provider', '')

                # CA nodes from certificate issuer relationships
                if provider == 'crtsh' and rel_type == 'crtsh_cert_issuer':
                    return NodeType.CA

                # ISP nodes from Shodan
                if provider == 'shodan' and rel_type == 'shodan_isp':
                    return NodeType.ISP

                # Correlation objects
                if rel_type.startswith('corr_'):
                    return NodeType.CORRELATION_OBJECT

            if source == node_id:
                rel_type = edge_data.get('relationship_type', '')
                provider = edge_data.get('source_provider', '')

                # Source nodes in cert issuer relationships are CAs
                if provider == 'crtsh' and rel_type == 'crtsh_cert_issuer':
                    return NodeType.CA

        # Strategy 3: Format-based detection (fallback)
        if _is_valid_ip(node_id):
            return NodeType.IP
        elif _is_valid_domain(node_id):
            return NodeType.DOMAIN

        # Strategy 4: Check large entity context
        if nx_graph.has_node(large_entity_id):
            attributes = nx_graph.nodes[large_entity_id].get('attributes', [])
            node_type_attr = next((attr for attr in attributes if attr.get('name') == 'node_type'), None)
            if node_type_attr:
                entity_node_type = node_type_attr.get('value', 'domain')
                return NodeType.IP if entity_node_type == 'ip' else NodeType.DOMAIN

        # Final fallback
        return NodeType.DOMAIN
 | 
			
		||||
    def _update_session_state(self) -> None:
 | 
			
		||||
        """
 | 
			
		||||
        Update the scanner state in Redis for GUI updates.
 | 
			
		||||
@ -1186,8 +1088,19 @@ class Scanner:
 | 
			
		||||
        eligible = []
 | 
			
		||||
        target_key = 'ips' if is_ip else 'domains'
 | 
			
		||||
        
 | 
			
		||||
        # Check if the target is part of a large entity
 | 
			
		||||
        is_in_large_entity = False
 | 
			
		||||
        if self.graph.graph.has_node(target):
 | 
			
		||||
            metadata = self.graph.graph.nodes[target].get('metadata', {})
 | 
			
		||||
            if 'large_entity_id' in metadata:
 | 
			
		||||
                is_in_large_entity = True
 | 
			
		||||
        
 | 
			
		||||
        for provider in self.providers:
 | 
			
		||||
            try:
 | 
			
		||||
                # If in large entity, only allow dns and correlation providers
 | 
			
		||||
                if is_in_large_entity and provider.get_name() not in ['dns', 'correlation']:
 | 
			
		||||
                    continue
 | 
			
		||||
                    
 | 
			
		||||
                # Check if provider supports this target type
 | 
			
		||||
                if not provider.get_eligibility().get(target_key, False):
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
@ -2,15 +2,37 @@
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
import psycopg2
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from typing import List, Dict, Any, Set
 | 
			
		||||
from typing import List, Dict, Any, Set, Optional
 | 
			
		||||
from urllib.parse import quote
 | 
			
		||||
from datetime import datetime, timezone
 | 
			
		||||
import requests
 | 
			
		||||
from psycopg2 import pool
 | 
			
		||||
 | 
			
		||||
from .base_provider import BaseProvider
 | 
			
		||||
from core.provider_result import ProviderResult
 | 
			
		||||
from utils.helpers import _is_valid_domain
 | 
			
		||||
from core.logger import get_forensic_logger
 | 
			
		||||
 | 
			
		||||
# --- Global Instance for PostgreSQL Connection Pool ---
# This pool will be created once per worker process and is not part of the
# CrtShProvider instance, thus avoiding pickling errors.
#
# db_pool stays None when the pool cannot be created; the warning logged in
# that case says the provider will fall back to the HTTP API.
db_pool = None
try:
    db_pool = psycopg2.pool.SimpleConnectionPool(
        minconn=1,
        maxconn=5,
        host='crt.sh',
        port=5432,
        user='guest',
        dbname='certwatch',
        sslmode='prefer',
        connect_timeout=60
    )
    # Use a generic logger here as this is at the module level
    get_forensic_logger().logger.info("crt.sh: Global PostgreSQL connection pool created successfully.")
except Exception as e:
    # Deliberately broad: a failure to create the pool must not prevent the
    # module from being imported.
    get_forensic_logger().logger.warning(f"crt.sh: Failed to create global DB connection pool: {e}. Will fall back to HTTP API.")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CrtShProvider(BaseProvider):
 | 
			
		||||
@ -37,7 +59,7 @@ class CrtShProvider(BaseProvider):
 | 
			
		||||
        
 | 
			
		||||
        # Compile regex for date filtering for efficiency
 | 
			
		||||
        self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    def get_name(self) -> str:
        """Return this provider's name, the fixed string ``"crtsh"``."""
        return "crtsh"
 | 
			
		||||
@ -121,7 +143,7 @@ class CrtShProvider(BaseProvider):
 | 
			
		||||
            
 | 
			
		||||
            else:  # "stale" or "not_found"
 | 
			
		||||
                # Query the API for the latest certificates
 | 
			
		||||
                new_raw_certs = self._query_crtsh_api(domain)
 | 
			
		||||
                new_raw_certs = self._query_crtsh(domain)
 | 
			
		||||
                
 | 
			
		||||
                if self._stop_event and self._stop_event.is_set():
 | 
			
		||||
                    return ProviderResult()
 | 
			
		||||
@ -152,8 +174,8 @@ class CrtShProvider(BaseProvider):
 | 
			
		||||
                # Save the new result and the raw data to the cache
 | 
			
		||||
                self._save_result_to_cache(cache_file, result, raw_certificates_to_process, domain)
 | 
			
		||||
 | 
			
		||||
        except requests.exceptions.RequestException as e:
 | 
			
		||||
            self.logger.logger.error(f"API query failed for {domain}: {e}")
 | 
			
		||||
        except (requests.exceptions.RequestException, psycopg2.Error) as e:
 | 
			
		||||
            self.logger.logger.error(f"Upstream query failed for {domain}: {e}")
 | 
			
		||||
            if cache_status != "not_found":
 | 
			
		||||
                result = self._load_from_cache(cache_file)
 | 
			
		||||
                self.logger.logger.warning(f"Using stale cache for {domain} due to API failure.")
 | 
			
		||||
@ -255,6 +277,58 @@ class CrtShProvider(BaseProvider):
 | 
			
		||||
                json.dump(cache_data, f, separators=(',', ':'), default=str)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            self.logger.logger.warning(f"Failed to save cache file for {domain}: {e}")
 | 
			
		||||
            
 | 
			
		||||
    def _query_crtsh(self, domain: str) -> List[Dict[str, Any]]:
        """
        Fetch raw certificate records for ``domain`` from crt.sh.

        When the module-level connection pool exists, the database is queried
        first; a ``psycopg2.Error`` from that attempt is logged as a warning and
        the HTTP API is queried instead. Without a pool the HTTP API is used
        directly.
        """
        log = self.logger.logger

        # Guard clause: no pool means the database path is unavailable.
        if not db_pool:
            log.info(f"crt.sh: No DB connection pool. Using HTTP API for {domain}")
            return self._query_crtsh_api(domain)

        try:
            log.info(f"crt.sh: Attempting DB query for {domain}")
            return self._query_crtsh_db(domain)
        except psycopg2.Error as e:
            log.warning(f"crt.sh: DB query failed for {domain}: {e}. Falling back to HTTP API.")
            return self._query_crtsh_api(domain)
 | 
			
		||||
 | 
			
		||||
    def _query_crtsh_db(self, domain: str) -> List[Dict[str, Any]]:
        """
        Run the certificate lookup for ``domain`` against the crt.sh database.

        A connection is borrowed from the module-level pool and always handed
        back, whether or not the query succeeds. Each result row becomes a dict
        keyed by the selected column names; truthy ``not_before`` / ``not_after``
        values are replaced by their ``isoformat()`` string. The query returns
        at most 5000 rows, newest certificate id first.
        """
        def _row_to_record(column_names, row):
            # Pair column names with values, then make the validity dates serialisable.
            record = dict(zip(column_names, row))
            for date_field in ('not_before', 'not_after'):
                if record.get(date_field):
                    record[date_field] = record[date_field].isoformat()
            return record

        connection = db_pool.getconn()
        try:
            with connection.cursor() as cursor:
                query = """
                SELECT
                    c.id,
                    x509_serialnumber(c.certificate) as serial_number,
                    x509_notbefore(c.certificate) as not_before,
                    x509_notafter(c.certificate) as not_after,
                    c.issuer_ca_id,
                    ca.name as issuer_name,
                    x509_commonname(c.certificate) as common_name,
                    identities(c.certificate)::text as name_value
                FROM certificate c
                LEFT JOIN ca ON c.issuer_ca_id = ca.id
                WHERE identities(c.certificate) @@ plainto_tsquery(%s)
                ORDER BY c.id DESC
                LIMIT 5000;
                """
                # The domain is passed as a bound parameter, never interpolated into the SQL.
                cursor.execute(query, (domain,))

                column_names = [desc[0] for desc in cursor.description]
                records = [_row_to_record(column_names, row) for row in cursor.fetchall()]
                self.logger.logger.info(f"crt.sh: DB query for {domain} returned {len(records)} records.")
                return records
        finally:
            db_pool.putconn(connection)
 | 
			
		||||
 | 
			
		||||
    def _query_crtsh_api(self, domain: str) -> List[Dict[str, Any]]:
 | 
			
		||||
        """Query crt.sh API for raw certificate data."""
 | 
			
		||||
@ -285,6 +359,17 @@ class CrtShProvider(BaseProvider):
 | 
			
		||||
        if self._stop_event and self._stop_event.is_set():
 | 
			
		||||
            self.logger.logger.info(f"CrtSh processing cancelled before processing for domain: {query_domain}")
 | 
			
		||||
            return result
 | 
			
		||||
        
 | 
			
		||||
        incompleteness_warning = self._check_for_incomplete_data(query_domain, certificates)
 | 
			
		||||
        if incompleteness_warning:
 | 
			
		||||
            result.add_attribute(
 | 
			
		||||
                target_node=query_domain,
 | 
			
		||||
                name="crtsh_data_warning",
 | 
			
		||||
                value=incompleteness_warning,
 | 
			
		||||
                attr_type='metadata',
 | 
			
		||||
                provider=self.name,
 | 
			
		||||
                confidence=1.0
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        all_discovered_domains = set()
 | 
			
		||||
        processed_issuers = set()
 | 
			
		||||
@ -457,6 +542,8 @@ class CrtShProvider(BaseProvider):
 | 
			
		||||
            raise ValueError("Empty date string")
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            if isinstance(date_string, datetime):
 | 
			
		||||
                return date_string.replace(tzinfo=timezone.utc)
 | 
			
		||||
            if date_string.endswith('Z'):
 | 
			
		||||
                return datetime.fromisoformat(date_string[:-1]).replace(tzinfo=timezone.utc)
 | 
			
		||||
            elif '+' in date_string or date_string.endswith('UTC'):
 | 
			
		||||
@ -577,4 +664,30 @@ class CrtShProvider(BaseProvider):
 | 
			
		||||
        elif query_domain.endswith(f'.{cert_domain}'):
 | 
			
		||||
            return 'parent_domain'
 | 
			
		||||
        else:
 | 
			
		||||
            return 'related_domain'
 | 
			
		||||
            return 'related_domain'
 | 
			
		||||
        
 | 
			
		||||
    def _check_for_incomplete_data(self, domain: str, certificates: List[Dict[str, Any]]) -> Optional[str]:
 | 
			
		||||
        """
 | 
			
		||||
        Analyzes the certificate list to heuristically detect if the data from crt.sh is incomplete.
 | 
			
		||||
        """
 | 
			
		||||
        cert_count = len(certificates)
 | 
			
		||||
 | 
			
		||||
        # Heuristic 1: Check if the number of certs hits a known hard limit.
 | 
			
		||||
        if cert_count >= 10000:
 | 
			
		||||
            return f"Result likely truncated; received {cert_count} certificates, which may be the maximum limit."
 | 
			
		||||
 | 
			
		||||
        # Heuristic 2: Check if all returned certificates are old.
 | 
			
		||||
        if cert_count > 1000: # Only apply this for a reasonable number of certs
 | 
			
		||||
            latest_expiry = None
 | 
			
		||||
            for cert in certificates:
 | 
			
		||||
                try:
 | 
			
		||||
                    not_after = self._parse_certificate_date(cert.get('not_after'))
 | 
			
		||||
                    if latest_expiry is None or not_after > latest_expiry:
 | 
			
		||||
                        latest_expiry = not_after
 | 
			
		||||
                except (ValueError, TypeError):
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
            if latest_expiry and (datetime.now(timezone.utc) - latest_expiry).days > 365:
 | 
			
		||||
                 return f"Incomplete data suspected: The latest certificate expired more than a year ago ({latest_expiry.strftime('%Y-%m-%d')})."
 | 
			
		||||
 | 
			
		||||
        return None
 | 
			
		||||
@ -7,4 +7,5 @@ urllib3
 | 
			
		||||
dnspython
 | 
			
		||||
gunicorn
 | 
			
		||||
redis
 | 
			
		||||
python-dotenv
 | 
			
		||||
python-dotenv
 | 
			
		||||
psycopg2-binary
 | 
			
		||||
@ -1,3 +1,4 @@
 | 
			
		||||
// dnsrecon-reduced/static/js/graph.js
 | 
			
		||||
/**
 | 
			
		||||
 * Graph visualization module for DNSRecon
 | 
			
		||||
 * Handles network graph rendering using vis.js with proper large entity node hiding
 | 
			
		||||
@ -362,100 +363,60 @@ class GraphManager {
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        try {
 | 
			
		||||
            // Initialize if not already done
 | 
			
		||||
            if (!this.isInitialized) {
 | 
			
		||||
                this.initialize();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            this.initialTargetIds = new Set(graphData.initial_targets || []);
 | 
			
		||||
            // Check if we have actual data to display
 | 
			
		||||
            const hasData = graphData.nodes.length > 0 || graphData.edges.length > 0;
 | 
			
		||||
            
 | 
			
		||||
            // Handle placeholder visibility
 | 
			
		||||
            const placeholder = this.container.querySelector('.graph-placeholder');
 | 
			
		||||
            if (placeholder) {
 | 
			
		||||
                if (hasData) {
 | 
			
		||||
                    placeholder.style.display = 'none';
 | 
			
		||||
                } else {
 | 
			
		||||
                    placeholder.style.display = 'flex';
 | 
			
		||||
                    // Early return if no data to process
 | 
			
		||||
                    return;
 | 
			
		||||
                }
 | 
			
		||||
                placeholder.style.display = hasData ? 'none' : 'flex';
 | 
			
		||||
            }
 | 
			
		||||
            if (!hasData) {
 | 
			
		||||
                this.nodes.clear();
 | 
			
		||||
                this.edges.clear();
 | 
			
		||||
                return;
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            this.largeEntityMembers.clear();
 | 
			
		||||
            const largeEntityMap = new Map();
 | 
			
		||||
            
 | 
			
		||||
            graphData.nodes.forEach(node => {
 | 
			
		||||
                if (node.type === 'large_entity' && node.attributes) {
 | 
			
		||||
                    const nodesAttribute = this.findAttributeByName(node.attributes, 'nodes');
 | 
			
		||||
                    if (nodesAttribute && Array.isArray(nodesAttribute.value)) {
 | 
			
		||||
                        nodesAttribute.value.forEach(nodeId => {
 | 
			
		||||
                            largeEntityMap.set(nodeId, node.id);
 | 
			
		||||
                            this.largeEntityMembers.add(nodeId);
 | 
			
		||||
                        });
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            });
 | 
			
		||||
            const nodeMap = new Map(graphData.nodes.map(node => [node.id, node]));
 | 
			
		||||
 | 
			
		||||
            const filteredNodes = graphData.nodes.filter(node => {
 | 
			
		||||
                return !this.largeEntityMembers.has(node.id) || node.type === 'large_entity';
 | 
			
		||||
            });
 | 
			
		||||
            // Filter out hidden nodes before processing for rendering
 | 
			
		||||
            const filteredNodes = graphData.nodes.filter(node => 
 | 
			
		||||
                !(node.metadata && node.metadata.large_entity_id)
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            console.log(`Filtered ${graphData.nodes.length - filteredNodes.length} large entity member nodes from visualization`);
 | 
			
		||||
 | 
			
		||||
            // Process nodes with proper certificate coloring
 | 
			
		||||
            const processedNodes = filteredNodes.map(node => {
 | 
			
		||||
            const processedNodes = graphData.nodes.map(node => {
 | 
			
		||||
                const processed = this.processNode(node);
 | 
			
		||||
                
 | 
			
		||||
                // Apply certificate-based coloring here in frontend
 | 
			
		||||
                if (node.type === 'domain' && Array.isArray(node.attributes)) {
 | 
			
		||||
                    const certInfo = this.analyzeCertificateInfo(node.attributes);
 | 
			
		||||
                    
 | 
			
		||||
                    if (certInfo.hasExpiredOnly) {
 | 
			
		||||
                        // Red for domains with only expired/invalid certificates
 | 
			
		||||
                        processed.color = { background: '#ff6b6b', border: '#cc5555' };
 | 
			
		||||
                    } else if (!certInfo.hasCertificates) {
 | 
			
		||||
                        // Grey for domains with no certificates
 | 
			
		||||
                        processed.color = { background: '#c7c7c7', border: '#999999' };
 | 
			
		||||
                    }
 | 
			
		||||
                    // Valid certificates use default green (handled by processNode)
 | 
			
		||||
                }
 | 
			
		||||
                
 | 
			
		||||
                return processed;
 | 
			
		||||
            });
 | 
			
		||||
 | 
			
		||||
            const mergedEdges = {};
 | 
			
		||||
            graphData.edges.forEach(edge => {
 | 
			
		||||
                const fromNode = largeEntityMap.has(edge.from) ? largeEntityMap.get(edge.from) : edge.from;
 | 
			
		||||
                const toNode = largeEntityMap.has(edge.to) ? largeEntityMap.get(edge.to) : edge.to;
 | 
			
		||||
                const mergeKey = `${fromNode}-${toNode}-${edge.label}`;
 | 
			
		||||
 | 
			
		||||
                if (!mergedEdges[mergeKey]) {
 | 
			
		||||
                    mergedEdges[mergeKey] = {
 | 
			
		||||
                        ...edge,
 | 
			
		||||
                        from: fromNode,
 | 
			
		||||
                        to: toNode,
 | 
			
		||||
                        count: 0,
 | 
			
		||||
                        confidence_score: 0
 | 
			
		||||
                    };
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                mergedEdges[mergeKey].count++;
 | 
			
		||||
                if (edge.confidence_score > mergedEdges[mergeKey].confidence_score) {
 | 
			
		||||
                    mergedEdges[mergeKey].confidence_score = edge.confidence_score;
 | 
			
		||||
                }
 | 
			
		||||
            });
 | 
			
		||||
 | 
			
		||||
            const processedEdges = Object.values(mergedEdges).map(edge => {
 | 
			
		||||
                const processed = this.processEdge(edge);
 | 
			
		||||
                if (edge.count > 1) {
 | 
			
		||||
                    processed.label = `${edge.label} (${edge.count})`;
 | 
			
		||||
                if (node.metadata && node.metadata.large_entity_id) {
 | 
			
		||||
                    processed.hidden = true;
 | 
			
		||||
                }
 | 
			
		||||
                return processed;
 | 
			
		||||
            });
 | 
			
		||||
            
 | 
			
		||||
            const processedEdges = graphData.edges.map(edge => {
 | 
			
		||||
                let fromNode = nodeMap.get(edge.from);
 | 
			
		||||
                let toNode = nodeMap.get(edge.to);
 | 
			
		||||
                let fromId = edge.from;
 | 
			
		||||
                let toId = edge.to;
 | 
			
		||||
 | 
			
		||||
                if (fromNode && fromNode.metadata && fromNode.metadata.large_entity_id) {
 | 
			
		||||
                    fromId = fromNode.metadata.large_entity_id;
 | 
			
		||||
                }
 | 
			
		||||
                if (toNode && toNode.metadata && toNode.metadata.large_entity_id) {
 | 
			
		||||
                    toId = toNode.metadata.large_entity_id;
 | 
			
		||||
                }
 | 
			
		||||
                
 | 
			
		||||
                // Avoid self-referencing edges from re-routing
 | 
			
		||||
                if (fromId === toId) {
 | 
			
		||||
                    return null; 
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                const reRoutedEdge = { ...edge, from: fromId, to: toId };
 | 
			
		||||
                return this.processEdge(reRoutedEdge);
 | 
			
		||||
            }).filter(Boolean); // Remove nulls from self-referencing edges
 | 
			
		||||
 | 
			
		||||
            // Update datasets with animation
 | 
			
		||||
            const existingNodeIds = this.nodes.getIds();
 | 
			
		||||
            const existingEdgeIds = this.edges.getIds();
 | 
			
		||||
 | 
			
		||||
@ -472,12 +433,9 @@ class GraphManager {
 | 
			
		||||
                setTimeout(() => this.highlightNewElements(newNodes, newEdges), 100);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (processedNodes.length <= 10 || existingNodeIds.length === 0) {
 | 
			
		||||
            if (this.nodes.length <= 10 || existingNodeIds.length === 0) {
 | 
			
		||||
                setTimeout(() => this.fitView(), 800);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            console.log(`Graph updated: ${processedNodes.length} nodes, ${processedEdges.length} edges (${newNodes.length} new nodes, ${newEdges.length} new edges)`);
 | 
			
		||||
            console.log(`Large entity members hidden: ${this.largeEntityMembers.size}`);
 | 
			
		||||
            
 | 
			
		||||
        } catch (error) {
 | 
			
		||||
            console.error('Failed to update graph:', error);
 | 
			
		||||
@ -606,7 +564,7 @@ class GraphManager {
 | 
			
		||||
    processEdge(edge) {
 | 
			
		||||
        const confidence = edge.confidence_score || 0;
 | 
			
		||||
        const processedEdge = {
 | 
			
		||||
            id: `${edge.from}-${edge.to}`,
 | 
			
		||||
            id: `${edge.from}-${edge.to}-${edge.label}`,
 | 
			
		||||
            from: edge.from,
 | 
			
		||||
            to: edge.to,
 | 
			
		||||
            label: this.formatEdgeLabel(edge.label, confidence),
 | 
			
		||||
@ -1053,7 +1011,7 @@ class GraphManager {
 | 
			
		||||
        this.nodes.clear();
 | 
			
		||||
        this.edges.clear();
 | 
			
		||||
        this.history = [];
 | 
			
		||||
        this.largeEntityMembers.clear(); // Clear large entity tracking
 | 
			
		||||
        this.largeEntityMembers.clear();
 | 
			
		||||
        this.initialTargetIds.clear();
 | 
			
		||||
 | 
			
		||||
        // Show placeholder
 | 
			
		||||
@ -1211,7 +1169,6 @@ class GraphManager {
 | 
			
		||||
        const basicStats = {
 | 
			
		||||
            nodeCount: this.nodes.length,
 | 
			
		||||
            edgeCount: this.edges.length,
 | 
			
		||||
            largeEntityMembersHidden: this.largeEntityMembers.size
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        // Add forensic statistics
 | 
			
		||||
@ -1608,14 +1565,43 @@ class GraphManager {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Unhide all hidden nodes
 | 
			
		||||
     * FIXED: Unhide all hidden nodes, excluding large entity members and disconnected nodes.
 | 
			
		||||
     * This prevents orphaned large entity members from appearing as free-floating nodes.
 | 
			
		||||
     */
 | 
			
		||||
    unhideAll() {
 | 
			
		||||
        const allNodes = this.nodes.get({
 | 
			
		||||
            filter: (node) => node.hidden === true
 | 
			
		||||
        const allHiddenNodes = this.nodes.get({
 | 
			
		||||
            filter: (node) => {
 | 
			
		||||
                // Skip nodes that are part of a large entity
 | 
			
		||||
                if (node.metadata && node.metadata.large_entity_id) {
 | 
			
		||||
                    return false;
 | 
			
		||||
                }
 | 
			
		||||
                
 | 
			
		||||
                // Skip nodes that are not hidden
 | 
			
		||||
                if (node.hidden !== true) {
 | 
			
		||||
                    return false;
 | 
			
		||||
                }
 | 
			
		||||
                
 | 
			
		||||
                // Skip nodes that have no edges (would appear disconnected)
 | 
			
		||||
                const nodeId = node.id;
 | 
			
		||||
                const hasIncomingEdges = this.edges.get().some(edge => edge.to === nodeId && !edge.hidden);
 | 
			
		||||
                const hasOutgoingEdges = this.edges.get().some(edge => edge.from === nodeId && !edge.hidden);
 | 
			
		||||
                
 | 
			
		||||
                if (!hasIncomingEdges && !hasOutgoingEdges) {
 | 
			
		||||
                    console.log(`Skipping disconnected node ${nodeId} from unhide`);
 | 
			
		||||
                    return false;
 | 
			
		||||
                }
 | 
			
		||||
                
 | 
			
		||||
                return true;
 | 
			
		||||
            }
 | 
			
		||||
        });
 | 
			
		||||
        const updates = allNodes.map(node => ({ id: node.id, hidden: false }));
 | 
			
		||||
        this.nodes.update(updates);
 | 
			
		||||
        
 | 
			
		||||
        if (allHiddenNodes.length > 0) {
 | 
			
		||||
            console.log(`Unhiding ${allHiddenNodes.length} nodes with valid connections`);
 | 
			
		||||
            const updates = allHiddenNodes.map(node => ({ id: node.id, hidden: false }));
 | 
			
		||||
            this.nodes.update(updates);
 | 
			
		||||
        } else {
 | 
			
		||||
            console.log('No eligible nodes to unhide');
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -1397,28 +1397,62 @@ class DNSReconApp {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * UPDATED: Generate details for standard nodes with organized attribute grouping
 | 
			
		||||
     * UPDATED: Generate details for standard nodes with organized attribute grouping and data warnings
 | 
			
		||||
     */
 | 
			
		||||
    generateStandardNodeDetails(node) {
 | 
			
		||||
        let html = '';
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
        // Check for and display a crt.sh data warning if it exists
 | 
			
		||||
        const crtshWarningAttr = this.findAttributeByName(node.attributes, 'crtsh_data_warning');
 | 
			
		||||
        if (crtshWarningAttr) {
 | 
			
		||||
            html += `
 | 
			
		||||
                <div class="modal-section" style="border-left: 3px solid #ff9900; background: rgba(255, 153, 0, 0.05);">
 | 
			
		||||
                    <details open>
 | 
			
		||||
                        <summary style="color: #ff9900;">
 | 
			
		||||
                            <span>⚠️ Data Integrity Warning</span>
 | 
			
		||||
                        </summary>
 | 
			
		||||
                        <div class="modal-section-content">
 | 
			
		||||
                            <p class="placeholder-subtext" style="color: #e0e0e0; font-size: 0.8rem; line-height: 1.5;">
 | 
			
		||||
                                ${this.escapeHtml(crtshWarningAttr.value)}
 | 
			
		||||
                                <br><br>
 | 
			
		||||
                                This can occur for very large domains (e.g., google.com) where crt.sh may return a limited subset of all available certificates. As a result, the certificate status may not be fully representative.
 | 
			
		||||
                            </p>
 | 
			
		||||
                        </div>
 | 
			
		||||
                    </details>
 | 
			
		||||
                </div>
 | 
			
		||||
            `;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // Relationships sections
 | 
			
		||||
        html += this.generateRelationshipsSection(node);
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
        // UPDATED: Enhanced attributes section with intelligent grouping (no formatting)
 | 
			
		||||
        if (node.attributes && Array.isArray(node.attributes) && node.attributes.length > 0) {
 | 
			
		||||
            html += this.generateOrganizedAttributesSection(node.attributes, node.type);
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
        // Description section
 | 
			
		||||
        html += this.generateDescriptionSection(node);
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
        // Metadata section (collapsed by default)
 | 
			
		||||
        html += this.generateMetadataSection(node);
 | 
			
		||||
        
 | 
			
		||||
 | 
			
		||||
        return html;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /**
 | 
			
		||||
     * Helper method to find an attribute by name in the standardized attributes list
 | 
			
		||||
     * @param {Array} attributes - List of StandardAttribute objects
 | 
			
		||||
     * @param {string} name - Attribute name to find
 | 
			
		||||
     * @returns {Object|null} The attribute object if found, null otherwise
 | 
			
		||||
     */
 | 
			
		||||
    findAttributeByName(attributes, name) {
 | 
			
		||||
        if (!Array.isArray(attributes)) {
 | 
			
		||||
            return null;
 | 
			
		||||
        }
 | 
			
		||||
        return attributes.find(attr => attr.name === name) || null;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    generateOrganizedAttributesSection(attributes, nodeType) {
 | 
			
		||||
        if (!Array.isArray(attributes) || attributes.length === 0) {
 | 
			
		||||
            return '';
 | 
			
		||||
@ -1997,8 +2031,6 @@ class DNSReconApp {
 | 
			
		||||
            if (response.success) {
 | 
			
		||||
                this.showSuccess(response.message);
 | 
			
		||||
                
 | 
			
		||||
                this.hideModal();
 | 
			
		||||
 | 
			
		||||
                // If the scanner was idle, it's now running. Start polling to see the new node appear.
 | 
			
		||||
                if (this.scanStatus === 'idle') {
 | 
			
		||||
                    this.startPolling(1000);
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user