remove many unnecessary debug prints, improve large entity handling

overcuriousity
2025-09-17 13:31:35 +02:00
parent 469c133f1b
commit ec755b17ad
9 changed files with 57 additions and 121 deletions
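
The thread running through the diffs below is a single data-model change: a node's `attributes` is now a list of attribute dictionaries instead of one flat dict, so every lookup becomes a scan for a matching `name`. A minimal sketch of that access pattern, using the dict keys from the `attr_dict` built in the scanner diff; the `get_attr` helper and the example values are illustrative, not code from this commit:

```python
from typing import Optional

# List-based attribute model assumed throughout this commit: each entry
# carries the keys the scanner puts into attr_dict.
attributes = [
    {"name": "nodes", "value": ["a.example.com", "b.example.com"],
     "type": "list", "provider": "dns", "confidence": 0.9, "metadata": {}},
    {"name": "count", "value": 2,
     "type": "int", "provider": "dns", "confidence": 0.9, "metadata": {}},
]

def get_attr(attrs: list, name: str) -> Optional[dict]:
    # Hypothetical helper mirroring the next(...) scans used in the diffs.
    return next((attr for attr in attrs if attr.get("name") == name), None)

print(get_attr(attributes, "nodes")["value"])  # ['a.example.com', 'b.example.com']
```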

@@ -344,13 +344,19 @@ class GraphManager:
             return False
         node_data = self.graph.nodes[large_entity_id]
-        attributes = node_data.get('attributes', {})
+        attributes = node_data.get('attributes', [])
+        # Find the 'nodes' attribute dictionary in the list
+        nodes_attr = next((attr for attr in attributes if attr.get('name') == 'nodes'), None)
         # Remove from the list of member nodes
-        if 'nodes' in attributes and node_id_to_extract in attributes['nodes']:
-            attributes['nodes'].remove(node_id_to_extract)
-            # Update the count
-            attributes['count'] = len(attributes['nodes'])
+        if nodes_attr and 'value' in nodes_attr and isinstance(nodes_attr['value'], list) and node_id_to_extract in nodes_attr['value']:
+            nodes_attr['value'].remove(node_id_to_extract)
+            # Find the 'count' attribute and update it
+            count_attr = next((attr for attr in attributes if attr.get('name') == 'count'), None)
+            if count_attr:
+                count_attr['value'] = len(nodes_attr['value'])
         else:
             # This can happen if the node was already extracted, which is not an error.
             print(f"Warning: Node {node_id_to_extract} not found in the 'nodes' list of {large_entity_id}.")

@@ -152,7 +152,7 @@ class ForensicLogger:
         # Log to standard logger
         if error:
-            self.logger.error(f"API Request Failed - {provider}: {url} - {error}")
+            self.logger.error(f"API Request Failed.")
         else:
             self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}")

@@ -432,7 +432,7 @@ class Scanner:
             self.indicators_completed += 1
         if not self._is_stop_requested():
-            all_new_targets = new_targets.union(large_entity_members)
+            all_new_targets = new_targets
             for new_target in all_new_targets:
                 is_ip_new = _is_valid_ip(new_target)
                 eligible_providers_new = self._get_eligible_providers(new_target, is_ip_new, False)
@@ -576,6 +576,33 @@ class Scanner:
             print(f"Stop requested before processing results from {provider_name} for {target}")
             return discovered_targets, False
+        attributes_by_node = defaultdict(list)
+        for attribute in provider_result.attributes:
+            # Convert the StandardAttribute object to a dictionary that the frontend can use
+            attr_dict = {
+                "name": attribute.name,
+                "value": attribute.value,
+                "type": attribute.type,
+                "provider": attribute.provider,
+                "confidence": attribute.confidence,
+                "metadata": attribute.metadata
+            }
+            attributes_by_node[attribute.target_node].append(attr_dict)
+        # Add attributes to nodes
+        for node_id, node_attributes_list in attributes_by_node.items():
+            if self.graph.graph.has_node(node_id):
+                # Determine node type
+                if _is_valid_ip(node_id):
+                    node_type = NodeType.IP
+                elif node_id.startswith('AS') and node_id[2:].isdigit():
+                    node_type = NodeType.ASN
+                else:
+                    node_type = NodeType.DOMAIN
+                # Add node with the list of attributes
+                self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
         # Check for large entity based on relationship count
         if provider_result.get_relationship_count() > self.config.large_entity_threshold:
             print(f"Large entity detected: {provider_name} returned {provider_result.get_relationship_count()} relationships for {target}")
@@ -619,34 +646,6 @@ class Scanner:
                 if _is_valid_domain(target_node) or _is_valid_ip(target_node):
                     discovered_targets.add(target_node)
-        # Process attributes, preserving them as a list of objects
-        attributes_by_node = defaultdict(list)
-        for attribute in provider_result.attributes:
-            # Convert the StandardAttribute object to a dictionary that the frontend can use
-            attr_dict = {
-                "name": attribute.name,
-                "value": attribute.value,
-                "type": attribute.type,
-                "provider": attribute.provider,
-                "confidence": attribute.confidence,
-                "metadata": attribute.metadata
-            }
-            attributes_by_node[attribute.target_node].append(attr_dict)
-        # Add attributes to nodes
-        for node_id, node_attributes_list in attributes_by_node.items():
-            if self.graph.graph.has_node(node_id):
-                # Determine node type
-                if _is_valid_ip(node_id):
-                    node_type = NodeType.IP
-                elif node_id.startswith('AS') and node_id[2:].isdigit():
-                    node_type = NodeType.ASN
-                else:
-                    node_type = NodeType.DOMAIN
-                # Add node with the list of attributes
-                self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
         return discovered_targets, False

     def _create_large_entity_from_provider_result(self, source: str, provider_name: str,
@@ -793,7 +792,11 @@ class Scanner:
         # 4. Re-queue the extracted node for full processing by all eligible providers
         print(f"Re-queueing extracted node {node_id_to_extract} for full reconnaissance...")
         is_ip = _is_valid_ip(node_id_to_extract)
-        current_depth = self.graph.graph.nodes[large_entity_id].get('attributes', {}).get('discovery_depth', 0)
+        # FIX: Correctly retrieve discovery_depth from the list of attributes
+        large_entity_attributes = self.graph.graph.nodes[large_entity_id].get('attributes', [])
+        discovery_depth_attr = next((attr for attr in large_entity_attributes if attr.get('name') == 'discovery_depth'), None)
+        current_depth = discovery_depth_attr['value'] if discovery_depth_attr else 0
         eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False)
         for provider in eligible_providers:

@@ -50,7 +50,6 @@ class SessionManager:
         """Restore SessionManager after unpickling."""
         self.__dict__.update(state)
         # Re-initialize unpickleable attributes
-        import redis
         self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
         self.lock = threading.Lock()
         self.creation_lock = threading.Lock()
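
`__setstate__` above rebuilds what pickle cannot carry across a round trip (the Redis client and the locks); removing the local `import redis` only works if the module already imports it at the top. A self-contained sketch of the pattern under that assumption:

```python
import pickle
import threading

import redis  # assumed module-level import, as the removed local import implies

class SessionManager:
    def __init__(self) -> None:
        self.sessions = {}
        self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
        self.lock = threading.Lock()

    def __getstate__(self) -> dict:
        # Drop the attributes pickle cannot serialize (connection, lock).
        state = self.__dict__.copy()
        state.pop("redis_client", None)
        state.pop("lock", None)
        return state

    def __setstate__(self, state: dict) -> None:
        # Restore picklable state, then re-create the unpickleable pieces.
        self.__dict__.update(state)
        self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
        self.lock = threading.Lock()

manager = pickle.loads(pickle.dumps(SessionManager()))  # round-trips cleanly
```
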
@@ -140,7 +139,6 @@ class SessionManager:
         new_scanner = Scanner(session_config=preserved_config)
         new_scanner.session_id = new_session_id
-        print(f"New scanner has {len(new_scanner.providers)} providers: {[p.get_name() for p in new_scanner.providers]}")
         new_session_data = {
             'scanner': new_scanner,