remove many unnecessary debug prints, improve large entity handling

overcuriousity
2025-09-17 13:31:35 +02:00
parent 469c133f1b
commit ec755b17ad
9 changed files with 57 additions and 121 deletions
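
The thread running through the diffs below is a single data-model change: a node's `attributes` is now a list of attribute dictionaries instead of one flat dict, so every lookup becomes a scan for a matching `name`. A minimal sketch of that access pattern, using the dict keys from the `attr_dict` built in the scanner diff; the `get_attr` helper and the example values are illustrative, not code from this commit:

```python
from typing import Optional

# List-based attribute model assumed throughout this commit: each entry
# carries the keys the scanner puts into attr_dict.
attributes = [
    {"name": "nodes", "value": ["a.example.com", "b.example.com"],
     "type": "list", "provider": "dns", "confidence": 0.9, "metadata": {}},
    {"name": "count", "value": 2,
     "type": "int", "provider": "dns", "confidence": 0.9, "metadata": {}},
]

def get_attr(attrs: list, name: str) -> Optional[dict]:
    # Hypothetical helper mirroring the next(...) scans used in the diffs.
    return next((attr for attr in attrs if attr.get("name") == name), None)

print(get_attr(attributes, "nodes")["value"])  # ['a.example.com', 'b.example.com']
```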

@@ -344,13 +344,19 @@ class GraphManager:
             return False
         node_data = self.graph.nodes[large_entity_id]
-        attributes = node_data.get('attributes', {})
+        attributes = node_data.get('attributes', [])
+        # Find the 'nodes' attribute dictionary in the list
+        nodes_attr = next((attr for attr in attributes if attr.get('name') == 'nodes'), None)
         # Remove from the list of member nodes
-        if 'nodes' in attributes and node_id_to_extract in attributes['nodes']:
-            attributes['nodes'].remove(node_id_to_extract)
-            # Update the count
-            attributes['count'] = len(attributes['nodes'])
+        if nodes_attr and 'value' in nodes_attr and isinstance(nodes_attr['value'], list) and node_id_to_extract in nodes_attr['value']:
+            nodes_attr['value'].remove(node_id_to_extract)
+            # Find the 'count' attribute and update it
+            count_attr = next((attr for attr in attributes if attr.get('name') == 'count'), None)
+            if count_attr:
+                count_attr['value'] = len(nodes_attr['value'])
         else:
             # This can happen if the node was already extracted, which is not an error.
             print(f"Warning: Node {node_id_to_extract} not found in the 'nodes' list of {large_entity_id}.")

@@ -152,7 +152,7 @@ class ForensicLogger:
         # Log to standard logger
         if error:
-            self.logger.error(f"API Request Failed - {provider}: {url} - {error}")
+            self.logger.error(f"API Request Failed.")
         else:
             self.logger.info(f"API Request - {provider}: {url} - Status: {status_code}")

@@ -432,7 +432,7 @@ class Scanner:
             self.indicators_completed += 1
         if not self._is_stop_requested():
-            all_new_targets = new_targets.union(large_entity_members)
+            all_new_targets = new_targets
             for new_target in all_new_targets:
                 is_ip_new = _is_valid_ip(new_target)
                 eligible_providers_new = self._get_eligible_providers(new_target, is_ip_new, False)
@@ -576,6 +576,33 @@ class Scanner:
             print(f"Stop requested before processing results from {provider_name} for {target}")
             return discovered_targets, False
+        attributes_by_node = defaultdict(list)
+        for attribute in provider_result.attributes:
+            # Convert the StandardAttribute object to a dictionary that the frontend can use
+            attr_dict = {
+                "name": attribute.name,
+                "value": attribute.value,
+                "type": attribute.type,
+                "provider": attribute.provider,
+                "confidence": attribute.confidence,
+                "metadata": attribute.metadata
+            }
+            attributes_by_node[attribute.target_node].append(attr_dict)
+        # Add attributes to nodes
+        for node_id, node_attributes_list in attributes_by_node.items():
+            if self.graph.graph.has_node(node_id):
+                # Determine node type
+                if _is_valid_ip(node_id):
+                    node_type = NodeType.IP
+                elif node_id.startswith('AS') and node_id[2:].isdigit():
+                    node_type = NodeType.ASN
+                else:
+                    node_type = NodeType.DOMAIN
+                # Add node with the list of attributes
+                self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
         # Check for large entity based on relationship count
         if provider_result.get_relationship_count() > self.config.large_entity_threshold:
             print(f"Large entity detected: {provider_name} returned {provider_result.get_relationship_count()} relationships for {target}")
@@ -619,34 +646,6 @@ class Scanner:
                 if _is_valid_domain(target_node) or _is_valid_ip(target_node):
                     discovered_targets.add(target_node)
-        # Process attributes, preserving them as a list of objects
-        attributes_by_node = defaultdict(list)
-        for attribute in provider_result.attributes:
-            # Convert the StandardAttribute object to a dictionary that the frontend can use
-            attr_dict = {
-                "name": attribute.name,
-                "value": attribute.value,
-                "type": attribute.type,
-                "provider": attribute.provider,
-                "confidence": attribute.confidence,
-                "metadata": attribute.metadata
-            }
-            attributes_by_node[attribute.target_node].append(attr_dict)
-        # Add attributes to nodes
-        for node_id, node_attributes_list in attributes_by_node.items():
-            if self.graph.graph.has_node(node_id):
-                # Determine node type
-                if _is_valid_ip(node_id):
-                    node_type = NodeType.IP
-                elif node_id.startswith('AS') and node_id[2:].isdigit():
-                    node_type = NodeType.ASN
-                else:
-                    node_type = NodeType.DOMAIN
-                # Add node with the list of attributes
-                self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
         return discovered_targets, False

     def _create_large_entity_from_provider_result(self, source: str, provider_name: str,
@@ -793,7 +792,11 @@ class Scanner:
         # 4. Re-queue the extracted node for full processing by all eligible providers
         print(f"Re-queueing extracted node {node_id_to_extract} for full reconnaissance...")
         is_ip = _is_valid_ip(node_id_to_extract)
-        current_depth = self.graph.graph.nodes[large_entity_id].get('attributes', {}).get('discovery_depth', 0)
+        # FIX: Correctly retrieve discovery_depth from the list of attributes
+        large_entity_attributes = self.graph.graph.nodes[large_entity_id].get('attributes', [])
+        discovery_depth_attr = next((attr for attr in large_entity_attributes if attr.get('name') == 'discovery_depth'), None)
+        current_depth = discovery_depth_attr['value'] if discovery_depth_attr else 0
         eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False)
         for provider in eligible_providers:

@@ -50,7 +50,6 @@ class SessionManager:
         """Restore SessionManager after unpickling."""
         self.__dict__.update(state)
         # Re-initialize unpickleable attributes
-        import redis
         self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
         self.lock = threading.Lock()
         self.creation_lock = threading.Lock()
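
`__setstate__` above rebuilds what pickle cannot carry across a round trip (the Redis client and the locks); removing the local `import redis` only works if the module already imports it at the top. A self-contained sketch of the pattern under that assumption:

```python
import pickle
import threading

import redis  # assumed module-level import, as the removed local import implies

class SessionManager:
    def __init__(self) -> None:
        self.sessions = {}
        self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
        self.lock = threading.Lock()

    def __getstate__(self) -> dict:
        # Drop the attributes pickle cannot serialize (connection, lock).
        state = self.__dict__.copy()
        state.pop("redis_client", None)
        state.pop("lock", None)
        return state

    def __setstate__(self, state: dict) -> None:
        # Restore picklable state, then re-create the unpickleable pieces.
        self.__dict__.update(state)
        self.redis_client = redis.StrictRedis(db=0, decode_responses=False)
        self.lock = threading.Lock()

manager = pickle.loads(pickle.dumps(SessionManager()))  # round-trips cleanly
```
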
@@ -140,7 +139,6 @@ class SessionManager:
         new_scanner = Scanner(session_config=preserved_config)
         new_scanner.session_id = new_session_id
-        print(f"New scanner has {len(new_scanner.providers)} providers: {[p.get_name() for p in new_scanner.providers]}")
         new_session_data = {
             'scanner': new_scanner,