large entity recreation
This commit is contained in:
106
core/scanner.py
106
core/scanner.py
@@ -821,6 +821,88 @@ class Scanner:
|
||||
self._update_provider_state(target, provider_name, 'failed', 0, str(e), start_time)
|
||||
return None
|
||||
|
||||
def _create_large_entity_from_result(self, source_node: str, provider_name: str,
                                     provider_result: ProviderResult, depth: int) -> Set[str]:
    """
    Collapse a provider result into a single large entity node.

    Every relationship target in ``provider_result`` that is a valid domain
    or IP becomes a member. One LARGE_ENTITY node is added for the whole
    group, a single edge links ``source_node`` to it, and each member node is
    added with a ``large_entity_id`` metadata entry naming the group.

    Args:
        source_node: Node the relationships were discovered from.
        provider_name: Name of the provider that produced the result.
        provider_result: Result whose relationships supply the members.
        depth: Discovery depth recorded on the large entity node.

    Returns:
        The set of member node ids. An empty set is returned, without any
        graph call being made, when no relationship target is a valid
        domain or IP.
    """
    members = {
        rel.target_node
        for rel in provider_result.relationships
        if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)
    }
    if not members:
        return set()

    large_entity_id = f"le_{provider_name}_{source_node}"

    # Iteration order of a set of strings is not stable across interpreter
    # runs (string hash randomization), so sort once and reuse that order for
    # both the stored member list and the tagging loop below.
    ordered_members = sorted(members)
    member_count = len(ordered_members)

    # Add the large entity node to the graph
    self.graph.add_node(
        node_id=large_entity_id,
        node_type=NodeType.LARGE_ENTITY,
        attributes=[
            {"name": "count", "value": member_count, "type": "statistic"},
            {"name": "source_provider", "value": provider_name, "type": "metadata"},
            {"name": "discovery_depth", "value": depth, "type": "metadata"},
            {"name": "nodes", "value": ordered_members, "type": "metadata"},
        ],
        description=f"A collection of {member_count} nodes discovered from {source_node} via {provider_name}."
    )

    # Create a single edge from the source to the large entity
    self.graph.add_edge(
        source_node, large_entity_id,
        relationship_type=f"{provider_name}_collection",
        confidence_score=0.95,
        source_provider=provider_name,
        raw_data={'description': 'Represents a large collection of nodes.'}
    )

    # Tag each member node with the large entity ID
    for member_id in ordered_members:
        node_type = NodeType.IP if _is_valid_ip(member_id) else NodeType.DOMAIN
        self.graph.add_node(
            node_id=member_id,
            node_type=node_type,
            metadata={'large_entity_id': large_entity_id}
        )

    return members
|
||||
|
||||
def extract_node_from_large_entity(self, large_entity_id: str, node_id: str) -> bool:
    """
    Detach a node from a large entity and queue it for normal processing.

    The node's ``large_entity_id`` metadata entry is removed and the node is
    written back through ``self.graph.add_node``. One task per eligible
    provider is then put on ``self.task_queue``, using the large entity's
    ``discovery_depth`` attribute as the task depth (0 when the large entity
    node or that attribute is missing).

    Args:
        large_entity_id: Id of the large entity the node should be removed from.
        node_id: Id of the node to release.

    Returns:
        True when the node carried ``large_entity_id`` as its tag and was
        released; False when the node is not in the graph or its tag does
        not match.
    """
    if not self.graph.graph.has_node(node_id):
        return False

    node_data = self.graph.graph.nodes[node_id]
    metadata = node_data.get('metadata', {})
    if metadata.get('large_entity_id') != large_entity_id:
        return False

    # Remove the large entity tag
    del metadata['large_entity_id']
    self.graph.add_node(node_id, NodeType(node_data['type']), metadata=metadata)

    # The depth is the same for every provider, so resolve it once up front
    # instead of once per enqueued task. Use the depth recorded on the large
    # entity if available, else 0.
    depth = 0
    if self.graph.graph.has_node(large_entity_id):
        le_attrs = self.graph.graph.nodes[large_entity_id].get('attributes', [])
        depth_attr = next(
            (attr for attr in le_attrs if attr.get('name') == 'discovery_depth'),
            None,
        )
        if depth_attr:
            depth = depth_attr.get('value', 0)

    # Re-enqueue the node for full processing
    is_ip = _is_valid_ip(node_id)
    for provider in self._get_eligible_providers(node_id, is_ip, False):
        provider_name = provider.get_name()
        priority = self._get_priority(provider_name)
        self.task_queue.put((time.time(), priority, (provider_name, node_id, depth)))
        self.total_tasks_ever_enqueued += 1

    return True
|
||||
|
||||
def _process_provider_result_unified(self, target: str, provider: BaseProvider,
|
||||
provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
|
||||
"""
|
||||
@@ -830,7 +912,7 @@ class Scanner:
|
||||
"""
|
||||
provider_name = provider.get_name()
|
||||
discovered_targets = set()
|
||||
#large_entity_members = set()
|
||||
large_entity_members = set()
|
||||
|
||||
if self._is_stop_requested():
|
||||
return discovered_targets, False
|
||||
@@ -842,11 +924,11 @@ class Scanner:
|
||||
|
||||
is_large_entity = eligible_relationship_count > self.config.large_entity_threshold
|
||||
|
||||
#if is_large_entity:
|
||||
if is_large_entity:
|
||||
# Create the large entity node and get the set of its members
|
||||
#large_entity_members = self._create_large_entity_from_result(
|
||||
# target, provider_name, provider_result, current_depth
|
||||
#)
|
||||
large_entity_members = self._create_large_entity_from_result(
|
||||
target, provider_name, provider_result, current_depth
|
||||
)
|
||||
|
||||
# Process ALL relationships to build the complete underlying data model
|
||||
for i, relationship in enumerate(provider_result.relationships):
|
||||
@@ -885,7 +967,8 @@ class Scanner:
|
||||
|
||||
# Add all discovered domains/IPs to be considered for further processing
|
||||
if (_is_valid_domain(target_node) or _is_valid_ip(target_node)) and not max_depth_reached:
|
||||
discovered_targets.add(target_node)
|
||||
if target_node not in large_entity_members:
|
||||
discovered_targets.add(target_node)
|
||||
|
||||
# Process all attributes and add them to the corresponding nodes
|
||||
attributes_by_node = defaultdict(list)
|
||||
@@ -1004,8 +1087,19 @@ class Scanner:
|
||||
eligible = []
|
||||
target_key = 'ips' if is_ip else 'domains'
|
||||
|
||||
# Check if the target is part of a large entity
|
||||
is_in_large_entity = False
|
||||
if self.graph.graph.has_node(target):
|
||||
metadata = self.graph.graph.nodes[target].get('metadata', {})
|
||||
if 'large_entity_id' in metadata:
|
||||
is_in_large_entity = True
|
||||
|
||||
for provider in self.providers:
|
||||
try:
|
||||
# If in large entity, only allow dns and correlation providers
|
||||
if is_in_large_entity and provider.get_name() not in ['dns', 'correlation']:
|
||||
continue
|
||||
|
||||
# Check if provider supports this target type
|
||||
if not provider.get_eligibility().get(target_key, False):
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user