remove-large-entity-temporarily #3

Merged
mstoeck3 merged 5 commits from remove-large-entity-temporarily into main 2025-09-19 12:29:27 +00:00
3 changed files with 63 additions and 58 deletions
Showing only changes of commit eabb532557 - Show all commits

View File

@@ -761,7 +761,7 @@ class Scanner:
def _process_provider_task(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]:
"""
Manages the entire process for a given target and provider.
FIXED: Don't enqueue correlation tasks during normal processing.
This version is generalized to handle all relationships dynamically.
"""
if self._is_stop_requested():
return set(), set(), False
@@ -773,7 +773,6 @@ class Scanner:
self._initialize_provider_states(target)
new_targets = set()
large_entity_members = set()
provider_successful = True
try:
@@ -782,6 +781,7 @@ class Scanner:
if provider_result is None:
provider_successful = False
elif not self._is_stop_requested():
# Pass all relationships to be processed
discovered, is_large_entity = self._process_provider_result_unified(
target, provider, provider_result, depth
)
@@ -791,7 +791,7 @@ class Scanner:
provider_successful = False
self._log_provider_error(target, provider.get_name(), str(e))
return new_targets, large_entity_members, provider_successful
return new_targets, set(), provider_successful
def _execute_provider_query(self, provider: BaseProvider, target: str, is_ip: bool) -> Optional[ProviderResult]:
"""
@@ -822,19 +822,18 @@ class Scanner:
return None
def _create_large_entity_from_result(self, source_node: str, provider_name: str,
provider_result: ProviderResult, depth: int) -> Set[str]:
provider_result: ProviderResult, depth: int) -> Tuple[str, Set[str]]:
"""
Creates a large entity node and tags all member nodes.
Creates a large entity node, tags all member nodes, and returns its ID and members.
"""
members = {rel.target_node for rel in provider_result.relationships
if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)}
if not members:
return set()
return "", set()
large_entity_id = f"le_{provider_name}_{source_node}"
# Add the large entity node to the graph
self.graph.add_node(
node_id=large_entity_id,
node_type=NodeType.LARGE_ENTITY,
@@ -847,16 +846,6 @@ class Scanner:
description=f"A collection of {len(members)} nodes discovered from {source_node} via {provider_name}."
)
# Create a single edge from the source to the large entity
self.graph.add_edge(
source_node, large_entity_id,
relationship_type=f"{provider_name}_collection",
confidence_score=0.95,
source_provider=provider_name,
raw_data={'description': 'Represents a large collection of nodes.'}
)
# Tag each member node with the large entity ID
for member_id in members:
node_type = NodeType.IP if _is_valid_ip(member_id) else NodeType.DOMAIN
self.graph.add_node(
@@ -865,7 +854,7 @@ class Scanner:
metadata={'large_entity_id': large_entity_id}
)
return members
return large_entity_id, members
def extract_node_from_large_entity(self, large_entity_id: str, node_id: str) -> bool:
"""
@@ -907,70 +896,83 @@ class Scanner:
provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
"""
Process a unified ProviderResult object to update the graph.
Handles large entity creation while ensuring all underlying nodes and edges are
added to the graph data model for a complete dataset.
This version dynamically re-routes edges to a large entity container.
"""
provider_name = provider.get_name()
discovered_targets = set()
large_entity_id = ""
large_entity_members = set()
if self._is_stop_requested():
return discovered_targets, False
# Check if a large entity should be created based on the count of domain/IP relationships
eligible_relationship_count = sum(
eligible_rel_count = sum(
1 for rel in provider_result.relationships if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node)
)
is_large_entity = eligible_relationship_count > self.config.large_entity_threshold
is_large_entity = eligible_rel_count > self.config.large_entity_threshold
if is_large_entity:
# Create the large entity node and get the set of its members
large_entity_members = self._create_large_entity_from_result(
large_entity_id, large_entity_members = self._create_large_entity_from_result(
target, provider_name, provider_result, current_depth
)
# Process ALL relationships to build the complete underlying data model
for i, relationship in enumerate(provider_result.relationships):
if i % 5 == 0 and self._is_stop_requested():
break
source_node = relationship.source_node
target_node = relationship.target_node
source_node_id = relationship.source_node
target_node_id = relationship.target_node
# Determine node types
source_type = NodeType.IP if _is_valid_ip(source_node) else NodeType.DOMAIN
# Determine visual source and target, substituting with large entity ID if necessary
visual_source = large_entity_id if source_node_id in large_entity_members else source_node_id
visual_target = large_entity_id if target_node_id in large_entity_members else target_node_id
# Prevent self-loops on the large entity node
if visual_source == visual_target:
continue
# Determine node types for the actual nodes
source_type = NodeType.IP if _is_valid_ip(source_node_id) else NodeType.DOMAIN
if provider_name == 'shodan' and relationship.relationship_type == 'shodan_isp':
target_type = NodeType.ISP
elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer':
target_type = NodeType.CA
elif provider_name == 'correlation':
target_type = NodeType.CORRELATION_OBJECT
elif _is_valid_ip(target_node):
elif _is_valid_ip(target_node_id):
target_type = NodeType.IP
else:
target_type = NodeType.DOMAIN
max_depth_reached = current_depth >= self.max_depth
# Add all nodes and edges to the graph's data model.
# The frontend will handle the visual re-routing for large entity members.
self.graph.add_node(source_node, source_type)
self.graph.add_node(target_node, target_type, metadata={'max_depth_reached': max_depth_reached})
# Add actual nodes to the graph (they might be hidden by the UI)
self.graph.add_node(source_node_id, source_type)
self.graph.add_node(target_node_id, target_type, metadata={'max_depth_reached': max_depth_reached})
# Add the visual edge to the graph
self.graph.add_edge(
source_node, target_node,
visual_source, visual_target,
relationship.relationship_type,
relationship.confidence,
provider_name,
relationship.raw_data
)
# Add all discovered domains/IPs to be considered for further processing
if (_is_valid_domain(target_node) or _is_valid_ip(target_node)) and not max_depth_reached:
if target_node not in large_entity_members:
discovered_targets.add(target_node)
if (_is_valid_domain(target_node_id) or _is_valid_ip(target_node_id)) and not max_depth_reached:
if target_node_id not in large_entity_members:
discovered_targets.add(target_node_id)
if large_entity_members:
self.logger.logger.info(f"Enqueuing DNS and Correlation for {len(large_entity_members)} members of {large_entity_id}")
for member in large_entity_members:
for provider_name_to_run in ['dns', 'correlation']:
p_instance = next((p for p in self.providers if p.get_name() == provider_name_to_run), None)
if p_instance and p_instance.get_eligibility().get('domains' if _is_valid_domain(member) else 'ips'):
priority = self._get_priority(provider_name_to_run)
self.task_queue.put((time.time(), priority, (provider_name_to_run, member, current_depth)))
self.total_tasks_ever_enqueued += 1
# Process all attributes and add them to the corresponding nodes
attributes_by_node = defaultdict(list)
for attribute in provider_result.attributes:
attr_dict = {
@@ -989,7 +991,6 @@ class Scanner:
return discovered_targets, is_large_entity
def stop_scan(self) -> bool:
"""Request immediate scan termination with proper cleanup."""
try:

View File

@@ -1565,15 +1565,21 @@ class GraphManager {
}
/**
* Unhide all hidden nodes
* Unhide all hidden nodes, excluding those within a large entity.
*/
unhideAll() {
const allNodes = this.nodes.get({
filter: (node) => node.hidden === true
const allHiddenNodes = this.nodes.get({
filter: (node) => {
// Condition: Node is hidden AND it is NOT part of a large entity.
return node.hidden === true && !(node.metadata && node.metadata.large_entity_id);
}
});
const updates = allNodes.map(node => ({ id: node.id, hidden: false }));
if (allHiddenNodes.length > 0) {
const updates = allHiddenNodes.map(node => ({ id: node.id, hidden: false }));
this.nodes.update(updates);
}
}
}

View File

@@ -1997,8 +1997,6 @@ class DNSReconApp {
if (response.success) {
this.showSuccess(response.message);
this.hideModal();
// If the scanner was idle, it's now running. Start polling to see the new node appear.
if (this.scanStatus === 'idle') {
this.startPolling(1000);