extract node from large entity feature

This commit is contained in:
overcuriousity 2025-09-16 00:01:24 +02:00
parent fc098aed28
commit 2c48316477
7 changed files with 182 additions and 15 deletions

27
app.py
View File

@@ -282,6 +282,33 @@ def get_graph_data():
} }
}), 500 }), 500
@app.route('/api/graph/large-entity/extract', methods=['POST'])
def extract_from_large_entity():
    """Extract a node from a large entity, making it a standalone node.

    Expects a JSON body: {"large_entity_id": str, "node_id": str}.

    Returns:
        200 with {'success': True, 'message': ...} on success.
        400 when the body is missing/invalid or parameters are absent.
        404 when no active scanner session exists.
        500 when extraction fails or an unexpected error occurs.
    """
    try:
        # silent=True makes get_json() return None instead of raising on a
        # missing or malformed JSON body, so we can answer with a clean 400
        # (previously a None body caused an AttributeError -> 500).
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'success': False, 'error': 'Invalid or missing JSON body'}), 400

        large_entity_id = data.get('large_entity_id')
        node_id = data.get('node_id')

        if not large_entity_id or not node_id:
            return jsonify({'success': False, 'error': 'Missing required parameters'}), 400

        user_session_id, scanner = get_user_scanner()
        if not scanner:
            return jsonify({'success': False, 'error': 'No active session found'}), 404

        success = scanner.extract_node_from_large_entity(large_entity_id, node_id)

        if success:
            # Persist the mutated scanner so the extraction survives
            # subsequent requests in this session.
            session_manager.update_session_scanner(user_session_id, scanner)
            return jsonify({'success': True, 'message': f'Node {node_id} extracted successfully.'})
        else:
            return jsonify({'success': False, 'error': f'Failed to extract node {node_id}.'}), 500

    except Exception as e:
        # Boundary handler: log full traceback server-side, return opaque 500.
        print(f"ERROR: Exception in extract_from_large_entity endpoint: {e}")
        traceback.print_exc()
        return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500
@app.route('/api/graph/node/<node_id>', methods=['DELETE']) @app.route('/api/graph/node/<node_id>', methods=['DELETE'])
def delete_graph_node(node_id): def delete_graph_node(node_id):

View File

@@ -414,6 +414,30 @@ class GraphManager:
self.last_modified = datetime.now(timezone.utc).isoformat() self.last_modified = datetime.now(timezone.utc).isoformat()
return True return True
def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
    """
    Remove a member node from a large entity's internal list and refresh its count.

    This prepares the large entity for the member's promotion to a regular,
    standalone graph node.

    Args:
        large_entity_id: Graph id of the large-entity container node.
        node_id_to_extract: Id of the member node to remove from the container.

    Returns:
        False when the container node does not exist in the graph;
        True otherwise (including when the member was already absent,
        which is treated as an idempotent no-op rather than an error).
    """
    if not self.graph.has_node(large_entity_id):
        return False

    attributes = self.graph.nodes[large_entity_id].get('attributes', {})
    members = attributes.get('nodes')

    if not members or node_id_to_extract not in members:
        # The node may have been extracted already; succeed without touching state.
        print(f"Warning: Node {node_id_to_extract} not found in the 'nodes' list of {large_entity_id}.")
        return True

    members.remove(node_id_to_extract)
    attributes['count'] = len(members)

    self.last_modified = datetime.now(timezone.utc).isoformat()
    return True
def remove_node(self, node_id: str) -> bool: def remove_node(self, node_id: str) -> bool:
"""Remove a node and its connected edges from the graph.""" """Remove a node and its connected edges from the graph."""
if not self.graph.has_node(node_id): if not self.graph.has_node(node_id):

View File

@ -162,12 +162,15 @@ class Scanner:
self.stop_event = threading.Event() self.stop_event = threading.Event()
self.scan_thread = None self.scan_thread = None
self.executor = None self.executor = None
self.processing_lock = threading.Lock() # **NEW**: Recreate processing lock self.processing_lock = threading.Lock()
self.task_queue = PriorityQueue() self.task_queue = PriorityQueue()
self.rate_limiter = GlobalRateLimiter(redis.StrictRedis(db=0)) self.rate_limiter = GlobalRateLimiter(redis.StrictRedis(db=0))
self.logger = get_forensic_logger() self.logger = get_forensic_logger()
# **NEW**: Reset processing tracking # This ensures the scanner has access to providers for actions like node extraction.
print("Re-initializing providers after loading session...")
self._initialize_providers()
if not hasattr(self, 'currently_processing'): if not hasattr(self, 'currently_processing'):
self.currently_processing = set() self.currently_processing = set()
@ -792,6 +795,7 @@ class Scanner:
elif _is_valid_ip(targets[0]): elif _is_valid_ip(targets[0]):
node_type = 'ip' node_type = 'ip'
# We still create the nodes so they exist in the graph, they are just not processed for edges yet.
for target in targets: for target in targets:
self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP) self.graph.add_node(target, NodeType.DOMAIN if node_type == 'domain' else NodeType.IP)
@ -802,6 +806,7 @@ class Scanner:
'source_provider': provider_name, 'source_provider': provider_name,
'discovery_depth': current_depth, 'discovery_depth': current_depth,
'threshold_exceeded': self.config.large_entity_threshold, 'threshold_exceeded': self.config.large_entity_threshold,
# <<< FIX: Removed 'raw_results'. It's inefficient and unnecessary.
} }
description = f'Large entity created due to {len(targets)} results from {provider_name}' description = f'Large entity created due to {len(targets)} results from {provider_name}'
@ -817,6 +822,53 @@ class Scanner:
return set(targets) return set(targets)
def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool:
    """
    Extracts a node from a large entity by re-adding it to the main processing queue.
    This is a much cleaner approach than storing and replaying raw results.

    Args:
        large_entity_id: Graph id of the large-entity container node.
        node_id_to_extract: Member node to promote to a standalone node.

    Returns:
        True when the member was removed from the container and re-queued for
        every eligible provider; False when the container does not exist or
        the graph-side removal failed.
    """
    if not self.graph.graph.has_node(large_entity_id):
        print(f"ERROR: Large entity {large_entity_id} not found.")
        return False

    # 1. Modify the graph data structure first
    # This removes the node from the container's internal list.
    success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract)
    if not success:
        print(f"ERROR: Node {node_id_to_extract} could not be removed from {large_entity_id}'s attributes.")
        return False

    # 2. Re-queue the extracted node for full processing by all eligible providers.
    # This is the same logic used for any newly discovered node.
    print(f"Re-queueing extracted node {node_id_to_extract} for full reconnaissance...")
    is_ip = _is_valid_ip(node_id_to_extract)
    # The extracted node inherits the depth at which the large entity itself
    # was created ('discovery_depth' is stored on the container at creation).
    current_depth = self.graph.graph.nodes[large_entity_id].get('attributes', {}).get('discovery_depth', 0)

    # NOTE(review): the third argument (False) is passed positionally; its
    # meaning isn't visible from here — confirm against _get_eligible_providers.
    eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False)
    for provider in eligible_providers:
        provider_name = provider.get_name()
        # Add the task to the main queue with the correct depth.
        self.task_queue.put((self._get_priority(provider_name), (provider_name, node_id_to_extract, current_depth)))
        self.total_tasks_ever_enqueued += 1

    # 3. If the scanner is not running, we need to kickstart it to process this one item.
    if self.status != ScanStatus.RUNNING:
        print("Scanner is idle. Starting a mini-scan to process the extracted node.")
        self.status = ScanStatus.RUNNING
        self._update_session_state()

        # Start a new thread for the scan execution if one isn't running
        if not self.scan_thread or not self.scan_thread.is_alive():
            self.scan_thread = threading.Thread(
                target=self._execute_scan,
                args=(self.current_target, self.max_depth),  # Use existing target/depth
                daemon=True
            )
            self.scan_thread.start()

    print(f"Successfully extracted and re-queued {node_id_to_extract} from {large_entity_id}.")
    return True
def _collect_node_attributes(self, node_id: str, provider_name: str, rel_type: str, def _collect_node_attributes(self, node_id: str, provider_name: str, rel_type: str,
target: str, raw_data: Dict[str, Any], attributes: Dict[str, Any]) -> None: target: str, raw_data: Dict[str, Any], attributes: Dict[str, Any]) -> None:
"""Collect and organize attributes for a node.""" """Collect and organize attributes for a node."""

View File

@ -3,7 +3,6 @@
import time import time
import requests import requests
import threading import threading
import redis
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Tuple from typing import List, Dict, Any, Optional, Tuple
@ -36,7 +35,6 @@ class BaseProvider(ABC):
# Fallback to global config for backwards compatibility # Fallback to global config for backwards compatibility
from config import config as global_config from config import config as global_config
self.config = global_config self.config = global_config
actual_rate_limit = rate_limit
actual_timeout = timeout actual_timeout = timeout
self.name = name self.name = name

View File

@ -514,12 +514,20 @@ class CrtShProvider(BaseProvider):
shared = [] shared = []
# Create a set of certificate IDs from the first list for quick lookup # Create a set of certificate IDs from the first list for quick lookup
cert1_ids = {cert.get('certificate_id') for cert in certs1 if cert.get('certificate_id')} # <<< FIX: Added robust type checking to handle potentially malformed API data
cert1_ids = set()
for cert in certs1:
cert_id = cert.get('certificate_id')
# Ensure the ID is not None and is a hashable type before adding to the set
if cert_id and isinstance(cert_id, (int, str, float, bool, tuple)):
cert1_ids.add(cert_id)
# Find certificates in the second list that match # Find certificates in the second list that match
for cert in certs2: for cert in certs2:
if cert.get('certificate_id') in cert1_ids: cert_id = cert.get('certificate_id')
shared.append(cert) if cert_id and isinstance(cert_id, (int, str, float, bool, tuple)):
if cert_id in cert1_ids:
shared.append(cert)
return shared return shared

View File

@ -389,8 +389,8 @@ class GraphManager {
}); });
const filteredNodes = graphData.nodes.filter(node => { const filteredNodes = graphData.nodes.filter(node => {
// Only include nodes that are NOT members of large entities // Only include nodes that are NOT members of large entities, but always include the container itself
return !this.largeEntityMembers.has(node.id); return !this.largeEntityMembers.has(node.id) || node.type === 'large_entity';
}); });
console.log(`Filtered ${graphData.nodes.length - filteredNodes.length} large entity member nodes from visualization`); console.log(`Filtered ${graphData.nodes.length - filteredNodes.length} large entity member nodes from visualization`);

View File

@ -189,7 +189,7 @@ class DNSReconApp {
this.elements.resetApiKeys.addEventListener('click', () => this.resetApiKeys()); this.elements.resetApiKeys.addEventListener('click', () => this.resetApiKeys());
} }
// ** FIX: Listen for the custom event from the graph ** // Listen for the custom event from the graph
document.addEventListener('nodeSelected', (e) => { document.addEventListener('nodeSelected', (e) => {
this.showNodeModal(e.detail.node); this.showNodeModal(e.detail.node);
}); });
@ -1092,8 +1092,6 @@ class DNSReconApp {
return html; return html;
} }
generateLargeEntityDetails(node) { generateLargeEntityDetails(node) {
const attributes = node.attributes || {}; const attributes = node.attributes || {};
const nodes = attributes.nodes || []; const nodes = attributes.nodes || [];
@ -1123,16 +1121,23 @@ class DNSReconApp {
</div> </div>
<div class="modal-section"> <div class="modal-section">
<details> <details open>
<summary>📋 Contained ${nodeType}s (${nodes.length})</summary> <summary>📋 Contained ${nodeType}s (${nodes.length})</summary>
<div class="modal-section-content"> <div class="modal-section-content">
<div class="relationship-compact"> <div class="relationship-compact">
`; `;
// Use node.id for the large_entity_id
const largeEntityId = node.id;
nodes.forEach(innerNodeId => { nodes.forEach(innerNodeId => {
html += ` html += `
<div class="relationship-compact-item"> <div class="relationship-compact-item">
<span class="node-link-compact" data-node-id="${innerNodeId}">${innerNodeId}</span> <span class="node-link-compact" data-node-id="${innerNodeId}">${innerNodeId}</span>
<button class="btn-icon-small extract-node-btn"
title="Extract to graph"
data-large-entity-id="${largeEntityId}"
data-node-id="${innerNodeId}">[+]</button>
</div> </div>
`; `;
}); });
@ -1732,6 +1737,20 @@ class DNSReconApp {
}); });
}); });
// Handle the new extract button
this.elements.modalDetails.querySelectorAll('.extract-node-btn').forEach(button => {
button.addEventListener('click', (e) => {
e.preventDefault();
e.stopPropagation();
const largeEntityId = e.target.dataset.largeEntityId;
const nodeId = e.target.dataset.nodeId;
console.log(`Extract button clicked for node ${nodeId} from entity ${largeEntityId}`);
this.extractNode(largeEntityId, nodeId);
});
});
// Handle legacy node links // Handle legacy node links
this.elements.modalDetails.querySelectorAll('.node-link').forEach(link => { this.elements.modalDetails.querySelectorAll('.node-link').forEach(link => {
link.addEventListener('click', (e) => { link.addEventListener('click', (e) => {
@ -1749,17 +1768,56 @@ class DNSReconApp {
}); });
} }
/**
 * Ask the backend to extract one member node out of a large-entity container.
 *
 * Posts to /api/graph/large-entity/extract, then refreshes the modal so the
 * container's member list reflects the removal. If the scanner was idle, the
 * backend may have started processing, so polling is (re)started.
 *
 * @param {string} largeEntityId - Graph id of the large-entity container.
 * @param {string} nodeId - Id of the member node to extract.
 */
async extractNode(largeEntityId, nodeId) {
    try {
        // Optimistic notice: the server queues the node for processing.
        this.showInfo(`Extraction initiated for ${nodeId}. It will be processed by the scanner.`);

        const response = await this.apiCall('/api/graph/large-entity/extract', 'POST', {
            large_entity_id: largeEntityId,
            node_id: nodeId,
        });

        if (response.success) {
            this.showSuccess(response.message);

            // The node is now in the queue. We don't need to force a graph update.
            // Instead, we just need to update the modal view to show one less item.
            const graphResponse = await this.apiCall('/api/graph');
            if (graphResponse.success) {
                const updatedLargeEntity = graphResponse.graph.nodes.find(n => n.id === largeEntityId);
                if (updatedLargeEntity) {
                    this.showNodeModal(updatedLargeEntity);
                } else {
                    // The entity might have been dismantled completely if it was the last node
                    this.hideModal();
                }
            }

            // If the scanner was idle, it's now running. Start polling.
            if (this.scanStatus === 'idle') {
                this.startPolling(1000);
            }

        } else {
            throw new Error(response.error || 'Extraction failed on the server.');
        }
    } catch (error) {
        // Surfaces both network failures and server-reported errors.
        console.error('Failed to extract node:', error);
        this.showError(`Extraction failed: ${error.message}`);
    }
}
initializeModalFunctionality() { initializeModalFunctionality() {
// Make sure the graph manager has node access // Make sure the graph manager has node access
console.log('Initializing modal functionality...'); console.log('Initializing modal functionality...');
// Set up event delegation for dynamic content // Set up event delegation for dynamic content
document.addEventListener('click', (e) => { document.addEventListener('click', (e) => {
if (e.target.classList.contains('node-link-compact') || e.target.classList.contains('node-link')) { const target = e.target.closest('.node-link-compact, .node-link');
if (target) {
e.preventDefault(); e.preventDefault();
e.stopPropagation(); e.stopPropagation();
const nodeId = e.target.dataset.nodeId || e.target.getAttribute('data-node-id'); const nodeId = target.dataset.nodeId || target.getAttribute('data-node-id');
if (nodeId && this.graphManager && this.graphManager.nodes) { if (nodeId && this.graphManager && this.graphManager.nodes) {
const nextNode = this.graphManager.nodes.get(nodeId); const nextNode = this.graphManager.nodes.get(nodeId);
if (nextNode) { if (nextNode) {