work on large entity extraction
This commit is contained in:
parent
602739246f
commit
b2c5d2331c
61
app.py
61
app.py
@ -187,7 +187,9 @@ def get_graph_data():
|
|||||||
|
|
||||||
@app.route('/api/graph/large-entity/extract', methods=['POST'])
|
@app.route('/api/graph/large-entity/extract', methods=['POST'])
|
||||||
def extract_from_large_entity():
|
def extract_from_large_entity():
|
||||||
"""Extract a node from a large entity."""
|
"""
|
||||||
|
FIXED: Extract a node from a large entity with proper error handling.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
data = request.get_json()
|
data = request.get_json()
|
||||||
large_entity_id = data.get('large_entity_id')
|
large_entity_id = data.get('large_entity_id')
|
||||||
@ -200,17 +202,66 @@ def extract_from_large_entity():
|
|||||||
if not scanner:
|
if not scanner:
|
||||||
return jsonify({'success': False, 'error': 'No active session found'}), 404
|
return jsonify({'success': False, 'error': 'No active session found'}), 404
|
||||||
|
|
||||||
|
# FIXED: Check if node exists and provide better error messages
|
||||||
|
if not scanner.graph.graph.has_node(node_id):
|
||||||
|
return jsonify({
|
||||||
|
'success': False,
|
||||||
|
'error': f'Node {node_id} not found in graph'
|
||||||
|
}), 404
|
||||||
|
|
||||||
|
# FIXED: Check if node is actually part of the large entity
|
||||||
|
node_data = scanner.graph.graph.nodes[node_id]
|
||||||
|
metadata = node_data.get('metadata', {})
|
||||||
|
current_large_entity = metadata.get('large_entity_id')
|
||||||
|
|
||||||
|
if not current_large_entity:
|
||||||
|
return jsonify({
|
||||||
|
'success': False,
|
||||||
|
'error': f'Node {node_id} is not part of any large entity'
|
||||||
|
}), 400
|
||||||
|
|
||||||
|
if current_large_entity != large_entity_id:
|
||||||
|
return jsonify({
|
||||||
|
'success': False,
|
||||||
|
'error': f'Node {node_id} belongs to large entity {current_large_entity}, not {large_entity_id}'
|
||||||
|
}), 400
|
||||||
|
|
||||||
|
# FIXED: Check if large entity exists
|
||||||
|
if not scanner.graph.graph.has_node(large_entity_id):
|
||||||
|
return jsonify({
|
||||||
|
'success': False,
|
||||||
|
'error': f'Large entity {large_entity_id} not found'
|
||||||
|
}), 404
|
||||||
|
|
||||||
|
# Perform the extraction
|
||||||
success = scanner.extract_node_from_large_entity(large_entity_id, node_id)
|
success = scanner.extract_node_from_large_entity(large_entity_id, node_id)
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
|
# Force immediate session state update
|
||||||
session_manager.update_session_scanner(user_session_id, scanner)
|
session_manager.update_session_scanner(user_session_id, scanner)
|
||||||
return jsonify({'success': True, 'message': f'Node {node_id} extracted successfully.'})
|
|
||||||
else:
|
|
||||||
return jsonify({'success': False, 'error': f'Failed to extract node {node_id}.'}), 500
|
|
||||||
|
|
||||||
|
return jsonify({
|
||||||
|
'success': True,
|
||||||
|
'message': f'Node {node_id} extracted successfully from {large_entity_id}.',
|
||||||
|
'extracted_node': node_id,
|
||||||
|
'large_entity': large_entity_id
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# This should not happen with the improved checks above, but handle it gracefully
|
||||||
|
return jsonify({
|
||||||
|
'success': False,
|
||||||
|
'error': f'Failed to extract node {node_id} from {large_entity_id}. Node may have already been extracted.'
|
||||||
|
}), 409
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return jsonify({'success': False, 'error': 'Invalid JSON in request body'}), 400
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
return jsonify({'success': False, 'error': f'Internal server error: {str(e)}'}), 500
|
return jsonify({
|
||||||
|
'success': False,
|
||||||
|
'error': f'Internal server error: {str(e)}',
|
||||||
|
'error_type': type(e).__name__
|
||||||
|
}), 500
|
||||||
|
|
||||||
@app.route('/api/graph/node/<node_id>', methods=['DELETE'])
|
@app.route('/api/graph/node/<node_id>', methods=['DELETE'])
|
||||||
def delete_graph_node(node_id):
|
def delete_graph_node(node_id):
|
||||||
|
|||||||
125
core/scanner.py
125
core/scanner.py
@ -860,7 +860,7 @@ class Scanner:
|
|||||||
|
|
||||||
def extract_node_from_large_entity(self, large_entity_id: str, node_id: str) -> bool:
|
def extract_node_from_large_entity(self, large_entity_id: str, node_id: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Removes a node from a large entity, allowing it to be processed normally.
|
FIXED: Extract a node from a large entity with proper backend updates and edge re-routing.
|
||||||
"""
|
"""
|
||||||
if not self.graph.graph.has_node(node_id):
|
if not self.graph.graph.has_node(node_id):
|
||||||
return False
|
return False
|
||||||
@ -868,10 +868,54 @@ class Scanner:
|
|||||||
node_data = self.graph.graph.nodes[node_id]
|
node_data = self.graph.graph.nodes[node_id]
|
||||||
metadata = node_data.get('metadata', {})
|
metadata = node_data.get('metadata', {})
|
||||||
|
|
||||||
if metadata.get('large_entity_id') == large_entity_id:
|
if metadata.get('large_entity_id') != large_entity_id:
|
||||||
# Remove the large entity tag
|
return False
|
||||||
del metadata['large_entity_id']
|
|
||||||
self.graph.add_node(node_id, NodeType(node_data['type']), metadata=metadata)
|
# FIXED: Update the large entity's attributes to remove the extracted node
|
||||||
|
if self.graph.graph.has_node(large_entity_id):
|
||||||
|
le_node_data = self.graph.graph.nodes[large_entity_id]
|
||||||
|
le_attributes = le_node_data.get('attributes', [])
|
||||||
|
|
||||||
|
# Update the 'nodes' attribute to remove extracted node
|
||||||
|
nodes_attr = next((attr for attr in le_attributes if attr['name'] == 'nodes'), None)
|
||||||
|
if nodes_attr and isinstance(nodes_attr['value'], list):
|
||||||
|
if node_id in nodes_attr['value']:
|
||||||
|
nodes_attr['value'].remove(node_id)
|
||||||
|
|
||||||
|
# Update the 'count' attribute
|
||||||
|
count_attr = next((attr for attr in le_attributes if attr['name'] == 'count'), None)
|
||||||
|
if count_attr and isinstance(count_attr['value'], (int, float)):
|
||||||
|
count_attr['value'] = max(0, count_attr['value'] - 1)
|
||||||
|
|
||||||
|
# Update the large entity node
|
||||||
|
self.graph.add_node(
|
||||||
|
large_entity_id,
|
||||||
|
NodeType.LARGE_ENTITY,
|
||||||
|
attributes=le_attributes,
|
||||||
|
description=le_node_data.get('description', ''),
|
||||||
|
metadata=le_node_data.get('metadata', {})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Remove the large entity tag from extracted node
|
||||||
|
updated_metadata = metadata.copy()
|
||||||
|
del updated_metadata['large_entity_id']
|
||||||
|
|
||||||
|
# Add extraction history for forensic integrity
|
||||||
|
extraction_record = {
|
||||||
|
'extracted_at': datetime.now(timezone.utc).isoformat(),
|
||||||
|
'extracted_from': large_entity_id,
|
||||||
|
'extraction_method': 'manual'
|
||||||
|
}
|
||||||
|
|
||||||
|
if 'extraction_history' not in updated_metadata:
|
||||||
|
updated_metadata['extraction_history'] = []
|
||||||
|
updated_metadata['extraction_history'].append(extraction_record)
|
||||||
|
|
||||||
|
# Update the extracted node
|
||||||
|
self.graph.add_node(node_id, NodeType(node_data['type']), metadata=updated_metadata)
|
||||||
|
|
||||||
|
# FIXED: Re-route edges that were pointing to the large entity
|
||||||
|
self._reroute_large_entity_edges(large_entity_id, node_id)
|
||||||
|
|
||||||
# Re-enqueue the node for full processing
|
# Re-enqueue the node for full processing
|
||||||
is_ip = _is_valid_ip(node_id)
|
is_ip = _is_valid_ip(node_id)
|
||||||
@ -879,6 +923,7 @@ class Scanner:
|
|||||||
for provider in eligible_providers:
|
for provider in eligible_providers:
|
||||||
provider_name = provider.get_name()
|
provider_name = provider.get_name()
|
||||||
priority = self._get_priority(provider_name)
|
priority = self._get_priority(provider_name)
|
||||||
|
|
||||||
# Use current depth of the large entity if available, else 0
|
# Use current depth of the large entity if available, else 0
|
||||||
depth = 0
|
depth = 0
|
||||||
if self.graph.graph.has_node(large_entity_id):
|
if self.graph.graph.has_node(large_entity_id):
|
||||||
@ -890,9 +935,77 @@ class Scanner:
|
|||||||
self.task_queue.put((time.time(), priority, (provider_name, node_id, depth)))
|
self.task_queue.put((time.time(), priority, (provider_name, node_id, depth)))
|
||||||
self.total_tasks_ever_enqueued += 1
|
self.total_tasks_ever_enqueued += 1
|
||||||
|
|
||||||
|
# Force session state update for immediate frontend sync
|
||||||
|
self._update_session_state()
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
return False
|
def _reroute_large_entity_edges(self, large_entity_id: str, extracted_node_id: str) -> None:
|
||||||
|
"""
|
||||||
|
FIXED: Re-route edges from large entity to extracted node where appropriate.
|
||||||
|
"""
|
||||||
|
if not self.graph.graph.has_node(large_entity_id) or not self.graph.graph.has_node(extracted_node_id):
|
||||||
|
return
|
||||||
|
|
||||||
|
edges_to_reroute = []
|
||||||
|
|
||||||
|
# Find edges pointing TO the large entity that should point to the extracted node
|
||||||
|
for source, target, edge_data in self.graph.graph.in_edges(large_entity_id, data=True):
|
||||||
|
# Check if this edge was originally meant for the extracted node
|
||||||
|
raw_data = edge_data.get('raw_data', {})
|
||||||
|
|
||||||
|
# If the raw data suggests this edge was for the extracted node, re-route it
|
||||||
|
if (raw_data.get('original_target') == extracted_node_id or
|
||||||
|
self._should_reroute_edge(edge_data, extracted_node_id)):
|
||||||
|
edges_to_reroute.append(('in', source, target, edge_data))
|
||||||
|
|
||||||
|
# Find edges pointing FROM the large entity that should point from the extracted node
|
||||||
|
for source, target, edge_data in self.graph.graph.out_edges(large_entity_id, data=True):
|
||||||
|
raw_data = edge_data.get('raw_data', {})
|
||||||
|
|
||||||
|
if (raw_data.get('original_source') == extracted_node_id or
|
||||||
|
self._should_reroute_edge(edge_data, extracted_node_id)):
|
||||||
|
edges_to_reroute.append(('out', source, target, edge_data))
|
||||||
|
|
||||||
|
# Re-route the edges
|
||||||
|
for direction, source, target, edge_data in edges_to_reroute:
|
||||||
|
# Remove old edge
|
||||||
|
self.graph.graph.remove_edge(source, target)
|
||||||
|
|
||||||
|
# Add new edge with extracted node
|
||||||
|
if direction == 'in':
|
||||||
|
new_target = extracted_node_id
|
||||||
|
new_source = source
|
||||||
|
else: # direction == 'out'
|
||||||
|
new_source = extracted_node_id
|
||||||
|
new_target = target
|
||||||
|
|
||||||
|
# Add the re-routed edge
|
||||||
|
self.graph.add_edge(
|
||||||
|
source_id=new_source,
|
||||||
|
target_id=new_target,
|
||||||
|
relationship_type=edge_data.get('relationship_type', 'unknown'),
|
||||||
|
confidence_score=edge_data.get('confidence_score', 0.5),
|
||||||
|
source_provider=edge_data.get('source_provider', 'rerouted'),
|
||||||
|
raw_data=dict(edge_data.get('raw_data', {}), **{'rerouted_from_large_entity': large_entity_id})
|
||||||
|
)
|
||||||
|
|
||||||
|
def _should_reroute_edge(self, edge_data: dict, extracted_node_id: str) -> bool:
|
||||||
|
"""
|
||||||
|
Determine if an edge should be re-routed to an extracted node.
|
||||||
|
This is a heuristic-based approach since we don't store original targets.
|
||||||
|
"""
|
||||||
|
relationship_type = edge_data.get('relationship_type', '')
|
||||||
|
|
||||||
|
# For now, re-route DNS and certificate-based relationships
|
||||||
|
# These are likely to be node-specific rather than entity-wide
|
||||||
|
reroutable_types = [
|
||||||
|
'dns_a_record', 'dns_aaaa_record', 'dns_cname_record',
|
||||||
|
'dns_mx_record', 'dns_ptr_record',
|
||||||
|
'crtsh_san_certificate', 'crtsh_cert_issuer'
|
||||||
|
]
|
||||||
|
|
||||||
|
return any(rtype in relationship_type for rtype in reroutable_types)
|
||||||
|
|
||||||
def _process_provider_result_unified(self, target: str, provider: BaseProvider,
|
def _process_provider_result_unified(self, target: str, provider: BaseProvider,
|
||||||
provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
|
provider_result: ProviderResult, current_depth: int) -> Tuple[Set[str], bool]:
|
||||||
|
|||||||
@ -353,9 +353,6 @@ class GraphManager {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @param {Object} graphData - Graph data from backend
|
|
||||||
*/
|
|
||||||
updateGraph(graphData) {
|
updateGraph(graphData) {
|
||||||
if (!graphData || !graphData.nodes || !graphData.edges) {
|
if (!graphData || !graphData.nodes || !graphData.edges) {
|
||||||
console.warn('Invalid graph data received');
|
console.warn('Invalid graph data received');
|
||||||
@ -382,16 +379,18 @@ class GraphManager {
|
|||||||
|
|
||||||
const nodeMap = new Map(graphData.nodes.map(node => [node.id, node]));
|
const nodeMap = new Map(graphData.nodes.map(node => [node.id, node]));
|
||||||
|
|
||||||
// Filter out hidden nodes before processing for rendering
|
// FIXED: Process all nodes first, then apply hiding logic correctly
|
||||||
const filteredNodes = graphData.nodes.filter(node =>
|
|
||||||
!(node.metadata && node.metadata.large_entity_id)
|
|
||||||
);
|
|
||||||
|
|
||||||
const processedNodes = graphData.nodes.map(node => {
|
const processedNodes = graphData.nodes.map(node => {
|
||||||
const processed = this.processNode(node);
|
const processed = this.processNode(node);
|
||||||
|
|
||||||
|
// FIXED: Only hide if node is still a large entity member
|
||||||
if (node.metadata && node.metadata.large_entity_id) {
|
if (node.metadata && node.metadata.large_entity_id) {
|
||||||
processed.hidden = true;
|
processed.hidden = true;
|
||||||
|
} else {
|
||||||
|
// FIXED: Ensure extracted nodes are visible
|
||||||
|
processed.hidden = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return processed;
|
return processed;
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -401,6 +400,7 @@ class GraphManager {
|
|||||||
let fromId = edge.from;
|
let fromId = edge.from;
|
||||||
let toId = edge.to;
|
let toId = edge.to;
|
||||||
|
|
||||||
|
// FIXED: Only re-route if nodes are STILL in large entities
|
||||||
if (fromNode && fromNode.metadata && fromNode.metadata.large_entity_id) {
|
if (fromNode && fromNode.metadata && fromNode.metadata.large_entity_id) {
|
||||||
fromId = fromNode.metadata.large_entity_id;
|
fromId = fromNode.metadata.large_entity_id;
|
||||||
}
|
}
|
||||||
@ -423,6 +423,7 @@ class GraphManager {
|
|||||||
const newNodes = processedNodes.filter(node => !existingNodeIds.includes(node.id));
|
const newNodes = processedNodes.filter(node => !existingNodeIds.includes(node.id));
|
||||||
const newEdges = processedEdges.filter(edge => !existingEdgeIds.includes(edge.id));
|
const newEdges = processedEdges.filter(edge => !existingEdgeIds.includes(edge.id));
|
||||||
|
|
||||||
|
// FIXED: Update all nodes to ensure extracted nodes become visible
|
||||||
this.nodes.update(processedNodes);
|
this.nodes.update(processedNodes);
|
||||||
this.edges.update(processedEdges);
|
this.edges.update(processedEdges);
|
||||||
|
|
||||||
|
|||||||
@ -2023,6 +2023,16 @@ class DNSReconApp {
|
|||||||
|
|
||||||
async extractNode(largeEntityId, nodeId) {
|
async extractNode(largeEntityId, nodeId) {
|
||||||
try {
|
try {
|
||||||
|
console.log(`Extracting node ${nodeId} from large entity ${largeEntityId}`);
|
||||||
|
|
||||||
|
// Show immediate feedback
|
||||||
|
const button = document.querySelector(`[data-node-id="${nodeId}"][data-large-entity-id="${largeEntityId}"]`);
|
||||||
|
if (button) {
|
||||||
|
const originalContent = button.innerHTML;
|
||||||
|
button.innerHTML = '[...]';
|
||||||
|
button.disabled = true;
|
||||||
|
}
|
||||||
|
|
||||||
const response = await this.apiCall('/api/graph/large-entity/extract', 'POST', {
|
const response = await this.apiCall('/api/graph/large-entity/extract', 'POST', {
|
||||||
large_entity_id: largeEntityId,
|
large_entity_id: largeEntityId,
|
||||||
node_id: nodeId,
|
node_id: nodeId,
|
||||||
@ -2031,41 +2041,46 @@ class DNSReconApp {
|
|||||||
if (response.success) {
|
if (response.success) {
|
||||||
this.showSuccess(response.message);
|
this.showSuccess(response.message);
|
||||||
|
|
||||||
// If the scanner was idle, it's now running. Start polling to see the new node appear.
|
// FIXED: Don't update local modal data - let backend be source of truth
|
||||||
if (this.scanStatus === 'idle') {
|
// Force immediate graph update to get fresh backend data
|
||||||
this.startPolling(1000);
|
console.log('Extraction successful, updating graph with fresh backend data');
|
||||||
} else {
|
await this.updateGraph();
|
||||||
// If already scanning, force a quick graph update to see the change sooner.
|
|
||||||
setTimeout(() => this.updateGraph(), 500);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Immediately update the modal view
|
// FIXED: Re-fetch graph data instead of manipulating local state
|
||||||
if (this.graphManager) {
|
setTimeout(async () => {
|
||||||
const largeEntityNode = this.graphManager.nodes.get(largeEntityId);
|
try {
|
||||||
if (largeEntityNode && largeEntityNode.attributes) {
|
const graphResponse = await this.apiCall('/api/graph');
|
||||||
|
if (graphResponse.success) {
|
||||||
|
this.graphManager.updateGraph(graphResponse.graph);
|
||||||
|
|
||||||
// Find and update the 'nodes' attribute
|
// Update modal with fresh data if still open
|
||||||
const nodesAttribute = largeEntityNode.attributes.find(attr => attr.name === 'nodes');
|
if (this.elements.nodeModal && this.elements.nodeModal.style.display === 'block') {
|
||||||
if (nodesAttribute && Array.isArray(nodesAttribute.value)) {
|
if (this.graphManager.nodes) {
|
||||||
nodesAttribute.value = nodesAttribute.value.filter(id => id !== nodeId);
|
const updatedLargeEntity = this.graphManager.nodes.get(largeEntityId);
|
||||||
|
if (updatedLargeEntity) {
|
||||||
|
this.showNodeModal(updatedLargeEntity);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error refreshing graph after extraction:', error);
|
||||||
|
}
|
||||||
|
}, 100);
|
||||||
|
|
||||||
// Find and update the 'count' attribute
|
|
||||||
const countAttribute = largeEntityNode.attributes.find(attr => attr.name === 'count');
|
|
||||||
if (countAttribute) {
|
|
||||||
countAttribute.value = (countAttribute.value || 0) - 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Re-render the modal with the updated data
|
|
||||||
this.showNodeModal(largeEntityNode);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
throw new Error(response.error || 'Extraction failed on the server.');
|
throw new Error(response.error || 'Extraction failed on the server.');
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Failed to extract node:', error);
|
console.error('Failed to extract node:', error);
|
||||||
this.showError(`Extraction failed: ${error.message}`);
|
this.showError(`Extraction failed: ${error.message}`);
|
||||||
|
|
||||||
|
// Restore button state on error
|
||||||
|
const button = document.querySelector(`[data-node-id="${nodeId}"][data-large-entity-id="${largeEntityId}"]`);
|
||||||
|
if (button) {
|
||||||
|
button.innerHTML = '[+]';
|
||||||
|
button.disabled = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user