diff --git a/app.py b/app.py index 1dd1693..701f8e8 100644 --- a/app.py +++ b/app.py @@ -200,13 +200,14 @@ def extract_from_large_entity(): if not scanner: return jsonify({'success': False, 'error': 'No active session found'}), 404 - success = scanner.extract_node_from_large_entity(large_entity_id, node_id) + # TODO: re-implement node extraction (Scanner.extract_node_from_large_entity is removed by this change); until then the success path of this try block has no return statement. + #success = scanner.extract_node_from_large_entity(large_entity_id, node_id) - if success: - session_manager.update_session_scanner(user_session_id, scanner) - return jsonify({'success': True, 'message': f'Node {node_id} extracted successfully.'}) - else: - return jsonify({'success': False, 'error': f'Failed to extract node {node_id}.'}), 500 + #if success: + # session_manager.update_session_scanner(user_session_id, scanner) + # return jsonify({'success': True, 'message': f'Node {node_id} extracted successfully.'}) + #else: + # return jsonify({'success': False, 'error': f'Failed to extract node {node_id}.'}), 500 except Exception as e: traceback.print_exc() diff --git a/core/graph_manager.py b/core/graph_manager.py index 74fb46c..4abe919 100644 --- a/core/graph_manager.py +++ b/core/graph_manager.py @@ -114,36 +114,6 @@ class GraphManager: self.last_modified = datetime.now(timezone.utc).isoformat() return True - def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool: - """ - Removes a node from a large entity's internal lists and updates its count. - This prepares the large entity for the node's promotion to a regular node. 
- """ - if not self.graph.has_node(large_entity_id): - return False - - node_data = self.graph.nodes[large_entity_id] - attributes = node_data.get('attributes', []) - - # Find the 'nodes' attribute dictionary in the list - nodes_attr = next((attr for attr in attributes if attr.get('name') == 'nodes'), None) - - # Remove from the list of member nodes - if nodes_attr and 'value' in nodes_attr and isinstance(nodes_attr['value'], list) and node_id_to_extract in nodes_attr['value']: - nodes_attr['value'].remove(node_id_to_extract) - - # Find the 'count' attribute and update it - count_attr = next((attr for attr in attributes if attr.get('name') == 'count'), None) - if count_attr: - count_attr['value'] = len(nodes_attr['value']) - else: - # This can happen if the node was already extracted, which is not an error. - print(f"Warning: Node {node_id_to_extract} not found in the 'nodes' list of {large_entity_id}.") - return True # Proceed as if successful - - self.last_modified = datetime.now(timezone.utc).isoformat() - return True - def remove_node(self, node_id: str) -> bool: """Remove a node and its connected edges from the graph.""" if not self.graph.has_node(node_id): diff --git a/core/scanner.py b/core/scanner.py index bd6c8e9..43c9d9d 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -785,10 +785,7 @@ class Scanner: discovered, is_large_entity = self._process_provider_result_unified( target, provider, provider_result, depth ) - if is_large_entity: - large_entity_members.update(discovered) - else: - new_targets.update(discovered) + new_targets.update(discovered) except Exception as e: provider_successful = False @@ -833,7 +830,7 @@ class Scanner: """ provider_name = provider.get_name() discovered_targets = set() - large_entity_members = set() + #large_entity_members = set() if self._is_stop_requested(): return discovered_targets, False @@ -845,11 +842,11 @@ class Scanner: is_large_entity = eligible_relationship_count > self.config.large_entity_threshold - if 
is_large_entity: + #if is_large_entity: # Create the large entity node and get the set of its members - large_entity_members = self._create_large_entity_from_provider_result( - target, provider_name, provider_result, current_depth - ) + #large_entity_members = self._create_large_entity_from_result( + # target, provider_name, provider_result, current_depth + #) # Process ALL relationships to build the complete underlying data model for i, relationship in enumerate(provider_result.relationships): @@ -909,64 +906,6 @@ class Scanner: return discovered_targets, is_large_entity - def _create_large_entity_from_provider_result(self, source: str, provider_name: str, - provider_result: ProviderResult, current_depth: int) -> Set[str]: - """ - Create a large entity node and connect it to the source and any shared - non-member nodes like CAs or ISPs. - """ - entity_id = f"large_entity_{provider_name}_{hash(source) & 0x7FFFFFFF}" - - members = { - rel.target_node for rel in provider_result.relationships - if _is_valid_domain(rel.target_node) or _is_valid_ip(rel.target_node) - } - - if not members: - return set() - - first_member = next(iter(members)) - node_type = 'ip' if _is_valid_ip(first_member) else 'domain' - - attributes_dict = { - 'count': len(members), - 'nodes': list(members), - 'node_type': node_type, - 'source_provider': provider_name, - 'discovery_depth': current_depth, - 'threshold_exceeded': self.config.large_entity_threshold, - } - attributes_list = [ - { - "name": key, "value": value, "type": "large_entity_info", - "provider": provider_name, "confidence": 0.9, "metadata": {} - } for key, value in attributes_dict.items() - ] - - description = f'Large entity created due to {len(members)} relationships from {provider_name}' - - self.graph.add_node(entity_id, NodeType.LARGE_ENTITY, attributes=attributes_list, description=description) - - # Add a representative edge from the source to the large entity - if provider_result.relationships: - rep_rel = 
provider_result.relationships[0] - self.graph.add_edge(source, entity_id, rep_rel.relationship_type, 0.9, provider_name, - {'large_entity_info': f'Contains {len(members)} {node_type}s'}) - - # Create edges from the large entity to shared non-member nodes (e.g., CAs, ISPs) - processed_targets = set() - for rel in provider_result.relationships: - if rel.source_node in members and rel.target_node not in members: - if rel.target_node not in processed_targets: - self.graph.add_edge( - entity_id, rel.target_node, rel.relationship_type, rel.confidence, - rel.provider, rel.raw_data - ) - processed_targets.add(rel.target_node) - - self.logger.logger.warning(f"Large entity created: {entity_id} contains {len(members)} targets from {provider_name}") - - return members def stop_scan(self) -> bool: """Request immediate scan termination with proper cleanup.""" @@ -995,127 +934,6 @@ class Scanner: traceback.print_exc() return False - def extract_node_from_large_entity(self, large_entity_id: str, node_id_to_extract: str) -> bool: - """ - Extracts a node from a large entity, restores ALL of its original connections, - and re-queues it for scanning. - """ - if not self.graph.graph.has_node(large_entity_id): - return False - - # Extract the node from the large entity's internal list - success = self.graph.extract_node_from_large_entity(large_entity_id, node_id_to_extract) - if not success: - return False - - # Restore all incoming and outgoing edges for the extracted node - # These edges already exist in the graph data model; this ensures they are "activated" - # for the frontend. 
- for u, v, data in self.graph.graph.in_edges(node_id_to_extract, data=True): - self.graph.add_edge(u, v, data.get('relationship_type'), data.get('confidence_score'), - data.get('source_provider'), data.get('raw_data')) - - for u, v, data in self.graph.graph.out_edges(node_id_to_extract, data=True): - self.graph.add_edge(u, v, data.get('relationship_type'), data.get('confidence_score'), - data.get('source_provider'), data.get('raw_data')) - - # Re-queue the extracted node for further scanning if it is a domain or IP - is_ip = _is_valid_ip(node_id_to_extract) - is_domain = _is_valid_domain(node_id_to_extract) - - if is_domain or is_ip: - large_entity_attributes = self.graph.graph.nodes[large_entity_id].get('attributes', []) - discovery_depth_attr = next((attr for attr in large_entity_attributes if attr.get('name') == 'discovery_depth'), None) - current_depth = discovery_depth_attr['value'] if discovery_depth_attr else 0 - - eligible_providers = self._get_eligible_providers(node_id_to_extract, is_ip, False) - for provider in eligible_providers: - # Exclude DNS and correlation providers from re-processing - if provider.get_name() not in ['dns', 'correlation']: - provider_name = provider.get_name() - priority = self._get_priority(provider_name) - self.task_queue.put((time.time(), priority, (provider_name, node_id_to_extract, current_depth))) - self.total_tasks_ever_enqueued += 1 - - if self.status != ScanStatus.RUNNING: - self.status = ScanStatus.RUNNING - self._update_session_state() - - if not self.scan_thread or not self.scan_thread.is_alive(): - self.scan_thread = threading.Thread( - target=self._execute_scan, - args=(self.current_target, self.max_depth), - daemon=True - ) - self.scan_thread.start() - else: - self.logger.logger.info(f"Extracted non-scannable node {node_id_to_extract} of type {self.graph.graph.nodes[node_id_to_extract].get('type', 'unknown')}") - - return True - - def _determine_extracted_node_type(self, node_id: str, large_entity_id: str) -> 
NodeType: - """ - FIXED: Determine the correct node type for a node being extracted from a large entity. - Uses multiple strategies to ensure accurate type detection. - """ - from utils.helpers import _is_valid_ip, _is_valid_domain - - # Strategy 1: Check if node already exists in graph with a type - if self.graph.has_node(node_id): - existing_type = self.graph.nodes[node_id].get('type') - if existing_type: - try: - return NodeType(existing_type) - except ValueError: - pass - - # Strategy 2: Look for existing relationships to this node to infer type - for source, target, edge_data in self.graph.edges(data=True): - if target == node_id: - rel_type = edge_data.get('relationship_type', '') - provider = edge_data.get('source_provider', '') - - # CA nodes from certificate issuer relationships - if provider == 'crtsh' and rel_type == 'crtsh_cert_issuer': - return NodeType.CA - - # ISP nodes from Shodan - if provider == 'shodan' and rel_type == 'shodan_isp': - return NodeType.ISP - - # Correlation objects - if rel_type.startswith('corr_'): - return NodeType.CORRELATION_OBJECT - - if source == node_id: - rel_type = edge_data.get('relationship_type', '') - provider = edge_data.get('source_provider', '') - - # Source nodes in cert issuer relationships are CAs - if provider == 'crtsh' and rel_type == 'crtsh_cert_issuer': - return NodeType.CA - - # Strategy 3: Format-based detection (fallback) - if _is_valid_ip(node_id): - return NodeType.IP - elif _is_valid_domain(node_id): - return NodeType.DOMAIN - - # Strategy 4: Check large entity context - if self.graph.has_node(large_entity_id): - large_entity_data = self.graph.nodes[large_entity_id] - attributes = large_entity_data.get('attributes', []) - - node_type_attr = next((attr for attr in attributes if attr.get('name') == 'node_type'), None) - if node_type_attr: - entity_node_type = node_type_attr.get('value', 'domain') - if entity_node_type == 'ip': - return NodeType.IP - else: - return NodeType.DOMAIN - - # Final fallback - 
return NodeType.DOMAIN def _update_session_state(self) -> None: """ Update the scanner state in Redis for GUI updates. diff --git a/static/js/graph.js b/static/js/graph.js index 17a411a..7087eb9 100644 --- a/static/js/graph.js +++ b/static/js/graph.js @@ -383,27 +383,6 @@ class GraphManager { } } - this.largeEntityMembers.clear(); - const largeEntityMap = new Map(); - - graphData.nodes.forEach(node => { - if (node.type === 'large_entity' && node.attributes) { - const nodesAttribute = this.findAttributeByName(node.attributes, 'nodes'); - if (nodesAttribute && Array.isArray(nodesAttribute.value)) { - nodesAttribute.value.forEach(nodeId => { - largeEntityMap.set(nodeId, node.id); - this.largeEntityMembers.add(nodeId); - }); - } - } - }); - - const filteredNodes = graphData.nodes.filter(node => { - return !this.largeEntityMembers.has(node.id) || node.type === 'large_entity'; - }); - - console.log(`Filtered ${graphData.nodes.length - filteredNodes.length} large entity member nodes from visualization`); - // Process nodes with proper certificate coloring const processedNodes = filteredNodes.map(node => { const processed = this.processNode(node); @@ -427,8 +406,6 @@ class GraphManager { const mergedEdges = {}; graphData.edges.forEach(edge => { - const fromNode = largeEntityMap.has(edge.from) ? largeEntityMap.get(edge.from) : edge.from; - const toNode = largeEntityMap.has(edge.to) ? 
largeEntityMap.get(edge.to) : edge.to; const mergeKey = `${fromNode}-${toNode}-${edge.label}`; if (!mergedEdges[mergeKey]) { @@ -477,7 +454,6 @@ class GraphManager { } console.log(`Graph updated: ${processedNodes.length} nodes, ${processedEdges.length} edges (${newNodes.length} new nodes, ${newEdges.length} new edges)`); - console.log(`Large entity members hidden: ${this.largeEntityMembers.size}`); } catch (error) { console.error('Failed to update graph:', error); @@ -1053,7 +1029,7 @@ class GraphManager { this.nodes.clear(); this.edges.clear(); this.history = []; - this.largeEntityMembers.clear(); // Clear large entity tracking + this.largeEntityMembers.clear(); this.initialTargetIds.clear(); // Show placeholder @@ -1211,7 +1187,6 @@ class GraphManager { const basicStats = { nodeCount: this.nodes.length, edgeCount: this.edges.length, - largeEntityMembersHidden: this.largeEntityMembers.size }; // Add forensic statistics