diff --git a/core/graph_manager.py b/core/graph_manager.py index 3dd03b1..297f811 100644 --- a/core/graph_manager.py +++ b/core/graph_manager.py @@ -131,7 +131,7 @@ class GraphManager: return all_correlations def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[Dict[str, Any]] = None, - description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool: + description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool: """Add a node to the graph, update attributes, and process correlations.""" is_new_node = not self.graph.has_node(node_id) if is_new_node: @@ -157,51 +157,162 @@ class GraphManager: correlations = self._check_for_correlations(node_id, attributes) for corr in correlations: value = corr['value'] - - found_major_node_id = None - if isinstance(value, str): - # Check if the value contains ANY existing major node ID from the entire graph - for existing_node in self.graph.nodes(): - # Ensure the existing_node is a major type (domain/ip/asn) and is a substring of the correlation value - if (self.graph.nodes[existing_node].get('type') in [NodeType.DOMAIN.value, NodeType.IP.value, NodeType.ASN.value] and - existing_node in value): - found_major_node_id = existing_node - break # Found a major node, no need to check further - - if found_major_node_id: - # An existing major node is part of the value; link to it directly. - for c_node_id in set(corr['nodes']): - if self.graph.has_node(c_node_id) and c_node_id != found_major_node_id: + + # STEP 1: Substring check against all existing nodes + if self._correlation_value_matches_existing_node(value): + # Skip creating correlation node - would be redundant + continue + + # STEP 2: Filter out node pairs that already have direct edges + eligible_nodes = self._filter_nodes_without_direct_edges(set(corr['nodes'])) + + if len(eligible_nodes) < 2: + # Need at least 2 nodes to create a correlation + continue + + # STEP 3: Check for existing correlation node with same connection pattern + correlation_nodes_with_pattern = self._find_correlation_nodes_with_same_pattern(eligible_nodes) + + if correlation_nodes_with_pattern: + # STEP 4: Merge with existing correlation node + target_correlation_node = correlation_nodes_with_pattern[0] + self._merge_correlation_values(target_correlation_node, value, corr) + else: + # STEP 5: Create new correlation node for eligible nodes only + correlation_node_id = f"corr_{abs(hash(str(sorted(eligible_nodes))))}" + self.add_node(correlation_node_id, NodeType.CORRELATION_OBJECT, + metadata={'values': [value], 'sources': corr['sources'], + 'correlated_nodes': list(eligible_nodes)}) + + # Create edges from eligible nodes to this correlation node + for c_node_id in eligible_nodes: + if self.graph.has_node(c_node_id): attribute = corr['sources'][0]['path'].split('.')[-1] relationship_type = f"c_{attribute}" - self.add_edge(c_node_id, found_major_node_id, relationship_type, confidence_score=0.9) - continue # Skip creating a redundant correlation node - - # Proceed to create a new correlation node if no major node was found. - correlation_node_id = f"{value}" - if not self.graph.has_node(correlation_node_id): - self.add_node(correlation_node_id, NodeType.CORRELATION_OBJECT, - metadata={'value': value, 'sources': corr['sources'], - 'correlated_nodes': list(set(corr['nodes']))}) - else: # Update existing correlation node - existing_meta = self.graph.nodes[correlation_node_id]['metadata'] - existing_nodes = set(existing_meta.get('correlated_nodes', [])) - existing_meta['correlated_nodes'] = list(existing_nodes.union(set(corr['nodes']))) - existing_sources = {(s['node_id'], s['path']) for s in existing_meta.get('sources', [])} - for s in corr['sources']: - existing_sources.add((s['node_id'], s['path'])) - existing_meta['sources'] = [{'node_id': nid, 'path': p} for nid, p in existing_sources] - - for c_node_id in set(corr['nodes']): - attribute = corr['sources'][0]['path'].split('.')[-1] - relationship_type = f"c_{attribute}" - self.add_edge(c_node_id, correlation_node_id, relationship_type, confidence_score=0.9) + self.add_edge(c_node_id, correlation_node_id, relationship_type, confidence_score=0.9) self._update_correlation_index(node_id, attributes) self.last_modified = datetime.now(timezone.utc).isoformat() return is_new_node + def _filter_nodes_without_direct_edges(self, node_set: set) -> set: + """ + Filter out nodes that already have direct edges between them. + Returns set of nodes that should be included in correlation. + """ + nodes_list = list(node_set) + eligible_nodes = set(node_set) # Start with all nodes + + # Check all pairs of nodes + for i in range(len(nodes_list)): + for j in range(i + 1, len(nodes_list)): + node_a = nodes_list[i] + node_b = nodes_list[j] + + # Check if direct edge exists in either direction + if self._has_direct_edge_bidirectional(node_a, node_b): + # Remove both nodes from eligible set since they're already connected + eligible_nodes.discard(node_a) + eligible_nodes.discard(node_b) + + return eligible_nodes + + def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool: + """ + Check if there's a direct edge between two nodes in either direction. + Returns True if node_a→node_b OR node_b→node_a exists. + """ + return (self.graph.has_edge(node_a, node_b) or + self.graph.has_edge(node_b, node_a)) + + def _correlation_value_matches_existing_node(self, correlation_value: str) -> bool: + """ + Check if correlation value contains any existing node ID as substring. + Returns True if match found (correlation node should NOT be created). + """ + correlation_str = str(correlation_value).lower() + + # Check against all existing nodes + for existing_node_id in self.graph.nodes(): + if existing_node_id.lower() in correlation_str: + return True + + return False + + def _find_correlation_nodes_with_same_pattern(self, node_set: set) -> List[str]: + """ + Find existing correlation nodes that have the exact same pattern of connected nodes. + Returns list of correlation node IDs with matching patterns. + """ + correlation_nodes = self.get_nodes_by_type(NodeType.CORRELATION_OBJECT) + matching_nodes = [] + + for corr_node_id in correlation_nodes: + # Get all nodes connected to this correlation node + connected_nodes = set() + + # Add all predecessors (nodes pointing TO the correlation node) + connected_nodes.update(self.graph.predecessors(corr_node_id)) + + # Add all successors (nodes pointed TO by the correlation node) + connected_nodes.update(self.graph.successors(corr_node_id)) + + # Check if the pattern matches exactly + if connected_nodes == node_set: + matching_nodes.append(corr_node_id) + + return matching_nodes + + def _merge_correlation_values(self, target_node_id: str, new_value: Any, corr_data: Dict) -> None: + """ + Merge a new correlation value into an existing correlation node. + Uses same logic as large entity merging. + """ + if not self.graph.has_node(target_node_id): + return + + target_metadata = self.graph.nodes[target_node_id]['metadata'] + + # Get existing values (ensure it's a list) + existing_values = target_metadata.get('values', []) + if not isinstance(existing_values, list): + existing_values = [existing_values] + + # Add new value if not already present + if new_value not in existing_values: + existing_values.append(new_value) + + # Merge sources + existing_sources = target_metadata.get('sources', []) + new_sources = corr_data.get('sources', []) + + # Create set of unique sources based on (node_id, path) tuples + source_set = set() + for source in existing_sources + new_sources: + source_tuple = (source['node_id'], source['path']) + source_set.add(source_tuple) + + # Convert back to list of dictionaries + merged_sources = [{'node_id': nid, 'path': path} for nid, path in source_set] + + # Update metadata + target_metadata.update({ + 'values': existing_values, + 'sources': merged_sources, + 'correlated_nodes': list(set(target_metadata.get('correlated_nodes', []) + corr_data.get('nodes', []))), + 'merge_count': len(existing_values), + 'last_merge_timestamp': datetime.now(timezone.utc).isoformat() + }) + + # Update description to reflect merged nature + value_count = len(existing_values) + node_count = len(target_metadata['correlated_nodes']) + self.graph.nodes[target_node_id]['description'] = ( + f"Correlation container with {value_count} merged values " + f"across {node_count} nodes" + ) + def add_edge(self, source_id: str, target_id: str, relationship_type: str, confidence_score: float = 0.5, source_provider: str = "unknown", raw_data: Optional[Dict[str, Any]] = None) -> bool: diff --git a/static/css/main.css b/static/css/main.css index 95d0529..2107b7a 100644 --- a/static/css/main.css +++ b/static/css/main.css @@ -1000,6 +1000,46 @@ input[type="text"]:focus, select:focus { font-style: italic; } +.correlation-values-list { + margin-top: 1rem; +} + +.correlation-value-details { + margin-bottom: 0.5rem; + border: 1px solid #333; + border-radius: 3px; +} + +.correlation-value-details summary { + padding: 0.5rem; + background-color: #3a3a3a; + cursor: pointer; + outline: none; + color: #c7c7c7; +} + +.correlation-value-details summary:hover { + background-color: #4a4a4a; +} + +.correlation-value-details .detail-row { + margin-left: 1rem; + margin-right: 1rem; + padding: 0.5rem 0; +} + +.correlation-value-details .detail-label { + color: #999; + font-weight: 500; +} + +.correlation-value-details .detail-value { + color: #c7c7c7; + word-break: break-all; + font-family: 'Roboto Mono', monospace; + font-size: 0.9em; +} + @keyframes fadeIn { from {opacity: 0; transform: scale(0.95);} to {opacity: 1; transform: scale(1);} diff --git a/static/js/graph.js b/static/js/graph.js index 423b1ee..2c8e183 100644 --- a/static/js/graph.js +++ b/static/js/graph.js @@ -365,11 +365,24 @@ class GraphManager { } } + // Handle merged correlation objects (similar to large entities) if (node.type === 'correlation_object') { - const value = node.metadata.value; - const label = Array.isArray(value) ? `Correlated (${value.length})` : String(value); - processedNode.label = this.formatNodeLabel(label, node.type); - processedNode.title = Array.isArray(value) ? value.join(', ') : value; + const metadata = node.metadata || {}; + const values = metadata.values || []; + const mergeCount = metadata.merge_count || 1; + + if (mergeCount > 1) { + // Display as merged correlation container + processedNode.label = `Correlations (${mergeCount})`; + processedNode.title = `Merged correlation container with ${mergeCount} values: ${values.slice(0, 3).join(', ')}${values.length > 3 ? '...' : ''}`; + processedNode.borderWidth = 3; // Thicker border for merged nodes + } else { + // Single correlation value + const value = Array.isArray(values) && values.length > 0 ? values[0] : (metadata.value || 'Unknown'); + const displayValue = typeof value === 'string' && value.length > 20 ? value.substring(0, 17) + '...' : value; + processedNode.label = `Corr: ${displayValue}`; + processedNode.title = `Correlation: ${value}`; + } } return processedNode; diff --git a/static/js/main.js b/static/js/main.js index ccb8359..b2a8594 100644 --- a/static/js/main.js +++ b/static/js/main.js @@ -799,10 +799,51 @@ class DNSReconApp { */ generateNodeDetailsHtml(node) { if (!node) return '
Details not available.
'; - + let detailsHtml = ''; return detailsHtml; }