update edge labels
This commit is contained in:
parent
d7adf9ad8b
commit
ae07635ab6
@ -1,3 +1,5 @@
|
|||||||
|
# core/graph_manager.py
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Graph data model for DNSRecon using NetworkX.
|
Graph data model for DNSRecon using NetworkX.
|
||||||
Manages in-memory graph storage with confidence scoring and forensic metadata.
|
Manages in-memory graph storage with confidence scoring and forensic metadata.
|
||||||
@ -50,21 +52,23 @@ class GraphManager:
|
|||||||
self.__dict__.update(state)
|
self.__dict__.update(state)
|
||||||
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
|
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
|
||||||
|
|
||||||
def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = []):
|
def _update_correlation_index(self, node_id: str, data: Any, path: List[str] = [], parent_attr: str = ""):
|
||||||
"""Recursively traverse metadata and add hashable values to the index."""
|
"""Recursively traverse metadata and add hashable values to the index with better path tracking."""
|
||||||
if path is None:
|
if path is None:
|
||||||
path = []
|
path = []
|
||||||
|
|
||||||
if isinstance(data, dict):
|
if isinstance(data, dict):
|
||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
self._update_correlation_index(node_id, value, path + [key])
|
self._update_correlation_index(node_id, value, path + [key], key)
|
||||||
elif isinstance(data, list):
|
elif isinstance(data, list):
|
||||||
for i, item in enumerate(data):
|
for i, item in enumerate(data):
|
||||||
self._update_correlation_index(node_id, item, path + [f"[{i}]"])
|
# Instead of just using [i], include the parent attribute context
|
||||||
|
list_path_component = f"[{i}]" if not parent_attr else f"{parent_attr}[{i}]"
|
||||||
|
self._update_correlation_index(node_id, item, path + [list_path_component], parent_attr)
|
||||||
else:
|
else:
|
||||||
self._add_to_correlation_index(node_id, data, ".".join(path))
|
self._add_to_correlation_index(node_id, data, ".".join(path), parent_attr)
|
||||||
|
|
||||||
def _add_to_correlation_index(self, node_id: str, value: Any, path_str: str):
|
def _add_to_correlation_index(self, node_id: str, value: Any, path_str: str, parent_attr: str = ""):
|
||||||
"""Add a hashable value to the correlation index, filtering out noise."""
|
"""Add a hashable value to the correlation index, filtering out noise."""
|
||||||
if not isinstance(value, (str, int, float, bool)) or value is None:
|
if not isinstance(value, (str, int, float, bool)) or value is None:
|
||||||
return
|
return
|
||||||
@ -90,10 +94,47 @@ class GraphManager:
|
|||||||
self.correlation_index[value] = {}
|
self.correlation_index[value] = {}
|
||||||
if node_id not in self.correlation_index[value]:
|
if node_id not in self.correlation_index[value]:
|
||||||
self.correlation_index[value][node_id] = []
|
self.correlation_index[value][node_id] = []
|
||||||
if path_str not in self.correlation_index[value][node_id]:
|
|
||||||
self.correlation_index[value][node_id].append(path_str)
|
|
||||||
|
|
||||||
def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = []) -> List[Dict]:
|
# Store both the full path and the parent attribute for better edge labeling
|
||||||
|
correlation_entry = {
|
||||||
|
'path': path_str,
|
||||||
|
'parent_attr': parent_attr,
|
||||||
|
'meaningful_attr': self._extract_meaningful_attribute(path_str, parent_attr)
|
||||||
|
}
|
||||||
|
|
||||||
|
if correlation_entry not in self.correlation_index[value][node_id]:
|
||||||
|
self.correlation_index[value][node_id].append(correlation_entry)
|
||||||
|
|
||||||
|
def _extract_meaningful_attribute(self, path_str: str, parent_attr: str = "") -> str:
|
||||||
|
"""Extract the most meaningful attribute name from a path string."""
|
||||||
|
if not path_str:
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
path_parts = path_str.split('.')
|
||||||
|
|
||||||
|
# Look for the last non-array-index part
|
||||||
|
for part in reversed(path_parts):
|
||||||
|
# Skip array indices like [0], [1], etc.
|
||||||
|
if not (part.startswith('[') and part.endswith(']') and part[1:-1].isdigit()):
|
||||||
|
# Clean up compound names like "hostnames[0]" to just "hostnames"
|
||||||
|
clean_part = re.sub(r'\[\d+\]$', '', part)
|
||||||
|
if clean_part:
|
||||||
|
return clean_part
|
||||||
|
|
||||||
|
# Fallback to parent attribute if available
|
||||||
|
if parent_attr:
|
||||||
|
return parent_attr
|
||||||
|
|
||||||
|
# Last resort - use the first meaningful part
|
||||||
|
for part in path_parts:
|
||||||
|
if not (part.startswith('[') and part.endswith(']') and part[1:-1].isdigit()):
|
||||||
|
clean_part = re.sub(r'\[\d+\]$', '', part)
|
||||||
|
if clean_part:
|
||||||
|
return clean_part
|
||||||
|
|
||||||
|
return "correlation"
|
||||||
|
|
||||||
|
def _check_for_correlations(self, new_node_id: str, data: Any, path: List[str] = [], parent_attr: str = "") -> List[Dict]:
|
||||||
"""Recursively traverse metadata to find correlations with existing data."""
|
"""Recursively traverse metadata to find correlations with existing data."""
|
||||||
if path is None:
|
if path is None:
|
||||||
path = []
|
path = []
|
||||||
@ -103,10 +144,11 @@ class GraphManager:
|
|||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
if key == 'source': # Avoid correlating on the provider name
|
if key == 'source': # Avoid correlating on the provider name
|
||||||
continue
|
continue
|
||||||
all_correlations.extend(self._check_for_correlations(new_node_id, value, path + [key]))
|
all_correlations.extend(self._check_for_correlations(new_node_id, value, path + [key], key))
|
||||||
elif isinstance(data, list):
|
elif isinstance(data, list):
|
||||||
for i, item in enumerate(data):
|
for i, item in enumerate(data):
|
||||||
all_correlations.extend(self._check_for_correlations(new_node_id, item, path + [f"[{i}]"]))
|
list_path_component = f"[{i}]" if not parent_attr else f"{parent_attr}[{i}]"
|
||||||
|
all_correlations.extend(self._check_for_correlations(new_node_id, item, path + [list_path_component], parent_attr))
|
||||||
else:
|
else:
|
||||||
value = data
|
value = data
|
||||||
if value in self.correlation_index:
|
if value in self.correlation_index:
|
||||||
@ -117,11 +159,31 @@ class GraphManager:
|
|||||||
if len(unique_nodes) < 2:
|
if len(unique_nodes) < 2:
|
||||||
return all_correlations # Correlation must involve at least two distinct nodes
|
return all_correlations # Correlation must involve at least two distinct nodes
|
||||||
|
|
||||||
new_source = {'node_id': new_node_id, 'path': ".".join(path)}
|
new_source = {
|
||||||
|
'node_id': new_node_id,
|
||||||
|
'path': ".".join(path),
|
||||||
|
'parent_attr': parent_attr,
|
||||||
|
'meaningful_attr': self._extract_meaningful_attribute(".".join(path), parent_attr)
|
||||||
|
}
|
||||||
all_sources = [new_source]
|
all_sources = [new_source]
|
||||||
for node_id, paths in existing_nodes_with_paths.items():
|
|
||||||
for p_str in paths:
|
for node_id, path_entries in existing_nodes_with_paths.items():
|
||||||
all_sources.append({'node_id': node_id, 'path': p_str})
|
for entry in path_entries:
|
||||||
|
if isinstance(entry, dict):
|
||||||
|
all_sources.append({
|
||||||
|
'node_id': node_id,
|
||||||
|
'path': entry['path'],
|
||||||
|
'parent_attr': entry.get('parent_attr', ''),
|
||||||
|
'meaningful_attr': entry.get('meaningful_attr', self._extract_meaningful_attribute(entry['path'], entry.get('parent_attr', '')))
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
# Handle legacy string-only entries
|
||||||
|
all_sources.append({
|
||||||
|
'node_id': node_id,
|
||||||
|
'path': str(entry),
|
||||||
|
'parent_attr': '',
|
||||||
|
'meaningful_attr': self._extract_meaningful_attribute(str(entry))
|
||||||
|
})
|
||||||
|
|
||||||
all_correlations.append({
|
all_correlations.append({
|
||||||
'value': value,
|
'value': value,
|
||||||
@ -163,11 +225,7 @@ class GraphManager:
|
|||||||
# Skip creating correlation node - would be redundant
|
# Skip creating correlation node - would be redundant
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# *** CHANGE START ***
|
|
||||||
# The overly aggressive filtering logic has been removed.
|
|
||||||
# All nodes involved in the correlation will now be used.
|
|
||||||
eligible_nodes = set(corr['nodes'])
|
eligible_nodes = set(corr['nodes'])
|
||||||
# *** CHANGE END ***
|
|
||||||
|
|
||||||
if len(eligible_nodes) < 2:
|
if len(eligible_nodes) < 2:
|
||||||
# Need at least 2 nodes to create a correlation
|
# Need at least 2 nodes to create a correlation
|
||||||
@ -187,11 +245,12 @@ class GraphManager:
|
|||||||
metadata={'values': [value], 'sources': corr['sources'],
|
metadata={'values': [value], 'sources': corr['sources'],
|
||||||
'correlated_nodes': list(eligible_nodes)})
|
'correlated_nodes': list(eligible_nodes)})
|
||||||
|
|
||||||
# Create edges from eligible nodes to this correlation node
|
# Create edges from eligible nodes to this correlation node with better labeling
|
||||||
for c_node_id in eligible_nodes:
|
for c_node_id in eligible_nodes:
|
||||||
if self.graph.has_node(c_node_id):
|
if self.graph.has_node(c_node_id):
|
||||||
attribute = corr['sources'][0]['path'].split('.')[-1]
|
# Find the best attribute name for this node
|
||||||
relationship_type = f"c_{attribute}"
|
meaningful_attr = self._find_best_attribute_name_for_node(c_node_id, corr['sources'])
|
||||||
|
relationship_type = f"c_{meaningful_attr}"
|
||||||
self.add_edge(c_node_id, correlation_node_id, relationship_type, confidence_score=0.9)
|
self.add_edge(c_node_id, correlation_node_id, relationship_type, confidence_score=0.9)
|
||||||
|
|
||||||
self._update_correlation_index(node_id, attributes)
|
self._update_correlation_index(node_id, attributes)
|
||||||
@ -199,10 +258,34 @@ class GraphManager:
|
|||||||
self.last_modified = datetime.now(timezone.utc).isoformat()
|
self.last_modified = datetime.now(timezone.utc).isoformat()
|
||||||
return is_new_node
|
return is_new_node
|
||||||
|
|
||||||
# *** CHANGE START ***
|
def _find_best_attribute_name_for_node(self, node_id: str, sources: List[Dict]) -> str:
|
||||||
# The following function is no longer needed and has been removed to avoid confusion.
|
"""Find the best attribute name for a correlation edge by looking at the sources."""
|
||||||
# def _filter_nodes_without_direct_edges(self, node_set: set) -> set:
|
node_sources = [s for s in sources if s['node_id'] == node_id]
|
||||||
# *** CHANGE END ***
|
|
||||||
|
if not node_sources:
|
||||||
|
return "correlation"
|
||||||
|
|
||||||
|
# Use the meaningful_attr if available
|
||||||
|
for source in node_sources:
|
||||||
|
meaningful_attr = source.get('meaningful_attr')
|
||||||
|
if meaningful_attr and meaningful_attr != "unknown":
|
||||||
|
return meaningful_attr
|
||||||
|
|
||||||
|
# Fallback to parent_attr
|
||||||
|
for source in node_sources:
|
||||||
|
parent_attr = source.get('parent_attr')
|
||||||
|
if parent_attr:
|
||||||
|
return parent_attr
|
||||||
|
|
||||||
|
# Last resort - extract from path
|
||||||
|
for source in node_sources:
|
||||||
|
path = source.get('path', '')
|
||||||
|
if path:
|
||||||
|
extracted = self._extract_meaningful_attribute(path)
|
||||||
|
if extracted != "unknown":
|
||||||
|
return extracted
|
||||||
|
|
||||||
|
return "correlation"
|
||||||
|
|
||||||
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
|
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
|
||||||
"""
|
"""
|
||||||
@ -276,7 +359,7 @@ class GraphManager:
|
|||||||
# Create set of unique sources based on (node_id, path) tuples
|
# Create set of unique sources based on (node_id, path) tuples
|
||||||
source_set = set()
|
source_set = set()
|
||||||
for source in existing_sources + new_sources:
|
for source in existing_sources + new_sources:
|
||||||
source_tuple = (source['node_id'], source['path'])
|
source_tuple = (source['node_id'], source.get('path', ''))
|
||||||
source_set.add(source_tuple)
|
source_set.add(source_tuple)
|
||||||
|
|
||||||
# Convert back to list of dictionaries
|
# Convert back to list of dictionaries
|
||||||
|
Loading…
x
Reference in New Issue
Block a user