fix correlation provider issues

This commit is contained in:
overcuriousity
2025-09-24 11:36:27 +02:00
parent 897bb80183
commit 3951b9e521
5 changed files with 225 additions and 947 deletions

View File

@@ -1,4 +1,4 @@
# DNScope/providers/correlation_provider.py
# dnsrecon-reduced/providers/correlation_provider.py
import re
from typing import Dict, Any, List
@@ -24,6 +24,10 @@ class CorrelationProvider(BaseProvider):
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
self.EXCLUDED_KEYS = [
'cert_source',
'a_records',
'mx_records',
'ns_records',
'ptr_records',
'cert_issuer_ca_id',
'cert_common_name',
'cert_validity_period_days',
@@ -38,6 +42,8 @@ class CorrelationProvider(BaseProvider):
'updated_timestamp',
'discovery_timestamp',
'query_timestamp',
'shodan_ip_str',
'shodan_a_record',
]
def get_name(self) -> str:
@@ -83,7 +89,7 @@ class CorrelationProvider(BaseProvider):
def _find_correlations(self, node_id: str) -> ProviderResult:
"""
Find correlations for a given node with enhanced filtering and error handling.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring and list value processing.
"""
result = ProviderResult()
discovery_time = datetime.now(timezone.utc)
@@ -109,38 +115,46 @@ class CorrelationProvider(BaseProvider):
attr_value = attr.get('value')
attr_provider = attr.get('provider', 'unknown')
# Enhanced filtering logic
should_exclude = self._should_exclude_attribute(attr_name, attr_value)
if should_exclude:
continue
# Prepare a list of values to iterate over
values_to_process = []
if isinstance(attr_value, list):
values_to_process.extend(attr_value)
else:
values_to_process.append(attr_value)
# Build correlation index
if attr_value not in self.correlation_index:
self.correlation_index[attr_value] = {
'nodes': set(),
'sources': []
for value_item in values_to_process:
# Enhanced filtering logic
should_exclude = self._should_exclude_attribute(attr_name, value_item)
if should_exclude:
continue
# Build correlation index
if value_item not in self.correlation_index:
self.correlation_index[value_item] = {
'nodes': set(),
'sources': []
}
self.correlation_index[value_item]['nodes'].add(node_id)
source_info = {
'node_id': node_id,
'provider': attr_provider,
'attribute': attr_name,
'path': f"{attr_provider}_{attr_name}"
}
self.correlation_index[attr_value]['nodes'].add(node_id)
# Avoid duplicate sources
existing_sources = [s for s in self.correlation_index[value_item]['sources']
if s['node_id'] == node_id and s['path'] == source_info['path']]
if not existing_sources:
self.correlation_index[value_item]['sources'].append(source_info)
source_info = {
'node_id': node_id,
'provider': attr_provider,
'attribute': attr_name,
'path': f"{attr_provider}_{attr_name}"
}
# Avoid duplicate sources
existing_sources = [s for s in self.correlation_index[attr_value]['sources']
if s['node_id'] == node_id and s['path'] == source_info['path']]
if not existing_sources:
self.correlation_index[attr_value]['sources'].append(source_info)
# Create correlation if we have multiple nodes with this value
if len(self.correlation_index[attr_value]['nodes']) > 1:
self._create_correlation_relationships(attr_value, self.correlation_index[attr_value], result, discovery_time)
correlations_found += 1
# Create correlation if we have multiple nodes with this value
if len(self.correlation_index[value_item]['nodes']) > 1:
self._create_correlation_relationships(value_item, self.correlation_index[value_item], result, discovery_time)
correlations_found += 1
# Log correlation results
if correlations_found > 0: