fix correlation provider issues

This commit is contained in:
overcuriousity 2025-09-24 11:36:27 +02:00
parent 897bb80183
commit 3951b9e521
5 changed files with 225 additions and 947 deletions

View File

@ -163,7 +163,8 @@ class GraphManager:
'to': target, 'to': target,
'label': attrs.get('relationship_type', ''), 'label': attrs.get('relationship_type', ''),
'source_provider': attrs.get('source_provider', ''), 'source_provider': attrs.get('source_provider', ''),
'discovery_timestamp': attrs.get('discovery_timestamp') 'discovery_timestamp': attrs.get('discovery_timestamp'),
'raw_data': attrs.get('raw_data', {})
}) })
return { return {

View File

@ -929,7 +929,7 @@ class Scanner:
# Re-enqueue the node for full processing # Re-enqueue the node for full processing
is_ip = _is_valid_ip(node_id) is_ip = _is_valid_ip(node_id)
eligible_providers = self._get_eligible_providers(node_id, is_ip, False) eligible_providers = self._get_eligible_providers(node_id, is_ip, False, is_extracted=True)
for provider in eligible_providers: for provider in eligible_providers:
provider_name = provider.get_name() provider_name = provider.get_name()
priority = self._get_priority(provider_name) priority = self._get_priority(provider_name)
@ -1133,7 +1133,7 @@ class Scanner:
self.logger.logger.warning(f"Error initializing provider states for {target}: {e}") self.logger.logger.warning(f"Error initializing provider states for {target}: {e}")
def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool) -> List: def _get_eligible_providers(self, target: str, is_ip: bool, dns_only: bool, is_extracted: bool = False) -> List:
""" """
FIXED: Improved provider eligibility checking with better filtering. FIXED: Improved provider eligibility checking with better filtering.
""" """
@ -1145,7 +1145,7 @@ class Scanner:
# Check if the target is part of a large entity # Check if the target is part of a large entity
is_in_large_entity = False is_in_large_entity = False
if self.graph.graph.has_node(target): if self.graph.graph.has_node(target) and not is_extracted:
metadata = self.graph.graph.nodes[target].get('metadata', {}) metadata = self.graph.graph.nodes[target].get('metadata', {})
if 'large_entity_id' in metadata: if 'large_entity_id' in metadata:
is_in_large_entity = True is_in_large_entity = True

View File

@ -1,4 +1,4 @@
# DNScope/providers/correlation_provider.py # dnsrecon-reduced/providers/correlation_provider.py
import re import re
from typing import Dict, Any, List from typing import Dict, Any, List
@ -24,6 +24,10 @@ class CorrelationProvider(BaseProvider):
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}') self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
self.EXCLUDED_KEYS = [ self.EXCLUDED_KEYS = [
'cert_source', 'cert_source',
'a_records',
'mx_records',
'ns_records',
'ptr_records',
'cert_issuer_ca_id', 'cert_issuer_ca_id',
'cert_common_name', 'cert_common_name',
'cert_validity_period_days', 'cert_validity_period_days',
@ -38,6 +42,8 @@ class CorrelationProvider(BaseProvider):
'updated_timestamp', 'updated_timestamp',
'discovery_timestamp', 'discovery_timestamp',
'query_timestamp', 'query_timestamp',
'shodan_ip_str',
'shodan_a_record',
] ]
def get_name(self) -> str: def get_name(self) -> str:
@ -83,7 +89,7 @@ class CorrelationProvider(BaseProvider):
def _find_correlations(self, node_id: str) -> ProviderResult: def _find_correlations(self, node_id: str) -> ProviderResult:
""" """
Find correlations for a given node with enhanced filtering and error handling. Find correlations for a given node with enhanced filtering and error handling.
UPDATED: Enhanced with discovery timestamps for time-based edge coloring. UPDATED: Enhanced with discovery timestamps for time-based edge coloring and list value processing.
""" """
result = ProviderResult() result = ProviderResult()
discovery_time = datetime.now(timezone.utc) discovery_time = datetime.now(timezone.utc)
@ -109,20 +115,28 @@ class CorrelationProvider(BaseProvider):
attr_value = attr.get('value') attr_value = attr.get('value')
attr_provider = attr.get('provider', 'unknown') attr_provider = attr.get('provider', 'unknown')
# Prepare a list of values to iterate over
values_to_process = []
if isinstance(attr_value, list):
values_to_process.extend(attr_value)
else:
values_to_process.append(attr_value)
for value_item in values_to_process:
# Enhanced filtering logic # Enhanced filtering logic
should_exclude = self._should_exclude_attribute(attr_name, attr_value) should_exclude = self._should_exclude_attribute(attr_name, value_item)
if should_exclude: if should_exclude:
continue continue
# Build correlation index # Build correlation index
if attr_value not in self.correlation_index: if value_item not in self.correlation_index:
self.correlation_index[attr_value] = { self.correlation_index[value_item] = {
'nodes': set(), 'nodes': set(),
'sources': [] 'sources': []
} }
self.correlation_index[attr_value]['nodes'].add(node_id) self.correlation_index[value_item]['nodes'].add(node_id)
source_info = { source_info = {
'node_id': node_id, 'node_id': node_id,
@ -132,14 +146,14 @@ class CorrelationProvider(BaseProvider):
} }
# Avoid duplicate sources # Avoid duplicate sources
existing_sources = [s for s in self.correlation_index[attr_value]['sources'] existing_sources = [s for s in self.correlation_index[value_item]['sources']
if s['node_id'] == node_id and s['path'] == source_info['path']] if s['node_id'] == node_id and s['path'] == source_info['path']]
if not existing_sources: if not existing_sources:
self.correlation_index[attr_value]['sources'].append(source_info) self.correlation_index[value_item]['sources'].append(source_info)
# Create correlation if we have multiple nodes with this value # Create correlation if we have multiple nodes with this value
if len(self.correlation_index[attr_value]['nodes']) > 1: if len(self.correlation_index[value_item]['nodes']) > 1:
self._create_correlation_relationships(attr_value, self.correlation_index[attr_value], result, discovery_time) self._create_correlation_relationships(value_item, self.correlation_index[value_item], result, discovery_time)
correlations_found += 1 correlations_found += 1
# Log correlation results # Log correlation results

View File

@ -401,7 +401,7 @@ input[type="text"]:focus, select:focus {
gap: 0.3rem; gap: 0.3rem;
position: absolute; position: absolute;
top: 10px; top: 10px;
left: 10px; right: 10px;
background: rgba(26, 26, 26, 0.9); background: rgba(26, 26, 26, 0.9);
padding: 0.5rem; padding: 0.5rem;
border-radius: 6px; border-radius: 6px;
@ -1406,7 +1406,7 @@ input[type="password"]:focus {
.graph-controls { .graph-controls {
position: relative; position: relative;
top: auto; top: auto;
left: auto; right: auto;
margin-bottom: 1rem; margin-bottom: 1rem;
min-width: auto; min-width: auto;
} }

File diff suppressed because it is too large. Load Diff