correlation engine

This commit is contained in:
overcuriousity 2025-09-18 20:51:13 +02:00
parent cbfd40ee98
commit 12f834bb65
8 changed files with 258 additions and 346 deletions

View File

@ -33,14 +33,16 @@ class Config:
self.rate_limits = { self.rate_limits = {
'crtsh': 5, 'crtsh': 5,
'shodan': 60, 'shodan': 60,
'dns': 100 'dns': 100,
'correlation': 1000 # Set a high limit as it's a local operation
} }
# --- Provider Settings --- # --- Provider Settings ---
self.enabled_providers = { self.enabled_providers = {
'crtsh': True, 'crtsh': True,
'dns': True, 'dns': True,
'shodan': False 'shodan': False,
'correlation': True # Enable the new provider by default
} }
# --- Logging --- # --- Logging ---

View File

@ -40,270 +40,6 @@ class GraphManager:
self.graph = nx.DiGraph() self.graph = nx.DiGraph()
self.creation_time = datetime.now(timezone.utc).isoformat() self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time self.last_modified = self.creation_time
self.correlation_index = {}
# Compile regex for date filtering for efficiency
self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
# FIXED: Exclude cert_issuer_name since we already create proper CA relationships
self.EXCLUDED_KEYS = [
# Certificate metadata that creates noise or has dedicated node types
'cert_source', # Always 'crtsh' for crtsh provider
'cert_common_name',
'cert_validity_period_days', # Numerical, not useful for correlation
'cert_issuer_name', # FIXED: Has dedicated CA nodes, don't correlate
#'cert_certificate_id', # Unique per certificate
#'cert_serial_number', # Unique per certificate
'cert_entry_timestamp', # Timestamp, filtered by date regex anyway
'cert_not_before', # Date, filtered by date regex anyway
'cert_not_after', # Date, filtered by date regex anyway
# DNS metadata that creates noise
'dns_ttl', # TTL values are not meaningful for correlation
# Shodan metadata that might create noise
'timestamp', # Generic timestamp fields
'last_update', # Generic timestamp fields
#'org', # Too generic, causes false correlations
#'isp', # Too generic, causes false correlations
# Generic noisy attributes
'updated_timestamp', # Any timestamp field
'discovery_timestamp', # Any timestamp field
'query_timestamp', # Any timestamp field
]
def __getstate__(self):
    """Prepare GraphManager for pickling, excluding compiled regex."""
    state = dict(self.__dict__)
    # Compiled regex patterns are not always picklable; drop if present.
    state.pop('date_pattern', None)
    return state
def __setstate__(self, state):
    """Restore GraphManager state and recompile the date regex."""
    for key, value in state.items():
        self.__dict__[key] = value
    # Recreate the pattern removed by __getstate__ before pickling.
    self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
def process_correlations_for_node(self, node_id: str):
    """
    UPDATED: Process correlations for a given node with enhanced tracking.
    Now properly tracks which attribute/provider created each correlation.

    Indexes every eligible attribute value of the node in
    self.correlation_index; once a value is shared by more than one node,
    a correlation node and edges are created.

    Args:
        node_id: Graph node whose attributes should be indexed.
    """
    if not self.graph.has_node(node_id):
        return
    node_attributes = self.graph.nodes[node_id].get('attributes', [])
    # Process each attribute for potential correlations
    for attr in node_attributes:
        # NOTE(review): assumes every attribute dict has a non-None 'name';
        # a missing key would raise TypeError in the membership test below
        # — confirm upstream guarantees it.
        attr_name = attr.get('name')
        attr_value = attr.get('value')
        attr_provider = attr.get('provider', 'unknown')
        # IMPROVED: More comprehensive exclusion logic
        should_exclude = (
            # Check against excluded keys (exact match or substring)
            any(excluded_key in attr_name or attr_name == excluded_key for excluded_key in self.EXCLUDED_KEYS) or
            # Invalid value types
            not isinstance(attr_value, (str, int, float, bool)) or
            attr_value is None or
            # Boolean values are not useful for correlation
            isinstance(attr_value, bool) or
            # String values that are too short or are dates
            (isinstance(attr_value, str) and (
                len(attr_value) < 4 or
                self.date_pattern.match(attr_value) or
                # Exclude common generic values that create noise
                attr_value.lower() in ['unknown', 'none', 'null', 'n/a', 'true', 'false', '0', '1']
            )) or
            # Numerical values that are likely to be unique identifiers
            (isinstance(attr_value, (int, float)) and (
                attr_value == 0 or  # Zero values are not meaningful
                attr_value == 1 or  # One values are too common
                abs(attr_value) > 1000000  # Very large numbers are likely IDs
            ))
        )
        if should_exclude:
            continue
        # Initialize correlation tracking for this value
        if attr_value not in self.correlation_index:
            self.correlation_index[attr_value] = {
                'nodes': set(),
                'sources': []  # Track which provider/attribute combinations contributed
            }
        # Add this node and source information
        self.correlation_index[attr_value]['nodes'].add(node_id)
        # Track the source of this correlation value
        source_info = {
            'node_id': node_id,
            'provider': attr_provider,
            'attribute': attr_name,
            'path': f"{attr_provider}_{attr_name}"
        }
        # Add source if not already present (avoid duplicates)
        existing_sources = [s for s in self.correlation_index[attr_value]['sources']
                            if s['node_id'] == node_id and s['path'] == source_info['path']]
        if not existing_sources:
            self.correlation_index[attr_value]['sources'].append(source_info)
        # Create correlation node if we have multiple nodes with this value
        if len(self.correlation_index[attr_value]['nodes']) > 1:
            self._create_enhanced_correlation_node_and_edges(attr_value, self.correlation_index[attr_value])
def _create_enhanced_correlation_node_and_edges(self, value, correlation_data):
    """
    UPDATED: Create correlation node and edges with raw provider data (no formatting).

    Args:
        value: The shared attribute value that triggered the correlation.
        correlation_data: Dict with 'nodes' (set of node ids sharing the
            value) and 'sources' (list of {node_id, provider, attribute,
            path} dicts).
    """
    # NOTE(review): builtin hash() on strings is salted per interpreter run
    # (PYTHONHASHSEED), so this id is not stable across sessions for the
    # same value — confirm that is acceptable for persisted graphs.
    correlation_node_id = f"corr_{hash(str(value)) & 0x7FFFFFFF}"
    nodes = correlation_data['nodes']
    sources = correlation_data['sources']
    # Create or update correlation node
    if not self.graph.has_node(correlation_node_id):
        # Use raw provider/attribute data - no formatting
        provider_counts = {}
        for source in sources:
            # Keep original provider and attribute names
            key = f"{source['provider']}_{source['attribute']}"
            provider_counts[key] = provider_counts.get(key, 0) + 1
        # Use the most common provider/attribute as the primary label (raw)
        primary_source = max(provider_counts.items(), key=lambda x: x[1])[0] if provider_counts else "unknown_correlation"
        metadata = {
            'value': value,
            'correlated_nodes': list(nodes),
            'sources': sources,
            'primary_source': primary_source,
            'correlation_count': len(nodes)
        }
        self.add_node(correlation_node_id, NodeType.CORRELATION_OBJECT, metadata=metadata)
    # Create edges from each node to the correlation node
    for source in sources:
        node_id = source['node_id']
        provider = source['provider']
        attribute = source['attribute']
        if self.graph.has_node(node_id) and not self.graph.has_edge(node_id, correlation_node_id):
            # Format relationship label as "corr_provider_attribute"
            relationship_label = f"corr_{provider}_{attribute}"
            self.add_edge(
                source_id=node_id,
                target_id=correlation_node_id,
                relationship_type=relationship_label,
                confidence_score=0.9,
                source_provider=provider,
                raw_data={
                    'correlation_value': value,
                    'original_attribute': attribute,
                    'correlation_type': 'attribute_matching'
                }
            )
def _has_direct_edge_bidirectional(self, node_a: str, node_b: str) -> bool:
    """
    Check if there's a direct edge between two nodes in either direction.
    Returns True if node_a -> node_b OR node_b -> node_a exists.
    """
    if self.graph.has_edge(node_a, node_b):
        return True
    return self.graph.has_edge(node_b, node_a)
def _correlation_value_matches_existing_node(self, correlation_value: str) -> bool:
    """
    Check if correlation value contains any existing node ID as substring.
    Returns True if match found (correlation node should NOT be created).
    """
    haystack = str(correlation_value).lower()
    # Scan every node id currently in the graph for a substring match.
    return any(existing_id.lower() in haystack
               for existing_id in self.graph.nodes())
def _find_correlation_nodes_with_same_pattern(self, node_set: set) -> List[str]:
    """
    Find existing correlation nodes that have the exact same pattern of connected nodes.
    Returns list of correlation node IDs with matching patterns.
    """
    matches = []
    for corr_id in self.get_nodes_by_type(NodeType.CORRELATION_OBJECT):
        # Union of everything pointing at, or pointed to by, this node.
        neighbours = set(self.graph.predecessors(corr_id))
        neighbours |= set(self.graph.successors(corr_id))
        # Exact-set equality means the connection pattern is identical.
        if neighbours == node_set:
            matches.append(corr_id)
    return matches
def _merge_correlation_values(self, target_node_id: str, new_value: Any, corr_data: Dict) -> None:
    """
    Merge a new correlation value into an existing correlation node.
    Uses same logic as large entity merging.

    Args:
        target_node_id: Id of the existing correlation node to merge into.
        new_value: Additional correlated value to record on the node.
        corr_data: Dict carrying 'sources' (list of source dicts) and
            'nodes' for the new value.
    """
    if not self.graph.has_node(target_node_id):
        return
    target_metadata = self.graph.nodes[target_node_id]['metadata']
    # Get existing values (ensure it's a list)
    existing_values = target_metadata.get('values', [])
    if not isinstance(existing_values, list):
        existing_values = [existing_values]
    # Add new value if not already present
    if new_value not in existing_values:
        existing_values.append(new_value)
    # Merge sources
    existing_sources = target_metadata.get('sources', [])
    new_sources = corr_data.get('sources', [])
    # Create set of unique sources based on (node_id, path) tuples
    source_set = set()
    for source in existing_sources + new_sources:
        source_tuple = (source['node_id'], source.get('path', ''))
        source_set.add(source_tuple)
    # Convert back to list of dictionaries
    # NOTE(review): provider/attribute fields are dropped here — only
    # node_id and path survive the merge; confirm that is intended.
    merged_sources = [{'node_id': nid, 'path': path} for nid, path in source_set]
    # Update metadata
    # NOTE(review): elsewhere in this class 'nodes' is a set; list + set
    # concatenation below would raise TypeError — confirm callers pass a
    # list in corr_data['nodes'].
    target_metadata.update({
        'values': existing_values,
        'sources': merged_sources,
        'correlated_nodes': list(set(target_metadata.get('correlated_nodes', []) + corr_data.get('nodes', []))),
        'merge_count': len(existing_values),
        'last_merge_timestamp': datetime.now(timezone.utc).isoformat()
    })
    # Update description to reflect merged nature
    value_count = len(existing_values)
    node_count = len(target_metadata['correlated_nodes'])
    self.graph.nodes[target_node_id]['description'] = (
        f"Correlation container with {value_count} merged values "
        f"across {node_count} nodes"
    )
def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[List[Dict[str, Any]]] = None, def add_node(self, node_id: str, node_type: NodeType, attributes: Optional[List[Dict[str, Any]]] = None,
description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool: description: str = "", metadata: Optional[Dict[str, Any]] = None) -> bool:
@ -415,29 +151,7 @@ class GraphManager:
# Remove node from the graph (NetworkX handles removing connected edges) # Remove node from the graph (NetworkX handles removing connected edges)
self.graph.remove_node(node_id) self.graph.remove_node(node_id)
# Clean up the correlation index
keys_to_delete = []
for value, data in self.correlation_index.items():
if isinstance(data, dict) and 'nodes' in data:
# Updated correlation structure
if node_id in data['nodes']:
data['nodes'].discard(node_id)
# Remove sources for this node
data['sources'] = [s for s in data['sources'] if s['node_id'] != node_id]
if not data['nodes']: # If no other nodes are associated, remove it
keys_to_delete.append(value)
else:
# Legacy correlation structure (fallback)
if isinstance(data, set) and node_id in data:
data.discard(node_id)
if not data:
keys_to_delete.append(value)
for key in keys_to_delete:
if key in self.correlation_index:
del self.correlation_index[key]
self.last_modified = datetime.now(timezone.utc).isoformat() self.last_modified = datetime.now(timezone.utc).isoformat()
return True return True
@ -562,8 +276,7 @@ class GraphManager:
return stats return stats
def clear(self) -> None: def clear(self) -> None:
"""Clear all nodes, edges, and indices from the graph.""" """Clear all nodes and edges from the graph."""
self.graph.clear() self.graph.clear()
self.correlation_index.clear()
self.creation_time = datetime.now(timezone.utc).isoformat() self.creation_time = datetime.now(timezone.utc).isoformat()
self.last_modified = self.creation_time self.last_modified = self.creation_time

View File

@ -6,6 +6,7 @@ import os
import importlib import importlib
import redis import redis
import time import time
import math
import random # Imported for jitter import random # Imported for jitter
from typing import List, Set, Dict, Any, Tuple, Optional from typing import List, Set, Dict, Any, Tuple, Optional
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
@ -19,6 +20,7 @@ from core.provider_result import ProviderResult
from utils.helpers import _is_valid_ip, _is_valid_domain from utils.helpers import _is_valid_ip, _is_valid_domain
from utils.export_manager import export_manager from utils.export_manager import export_manager
from providers.base_provider import BaseProvider from providers.base_provider import BaseProvider
from providers.correlation_provider import CorrelationProvider
from core.rate_limiter import GlobalRateLimiter from core.rate_limiter import GlobalRateLimiter
class ScanStatus: class ScanStatus:
@ -196,12 +198,15 @@ class Scanner:
attribute = getattr(module, attribute_name) attribute = getattr(module, attribute_name)
if isinstance(attribute, type) and issubclass(attribute, BaseProvider) and attribute is not BaseProvider: if isinstance(attribute, type) and issubclass(attribute, BaseProvider) and attribute is not BaseProvider:
provider_class = attribute provider_class = attribute
# FIXED: Pass the 'name' argument during initialization
provider = provider_class(name=attribute_name, session_config=self.config) provider = provider_class(name=attribute_name, session_config=self.config)
provider_name = provider.get_name() provider_name = provider.get_name()
if self.config.is_provider_enabled(provider_name): if self.config.is_provider_enabled(provider_name):
if provider.is_available(): if provider.is_available():
provider.set_stop_event(self.stop_event) provider.set_stop_event(self.stop_event)
if isinstance(provider, CorrelationProvider):
provider.set_graph_manager(self.graph)
self.providers.append(provider) self.providers.append(provider)
except Exception as e: except Exception as e:
traceback.print_exc() traceback.print_exc()
@ -336,12 +341,20 @@ class Scanner:
def _get_priority(self, provider_name): def _get_priority(self, provider_name):
rate_limit = self.config.get_rate_limit(provider_name) rate_limit = self.config.get_rate_limit(provider_name)
if rate_limit > 90:
return 1 # Highest priority # Define the logarithmic scale
elif rate_limit > 50: if rate_limit < 10:
return 2 return 10 # Highest priority number (lowest priority) for very low rate limits
else:
return 3 # Lowest priority # Calculate logarithmic value and map to priority levels
# Lower rate limits get higher priority numbers (lower priority)
log_value = math.log10(rate_limit)
priority = 10 - int(log_value * 2) # Scale factor to get more granular levels
# Ensure priority is within a reasonable range (1-10)
priority = max(1, min(10, priority))
return priority
def _execute_scan(self, target: str, max_depth: int) -> None: def _execute_scan(self, target: str, max_depth: int) -> None:
""" """
@ -420,7 +433,7 @@ class Scanner:
provider = next((p for p in self.providers if p.get_name() == provider_name), None) provider = next((p for p in self.providers if p.get_name() == provider_name), None)
if provider: if provider:
new_targets, _, success = self._query_single_provider_for_target(provider, target_item, depth) new_targets, _, success = self._process_provider_task(provider, target_item, depth)
if self._is_stop_requested(): break if self._is_stop_requested(): break
@ -482,9 +495,10 @@ class Scanner:
self.executor.shutdown(wait=False, cancel_futures=True) self.executor.shutdown(wait=False, cancel_futures=True)
self.executor = None self.executor = None
def _query_single_provider_for_target(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]: def _process_provider_task(self, provider: BaseProvider, target: str, depth: int) -> Tuple[Set[str], Set[str], bool]:
""" """
Query a single provider and process the unified ProviderResult. Manages the entire process for a given target and provider.
It uses the "worker" function to get the data and then manages the consequences.
""" """
if self._is_stop_requested(): if self._is_stop_requested():
return set(), set(), False return set(), set(), False
@ -500,7 +514,7 @@ class Scanner:
provider_successful = True provider_successful = True
try: try:
provider_result = self._query_single_provider_unified(provider, target, is_ip, depth) provider_result = self._execute_provider_query(provider, target, is_ip)
if provider_result is None: if provider_result is None:
provider_successful = False provider_successful = False
@ -512,16 +526,24 @@ class Scanner:
large_entity_members.update(discovered) large_entity_members.update(discovered)
else: else:
new_targets.update(discovered) new_targets.update(discovered)
self.graph.process_correlations_for_node(target)
# After processing a provider, queue a correlation task for the target
correlation_provider = next((p for p in self.providers if isinstance(p, CorrelationProvider)), None)
if correlation_provider and not isinstance(provider, CorrelationProvider):
priority = self._get_priority(correlation_provider.get_name())
self.task_queue.put((time.time(), priority, (correlation_provider.get_name(), target, depth)))
# FIXED: Increment total tasks when a correlation task is enqueued
self.total_tasks_ever_enqueued += 1
except Exception as e: except Exception as e:
provider_successful = False provider_successful = False
self._log_provider_error(target, provider.get_name(), str(e)) self._log_provider_error(target, provider.get_name(), str(e))
return new_targets, large_entity_members, provider_successful return new_targets, large_entity_members, provider_successful
def _query_single_provider_unified(self, provider: BaseProvider, target: str, is_ip: bool, current_depth: int) -> Optional[ProviderResult]: def _execute_provider_query(self, provider: BaseProvider, target: str, is_ip: bool) -> Optional[ProviderResult]:
""" """
Query a single provider with stop signal checking. The "worker" function that directly communicates with the provider to fetch data.
""" """
provider_name = provider.get_name() provider_name = provider.get_name()
start_time = datetime.now(timezone.utc) start_time = datetime.now(timezone.utc)
@ -572,16 +594,15 @@ class Scanner:
} }
attributes_by_node[attribute.target_node].append(attr_dict) attributes_by_node[attribute.target_node].append(attr_dict)
# Add attributes to existing nodes (important for ISP nodes to get ASN attributes) # FIXED: Add attributes to existing nodes AND create new nodes (like correlation nodes)
for node_id, node_attributes_list in attributes_by_node.items(): for node_id, node_attributes_list in attributes_by_node.items():
if self.graph.graph.has_node(node_id): if provider_name == 'correlation' and not self.graph.graph.has_node(node_id):
# Node already exists, just add attributes node_type = NodeType.CORRELATION_OBJECT
if _is_valid_ip(node_id): elif _is_valid_ip(node_id):
node_type = NodeType.IP node_type = NodeType.IP
else: else:
node_type = NodeType.DOMAIN node_type = NodeType.DOMAIN
self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
self.graph.add_node(node_id, node_type, attributes=node_attributes_list)
# Check if this should be a large entity # Check if this should be a large entity
if provider_result.get_relationship_count() > self.config.large_entity_threshold: if provider_result.get_relationship_count() > self.config.large_entity_threshold:
@ -604,6 +625,8 @@ class Scanner:
target_type = NodeType.ISP # ISP node for Shodan organization data target_type = NodeType.ISP # ISP node for Shodan organization data
elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer': elif provider_name == 'crtsh' and relationship.relationship_type == 'crtsh_cert_issuer':
target_type = NodeType.CA # CA node for certificate issuers target_type = NodeType.CA # CA node for certificate issuers
elif provider_name == 'correlation':
target_type = NodeType.CORRELATION_OBJECT
elif _is_valid_ip(target_node): elif _is_valid_ip(target_node):
target_type = NodeType.IP target_type = NodeType.IP
else: else:

View File

@ -7,14 +7,16 @@ from .base_provider import BaseProvider
from .crtsh_provider import CrtShProvider from .crtsh_provider import CrtShProvider
from .dns_provider import DNSProvider from .dns_provider import DNSProvider
from .shodan_provider import ShodanProvider from .shodan_provider import ShodanProvider
from .correlation_provider import CorrelationProvider
from core.rate_limiter import GlobalRateLimiter from core.rate_limiter import GlobalRateLimiter
__all__ = [ __all__ = [
'BaseProvider', 'BaseProvider',
'GlobalRateLimiter', 'GlobalRateLimiter',
'CrtShProvider', 'CrtShProvider',
'DNSProvider', 'DNSProvider',
'ShodanProvider' 'ShodanProvider',
'CorrelationProvider'
] ]
__version__ = "0.0.0-rc" __version__ = "0.0.0-rc"

View File

@ -0,0 +1,178 @@
# dnsrecon/providers/correlation_provider.py
import hashlib
import re
from typing import Any, Dict, List

from .base_provider import BaseProvider
from core.provider_result import ProviderResult
from core.graph_manager import NodeType, GraphManager
class CorrelationProvider(BaseProvider):
    """
    A provider that finds correlations between nodes in the graph.

    Instead of querying an external service, this provider inspects the
    attributes already attached to a graph node, records each eligible value
    in a local index, and — once a value has been seen on more than one
    node — emits a correlation node plus one edge per contributing
    node/attribute pair.
    """

    def __init__(self, name: str = "correlation", session_config=None):
        """
        Initialize the correlation provider.
        """
        super().__init__(name, session_config=session_config)
        # Set via set_graph_manager() before any query arrives.
        self.graph: GraphManager | None = None
        # value -> {'nodes': set of node ids, 'sources': list of source dicts}
        self.correlation_index = {}
        # ISO-like date/time strings are never useful correlation values.
        self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
        # Attribute names excluded from correlation: noisy metadata,
        # timestamps, or values that already have dedicated node types.
        self.EXCLUDED_KEYS = [
            'cert_source',
            'cert_issuer_ca_id',
            'cert_common_name',
            'cert_validity_period_days',
            'cert_issuer_name',
            'cert_entry_timestamp',
            'cert_not_before',
            'cert_not_after',
            'dns_ttl',
            'timestamp',
            'last_update',
            'updated_timestamp',
            'discovery_timestamp',
            'query_timestamp',
        ]

    def get_name(self) -> str:
        """Return the provider name."""
        return "correlation"

    def get_display_name(self) -> str:
        """Return the provider display name for the UI."""
        return "Correlation Engine"

    def requires_api_key(self) -> bool:
        """Return True if the provider requires an API key."""
        return False

    def get_eligibility(self) -> Dict[str, bool]:
        """Return a dictionary indicating if the provider can query domains and/or IPs."""
        return {'domains': True, 'ips': True}

    def is_available(self) -> bool:
        """Check if the provider is available and properly configured."""
        return True

    def query_domain(self, domain: str) -> ProviderResult:
        """
        Query the provider for information about a domain.
        """
        return self._find_correlations(domain)

    def query_ip(self, ip: str) -> ProviderResult:
        """
        Query the provider for information about an IP address.
        """
        return self._find_correlations(ip)

    def set_graph_manager(self, graph_manager: GraphManager):
        """
        Set the graph manager for the provider to use.
        """
        self.graph = graph_manager

    @staticmethod
    def _correlation_node_id(value: Any) -> str:
        """
        Build a deterministic node id for a correlation value.

        FIXED: uses a stable digest instead of the builtin hash(), which is
        randomized per interpreter run (PYTHONHASHSEED) and would assign the
        same value a different node id in every session.
        """
        digest = hashlib.sha256(str(value).encode('utf-8', 'replace')).digest()
        return f"corr_{int.from_bytes(digest[:4], 'big') & 0x7FFFFFFF}"

    def _is_excluded(self, attr_name, attr_value) -> bool:
        """Return True if this attribute must not participate in correlation."""
        # FIXED: tolerate attributes without a 'name' key instead of raising
        # TypeError on the substring test below.
        attr_name = attr_name or ''
        if any(excluded_key in attr_name or attr_name == excluded_key
               for excluded_key in self.EXCLUDED_KEYS):
            return True
        # Only simple scalars correlate; None is rejected by the isinstance
        # check, and booleans are too coarse to be meaningful.
        if not isinstance(attr_value, (str, int, float, bool)) or attr_value is None:
            return True
        if isinstance(attr_value, bool):
            return True
        if isinstance(attr_value, str):
            # Too-short, date-like, or generic placeholder strings are noise.
            return (len(attr_value) < 4 or
                    bool(self.date_pattern.match(attr_value)) or
                    attr_value.lower() in ['unknown', 'none', 'null', 'n/a', 'true', 'false', '0', '1'])
        # Remaining type is int/float: 0 and 1 are too common, very large
        # numbers are likely unique identifiers.
        return attr_value == 0 or attr_value == 1 or abs(attr_value) > 1000000

    def _find_correlations(self, node_id: str) -> ProviderResult:
        """
        Find correlations for a given node.

        Indexes every eligible attribute value of node_id and, for values
        shared with previously indexed nodes, adds correlation
        relationships to the returned ProviderResult.
        """
        result = ProviderResult()
        # The graph may not be attached yet, or the node may have been removed.
        if not self.graph or not self.graph.graph.has_node(node_id):
            return result
        node_attributes = self.graph.graph.nodes[node_id].get('attributes', [])
        for attr in node_attributes:
            attr_name = attr.get('name')
            attr_value = attr.get('value')
            attr_provider = attr.get('provider', 'unknown')
            if self._is_excluded(attr_name, attr_value):
                continue
            entry = self.correlation_index.setdefault(
                attr_value, {'nodes': set(), 'sources': []})
            entry['nodes'].add(node_id)
            source_info = {
                'node_id': node_id,
                'provider': attr_provider,
                'attribute': attr_name,
                'path': f"{attr_provider}_{attr_name}"
            }
            # Record each (node, provider, attribute) source only once.
            if not any(s['node_id'] == node_id and s['path'] == source_info['path']
                       for s in entry['sources']):
                entry['sources'].append(source_info)
            # A correlation only exists once two distinct nodes share the value.
            if len(entry['nodes']) > 1:
                self._create_correlation_relationships(attr_value, entry, result)
        return result

    def _create_correlation_relationships(self, value: Any, correlation_data: Dict[str, Any], result: ProviderResult):
        """
        Create correlation relationships and add them to the provider result.
        """
        correlation_node_id = self._correlation_node_id(value)
        nodes = correlation_data['nodes']
        sources = correlation_data['sources']
        # Attach the correlated value (with provenance) to the correlation node.
        result.add_attribute(
            target_node=correlation_node_id,
            name="correlation_value",
            value=value,
            attr_type=str(type(value)),
            provider=self.name,
            confidence=0.9,
            metadata={
                'correlated_nodes': list(nodes),
                'sources': sources,
            }
        )
        # One edge per contributing node/attribute pair.
        for source in sources:
            node_id = source['node_id']
            provider = source['provider']
            attribute = source['attribute']
            relationship_label = f"corr_{provider}_{attribute}"
            result.add_relationship(
                source_node=node_id,
                target_node=correlation_node_id,
                relationship_type=relationship_label,
                provider=self.name,
                confidence=0.9,
                raw_data={
                    'correlation_value': value,
                    'original_attribute': attribute,
                    'correlation_type': 'attribute_matching'
                }
            )

View File

@ -27,14 +27,25 @@ class ShodanProvider(BaseProvider):
) )
self.base_url = "https://api.shodan.io" self.base_url = "https://api.shodan.io"
self.api_key = self.config.get_api_key('shodan') self.api_key = self.config.get_api_key('shodan')
self._is_active = self._check_api_connection()
# Initialize cache directory # Initialize cache directory
self.cache_dir = Path('cache') / 'shodan' self.cache_dir = Path('cache') / 'shodan'
self.cache_dir.mkdir(parents=True, exist_ok=True) self.cache_dir.mkdir(parents=True, exist_ok=True)
def _check_api_connection(self) -> bool:
    """Checks if the Shodan API is reachable."""
    # No key means no point attempting a request.
    if not self.api_key:
        return False
    try:
        response = self.session.get(f"{self.base_url}/api-info?key={self.api_key}", timeout=5)
    except requests.exceptions.RequestException:
        # Network failure, DNS error, or timeout: treat as unreachable.
        return False
    return response.status_code == 200
def is_available(self) -> bool: def is_available(self) -> bool:
"""Check if Shodan provider is available (has valid API key in this session).""" """Check if Shodan provider is available (has valid API key in this session)."""
return self.api_key is not None and len(self.api_key.strip()) > 0 return self._is_active and self.api_key is not None and len(self.api_key.strip()) > 0
def get_name(self) -> str: def get_name(self) -> str:
"""Return the provider name.""" """Return the provider name."""
@ -96,18 +107,6 @@ class ShodanProvider(BaseProvider):
except (json.JSONDecodeError, ValueError, KeyError): except (json.JSONDecodeError, ValueError, KeyError):
return "stale" return "stale"
def query_domain(self, domain: str) -> ProviderResult:
    """
    Domain queries are no longer supported for the Shodan provider.

    Args:
        domain: Domain to investigate

    Returns:
        Empty ProviderResult
    """
    # Intentionally a no-op: Shodan is IP-only in this application.
    empty_result = ProviderResult()
    return empty_result
def query_ip(self, ip: str) -> ProviderResult: def query_ip(self, ip: str) -> ProviderResult:
""" """
Query Shodan for information about an IP address (IPv4 or IPv6), with caching of processed data. Query Shodan for information about an IP address (IPv4 or IPv6), with caching of processed data.

View File

@ -587,26 +587,17 @@ class GraphManager {
// Handle merged correlation objects // Handle merged correlation objects
if (node.type === 'correlation_object') { if (node.type === 'correlation_object') {
const metadata = node.metadata || {}; const correlationValueAttr = this.findAttributeByName(node.attributes, 'correlation_value');
const values = metadata.values || []; const value = correlationValueAttr ? correlationValueAttr.value : 'Unknown';
const mergeCount = metadata.merge_count || 1; const displayValue = typeof value === 'string' && value.length > 20 ? value.substring(0, 17) + '...' : value;
if (mergeCount > 1) { processedNode.label = `${displayValue}`;
processedNode.label = `Correlations (${mergeCount})`; processedNode.title = `Correlation: ${value}`;
processedNode.title = `Merged correlation container with ${mergeCount} values: ${values.slice(0, 3).join(', ')}${values.length > 3 ? '...' : ''}`;
processedNode.borderWidth = 3;
} else {
const value = Array.isArray(values) && values.length > 0 ? values[0] : (metadata.value || 'Unknown');
const displayValue = typeof value === 'string' && value.length > 20 ? value.substring(0, 17) + '...' : value;
processedNode.label = `${displayValue}`;
processedNode.title = `Correlation: ${value}`;
}
} }
return processedNode; return processedNode;
} }
/** /**
* Process edge data with styling and metadata * Process edge data with styling and metadata
* @param {Object} edge - Raw edge data * @param {Object} edge - Raw edge data

View File

@ -1609,15 +1609,19 @@ class DNSReconApp {
* UPDATED: Enhanced correlation details showing the correlated attribute clearly (no formatting) * UPDATED: Enhanced correlation details showing the correlated attribute clearly (no formatting)
*/ */
generateCorrelationDetails(node) { generateCorrelationDetails(node) {
const metadata = node.metadata || {}; const attributes = node.attributes || [];
const value = metadata.value; const correlationValueAttr = attributes.find(attr => attr.name === 'correlation_value');
const value = correlationValueAttr ? correlationValueAttr.value : 'Unknown';
const metadataAttr = attributes.find(attr => attr.name === 'correlation_value');
const metadata = metadataAttr ? metadataAttr.metadata : {};
const correlatedNodes = metadata.correlated_nodes || []; const correlatedNodes = metadata.correlated_nodes || [];
const sources = metadata.sources || []; const sources = metadata.sources || [];
let html = ''; let html = '';
// Show what attribute is being correlated (raw names) // Show what attribute is being correlated (raw names)
const primarySource = metadata.primary_source || 'unknown'; const primarySource = sources.length > 0 ? sources[0].attribute : 'unknown';
html += ` html += `
<div class="modal-section"> <div class="modal-section">