correlation engine
This commit is contained in:
@@ -7,14 +7,16 @@ from .base_provider import BaseProvider
|
||||
from .crtsh_provider import CrtShProvider
|
||||
from .dns_provider import DNSProvider
|
||||
from .shodan_provider import ShodanProvider
|
||||
from .correlation_provider import CorrelationProvider
|
||||
from core.rate_limiter import GlobalRateLimiter
|
||||
|
||||
# Public names re-exported by the providers package. Each name is listed
# exactly once and every entry is comma-terminated, so adjacent string
# literals can never be silently concatenated into one bogus export name.
__all__ = [
    'BaseProvider',
    'GlobalRateLimiter',
    'CrtShProvider',
    'DNSProvider',
    'ShodanProvider',
    'CorrelationProvider',
]

__version__ = "0.0.0-rc"
|
||||
178
providers/correlation_provider.py
Normal file
178
providers/correlation_provider.py
Normal file
@@ -0,0 +1,178 @@
|
||||
# dnsrecon/providers/correlation_provider.py
|
||||
|
||||
import re
|
||||
from typing import Dict, Any, List
|
||||
|
||||
from .base_provider import BaseProvider
|
||||
from core.provider_result import ProviderResult
|
||||
from core.graph_manager import NodeType, GraphManager
|
||||
|
||||
class CorrelationProvider(BaseProvider):
    """
    A provider that finds correlations between nodes in the graph.

    Two nodes are treated as correlated when they carry an attribute with the
    same value. Every eligible value seen by ``query_domain`` / ``query_ip``
    is remembered in ``correlation_index`` for the lifetime of the instance,
    so a correlation is reported as soon as a second node with a matching
    value is queried.
    """

    # Placeholder strings that carry no correlation signal (compared lowercased).
    _PLACEHOLDER_VALUES = frozenset(
        ('unknown', 'none', 'null', 'n/a', 'true', 'false', '0', '1')
    )

    def __init__(self, name: str = "correlation", session_config=None):
        """
        Initialize the correlation provider.

        Args:
            name: Provider name forwarded to ``BaseProvider``.
            session_config: Optional session configuration forwarded to
                ``BaseProvider``.
        """
        super().__init__(name, session_config=session_config)
        # Graph to inspect; stays None until set_graph_manager() is called.
        self.graph: GraphManager | None = None
        # Maps an attribute value to
        # {'nodes': set of node ids, 'sources': list of source-info dicts}.
        self.correlation_index = {}
        # Matches values that start with "YYYY-MM-DD HH:MM:SS" (space or 'T').
        self.date_pattern = re.compile(r'^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}')
        # An attribute is skipped when its name contains any of these substrings.
        self.EXCLUDED_KEYS = [
            'cert_source',
            'cert_issuer_ca_id',
            'cert_common_name',
            'cert_validity_period_days',
            'cert_issuer_name',
            'cert_entry_timestamp',
            'cert_not_before',
            'cert_not_after',
            'dns_ttl',
            'timestamp',
            'last_update',
            'updated_timestamp',
            'discovery_timestamp',
            'query_timestamp',
        ]

    def get_name(self) -> str:
        """Return the provider name."""
        return "correlation"

    def get_display_name(self) -> str:
        """Return the provider display name for the UI."""
        return "Correlation Engine"

    def requires_api_key(self) -> bool:
        """Return True if the provider requires an API key."""
        return False

    def get_eligibility(self) -> Dict[str, bool]:
        """Return a dictionary indicating if the provider can query domains and/or IPs."""
        return {'domains': True, 'ips': True}

    def is_available(self) -> bool:
        """Check if the provider is available and properly configured."""
        return True

    def query_domain(self, domain: str) -> ProviderResult:
        """
        Query the provider for information about a domain.

        Args:
            domain: Identifier of the domain node to correlate.

        Returns:
            ProviderResult holding any correlations found for the node.
        """
        return self._find_correlations(domain)

    def query_ip(self, ip: str) -> ProviderResult:
        """
        Query the provider for information about an IP address.

        Args:
            ip: Identifier of the IP node to correlate.

        Returns:
            ProviderResult holding any correlations found for the node.
        """
        return self._find_correlations(ip)

    def set_graph_manager(self, graph_manager: GraphManager):
        """
        Set the graph manager for the provider to use.
        """
        self.graph = graph_manager

    def _should_exclude(self, attr_name: Any, attr_value: Any) -> bool:
        """
        Decide whether an attribute is unsuitable for correlation.

        Args:
            attr_name: Attribute name as stored on the node (may be missing).
            attr_value: Attribute value as stored on the node.

        Returns:
            True when the attribute must be skipped, False when its value
            should be indexed.
        """
        # A missing or non-string name cannot be matched against
        # EXCLUDED_KEYS (the substring test would raise TypeError), so skip it.
        if not isinstance(attr_name, str):
            return True
        if any(excluded_key in attr_name for excluded_key in self.EXCLUDED_KEYS):
            return True
        # bool is a subclass of int, so it has to be rejected before the
        # numeric branch below.
        if isinstance(attr_value, bool):
            return True
        if isinstance(attr_value, str):
            return (
                len(attr_value) < 4
                or self.date_pattern.match(attr_value) is not None
                or attr_value.lower() in self._PLACEHOLDER_VALUES
            )
        if isinstance(attr_value, (int, float)):
            return attr_value == 0 or attr_value == 1 or abs(attr_value) > 1000000
        # None, containers and any other type are never correlated.
        return True

    def _find_correlations(self, node_id: str) -> ProviderResult:
        """
        Find correlations for a given node.

        Each eligible attribute value of the node is recorded in
        ``correlation_index``; whenever a value is now shared by more than one
        node, the matching relationships are added to the result.

        Args:
            node_id: Identifier of the node whose attributes are inspected.

        Returns:
            ProviderResult with the correlations found; empty when no graph
            has been set or the node is not in the graph.
        """
        result = ProviderResult()
        # Nothing to do without a graph or for a node the graph does not know.
        if not self.graph or not self.graph.graph.has_node(node_id):
            return result

        node_attributes = self.graph.graph.nodes[node_id].get('attributes', [])

        for attr in node_attributes:
            attr_name = attr.get('name')
            attr_value = attr.get('value')
            attr_provider = attr.get('provider', 'unknown')

            if self._should_exclude(attr_name, attr_value):
                continue

            entry = self.correlation_index.setdefault(
                attr_value, {'nodes': set(), 'sources': []}
            )
            entry['nodes'].add(node_id)

            # Record each (node, provider/attribute path) pair only once.
            path = f"{attr_provider}_{attr_name}"
            already_recorded = any(
                source['node_id'] == node_id and source['path'] == path
                for source in entry['sources']
            )
            if not already_recorded:
                entry['sources'].append({
                    'node_id': node_id,
                    'provider': attr_provider,
                    'attribute': attr_name,
                    'path': path,
                })

            if len(entry['nodes']) > 1:
                self._create_correlation_relationships(attr_value, entry, result)

        return result

    def _create_correlation_relationships(self, value: Any, correlation_data: Dict[str, Any], result: ProviderResult):
        """
        Create correlation relationships and add them to the provider result.

        Args:
            value: The attribute value shared by the correlated nodes.
            correlation_data: Index entry for ``value`` with its ``'nodes'``
                set and ``'sources'`` list.
            result: ProviderResult that receives the correlation attribute and
                one relationship per recorded source.
        """
        # NOTE(review): hash() of a str is salted per interpreter process
        # unless PYTHONHASHSEED is fixed, so this id is only stable within a
        # single run - confirm nothing persists it across runs.
        correlation_node_id = f"corr_{hash(str(value)) & 0x7FFFFFFF}"
        nodes = correlation_data['nodes']
        sources = correlation_data['sources']

        # Add the correlation node as an attribute to the result. Both
        # metadata lists are snapshots, so later growth of the index does not
        # change a result that was already emitted.
        result.add_attribute(
            target_node=correlation_node_id,
            name="correlation_value",
            value=value,
            attr_type=str(type(value)),
            provider=self.name,
            confidence=0.9,
            metadata={
                'correlated_nodes': list(nodes),
                'sources': list(sources),
            }
        )

        for source in sources:
            node_id = source['node_id']
            provider = source['provider']
            attribute = source['attribute']
            relationship_label = f"corr_{provider}_{attribute}"

            # Link the originating node to the shared correlation node.
            result.add_relationship(
                source_node=node_id,
                target_node=correlation_node_id,
                relationship_type=relationship_label,
                provider=self.name,
                confidence=0.9,
                raw_data={
                    'correlation_value': value,
                    'original_attribute': attribute,
                    'correlation_type': 'attribute_matching'
                }
            )
|
||||
@@ -27,14 +27,25 @@ class ShodanProvider(BaseProvider):
|
||||
)
|
||||
self.base_url = "https://api.shodan.io"
|
||||
self.api_key = self.config.get_api_key('shodan')
|
||||
|
||||
self._is_active = self._check_api_connection()
|
||||
|
||||
# Initialize cache directory
|
||||
self.cache_dir = Path('cache') / 'shodan'
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _check_api_connection(self) -> bool:
    """
    Probe the Shodan ``/api-info`` endpoint with the configured API key.

    Returns:
        True only when an API key is set and the endpoint answers with
        HTTP 200; False when the key is missing, the status differs, or the
        request raises a ``requests`` exception.
    """
    reachable = False
    if self.api_key:
        probe_url = f"{self.base_url}/api-info?key={self.api_key}"
        try:
            reachable = self.session.get(probe_url, timeout=5).status_code == 200
        except requests.exceptions.RequestException:
            # Any transport-level failure counts as "not reachable".
            reachable = False
    return reachable
|
||||
|
||||
def is_available(self) -> bool:
    """
    Check if Shodan provider is available (has valid API key in this session).

    Returns:
        True only when the connection probe stored in ``_is_active``
        succeeded and the API key is a non-blank string.
    """
    # A single return: the earlier key-only return made the _is_active
    # check unreachable.
    has_key = self.api_key is not None and len(self.api_key.strip()) > 0
    return bool(self._is_active) and has_key
|
||||
|
||||
def get_name(self) -> str:
|
||||
"""Return the provider name."""
|
||||
@@ -96,18 +107,6 @@ class ShodanProvider(BaseProvider):
|
||||
except (json.JSONDecodeError, ValueError, KeyError):
|
||||
return "stale"
|
||||
|
||||
def query_domain(self, domain: str) -> ProviderResult:
    """
    Return an empty result; the Shodan provider no longer handles domains.

    Args:
        domain: Domain to investigate (ignored).

    Returns:
        A freshly constructed, empty ProviderResult.
    """
    empty_result = ProviderResult()
    return empty_result
|
||||
|
||||
def query_ip(self, ip: str) -> ProviderResult:
|
||||
"""
|
||||
Query Shodan for information about an IP address (IPv4 or IPv6), with caching of processed data.
|
||||
|
||||
Reference in New Issue
Block a user