diff --git a/core/scanner.py b/core/scanner.py index 013f2d4..6ce05f3 100644 --- a/core/scanner.py +++ b/core/scanner.py @@ -491,7 +491,7 @@ class Scanner: if provider_results is None: provider_successful = False elif not self._is_stop_requested(): - discovered, is_large_entity = self._process_provider_results_forensic( + discovered, is_large_entity = self._process_provider_results( target, provider, provider_results, node_attributes, depth ) if is_large_entity: @@ -711,7 +711,7 @@ class Scanner: self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)") - def _process_provider_results_forensic(self, target: str, provider, results: List, + def _process_provider_results(self, target: str, provider, results: List, node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]: """Process provider results, returns (discovered_targets, is_large_entity).""" provider_name = provider.get_name() @@ -741,8 +741,14 @@ class Scanner: discovery_method=f"{provider_name}_query_depth_{current_depth}" ) + # Collect attributes for the source node self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source]) + # If the relationship is asn_membership, collect attributes for the target ASN node + if rel_type == 'asn_membership': + self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target]) + + if isinstance(rel_target, list): # If the target is a list, iterate and process each item for single_target in rel_target: @@ -909,18 +915,22 @@ class Scanner: attributes.setdefault('related_domains_san', []).append(target) elif provider_name == 'shodan': + # This logic will now apply to the correct node (ASN or IP) shodan_attributes = attributes.setdefault('shodan', {}) for key, value in raw_data.items(): if key not in shodan_attributes or not shodan_attributes.get(key): shodan_attributes[key] = value + if _is_valid_ip(node_id): + if 'ports' in raw_data: + attributes['ports'] = raw_data['ports'] + if 'os' in raw_data and raw_data['os']: + attributes['os'] = raw_data['os'] + if rel_type == "asn_membership": - attributes['asn'] = { - 'id': target, - 'description': raw_data.get('org', ''), - 'isp': raw_data.get('isp', ''), - 'country': raw_data.get('country', '') - } + # This is the key change: these attributes are for the target (the ASN), + # not the source (the IP). We will add them to the ASN node later. + pass record_type_name = rel_type if record_type_name not in attributes: diff --git a/providers/shodan_provider.py b/providers/shodan_provider.py index 579a994..2c84beb 100644 --- a/providers/shodan_provider.py +++ b/providers/shodan_provider.py @@ -1,15 +1,20 @@ # dnsrecon/providers/shodan_provider.py import json +import os +from pathlib import Path from typing import List, Dict, Any, Tuple +from datetime import datetime, timezone +import requests + from .base_provider import BaseProvider from utils.helpers import _is_valid_ip, _is_valid_domain class ShodanProvider(BaseProvider): """ - Provider for querying Shodan API for IP address and hostname information. - Now uses session-specific API keys. + Provider for querying Shodan API for IP address information. + Now uses session-specific API keys, is limited to IP-only queries, and includes caching. """ def __init__(self, name=None, session_config=None): @@ -22,6 +27,10 @@ class ShodanProvider(BaseProvider): ) self.base_url = "https://api.shodan.io" self.api_key = self.config.get_api_key('shodan') + + # Initialize cache directory + self.cache_dir = Path('cache') / 'shodan' + self.cache_dir.mkdir(parents=True, exist_ok=True) def is_available(self) -> bool: """Check if Shodan provider is available (has valid API key in this session).""" @@ -33,7 +42,7 @@ class ShodanProvider(BaseProvider): def get_display_name(self) -> str: """Return the provider display name for the UI.""" - return "shodan" + return "Shodan" def requires_api_key(self) -> bool: """Return True if the provider requires an API key.""" @@ -41,267 +50,146 @@ class ShodanProvider(BaseProvider): def get_eligibility(self) -> Dict[str, bool]: """Return a dictionary indicating if the provider can query domains and/or IPs.""" - return {'domains': True, 'ips': True} + return {'domains': False, 'ips': True} + + def _get_cache_file_path(self, ip: str) -> Path: + """Generate cache file path for an IP address.""" + safe_ip = ip.replace('.', '_').replace(':', '_') + return self.cache_dir / f"{safe_ip}.json" + + def _get_cache_status(self, cache_file_path: Path) -> str: + """ + Check cache status for an IP. + Returns: 'not_found', 'fresh', or 'stale' + """ + if not cache_file_path.exists(): + return "not_found" + + try: + with open(cache_file_path, 'r') as f: + cache_data = json.load(f) + + last_query_str = cache_data.get("last_upstream_query") + if not last_query_str: + return "stale" + + last_query = datetime.fromisoformat(last_query_str.replace('Z', '+00:00')) + hours_since_query = (datetime.now(timezone.utc) - last_query).total_seconds() / 3600 + + cache_timeout = self.config.cache_timeout_hours + if hours_since_query < cache_timeout: + return "fresh" + else: + return "stale" + + except (json.JSONDecodeError, ValueError, KeyError): + return "stale" + + def _load_from_cache(self, cache_file_path: Path) -> Dict[str, Any]: + """Load Shodan data from a cache file.""" + try: + with open(cache_file_path, 'r') as f: + return json.load(f) + except (json.JSONDecodeError, FileNotFoundError): + return {} + + def _save_to_cache(self, cache_file_path: Path, data: Dict[str, Any]) -> None: + """Save Shodan data to a cache file.""" + try: + data['last_upstream_query'] = datetime.now(timezone.utc).isoformat() + with open(cache_file_path, 'w') as f: + json.dump(data, f, separators=(',', ':')) + except Exception as e: + self.logger.logger.warning(f"Failed to save Shodan cache for {cache_file_path.name}: {e}") def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: """ - Query Shodan for information about a domain. - Uses Shodan's hostname search to find associated IPs. - - Args: - domain: Domain to investigate - - Returns: - List of relationships discovered from Shodan data + Domain queries are no longer supported for the Shodan provider. """ - if not _is_valid_domain(domain) or not self.is_available(): - return [] - - relationships = [] - - try: - # Search for hostname in Shodan - search_query = f"hostname:{domain}" - url = f"{self.base_url}/shodan/host/search" - params = { - 'key': self.api_key, - 'query': search_query, - 'minify': True # Get minimal data to reduce bandwidth - } - - response = self.make_request(url, method="GET", params=params, target_indicator=domain) - - if not response or response.status_code != 200: - return [] - - data = response.json() - - if 'matches' not in data: - return [] - - # Process search results - for match in data['matches']: - ip_address = match.get('ip_str') - hostnames = match.get('hostnames', []) - - if ip_address and domain in hostnames: - raw_data = { - 'ip_address': ip_address, - 'hostnames': hostnames, - 'country': match.get('location', {}).get('country_name', ''), - 'city': match.get('location', {}).get('city', ''), - 'isp': match.get('isp', ''), - 'org': match.get('org', ''), - 'ports': match.get('ports', []), - 'last_update': match.get('last_update', '') - } - - relationships.append(( - domain, - ip_address, - 'a_record', # Domain resolves to IP - 0.8, - raw_data - )) - - self.log_relationship_discovery( - source_node=domain, - target_node=ip_address, - relationship_type='a_record', - confidence_score=0.8, - raw_data=raw_data, - discovery_method="shodan_hostname_search" - ) - - # Also create relationships to other hostnames on the same IP - for hostname in hostnames: - if hostname != domain and _is_valid_domain(hostname): - hostname_raw_data = { - 'shared_ip': ip_address, - 'all_hostnames': hostnames, - 'discovery_context': 'shared_hosting' - } - - relationships.append(( - domain, - hostname, - 'passive_dns', # Shared hosting relationship - 0.6, # Lower confidence for shared hosting - hostname_raw_data - )) - - self.log_relationship_discovery( - source_node=domain, - target_node=hostname, - relationship_type='passive_dns', - confidence_score=0.6, - raw_data=hostname_raw_data, - discovery_method="shodan_shared_hosting" - ) - - except json.JSONDecodeError as e: - self.logger.logger.error(f"Failed to parse JSON response from Shodan: {e}") - - return relationships + return [] def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: """ - Query Shodan for information about an IP address. - - Args: - ip: IP address to investigate - - Returns: - List of relationships discovered from Shodan IP data + Query Shodan for information about an IP address, with caching. """ if not _is_valid_ip(ip) or not self.is_available(): return [] + + cache_file = self._get_cache_file_path(ip) + cache_status = self._get_cache_status(cache_file) + + data = {} + + try: + if cache_status == "fresh": + data = self._load_from_cache(cache_file) + self.logger.logger.info(f"Using cached Shodan data for {ip}") + else: # "stale" or "not_found" + url = f"{self.base_url}/shodan/host/{ip}" + params = {'key': self.api_key} + response = self.make_request(url, method="GET", params=params, target_indicator=ip) + + if response and response.status_code == 200: + data = response.json() + self._save_to_cache(cache_file, data) + elif cache_status == "stale": + # If API fails on a stale cache, use the old data + data = self._load_from_cache(cache_file) + except requests.exceptions.RequestException as e: + self.logger.logger.error(f"Shodan API query failed for {ip}: {e}") + if cache_status == "stale": + data = self._load_from_cache(cache_file) + + if not data: + return [] + + return self._process_shodan_data(ip, data) + + def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: + """ + Process Shodan data to extract relationships. + """ relationships = [] - try: - # Query Shodan host information - url = f"{self.base_url}/shodan/host/{ip}" - params = {'key': self.api_key} - - response = self.make_request(url, method="GET", params=params, target_indicator=ip) - - if not response or response.status_code != 200: - return [] - - data = response.json() - - # Extract hostname relationships - hostnames = data.get('hostnames', []) - for hostname in hostnames: - if _is_valid_domain(hostname): - raw_data = { - 'ip_address': ip, - 'hostname': hostname, - 'country': data.get('country_name', ''), - 'city': data.get('city', ''), - 'isp': data.get('isp', ''), - 'org': data.get('org', ''), - 'asn': data.get('asn', ''), - 'ports': data.get('ports', []), - 'last_update': data.get('last_update', ''), - 'os': data.get('os', '') - } - - relationships.append(( - ip, - hostname, - 'a_record', # IP resolves to hostname - 0.8, - raw_data - )) - - self.log_relationship_discovery( - source_node=ip, - target_node=hostname, - relationship_type='a_record', - confidence_score=0.8, - raw_data=raw_data, - discovery_method="shodan_host_lookup" - ) - - # Extract ASN relationship if available - asn = data.get('asn') - if asn: - # Ensure the ASN starts with "AS" - if isinstance(asn, str) and asn.startswith('AS'): - asn_name = asn - asn_number = asn[2:] - else: - asn_name = f"AS{asn}" - asn_number = str(asn) - - asn_raw_data = { - 'ip_address': ip, - 'asn': asn_number, - 'isp': data.get('isp', ''), - 'org': data.get('org', '') - } - + # Extract hostname relationships + hostnames = data.get('hostnames', []) + for hostname in hostnames: + if _is_valid_domain(hostname): relationships.append(( ip, - asn_name, - 'asn_membership', - 0.7, - asn_raw_data + hostname, + 'a_record', + 0.8, + data )) - self.log_relationship_discovery( source_node=ip, - target_node=asn_name, - relationship_type='asn_membership', - confidence_score=0.7, - raw_data=asn_raw_data, - discovery_method="shodan_asn_lookup" + target_node=hostname, + relationship_type='a_record', + confidence_score=0.8, + raw_data=data, + discovery_method="shodan_host_lookup" ) - except json.JSONDecodeError as e: - self.logger.logger.error(f"Failed to parse JSON response from Shodan: {e}") - - return relationships - - def search_by_organization(self, org_name: str) -> List[Dict[str, Any]]: - """ - Search Shodan for hosts belonging to a specific organization. - - Args: - org_name: Organization name to search for - - Returns: - List of host information dictionaries - """ - if not self.is_available(): - return [] - - try: - search_query = f"org:\"{org_name}\"" - url = f"{self.base_url}/shodan/host/search" - params = { - 'key': self.api_key, - 'query': search_query, - 'minify': True - } - - response = self.make_request(url, method="GET", params=params, target_indicator=org_name) - - if response and response.status_code == 200: - data = response.json() - return data.get('matches', []) - - except Exception as e: - self.logger.logger.error(f"Error searching Shodan by organization {org_name}: {e}") - - return [] - - def get_host_services(self, ip: str) -> List[Dict[str, Any]]: - """ - Get service information for a specific IP address. - - Args: - ip: IP address to query - - Returns: - List of service information dictionaries - """ - if not _is_valid_ip(ip) or not self.is_available(): - return [] - - try: - url = f"{self.base_url}/shodan/host/{ip}" - params = {'key': self.api_key} - - response = self.make_request(url, method="GET", params=params, target_indicator=ip) - - if response and response.status_code == 200: - data = response.json() - return data.get('data', []) # Service banners - - except Exception as e: - self.logger.logger.error(f"Error getting Shodan services for IP {ip}: {e}") - - return [] \ No newline at end of file + # Extract ASN relationship + asn = data.get('asn') + if asn: + asn_name = f"AS{asn[2:]}" if isinstance(asn, str) and asn.startswith('AS') else f"AS{asn}" + relationships.append(( + ip, + asn_name, + 'asn_membership', + 0.7, + data + )) + self.log_relationship_discovery( + source_node=ip, + target_node=asn_name, + relationship_type='asn_membership', + confidence_score=0.7, + raw_data=data, + discovery_method="shodan_asn_lookup" + ) + + return relationships \ No newline at end of file