Update the Shodan provider to query IP addresses only

This commit is contained in:
overcuriousity 2025-09-16 00:57:24 +02:00
parent f0f80be955
commit baa57bfac2
2 changed files with 153 additions and 255 deletions

View File

@ -491,7 +491,7 @@ class Scanner:
if provider_results is None: if provider_results is None:
provider_successful = False provider_successful = False
elif not self._is_stop_requested(): elif not self._is_stop_requested():
discovered, is_large_entity = self._process_provider_results_forensic( discovered, is_large_entity = self._process_provider_results(
target, provider, provider_results, node_attributes, depth target, provider, provider_results, node_attributes, depth
) )
if is_large_entity: if is_large_entity:
@ -711,7 +711,7 @@ class Scanner:
self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)") self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)")
def _process_provider_results_forensic(self, target: str, provider, results: List, def _process_provider_results(self, target: str, provider, results: List,
node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]: node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]:
"""Process provider results, returns (discovered_targets, is_large_entity).""" """Process provider results, returns (discovered_targets, is_large_entity)."""
provider_name = provider.get_name() provider_name = provider.get_name()
@ -741,8 +741,14 @@ class Scanner:
discovery_method=f"{provider_name}_query_depth_{current_depth}" discovery_method=f"{provider_name}_query_depth_{current_depth}"
) )
# Collect attributes for the source node
self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source]) self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])
# If the relationship is asn_membership, collect attributes for the target ASN node
if rel_type == 'asn_membership':
self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target])
if isinstance(rel_target, list): if isinstance(rel_target, list):
# If the target is a list, iterate and process each item # If the target is a list, iterate and process each item
for single_target in rel_target: for single_target in rel_target:
@ -909,18 +915,22 @@ class Scanner:
attributes.setdefault('related_domains_san', []).append(target) attributes.setdefault('related_domains_san', []).append(target)
elif provider_name == 'shodan': elif provider_name == 'shodan':
# This logic will now apply to the correct node (ASN or IP)
shodan_attributes = attributes.setdefault('shodan', {}) shodan_attributes = attributes.setdefault('shodan', {})
for key, value in raw_data.items(): for key, value in raw_data.items():
if key not in shodan_attributes or not shodan_attributes.get(key): if key not in shodan_attributes or not shodan_attributes.get(key):
shodan_attributes[key] = value shodan_attributes[key] = value
if _is_valid_ip(node_id):
if 'ports' in raw_data:
attributes['ports'] = raw_data['ports']
if 'os' in raw_data and raw_data['os']:
attributes['os'] = raw_data['os']
if rel_type == "asn_membership": if rel_type == "asn_membership":
attributes['asn'] = { # This is the key change: these attributes are for the target (the ASN),
'id': target, # not the source (the IP). We will add them to the ASN node later.
'description': raw_data.get('org', ''), pass
'isp': raw_data.get('isp', ''),
'country': raw_data.get('country', '')
}
record_type_name = rel_type record_type_name = rel_type
if record_type_name not in attributes: if record_type_name not in attributes:

View File

@ -1,15 +1,20 @@
# dnsrecon/providers/shodan_provider.py # dnsrecon/providers/shodan_provider.py
import json import json
import os
from pathlib import Path
from typing import List, Dict, Any, Tuple from typing import List, Dict, Any, Tuple
from datetime import datetime, timezone
import requests
from .base_provider import BaseProvider from .base_provider import BaseProvider
from utils.helpers import _is_valid_ip, _is_valid_domain from utils.helpers import _is_valid_ip, _is_valid_domain
class ShodanProvider(BaseProvider): class ShodanProvider(BaseProvider):
""" """
Provider for querying Shodan API for IP address and hostname information. Provider for querying Shodan API for IP address information.
Now uses session-specific API keys. Now uses session-specific API keys, is limited to IP-only queries, and includes caching.
""" """
def __init__(self, name=None, session_config=None): def __init__(self, name=None, session_config=None):
@ -23,6 +28,10 @@ class ShodanProvider(BaseProvider):
self.base_url = "https://api.shodan.io" self.base_url = "https://api.shodan.io"
self.api_key = self.config.get_api_key('shodan') self.api_key = self.config.get_api_key('shodan')
# Initialize cache directory
self.cache_dir = Path('cache') / 'shodan'
self.cache_dir.mkdir(parents=True, exist_ok=True)
def is_available(self) -> bool: def is_available(self) -> bool:
"""Check if Shodan provider is available (has valid API key in this session).""" """Check if Shodan provider is available (has valid API key in this session)."""
return self.api_key is not None and len(self.api_key.strip()) > 0 return self.api_key is not None and len(self.api_key.strip()) > 0
@ -33,7 +42,7 @@ class ShodanProvider(BaseProvider):
def get_display_name(self) -> str: def get_display_name(self) -> str:
"""Return the provider display name for the UI.""" """Return the provider display name for the UI."""
return "shodan" return "Shodan"
def requires_api_key(self) -> bool: def requires_api_key(self) -> bool:
"""Return True if the provider requires an API key.""" """Return True if the provider requires an API key."""
@ -41,267 +50,146 @@ class ShodanProvider(BaseProvider):
def get_eligibility(self) -> Dict[str, bool]: def get_eligibility(self) -> Dict[str, bool]:
"""Return a dictionary indicating if the provider can query domains and/or IPs.""" """Return a dictionary indicating if the provider can query domains and/or IPs."""
return {'domains': True, 'ips': True} return {'domains': False, 'ips': True}
def _get_cache_file_path(self, ip: str) -> Path:
    """Build the path of the JSON cache file for ``ip`` under ``self.cache_dir``.

    The separators of IPv4 ('.') and IPv6 (':') notation are swapped for
    underscores so the address can serve as a file name.
    """
    # One translate pass performs both character substitutions.
    file_stem = ip.translate(str.maketrans({'.': '_', ':': '_'}))
    return self.cache_dir / (file_stem + ".json")
def _get_cache_status(self, cache_file_path: Path) -> str:
    """
    Classify the cache entry stored at ``cache_file_path``.

    Args:
        cache_file_path: Location of the JSON cache file to inspect.

    Returns:
        'not_found' when the file does not exist;
        'fresh' when its ``last_upstream_query`` timestamp is younger than
        ``self.config.cache_timeout_hours``;
        'stale' otherwise, including every unreadable or malformed entry
        (bad JSON, non-object payload, missing or non-string timestamp).
    """
    if not cache_file_path.exists():
        return "not_found"

    # Keep the try bodies minimal: only reading/parsing may fail here.
    try:
        with open(cache_file_path, 'r', encoding='utf-8') as f:
            cache_data = json.load(f)
    except (OSError, ValueError):
        # OSError: file vanished or is unreadable after the exists() check.
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError.
        return "stale"

    # A valid JSON document need not be an object; anything else has no timestamp.
    if not isinstance(cache_data, dict):
        return "stale"

    last_query_str = cache_data.get("last_upstream_query")
    if not last_query_str or not isinstance(last_query_str, str):
        return "stale"

    try:
        last_query = datetime.fromisoformat(last_query_str.replace('Z', '+00:00'))
    except ValueError:
        return "stale"

    # A timestamp without an offset cannot be subtracted from an aware
    # datetime (TypeError); treat it as UTC, which is what _save_to_cache writes.
    if last_query.tzinfo is None:
        last_query = last_query.replace(tzinfo=timezone.utc)

    hours_since_query = (datetime.now(timezone.utc) - last_query).total_seconds() / 3600
    cache_timeout = self.config.cache_timeout_hours
    return "fresh" if hours_since_query < cache_timeout else "stale"
def _load_from_cache(self, cache_file_path: Path) -> Dict[str, Any]:
    """
    Load Shodan data from a cache file.

    Args:
        cache_file_path: Location of the JSON cache file to read.

    Returns:
        The cached JSON object, or an empty dict when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON object.
    """
    try:
        with open(cache_file_path, 'r', encoding='utf-8') as f:
            cached = json.load(f)
    except (OSError, ValueError):
        # OSError covers FileNotFoundError plus permission/is-a-directory errors;
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        return {}

    # The declared contract is a dict; a list/str/number payload is unusable.
    return cached if isinstance(cached, dict) else {}
def _save_to_cache(self, cache_file_path: Path, data: Dict[str, Any]) -> None:
    """
    Save Shodan data to a cache file.

    The payload is written to a sibling temporary file and then moved over
    the target, so a failed or interrupted write never truncates an
    existing cache entry.

    Args:
        cache_file_path: Destination of the JSON cache file.
        data: Payload to store. NOTE: it is modified in place; the key
            ``last_upstream_query`` is set to the current UTC time in
            ISO 8601 form before writing.

    Failures are logged as warnings and otherwise ignored (best effort).
    """
    import os  # function-scope import keeps this block self-contained

    tmp_path = Path(str(cache_file_path) + '.tmp')
    try:
        data['last_upstream_query'] = datetime.now(timezone.utc).isoformat()
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, separators=(',', ':'))
        # Replace the old entry only once the new one is completely written.
        os.replace(tmp_path, cache_file_path)
    except Exception as e:
        self.logger.logger.warning(f"Failed to save Shodan cache for {cache_file_path.name}: {e}")
        # Do not leave a half-written temporary file behind.
        try:
            tmp_path.unlink()
        except OSError:
            pass
def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
""" """
Query Shodan for information about a domain. Domain queries are no longer supported for the Shodan provider.
Uses Shodan's hostname search to find associated IPs.
Args:
domain: Domain to investigate
Returns:
List of relationships discovered from Shodan data
""" """
if not _is_valid_domain(domain) or not self.is_available(): return []
return []
relationships = []
try:
# Search for hostname in Shodan
search_query = f"hostname:{domain}"
url = f"{self.base_url}/shodan/host/search"
params = {
'key': self.api_key,
'query': search_query,
'minify': True # Get minimal data to reduce bandwidth
}
response = self.make_request(url, method="GET", params=params, target_indicator=domain)
if not response or response.status_code != 200:
return []
data = response.json()
if 'matches' not in data:
return []
# Process search results
for match in data['matches']:
ip_address = match.get('ip_str')
hostnames = match.get('hostnames', [])
if ip_address and domain in hostnames:
raw_data = {
'ip_address': ip_address,
'hostnames': hostnames,
'country': match.get('location', {}).get('country_name', ''),
'city': match.get('location', {}).get('city', ''),
'isp': match.get('isp', ''),
'org': match.get('org', ''),
'ports': match.get('ports', []),
'last_update': match.get('last_update', '')
}
relationships.append((
domain,
ip_address,
'a_record', # Domain resolves to IP
0.8,
raw_data
))
self.log_relationship_discovery(
source_node=domain,
target_node=ip_address,
relationship_type='a_record',
confidence_score=0.8,
raw_data=raw_data,
discovery_method="shodan_hostname_search"
)
# Also create relationships to other hostnames on the same IP
for hostname in hostnames:
if hostname != domain and _is_valid_domain(hostname):
hostname_raw_data = {
'shared_ip': ip_address,
'all_hostnames': hostnames,
'discovery_context': 'shared_hosting'
}
relationships.append((
domain,
hostname,
'passive_dns', # Shared hosting relationship
0.6, # Lower confidence for shared hosting
hostname_raw_data
))
self.log_relationship_discovery(
source_node=domain,
target_node=hostname,
relationship_type='passive_dns',
confidence_score=0.6,
raw_data=hostname_raw_data,
discovery_method="shodan_shared_hosting"
)
except json.JSONDecodeError as e:
self.logger.logger.error(f"Failed to parse JSON response from Shodan: {e}")
return relationships
def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]: def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
""" """
Query Shodan for information about an IP address. Query Shodan for information about an IP address, with caching.
Args:
ip: IP address to investigate
Returns:
List of relationships discovered from Shodan IP data
""" """
if not _is_valid_ip(ip) or not self.is_available(): if not _is_valid_ip(ip) or not self.is_available():
return [] return []
cache_file = self._get_cache_file_path(ip)
cache_status = self._get_cache_status(cache_file)
data = {}
try:
if cache_status == "fresh":
data = self._load_from_cache(cache_file)
self.logger.logger.info(f"Using cached Shodan data for {ip}")
else: # "stale" or "not_found"
url = f"{self.base_url}/shodan/host/{ip}"
params = {'key': self.api_key}
response = self.make_request(url, method="GET", params=params, target_indicator=ip)
if response and response.status_code == 200:
data = response.json()
self._save_to_cache(cache_file, data)
elif cache_status == "stale":
# If API fails on a stale cache, use the old data
data = self._load_from_cache(cache_file)
except requests.exceptions.RequestException as e:
self.logger.logger.error(f"Shodan API query failed for {ip}: {e}")
if cache_status == "stale":
data = self._load_from_cache(cache_file)
if not data:
return []
return self._process_shodan_data(ip, data)
def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Process Shodan data to extract relationships.
"""
relationships = [] relationships = []
try: # Extract hostname relationships
# Query Shodan host information hostnames = data.get('hostnames', [])
url = f"{self.base_url}/shodan/host/{ip}" for hostname in hostnames:
params = {'key': self.api_key} if _is_valid_domain(hostname):
response = self.make_request(url, method="GET", params=params, target_indicator=ip)
if not response or response.status_code != 200:
return []
data = response.json()
# Extract hostname relationships
hostnames = data.get('hostnames', [])
for hostname in hostnames:
if _is_valid_domain(hostname):
raw_data = {
'ip_address': ip,
'hostname': hostname,
'country': data.get('country_name', ''),
'city': data.get('city', ''),
'isp': data.get('isp', ''),
'org': data.get('org', ''),
'asn': data.get('asn', ''),
'ports': data.get('ports', []),
'last_update': data.get('last_update', ''),
'os': data.get('os', '')
}
relationships.append((
ip,
hostname,
'a_record', # IP resolves to hostname
0.8,
raw_data
))
self.log_relationship_discovery(
source_node=ip,
target_node=hostname,
relationship_type='a_record',
confidence_score=0.8,
raw_data=raw_data,
discovery_method="shodan_host_lookup"
)
# Extract ASN relationship if available
asn = data.get('asn')
if asn:
# Ensure the ASN starts with "AS"
if isinstance(asn, str) and asn.startswith('AS'):
asn_name = asn
asn_number = asn[2:]
else:
asn_name = f"AS{asn}"
asn_number = str(asn)
asn_raw_data = {
'ip_address': ip,
'asn': asn_number,
'isp': data.get('isp', ''),
'org': data.get('org', '')
}
relationships.append(( relationships.append((
ip, ip,
asn_name, hostname,
'asn_membership', 'a_record',
0.7, 0.8,
asn_raw_data data
)) ))
self.log_relationship_discovery( self.log_relationship_discovery(
source_node=ip, source_node=ip,
target_node=asn_name, target_node=hostname,
relationship_type='asn_membership', relationship_type='a_record',
confidence_score=0.7, confidence_score=0.8,
raw_data=asn_raw_data, raw_data=data,
discovery_method="shodan_asn_lookup" discovery_method="shodan_host_lookup"
) )
except json.JSONDecodeError as e: # Extract ASN relationship
self.logger.logger.error(f"Failed to parse JSON response from Shodan: {e}") asn = data.get('asn')
if asn:
asn_name = f"AS{asn[2:]}" if isinstance(asn, str) and asn.startswith('AS') else f"AS{asn}"
relationships.append((
ip,
asn_name,
'asn_membership',
0.7,
data
))
self.log_relationship_discovery(
source_node=ip,
target_node=asn_name,
relationship_type='asn_membership',
confidence_score=0.7,
raw_data=data,
discovery_method="shodan_asn_lookup"
)
return relationships return relationships
def search_by_organization(self, org_name: str) -> List[Dict[str, Any]]:
    """
    Look up hosts that Shodan attributes to an organization.

    Sends an ``org:"<name>"`` query to the ``/shodan/host/search`` endpoint
    with ``minify`` enabled.

    Args:
        org_name: Organization name to search for

    Returns:
        The ``matches`` list of the response, or an empty list when the
        provider is unavailable, the response is not HTTP 200, or an
        error occurs (errors are logged, not raised).
    """
    if not self.is_available():
        return []

    matches: List[Dict[str, Any]] = []
    try:
        response = self.make_request(
            f"{self.base_url}/shodan/host/search",
            method="GET",
            params={
                'key': self.api_key,
                'query': f"org:\"{org_name}\"",
                'minify': True
            },
            target_indicator=org_name
        )
        if response and response.status_code == 200:
            matches = response.json().get('matches', [])
    except Exception as e:
        self.logger.logger.error(f"Error searching Shodan by organization {org_name}: {e}")

    return matches
def get_host_services(self, ip: str) -> List[Dict[str, Any]]:
    """
    Fetch the service entries Shodan holds for one IP address.

    Queries ``/shodan/host/<ip>`` and extracts the ``data`` list of the
    response (the service banners).

    Args:
        ip: IP address to query

    Returns:
        The ``data`` list of the response, or an empty list when the IP is
        invalid, the provider is unavailable, the response is not HTTP 200,
        or an error occurs (errors are logged, not raised).
    """
    if not (_is_valid_ip(ip) and self.is_available()):
        return []

    services: List[Dict[str, Any]] = []
    try:
        response = self.make_request(
            f"{self.base_url}/shodan/host/{ip}",
            method="GET",
            params={'key': self.api_key},
            target_indicator=ip
        )
        if response and response.status_code == 200:
            services = response.json().get('data', [])  # Service banners
    except Exception as e:
        self.logger.logger.error(f"Error getting Shodan services for IP {ip}: {e}")

    return services