Update the Shodan provider to query by IP address only

This commit is contained in:
overcuriousity 2025-09-16 00:57:24 +02:00
parent f0f80be955
commit baa57bfac2
2 changed files with 153 additions and 255 deletions

View File

@ -491,7 +491,7 @@ class Scanner:
if provider_results is None:
provider_successful = False
elif not self._is_stop_requested():
discovered, is_large_entity = self._process_provider_results_forensic(
discovered, is_large_entity = self._process_provider_results(
target, provider, provider_results, node_attributes, depth
)
if is_large_entity:
@ -711,7 +711,7 @@ class Scanner:
self.logger.logger.info(f"Provider state updated: {target} -> {provider_name} -> {status} ({results_count} results)")
def _process_provider_results_forensic(self, target: str, provider, results: List,
def _process_provider_results(self, target: str, provider, results: List,
node_attributes: Dict, current_depth: int) -> Tuple[Set[str], bool]:
"""Process provider results, returns (discovered_targets, is_large_entity)."""
provider_name = provider.get_name()
@ -741,8 +741,14 @@ class Scanner:
discovery_method=f"{provider_name}_query_depth_{current_depth}"
)
# Collect attributes for the source node
self._collect_node_attributes(source, provider_name, rel_type, rel_target, raw_data, node_attributes[source])
# If the relationship is asn_membership, collect attributes for the target ASN node
if rel_type == 'asn_membership':
self._collect_node_attributes(rel_target, provider_name, rel_type, source, raw_data, node_attributes[rel_target])
if isinstance(rel_target, list):
# If the target is a list, iterate and process each item
for single_target in rel_target:
@ -909,18 +915,22 @@ class Scanner:
attributes.setdefault('related_domains_san', []).append(target)
elif provider_name == 'shodan':
# This logic will now apply to the correct node (ASN or IP)
shodan_attributes = attributes.setdefault('shodan', {})
for key, value in raw_data.items():
if key not in shodan_attributes or not shodan_attributes.get(key):
shodan_attributes[key] = value
if _is_valid_ip(node_id):
if 'ports' in raw_data:
attributes['ports'] = raw_data['ports']
if 'os' in raw_data and raw_data['os']:
attributes['os'] = raw_data['os']
if rel_type == "asn_membership":
attributes['asn'] = {
'id': target,
'description': raw_data.get('org', ''),
'isp': raw_data.get('isp', ''),
'country': raw_data.get('country', '')
}
# This is the key change: these attributes are for the target (the ASN),
# not the source (the IP). We will add them to the ASN node later.
pass
record_type_name = rel_type
if record_type_name not in attributes:

View File

@ -1,15 +1,20 @@
# dnsrecon/providers/shodan_provider.py
import json
import os
from pathlib import Path
from typing import List, Dict, Any, Tuple
from datetime import datetime, timezone
import requests
from .base_provider import BaseProvider
from utils.helpers import _is_valid_ip, _is_valid_domain
class ShodanProvider(BaseProvider):
"""
Provider for querying Shodan API for IP address and hostname information.
Now uses session-specific API keys.
Provider for querying Shodan API for IP address information.
Now uses session-specific API keys, is limited to IP-only queries, and includes caching.
"""
def __init__(self, name=None, session_config=None):
@ -23,6 +28,10 @@ class ShodanProvider(BaseProvider):
self.base_url = "https://api.shodan.io"
self.api_key = self.config.get_api_key('shodan')
# Initialize cache directory
self.cache_dir = Path('cache') / 'shodan'
self.cache_dir.mkdir(parents=True, exist_ok=True)
def is_available(self) -> bool:
    """
    Report whether this session holds a usable Shodan API key.

    Returns:
        True when ``self.api_key`` is set and contains at least one
        non-whitespace character, otherwise False.
    """
    key = self.api_key
    if key is None:
        return False
    # A key consisting only of whitespace counts as missing.
    return key.strip() != ""
@ -33,7 +42,7 @@ class ShodanProvider(BaseProvider):
def get_display_name(self) -> str:
    """
    Return the provider display name for the UI.

    Returns:
        The human-readable provider name, "Shodan".
    """
    # Single return only: a leftover lowercase `return "shodan"` ahead of
    # this line previously made the capitalised name unreachable.
    return "Shodan"
def requires_api_key(self) -> bool:
"""Return True if the provider requires an API key."""
@ -41,267 +50,146 @@ class ShodanProvider(BaseProvider):
def get_eligibility(self) -> Dict[str, bool]:
    """
    Return a dictionary indicating if the provider can query domains and/or IPs.

    Returns:
        ``{'domains': False, 'ips': True}`` - this provider only answers
        IP lookups.
    """
    # Single return only: a leftover `{'domains': True, ...}` return ahead of
    # this line previously kept the provider eligible for domain targets.
    return {'domains': False, 'ips': True}
def _get_cache_file_path(self, ip: str) -> Path:
"""Generate cache file path for an IP address."""
safe_ip = ip.replace('.', '_').replace(':', '_')
return self.cache_dir / f"{safe_ip}.json"
def _get_cache_status(self, cache_file_path: Path) -> str:
"""
Check cache status for an IP.
Returns: 'not_found', 'fresh', or 'stale'
"""
if not cache_file_path.exists():
return "not_found"
try:
with open(cache_file_path, 'r') as f:
cache_data = json.load(f)
last_query_str = cache_data.get("last_upstream_query")
if not last_query_str:
return "stale"
last_query = datetime.fromisoformat(last_query_str.replace('Z', '+00:00'))
hours_since_query = (datetime.now(timezone.utc) - last_query).total_seconds() / 3600
cache_timeout = self.config.cache_timeout_hours
if hours_since_query < cache_timeout:
return "fresh"
else:
return "stale"
except (json.JSONDecodeError, ValueError, KeyError):
return "stale"
def _load_from_cache(self, cache_file_path: Path) -> Dict[str, Any]:
"""Load Shodan data from a cache file."""
try:
with open(cache_file_path, 'r') as f:
return json.load(f)
except (json.JSONDecodeError, FileNotFoundError):
return {}
def _save_to_cache(self, cache_file_path: Path, data: Dict[str, Any]) -> None:
"""Save Shodan data to a cache file."""
try:
data['last_upstream_query'] = datetime.now(timezone.utc).isoformat()
with open(cache_file_path, 'w') as f:
json.dump(data, f, separators=(',', ':'))
except Exception as e:
self.logger.logger.warning(f"Failed to save Shodan cache for {cache_file_path.name}: {e}")
def query_domain(self, domain: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
    """
    Domain queries are no longer supported for the Shodan provider.

    The method stays in place so callers that invoke it on every provider
    keep working, but it performs no lookup.

    Args:
        domain: Domain to investigate (ignored)

    Returns:
        Always an empty list
    """
    # The former hostname search was removed; it contradicted this contract
    # and left the final `return []` unreachable.
    return []
def query_ip(self, ip: str) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
    """
    Query Shodan for information about an IP address, with caching.

    A fresh cache entry is used without contacting Shodan. Otherwise the
    host endpoint is queried; a 200 response is cached and used, and when the
    request fails while a stale cache entry exists, the stale entry is used
    instead.

    Args:
        ip: IP address to investigate

    Returns:
        List of relationships discovered from Shodan IP data; empty when the
        IP is invalid, the provider is unavailable, or no data was obtained
    """
    if not _is_valid_ip(ip) or not self.is_available():
        return []

    cache_file = self._get_cache_file_path(ip)
    cache_status = self._get_cache_status(cache_file)

    data = {}

    try:
        if cache_status == "fresh":
            data = self._load_from_cache(cache_file)
            self.logger.logger.info(f"Using cached Shodan data for {ip}")
        else:  # "stale" or "not_found"
            url = f"{self.base_url}/shodan/host/{ip}"
            params = {'key': self.api_key}
            response = self.make_request(url, method="GET", params=params, target_indicator=ip)

            if response and response.status_code == 200:
                data = response.json()
                self._save_to_cache(cache_file, data)
            elif cache_status == "stale":
                # If API fails on a stale cache, use the old data
                data = self._load_from_cache(cache_file)
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError: response.json() on a 200 reply whose body is not valid
        # JSON; depending on the requests version this is not a
        # RequestException, so it is caught explicitly.
        self.logger.logger.error(f"Shodan API query failed for {ip}: {e}")
        if cache_status == "stale":
            data = self._load_from_cache(cache_file)

    # The processing step uses dict access, so an empty or non-object payload
    # yields no relationships.
    if not data or not isinstance(data, dict):
        return []

    return self._process_shodan_data(ip, data)
def _process_shodan_data(self, ip: str, data: Dict[str, Any]) -> List[Tuple[str, str, str, float, Dict[str, Any]]]:
"""
Process Shodan data to extract relationships.
"""
relationships = []
try:
# Query Shodan host information
url = f"{self.base_url}/shodan/host/{ip}"
params = {'key': self.api_key}
response = self.make_request(url, method="GET", params=params, target_indicator=ip)
if not response or response.status_code != 200:
return []
data = response.json()
# Extract hostname relationships
hostnames = data.get('hostnames', [])
for hostname in hostnames:
if _is_valid_domain(hostname):
raw_data = {
'ip_address': ip,
'hostname': hostname,
'country': data.get('country_name', ''),
'city': data.get('city', ''),
'isp': data.get('isp', ''),
'org': data.get('org', ''),
'asn': data.get('asn', ''),
'ports': data.get('ports', []),
'last_update': data.get('last_update', ''),
'os': data.get('os', '')
}
relationships.append((
ip,
hostname,
'a_record', # IP resolves to hostname
0.8,
raw_data
))
self.log_relationship_discovery(
source_node=ip,
target_node=hostname,
relationship_type='a_record',
confidence_score=0.8,
raw_data=raw_data,
discovery_method="shodan_host_lookup"
)
# Extract ASN relationship if available
asn = data.get('asn')
if asn:
# Ensure the ASN starts with "AS"
if isinstance(asn, str) and asn.startswith('AS'):
asn_name = asn
asn_number = asn[2:]
else:
asn_name = f"AS{asn}"
asn_number = str(asn)
asn_raw_data = {
'ip_address': ip,
'asn': asn_number,
'isp': data.get('isp', ''),
'org': data.get('org', '')
}
# Extract hostname relationships
hostnames = data.get('hostnames', [])
for hostname in hostnames:
if _is_valid_domain(hostname):
relationships.append((
ip,
asn_name,
'asn_membership',
0.7,
asn_raw_data
hostname,
'a_record',
0.8,
data
))
self.log_relationship_discovery(
source_node=ip,
target_node=asn_name,
relationship_type='asn_membership',
confidence_score=0.7,
raw_data=asn_raw_data,
discovery_method="shodan_asn_lookup"
target_node=hostname,
relationship_type='a_record',
confidence_score=0.8,
raw_data=data,
discovery_method="shodan_host_lookup"
)
except json.JSONDecodeError as e:
self.logger.logger.error(f"Failed to parse JSON response from Shodan: {e}")
# Extract ASN relationship
asn = data.get('asn')
if asn:
asn_name = f"AS{asn[2:]}" if isinstance(asn, str) and asn.startswith('AS') else f"AS{asn}"
relationships.append((
ip,
asn_name,
'asn_membership',
0.7,
data
))
self.log_relationship_discovery(
source_node=ip,
target_node=asn_name,
relationship_type='asn_membership',
confidence_score=0.7,
raw_data=data,
discovery_method="shodan_asn_lookup"
)
return relationships
def search_by_organization(self, org_name: str) -> List[Dict[str, Any]]:
    """
    Look up Shodan hosts attributed to an organization.

    Args:
        org_name: Organization name to search for

    Returns:
        The ``matches`` list of the search response, or an empty list when
        the provider is unavailable, the request does not return HTTP 200,
        or an error occurs (errors are logged)
    """
    if not self.is_available():
        return []

    matches = []
    try:
        reply = self.make_request(
            f"{self.base_url}/shodan/host/search",
            method="GET",
            params={
                'key': self.api_key,
                'query': f'org:"{org_name}"',
                'minify': True,
            },
            target_indicator=org_name,
        )
        if reply and reply.status_code == 200:
            matches = reply.json().get('matches', [])
    except Exception as e:
        self.logger.logger.error(f"Error searching Shodan by organization {org_name}: {e}")

    return matches
def get_host_services(self, ip: str) -> List[Dict[str, Any]]:
    """
    Fetch the service entries Shodan reports for one IP address.

    Args:
        ip: IP address to query

    Returns:
        The ``data`` list of the host response, or an empty list when the IP
        is invalid, the provider is unavailable, the request does not return
        HTTP 200, or an error occurs (errors are logged)
    """
    if not (_is_valid_ip(ip) and self.is_available()):
        return []

    services = []
    try:
        reply = self.make_request(
            f"{self.base_url}/shodan/host/{ip}",
            method="GET",
            params={'key': self.api_key},
            target_indicator=ip,
        )
        if reply and reply.status_code == 200:
            # Service banners
            services = reply.json().get('data', [])
    except Exception as e:
        self.logger.logger.error(f"Error getting Shodan services for IP {ip}: {e}")

    return services