adjustments to shodan & export manager

This commit is contained in:
overcuriousity
2025-09-18 19:22:58 +02:00
parent d4081e1a32
commit cbfd40ee98
2 changed files with 707 additions and 124 deletions

View File

@@ -117,6 +117,9 @@ class ShodanProvider(BaseProvider):
Returns:
ProviderResult containing discovered relationships and attributes
Raises:
Exception: For temporary failures that should be retried (timeouts, 502/503 errors, connection issues)
"""
if not _is_valid_ip(ip) or not self.is_available():
return ProviderResult()
@@ -129,50 +132,117 @@ class ShodanProvider(BaseProvider):
cache_file = self._get_cache_file_path(normalized_ip)
cache_status = self._get_cache_status(cache_file)
result = ProviderResult()
if cache_status == "fresh":
self.logger.logger.debug(f"Using fresh cache for Shodan query: {normalized_ip}")
return self._load_from_cache(cache_file)
# Need to query API
self.logger.logger.debug(f"Querying Shodan API for: {normalized_ip}")
url = f"{self.base_url}/shodan/host/{normalized_ip}"
params = {'key': self.api_key}
try:
if cache_status == "fresh":
result = self._load_from_cache(cache_file)
self.logger.logger.info(f"Using cached Shodan data for {normalized_ip}")
else: # "stale" or "not_found"
url = f"{self.base_url}/shodan/host/{normalized_ip}"
params = {'key': self.api_key}
response = self.make_request(url, method="GET", params=params, target_indicator=normalized_ip)
if response and response.status_code == 200:
data = response.json()
# Process the data into ProviderResult BEFORE caching
result = self._process_shodan_data(normalized_ip, data)
self._save_to_cache(cache_file, result, data) # Save both result and raw data
elif response and response.status_code == 404:
# Handle all 404s as successful "no information available" responses
# Shodan returns 404 when no information is available for an IP
self.logger.logger.debug(f"Shodan has no information for {normalized_ip}")
result = ProviderResult() # Empty but successful result
# Cache the empty result to avoid repeated queries
self._save_to_cache(cache_file, result, {'error': 'No information available'})
elif cache_status == "stale":
# If API fails on a stale cache, use the old data
result = self._load_from_cache(cache_file)
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to API failure")
response = self.make_request(url, method="GET", params=params, target_indicator=normalized_ip)
if not response:
# Connection failed - use stale cache if available, otherwise retry
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to connection failure")
return self._load_from_cache(cache_file)
else:
# Other HTTP error codes should be treated as failures
status_code = response.status_code if response else "No response"
raise requests.exceptions.RequestException(f"Shodan API returned HTTP {status_code}")
except requests.exceptions.RequestException as e:
self.logger.logger.debug(f"Shodan API error for {normalized_ip}: {e}")
if cache_status == "stale":
# Use stale cache if available
result = self._load_from_cache(cache_file)
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to API error")
raise requests.exceptions.RequestException("No response from Shodan API - should retry")
if response.status_code == 200:
self.logger.logger.debug(f"Shodan returned data for {normalized_ip}")
data = response.json()
result = self._process_shodan_data(normalized_ip, data)
self._save_to_cache(cache_file, result, data)
return result
elif response.status_code == 404:
# 404 = "no information available" - successful but empty result, don't retry
self.logger.logger.debug(f"Shodan has no information for {normalized_ip} (404)")
result = ProviderResult() # Empty but successful result
# Cache the empty result to avoid repeated queries
self._save_to_cache(cache_file, result, {'shodan_status': 'no_information', 'status_code': 404})
return result
elif response.status_code in [401, 403]:
# Authentication/authorization errors - permanent failures, don't retry
self.logger.logger.error(f"Shodan API authentication failed for {normalized_ip} (HTTP {response.status_code})")
return ProviderResult() # Empty result, don't retry
elif response.status_code in [429]:
# Rate limiting - should be handled by rate limiter, but if we get here, retry
self.logger.logger.warning(f"Shodan API rate limited for {normalized_ip} (HTTP {response.status_code})")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to rate limiting")
return self._load_from_cache(cache_file)
else:
raise requests.exceptions.RequestException(f"Shodan API rate limited (HTTP {response.status_code}) - should retry")
elif response.status_code in [500, 502, 503, 504]:
# Server errors - temporary failures that should be retried
self.logger.logger.warning(f"Shodan API server error for {normalized_ip} (HTTP {response.status_code})")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to server error")
return self._load_from_cache(cache_file)
else:
raise requests.exceptions.RequestException(f"Shodan API server error (HTTP {response.status_code}) - should retry")
else:
# FIXED: Only re-raise for actual network/timeout errors, not 404s
# 404s are already handled above as successful empty results
raise e
# Other HTTP error codes - treat as temporary failures
self.logger.logger.warning(f"Shodan API returned unexpected status {response.status_code} for {normalized_ip}")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to unexpected API error")
return self._load_from_cache(cache_file)
else:
raise requests.exceptions.RequestException(f"Shodan API error (HTTP {response.status_code}) - should retry")
except requests.exceptions.Timeout:
# Timeout errors - should be retried
self.logger.logger.warning(f"Shodan API timeout for {normalized_ip}")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to timeout")
return self._load_from_cache(cache_file)
else:
raise # Re-raise timeout for retry
return result
except requests.exceptions.ConnectionError:
# Connection errors - should be retried
self.logger.logger.warning(f"Shodan API connection error for {normalized_ip}")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to connection error")
return self._load_from_cache(cache_file)
else:
raise # Re-raise connection error for retry
except requests.exceptions.RequestException:
# Other request exceptions - should be retried
self.logger.logger.warning(f"Shodan API request exception for {normalized_ip}")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to request exception")
return self._load_from_cache(cache_file)
else:
raise # Re-raise request exception for retry
except json.JSONDecodeError:
# JSON parsing error on 200 response - treat as temporary failure
self.logger.logger.error(f"Invalid JSON response from Shodan for {normalized_ip}")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to JSON parsing error")
return self._load_from_cache(cache_file)
else:
raise requests.exceptions.RequestException("Invalid JSON response from Shodan - should retry")
except Exception as e:
# Unexpected exceptions - log and treat as temporary failures
self.logger.logger.error(f"Unexpected exception in Shodan query for {normalized_ip}: {e}")
if cache_status == "stale":
self.logger.logger.info(f"Using stale cache for {normalized_ip} due to unexpected exception")
return self._load_from_cache(cache_file)
else:
raise requests.exceptions.RequestException(f"Unexpected error in Shodan query: {e}") from e
def _load_from_cache(self, cache_file_path: Path) -> ProviderResult:
"""Load processed Shodan data from a cache file."""