it
This commit is contained in:
@@ -163,11 +163,11 @@ class BaseProvider(ABC):
|
||||
target_indicator: str = "",
|
||||
max_retries: int = 3) -> Optional[requests.Response]:
|
||||
"""
|
||||
Make a rate-limited HTTP request with forensic logging and retry logic.
|
||||
Now supports cancellation via stop_event from scanner.
|
||||
Make a rate-limited HTTP request with aggressive stop signal handling.
|
||||
Terminates immediately when stop is requested, including during retries.
|
||||
"""
|
||||
# Check for cancellation before starting
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
if self._is_stop_requested():
|
||||
print(f"Request cancelled before start: {url}")
|
||||
return None
|
||||
|
||||
@@ -188,21 +188,24 @@ class BaseProvider(ABC):
|
||||
response.headers = cached_data['headers']
|
||||
return response
|
||||
|
||||
for attempt in range(max_retries + 1):
|
||||
# Check for cancellation before each attempt
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
# Determine effective max_retries based on stop signal
|
||||
effective_max_retries = 0 if self._is_stop_requested() else max_retries
|
||||
last_exception = None
|
||||
|
||||
for attempt in range(effective_max_retries + 1):
|
||||
# AGGRESSIVE: Check for cancellation before each attempt
|
||||
if self._is_stop_requested():
|
||||
print(f"Request cancelled during attempt {attempt + 1}: {url}")
|
||||
return None
|
||||
|
||||
# Apply rate limiting (but reduce wait time if cancellation is requested)
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
break
|
||||
|
||||
self.rate_limiter.wait_if_needed()
|
||||
# Apply rate limiting with cancellation awareness
|
||||
if not self._wait_with_cancellation_check():
|
||||
print(f"Request cancelled during rate limiting: {url}")
|
||||
return None
|
||||
|
||||
# Check again after rate limiting
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
print(f"Request cancelled after rate limiting: {url}")
|
||||
# AGGRESSIVE: Final check before making HTTP request
|
||||
if self._is_stop_requested():
|
||||
print(f"Request cancelled before HTTP call: {url}")
|
||||
return None
|
||||
|
||||
start_time = time.time()
|
||||
@@ -219,10 +222,11 @@ class BaseProvider(ABC):
|
||||
|
||||
print(f"Making {method} request to: {url} (attempt {attempt + 1})")
|
||||
|
||||
# Use shorter timeout if termination is requested
|
||||
# AGGRESSIVE: Use much shorter timeout if termination is requested
|
||||
request_timeout = self.timeout
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
request_timeout = min(5, self.timeout) # Max 5 seconds if termination requested
|
||||
if self._is_stop_requested():
|
||||
request_timeout = 2 # Max 2 seconds if termination requested
|
||||
print(f"Stop requested - using short timeout: {request_timeout}s")
|
||||
|
||||
# Make request
|
||||
if method.upper() == "GET":
|
||||
@@ -271,28 +275,28 @@ class BaseProvider(ABC):
|
||||
error = str(e)
|
||||
self.failed_requests += 1
|
||||
print(f"Request failed (attempt {attempt + 1}): {error}")
|
||||
last_exception = e
|
||||
|
||||
# Check for cancellation before retrying
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
print(f"Request cancelled, not retrying: {url}")
|
||||
# AGGRESSIVE: Immediately abort retries if stop requested
|
||||
if self._is_stop_requested():
|
||||
print(f"Stop requested - aborting retries for: {url}")
|
||||
break
|
||||
|
||||
# Check if we should retry
|
||||
if attempt < max_retries and self._should_retry(e):
|
||||
backoff_time = (2 ** attempt) * 1 # Exponential backoff: 1s, 2s, 4s
|
||||
print(f"Retrying in {backoff_time} seconds...")
|
||||
# Check if we should retry (but only if stop not requested)
|
||||
if attempt < effective_max_retries and self._should_retry(e):
|
||||
# Use a longer, more respectful backoff for 429 errors
|
||||
if isinstance(e, requests.exceptions.HTTPError) and e.response and e.response.status_code == 429:
|
||||
# Start with a 10-second backoff and increase exponentially
|
||||
backoff_time = 10 * (2 ** attempt)
|
||||
print(f"Rate limit hit. Retrying in {backoff_time} seconds...")
|
||||
else:
|
||||
backoff_time = min(1.0, (2 ** attempt) * 0.5) # Shorter backoff for other errors
|
||||
print(f"Retrying in {backoff_time} seconds...")
|
||||
|
||||
# Shorter backoff if termination is requested
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
backoff_time = min(0.5, backoff_time)
|
||||
|
||||
# Sleep with cancellation checking
|
||||
sleep_start = time.time()
|
||||
while time.time() - sleep_start < backoff_time:
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
print(f"Request cancelled during backoff: {url}")
|
||||
return None
|
||||
time.sleep(0.1) # Check every 100ms
|
||||
# AGGRESSIVE: Much shorter backoff and more frequent checking
|
||||
if not self._sleep_with_cancellation_check(backoff_time):
|
||||
print(f"Stop requested during backoff - aborting: {url}")
|
||||
return None
|
||||
continue
|
||||
else:
|
||||
break
|
||||
@@ -301,6 +305,7 @@ class BaseProvider(ABC):
|
||||
error = f"Unexpected error: {str(e)}"
|
||||
self.failed_requests += 1
|
||||
print(f"Unexpected error: {error}")
|
||||
last_exception = e
|
||||
break
|
||||
|
||||
# All attempts failed - log and return None
|
||||
@@ -316,8 +321,57 @@ class BaseProvider(ABC):
|
||||
target_indicator=target_indicator
|
||||
)
|
||||
|
||||
if error and last_exception:
|
||||
raise last_exception
|
||||
|
||||
return None
|
||||
|
||||
def _is_stop_requested(self) -> bool:
|
||||
"""
|
||||
Enhanced stop signal checking that handles both local and Redis-based signals.
|
||||
"""
|
||||
if hasattr(self, '_stop_event') and self._stop_event and self._stop_event.is_set():
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _wait_with_cancellation_check(self) -> bool:
|
||||
"""
|
||||
Wait for rate limiting while aggressively checking for cancellation.
|
||||
Returns False if cancelled during wait.
|
||||
"""
|
||||
current_time = time.time()
|
||||
time_since_last = current_time - self.rate_limiter.last_request_time
|
||||
|
||||
if time_since_last < self.rate_limiter.min_interval:
|
||||
sleep_time = self.rate_limiter.min_interval - time_since_last
|
||||
if not self._sleep_with_cancellation_check(sleep_time):
|
||||
return False
|
||||
|
||||
self.rate_limiter.last_request_time = time.time()
|
||||
return True
|
||||
|
||||
def _sleep_with_cancellation_check(self, sleep_time: float) -> bool:
|
||||
"""
|
||||
Sleep for the specified time while aggressively checking for cancellation.
|
||||
|
||||
Args:
|
||||
sleep_time: Time to sleep in seconds
|
||||
|
||||
Returns:
|
||||
bool: True if sleep completed, False if cancelled
|
||||
"""
|
||||
sleep_start = time.time()
|
||||
check_interval = 0.05 # Check every 50ms for aggressive responsiveness
|
||||
|
||||
while time.time() - sleep_start < sleep_time:
|
||||
if self._is_stop_requested():
|
||||
return False
|
||||
remaining_time = sleep_time - (time.time() - sleep_start)
|
||||
time.sleep(min(check_interval, remaining_time))
|
||||
|
||||
return True
|
||||
|
||||
def set_stop_event(self, stop_event: threading.Event) -> None:
|
||||
"""
|
||||
Set the stop event for this provider to enable cancellation.
|
||||
@@ -337,15 +391,15 @@ class BaseProvider(ABC):
|
||||
Returns:
|
||||
True if the request should be retried
|
||||
"""
|
||||
# Retry on connection errors, timeouts, and 5xx server errors
|
||||
# Retry on connection errors and timeouts
|
||||
if isinstance(exception, (requests.exceptions.ConnectionError,
|
||||
requests.exceptions.Timeout)):
|
||||
return True
|
||||
|
||||
if isinstance(exception, requests.exceptions.HTTPError):
|
||||
if hasattr(exception, 'response') and exception.response:
|
||||
# Retry on server errors (5xx) but not client errors (4xx)
|
||||
return exception.response.status_code >= 500
|
||||
# Retry on server errors (5xx) AND on rate-limiting errors (429)
|
||||
return exception.response.status_code >= 500 or exception.response.status_code == 429
|
||||
|
||||
return False
|
||||
|
||||
|
||||
@@ -157,8 +157,7 @@ class CrtShProvider(BaseProvider):
|
||||
def query_domain(self, domain: str) -> List[Tuple[str, str, RelationshipType, float, Dict[str, Any]]]:
|
||||
"""
|
||||
Query crt.sh for certificates containing the domain.
|
||||
Creates domain-to-domain relationships and stores certificate data as metadata.
|
||||
Now supports early termination via stop_event.
|
||||
Enhanced with more frequent stop signal checking for reliable termination.
|
||||
"""
|
||||
if not _is_valid_domain(domain):
|
||||
return []
|
||||
@@ -197,10 +196,10 @@ class CrtShProvider(BaseProvider):
|
||||
domain_certificates = {}
|
||||
all_discovered_domains = set()
|
||||
|
||||
# Process certificates and group by domain (with cancellation checks)
|
||||
# Process certificates with enhanced cancellation checking
|
||||
for i, cert_data in enumerate(certificates):
|
||||
# Check for cancellation every 10 certificates
|
||||
if i % 10 == 0 and self._stop_event and self._stop_event.is_set():
|
||||
# Check for cancellation every 5 certificates instead of 10 for faster response
|
||||
if i % 5 == 0 and self._stop_event and self._stop_event.is_set():
|
||||
print(f"CrtSh processing cancelled at certificate {i} for domain: {domain}")
|
||||
break
|
||||
|
||||
@@ -209,6 +208,11 @@ class CrtShProvider(BaseProvider):
|
||||
|
||||
# Add all domains from this certificate to our tracking
|
||||
for cert_domain in cert_domains:
|
||||
# Additional stop check during domain processing
|
||||
if i % 20 == 0 and self._stop_event and self._stop_event.is_set():
|
||||
print(f"CrtSh domain processing cancelled for domain: {domain}")
|
||||
break
|
||||
|
||||
if not _is_valid_domain(cert_domain):
|
||||
continue
|
||||
|
||||
@@ -226,13 +230,13 @@ class CrtShProvider(BaseProvider):
|
||||
print(f"CrtSh query cancelled before relationship creation for domain: {domain}")
|
||||
return []
|
||||
|
||||
# Create relationships from query domain to ALL discovered domains
|
||||
for discovered_domain in all_discovered_domains:
|
||||
# Create relationships from query domain to ALL discovered domains with stop checking
|
||||
for i, discovered_domain in enumerate(all_discovered_domains):
|
||||
if discovered_domain == domain:
|
||||
continue # Skip self-relationships
|
||||
|
||||
# Check for cancellation during relationship creation
|
||||
if self._stop_event and self._stop_event.is_set():
|
||||
# Check for cancellation every 10 relationships
|
||||
if i % 10 == 0 and self._stop_event and self._stop_event.is_set():
|
||||
print(f"CrtSh relationship creation cancelled for domain: {domain}")
|
||||
break
|
||||
|
||||
@@ -284,8 +288,6 @@ class CrtShProvider(BaseProvider):
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
self.logger.logger.error(f"Failed to parse JSON response from crt.sh: {e}")
|
||||
except Exception as e:
|
||||
self.logger.logger.error(f"Error querying crt.sh for {domain}: {e}")
|
||||
|
||||
return relationships
|
||||
|
||||
|
||||
@@ -134,8 +134,6 @@ class ShodanProvider(BaseProvider):
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
self.logger.logger.error(f"Failed to parse JSON response from Shodan: {e}")
|
||||
except Exception as e:
|
||||
self.logger.logger.error(f"Error querying Shodan for domain {domain}: {e}")
|
||||
|
||||
return relationships
|
||||
|
||||
@@ -231,9 +229,7 @@ class ShodanProvider(BaseProvider):
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
self.logger.logger.error(f"Failed to parse JSON response from Shodan: {e}")
|
||||
except Exception as e:
|
||||
self.logger.logger.error(f"Error querying Shodan for IP {ip}: {e}")
|
||||
|
||||
|
||||
return relationships
|
||||
|
||||
def search_by_organization(self, org_name: str) -> List[Dict[str, Any]]:
|
||||
|
||||
Reference in New Issue
Block a user