whois_analyzer.py aktualisiert

This commit is contained in:
Mario Stöckl 2025-08-25 13:33:04 +00:00
parent 78eae642b6
commit 9e338f7923

View File

@ -1,4 +1,4 @@
"""Index analyzer plugin for WHOIS data enrichment.""" """Index analyzer plugin for WHOIS data enrichment - Debug Version."""
import ipaddress import ipaddress
import logging import logging
@ -57,38 +57,65 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
self.whois_cache: Dict[str, Optional[Dict]] = {} self.whois_cache: Dict[str, Optional[Dict]] = {}
self.processed_ips: Set[str] = set() self.processed_ips: Set[str] = set()
# Debug counters
self.debug_stats = {
'total_events': 0,
'events_with_ips': 0,
'valid_ips_found': 0,
'invalid_ips_found': 0,
'api_calls_made': 0,
'api_successes': 0,
'api_failures': 0,
'whois_lib_available': HAS_WHOIS
}
logger.info(f"WHOIS Analyzer initialized. python-whois available: {HAS_WHOIS}")
def _validate_ip(self, ip_address: str) -> bool:
    """Decide whether an IP address should be processed by this analyzer.

    The check is deliberately less restrictive than ``ip.is_global``: any
    address that parses and is not private, loopback or multicast is
    accepted.

    Args:
        ip_address: The IP address to validate; surrounding whitespace
            is ignored.

    Returns:
        True if the address parses and is not private, loopback or
        multicast; False otherwise, including when the value cannot be
        parsed or has no ``strip`` method.
    """
    try:
        # AttributeError covers non-string input that has no .strip().
        ip_str = ip_address.strip()
        ip = ipaddress.ip_address(ip_str)
    except (ValueError, AttributeError) as e:
        logger.debug("Invalid IP address format: %s - %s", ip_address, e)
        return False

    # Lazy %-style arguments: the message is only built when DEBUG is on.
    logger.debug(
        "Validating IP: %s - is_global: %s, is_private: %s",
        ip_str,
        ip.is_global,
        ip.is_private,
    )

    if ip.is_private or ip.is_loopback or ip.is_multicast:
        logger.debug("Skipping private/loopback/multicast IP: %s", ip_str)
        return False

    # Everything else is accepted, not only addresses flagged is_global.
    return True
def _get_asn_data_via_api(self, ip_address: str) -> Optional[Dict]: def _get_asn_data_via_api(self, ip_address: str) -> Optional[Dict]:
"""Get ASN data using a free API service as fallback. """Get ASN data using a free API service as fallback."""
Args:
ip_address: IP address to lookup
Returns:
Dictionary with ASN data or None
"""
try: try:
# Using ip-api.com which has a free tier self.debug_stats['api_calls_made'] += 1
# Alternative: ipinfo.io, whoisapi.org, etc.
url = f"http://ip-api.com/json/{ip_address}?fields=as,asname,isp,org,country,regionName,city" # Using ip-api.com which has a free tier (150 requests per minute)
url = f"http://ip-api.com/json/{ip_address}?fields=status,message,as,asname,isp,org,country,regionName,city"
logger.debug(f"Making API call to: {url}")
response = requests.get(url, timeout=self.timeout) response = requests.get(url, timeout=self.timeout)
logger.debug(f"API response status: {response.status_code}")
if response.status_code == 200: if response.status_code == 200:
data = response.json() data = response.json()
logger.debug(f"API response data: {data}")
if data.get('status') == 'success': if data.get('status') == 'success':
# Parse ASN number from 'as' field (format: "AS15169 Google LLC") # Parse ASN number from 'as' field (format: "AS15169 Google LLC")
as_info = data.get('as', '') as_info = data.get('as', '')
@ -96,7 +123,7 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
if as_info and as_info.startswith('AS'): if as_info and as_info.startswith('AS'):
asn = as_info.split()[0][2:] # Remove 'AS' prefix asn = as_info.split()[0][2:] # Remove 'AS' prefix
return { result = {
'asn': asn, 'asn': asn,
'asn_name': data.get('asname'), 'asn_name': data.get('asname'),
'isp': data.get('isp'), 'isp': data.get('isp'),
@ -106,25 +133,31 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
'city': data.get('city') 'city': data.get('city')
} }
self.debug_stats['api_successes'] += 1
logger.debug(f"API lookup successful for {ip_address}: {result}")
return result
else:
logger.debug(f"API returned failure status for {ip_address}: {data.get('message', 'Unknown error')}")
self.debug_stats['api_failures'] += 1
else:
logger.warning(f"API request failed with status {response.status_code}")
self.debug_stats['api_failures'] += 1
return None return None
except Exception as e: except Exception as e:
logger.debug(f"API lookup failed for {ip_address}: {e}") logger.error(f"API lookup failed for {ip_address}: {e}")
self.debug_stats['api_failures'] += 1
return None return None
def _get_whois_data_python_whois(self, ip_address: str) -> Optional[Dict]: def _get_whois_data_python_whois(self, ip_address: str) -> Optional[Dict]:
"""Get WHOIS data using python-whois library. """Get WHOIS data using python-whois library."""
Args:
ip_address: IP address to lookup
Returns:
Dictionary with WHOIS data or None
"""
if not HAS_WHOIS: if not HAS_WHOIS:
logger.debug("python-whois library not available")
return None return None
try: try:
logger.debug(f"Attempting python-whois lookup for {ip_address}")
w = whois.whois(ip_address) w = whois.whois(ip_address)
# Extract relevant information # Extract relevant information
@ -161,6 +194,11 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
if hasattr(w, 'updated_date'): if hasattr(w, 'updated_date'):
data['updated_date'] = str(w.updated_date) data['updated_date'] = str(w.updated_date)
if data:
logger.debug(f"python-whois lookup successful for {ip_address}: {data}")
else:
logger.debug(f"python-whois returned no data for {ip_address}")
return data if data else None return data if data else None
except Exception as e: except Exception as e:
@ -168,15 +206,9 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
return None return None
def _get_whois_data(self, ip_address: str) -> Optional[Dict]: def _get_whois_data(self, ip_address: str) -> Optional[Dict]:
"""Get WHOIS data for an IP address using available methods. """Get WHOIS data for an IP address using available methods."""
Args:
ip_address: IP address to lookup
Returns:
Dictionary with WHOIS data or None
"""
if ip_address in self.whois_cache: if ip_address in self.whois_cache:
logger.debug(f"Using cached WHOIS data for {ip_address}")
return self.whois_cache[ip_address] return self.whois_cache[ip_address]
whois_data = None whois_data = None
@ -191,16 +223,16 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
# Cache the result (even if None) # Cache the result (even if None)
self.whois_cache[ip_address] = whois_data self.whois_cache[ip_address] = whois_data
if whois_data:
logger.info(f"Successfully retrieved WHOIS data for {ip_address}")
else:
logger.debug(f"No WHOIS data found for {ip_address}")
return whois_data return whois_data
def _enrich_event(self, event, ip_field: str, whois_data: Dict): def _enrich_event(self, event, ip_field: str, whois_data: Dict):
"""Add WHOIS data to the event. """Add WHOIS data to the event."""
Args:
event: The event object to enrich
ip_field: The field name containing the IP address
whois_data: Dictionary with WHOIS data
"""
try: try:
# Create enrichment attributes with field-specific naming # Create enrichment attributes with field-specific naming
enrichment = {'whois_checked': True} enrichment = {'whois_checked': True}
@ -240,10 +272,14 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
if whois_data.get('updated_date'): if whois_data.get('updated_date'):
enrichment[f'{ip_field}_updated_date'] = whois_data['updated_date'] enrichment[f'{ip_field}_updated_date'] = whois_data['updated_date']
logger.debug(f"Adding enrichment data: {enrichment}")
event.add_attributes(enrichment) event.add_attributes(enrichment)
event.add_tags(['whois-enriched']) event.add_tags(['whois-enriched'])
event.commit() event.commit()
logger.info(f"Successfully enriched event for {ip_field}")
except Exception as e: except Exception as e:
logger.error(f"Error enriching event for {ip_field}: {e}") logger.error(f"Error enriching event for {ip_field}: {e}")
# Still mark as checked to avoid reprocessing # Still mark as checked to avoid reprocessing
@ -256,11 +292,23 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
def run(self): def run(self):
"""Main analyzer logic.""" """Main analyzer logic."""
logger.info("Starting WHOIS enrichment analysis") logger.info("Starting WHOIS enrichment analysis")
logger.info(f"Debug stats: {self.debug_stats}")
# Test a single known IP to verify API connectivity
test_ip = "8.8.8.8"
logger.info(f"Testing API connectivity with {test_ip}")
test_result = self._get_asn_data_via_api(test_ip)
if test_result:
logger.info(f"API test successful: {test_result}")
else:
logger.warning("API test failed - this may indicate connectivity issues")
# Build query for events with IP fields that haven't been checked # Build query for events with IP fields that haven't been checked
ip_exists_queries = [f'_exists_:{field}' for field in self.IP_FIELDS] ip_exists_queries = [f'_exists_:{field}' for field in self.IP_FIELDS]
query = f'({" OR ".join(ip_exists_queries)}) AND NOT _exists_:whois_checked' query = f'({" OR ".join(ip_exists_queries)}) AND NOT _exists_:whois_checked'
logger.info(f"Query: {query}")
events = self.event_stream( events = self.event_stream(
query_string=query, query_string=query,
return_fields=self.IP_FIELDS + ['whois_checked'] return_fields=self.IP_FIELDS + ['whois_checked']
@ -274,6 +322,20 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
for event in events: for event in events:
current_batch.append(event) current_batch.append(event)
self.debug_stats['total_events'] += 1
# Debug: Log event fields
ip_fields_present = []
for field in self.IP_FIELDS:
value = event.source.get(field)
if value:
ip_fields_present.append(f"{field}={value}")
if ip_fields_present:
logger.debug(f"Event {self.debug_stats['total_events']} has IP fields: {ip_fields_present}")
self.debug_stats['events_with_ips'] += 1
else:
logger.debug(f"Event {self.debug_stats['total_events']} has no IP fields")
if len(current_batch) >= self.batch_size: if len(current_batch) >= self.batch_size:
processed, enriched = self._process_batch(current_batch) processed, enriched = self._process_batch(current_batch)
@ -288,6 +350,7 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
# Log progress # Log progress
if total_processed % (self.batch_size * 5) == 0: if total_processed % (self.batch_size * 5) == 0:
logger.info(f"Progress: {total_processed} processed, {enriched_count} enriched") logger.info(f"Progress: {total_processed} processed, {enriched_count} enriched")
logger.info(f"Debug stats: {self.debug_stats}")
# Process remaining events # Process remaining events
if current_batch: if current_batch:
@ -296,7 +359,7 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
enriched_count += enriched enriched_count += enriched
except Exception as e: except Exception as e:
logger.error(f"Error during WHOIS processing: {e}") logger.error(f"Error during WHOIS processing: {e}", exc_info=True)
# Create a view if we enriched any events # Create a view if we enriched any events
if enriched_count > 0: if enriched_count > 0:
@ -306,23 +369,22 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
query_string='tag:"whois-enriched"' query_string='tag:"whois-enriched"'
) )
logger.info(f"WHOIS enrichment complete: {total_processed} processed, {enriched_count} enriched") # Final debug summary
return f"Processed {total_processed} events, enriched {enriched_count} with WHOIS data" logger.info(f"WHOIS enrichment complete:")
logger.info(f" - Total events processed: {total_processed}")
logger.info(f" - Events enriched: {enriched_count}")
logger.info(f" - Debug stats: {self.debug_stats}")
return f"Processed {total_processed} events, enriched {enriched_count} with WHOIS data. Debug stats: {self.debug_stats}"
def _process_batch(self, events): def _process_batch(self, events):
"""Process a batch of events. """Process a batch of events."""
Args:
events: List of events to process
Returns:
Tuple of (processed_count, enriched_count)
"""
processed_count = 0 processed_count = 0
enriched_count = 0 enriched_count = 0
for event in events: for event in events:
processed_count += 1 processed_count += 1
event_enriched = False
# Check each IP field in the event # Check each IP field in the event
for ip_field in self.IP_FIELDS: for ip_field in self.IP_FIELDS:
@ -338,9 +400,13 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
for ip_addr in ip_addresses: for ip_addr in ip_addresses:
if not self._validate_ip(ip_addr): if not self._validate_ip(ip_addr):
self.debug_stats['invalid_ips_found'] += 1
continue continue
self.debug_stats['valid_ips_found'] += 1
if ip_addr in self.processed_ips: if ip_addr in self.processed_ips:
logger.debug(f"IP {ip_addr} already processed")
continue continue
self.processed_ips.add(ip_addr) self.processed_ips.add(ip_addr)
@ -351,23 +417,19 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
if whois_data: if whois_data:
self._enrich_event(event, ip_field, whois_data) self._enrich_event(event, ip_field, whois_data)
enriched_count += 1 enriched_count += 1
logger.debug(f"Enriched {ip_addr} with WHOIS data") event_enriched = True
logger.info(f"Enriched {ip_addr} with WHOIS data")
break # Only enrich once per event
else: else:
# Mark as checked even if no data found
event.add_attributes({'whois_checked': True, 'whois_no_data': True})
event.commit()
logger.debug(f"No WHOIS data for {ip_addr}") logger.debug(f"No WHOIS data for {ip_addr}")
# Break after first successful IP processing to avoid duplicate enrichment # Mark event as checked even if no enrichment occurred
break if not event_enriched:
else: try:
continue event.add_attributes({'whois_checked': True, 'whois_no_data': True})
break event.commit()
except Exception as e:
# If no valid IPs found, still mark as checked logger.error(f"Error marking event as checked: {e}")
if not any(event.source.get(field) for field in self.IP_FIELDS):
event.add_attributes({'whois_checked': True})
event.commit()
return processed_count, enriched_count return processed_count, enriched_count