From 9e338f7923ad77cef3ad05caf93c424922675d82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20St=C3=B6ckl?= Date: Mon, 25 Aug 2025 13:33:04 +0000 Subject: [PATCH] whois_analyzer.py aktualisiert --- whois_analyzer.py | 198 ++++++++++++++++++++++++++++++---------------- 1 file changed, 130 insertions(+), 68 deletions(-) diff --git a/whois_analyzer.py b/whois_analyzer.py index 07e25dc..9893c3a 100644 --- a/whois_analyzer.py +++ b/whois_analyzer.py @@ -1,4 +1,4 @@ -"""Index analyzer plugin for WHOIS data enrichment.""" +"""Index analyzer plugin for WHOIS data enrichment - Debug Version.""" import ipaddress import logging @@ -56,39 +56,66 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): # Cache to avoid duplicate queries self.whois_cache: Dict[str, Optional[Dict]] = {} self.processed_ips: Set[str] = set() + + # Debug counters + self.debug_stats = { + 'total_events': 0, + 'events_with_ips': 0, + 'valid_ips_found': 0, + 'invalid_ips_found': 0, + 'api_calls_made': 0, + 'api_successes': 0, + 'api_failures': 0, + 'whois_lib_available': HAS_WHOIS + } + + logger.info(f"WHOIS Analyzer initialized. python-whois available: {HAS_WHOIS}") def _validate_ip(self, ip_address: str) -> bool: - """Validate an IP address for analysis (same logic as GeoIP analyzer). + """Validate an IP address for analysis. Args: ip_address: The IP address to validate Returns: - True if IP is valid and global (public) + True if IP is valid and should be processed """ try: - ip = ipaddress.ip_address(ip_address.strip()) - return ip.is_global - except (ValueError, AttributeError): + ip_str = ip_address.strip() + ip = ipaddress.ip_address(ip_str) + + # DEBUG: Log all IPs being validated + logger.debug(f"Validating IP: {ip_str} - is_global: {ip.is_global}, is_private: {ip.is_private}") + + # Be less restrictive than just is_global - include more IPs for testing + if ip.is_private or ip.is_loopback or ip.is_multicast: + logger.debug(f"Skipping private/loopback/multicast IP: {ip_str}") + return False + + # Accept global IPs and also some reserved ranges that might have WHOIS data + return True + + except (ValueError, AttributeError) as e: + logger.debug(f"Invalid IP address format: {ip_address} - {e}") return False def _get_asn_data_via_api(self, ip_address: str) -> Optional[Dict]: - """Get ASN data using a free API service as fallback. - - Args: - ip_address: IP address to lookup - - Returns: - Dictionary with ASN data or None - """ + """Get ASN data using a free API service as fallback.""" try: - # Using ip-api.com which has a free tier - # Alternative: ipinfo.io, whoisapi.org, etc. - url = f"http://ip-api.com/json/{ip_address}?fields=as,asname,isp,org,country,regionName,city" + self.debug_stats['api_calls_made'] += 1 + + # Using ip-api.com which has a free tier (150 requests per minute) + url = f"http://ip-api.com/json/{ip_address}?fields=status,message,as,asname,isp,org,country,regionName,city" + + logger.debug(f"Making API call to: {url}") response = requests.get(url, timeout=self.timeout) + logger.debug(f"API response status: {response.status_code}") + if response.status_code == 200: data = response.json() + logger.debug(f"API response data: {data}") + if data.get('status') == 'success': # Parse ASN number from 'as' field (format: "AS15169 Google LLC") as_info = data.get('as', '') @@ -96,7 +123,7 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): if as_info and as_info.startswith('AS'): asn = as_info.split()[0][2:] # Remove 'AS' prefix - return { + result = { 'asn': asn, 'asn_name': data.get('asname'), 'isp': data.get('isp'), @@ -105,26 +132,32 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): 'region': data.get('regionName'), 'city': data.get('city') } + + self.debug_stats['api_successes'] += 1 + logger.debug(f"API lookup successful for {ip_address}: {result}") + return result + else: + logger.debug(f"API returned failure status for {ip_address}: {data.get('message', 'Unknown error')}") + self.debug_stats['api_failures'] += 1 + else: + logger.warning(f"API request failed with status {response.status_code}") + self.debug_stats['api_failures'] += 1 return None except Exception as e: - logger.debug(f"API lookup failed for {ip_address}: {e}") + logger.error(f"API lookup failed for {ip_address}: {e}") + self.debug_stats['api_failures'] += 1 return None def _get_whois_data_python_whois(self, ip_address: str) -> Optional[Dict]: - """Get WHOIS data using python-whois library. - - Args: - ip_address: IP address to lookup - - Returns: - Dictionary with WHOIS data or None - """ + """Get WHOIS data using python-whois library.""" if not HAS_WHOIS: + logger.debug("python-whois library not available") return None try: + logger.debug(f"Attempting python-whois lookup for {ip_address}") w = whois.whois(ip_address) # Extract relevant information @@ -160,6 +193,11 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): data['creation_date'] = str(w.creation_date) if hasattr(w, 'updated_date'): data['updated_date'] = str(w.updated_date) + + if data: + logger.debug(f"python-whois lookup successful for {ip_address}: {data}") + else: + logger.debug(f"python-whois returned no data for {ip_address}") return data if data else None @@ -168,15 +206,9 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): return None def _get_whois_data(self, ip_address: str) -> Optional[Dict]: - """Get WHOIS data for an IP address using available methods. - - Args: - ip_address: IP address to lookup - - Returns: - Dictionary with WHOIS data or None - """ + """Get WHOIS data for an IP address using available methods.""" if ip_address in self.whois_cache: + logger.debug(f"Using cached WHOIS data for {ip_address}") return self.whois_cache[ip_address] whois_data = None @@ -191,16 +223,16 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): # Cache the result (even if None) self.whois_cache[ip_address] = whois_data + + if whois_data: + logger.info(f"Successfully retrieved WHOIS data for {ip_address}") + else: + logger.debug(f"No WHOIS data found for {ip_address}") + return whois_data def _enrich_event(self, event, ip_field: str, whois_data: Dict): - """Add WHOIS data to the event. - - Args: - event: The event object to enrich - ip_field: The field name containing the IP address - whois_data: Dictionary with WHOIS data - """ + """Add WHOIS data to the event.""" try: # Create enrichment attributes with field-specific naming enrichment = {'whois_checked': True} @@ -240,10 +272,14 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): if whois_data.get('updated_date'): enrichment[f'{ip_field}_updated_date'] = whois_data['updated_date'] + logger.debug(f"Adding enrichment data: {enrichment}") + event.add_attributes(enrichment) event.add_tags(['whois-enriched']) event.commit() + logger.info(f"Successfully enriched event for {ip_field}") + except Exception as e: logger.error(f"Error enriching event for {ip_field}: {e}") # Still mark as checked to avoid reprocessing @@ -256,11 +292,23 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): def run(self): """Main analyzer logic.""" logger.info("Starting WHOIS enrichment analysis") + logger.info(f"Debug stats: {self.debug_stats}") + + # Test a single known IP to verify API connectivity + test_ip = "8.8.8.8" + logger.info(f"Testing API connectivity with {test_ip}") + test_result = self._get_asn_data_via_api(test_ip) + if test_result: + logger.info(f"API test successful: {test_result}") + else: + logger.warning("API test failed - this may indicate connectivity issues") # Build query for events with IP fields that haven't been checked ip_exists_queries = [f'_exists_:{field}' for field in self.IP_FIELDS] query = f'({" OR ".join(ip_exists_queries)}) AND NOT _exists_:whois_checked' + logger.info(f"Query: {query}") + events = self.event_stream( query_string=query, return_fields=self.IP_FIELDS + ['whois_checked'] @@ -274,6 +322,20 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): for event in events: current_batch.append(event) + self.debug_stats['total_events'] += 1 + + # Debug: Log event fields + ip_fields_present = [] + for field in self.IP_FIELDS: + value = event.source.get(field) + if value: + ip_fields_present.append(f"{field}={value}") + + if ip_fields_present: + logger.debug(f"Event {self.debug_stats['total_events']} has IP fields: {ip_fields_present}") + self.debug_stats['events_with_ips'] += 1 + else: + logger.debug(f"Event {self.debug_stats['total_events']} has no IP fields") if len(current_batch) >= self.batch_size: processed, enriched = self._process_batch(current_batch) @@ -288,6 +350,7 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): # Log progress if total_processed % (self.batch_size * 5) == 0: logger.info(f"Progress: {total_processed} processed, {enriched_count} enriched") + logger.info(f"Debug stats: {self.debug_stats}") # Process remaining events if current_batch: @@ -296,7 +359,7 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): enriched_count += enriched except Exception as e: - logger.error(f"Error during WHOIS processing: {e}") + logger.error(f"Error during WHOIS processing: {e}", exc_info=True) # Create a view if we enriched any events if enriched_count > 0: @@ -306,23 +369,22 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): query_string='tag:"whois-enriched"' ) - logger.info(f"WHOIS enrichment complete: {total_processed} processed, {enriched_count} enriched") - return f"Processed {total_processed} events, enriched {enriched_count} with WHOIS data" + # Final debug summary + logger.info(f"WHOIS enrichment complete:") + logger.info(f" - Total events processed: {total_processed}") + logger.info(f" - Events enriched: {enriched_count}") + logger.info(f" - Debug stats: {self.debug_stats}") + + return f"Processed {total_processed} events, enriched {enriched_count} with WHOIS data. Debug stats: {self.debug_stats}" def _process_batch(self, events): - """Process a batch of events. - - Args: - events: List of events to process - - Returns: - Tuple of (processed_count, enriched_count) - """ + """Process a batch of events.""" processed_count = 0 enriched_count = 0 for event in events: processed_count += 1 + event_enriched = False # Check each IP field in the event for ip_field in self.IP_FIELDS: @@ -338,9 +400,13 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): for ip_addr in ip_addresses: if not self._validate_ip(ip_addr): + self.debug_stats['invalid_ips_found'] += 1 continue - + + self.debug_stats['valid_ips_found'] += 1 + if ip_addr in self.processed_ips: + logger.debug(f"IP {ip_addr} already processed") continue self.processed_ips.add(ip_addr) @@ -351,23 +417,19 @@ class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer): if whois_data: self._enrich_event(event, ip_field, whois_data) enriched_count += 1 - logger.debug(f"Enriched {ip_addr} with WHOIS data") + event_enriched = True + logger.info(f"Enriched {ip_addr} with WHOIS data") + break # Only enrich once per event else: - # Mark as checked even if no data found - event.add_attributes({'whois_checked': True, 'whois_no_data': True}) - event.commit() logger.debug(f"No WHOIS data for {ip_addr}") - - # Break after first successful IP processing to avoid duplicate enrichment - break - else: - continue - break - # If no valid IPs found, still mark as checked - if not any(event.source.get(field) for field in self.IP_FIELDS): - event.add_attributes({'whois_checked': True}) - event.commit() + # Mark event as checked even if no enrichment occurred + if not event_enriched: + try: + event.add_attributes({'whois_checked': True, 'whois_no_data': True}) + event.commit() + except Exception as e: + logger.error(f"Error marking event as checked: {e}") return processed_count, enriched_count