whois_analyzer.py hinzugefügt
This commit is contained in:
parent
51ac3c692a
commit
7c34d76f3a
376
whois_analyzer.py
Normal file
376
whois_analyzer.py
Normal file
@ -0,0 +1,376 @@
|
||||
"""Index analyzer plugin for WHOIS data enrichment."""
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import time
|
||||
import os
|
||||
from typing import Dict, Optional, Set
|
||||
|
||||
import requests
|
||||
from flask import current_app
|
||||
|
||||
from timesketch.lib.analyzers import interface
|
||||
from timesketch.lib.analyzers import manager
|
||||
|
||||
# python-whois is an optional dependency: remember whether it could be
# imported so the analyzer can fall back to the HTTP API when it is missing.
try:
    import whois
except ImportError:
    HAS_WHOIS = False
else:
    HAS_WHOIS = True

# Module-level logger shared by everything in this analyzer.
logger = logging.getLogger("timesketch.analyzers.whois_enrichment")
|
||||
|
||||
|
||||
class WhoisEnrichmentAnalyzer(interface.BaseAnalyzer):
    """Analyzer to enrich IP addresses with WHOIS data.

    For every event that has a value in one of ``IP_FIELDS`` and no
    ``whois_checked`` attribute yet, the first global (public) IP address
    found is looked up and the result is written back to the event as
    ``<field>_*`` attributes. Lookups use python-whois when it is installed
    and fall back to the ip-api.com JSON API otherwise.
    """

    NAME = 'whois_enrichment'
    DISPLAY_NAME = 'WHOIS IP Enrichment'
    DESCRIPTION = 'Enriches source IP addresses with WHOIS/ASN data'

    # Common IP fields to check (same as GeoIP analyzer for consistency)
    IP_FIELDS = [
        'ip',
        'host_ip',
        'src_ip',
        'dst_ip',
        'source_ip',
        'dest_ip',
        'ip_address',
        'client_ip',
        'address',
        'saddr',
        'daddr',
    ]

    # Fields requested from ip-api.com. The API only returns the fields that
    # are asked for, so 'status' (and 'message') must be requested explicitly
    # for the success check in _get_asn_data_via_api() to ever pass.
    _API_FIELDS = 'status,message,as,asname,isp,org,country,regionName,city'

    # (event attribute suffix, candidate keys in the WHOIS dict). The first
    # candidate key holding a truthy value wins.
    _ENRICHMENT_FIELDS = (
        ('asn', ('asn',)),
        ('asn_name', ('asn_name', 'asn_description')),
        ('organization', ('organization',)),
        ('isp', ('isp',)),
        ('network_name', ('network_name',)),
        ('network_range', ('network_range',)),
        ('network_type', ('network_type',)),
        # Location information (prefixed to not clash with GeoIP attributes)
        ('whois_country', ('country',)),
        ('whois_region', ('region',)),
        ('whois_city', ('city',)),
        ('creation_date', ('creation_date',)),
        ('updated_date', ('updated_date',)),
    )

    def __init__(self, index_name, sketch_id, timeline_id=None):
        """Initialize the analyzer and read its settings from the Flask config.

        Args:
            index_name: Passed through to the base analyzer.
            sketch_id: Passed through to the base analyzer.
            timeline_id: Passed through to the base analyzer.
        """
        super().__init__(index_name, sketch_id, timeline_id)

        # Configuration options
        self.batch_size = current_app.config.get('WHOIS_BATCH_SIZE', 50)
        self.rate_limit_delay = current_app.config.get(
            'WHOIS_RATE_LIMIT_DELAY', 1.0)
        # NOTE: max_retries is read here but not used by any lookup helper
        # in this class.
        self.max_retries = current_app.config.get('WHOIS_MAX_RETRIES', 2)
        self.timeout = current_app.config.get('WHOIS_TIMEOUT', 30)

        # Cache to avoid duplicate queries (failed lookups are cached too)
        self.whois_cache: Dict[str, Optional[Dict]] = {}
        # Distinct IP addresses that have been looked up during this run
        self.processed_ips: Set[str] = set()

    def _validate_ip(self, ip_address: str) -> bool:
        """Validate an IP address for analysis (same logic as GeoIP analyzer).

        Args:
            ip_address: The IP address to validate

        Returns:
            True if IP is valid and global (public)
        """
        try:
            return ipaddress.ip_address(ip_address.strip()).is_global
        except (ValueError, AttributeError):
            # ValueError: not an IP address; AttributeError: not a string.
            return False

    def _get_asn_data_via_api(self, ip_address: str) -> Optional[Dict]:
        """Get ASN data using a free API service as fallback.

        Args:
            ip_address: IP address to lookup

        Returns:
            Dictionary with ASN data or None
        """
        # Using ip-api.com which has a free tier
        # Alternative: ipinfo.io, whoisapi.org, etc.
        url = f"http://ip-api.com/json/{ip_address}?fields={self._API_FIELDS}"

        try:
            response = requests.get(url, timeout=self.timeout)
            if response.status_code != 200:
                return None
            data = response.json()
        except (requests.RequestException, ValueError) as e:
            # Network problems or a body that is not valid JSON.
            logger.debug("API lookup failed for %s: %s", ip_address, e)
            return None

        if not isinstance(data, dict) or data.get('status') != 'success':
            return None

        # Parse ASN number from 'as' field (format: "AS15169 Google LLC")
        as_info = data.get('as') or ''
        asn = None
        if as_info.startswith('AS'):
            asn = as_info.split()[0][2:]  # Remove 'AS' prefix

        return {
            'asn': asn,
            'asn_name': data.get('asname'),
            'isp': data.get('isp'),
            'organization': data.get('org'),
            'country': data.get('country'),
            'region': data.get('regionName'),
            'city': data.get('city'),
        }

    @staticmethod
    def _read_field(obj, name):
        """Read *name* from a mapping or from an attribute-style object."""
        if isinstance(obj, dict):
            return obj.get(name)
        return getattr(obj, name, None)

    def _get_whois_data_python_whois(self, ip_address: str) -> Optional[Dict]:
        """Get WHOIS data using python-whois library.

        Only fields that actually carry a value are returned, so an answer
        without usable content yields None and lets the caller fall back to
        the API lookup.

        Args:
            ip_address: IP address to lookup

        Returns:
            Dictionary with WHOIS data or None
        """
        if not HAS_WHOIS:
            return None

        try:
            w = whois.whois(ip_address)
            read = self._read_field
            data = {}

            # Network information
            nets = read(w, 'nets')
            if nets:
                net = nets[0] if isinstance(nets, list) else nets
                data['network_name'] = read(net, 'name')
                data['network_range'] = read(net, 'range')
                data['network_type'] = read(net, 'type')

            # ASN and organization information: (result key, source field)
            for key, field in (
                    ('asn', 'asn'),
                    ('asn_description', 'asn_description'),
                    ('organization', 'org'),
                    ('address', 'address'),
                    ('city', 'city'),
                    ('state', 'state'),
                    ('country', 'country')):
                data[key] = read(w, field)

            # Registration dates; only stringify real values so a missing
            # date does not turn into the literal string 'None'.
            for key in ('creation_date', 'updated_date'):
                value = read(w, key)
                if value:
                    data[key] = str(value)

            # Drop empty entries so an all-empty answer counts as "no data".
            data = {key: value for key, value in data.items() if value}
            return data or None

        except Exception as e:  # pylint: disable=broad-except
            # Best-effort lookup: any library/socket failure means "no data".
            logger.debug(
                "Python-whois lookup failed for %s: %s", ip_address, e)
            return None

    def _get_whois_data(self, ip_address: str) -> Optional[Dict]:
        """Get WHOIS data for an IP address using available methods.

        Args:
            ip_address: IP address to lookup

        Returns:
            Dictionary with WHOIS data or None
        """
        if ip_address in self.whois_cache:
            return self.whois_cache[ip_address]

        # Try python-whois first (returns None when it is not installed),
        # then fall back to the API.
        whois_data = self._get_whois_data_python_whois(ip_address)
        if not whois_data:
            whois_data = self._get_asn_data_via_api(ip_address)

        # Cache the result (even if None)
        self.whois_cache[ip_address] = whois_data
        return whois_data

    def _enrich_event(self, event, ip_field: str, whois_data: Dict):
        """Add WHOIS data to the event.

        Args:
            event: The event object to enrich
            ip_field: The field name containing the IP address
            whois_data: Dictionary with WHOIS data

        Returns:
            True if the enrichment was committed, False if it failed (the
            event is then only marked as checked, together with the error).
        """
        try:
            # Create enrichment attributes with field-specific naming
            enrichment = {'whois_checked': True}
            for suffix, keys in self._ENRICHMENT_FIELDS:
                value = next(
                    (whois_data[key] for key in keys if whois_data.get(key)),
                    None)
                if value:
                    enrichment[f'{ip_field}_{suffix}'] = value

            event.add_attributes(enrichment)
            event.add_tags(['whois-enriched'])
            event.commit()
            return True

        except Exception as e:  # pylint: disable=broad-except
            logger.error("Error enriching event for %s: %s", ip_field, e)
            # Still mark as checked to avoid reprocessing
            try:
                event.add_attributes(
                    {'whois_checked': True, 'whois_error': str(e)})
                event.commit()
            except Exception as commit_error:  # pylint: disable=broad-except
                logger.error(
                    "Error marking event as checked: %s", commit_error)
            return False

    def run(self):
        """Main analyzer logic.

        Streams all events that have an IP field but no ``whois_checked``
        attribute, processes them in batches of ``self.batch_size`` with a
        pause of ``self.rate_limit_delay`` seconds after each full batch, and
        adds a sketch view when at least one event was enriched.

        Returns:
            Human readable summary string.
        """
        logger.info("Starting WHOIS enrichment analysis")

        # Build query for events with IP fields that haven't been checked
        ip_exists_queries = [f'_exists_:{field}' for field in self.IP_FIELDS]
        query = (
            f'({" OR ".join(ip_exists_queries)}) '
            'AND NOT _exists_:whois_checked')

        events = self.event_stream(
            query_string=query,
            return_fields=self.IP_FIELDS + ['whois_checked'],
        )

        total_processed = 0
        enriched_count = 0
        current_batch = []

        try:
            for event in events:
                current_batch.append(event)
                if len(current_batch) < self.batch_size:
                    continue

                processed, enriched = self._process_batch(current_batch)
                total_processed += processed
                enriched_count += enriched
                current_batch = []

                # Rate limiting
                if self.rate_limit_delay > 0:
                    time.sleep(self.rate_limit_delay)

                # Log progress
                if total_processed % (self.batch_size * 5) == 0:
                    logger.info(
                        "Progress: %s processed, %s enriched",
                        total_processed, enriched_count)

            # Process remaining events
            if current_batch:
                processed, enriched = self._process_batch(current_batch)
                total_processed += processed
                enriched_count += enriched

        except Exception as e:  # pylint: disable=broad-except
            # Best effort: report what was done so far instead of failing.
            logger.error(
                "Error during WHOIS processing: %s", e, exc_info=True)

        # Create a view if we enriched any events
        if enriched_count > 0:
            self.sketch.add_view(
                view_name="WHOIS Enriched Events",
                analyzer_name=self.NAME,
                query_string='tag:"whois-enriched"',
            )

        logger.info(
            "WHOIS enrichment complete: %s processed, %s enriched",
            total_processed, enriched_count)
        return (
            f"Processed {total_processed} events, "
            f"enriched {enriched_count} with WHOIS data")

    def _first_public_ip(self, event):
        """Find the first valid global IP address of an event.

        Fields are checked in ``IP_FIELDS`` order; a field may hold a single
        value or a list of values.

        Args:
            event: Event whose ``source`` mapping is inspected.

        Returns:
            Tuple of (field name, stripped IP address), or (None, None) when
            the event has no usable public IP address.
        """
        for ip_field in self.IP_FIELDS:
            ip_value = event.source.get(ip_field)
            if not ip_value:
                continue

            # Handle both single IP and list of IPs
            if isinstance(ip_value, str):
                candidates = [ip_value]
            elif isinstance(ip_value, list):
                candidates = ip_value
            else:
                candidates = [str(ip_value)]

            for candidate in candidates:
                if self._validate_ip(candidate):
                    # Use the same normalized form that was validated.
                    return ip_field, candidate.strip()

        return None, None

    def _process_batch(self, events):
        """Process a batch of events.

        Every event ends up with ``whois_checked`` set: enriched, flagged
        with ``whois_no_data``, or just marked when it has no public IP.
        Only the first public IP of an event is used, to avoid duplicate
        enrichment of the same event.

        Args:
            events: List of events to process

        Returns:
            Tuple of (processed_count, enriched_count)
        """
        processed_count = 0
        enriched_count = 0

        for event in events:
            processed_count += 1

            ip_field, ip_addr = self._first_public_ip(event)
            if ip_addr is None:
                # If no valid IPs found, still mark as checked so the event
                # is not selected again on the next run.
                event.add_attributes({'whois_checked': True})
                event.commit()
                continue

            # Repeated IPs are served from the cache in _get_whois_data(),
            # so every event carrying the address gets enriched.
            self.processed_ips.add(ip_addr)
            whois_data = self._get_whois_data(ip_addr)

            if whois_data:
                if self._enrich_event(event, ip_field, whois_data):
                    enriched_count += 1
                    logger.debug("Enriched %s with WHOIS data", ip_addr)
            else:
                # Mark as checked even if no data found
                event.add_attributes(
                    {'whois_checked': True, 'whois_no_data': True})
                event.commit()
                logger.debug("No WHOIS data for %s", ip_addr)

        return processed_count, enriched_count
|
||||
|
||||
|
||||
# Register the analyzer class with the AnalysisManager when this module is
# imported.
manager.AnalysisManager.register_analyzer(WhoisEnrichmentAnalyzer)
|
Loading…
x
Reference in New Issue
Block a user