# NOTE: This repository was archived on 2025-08-27. You can view files and
# clone it, but cannot push or open issues or pull requests.
# File: timesketch_misp/misp_analyzer.py
# Listing metadata: 541 lines, 22 KiB, Python

"""Index analyzer plugin for MISP"""
import logging
import ntpath
import re
import requests
import time
import json
from collections import defaultdict
from typing import List, Dict, Set, Any, Optional
from urllib3.exceptions import InsecureRequestWarning
from flask import current_app
from timesketch.lib.analyzers import interface
from timesketch.lib.analyzers import manager
# Suppress urllib3's InsecureRequestWarning for the whole process. The analyzer
# talks to MISP through a requests session whose certificate verification is
# switched off (see session.verify in MispAnalyzer.__init__), which would
# otherwise emit this warning on every request.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Module-level logger used by every log call in this file.
logger = logging.getLogger("timesketch.analyzers.misp")
class MispAnalyzer(interface.BaseAnalyzer):
    """Timesketch analyzer that marks events whose indicators are known to MISP.

    Each instance handles one (query, MISP attribute type, Timesketch field)
    combination as produced by ``get_kwargs``. Events are streamed in chunks,
    indicators (IPs, hashes, file names) are extracted and validated, every
    distinct indicator is looked up once through MISP's
    ``/attributes/restSearch/`` endpoint, and matching events receive a
    comment and tags.

    Lookup results are cached per instance in ``result_dict`` so an indicator
    seen in several chunks is only queried once.
    """

    NAME = "misp_analyzer"
    DISPLAY_NAME = "MISP Enhanced"
    DESCRIPTION = "Mark events using MISP with ultra-reliable large-scale support"

    # Expected hex-digest length for each supported hash attribute type.
    _HASH_LENGTHS = {"md5": 32, "sha1": 40, "sha256": 64}
    _HEX_DIGITS = frozenset("0123456789abcdef")
    # Strict dotted-quad shape check applied before handing off to ipaddress.
    _DOTTED_QUAD = re.compile(r'^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$')
    # Labels used in the event comment for the numeric threat_level_id values.
    _THREAT_LEVEL_NAMES = {1: "HIGH", 2: "MEDIUM", 3: "LOW", 4: "UNDEFINED"}

    def __init__(self, index_name, sketch_id, timeline_id=None, **kwargs):
        """Initialize the Analyzer.

        Args:
            index_name: Passed through to the base analyzer.
            sketch_id: Passed through to the base analyzer.
            timeline_id: Passed through to the base analyzer.
            **kwargs: One configuration dict as returned by ``get_kwargs``.
                Recognised keys: ``query_string``, ``attr``,
                ``timesketch_attr``, ``include_community``, ``chunk_size``,
                ``max_retries``, ``base_timeout``, ``max_timeout``,
                ``request_delay``, ``max_indicators_per_batch`` and
                ``verify_tls``.
        """
        super().__init__(index_name, sketch_id, timeline_id=timeline_id)
        self.misp_url = current_app.config.get("MISP_URL")
        self.misp_api_key = current_app.config.get("MISP_API_KEY")
        self.total_event_counter = 0
        # Cache of MISP hits, keyed by "<attr>:<indicator>".
        self.result_dict = {}

        self._query_string = kwargs.get("query_string")
        self._attr = kwargs.get("attr")
        self._timesketch_attr = kwargs.get("timesketch_attr")

        self.include_community = kwargs.get("include_community", True)
        self.chunk_size = kwargs.get("chunk_size", 500)
        self.max_retries = kwargs.get("max_retries", 5)
        self.base_timeout = kwargs.get("base_timeout", 30)
        self.max_timeout = kwargs.get("max_timeout", 180)
        self.request_delay = kwargs.get("request_delay", 1.0)
        self.max_indicators_per_batch = kwargs.get("max_indicators_per_batch", 10)

        self.ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')

        # Tracking sets
        self.marked_events = set()
        self.processed_indicators = set()
        self.failed_indicators = set()

        self.stats = {
            'events_processed': 0,
            'indicators_extracted': 0,
            'api_calls_successful': 0,
            'api_calls_failed': 0,
            'events_marked': 0,
            'total_matches': 0,
            'timeouts': 0,
            'retries': 0,
        }

        # Session for connection reuse
        self.session = requests.Session()
        # SECURITY NOTE: certificate verification is off by default, which
        # keeps the historical behaviour (self-signed MISP instances) but
        # exposes the API key to man-in-the-middle attacks. Pass
        # verify_tls=True (or a CA bundle path) to turn verification on.
        self.session.verify = kwargs.get("verify_tls", False)
        self.session.headers.update({
            "Authorization": self.misp_api_key,
            "Content-Type": "application/json",
            "User-Agent": "Timesketch-MISP-Analyzer/1.0",
        })

    @staticmethod
    def get_kwargs():
        """Return one configuration dict per (query, MISP type, field) to scan.

        Every IP-bearing field gets its own entry: an analyzer instance only
        reads the single field named by ``timesketch_attr``, so a combined
        ``a:* OR b:*`` query would stream events whose value could never be
        inspected.

        Returns:
            list[dict]: Keyword arguments for ``__init__``, one dict per run.
        """
        base_config = {
            "include_community": True,
            "chunk_size": 500,
            "max_retries": 5,
            "base_timeout": 30,
            "max_timeout": 180,
            "request_delay": 1.0,
            "max_indicators_per_batch": 10,
        }
        # (query_string, MISP attribute type, Timesketch field)
        targets = [
            ("md5_hash:*", "md5", "md5_hash"),
            ("sha1_hash:*", "sha1", "sha1_hash"),
            ("sha256_hash:*", "sha256", "sha256_hash"),
            ("filename:*", "filename", "filename"),
            ("message:*", "ip", "message"),
            ("source_ip:*", "ip", "source_ip"),
            ("src_ip:*", "ip", "src_ip"),
            ("client_ip:*", "ip", "client_ip"),
        ]
        return [
            {
                "query_string": query_string,
                "attr": attr,
                "timesketch_attr": timesketch_attr,
                **base_config,
            }
            for query_string, attr, timesketch_attr in targets
        ]

    def _is_valid_ip(self, ip_str: str) -> bool:
        """Return True when ``ip_str`` is a publicly routable IPv4 address.

        Private, loopback, multicast, reserved, link-local and unspecified
        addresses are rejected using the ``ipaddress`` classification, so
        public addresses that merely share a textual prefix with a private
        range (for example ``172.217.0.1``) are accepted.

        Args:
            ip_str: Candidate address; surrounding whitespace is ignored.

        Returns:
            bool: False for anything that is not a routable dotted quad,
            including non-string input.
        """
        import ipaddress

        try:
            candidate = ip_str.strip()
            if not self._DOTTED_QUAD.match(candidate):
                return False
            ip_obj = ipaddress.ip_address(candidate)
        except (ValueError, AttributeError, TypeError):
            return False

        return not (
            ip_obj.is_private
            or ip_obj.is_loopback
            or ip_obj.is_multicast
            or ip_obj.is_reserved
            or ip_obj.is_link_local
            or ip_obj.is_unspecified
        )

    def _is_valid_hash(self, hash_str: str, hash_type: str) -> bool:
        """Return True when ``hash_str`` is a hex digest of the right length.

        Args:
            hash_str: Candidate digest; whitespace and letter case are ignored.
            hash_type: One of ``md5``, ``sha1`` or ``sha256``.

        Returns:
            bool: False for empty/non-string input, unknown hash types,
            wrong lengths or non-hex characters.
        """
        if not hash_str or not isinstance(hash_str, str):
            return False
        expected_length = self._HASH_LENGTHS.get(hash_type)
        if not expected_length:
            return False
        digest = hash_str.strip().lower()
        # Placeholder values such as "null" or "-" fail the length check.
        if len(digest) != expected_length:
            return False
        return self._HEX_DIGITS.issuperset(digest)

    def _calculate_payload_size(self, payload: Dict[str, Any]) -> int:
        """Return the UTF-8 size in bytes of ``payload`` serialised as JSON.

        Returns 0 when the payload cannot be serialised; the value is only
        used for debug logging.
        """
        try:
            return len(json.dumps(payload).encode('utf-8'))
        except (TypeError, ValueError):
            return 0

    def _query_search_type(self, indicator: str, search_type: str,
                           payload: Dict[str, Any],
                           first_attempt: int = 0) -> Optional[List[Dict]]:
        """POST one restSearch query, retrying transient failures.

        Timeouts, connection errors, HTTP 429 and HTTP 5xx responses are
        retried until ``self.max_retries`` is reached, with the request
        timeout growing by 10 s per attempt up to ``self.max_timeout``.

        Args:
            indicator: Value being searched (used for logging only).
            search_type: MISP attribute type placed in the payload.
            payload: JSON body for the request.
            first_attempt: Attempt number to start counting from.

        Returns:
            The list of matching attributes (possibly empty), or None when
            the query failed permanently.
        """
        url = f"{self.misp_url.rstrip('/')}/attributes/restSearch/"
        attempt = first_attempt

        while True:
            timeout = min(self.base_timeout + attempt * 10, self.max_timeout)
            try:
                response = self.session.post(url, json=payload, timeout=timeout)
            except (requests.exceptions.Timeout, TimeoutError) as e:
                self.stats['timeouts'] += 1
                problem = f"timeout ({e})"
                wait_time = min(2 ** attempt, 30)
            except requests.exceptions.ConnectionError as e:
                problem = f"connection error ({e})"
                wait_time = min(5 * (attempt + 1), 60)
            except requests.exceptions.RequestException as e:
                # Anything else raised by requests is not worth retrying.
                logger.error("Unexpected error querying %s: %s", indicator, e)
                self.stats['api_calls_failed'] += 1
                return None
            else:
                status = response.status_code
                if status == 200:
                    try:
                        data = response.json()
                    except ValueError as e:
                        logger.error("Invalid JSON from MISP for %s: %s", indicator, e)
                        self.stats['api_calls_failed'] += 1
                        return None
                    self.stats['api_calls_successful'] += 1
                    body = data.get("response") if isinstance(data, dict) else None
                    attributes = body.get("Attribute") if isinstance(body, dict) else None
                    return attributes if isinstance(attributes, list) else []
                if status == 429:
                    problem = "rate limited"
                    wait_time = min(5 * (attempt + 1), 30)
                elif status >= 500:
                    problem = f"server error {status}"
                    wait_time = min(2 ** attempt, 30)
                else:
                    # Other statuses are treated as "nothing found".
                    logger.debug("No results for %s (%s): status %s",
                                 indicator, search_type, status)
                    return []

            if attempt >= self.max_retries:
                logger.error("Max retries exceeded for %s (%s): %s",
                             indicator, search_type, problem)
                self.stats['api_calls_failed'] += 1
                return None

            logger.warning("%s for %s (attempt %d/%d), retrying in %ss",
                           problem, indicator, attempt + 1,
                           self.max_retries, wait_time)
            time.sleep(wait_time)
            self.stats['retries'] += 1
            attempt += 1

    def _make_misp_request_single(self, indicator: str, attr_type: str,
                                  retry_count: int = 0) -> List[Dict]:
        """Look up a single indicator in MISP.

        IP indicators are searched as both ``ip-src`` and ``ip-dst``; every
        other attribute type is searched as-is.

        Args:
            indicator: Value to search for.
            attr_type: Analyzer attribute type (``ip``, ``md5``, ...).
            retry_count: Attempt number to start from (affects the initial
                timeout and how many retries remain).

        Returns:
            All matching MISP attributes. An empty list is returned when
            nothing matched or when any search failed permanently; in the
            latter case the indicator is added to ``failed_indicators``.
        """
        search_types = ["ip-src", "ip-dst"] if attr_type == "ip" else [attr_type]

        # 0 = own organisation only; 1 and 2 add community distribution.
        distribution_levels = [0, 1, 2] if self.include_community else [0]

        results = []
        for position, search_type in enumerate(search_types):
            if position:
                # Short pause between the searches for one indicator.
                time.sleep(0.2)

            payload = {
                "returnFormat": "json",
                "value": indicator,
                "type": search_type,
                "enforceWarninglist": False,
                "includeDecayScore": False,
                "includeFullModel": False,
                "distribution": distribution_levels,
                "limit": 100,  # Conservative limit
            }
            logger.debug("Querying %s (%s) - payload: %d bytes",
                         indicator, search_type,
                         self._calculate_payload_size(payload))

            attributes = self._query_search_type(
                indicator, search_type, payload, retry_count)
            if attributes is None:
                self.failed_indicators.add(indicator)
                return []
            results.extend(attributes)

        return results

    def extract_indicators_from_event(self, event: Any, attr: str,
                                      timesketch_attr: str) -> List[str]:
        """Extract validated indicators of type ``attr`` from one event.

        Args:
            event: Object exposing a dict-like ``source`` attribute.
            attr: ``ip``, ``md5``, ``sha1``, ``sha256`` or ``filename``.
            timesketch_attr: Name of the event field to read.

        Returns:
            Distinct indicators in order of appearance; empty when the field
            is missing, fails validation, or the event cannot be read.
        """
        try:
            raw_value = event.source.get(timesketch_attr)
        except (AttributeError, TypeError) as e:
            logger.debug("Error extracting indicators from event: %s", e)
            return []
        if not raw_value:
            return []

        text = str(raw_value)

        if attr == "ip":
            if timesketch_attr == "message":
                # Free text: pull out every dotted quad, then validate.
                candidates = self.ip_pattern.findall(text)
            else:
                # Dedicated IP field: the whole value is the candidate.
                candidates = [text.strip()]
            valid_ips = [ip for ip in candidates if self._is_valid_ip(ip)]
            return list(dict.fromkeys(valid_ips))

        if attr in self._HASH_LENGTHS:
            if self._is_valid_hash(text, attr):
                return [text.strip().lower()]
            return []

        if attr == "filename":
            filename = ntpath.basename(text).strip()
            # Require an extension-like name of at least 4 characters.
            if len(filename) > 3 and '.' in filename:
                return [filename]

        return []

    @staticmethod
    def _event_key(event: Any) -> Optional[str]:
        """Return a stable identifier for ``event``, or None if it has none.

        Prefers the ``event_id`` attribute and falls back to an ``_id``
        entry in ``event.source``.
        """
        event_id = getattr(event, "event_id", None)
        if not event_id:
            source = getattr(event, "source", None)
            event_id = source.get("_id") if isinstance(source, dict) else None
        return str(event_id) if event_id else None

    def _build_intelligence_summary(self, misp_results: List[Dict], attr: str) -> tuple:
        """Build the comment text and tag list describing ``misp_results``.

        Only the first three results contribute event descriptions,
        organisation names and threat levels.

        Returns:
            tuple: ``(message, tags)`` where ``message`` is a str and
            ``tags`` a list of str.
        """
        if attr == "ip":
            msg_prefix = "MISP: Threat IP"
        elif attr in self._HASH_LENGTHS:
            msg_prefix = f"MISP: Malicious {attr.upper()}"
        else:
            msg_prefix = f"MISP: Known {attr.upper()}"

        event_descriptions = []
        org_names = []  # distinct, in first-seen order for stable output
        threat_levels = []

        for misp_attr in misp_results[:3]:
            if not isinstance(misp_attr, dict):
                continue
            event_info = misp_attr.get("Event") or {}
            description = str(event_info.get("info") or "")[:40]
            org_name = (event_info.get("Orgc") or {}).get("name")
            threat_level = event_info.get("threat_level_id")

            if description and description != "Unknown":
                event_descriptions.append(description)
            if org_name and org_name != "Unknown" and org_name not in org_names:
                org_names.append(org_name)
            if threat_level:
                try:
                    threat_levels.append(int(threat_level))
                except (TypeError, ValueError):
                    pass  # non-numeric level: leave it out of the summary

        msg_parts = [msg_prefix]
        if event_descriptions:
            msg_parts.append(f"Events: {' | '.join(event_descriptions[:2])}")
        if len(misp_results) > 3:
            msg_parts.append(f"({len(misp_results)} total matches)")
        if len(org_names) > 1:
            msg_parts.append(f"Sources: {', '.join(org_names[:2])}")
        elif org_names:
            msg_parts.append(f"Source: {org_names[0]}")
        if threat_levels:
            min_threat = min(threat_levels)  # Lower = higher threat
            msg_parts.append(
                f"Threat: {self._THREAT_LEVEL_NAMES.get(min_threat, 'UNKNOWN')}")

        tags = [f"MISP-{attr}", "threat-intel"]
        if self.include_community and len(org_names) > 1:
            tags.append("cross-org-intel")

        return " | ".join(msg_parts), tags

    def mark_event_with_intelligence(self, event: Any, misp_results: List[Dict],
                                     attr: str) -> None:
        """Add a MISP comment and tags to ``event`` and commit it.

        An event that was already marked by this instance is skipped. The
        event is only recorded as marked once the write succeeded. Failures
        are logged rather than raised so one bad event cannot abort the run.

        Args:
            event: Event exposing ``add_comment``, ``add_tags`` and ``commit``.
            misp_results: MISP attributes that matched the event.
            attr: Analyzer attribute type, used in the message and tags.
        """
        event_key = self._event_key(event)
        if event_key is not None and event_key in self.marked_events:
            return

        try:
            final_message, tags = self._build_intelligence_summary(misp_results, attr)
            event.add_comment(final_message)
            event.add_tags(tags)
            event.commit()
        except Exception as e:  # best-effort: keep processing other events
            logger.error("Error marking event: %s", e)
            return

        if event_key is not None:
            self.marked_events.add(event_key)
        self.stats['events_marked'] += 1
        self.stats['total_matches'] += len(misp_results)

    def process_indicators_batch(self, indicators: List[str],
                                 attr: str) -> Dict[str, List[Dict]]:
        """Query MISP for each indicator, pausing between requests.

        Args:
            indicators: Indicators to look up.
            attr: Analyzer attribute type shared by all indicators.

        Returns:
            Mapping of indicator to its MISP attributes; indicators without
            hits (or that previously failed) are absent.
        """
        results = {}
        total = len(indicators)

        for position, indicator in enumerate(indicators, start=1):
            if indicator in self.failed_indicators:
                continue

            logger.debug("Processing indicator %d/%d: %s", position, total, indicator)
            misp_results = self._make_misp_request_single(indicator, attr)
            if misp_results:
                results[indicator] = misp_results
                logger.info("MISP hit: %s (%d matches)", indicator, len(misp_results))

            # Rate limiting between requests
            time.sleep(self.request_delay)

            # Progress update every 50 indicators
            if position % 50 == 0:
                logger.info("Processed %d/%d indicators, %d hits, %d timeouts, %d failures",
                            position, total, len(results),
                            self.stats['timeouts'], self.stats['api_calls_failed'])

        return results

    def _api_success_rate(self) -> float:
        """Return the percentage of API calls that succeeded (0.0 if none)."""
        successful = self.stats['api_calls_successful']
        total = successful + self.stats['api_calls_failed']
        return successful / max(1, total) * 100

    def query_misp(self, query: str, attr: str, timesketch_attr: str) -> None:
        """Stream matching events and process them in chunks.

        Args:
            query: Query string selecting the events to inspect.
            attr: Analyzer attribute type to extract.
            timesketch_attr: Event field holding the indicator.

        Raises:
            Exception: Any error raised while streaming or processing is
            logged and re-raised.
        """
        logger.info("Starting ultra-reliable MISP analysis for %s in %s",
                    attr, timesketch_attr)
        logger.info("Configuration: chunk_size=%s, max_retries=%s, "
                    "request_delay=%ss, include_community=%s",
                    self.chunk_size, self.max_retries,
                    self.request_delay, self.include_community)

        events_stream = self.event_stream(
            query_string=query,
            return_fields=[timesketch_attr, '_id', 'timestamp'],
        )

        current_chunk = []
        try:
            for event in events_stream:
                current_chunk.append(event)
                self.stats['events_processed'] += 1

                # Process when chunk is full
                if len(current_chunk) >= self.chunk_size:
                    self._process_events_chunk(current_chunk, attr, timesketch_attr)
                    current_chunk = []

                # Progress logging
                if self.stats['events_processed'] % 5000 == 0:
                    logger.info("Progress: %d events, %d marked, "
                                "%d indicators processed, %.1f%% API success rate",
                                self.stats['events_processed'],
                                self.stats['events_marked'],
                                len(self.processed_indicators),
                                self._api_success_rate())

            # Process final (partial) chunk
            if current_chunk:
                self._process_events_chunk(current_chunk, attr, timesketch_attr)
        except Exception as e:
            logger.error("Critical error during processing: %s", e)
            raise

    def _process_events_chunk(self, events_chunk: List[Any], attr: str,
                              timesketch_attr: str) -> None:
        """Extract indicators from a chunk, query MISP and mark matching events.

        Only indicators not seen before are sent to MISP, but every event in
        the chunk is checked against the full result cache, so an event
        whose indicator was resolved in an earlier chunk is still marked.
        """
        # Keep (event, indicators) pairs rather than a dict keyed by event id
        # so events can never overwrite each other.
        event_matches = []
        chunk_indicators = []
        for event in events_chunk:
            indicators = self.extract_indicators_from_event(event, attr, timesketch_attr)
            if indicators:
                event_matches.append((event, indicators))
                chunk_indicators.extend(indicators)

        if not event_matches:
            return

        # De-duplicate while preserving order, then drop known indicators.
        new_indicators = [
            indicator for indicator in dict.fromkeys(chunk_indicators)
            if indicator not in self.processed_indicators
            and indicator not in self.failed_indicators
        ]

        if new_indicators:
            logger.info("Processing %d new %s indicators from %d events",
                        len(new_indicators), attr, len(events_chunk))
            self.stats['indicators_extracted'] += len(new_indicators)

            misp_results = self.process_indicators_batch(new_indicators, attr)

            # Update cache
            self.processed_indicators.update(new_indicators)
            for indicator, results in misp_results.items():
                self.result_dict[f"{attr}:{indicator}"] = results

        # Mark matching events (cached hits included)
        for event, indicators in event_matches:
            matching_results = []
            for indicator in indicators:
                matching_results.extend(self.result_dict.get(f"{attr}:{indicator}", ()))
            if matching_results:
                self.mark_event_with_intelligence(event, matching_results, attr)

    def run(self) -> str:
        """Run the analysis and return a one-line human-readable summary.

        Returns:
            str: A statistics summary on success, or an error message when
            the MISP configuration is missing or the analysis failed. The
            HTTP session is closed in either case.
        """
        if not self.misp_url or not self.misp_api_key:
            return "Error: MISP configuration missing"

        start_time = time.time()
        try:
            self.query_misp(self._query_string, self._attr, self._timesketch_attr)

            # Create view for matches
            if self.stats['events_marked'] > 0:
                view_name = f"MISP {self._attr.upper()} Threats"
                if self.include_community:
                    view_name += " (Cross-Org)"
                self.sketch.add_view(
                    view_name=view_name,
                    analyzer_name=self.NAME,
                    query_string=f'tag:"MISP-{self._attr}" OR tag:"threat-intel"',
                )

            elapsed = time.time() - start_time
            total_api_calls = (self.stats['api_calls_successful']
                               + self.stats['api_calls_failed'])
            result = (f"[{self._timesketch_attr}] MISP Analysis: "
                      f"{self.stats['events_marked']}/{self.stats['events_processed']} events marked | "
                      f"{len(self.processed_indicators)} indicators processed | "
                      f"{total_api_calls} API calls ({self._api_success_rate():.1f}% success) | "
                      f"{self.stats['timeouts']} timeouts | "
                      f"{elapsed:.0f}s")
            logger.info(result)
            return result
        except Exception as e:
            logger.error("MISP analyzer failed: %s", e)
            return f"[{self._timesketch_attr}] MISP Error: {str(e)}"
        finally:
            try:
                self.session.close()
            except Exception as e:  # closing is best-effort; never mask the result
                logger.debug("Error closing MISP session: %s", e)
# Register MispAnalyzer with the analysis manager when this module is imported.
manager.AnalysisManager.register_analyzer(MispAnalyzer)