misp_analyzer.py aktualisiert

This commit is contained in:
Mario Stöckl 2025-07-30 13:29:12 +00:00
parent 59ea0dd658
commit f52710cfb6

View File

@ -1,30 +1,25 @@
"""Index analyzer plugin for MISP"""
"""Index analyzer plugin for MISP - Simple and reliable for large-scale processing."""
import logging
import ntpath
import re
import requests
import time
import json
from collections import defaultdict
from typing import List, Dict, Set, Any, Optional
from urllib3.exceptions import InsecureRequestWarning
from flask import current_app
from timesketch.lib.analyzers import interface
from timesketch.lib.analyzers import manager
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
logger = logging.getLogger("timesketch.analyzers.misp")
class MispAnalyzer(interface.BaseAnalyzer):
"""Ultra-reliable MISP Analyzer for large-scale processing."""
"""Simple, reliable MISP Analyzer for large-scale processing."""
NAME = "misp_analyzer"
DISPLAY_NAME = "MISP Enhanced"
DESCRIPTION = "Mark events using MISP with ultra-reliable large-scale support"
DISPLAY_NAME = "MISP"
DESCRIPTION = "Mark events using MISP - Simple and Reliable"
def __init__(self, index_name, sketch_id, timeline_id=None, **kwargs):
"""Initialize the Analyzer."""
@ -37,383 +32,271 @@ class MispAnalyzer(interface.BaseAnalyzer):
self._attr = kwargs.get("attr")
self._timesketch_attr = kwargs.get("timesketch_attr")
self.include_community = kwargs.get("include_community", True)
self.chunk_size = kwargs.get("chunk_size", 500)
self.max_retries = kwargs.get("max_retries", 5)
self.base_timeout = kwargs.get("base_timeout", 30)
self.max_timeout = kwargs.get("max_timeout", 180)
self.request_delay = kwargs.get("request_delay", 1.0)
self.max_indicators_per_batch = kwargs.get("max_indicators_per_batch", 10)
# Simple configuration for reliability
self.include_community = kwargs.get("include_community", False) # Default to false for reliability
self.chunk_size = kwargs.get("chunk_size", 1000) # Process in chunks
self.max_retries = kwargs.get("max_retries", 2) # Minimal retries
self.request_delay = kwargs.get("request_delay", 0.5) # Small delay between requests
self.ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')
# Tracking sets
self.marked_events = set()
# Track processed items
self.processed_indicators = set()
self.failed_indicators = set()
# Simple stats
self.stats = {
'events_processed': 0,
'indicators_extracted': 0,
'api_calls_successful': 0,
'api_calls_failed': 0,
'events_marked': 0,
'total_matches': 0,
'timeouts': 0,
'retries': 0
'indicators_found': 0,
'api_calls': 0,
'api_timeouts': 0,
'events_marked': 0
}
# Session for connection reuse
self.session = requests.Session()
self.session.verify = False
self.session.headers.update({
"Authorization": self.misp_api_key,
"Content-Type": "application/json",
"User-Agent": "Timesketch-MISP-Analyzer/1.0"
})
@staticmethod
def get_kwargs():
"""Get kwargs for the analyzer with ultra-reliable settings."""
base_config = {
"include_community": True,
"chunk_size": 500,
"max_retries": 5,
"base_timeout": 30,
"request_delay": 1.0,
"max_indicators_per_batch": 10,
}
"""Get kwargs for the analyzer - keeping original working structure."""
to_query = [
{
"query_string": "md5_hash:*",
"attr": "md5",
"timesketch_attr": "md5_hash",
**base_config
"include_community": False, # Start with own org only
},
{
"query_string": "sha1_hash:*",
"attr": "sha1",
"timesketch_attr": "sha1_hash",
**base_config
"include_community": False,
},
{
"query_string": "sha256_hash:*",
"attr": "sha256",
"timesketch_attr": "sha256_hash",
**base_config
"include_community": False,
},
{
"query_string": "filename:*",
"attr": "filename",
"timesketch_attr": "filename",
**base_config
"include_community": False,
},
{
"query_string": "message:*",
"attr": "ip",
"attr": "ip-src",
"timesketch_attr": "message",
**base_config
"include_community": False,
},
{
"query_string": "source_ip:* OR src_ip:* OR client_ip:*",
"attr": "ip",
"query_string": "message:*",
"attr": "ip-dst",
"timesketch_attr": "message",
"include_community": False,
},
{
"query_string": "source_ip:*",
"attr": "ip-src",
"timesketch_attr": "source_ip",
**base_config
"include_community": False,
},
]
return to_query
def _is_valid_ip(self, ip_str: str) -> bool:
"""Enhanced IP validation for nginx logs."""
def _is_valid_ip(self, ip_str):
"""Simple IP validation - keeping original working version."""
try:
import ipaddress
ip_str = ip_str.strip()
# Basic format check first
if not re.match(r'^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$', ip_str):
ipaddress.ip_address(ip_str)
# Filter out invalid ranges
if ip_str.startswith(('0.', '127.', '255.255.255.255', '10.', '192.168.', '172.')):
return False
ip_obj = ipaddress.ip_address(ip_str)
# Filter out non-routable addresses
if (ip_obj.is_private or ip_obj.is_loopback or
ip_obj.is_multicast or ip_obj.is_reserved or
ip_obj.is_link_local or ip_obj.is_unspecified):
return False
# Nginx-specific filters
if (ip_str.startswith(('0.', '10.', '172.', '192.168.', '127.', '169.254.', '224.')) or
ip_str in ['255.255.255.255', '0.0.0.0']):
return False
return True
except (ValueError, AttributeError, TypeError):
except (ValueError, AttributeError):
return False
def _is_valid_hash(self, hash_str: str, hash_type: str) -> bool:
"""Validate hash format with strict checking."""
if not hash_str or not isinstance(hash_str, str):
def _is_valid_hash(self, hash_str, hash_type):
"""Simple hash validation - keeping original working version."""
if not hash_str:
return False
hash_str = hash_str.strip().lower()
# Check for obvious non-hash patterns
if not hash_str or hash_str in ['null', 'none', '0', '-']:
if hash_type == "md5":
return len(hash_str) == 32 and all(c in '0123456789abcdef' for c in hash_str)
elif hash_type == "sha1":
return len(hash_str) == 40 and all(c in '0123456789abcdef' for c in hash_str)
elif hash_type == "sha256":
return len(hash_str) == 64 and all(c in '0123456789abcdef' for c in hash_str)
return False
hash_lengths = {"md5": 32, "sha1": 40, "sha256": 64}
expected_length = hash_lengths.get(hash_type)
def query_misp_single(self, value, attr, retry_count=0):
"""Query MISP for a single value - enhanced with minimal retry logic."""
if value in self.failed_indicators:
return []
if not expected_length or len(hash_str) != expected_length:
return False
return all(c in '0123456789abcdef' for c in hash_str)
def _calculate_payload_size(self, payload: Dict[str, Any]) -> int:
"""Calculate approximate payload size in bytes."""
try:
return len(json.dumps(payload).encode('utf-8'))
except:
return 0
# Build basic payload
payload = {"returnFormat": "json", "value": value, "type": attr}
def _make_misp_request_single(self, indicator: str, attr_type: str, retry_count: int = 0) -> List[Dict]:
"""Make single indicator MISP request with progressive timeout."""
timeout = min(self.base_timeout + (retry_count * 10), self.max_timeout)
# Determine search types
search_types = ["ip-src", "ip-dst"] if attr_type == "ip" else [attr_type]
results = []
for search_type in search_types:
try:
# Build minimal payload
distribution_levels = [0] # Start with own org only
# Add community search if enabled
if self.include_community:
distribution_levels.extend([1, 2])
payload["distribution"] = [0, 1, 2] # Own, community, connected
payload = {
"returnFormat": "json",
"value": indicator,
"type": search_type,
"enforceWarninglist": False,
"includeDecayScore": False,
"includeFullModel": False,
"distribution": distribution_levels,
"limit": 100, # Conservative limit
}
self.stats['api_calls'] += 1
payload_size = self._calculate_payload_size(payload)
logger.debug(f"Querying {indicator} ({search_type}) - payload: {payload_size} bytes, timeout: {timeout}s")
response = self.session.post(
response = requests.post(
f"{self.misp_url}/attributes/restSearch/",
json=payload,
timeout=timeout,
headers={"Authorization": self.misp_api_key},
verify=False,
timeout=45, # Slightly increased from original 30s
)
if response.status_code == 200:
if response.status_code != 200:
logger.debug(f"MISP API returned status {response.status_code} for {value}")
return []
data = response.json()
attributes = data.get("response", {}).get("Attribute", [])
results.extend(attributes)
self.stats['api_calls_successful'] += 1
return data.get("response", {}).get("Attribute", [])
elif response.status_code == 429: # Rate limited
wait_time = min(5 * (retry_count + 1), 30)
logger.warning(f"Rate limited for {indicator}, waiting {wait_time}s")
time.sleep(wait_time)
raise requests.exceptions.RequestException("Rate limited")
except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
self.stats['api_timeouts'] += 1
elif response.status_code >= 500: # Server error
logger.warning(f"Server error {response.status_code} for {indicator}")
if retry_count < self.max_retries:
raise requests.exceptions.RequestException(f"Server error {response.status_code}")
else:
logger.debug(f"No results for {indicator} ({search_type}): status {response.status_code}")
# Delay between search types
time.sleep(0.2)
except (requests.exceptions.Timeout, TimeoutError) as e:
self.stats['timeouts'] += 1
if retry_count < self.max_retries:
wait_time = min(2 ** retry_count, 30)
logger.warning(f"Timeout for {indicator} (attempt {retry_count + 1}/{self.max_retries}), retrying in {wait_time}s")
wait_time = (retry_count + 1) * 2 # Simple backoff: 2s, 4s
logger.warning(f"Timeout for {value}, retrying in {wait_time}s (attempt {retry_count + 1})")
time.sleep(wait_time)
self.stats['retries'] += 1
return self._make_misp_request_single(indicator, attr_type, retry_count + 1)
return self.query_misp_single(value, attr, retry_count + 1)
else:
logger.error(f"Max retries exceeded for {indicator}: {e}")
self.stats['api_calls_failed'] += 1
self.failed_indicators.add(indicator)
return []
except requests.exceptions.ConnectionError as e:
self.stats['api_calls_failed'] += 1
if retry_count < self.max_retries:
wait_time = min(5 * (retry_count + 1), 60)
logger.warning(f"Connection error for {indicator} (attempt {retry_count + 1}), retrying in {wait_time}s: {e}")
time.sleep(wait_time)
self.stats['retries'] += 1
return self._make_misp_request_single(indicator, attr_type, retry_count + 1)
else:
logger.error(f"Connection failed permanently for {indicator}: {e}")
self.failed_indicators.add(indicator)
logger.error(f"Max retries exceeded for {value}: {e}")
self.failed_indicators.add(value)
return []
except Exception as e:
logger.error(f"Unexpected error querying {indicator}: {e}")
self.stats['api_calls_failed'] += 1
logger.debug(f"Error querying MISP for {value}: {e}")
return []
return results
def extract_indicators_from_event(self, event: Any, attr: str, timesketch_attr: str) -> List[str]:
"""Extract and validate indicators from event."""
def mark_event(self, event, result, attr):
"""Mark event with MISP intelligence - keeping original working version."""
try:
loc = event.source.get(timesketch_attr)
if not loc:
return []
indicators = []
loc_str = str(loc)
if attr == "ip":
if timesketch_attr == "message":
# Extract IPs from nginx access log messages
ip_matches = self.ip_pattern.findall(loc_str)
indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
elif timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
if self._is_valid_ip(loc_str):
indicators = [loc_str]
elif attr in ["md5", "sha1", "sha256"]:
if self._is_valid_hash(loc_str, attr):
indicators = [loc_str.lower().strip()]
elif attr == "filename":
filename = ntpath.basename(loc_str).strip()
if filename and len(filename) > 3 and '.' in filename:
indicators = [filename]
return indicators
except Exception as e:
logger.debug(f"Error extracting indicators from event: {e}")
return []
def mark_event_with_intelligence(self, event: Any, misp_results: List[Dict], attr: str) -> None:
"""Mark event with MISP intelligence information."""
try:
event_id = event.source.get('_id', '')
if event_id in self.marked_events:
return
self.marked_events.add(event_id)
# Build intelligence message
if attr == "ip":
msg_prefix = "MISP: Threat IP"
elif attr in ["md5", "sha1", "sha256"]:
msg_prefix = f"MISP: Malicious {attr.upper()}"
if attr.startswith("ip-"):
msg = "MISP: Malicious IP - "
else:
msg_prefix = f"MISP: Known {attr.upper()}"
msg = "MISP: Known indicator - "
# Extract key information
events_info = []
orgs_info = set()
threat_levels = []
event_info = result[0].get("Event", {}).get("info", "Unknown")
msg += event_info
for misp_attr in misp_results[:3]: # Limit to first 3 for message clarity
event_info = misp_attr.get("Event", {})
event_desc = event_info.get("info", "Unknown")[:40] # Truncate
org_name = event_info.get("Orgc", {}).get("name", "Unknown")
threat_level = event_info.get("threat_level_id")
if len(result) > 1:
msg += f" (+{len(result)-1} more)"
if event_desc and event_desc != "Unknown":
events_info.append(event_desc)
if org_name and org_name != "Unknown":
orgs_info.add(org_name)
if threat_level:
threat_levels.append(int(threat_level))
# Build comprehensive message
msg_parts = [msg_prefix]
if events_info:
msg_parts.append(f"Events: {' | '.join(events_info[:2])}")
if len(misp_results) > 3:
msg_parts.append(f"({len(misp_results)} total matches)")
if len(orgs_info) > 1:
msg_parts.append(f"Sources: {', '.join(list(orgs_info)[:2])}")
elif orgs_info and list(orgs_info)[0] != "Unknown":
msg_parts.append(f"Source: {list(orgs_info)[0]}")
if threat_levels:
min_threat = min(threat_levels) # Lower = higher threat
threat_names = {1: "HIGH", 2: "MEDIUM", 3: "LOW", 4: "UNDEFINED"}
msg_parts.append(f"Threat: {threat_names.get(min_threat, 'UNKNOWN')}")
final_message = " | ".join(msg_parts)
# Add tags
tags = [f"MISP-{attr}", "threat-intel"]
if self.include_community and len(orgs_info) > 1:
tags.append("cross-org-intel")
event.add_comment(final_message)
event.add_tags(tags)
event.add_comment(msg)
event.add_tags([f"MISP-{attr}", "threat-intel"])
event.commit()
self.stats['events_marked'] += 1
self.stats['total_matches'] += len(misp_results)
except Exception as e:
logger.error(f"Error marking event: {e}")
def process_indicators_batch(self, indicators: List[str], attr: str) -> Dict[str, List[Dict]]:
"""Process indicators with careful rate limiting."""
results = {}
def extract_indicators_from_chunk(self, events_chunk, attr, timesketch_attr):
"""Extract indicators from a chunk of events."""
chunk_indicators = []
events_with_indicators = []
for i, indicator in enumerate(indicators):
for event in events_chunk:
self.stats['events_processed'] += 1
loc = event.source.get(timesketch_attr)
if not loc:
continue
indicators = []
# Extract based on attribute type - SAME AS ORIGINAL
if attr.startswith("ip-") and timesketch_attr == "message":
ip_matches = self.ip_pattern.findall(str(loc))
indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
elif attr.startswith("ip-") and timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
if self._is_valid_ip(str(loc)):
indicators = [str(loc)]
elif attr in ["md5", "sha1", "sha256"]:
if self._is_valid_hash(str(loc), attr):
indicators = [str(loc)]
elif attr == "filename":
filename = ntpath.basename(str(loc))
if filename and len(filename) > 1:
indicators = [filename]
# Store event with its indicators
if indicators:
events_with_indicators.append((event, indicators))
chunk_indicators.extend(indicators)
return events_with_indicators, chunk_indicators
def process_chunk(self, events_chunk, attr, timesketch_attr):
"""Process a chunk of events."""
events_with_indicators, chunk_indicators = self.extract_indicators_from_chunk(
events_chunk, attr, timesketch_attr
)
if not chunk_indicators:
return
# Get unique new indicators
unique_indicators = list(dict.fromkeys(chunk_indicators))
new_indicators = [ind for ind in unique_indicators if ind not in self.processed_indicators]
if not new_indicators:
# Still need to check existing cache for matches
self.check_existing_matches(events_with_indicators, attr)
return
logger.info(f"Processing {len(new_indicators)} new {attr} indicators from {len(events_chunk)} events")
self.stats['indicators_found'] += len(new_indicators)
# Query MISP for each new indicator
for indicator in new_indicators:
if indicator in self.failed_indicators:
continue
logger.debug(f"Processing indicator {i+1}/{len(indicators)}: {indicator}")
result = self.query_misp_single(indicator, attr)
if result:
self.result_dict[f"{attr}:{indicator}"] = result
logger.info(f"MISP hit: {indicator} ({len(result)} matches)")
misp_results = self._make_misp_request_single(indicator, attr)
self.processed_indicators.add(indicator)
if misp_results:
results[indicator] = misp_results
logger.info(f"MISP hit: {indicator} ({len(misp_results)} matches)")
# Rate limiting between requests
# Small delay to be nice to MISP server
time.sleep(self.request_delay)
# Progress update every 50 indicators
if (i + 1) % 50 == 0:
logger.info(f"Processed {i+1}/{len(indicators)} indicators, "
f"{len(results)} hits, "
f"{self.stats['timeouts']} timeouts, "
f"{self.stats['api_calls_failed']} failures")
# Mark events that have matches
self.check_existing_matches(events_with_indicators, attr)
return results
def check_existing_matches(self, events_with_indicators, attr):
"""Check events against existing MISP results."""
for event, indicators in events_with_indicators:
# Check if any indicator has MISP match
for indicator in indicators:
key = f"{attr}:{indicator}"
if key in self.result_dict and self.result_dict[key]:
self.mark_event(event, self.result_dict[key], attr)
break # Only mark once per event
def query_misp(self, query: str, attr: str, timesketch_attr: str) -> None:
"""Main processing with ultra-reliable chunked approach."""
logger.info(f"Starting ultra-reliable MISP analysis for {attr} in {timesketch_attr}")
logger.info(f"Configuration: chunk_size={self.chunk_size}, "
f"max_retries={self.max_retries}, "
f"request_delay={self.request_delay}s, "
f"include_community={self.include_community}")
def query_misp(self, query, attr, timesketch_attr):
"""Process events in chunks - enhanced for large datasets."""
logger.info(f"Starting MISP analysis for {attr} in {timesketch_attr}")
logger.info(f"Community search: {'enabled' if self.include_community else 'disabled'}")
# Process events in chunks
# Get event stream
events_stream = self.event_stream(
query_string=query,
return_fields=[timesketch_attr, '_id', 'timestamp']
return_fields=[timesketch_attr, '_id']
)
current_chunk = []
@ -421,121 +304,64 @@ class MispAnalyzer(interface.BaseAnalyzer):
try:
for event in events_stream:
current_chunk.append(event)
self.stats['events_processed'] += 1
# Process when chunk is full
if len(current_chunk) >= self.chunk_size:
self._process_events_chunk(current_chunk, attr, timesketch_attr)
self.process_chunk(current_chunk, attr, timesketch_attr)
current_chunk = []
# Progress logging
if self.stats['events_processed'] % 5000 == 0:
success_rate = (self.stats['api_calls_successful'] /
max(1, self.stats['api_calls_successful'] + self.stats['api_calls_failed']) * 100)
logger.info(f"Progress: {self.stats['events_processed']} events, "
if self.stats['events_processed'] % 10000 == 0:
logger.info(f"Progress: {self.stats['events_processed']} events processed, "
f"{self.stats['events_marked']} marked, "
f"{len(self.processed_indicators)} indicators processed, "
f"{success_rate:.1f}% API success rate")
f"{self.stats['api_calls']} API calls, "
f"{self.stats['api_timeouts']} timeouts")
# Process final chunk
if current_chunk:
self._process_events_chunk(current_chunk, attr, timesketch_attr)
self.process_chunk(current_chunk, attr, timesketch_attr)
except Exception as e:
logger.error(f"Critical error during processing: {e}")
logger.error(f"Error during chunk processing: {e}")
raise
def _process_events_chunk(self, events_chunk: List[Any], attr: str, timesketch_attr: str) -> None:
"""Process a chunk of events with indicator extraction and MISP queries."""
# Extract all unique indicators from chunk
chunk_indicators = []
event_to_indicators = {}
# Create view if we found matches
if self.stats['events_marked'] > 0:
view_name = "MISP Threat Intelligence"
if self.include_community:
view_name += " (Community)"
for event in events_chunk:
indicators = self.extract_indicators_from_event(event, attr, timesketch_attr)
if indicators:
event_id = event.source.get('_id', '')
event_to_indicators[event_id] = (event, indicators)
chunk_indicators.extend(indicators)
self.sketch.add_view(
view_name=view_name,
analyzer_name=self.NAME,
query_string='tag:"MISP-*" OR tag:"threat-intel"',
)
# Get unique new indicators
unique_indicators = list(dict.fromkeys(chunk_indicators))
new_indicators = [ind for ind in unique_indicators
if ind not in self.processed_indicators and ind not in self.failed_indicators]
if not new_indicators:
return
logger.info(f"Processing {len(new_indicators)} new {attr} indicators from {len(events_chunk)} events")
self.stats['indicators_extracted'] += len(new_indicators)
# Query MISP for new indicators
misp_results = self.process_indicators_batch(new_indicators, attr)
# Update cache
self.processed_indicators.update(new_indicators)
for indicator, results in misp_results.items():
self.result_dict[f"{attr}:{indicator}"] = results
# Mark matching events
for event_id, (event, indicators) in event_to_indicators.items():
if event_id in self.marked_events:
continue
matching_results = []
for indicator in indicators:
key = f"{attr}:{indicator}"
if key in self.result_dict:
matching_results.extend(self.result_dict[key])
if matching_results:
self.mark_event_with_intelligence(event, matching_results, attr)
def run(self) -> str:
"""Entry point with comprehensive error handling and reporting."""
def run(self):
"""Entry point for the analyzer."""
if not self.misp_url or not self.misp_api_key:
return "Error: MISP configuration missing"
return "No MISP configuration found"
start_time = time.time()
try:
self.query_misp(self._query_string, self._attr, self._timesketch_attr)
# Create view for matches
if self.stats['events_marked'] > 0:
view_name = f"MISP {self._attr.upper()} Threats"
if self.include_community:
view_name += " (Cross-Org)"
self.sketch.add_view(
view_name=view_name,
analyzer_name=self.NAME,
query_string=f'tag:"MISP-{self._attr}" OR tag:"threat-intel"',
)
# Comprehensive results
elapsed = time.time() - start_time
total_api_calls = self.stats['api_calls_successful'] + self.stats['api_calls_failed']
success_rate = (self.stats['api_calls_successful'] / max(1, total_api_calls)) * 100
success_rate = ((self.stats['api_calls'] - self.stats['api_timeouts']) /
max(1, self.stats['api_calls']) * 100)
result = (f"[{self._timesketch_attr}] MISP Analysis: "
result = (f"[{self._timesketch_attr}] MISP Analysis Complete: "
f"{self.stats['events_marked']}/{self.stats['events_processed']} events marked | "
f"{len(self.processed_indicators)} indicators processed | "
f"{total_api_calls} API calls ({success_rate:.1f}% success) | "
f"{self.stats['timeouts']} timeouts | "
f"{self.stats['api_calls']} API calls ({success_rate:.1f}% success) | "
f"{elapsed:.0f}s")
logger.info(result)
return result
except Exception as e:
logger.error(f"MISP analyzer failed: {e}")
logger.error(f"MISP analyzer error: {e}")
return f"[{self._timesketch_attr}] MISP Error: {str(e)}"
finally:
try:
self.session.close()
except:
pass
manager.AnalysisManager.register_analyzer(MispAnalyzer)