misp_analyzer.py updated

Mario Stöckl 2025-07-30 13:02:52 +00:00
parent a26e2fb0ad
commit 7404c0ee8d


@@ -1,9 +1,12 @@
"""Index analyzer plugin for MISP.""" """Index analyzer plugin for MISP - Enhanced for large-scale processing."""
import logging import logging
import ntpath import ntpath
import re import re
import requests import requests
import time
from collections import defaultdict
from typing import List, Dict, Set, Any
from flask import current_app from flask import current_app
from timesketch.lib.analyzers import interface from timesketch.lib.analyzers import interface
@@ -14,11 +17,11 @@ logger = logging.getLogger("timesketch.analyzers.misp")
class MispAnalyzer(interface.BaseAnalyzer):
    """Enhanced Analyzer for MISP with large-scale processing capabilities."""

    NAME = "misp_analyzer"
    DISPLAY_NAME = "MISP Enhanced"
    DESCRIPTION = "Mark events using MISP with cross-org and large-scale support"

    def __init__(self, index_name, sketch_id, timeline_id=None, **kwargs):
        """Initialize the Analyzer."""
@@ -30,285 +33,431 @@ class MispAnalyzer(interface.BaseAnalyzer):
        self._query_string = kwargs.get("query_string")
        self._attr = kwargs.get("attr")
        self._timesketch_attr = kwargs.get("timesketch_attr")

        # Enhanced configuration
        self.include_community = kwargs.get("include_community", True)
        self.batch_size = kwargs.get("batch_size", 100)  # Process events in batches
        self.api_batch_size = kwargs.get("api_batch_size", 50)  # API call batching
        self.max_retries = kwargs.get("max_retries", 3)
        self.request_timeout = kwargs.get("request_timeout", 120)  # 2 minutes
        self.chunk_size = kwargs.get("chunk_size", 1000)  # Memory management

        # Regex patterns
        self.ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')

        # Track processed items to prevent duplicates
        self.marked_events = set()
        self.processed_indicators = set()

        # Statistics
        self.stats = {
            'events_processed': 0,
            'indicators_extracted': 0,
            'api_calls_made': 0,
            'events_marked': 0,
            'errors': 0,
        }
    @staticmethod
    def get_kwargs():
        """Get kwargs for the analyzer with enhanced options."""
        to_query = [
            {
                "query_string": "md5_hash:*",
                "attr": "md5",
                "timesketch_attr": "md5_hash",
                "include_community": True,
                "batch_size": 100,
                "api_batch_size": 50,
            },
            {
                "query_string": "sha1_hash:*",
                "attr": "sha1",
                "timesketch_attr": "sha1_hash",
                "include_community": True,
                "batch_size": 100,
                "api_batch_size": 50,
            },
            {
                "query_string": "sha256_hash:*",
                "attr": "sha256",
                "timesketch_attr": "sha256_hash",
                "include_community": True,
                "batch_size": 100,
                "api_batch_size": 50,
            },
            {
                "query_string": "filename:*",
                "attr": "filename",
                "timesketch_attr": "filename",
                "include_community": True,
                "batch_size": 100,
                "api_batch_size": 50,
            },
            {
                "query_string": "message:*",
                "attr": "ip",  # Generic IP instead of ip-src/ip-dst
                "timesketch_attr": "message",
                "include_community": True,
                "batch_size": 100,
                "api_batch_size": 50,
            },
            {
                "query_string": "source_ip:* OR src_ip:* OR client_ip:*",
                "attr": "ip",
                "timesketch_attr": "source_ip",
                "include_community": True,
                "batch_size": 100,
                "api_batch_size": 50,
            },
        ]
        return to_query
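
Each entry in to_query drives one independent analyzer pass, so hash fields, filenames, and the two IP sources are scanned separately with their own batching settings. A minimal sketch of that dispatch (hypothetical driver loop; in Timesketch the analyzer manager does this wiring, and the module path is assumed from the imports above):

from timesketch.lib.analyzers.misp_analyzer import MispAnalyzer

for config in MispAnalyzer.get_kwargs():
    print(f"pass: {config['query_string']!r} -> attr={config['attr']}, "
          f"field={config['timesketch_attr']}")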
    def _is_valid_ip(self, ip_str: str) -> bool:
        """Validate IP address with enhanced filtering."""
        try:
            import ipaddress

            ip_str = ip_str.strip()
            ip_obj = ipaddress.ip_address(ip_str)

            # Filter out private, loopback, and other non-routable IPs
            if (ip_obj.is_private or ip_obj.is_loopback or
                    ip_obj.is_multicast or ip_obj.is_reserved or
                    ip_obj.is_link_local):
                return False

            # Additional nginx log specific filters
            if ip_str.startswith(('0.', '255.255.255.255', '169.254.')):
                return False

            return True
        except (ValueError, AttributeError):
            return False
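
The ipaddress checks drop everything that cannot be a real remote peer before any MISP lookup is spent on it. A standalone sketch of the same filtering rules:

import ipaddress

def routable(ip_str: str) -> bool:
    # Mirrors _is_valid_ip: reject private, loopback, multicast,
    # reserved, and link-local addresses.
    ip_obj = ipaddress.ip_address(ip_str.strip())
    return not (ip_obj.is_private or ip_obj.is_loopback or ip_obj.is_multicast
                or ip_obj.is_reserved or ip_obj.is_link_local)

for ip in ("8.8.8.8", "10.0.0.1", "127.0.0.1", "169.254.1.1", "224.0.0.1"):
    print(ip, routable(ip))  # only 8.8.8.8 prints True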
    def _is_valid_hash(self, hash_str: str, hash_type: str) -> bool:
        """Validate hash format."""
        if not hash_str:
            return False

        hash_str = hash_str.strip().lower()
        hash_lengths = {"md5": 32, "sha1": 40, "sha256": 64}
        expected_length = hash_lengths.get(hash_type)

        if not expected_length:
            return False

        return (len(hash_str) == expected_length and
                all(c in '0123456789abcdef' for c in hash_str))
    def _make_misp_request(self, payload: Dict[str, Any], retry_count: int = 0) -> List[Dict]:
        """Make MISP API request with retry logic."""
        try:
            response = requests.post(
                f"{self.misp_url}/attributes/restSearch/",
                json=payload,
                headers={"Authorization": self.misp_api_key},
                verify=False,
                timeout=self.request_timeout,
            )

            if response.status_code == 200:
                data = response.json()
                return data.get("response", {}).get("Attribute", [])
            elif response.status_code == 429:  # Rate limited
                wait_time = min(2 ** retry_count, 60)  # Exponential backoff, max 60s
                logger.warning(f"Rate limited, waiting {wait_time}s before retry")
                time.sleep(wait_time)
                raise requests.exceptions.RequestException("Rate limited")
            else:
                logger.warning(f"MISP API returned status {response.status_code}")
                return []

        # RequestException covers Timeout, ConnectionError, and the re-raised
        # rate-limit error above, so 429 responses are retried as well.
        except requests.exceptions.RequestException as e:
            if retry_count < self.max_retries:
                wait_time = min(2 ** retry_count * 5, 120)  # Exponential backoff
                logger.warning(f"Request failed (attempt {retry_count + 1}), retrying in {wait_time}s: {e}")
                time.sleep(wait_time)
                return self._make_misp_request(payload, retry_count + 1)
            else:
                logger.error(f"Request failed after {self.max_retries} retries: {e}")
                self.stats['errors'] += 1
                return []
        except Exception as e:
            logger.error(f"Unexpected error in MISP request: {e}")
            self.stats['errors'] += 1
            return []
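
With the default max_retries of 3, the wait schedule stays short for 429s and longer for network failures. Pure arithmetic mirroring the two backoff formulas above:

MAX_RETRIES = 3  # analyzer default
for attempt in range(MAX_RETRIES):
    print(f"retry {attempt + 1}: rate-limit wait {min(2 ** attempt, 60)}s, "
          f"network-error wait {min(2 ** attempt * 5, 120)}s")
# retry 1: 1s / 5s, retry 2: 2s / 10s, retry 3: 4s / 20s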
    def query_misp_batch(self, indicators: List[str], attr: str) -> Dict[str, List[Dict]]:
        """Query MISP for multiple indicators efficiently."""
        results = defaultdict(list)

        # Determine search types based on attribute
        if attr == "ip":
            search_types = ["ip-src", "ip-dst"]
        else:
            search_types = [attr]

        for search_type in search_types:
            # Batch indicators to reduce API calls
            for i in range(0, len(indicators), self.api_batch_size):
                batch = indicators[i:i + self.api_batch_size]

                # Build payload with distribution settings
                distribution_levels = [0]  # Own org
                if self.include_community:
                    distribution_levels.extend([1, 2])  # Community and connected orgs

                payload = {
                    "returnFormat": "json",
                    "value": batch,
                    "type": search_type,
                    "enforceWarninglist": False,
                    "includeDecayScore": False,
                    "includeFullModel": False,
                    "excludeDecayed": False,
                    "distribution": distribution_levels,
                    "limit": 10000,  # High limit for large datasets
                }

                self.stats['api_calls_made'] += 1
                logger.info(f"Querying MISP for {len(batch)} {search_type} indicators (call #{self.stats['api_calls_made']})")

                batch_results = self._make_misp_request(payload)

                # Group results by indicator value
                for result in batch_results:
                    indicator_value = result.get("value", "").strip()
                    if indicator_value in batch:
                        results[indicator_value].append(result)

                # Rate limiting courtesy pause
                time.sleep(0.5)

        return dict(results)
    def extract_indicators_from_event(self, event: Any, attr: str, timesketch_attr: str) -> List[str]:
        """Extract indicators from a single event."""
        loc = event.source.get(timesketch_attr)
        if not loc:
            return []

        indicators = []
        loc_str = str(loc)

        if attr == "ip" and timesketch_attr == "message":
            # Extract IPs from nginx access log messages
            ip_matches = self.ip_pattern.findall(loc_str)
            indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
        elif attr == "ip" and timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
            if self._is_valid_ip(loc_str):
                indicators = [loc_str]
        elif attr in ["md5", "sha1", "sha256"]:
            if self._is_valid_hash(loc_str, attr):
                indicators = [loc_str]
        elif attr == "filename":
            filename = ntpath.basename(loc_str)
            if filename and len(filename) > 3:  # Meaningful filename
                indicators = [filename]

        return indicators
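
The message path relies on the analyzer's IPv4 regex rather than structured fields. A quick standalone check against a sample nginx access-log line (the line itself is invented for illustration):

import re

ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')
line = '203.0.113.7 - - [30/Jul/2025:13:02:52 +0000] "GET /login HTTP/1.1" 200 512'
print(ip_pattern.findall(line))  # -> ['203.0.113.7']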
    def mark_event_with_intel(self, event: Any, misp_results: List[Dict], attr: str) -> None:
        """Mark event with MISP intelligence."""
        try:
            # Check if event already marked
            event_id = event.source.get('_id', '')
            if event_id in self.marked_events:
                return
            self.marked_events.add(event_id)

            # Build comprehensive message
            if attr == "ip":
                msg = "MISP: Malicious IP detected"
            else:
                msg = f"MISP: Known {attr.upper()} indicator"

            # Collect event and organization info
            events_info = {}
            orgs_info = set()
            threat_levels = set()

            for misp_attr in misp_results:
                event_info = misp_attr.get("Event", {})
                event_id_misp = event_info.get("id", "")
                event_desc = event_info.get("info", "Unknown")
                org_name = event_info.get("Orgc", {}).get("name", "Unknown")
                threat_level = event_info.get("threat_level_id", "")

                events_info[event_id_misp] = event_desc[:50]  # Truncate long descriptions
                orgs_info.add(org_name)
                if threat_level:
                    threat_levels.add(threat_level)

            # Enhanced message with threat context
            event_descriptions = list(events_info.values())[:2]
            if event_descriptions:
                msg += f" | Events: {' | '.join(event_descriptions)}"

            if len(misp_results) > 2:
                msg += f" | +{len(misp_results)-2} more indicators"

            # Organization information
            if len(orgs_info) > 1:
                msg += f" | Sources: {', '.join(list(orgs_info)[:3])}"
            elif orgs_info and list(orgs_info)[0] != "Unknown":
                msg += f" | Source: {list(orgs_info)[0]}"

            # Threat level context
            if threat_levels:
                highest_threat = min(threat_levels)  # Lower number = higher threat
                threat_map = {"1": "HIGH", "2": "MEDIUM", "3": "LOW", "4": "UNDEFINED"}
                msg += f" | Threat: {threat_map.get(str(highest_threat), 'UNKNOWN')}"

            # Add tags and comment
            tags = [f"MISP-{attr}", "threat-intel"]
            if self.include_community and len(orgs_info) > 1:
                tags.append("cross-org-intel")

            event.add_comment(msg)
            event.add_tags(tags)
            event.commit()
            self.stats['events_marked'] += 1

        except Exception as e:
            logger.error(f"Error marking event {event_id}: {e}")
            self.stats['errors'] += 1
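
One subtlety worth noting: MISP returns threat_level_id as a string, and min() over the set is only equivalent to numeric comparison because the codes are single digits (1=HIGH .. 4=UNDEFINED). The folding in isolation:

threat_levels = {"3", "1", "2"}
threat_map = {"1": "HIGH", "2": "MEDIUM", "3": "LOW", "4": "UNDEFINED"}
print(threat_map[min(threat_levels)])  # -> HIGH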
    def process_events_chunk(self, events_chunk: List[Any], attr: str, timesketch_attr: str) -> None:
        """Process a chunk of events efficiently."""
        # Extract all indicators from the chunk
        chunk_indicators = []
        event_to_indicators = {}

        for event in events_chunk:
            indicators = self.extract_indicators_from_event(event, attr, timesketch_attr)
            if indicators:
                event_id = event.source.get('_id', '')
                event_to_indicators[event_id] = (event, indicators)
                chunk_indicators.extend(indicators)

        # Remove duplicates while preserving order
        unique_indicators = list(dict.fromkeys(chunk_indicators))
        new_indicators = [ind for ind in unique_indicators if ind not in self.processed_indicators]

        if new_indicators:
            logger.info(f"Processing {len(new_indicators)} new indicators from chunk of {len(events_chunk)} events")

            # Query MISP for new indicators
            misp_results = self.query_misp_batch(new_indicators, attr)

            # Update processed indicators and result cache
            self.processed_indicators.update(new_indicators)
            for indicator, results in misp_results.items():
                if results:
                    self.result_dict[f"{attr}:{indicator}"] = results

        # Mark events that have matching indicators. This runs even when the
        # chunk contained no new indicators, so events whose indicators were
        # already looked up in an earlier chunk still get marked from the cache.
        for event_id, (event, indicators) in event_to_indicators.items():
            if event_id in self.marked_events:
                continue

            matching_results = []
            for indicator in indicators:
                key = f"{attr}:{indicator}"
                if key in self.result_dict:
                    matching_results.extend(self.result_dict[key])

            if matching_results:
                self.mark_event_with_intel(event, matching_results, attr)
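
The dict.fromkeys() call above is the standard order-preserving dedup idiom; in isolation:

chunk = ["8.8.8.8", "203.0.113.7", "8.8.8.8"]
print(list(dict.fromkeys(chunk)))  # -> ['8.8.8.8', '203.0.113.7']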
view_name="MISP Cross-Org Threat Intel", def query_misp(self, query: str, attr: str, timesketch_attr: str) -> None:
analyzer_name=self.NAME, """Main processing function with chunked approach for large datasets."""
query_string='tag:"MISP-*" OR tag:"threat-intel" OR tag:"cross-org-intel"', logger.info(f"Starting MISP analysis for {attr} in {timesketch_attr}")
logger.info(f"Community querying: {'enabled' if self.include_community else 'disabled'}")
# Process events in chunks to manage memory
events_stream = self.event_stream(
query_string=query,
return_fields=[timesketch_attr, '_id', 'timestamp']
) )
def run(self): current_chunk = []
"""Entry point for the analyzer."""
if not self.misp_url or not self.misp_api_key:
return "No MISP configuration found"
try: try:
self.query_misp(self._query_string, self._attr, self._timesketch_attr) for event in events_stream:
return f"[{self._timesketch_attr}] MISP Match: {self.total_event_counter}" current_chunk.append(event)
self.stats['events_processed'] += 1
# Process chunk when it reaches the specified size
if len(current_chunk) >= self.chunk_size:
self.process_events_chunk(current_chunk, attr, timesketch_attr)
current_chunk = []
# Progress logging
if self.stats['events_processed'] % 10000 == 0:
logger.info(f"Progress: {self.stats['events_processed']} events processed, "
f"{self.stats['events_marked']} marked, "
f"{self.stats['api_calls_made']} API calls made")
# Process remaining events in the last chunk
if current_chunk:
self.process_events_chunk(current_chunk, attr, timesketch_attr)
except Exception as e: except Exception as e:
logger.error(f"MISP analyzer error: {e}") logger.error(f"Error during event processing: {e}")
self.stats['errors'] += 1
# Create comprehensive view if we found matches
if self.stats['events_marked'] > 0:
view_name = f"MISP Threat Intel - {attr.upper()}"
if self.include_community:
view_name += " (Cross-Org)"
self.sketch.add_view(
view_name=view_name,
analyzer_name=self.NAME,
query_string=f'tag:"MISP-{attr}" OR tag:"threat-intel"',
)
    def run(self) -> str:
        """Entry point for the analyzer with comprehensive error handling."""
        if not self.misp_url or not self.misp_api_key:
            return "Error: No MISP configuration found"

        start_time = time.time()
        try:
            logger.info(f"Starting MISP analyzer with config: "
                        f"batch_size={self.batch_size}, "
                        f"api_batch_size={self.api_batch_size}, "
                        f"chunk_size={self.chunk_size}, "
                        f"include_community={self.include_community}")

            self.query_misp(self._query_string, self._attr, self._timesketch_attr)

            elapsed_time = time.time() - start_time

            # Comprehensive results summary
            result_msg = (f"[{self._timesketch_attr}] MISP Analysis Complete: "
                          f"{self.stats['events_marked']}/{self.stats['events_processed']} events marked | "
                          f"{self.stats['api_calls_made']} API calls | "
                          f"{len(self.processed_indicators)} indicators processed | "
                          f"{elapsed_time:.1f}s")

            if self.stats['errors'] > 0:
                result_msg += f" | {self.stats['errors']} errors"

            logger.info(result_msg)
            return result_msg

        except Exception as e:
            logger.error(f"MISP analyzer critical error: {e}")
            return f"[{self._timesketch_attr}] MISP Error: {str(e)}"