"""Index analyzer plugin for MISP"""
|
|
|
|
import logging
|
|
import ntpath
|
|
import re
|
|
import requests
|
|
import time
|
|
import json
|
|
from collections import defaultdict
|
|
from typing import List, Dict, Set, Any, Optional
|
|
from urllib3.exceptions import InsecureRequestWarning
|
|
|
|
from flask import current_app
|
|
from timesketch.lib.analyzers import interface
|
|
from timesketch.lib.analyzers import manager
|
|
|
|
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
|
|
|
logger = logging.getLogger("timesketch.analyzers.misp")
|
|
|
|
|
|


class MispAnalyzer(interface.BaseAnalyzer):
    """Ultra-reliable MISP Analyzer for large-scale processing."""

    NAME = "misp_analyzer"
    DISPLAY_NAME = "MISP Enhanced"
    DESCRIPTION = "Mark events using MISP with ultra-reliable large-scale support"
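
    # Processing flow implemented by the methods below:
    #   1. query_misp() streams matching events and buffers them in chunks of
    #      `chunk_size`.
    #   2. _process_events_chunk() extracts indicators, de-duplicates them
    #      against already processed or failed indicators, and queries MISP
    #      for the new ones.
    #   3. process_indicators_batch() issues one restSearch call per indicator
    #      with rate limiting (`request_delay`) and retry/backoff handling.
    #   4. mark_event_with_intelligence() comments and tags matching events.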

    def __init__(self, index_name, sketch_id, timeline_id=None, **kwargs):
        """Initialize the Analyzer."""
        super().__init__(index_name, sketch_id, timeline_id=timeline_id)
        self.misp_url = current_app.config.get("MISP_URL")
        self.misp_api_key = current_app.config.get("MISP_API_KEY")
        self.total_event_counter = 0
        self.result_dict = {}
        self._query_string = kwargs.get("query_string")
        self._attr = kwargs.get("attr")
        self._timesketch_attr = kwargs.get("timesketch_attr")

        self.include_community = kwargs.get("include_community", True)
        self.chunk_size = kwargs.get("chunk_size", 500)
        self.max_retries = kwargs.get("max_retries", 5)
        self.base_timeout = kwargs.get("base_timeout", 30)
        self.max_timeout = kwargs.get("max_timeout", 180)
        self.request_delay = kwargs.get("request_delay", 1.0)
        self.max_indicators_per_batch = kwargs.get("max_indicators_per_batch", 10)

        self.ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')

        # Tracking sets
        self.marked_events = set()
        self.processed_indicators = set()
        self.failed_indicators = set()

        self.stats = {
            'events_processed': 0,
            'indicators_extracted': 0,
            'api_calls_successful': 0,
            'api_calls_failed': 0,
            'events_marked': 0,
            'total_matches': 0,
            'timeouts': 0,
            'retries': 0,
        }

        # Session for connection reuse
        self.session = requests.Session()
        self.session.verify = False
        self.session.headers.update({
            "Authorization": self.misp_api_key,
            "Content-Type": "application/json",
            "User-Agent": "Timesketch-MISP-Analyzer/1.0",
        })

    @staticmethod
    def get_kwargs():
        """Get kwargs for the analyzer with ultra-reliable settings."""
        base_config = {
            "include_community": True,
            "chunk_size": 500,
            "max_retries": 5,
            "base_timeout": 30,
            "request_delay": 1.0,
            "max_indicators_per_batch": 10,
        }

        to_query = [
            {
                "query_string": "md5_hash:*",
                "attr": "md5",
                "timesketch_attr": "md5_hash",
                **base_config,
            },
            {
                "query_string": "sha1_hash:*",
                "attr": "sha1",
                "timesketch_attr": "sha1_hash",
                **base_config,
            },
            {
                "query_string": "sha256_hash:*",
                "attr": "sha256",
                "timesketch_attr": "sha256_hash",
                **base_config,
            },
            {
                "query_string": "filename:*",
                "attr": "filename",
                "timesketch_attr": "filename",
                **base_config,
            },
            {
                "query_string": "message:*",
                "attr": "ip",
                "timesketch_attr": "message",
                **base_config,
            },
            {
                "query_string": "source_ip:* OR src_ip:* OR client_ip:*",
                "attr": "ip",
                "timesketch_attr": "source_ip",
                **base_config,
            },
        ]
        return to_query
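
    # Note (assumption about the Timesketch analyzer interface, not stated in
    # this file): each dict returned by get_kwargs() above is expected to
    # drive a separate analyzer pass (one per hash type, filenames, and the
    # two IP sources), so the statistics reported by run() are per pass
    # rather than global.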

    def _is_valid_ip(self, ip_str: str) -> bool:
        """Enhanced IP validation for nginx logs."""
        try:
            import ipaddress
            ip_str = ip_str.strip()

            # Basic format check first
            if not re.match(r'^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$', ip_str):
                return False

            ip_obj = ipaddress.ip_address(ip_str)

            # Filter out non-routable addresses
            if (ip_obj.is_private or ip_obj.is_loopback or
                    ip_obj.is_multicast or ip_obj.is_reserved or
                    ip_obj.is_link_local or ip_obj.is_unspecified):
                return False

            # Nginx-specific filters
            if (ip_str.startswith(('0.', '10.', '172.', '192.168.', '127.', '169.254.', '224.')) or
                    ip_str in ['255.255.255.255', '0.0.0.0']):
                return False

            return True
        except (ValueError, AttributeError, TypeError):
            return False
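
    # Note on the "nginx-specific" filter above: the startswith() prefixes are
    # deliberately coarse. In particular, '172.' rejects the whole
    # 172.0.0.0/8 block, which is broader than the private 172.16.0.0/12
    # range already rejected via ip_obj.is_private, so some routable
    # addresses (e.g. 172.64.0.1) are skipped as well.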

    def _is_valid_hash(self, hash_str: str, hash_type: str) -> bool:
        """Validate hash format with strict checking."""
        if not hash_str or not isinstance(hash_str, str):
            return False

        hash_str = hash_str.strip().lower()

        # Check for obvious non-hash patterns
        if not hash_str or hash_str in ['null', 'none', '0', '-']:
            return False

        hash_lengths = {"md5": 32, "sha1": 40, "sha256": 64}
        expected_length = hash_lengths.get(hash_type)

        if not expected_length or len(hash_str) != expected_length:
            return False

        return all(c in '0123456789abcdef' for c in hash_str)

    def _calculate_payload_size(self, payload: Dict[str, Any]) -> int:
        """Calculate approximate payload size in bytes."""
        try:
            return len(json.dumps(payload).encode('utf-8'))
        except (TypeError, ValueError):
            return 0
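
    # Retry/backoff behaviour implemented in _make_misp_request_single():
    #   * the per-attempt read timeout grows by 10s per retry, capped at
    #     self.max_timeout (default 180s)
    #   * request timeouts back off exponentially (2 ** attempt, max 30s)
    #   * connection errors back off linearly (5s per attempt, max 60s)
    #   * HTTP 429 and 5xx responses are raised as RequestException so the
    #     indicator can be retried
    #   * once max_retries is exhausted the indicator is added to
    #     failed_indicators and skipped for the rest of the run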

    def _make_misp_request_single(self, indicator: str, attr_type: str, retry_count: int = 0) -> List[Dict]:
        """Make a single-indicator MISP request with progressive timeout."""
        timeout = min(self.base_timeout + (retry_count * 10), self.max_timeout)

        # Determine search types
        search_types = ["ip-src", "ip-dst"] if attr_type == "ip" else [attr_type]
        results = []

        for search_type in search_types:
            try:
                # Build minimal payload
                distribution_levels = [0]  # Start with own org only
                if self.include_community:
                    distribution_levels.extend([1, 2])

                payload = {
                    "returnFormat": "json",
                    "value": indicator,
                    "type": search_type,
                    "enforceWarninglist": False,
                    "includeDecayScore": False,
                    "includeFullModel": False,
                    "distribution": distribution_levels,
                    "limit": 100,  # Conservative limit
                }

                payload_size = self._calculate_payload_size(payload)
                logger.debug(f"Querying {indicator} ({search_type}) - payload: {payload_size} bytes, timeout: {timeout}s")

                response = self.session.post(
                    f"{self.misp_url}/attributes/restSearch/",
                    json=payload,
                    timeout=timeout,
                )

                if response.status_code == 200:
                    data = response.json()
                    attributes = data.get("response", {}).get("Attribute", [])
                    results.extend(attributes)
                    self.stats['api_calls_successful'] += 1

                elif response.status_code == 429:  # Rate limited
                    wait_time = min(5 * (retry_count + 1), 30)
                    logger.warning(f"Rate limited for {indicator}, waiting {wait_time}s")
                    time.sleep(wait_time)
                    raise requests.exceptions.RequestException("Rate limited")

                elif response.status_code >= 500:  # Server error
                    logger.warning(f"Server error {response.status_code} for {indicator}")
                    if retry_count < self.max_retries:
                        raise requests.exceptions.RequestException(f"Server error {response.status_code}")

                else:
                    logger.debug(f"No results for {indicator} ({search_type}): status {response.status_code}")

                # Delay between search types
                time.sleep(0.2)

            except (requests.exceptions.Timeout, TimeoutError) as e:
                self.stats['timeouts'] += 1
                if retry_count < self.max_retries:
                    wait_time = min(2 ** retry_count, 30)
                    logger.warning(f"Timeout for {indicator} (attempt {retry_count + 1}/{self.max_retries}), retrying in {wait_time}s")
                    time.sleep(wait_time)
                    self.stats['retries'] += 1
                    return self._make_misp_request_single(indicator, attr_type, retry_count + 1)
                logger.error(f"Max retries exceeded for {indicator}: {e}")
                self.stats['api_calls_failed'] += 1
                self.failed_indicators.add(indicator)
                return []

            except requests.exceptions.ConnectionError as e:
                self.stats['api_calls_failed'] += 1
                if retry_count < self.max_retries:
                    wait_time = min(5 * (retry_count + 1), 60)
                    logger.warning(f"Connection error for {indicator} (attempt {retry_count + 1}), retrying in {wait_time}s: {e}")
                    time.sleep(wait_time)
                    self.stats['retries'] += 1
                    return self._make_misp_request_single(indicator, attr_type, retry_count + 1)
                logger.error(f"Connection failed permanently for {indicator}: {e}")
                self.failed_indicators.add(indicator)
                return []

            except requests.exceptions.RequestException as e:
                # The rate-limit and server-error branches above raise
                # RequestException so the whole indicator can be retried here
                # instead of being dropped by the generic handler.
                self.stats['api_calls_failed'] += 1
                if retry_count < self.max_retries:
                    self.stats['retries'] += 1
                    return self._make_misp_request_single(indicator, attr_type, retry_count + 1)
                logger.error(f"Max retries exceeded for {indicator}: {e}")
                self.failed_indicators.add(indicator)
                return []

            except Exception as e:
                logger.error(f"Unexpected error querying {indicator}: {e}")
                self.stats['api_calls_failed'] += 1
                return []

        return results

    def extract_indicators_from_event(self, event: Any, attr: str, timesketch_attr: str) -> List[str]:
        """Extract and validate indicators from event."""
        try:
            loc = event.source.get(timesketch_attr)
            if not loc:
                return []

            indicators = []
            loc_str = str(loc)

            if attr == "ip":
                if timesketch_attr == "message":
                    # Extract IPs from nginx access log messages
                    ip_matches = self.ip_pattern.findall(loc_str)
                    indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
                elif timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
                    if self._is_valid_ip(loc_str):
                        indicators = [loc_str]

            elif attr in ["md5", "sha1", "sha256"]:
                if self._is_valid_hash(loc_str, attr):
                    indicators = [loc_str.lower().strip()]

            elif attr == "filename":
                filename = ntpath.basename(loc_str).strip()
                if filename and len(filename) > 3 and '.' in filename:
                    indicators = [filename]

            return indicators

        except Exception as e:
            logger.debug(f"Error extracting indicators from event: {e}")
            return []
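
    # The marking logic below reads only a few fields from each MISP
    # attribute returned by restSearch; the assumed item shape is roughly:
    #   {"Event": {"info": "...", "threat_level_id": "1",
    #              "Orgc": {"name": "..."}}, ...}
    # Any other fields on the attribute are ignored.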

    def mark_event_with_intelligence(self, event: Any, misp_results: List[Dict], attr: str) -> None:
        """Mark event with MISP intelligence information."""
        try:
            event_id = event.source.get('_id', '')
            if event_id in self.marked_events:
                return

            self.marked_events.add(event_id)

            # Build intelligence message
            if attr == "ip":
                msg_prefix = "MISP: Threat IP"
            elif attr in ["md5", "sha1", "sha256"]:
                msg_prefix = f"MISP: Malicious {attr.upper()}"
            else:
                msg_prefix = f"MISP: Known {attr.upper()}"

            # Extract key information
            events_info = []
            orgs_info = set()
            threat_levels = []

            for misp_attr in misp_results[:3]:  # Limit to first 3 for message clarity
                event_info = misp_attr.get("Event", {})
                event_desc = event_info.get("info", "Unknown")[:40]  # Truncate
                org_name = event_info.get("Orgc", {}).get("name", "Unknown")
                threat_level = event_info.get("threat_level_id")

                if event_desc and event_desc != "Unknown":
                    events_info.append(event_desc)
                if org_name and org_name != "Unknown":
                    orgs_info.add(org_name)
                if threat_level:
                    threat_levels.append(int(threat_level))

            # Build comprehensive message
            msg_parts = [msg_prefix]

            if events_info:
                msg_parts.append(f"Events: {' | '.join(events_info[:2])}")

            if len(misp_results) > 3:
                msg_parts.append(f"({len(misp_results)} total matches)")

            if len(orgs_info) > 1:
                msg_parts.append(f"Sources: {', '.join(list(orgs_info)[:2])}")
            elif orgs_info and list(orgs_info)[0] != "Unknown":
                msg_parts.append(f"Source: {list(orgs_info)[0]}")

            if threat_levels:
                min_threat = min(threat_levels)  # Lower = higher threat
                threat_names = {1: "HIGH", 2: "MEDIUM", 3: "LOW", 4: "UNDEFINED"}
                msg_parts.append(f"Threat: {threat_names.get(min_threat, 'UNKNOWN')}")

            final_message = " | ".join(msg_parts)

            # Add tags
            tags = [f"MISP-{attr}", "threat-intel"]
            if self.include_community and len(orgs_info) > 1:
                tags.append("cross-org-intel")

            event.add_comment(final_message)
            event.add_tags(tags)
            event.commit()

            self.stats['events_marked'] += 1
            self.stats['total_matches'] += len(misp_results)

        except Exception as e:
            logger.error(f"Error marking event: {e}")

    def process_indicators_batch(self, indicators: List[str], attr: str) -> Dict[str, List[Dict]]:
        """Process indicators with careful rate limiting."""
        results = {}

        for i, indicator in enumerate(indicators):
            if indicator in self.failed_indicators:
                continue

            logger.debug(f"Processing indicator {i+1}/{len(indicators)}: {indicator}")

            misp_results = self._make_misp_request_single(indicator, attr)

            if misp_results:
                results[indicator] = misp_results
                logger.info(f"MISP hit: {indicator} ({len(misp_results)} matches)")

            # Rate limiting between requests
            time.sleep(self.request_delay)

            # Progress update every 50 indicators
            if (i + 1) % 50 == 0:
                logger.info(f"Processed {i+1}/{len(indicators)} indicators, "
                            f"{len(results)} hits, "
                            f"{self.stats['timeouts']} timeouts, "
                            f"{self.stats['api_calls_failed']} failures")

        return results
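
    # Rough cost of a batch with the defaults above: every indicator pays
    # request_delay (1.0s) after its query, and IP lookups add ~0.2s per
    # search type (ip-src and ip-dst), so a batch of N unique IPs takes at
    # least ~1.4s * N before server latency; hash and filename lookups take
    # at least ~1.2s * N.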

    def query_misp(self, query: str, attr: str, timesketch_attr: str) -> None:
        """Main processing with ultra-reliable chunked approach."""
        logger.info(f"Starting ultra-reliable MISP analysis for {attr} in {timesketch_attr}")
        logger.info(f"Configuration: chunk_size={self.chunk_size}, "
                    f"max_retries={self.max_retries}, "
                    f"request_delay={self.request_delay}s, "
                    f"include_community={self.include_community}")

        # Process events in chunks
        events_stream = self.event_stream(
            query_string=query,
            return_fields=[timesketch_attr, '_id', 'timestamp']
        )

        current_chunk = []

        try:
            for event in events_stream:
                current_chunk.append(event)
                self.stats['events_processed'] += 1

                # Process when chunk is full
                if len(current_chunk) >= self.chunk_size:
                    self._process_events_chunk(current_chunk, attr, timesketch_attr)
                    current_chunk = []

                # Progress logging
                if self.stats['events_processed'] % 5000 == 0:
                    success_rate = (self.stats['api_calls_successful'] /
                                    max(1, self.stats['api_calls_successful'] + self.stats['api_calls_failed']) * 100)
                    logger.info(f"Progress: {self.stats['events_processed']} events, "
                                f"{self.stats['events_marked']} marked, "
                                f"{len(self.processed_indicators)} indicators processed, "
                                f"{success_rate:.1f}% API success rate")

            # Process final chunk
            if current_chunk:
                self._process_events_chunk(current_chunk, attr, timesketch_attr)

        except Exception as e:
            logger.error(f"Critical error during processing: {e}")
            raise

    def _process_events_chunk(self, events_chunk: List[Any], attr: str, timesketch_attr: str) -> None:
        """Process a chunk of events with indicator extraction and MISP queries."""
        # Extract all indicators from the chunk
        chunk_indicators = []
        event_to_indicators = {}

        for event in events_chunk:
            indicators = self.extract_indicators_from_event(event, attr, timesketch_attr)
            if indicators:
                event_id = event.source.get('_id', '')
                event_to_indicators[event_id] = (event, indicators)
                chunk_indicators.extend(indicators)

        # Get unique indicators that have not been queried (or failed) before
        unique_indicators = list(dict.fromkeys(chunk_indicators))
        new_indicators = [ind for ind in unique_indicators
                          if ind not in self.processed_indicators and ind not in self.failed_indicators]

        if new_indicators:
            logger.info(f"Processing {len(new_indicators)} new {attr} indicators from {len(events_chunk)} events")
            self.stats['indicators_extracted'] += len(new_indicators)

            # Query MISP for new indicators and update the cache
            misp_results = self.process_indicators_batch(new_indicators, attr)
            self.processed_indicators.update(new_indicators)
            for indicator, results in misp_results.items():
                self.result_dict[f"{attr}:{indicator}"] = results

        # Mark matching events, including events whose indicators were already
        # resolved by an earlier chunk and now only live in the cache
        for event_id, (event, indicators) in event_to_indicators.items():
            if event_id in self.marked_events:
                continue

            matching_results = []
            for indicator in indicators:
                key = f"{attr}:{indicator}"
                if key in self.result_dict:
                    matching_results.extend(self.result_dict[key])

            if matching_results:
                self.mark_event_with_intelligence(event, matching_results, attr)
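
    # run() below saves a view filtering on the tags added above. Because the
    # query uses OR with the generic "threat-intel" tag, that view also
    # surfaces events tagged by the analyzer's other attribute passes, not
    # only the current one.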

    def run(self) -> str:
        """Entry point with comprehensive error handling and reporting."""
        if not self.misp_url or not self.misp_api_key:
            return "Error: MISP configuration missing"

        start_time = time.time()

        try:
            self.query_misp(self._query_string, self._attr, self._timesketch_attr)

            # Create view for matches
            if self.stats['events_marked'] > 0:
                view_name = f"MISP {self._attr.upper()} Threats"
                if self.include_community:
                    view_name += " (Cross-Org)"

                self.sketch.add_view(
                    view_name=view_name,
                    analyzer_name=self.NAME,
                    query_string=f'tag:"MISP-{self._attr}" OR tag:"threat-intel"',
                )

            # Comprehensive results
            elapsed = time.time() - start_time
            total_api_calls = self.stats['api_calls_successful'] + self.stats['api_calls_failed']
            success_rate = (self.stats['api_calls_successful'] / max(1, total_api_calls)) * 100

            result = (f"[{self._timesketch_attr}] MISP Analysis: "
                      f"{self.stats['events_marked']}/{self.stats['events_processed']} events marked | "
                      f"{len(self.processed_indicators)} indicators processed | "
                      f"{total_api_calls} API calls ({success_rate:.1f}% success) | "
                      f"{self.stats['timeouts']} timeouts | "
                      f"{elapsed:.0f}s")

            logger.info(result)
            return result

        except Exception as e:
            logger.error(f"MISP analyzer failed: {e}")
            return f"[{self._timesketch_attr}] MISP Error: {str(e)}"
        finally:
            try:
                self.session.close()
            except Exception:
                pass


manager.AnalysisManager.register_analyzer(MispAnalyzer)
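
# Expected Flask configuration (a sketch; not defined in this file): the
# analyzer reads MISP_URL and MISP_API_KEY from the Timesketch application
# config, typically timesketch.conf. Values below are placeholders:
#
#   MISP_URL = "https://misp.example.org"
#   MISP_API_KEY = "<MISP automation key>"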