Updated misp_analyzer.py
commit f52710cfb6 (parent 59ea0dd658)

misp_analyzer.py (592 changed lines)
@@ -1,30 +1,25 @@
-"""Index analyzer plugin for MISP"""
+"""Index analyzer plugin for MISP - Simple and reliable for large-scale processing."""

 import logging
 import ntpath
 import re
 import requests
 import time
-import json
-from collections import defaultdict
-from typing import List, Dict, Set, Any, Optional
 from urllib3.exceptions import InsecureRequestWarning

 from flask import current_app
 from timesketch.lib.analyzers import interface
 from timesketch.lib.analyzers import manager

 requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

 logger = logging.getLogger("timesketch.analyzers.misp")


 class MispAnalyzer(interface.BaseAnalyzer):
-    """Ultra-reliable MISP Analyzer for large-scale processing."""
+    """Simple, reliable MISP Analyzer for large-scale processing."""

     NAME = "misp_analyzer"
-    DISPLAY_NAME = "MISP Enhanced"
-    DESCRIPTION = "Mark events using MISP with ultra-reliable large-scale support"
+    DISPLAY_NAME = "MISP"
+    DESCRIPTION = "Mark events using MISP - Simple and Reliable"

     def __init__(self, index_name, sketch_id, timeline_id=None, **kwargs):
         """Initialize the Analyzer."""
@@ -37,383 +32,271 @@ class MispAnalyzer(interface.BaseAnalyzer):
         self._attr = kwargs.get("attr")
         self._timesketch_attr = kwargs.get("timesketch_attr")

-        self.include_community = kwargs.get("include_community", True)
-        self.chunk_size = kwargs.get("chunk_size", 500)
-        self.max_retries = kwargs.get("max_retries", 5)
-        self.base_timeout = kwargs.get("base_timeout", 30)
-        self.max_timeout = kwargs.get("max_timeout", 180)
-        self.request_delay = kwargs.get("request_delay", 1.0)
-        self.max_indicators_per_batch = kwargs.get("max_indicators_per_batch", 10)
+        # Simple configuration for reliability
+        self.include_community = kwargs.get("include_community", False)  # Default to false for reliability
+        self.chunk_size = kwargs.get("chunk_size", 1000)  # Process in chunks
+        self.max_retries = kwargs.get("max_retries", 2)  # Minimal retries
+        self.request_delay = kwargs.get("request_delay", 0.5)  # Small delay between requests

         self.ip_pattern = re.compile(r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b')

-        # Tracking sets
+        # Track processed items
         self.marked_events = set()
         self.processed_indicators = set()
         self.failed_indicators = set()

         # Simple stats
         self.stats = {
             'events_processed': 0,
-            'indicators_extracted': 0,
-            'api_calls_successful': 0,
-            'api_calls_failed': 0,
-            'events_marked': 0,
-            'total_matches': 0,
-            'timeouts': 0,
-            'retries': 0
+            'indicators_found': 0,
+            'api_calls': 0,
+            'api_timeouts': 0,
+            'events_marked': 0
         }
-
-        # Session for connection reuse
-        self.session = requests.Session()
-        self.session.verify = False
-        self.session.headers.update({
-            "Authorization": self.misp_api_key,
-            "Content-Type": "application/json",
-            "User-Agent": "Timesketch-MISP-Analyzer/1.0"
-        })
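Note on the dropped Session block: a shared requests.Session reuses TCP connections and sends the Authorization header once per session, while the simplified code passes headers on every call. A minimal sketch of the two styles, with placeholder URL and key (not values from this repo):

import requests

MISP_URL = "https://misp.example.org"  # placeholder
API_KEY = "REDACTED"                   # placeholder

# Style kept by this commit: one-off call, headers passed each time.
def search_once(value, attr_type):
    return requests.post(
        f"{MISP_URL}/attributes/restSearch/",
        json={"returnFormat": "json", "value": value, "type": attr_type},
        headers={"Authorization": API_KEY},
        verify=False,
        timeout=45,
    )

# Style removed by this commit: shared session with connection reuse.
session = requests.Session()
session.verify = False
session.headers.update({"Authorization": API_KEY, "Content-Type": "application/json"})

def search_with_session(value, attr_type):
    return session.post(
        f"{MISP_URL}/attributes/restSearch/",
        json={"returnFormat": "json", "value": value, "type": attr_type},
        timeout=45,
    )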

     @staticmethod
     def get_kwargs():
-        """Get kwargs for the analyzer with ultra-reliable settings."""
-        base_config = {
-            "include_community": True,
-            "chunk_size": 500,
-            "max_retries": 5,
-            "base_timeout": 30,
-            "request_delay": 1.0,
-            "max_indicators_per_batch": 10,
-        }
-
+        """Get kwargs for the analyzer - keeping original working structure."""
         to_query = [
             {
                 "query_string": "md5_hash:*",
                 "attr": "md5",
                 "timesketch_attr": "md5_hash",
-                **base_config
+                "include_community": False,  # Start with own org only
             },
             {
                 "query_string": "sha1_hash:*",
                 "attr": "sha1",
                 "timesketch_attr": "sha1_hash",
-                **base_config
+                "include_community": False,
             },
             {
                 "query_string": "sha256_hash:*",
                 "attr": "sha256",
                 "timesketch_attr": "sha256_hash",
-                **base_config
+                "include_community": False,
             },
             {
                 "query_string": "filename:*",
                 "attr": "filename",
                 "timesketch_attr": "filename",
-                **base_config
+                "include_community": False,
             },
             {
                 "query_string": "message:*",
-                "attr": "ip",
+                "attr": "ip-src",
                 "timesketch_attr": "message",
-                **base_config
+                "include_community": False,
             },
             {
-                "query_string": "source_ip:* OR src_ip:* OR client_ip:*",
-                "attr": "ip",
+                "query_string": "message:*",
+                "attr": "ip-dst",
+                "timesketch_attr": "message",
+                "include_community": False,
+            },
+            {
+                "query_string": "source_ip:*",
+                "attr": "ip-src",
                 "timesketch_attr": "source_ip",
-                **base_config
+                "include_community": False,
             },
         ]
         return to_query

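For context, each dict in to_query is the configuration for one separate analyzer run; roughly, the caller fans the list out like this (a hypothetical sketch, not Timesketch's actual dispatch code):

# Hypothetical fan-out: each dict from get_kwargs() becomes the kwargs
# for one analyzer instance.
def fan_out(analyzer_cls, index_name, sketch_id):
    runs = []
    for kwargs in analyzer_cls.get_kwargs():
        runs.append(analyzer_cls(index_name, sketch_id, **kwargs))
    return runs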
-    def _is_valid_ip(self, ip_str: str) -> bool:
-        """Enhanced IP validation for nginx logs."""
+    def _is_valid_ip(self, ip_str):
+        """Simple IP validation - keeping original working version."""
         try:
             import ipaddress
             ip_str = ip_str.strip()
-
-            # Basic format check first
-            if not re.match(r'^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$', ip_str):
-                return False
-
-            ip_obj = ipaddress.ip_address(ip_str)
-
-            # Filter out non-routable addresses
-            if (ip_obj.is_private or ip_obj.is_loopback or
-                ip_obj.is_multicast or ip_obj.is_reserved or
-                ip_obj.is_link_local or ip_obj.is_unspecified):
-                return False
-
-            # Nginx-specific filters
-            if (ip_str.startswith(('0.', '10.', '172.', '192.168.', '127.', '169.254.', '224.')) or
-                ip_str in ['255.255.255.255', '0.0.0.0']):
-                return False
-
+            ipaddress.ip_address(ip_str)
+            # Filter out invalid ranges
+            if ip_str.startswith(('0.', '127.', '255.255.255.255', '10.', '192.168.', '172.')):
+                return False
             return True
-        except (ValueError, AttributeError, TypeError):
+        except (ValueError, AttributeError):
             return False

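Note the behavioral gap between the two validators: both prefix filters reject every 172.x address, although only 172.16.0.0/12 is private, while the removed ipaddress-property checks matched the RFC ranges exactly:

import ipaddress

def is_public(ip_str):
    # Exact RFC-range check, as in the removed version.
    try:
        ip = ipaddress.ip_address(ip_str.strip())
    except ValueError:
        return False
    return not (ip.is_private or ip.is_loopback or ip.is_multicast
                or ip.is_reserved or ip.is_link_local or ip.is_unspecified)

print(is_public("172.32.1.1"))  # True: public, yet startswith('172.') rejects it
print(is_public("172.16.1.1"))  # False: genuinely private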
-    def _is_valid_hash(self, hash_str: str, hash_type: str) -> bool:
-        """Validate hash format with strict checking."""
-        if not hash_str or not isinstance(hash_str, str):
-            return False
-
-        hash_str = hash_str.strip().lower()
-
-        # Check for obvious non-hash patterns
-        if not hash_str or hash_str in ['null', 'none', '0', '-']:
-            return False
-
-        hash_lengths = {"md5": 32, "sha1": 40, "sha256": 64}
-        expected_length = hash_lengths.get(hash_type)
-
-        if not expected_length or len(hash_str) != expected_length:
-            return False
-
-        return all(c in '0123456789abcdef' for c in hash_str)
+    def _is_valid_hash(self, hash_str, hash_type):
+        """Simple hash validation - keeping original working version."""
+        if not hash_str:
+            return False
+
+        hash_str = hash_str.strip().lower()
+
+        if hash_type == "md5":
+            return len(hash_str) == 32 and all(c in '0123456789abcdef' for c in hash_str)
+        elif hash_type == "sha1":
+            return len(hash_str) == 40 and all(c in '0123456789abcdef' for c in hash_str)
+        elif hash_type == "sha256":
+            return len(hash_str) == 64 and all(c in '0123456789abcdef' for c in hash_str)
+
+        return False

-    def _calculate_payload_size(self, payload: Dict[str, Any]) -> int:
-        """Calculate approximate payload size in bytes."""
-        try:
-            return len(json.dumps(payload).encode('utf-8'))
-        except:
-            return 0
+    def query_misp_single(self, value, attr, retry_count=0):
+        """Query MISP for a single value - enhanced with minimal retry logic."""
+        if value in self.failed_indicators:
+            return []
+
+        try:
+            # Build basic payload
+            payload = {"returnFormat": "json", "value": value, "type": attr}
+
+            # Add community search if enabled
+            if self.include_community:
+                payload["distribution"] = [0, 1, 2]  # Own, community, connected
+
+            self.stats['api_calls'] += 1
+
+            response = requests.post(
+                f"{self.misp_url}/attributes/restSearch/",
+                json=payload,
+                headers={"Authorization": self.misp_api_key},
+                verify=False,
+                timeout=45,  # Slightly increased from original 30s
+            )
+
+            if response.status_code != 200:
+                logger.debug(f"MISP API returned status {response.status_code} for {value}")
+                return []
+
+            data = response.json()
+            return data.get("response", {}).get("Attribute", [])
+
+        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
+            self.stats['api_timeouts'] += 1
+
+            if retry_count < self.max_retries:
+                wait_time = (retry_count + 1) * 2  # Simple backoff: 2s, 4s
+                logger.warning(f"Timeout for {value}, retrying in {wait_time}s (attempt {retry_count + 1})")
+                time.sleep(wait_time)
+                return self.query_misp_single(value, attr, retry_count + 1)
+            else:
+                logger.error(f"Max retries exceeded for {value}: {e}")
+                self.failed_indicators.add(value)
+                return []
+
+        except Exception as e:
+            logger.debug(f"Error querying MISP for {value}: {e}")
+            return []

-    def _make_misp_request_single(self, indicator: str, attr_type: str, retry_count: int = 0) -> List[Dict]:
-        """Make single indicator MISP request with progressive timeout."""
-        timeout = min(self.base_timeout + (retry_count * 10), self.max_timeout)
-
-        # Determine search types
-        search_types = ["ip-src", "ip-dst"] if attr_type == "ip" else [attr_type]
-        results = []
-
-        for search_type in search_types:
-            try:
-                # Build minimal payload
-                distribution_levels = [0]  # Start with own org only
-                if self.include_community:
-                    distribution_levels.extend([1, 2])
-
-                payload = {
-                    "returnFormat": "json",
-                    "value": indicator,
-                    "type": search_type,
-                    "enforceWarninglist": False,
-                    "includeDecayScore": False,
-                    "includeFullModel": False,
-                    "distribution": distribution_levels,
-                    "limit": 100,  # Conservative limit
-                }
-
-                payload_size = self._calculate_payload_size(payload)
-                logger.debug(f"Querying {indicator} ({search_type}) - payload: {payload_size} bytes, timeout: {timeout}s")
-
-                response = self.session.post(
-                    f"{self.misp_url}/attributes/restSearch/",
-                    json=payload,
-                    timeout=timeout,
-                )
-
-                if response.status_code == 200:
-                    data = response.json()
-                    attributes = data.get("response", {}).get("Attribute", [])
-                    results.extend(attributes)
-                    self.stats['api_calls_successful'] += 1
-
-                elif response.status_code == 429:  # Rate limited
-                    wait_time = min(5 * (retry_count + 1), 30)
-                    logger.warning(f"Rate limited for {indicator}, waiting {wait_time}s")
-                    time.sleep(wait_time)
-                    raise requests.exceptions.RequestException("Rate limited")
-
-                elif response.status_code >= 500:  # Server error
-                    logger.warning(f"Server error {response.status_code} for {indicator}")
-                    if retry_count < self.max_retries:
-                        raise requests.exceptions.RequestException(f"Server error {response.status_code}")
-
-                else:
-                    logger.debug(f"No results for {indicator} ({search_type}): status {response.status_code}")
-
-                # Delay between search types
-                time.sleep(0.2)
-
-            except (requests.exceptions.Timeout, TimeoutError) as e:
-                self.stats['timeouts'] += 1
-                if retry_count < self.max_retries:
-                    wait_time = min(2 ** retry_count, 30)
-                    logger.warning(f"Timeout for {indicator} (attempt {retry_count + 1}/{self.max_retries}), retrying in {wait_time}s")
-                    time.sleep(wait_time)
-                    self.stats['retries'] += 1
-                    return self._make_misp_request_single(indicator, attr_type, retry_count + 1)
-                else:
-                    logger.error(f"Max retries exceeded for {indicator}: {e}")
-                    self.stats['api_calls_failed'] += 1
-                    self.failed_indicators.add(indicator)
-                    return []
-
-            except requests.exceptions.ConnectionError as e:
-                self.stats['api_calls_failed'] += 1
-                if retry_count < self.max_retries:
-                    wait_time = min(5 * (retry_count + 1), 60)
-                    logger.warning(f"Connection error for {indicator} (attempt {retry_count + 1}), retrying in {wait_time}s: {e}")
-                    time.sleep(wait_time)
-                    self.stats['retries'] += 1
-                    return self._make_misp_request_single(indicator, attr_type, retry_count + 1)
-                else:
-                    logger.error(f"Connection failed permanently for {indicator}: {e}")
-                    self.failed_indicators.add(indicator)
-                    return []
-
-            except Exception as e:
-                logger.error(f"Unexpected error querying {indicator}: {e}")
-                self.stats['api_calls_failed'] += 1
-                return []
-
-        return results
-
-    def extract_indicators_from_event(self, event: Any, attr: str, timesketch_attr: str) -> List[str]:
-        """Extract and validate indicators from event."""
-        try:
-            loc = event.source.get(timesketch_attr)
-            if not loc:
-                return []
-
-            indicators = []
-            loc_str = str(loc)
-
-            if attr == "ip":
-                if timesketch_attr == "message":
-                    # Extract IPs from nginx access log messages
-                    ip_matches = self.ip_pattern.findall(loc_str)
-                    indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
-                elif timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
-                    if self._is_valid_ip(loc_str):
-                        indicators = [loc_str]
-
-            elif attr in ["md5", "sha1", "sha256"]:
-                if self._is_valid_hash(loc_str, attr):
-                    indicators = [loc_str.lower().strip()]
-
-            elif attr == "filename":
-                filename = ntpath.basename(loc_str).strip()
-                if filename and len(filename) > 3 and '.' in filename:
-                    indicators = [filename]
-
-            return indicators
-        except Exception as e:
-            logger.debug(f"Error extracting indicators from event: {e}")
-            return []

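A quick worst-case estimate for the new retry policy: with a 45s request timeout, max_retries=2, and the linear 2s/4s backoff above, one permanently unreachable indicator costs about 141 seconds before it lands in failed_indicators:

# Worst case for one indicator that always times out:
timeout = 45                             # per-request timeout used above
waits = [(n + 1) * 2 for n in range(2)]  # max_retries=2 -> waits of 2s and 4s
attempts = 1 + 2                         # initial call plus two retries
print(attempts * timeout + sum(waits))   # 141 seconds before blacklisting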
-    def mark_event_with_intelligence(self, event: Any, misp_results: List[Dict], attr: str) -> None:
-        """Mark event with MISP intelligence information."""
-        try:
-            event_id = event.source.get('_id', '')
-            if event_id in self.marked_events:
-                return
-
-            self.marked_events.add(event_id)
-
-            # Build intelligence message
-            if attr == "ip":
-                msg_prefix = "MISP: Threat IP"
-            elif attr in ["md5", "sha1", "sha256"]:
-                msg_prefix = f"MISP: Malicious {attr.upper()}"
-            else:
-                msg_prefix = f"MISP: Known {attr.upper()}"
-
-            # Extract key information
-            events_info = []
-            orgs_info = set()
-            threat_levels = []
-
-            for misp_attr in misp_results[:3]:  # Limit to first 3 for message clarity
-                event_info = misp_attr.get("Event", {})
-                event_desc = event_info.get("info", "Unknown")[:40]  # Truncate
-                org_name = event_info.get("Orgc", {}).get("name", "Unknown")
-                threat_level = event_info.get("threat_level_id")
-
-                if event_desc and event_desc != "Unknown":
-                    events_info.append(event_desc)
-                if org_name and org_name != "Unknown":
-                    orgs_info.add(org_name)
-                if threat_level:
-                    threat_levels.append(int(threat_level))
-
-            # Build comprehensive message
-            msg_parts = [msg_prefix]
-
-            if events_info:
-                msg_parts.append(f"Events: {' | '.join(events_info[:2])}")
-
-            if len(misp_results) > 3:
-                msg_parts.append(f"({len(misp_results)} total matches)")
-
-            if len(orgs_info) > 1:
-                msg_parts.append(f"Sources: {', '.join(list(orgs_info)[:2])}")
-            elif orgs_info and list(orgs_info)[0] != "Unknown":
-                msg_parts.append(f"Source: {list(orgs_info)[0]}")
-
-            if threat_levels:
-                min_threat = min(threat_levels)  # Lower = higher threat
-                threat_names = {1: "HIGH", 2: "MEDIUM", 3: "LOW", 4: "UNDEFINED"}
-                msg_parts.append(f"Threat: {threat_names.get(min_threat, 'UNKNOWN')}")
-
-            final_message = " | ".join(msg_parts)
-
-            # Add tags
-            tags = [f"MISP-{attr}", "threat-intel"]
-            if self.include_community and len(orgs_info) > 1:
-                tags.append("cross-org-intel")
-
-            event.add_comment(final_message)
-            event.add_tags(tags)
-            event.commit()
-
-            self.stats['events_marked'] += 1
-            self.stats['total_matches'] += len(misp_results)
-
-        except Exception as e:
-            logger.error(f"Error marking event: {e}")
+    def mark_event(self, event, result, attr):
+        """Mark event with MISP intelligence - keeping original working version."""
+        try:
+            event_id = event.source.get('_id', '')
+            if event_id in self.marked_events:
+                return
+
+            self.marked_events.add(event_id)
+
+            if attr.startswith("ip-"):
+                msg = "MISP: Malicious IP - "
+            else:
+                msg = "MISP: Known indicator - "
+
+            event_info = result[0].get("Event", {}).get("info", "Unknown")
+            msg += event_info
+
+            if len(result) > 1:
+                msg += f" (+{len(result)-1} more)"
+
+            event.add_comment(msg)
+            event.add_tags([f"MISP-{attr}", "threat-intel"])
+            event.commit()
+
+            self.stats['events_marked'] += 1
+
+        except Exception as e:
+            logger.error(f"Error marking event: {e}")

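For illustration, the message the new mark_event() builds for a two-hit result (sample data made up):

# Sample MISP attributes; the "info" strings are invented for the example.
result = [
    {"Event": {"info": "Emotet C2 infrastructure"}},
    {"Event": {"info": "Feed import 2024-01"}},
]
attr = "ip-src"
msg = "MISP: Malicious IP - " if attr.startswith("ip-") else "MISP: Known indicator - "
msg += result[0].get("Event", {}).get("info", "Unknown")
if len(result) > 1:
    msg += f" (+{len(result)-1} more)"
print(msg)  # MISP: Malicious IP - Emotet C2 infrastructure (+1 more)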
-    def process_indicators_batch(self, indicators: List[str], attr: str) -> Dict[str, List[Dict]]:
-        """Process indicators with careful rate limiting."""
-        results = {}
-
-        for i, indicator in enumerate(indicators):
-            if indicator in self.failed_indicators:
-                continue
-
-            logger.debug(f"Processing indicator {i+1}/{len(indicators)}: {indicator}")
-
-            misp_results = self._make_misp_request_single(indicator, attr)
-            self.processed_indicators.add(indicator)
-
-            if misp_results:
-                results[indicator] = misp_results
-                logger.info(f"MISP hit: {indicator} ({len(misp_results)} matches)")
-
-            # Rate limiting between requests
-            time.sleep(self.request_delay)
-
-            # Progress update every 50 indicators
-            if (i + 1) % 50 == 0:
-                logger.info(f"Processed {i+1}/{len(indicators)} indicators, "
-                          f"{len(results)} hits, "
-                          f"{self.stats['timeouts']} timeouts, "
-                          f"{self.stats['api_calls_failed']} failures")
-
-        return results
+    def extract_indicators_from_chunk(self, events_chunk, attr, timesketch_attr):
+        """Extract indicators from a chunk of events."""
+        chunk_indicators = []
+        events_with_indicators = []
+
+        for event in events_chunk:
+            self.stats['events_processed'] += 1
+
+            loc = event.source.get(timesketch_attr)
+            if not loc:
+                continue
+
+            indicators = []
+
+            # Extract based on attribute type - SAME AS ORIGINAL
+            if attr.startswith("ip-") and timesketch_attr == "message":
+                ip_matches = self.ip_pattern.findall(str(loc))
+                indicators = [ip for ip in ip_matches if self._is_valid_ip(ip)]
+
+            elif attr.startswith("ip-") and timesketch_attr in ["source_ip", "src_ip", "client_ip"]:
+                if self._is_valid_ip(str(loc)):
+                    indicators = [str(loc)]
+
+            elif attr in ["md5", "sha1", "sha256"]:
+                if self._is_valid_hash(str(loc), attr):
+                    indicators = [str(loc)]
+
+            elif attr == "filename":
+                filename = ntpath.basename(str(loc))
+                if filename and len(filename) > 1:
+                    indicators = [filename]
+
+            # Store event with its indicators
+            if indicators:
+                events_with_indicators.append((event, indicators))
+                chunk_indicators.extend(indicators)
+
+        return events_with_indicators, chunk_indicators
+
+    def process_chunk(self, events_chunk, attr, timesketch_attr):
+        """Process a chunk of events."""
+        events_with_indicators, chunk_indicators = self.extract_indicators_from_chunk(
+            events_chunk, attr, timesketch_attr
+        )
+
+        if not chunk_indicators:
+            return
+
+        # Get unique new indicators
+        unique_indicators = list(dict.fromkeys(chunk_indicators))
+        new_indicators = [ind for ind in unique_indicators if ind not in self.processed_indicators]
+
+        if not new_indicators:
+            # Still need to check existing cache for matches
+            self.check_existing_matches(events_with_indicators, attr)
+            return
+
+        logger.info(f"Processing {len(new_indicators)} new {attr} indicators from {len(events_chunk)} events")
+        self.stats['indicators_found'] += len(new_indicators)
+
+        # Query MISP for each new indicator
+        for indicator in new_indicators:
+            if indicator in self.failed_indicators:
+                continue
+
+            result = self.query_misp_single(indicator, attr)
+            self.processed_indicators.add(indicator)
+
+            if result:
+                self.result_dict[f"{attr}:{indicator}"] = result
+                logger.info(f"MISP hit: {indicator} ({len(result)} matches)")
+
+            # Small delay to be nice to MISP server
+            time.sleep(self.request_delay)
+
+        # Mark events that have matches
+        self.check_existing_matches(events_with_indicators, attr)
+
+    def check_existing_matches(self, events_with_indicators, attr):
+        """Check events against existing MISP results."""
+        for event, indicators in events_with_indicators:
+            # Check if any indicator has MISP match
+            for indicator in indicators:
+                key = f"{attr}:{indicator}"
+                if key in self.result_dict and self.result_dict[key]:
+                    self.mark_event(event, self.result_dict[key], attr)
+                    break  # Only mark once per event

-    def query_misp(self, query: str, attr: str, timesketch_attr: str) -> None:
-        """Main processing with ultra-reliable chunked approach."""
-        logger.info(f"Starting ultra-reliable MISP analysis for {attr} in {timesketch_attr}")
-        logger.info(f"Configuration: chunk_size={self.chunk_size}, "
-                   f"max_retries={self.max_retries}, "
-                   f"request_delay={self.request_delay}s, "
-                   f"include_community={self.include_community}")
+    def query_misp(self, query, attr, timesketch_attr):
+        """Process events in chunks - enhanced for large datasets."""
+        logger.info(f"Starting MISP analysis for {attr} in {timesketch_attr}")
+        logger.info(f"Community search: {'enabled' if self.include_community else 'disabled'}")

-        # Process events in chunks
+        # Get event stream
         events_stream = self.event_stream(
-            query_string=query,
-            return_fields=[timesketch_attr, '_id', 'timestamp']
+            query_string=query,
+            return_fields=[timesketch_attr, '_id']
         )

         current_chunk = []
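Note: the list(dict.fromkeys(...)) idiom in process_chunk deduplicates while preserving first-seen order, which a plain set() would not guarantee:

chunk_indicators = ["1.2.3.4", "5.6.7.8", "1.2.3.4", "9.9.9.9"]
print(list(dict.fromkeys(chunk_indicators)))  # ['1.2.3.4', '5.6.7.8', '9.9.9.9']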
@@ -421,121 +304,64 @@ class MispAnalyzer(interface.BaseAnalyzer):
         try:
             for event in events_stream:
                 current_chunk.append(event)
-                self.stats['events_processed'] += 1

                 # Process when chunk is full
                 if len(current_chunk) >= self.chunk_size:
-                    self._process_events_chunk(current_chunk, attr, timesketch_attr)
+                    self.process_chunk(current_chunk, attr, timesketch_attr)
                     current_chunk = []

                     # Progress logging
-                    if self.stats['events_processed'] % 5000 == 0:
-                        success_rate = (self.stats['api_calls_successful'] /
-                                      max(1, self.stats['api_calls_successful'] + self.stats['api_calls_failed']) * 100)
-                        logger.info(f"Progress: {self.stats['events_processed']} events, "
-                                  f"{self.stats['events_marked']} marked, "
-                                  f"{len(self.processed_indicators)} indicators processed, "
-                                  f"{success_rate:.1f}% API success rate")
+                    if self.stats['events_processed'] % 10000 == 0:
+                        logger.info(f"Progress: {self.stats['events_processed']} events processed, "
+                                  f"{self.stats['events_marked']} marked, "
+                                  f"{self.stats['api_calls']} API calls, "
+                                  f"{self.stats['api_timeouts']} timeouts")

             # Process final chunk
             if current_chunk:
-                self._process_events_chunk(current_chunk, attr, timesketch_attr)
+                self.process_chunk(current_chunk, attr, timesketch_attr)

         except Exception as e:
-            logger.error(f"Critical error during processing: {e}")
+            logger.error(f"Error during chunk processing: {e}")
             raise

-    def _process_events_chunk(self, events_chunk: List[Any], attr: str, timesketch_attr: str) -> None:
-        """Process a chunk of events with indicator extraction and MISP queries."""
-        # Extract all unique indicators from chunk
-        chunk_indicators = []
-        event_to_indicators = {}
-
-        for event in events_chunk:
-            indicators = self.extract_indicators_from_event(event, attr, timesketch_attr)
-            if indicators:
-                event_id = event.source.get('_id', '')
-                event_to_indicators[event_id] = (event, indicators)
-                chunk_indicators.extend(indicators)
-
-        # Get unique new indicators
-        unique_indicators = list(dict.fromkeys(chunk_indicators))
-        new_indicators = [ind for ind in unique_indicators
-                         if ind not in self.processed_indicators and ind not in self.failed_indicators]
-
-        if not new_indicators:
-            return
-
-        logger.info(f"Processing {len(new_indicators)} new {attr} indicators from {len(events_chunk)} events")
-        self.stats['indicators_extracted'] += len(new_indicators)
-
-        # Query MISP for new indicators
-        misp_results = self.process_indicators_batch(new_indicators, attr)
-
-        # Update cache
-        self.processed_indicators.update(new_indicators)
-        for indicator, results in misp_results.items():
-            self.result_dict[f"{attr}:{indicator}"] = results
-
-        # Mark matching events
-        for event_id, (event, indicators) in event_to_indicators.items():
-            if event_id in self.marked_events:
-                continue
-
-            matching_results = []
-            for indicator in indicators:
-                key = f"{attr}:{indicator}"
-                if key in self.result_dict:
-                    matching_results.extend(self.result_dict[key])
-
-            if matching_results:
-                self.mark_event_with_intelligence(event, matching_results, attr)
+        # Create view if we found matches
+        if self.stats['events_marked'] > 0:
+            view_name = "MISP Threat Intelligence"
+            if self.include_community:
+                view_name += " (Community)"
+
+            self.sketch.add_view(
+                view_name=view_name,
+                analyzer_name=self.NAME,
+                query_string='tag:"MISP-*" OR tag:"threat-intel"',
+            )

-    def run(self) -> str:
-        """Entry point with comprehensive error handling and reporting."""
+    def run(self):
+        """Entry point for the analyzer."""
         if not self.misp_url or not self.misp_api_key:
-            return "Error: MISP configuration missing"
+            return "No MISP configuration found"

         start_time = time.time()

         try:
             self.query_misp(self._query_string, self._attr, self._timesketch_attr)

-            # Create view for matches
-            if self.stats['events_marked'] > 0:
-                view_name = f"MISP {self._attr.upper()} Threats"
-                if self.include_community:
-                    view_name += " (Cross-Org)"
-
-                self.sketch.add_view(
-                    view_name=view_name,
-                    analyzer_name=self.NAME,
-                    query_string=f'tag:"MISP-{self._attr}" OR tag:"threat-intel"',
-                )
-
-            # Comprehensive results
             elapsed = time.time() - start_time
-            total_api_calls = self.stats['api_calls_successful'] + self.stats['api_calls_failed']
-            success_rate = (self.stats['api_calls_successful'] / max(1, total_api_calls)) * 100
+            success_rate = ((self.stats['api_calls'] - self.stats['api_timeouts']) /
+                          max(1, self.stats['api_calls']) * 100)

-            result = (f"[{self._timesketch_attr}] MISP Analysis: "
+            result = (f"[{self._timesketch_attr}] MISP Analysis Complete: "
                      f"{self.stats['events_marked']}/{self.stats['events_processed']} events marked | "
                      f"{len(self.processed_indicators)} indicators processed | "
-                     f"{total_api_calls} API calls ({success_rate:.1f}% success) | "
-                     f"{self.stats['timeouts']} timeouts | "
+                     f"{self.stats['api_calls']} API calls ({success_rate:.1f}% success) | "
                      f"{elapsed:.0f}s")

             logger.info(result)
             return result

         except Exception as e:
-            logger.error(f"MISP analyzer failed: {e}")
+            logger.error(f"MISP analyzer error: {e}")
             return f"[{self._timesketch_attr}] MISP Error: {str(e)}"
-        finally:
-            try:
-                self.session.close()
-            except:
-                pass


 manager.AnalysisManager.register_analyzer(MispAnalyzer)
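For reference, the success-rate figure in the new run() summary falls out of the simplified counters like so (numbers made up):

stats = {"api_calls": 200, "api_timeouts": 8}  # invented sample counters
success_rate = (stats["api_calls"] - stats["api_timeouts"]) / max(1, stats["api_calls"]) * 100
print(f"{success_rate:.1f}% success")          # 96.0% success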